9#include <opensubdiv/far/error.h>
10#include <opensubdiv/far/patchDescriptor.h>
11#include <opensubdiv/far/stencilTable.h>
12#include <opensubdiv/osd/glslPatchShaderSource.h>
28using OpenSubdiv::Far::LimitStencilTable;
29using OpenSubdiv::Far::StencilTable;
30using OpenSubdiv::Osd::BufferDescriptor;
31using OpenSubdiv::Osd::PatchArray;
32using OpenSubdiv::Osd::PatchArrayVector;
/* Buffer slot indices, first group: source/destination vertex data, the du/dv derivative
 * outputs, and the stencil table arrays (sizes, offsets, indices, weights, du/dv weights).
 * NOTE(review): presumably these are the binding points passed to `GPU_storagebuf_bind` /
 * `GPU_vertbuf_bind_as_ssbo` -- the bind calls are not visible in this excerpt. */
34#define SHADER_SRC_VERTEX_BUFFER_BUF_SLOT 0
35#define SHADER_DST_VERTEX_BUFFER_BUF_SLOT 1
36#define SHADER_DU_BUFFER_BUF_SLOT 2
37#define SHADER_DV_BUFFER_BUF_SLOT 3
38#define SHADER_SIZES_BUF_SLOT 4
39#define SHADER_OFFSETS_BUF_SLOT 5
40#define SHADER_INDICES_BUF_SLOT 6
41#define SHADER_WEIGHTS_BUF_SLOT 7
42#define SHADER_DU_WEIGHTS_BUF_SLOT 8
43#define SHADER_DV_WEIGHTS_BUF_SLOT 9
/* Second group: patch array, patch coordinates, patch index and patch parameter buffers.
 * These reuse the slot values 4-7 of the group above, so the two groups would collide if
 * bound at the same time. NOTE(review): presumably each group belongs to a different
 * kernel (stencil vs. patch evaluation) -- confirm against the shader interfaces. */
45#define SHADER_PATCH_ARRAY_BUFFER_BUF_SLOT 4
46#define SHADER_PATCH_COORDS_BUF_SLOT 5
47#define SHADER_PATCH_INDEX_BUFFER_BUF_SLOT 6
48#define SHADER_PATCH_PARAM_BUFFER_BUF_SLOT 7
52template<
class T> GPUStorageBuf *
create_buffer(std::vector<T>
const &src,
const char *name)
58 const size_t buffer_size = src.size() *
sizeof(
T);
62 return storage_buffer;
67 _numStencils = stencilTable->GetNumStencils();
68 if (_numStencils > 0) {
69 sizes_buf =
create_buffer(stencilTable->GetSizes(),
"osd_sized");
70 offsets_buf =
create_buffer(stencilTable->GetOffsets(),
"osd_offsets");
71 indices_buf =
create_buffer(stencilTable->GetControlIndices(),
"osd_control_indices");
72 weights_buf =
create_buffer(stencilTable->GetWeights(),
"osd_weights");
78 _numStencils = limitStencilTable->GetNumStencils();
79 if (_numStencils > 0) {
80 sizes_buf =
create_buffer(limitStencilTable->GetSizes(),
"osd_sized");
81 offsets_buf =
create_buffer(limitStencilTable->GetOffsets(),
"osd_offsets");
82 indices_buf =
create_buffer(limitStencilTable->GetControlIndices(),
"osd_control_indices");
83 weights_buf =
create_buffer(limitStencilTable->GetWeights(),
"osd_weights");
84 du_weights_buf =
create_buffer(limitStencilTable->GetDuWeights(),
"osd_du_weights");
85 dv_weights_buf =
create_buffer(limitStencilTable->GetDvWeights(),
"osd_dv_weights");
86 duu_weights_buf =
create_buffer(limitStencilTable->GetDuuWeights(),
"osd_duu_weights");
87 duv_weights_buf =
create_buffer(limitStencilTable->GetDuvWeights(),
"osd_duv_weights");
88 dvv_weights_buf =
create_buffer(limitStencilTable->GetDvvWeights(),
"osd_dvv_weights");
117 memset((
void *)&_stencilKernel, 0,
sizeof(_stencilKernel));
118 memset((
void *)&_patchKernel, 0,
sizeof(_patchKernel));
123 if (_patchArraysSSBO) {
125 _patchArraysSSBO =
nullptr;
130 BufferDescriptor
const &dstDesc,
131 BufferDescriptor
const &duDesc,
132 BufferDescriptor
const &dvDesc)
135 if (!_stencilKernel.Compile(srcDesc, dstDesc, duDesc, dvDesc, _workGroupSize)) {
139 if (!_patchKernel.Compile(srcDesc, dstDesc, duDesc, dvDesc, _workGroupSize)) {
154int GPUComputeEvaluator::GetDispatchSize(
int count)
const
156 return (
count + _workGroupSize - 1) / _workGroupSize;
159void GPUComputeEvaluator::DispatchCompute(GPUShader *shader,
int totalDispatchSize)
const
161 const int dispatchSize = GetDispatchSize(totalDispatchSize);
162 int dispatchRX = dispatchSize;
172 dispatchRX = dispatchRY = std::ceil(std::sqrt(dispatchSize));
174 if ((dispatchRX * (dispatchRY - 1)) >= dispatchSize) {
191 BufferDescriptor
const &srcDesc,
192 gpu::VertBuf *dstBuffer,
193 BufferDescriptor
const &dstDesc,
194 gpu::VertBuf *duBuffer,
195 BufferDescriptor
const &duDesc,
196 gpu::VertBuf *dvBuffer,
197 BufferDescriptor
const &dvDesc,
198 GPUStorageBuf *sizesBuffer,
199 GPUStorageBuf *offsetsBuffer,
200 GPUStorageBuf *indicesBuffer,
201 GPUStorageBuf *weightsBuffer,
202 GPUStorageBuf *duWeightsBuffer,
203 GPUStorageBuf *dvWeightsBuffer,
207 if (_stencilKernel.shader ==
nullptr) {
210 int count = end - start;
228 if (duWeightsBuffer) {
231 if (dvWeightsBuffer) {
238 _stencilKernel.shader, _stencilKernel.uniformSrcOffset, 1, 1, &srcDesc.offset);
240 _stencilKernel.shader, _stencilKernel.uniformDstOffset, 1, 1, &dstDesc.offset);
/* Helper macro: when `_stencilKernel.<uniform>` is greater than zero, packs the
 * descriptor's {offset, length, stride} into an int[3] and uploads it to that uniform of
 * the stencil shader through `GPU_shader_uniform_int_ex` (length 3, array size 1).
 * A value of exactly 0 is skipped by the `> 0` test. */
244#define BIND_BUF_DESC(uniform, desc) \
245 if (_stencilKernel.uniform > 0) { \
246 int value[] = {desc.offset, desc.length, desc.stride}; \
247 GPU_shader_uniform_int_ex(_stencilKernel.shader, _stencilKernel.uniform, 3, 1, value); \
252 DispatchCompute(_stencilKernel.shader,
count);
260 BufferDescriptor
const &srcDesc,
261 gpu::VertBuf *dstBuffer,
262 BufferDescriptor
const &dstDesc,
263 gpu::VertBuf *duBuffer,
264 BufferDescriptor
const &duDesc,
265 gpu::VertBuf *dvBuffer,
266 BufferDescriptor
const &dvDesc,
268 gpu::VertBuf *patchCoordsBuffer,
269 const PatchArrayVector &patchArrays,
270 GPUStorageBuf *patchIndexBuffer,
271 GPUStorageBuf *patchParamsBuffer)
273 if (_patchKernel.shader ==
nullptr) {
289 int patchArraySize =
sizeof(PatchArray);
290 if (_patchArraysSSBO) {
292 _patchArraysSSBO =
nullptr;
295 static_cast<const void *
>(&patchArrays[0]),
301 _patchKernel.shader, _patchKernel.uniformSrcOffset, 1, 1, &srcDesc.offset);
303 _patchKernel.shader, _patchKernel.uniformDstOffset, 1, 1, &dstDesc.offset);
/* Helper macro for patch evaluation: when `_patchKernel.<uniform>` is greater than zero,
 * packs the descriptor's {offset, length, stride} into an int[3] and uploads it to that
 * uniform of the patch shader (length 3, array size 1).
 * The guard must test the patch kernel's uniform: it previously tested `_stencilKernel`,
 * so the upload depended on the wrong shader's uniform. */
#define BIND_BUF_DESC(uniform, desc) \
  if (_patchKernel.uniform > 0) { \
    int value[] = {desc.offset, desc.length, desc.stride}; \
    GPU_shader_uniform_int_ex(_patchKernel.shader, _patchKernel.uniform, 3, 1, value); \
  }
315 DispatchCompute(_patchKernel.shader, numPatchCoords);
/* Constructor with an empty body: it assigns nothing itself.
 * NOTE(review): members are presumably zeroed by the `memset` of `_stencilKernel`
 * seen elsewhere in this file -- confirm against the class declaration. */
322GPUComputeEvaluator::_StencilKernel::_StencilKernel() {}
323GPUComputeEvaluator::_StencilKernel::~_StencilKernel()
331 BufferDescriptor
const &dstDesc,
332 BufferDescriptor
const &duDesc,
333 BufferDescriptor
const &dvDesc,
340 info.
define(
"OSD_PATCH_BASIS_METAL");
343 info.
define(
"OSD_PATCH_BASIS_GLSL");
350 std::string
length = std::to_string(srcDesc.length);
351 std::string src_stride = std::to_string(srcDesc.stride);
352 std::string dst_stride = std::to_string(dstDesc.stride);
353 std::string work_group_size = std::to_string(workGroupSize);
355 info.
define(
"SRC_STRIDE", src_stride);
356 info.
define(
"DST_STRIDE", dst_stride);
357 info.
define(
"WORK_GROUP_SIZE", work_group_size);
366 bool deriv1 = (duDesc.length > 0 || dvDesc.length > 0);
368 info.
define(
"OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES");
390 reinterpret_cast<const GPUShaderCreateInfo *
>(&info));
394bool GPUComputeEvaluator::_StencilKernel::Compile(BufferDescriptor
const &srcDesc,
395 BufferDescriptor
const &dstDesc,
396 BufferDescriptor
const &duDesc,
397 BufferDescriptor
const &dvDesc,
/* Constructor with an empty body: it assigns nothing itself.
 * NOTE(review): members are presumably zeroed by the `memset` of `_patchKernel`
 * seen elsewhere in this file -- confirm against the class declaration. */
423GPUComputeEvaluator::_PatchKernel::_PatchKernel() {}
424GPUComputeEvaluator::_PatchKernel::~_PatchKernel()
433 BufferDescriptor
const &dstDesc,
434 BufferDescriptor
const &duDesc,
435 BufferDescriptor
const &dvDesc,
442 info.
define(
"OSD_PATCH_BASIS_METAL");
445 info.
define(
"OSD_PATCH_BASIS_GLSL");
452 std::string
length = std::to_string(srcDesc.length);
453 std::string src_stride = std::to_string(srcDesc.stride);
454 std::string dst_stride = std::to_string(dstDesc.stride);
455 std::string work_group_size = std::to_string(workGroupSize);
457 info.
define(
"SRC_STRIDE", src_stride);
458 info.
define(
"DST_STRIDE", dst_stride);
459 info.
define(
"WORK_GROUP_SIZE", work_group_size);
468 bool deriv1 = (duDesc.length > 0 || dvDesc.length > 0);
470 info.
define(
"OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES");
488 reinterpret_cast<const GPUShaderCreateInfo *
>(&info));
492bool GPUComputeEvaluator::_PatchKernel::Compile(BufferDescriptor
const &srcDesc,
493 BufferDescriptor
const &dstDesc,
494 BufferDescriptor
const &duDesc,
495 BufferDescriptor
const &dvDesc,
int GPU_max_work_group_count(int index)
void GPU_compute_dispatch(GPUShader *shader, uint groups_x_len, uint groups_y_len, uint groups_z_len, const blender::gpu::shader::SpecializationConstants *constants_state=nullptr)
eGPUBackendType GPU_backend_get_type()
int GPU_shader_get_uniform(GPUShader *shader, const char *name)
void GPU_shader_uniform_int_ex(GPUShader *shader, int location, int length, int array_size, const int *value)
void GPU_shader_bind(GPUShader *shader, const blender::gpu::shader::SpecializationConstants *constants_state=nullptr)
GPUShader * GPU_shader_create_from_info(const GPUShaderCreateInfo *_info)
void GPU_shader_free(GPUShader *shader)
void GPU_memory_barrier(eGPUBarrier barrier)
@ GPU_BARRIER_SHADER_STORAGE
void GPU_storagebuf_bind(GPUStorageBuf *ssbo, int slot)
GPUStorageBuf * GPU_storagebuf_create_ex(size_t size, const void *data, GPUUsageType usage, const char *name)
void GPU_storagebuf_free(GPUStorageBuf *ssbo)
void GPU_vertbuf_bind_as_ssbo(blender::gpu::VertBuf *verts, int binding)
static bool EvalPatches(SRC_BUFFER *srcBuffer, OpenSubdiv::Osd::BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, GPUComputeEvaluator *instance, void *deviceContext=nullptr)
Generic limit eval function. This function has the same signature as the other device kernels so that ...
GPUComputeEvaluator()
Constructor.
bool Compile(OpenSubdiv::Osd::BufferDescriptor const &srcDesc, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, OpenSubdiv::Osd::BufferDescriptor const &duDesc=OpenSubdiv::Osd::BufferDescriptor(), OpenSubdiv::Osd::BufferDescriptor const &dvDesc=OpenSubdiv::Osd::BufferDescriptor())
static void Synchronize(void *deviceContext)
Wait until the dispatched kernel finishes.
~GPUComputeEvaluator()
Destructor. Note that the GL context must be made current.
static bool EvalStencils(SRC_BUFFER *srcBuffer, OpenSubdiv::Osd::BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, STENCIL_TABLE const *stencilTable, GPUComputeEvaluator *instance, void *deviceContext=nullptr)
Generic static stencil function. This function has the same signature as the other device kernels so that ...
GPUStencilTableSSBO(OpenSubdiv::Far::StencilTable const *stencilTable)
#define SHADER_SIZES_BUF_SLOT
#define SHADER_PATCH_ARRAY_BUFFER_BUF_SLOT
#define SHADER_DST_VERTEX_BUFFER_BUF_SLOT
#define SHADER_OFFSETS_BUF_SLOT
#define SHADER_DU_BUFFER_BUF_SLOT
#define SHADER_PATCH_PARAM_BUFFER_BUF_SLOT
#define SHADER_DV_BUFFER_BUF_SLOT
#define SHADER_DV_WEIGHTS_BUF_SLOT
#define SHADER_DU_WEIGHTS_BUF_SLOT
#define SHADER_WEIGHTS_BUF_SLOT
#define SHADER_PATCH_COORDS_BUF_SLOT
#define SHADER_SRC_VERTEX_BUFFER_BUF_SLOT
#define SHADER_INDICES_BUF_SLOT
#define SHADER_PATCH_INDEX_BUFFER_BUF_SLOT
#define BIND_BUF_DESC(uniform, desc)
#define assert(assertion)
float length(VecOp< float, D >) RET
static GPUShader * compile_eval_stencil_shader(BufferDescriptor const &srcDesc, BufferDescriptor const &dstDesc, BufferDescriptor const &duDesc, BufferDescriptor const &dvDesc, int workGroupSize)
GPUStorageBuf * create_buffer(std::vector< T > const &src, const char *name)
static GPUShader * compile_eval_patches_shader(BufferDescriptor const &srcDesc, BufferDescriptor const &dstDesc, BufferDescriptor const &duDesc, BufferDescriptor const &dvDesc, int workGroupSize)
static void storage_buffer_free(GPUStorageBuf **buffer)
Describe inputs & outputs, stage interfaces, resources and sources of a shader. If all data is correc...
Self & compute_source(StringRefNull filename)
Self & push_constant(Type type, StringRefNull name, int array_size=0)
Self & typedef_source(StringRefNull filename)
Self & storage_buf(int slot, Qualifier qualifiers, StringRefNull type_name, StringRefNull name, Frequency freq=Frequency::PASS)
Self & local_group_size(int local_size_x, int local_size_y=1, int local_size_z=1)
Self & define(StringRefNull name, StringRefNull value="")