9#include <opensubdiv/far/error.h>
10#include <opensubdiv/far/patchDescriptor.h>
11#include <opensubdiv/far/stencilTable.h>
27using OpenSubdiv::Far::LimitStencilTable;
28using OpenSubdiv::Far::StencilTable;
29using OpenSubdiv::Osd::BufferDescriptor;
30using OpenSubdiv::Osd::PatchArray;
31using OpenSubdiv::Osd::PatchArrayVector;
// Storage-buffer binding slots referenced by the compute shaders.

// Slots 0-3: source/destination vertex data and the first-derivative outputs.
#define SHADER_SRC_VERTEX_BUFFER_BUF_SLOT 0
#define SHADER_DST_VERTEX_BUFFER_BUF_SLOT 1
#define SHADER_DU_BUFFER_BUF_SLOT 2
#define SHADER_DV_BUFFER_BUF_SLOT 3

// Slots 4-9: stencil table data (sizes, offsets, indices and weights).
#define SHADER_SIZES_BUF_SLOT 4
#define SHADER_OFFSETS_BUF_SLOT 5
#define SHADER_INDICES_BUF_SLOT 6
#define SHADER_WEIGHTS_BUF_SLOT 7
#define SHADER_DU_WEIGHTS_BUF_SLOT 8
#define SHADER_DV_WEIGHTS_BUF_SLOT 9

// Slots 4-7 again: patch data. These values deliberately overlap the stencil
// table slots above.
// NOTE(review): presumably safe because the stencil and patch kernels never
// have their buffers bound at the same time -- confirm against the callers.
#define SHADER_PATCH_ARRAY_BUFFER_BUF_SLOT 4
#define SHADER_PATCH_COORDS_BUF_SLOT 5
#define SHADER_PATCH_INDEX_BUFFER_BUF_SLOT 6
#define SHADER_PATCH_PARAM_BUFFER_BUF_SLOT 7
57 const size_t buffer_size = src.size() *
sizeof(
T);
61 return storage_buffer;
66 _numStencils = stencilTable->GetNumStencils();
67 if (_numStencils > 0) {
68 sizes_buf =
create_buffer(stencilTable->GetSizes(),
"osd_sized");
69 offsets_buf =
create_buffer(stencilTable->GetOffsets(),
"osd_offsets");
70 indices_buf =
create_buffer(stencilTable->GetControlIndices(),
"osd_control_indices");
71 weights_buf =
create_buffer(stencilTable->GetWeights(),
"osd_weights");
77 _numStencils = limitStencilTable->GetNumStencils();
78 if (_numStencils > 0) {
79 sizes_buf =
create_buffer(limitStencilTable->GetSizes(),
"osd_sized");
80 offsets_buf =
create_buffer(limitStencilTable->GetOffsets(),
"osd_offsets");
81 indices_buf =
create_buffer(limitStencilTable->GetControlIndices(),
"osd_control_indices");
82 weights_buf =
create_buffer(limitStencilTable->GetWeights(),
"osd_weights");
83 du_weights_buf =
create_buffer(limitStencilTable->GetDuWeights(),
"osd_du_weights");
84 dv_weights_buf =
create_buffer(limitStencilTable->GetDvWeights(),
"osd_dv_weights");
85 duu_weights_buf =
create_buffer(limitStencilTable->GetDuuWeights(),
"osd_duu_weights");
86 duv_weights_buf =
create_buffer(limitStencilTable->GetDuvWeights(),
"osd_duv_weights");
87 dvv_weights_buf =
create_buffer(limitStencilTable->GetDvvWeights(),
"osd_dvv_weights");
116 memset((
void *)&_stencilKernel, 0,
sizeof(_stencilKernel));
117 memset((
void *)&_patchKernel, 0,
sizeof(_patchKernel));
122 if (_patchArraysSSBO) {
124 _patchArraysSSBO =
nullptr;
129 BufferDescriptor
const &dstDesc,
130 BufferDescriptor
const &duDesc,
131 BufferDescriptor
const &dvDesc)
134 if (!_stencilKernel.Compile(srcDesc, dstDesc, duDesc, dvDesc, _workGroupSize)) {
138 if (!_patchKernel.Compile(srcDesc, dstDesc, duDesc, dvDesc, _workGroupSize)) {
153int GPUComputeEvaluator::GetDispatchSize(
int count)
const
155 return (
count + _workGroupSize - 1) / _workGroupSize;
159 int totalDispatchSize)
const
161 const int dispatchSize = GetDispatchSize(totalDispatchSize);
162 int dispatchRX = dispatchSize;
172 dispatchRX = dispatchRY = std::ceil(std::sqrt(dispatchSize));
174 if ((dispatchRX * (dispatchRY - 1)) >= dispatchSize) {
191 BufferDescriptor
const &srcDesc,
192 gpu::VertBuf *dstBuffer,
193 BufferDescriptor
const &dstDesc,
194 gpu::VertBuf *duBuffer,
195 BufferDescriptor
const &duDesc,
196 gpu::VertBuf *dvBuffer,
197 BufferDescriptor
const &dvDesc,
198 gpu::StorageBuf *sizesBuffer,
199 gpu::StorageBuf *offsetsBuffer,
200 gpu::StorageBuf *indicesBuffer,
201 gpu::StorageBuf *weightsBuffer,
202 gpu::StorageBuf *duWeightsBuffer,
203 gpu::StorageBuf *dvWeightsBuffer,
207 if (_stencilKernel.shader ==
nullptr) {
210 int count = end - start;
228 if (duWeightsBuffer) {
231 if (dvWeightsBuffer) {
238 _stencilKernel.shader, _stencilKernel.uniformSrcOffset, 1, 1, &srcDesc.offset);
240 _stencilKernel.shader, _stencilKernel.uniformDstOffset, 1, 1, &dstDesc.offset);
// Uploads {offset, length, stride} of `desc` as a 3-component integer uniform
// of the stencil kernel, but only when the kernel's `uniform` location is
// greater than zero.
// NOTE(review): a location of 0 is skipped as well -- confirm that 0 can never
// be a valid location for these uniforms.
#define BIND_BUF_DESC(uniform, desc) \
  if (_stencilKernel.uniform > 0) { \
    int value[] = {desc.offset, desc.length, desc.stride}; \
    GPU_shader_uniform_int_ex(_stencilKernel.shader, _stencilKernel.uniform, 3, 1, value); \
  }
252 DispatchCompute(_stencilKernel.shader,
count);
260 BufferDescriptor
const &srcDesc,
261 gpu::VertBuf *dstBuffer,
262 BufferDescriptor
const &dstDesc,
263 gpu::VertBuf *duBuffer,
264 BufferDescriptor
const &duDesc,
265 gpu::VertBuf *dvBuffer,
266 BufferDescriptor
const &dvDesc,
268 gpu::VertBuf *patchCoordsBuffer,
269 const PatchArrayVector &patchArrays,
270 gpu::StorageBuf *patchIndexBuffer,
271 gpu::StorageBuf *patchParamsBuffer)
273 if (_patchKernel.shader ==
nullptr) {
289 int patchArraySize =
sizeof(PatchArray);
290 if (_patchArraysSSBO) {
292 _patchArraysSSBO =
nullptr;
295 static_cast<const void *
>(&patchArrays[0]),
301 _patchKernel.shader, _patchKernel.uniformSrcOffset, 1, 1, &srcDesc.offset);
303 _patchKernel.shader, _patchKernel.uniformDstOffset, 1, 1, &dstDesc.offset);
// Uploads {offset, length, stride} of `desc` as a 3-component integer uniform
// of the patch kernel, but only when the kernel's `uniform` location is
// greater than zero.
// The guard has to test the patch kernel's own location: testing the stencil
// kernel's location would skip or perform the upload based on a different
// shader's uniform layout.
// NOTE(review): a location of 0 is skipped as well -- confirm that 0 can never
// be a valid location for these uniforms.
#define BIND_BUF_DESC(uniform, desc) \
  if (_patchKernel.uniform > 0) { \
    int value[] = {desc.offset, desc.length, desc.stride}; \
    GPU_shader_uniform_int_ex(_patchKernel.shader, _patchKernel.uniform, 3, 1, value); \
  }
315 DispatchCompute(_patchKernel.shader, numPatchCoords);
322GPUComputeEvaluator::_StencilKernel::_StencilKernel() {}
323GPUComputeEvaluator::_StencilKernel::~_StencilKernel()
331 BufferDescriptor
const &dstDesc,
332 BufferDescriptor
const &duDesc,
333 BufferDescriptor
const &dvDesc,
345 info.
define(
"OSD_PATCH_BASIS_METAL",
"1");
348 info.
define(
"OSD_PATCH_BASIS_GLSL");
355 std::string
length = std::to_string(srcDesc.length);
356 std::string src_stride = std::to_string(srcDesc.stride);
357 std::string dst_stride = std::to_string(dstDesc.stride);
358 std::string work_group_size = std::to_string(workGroupSize);
360 info.
define(
"SRC_STRIDE", src_stride);
361 info.
define(
"DST_STRIDE", dst_stride);
362 info.
define(
"WORK_GROUP_SIZE", work_group_size);
371 bool deriv1 = (duDesc.length > 0 || dvDesc.length > 0);
373 info.
define(
"OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES");
395 reinterpret_cast<const GPUShaderCreateInfo *
>(&info));
399bool GPUComputeEvaluator::_StencilKernel::Compile(BufferDescriptor
const &srcDesc,
400 BufferDescriptor
const &dstDesc,
401 BufferDescriptor
const &duDesc,
402 BufferDescriptor
const &dvDesc,
428GPUComputeEvaluator::_PatchKernel::_PatchKernel() {}
429GPUComputeEvaluator::_PatchKernel::~_PatchKernel()
438 BufferDescriptor
const &dstDesc,
439 BufferDescriptor
const &duDesc,
440 BufferDescriptor
const &dvDesc,
452 info.
define(
"OSD_PATCH_BASIS_METAL",
"1");
455 info.
define(
"OSD_PATCH_BASIS_GLSL");
462 std::string
length = std::to_string(srcDesc.length);
463 std::string src_stride = std::to_string(srcDesc.stride);
464 std::string dst_stride = std::to_string(dstDesc.stride);
465 std::string work_group_size = std::to_string(workGroupSize);
467 info.
define(
"SRC_STRIDE", src_stride);
468 info.
define(
"DST_STRIDE", dst_stride);
469 info.
define(
"WORK_GROUP_SIZE", work_group_size);
478 bool deriv1 = (duDesc.length > 0 || dvDesc.length > 0);
480 info.
define(
"OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES");
498 reinterpret_cast<const GPUShaderCreateInfo *
>(&info));
502bool GPUComputeEvaluator::_PatchKernel::Compile(BufferDescriptor
const &srcDesc,
503 BufferDescriptor
const &dstDesc,
504 BufferDescriptor
const &duDesc,
505 BufferDescriptor
const &dvDesc,
int GPU_max_work_group_count(int index)
void GPU_compute_dispatch(blender::gpu::Shader *shader, uint groups_x_len, uint groups_y_len, uint groups_z_len, const blender::gpu::shader::SpecializationConstants *constants_state=nullptr)
GPUBackendType GPU_backend_get_type()
void GPU_shader_free(blender::gpu::Shader *shader)
blender::gpu::Shader * GPU_shader_create_from_info(const GPUShaderCreateInfo *_info)
void GPU_shader_bind(blender::gpu::Shader *shader, const blender::gpu::shader::SpecializationConstants *constants_state=nullptr)
void GPU_shader_uniform_int_ex(blender::gpu::Shader *shader, int location, int length, int array_size, const int *value)
int GPU_shader_get_uniform(blender::gpu::Shader *shader, const char *name)
@ GPU_BARRIER_SHADER_STORAGE
void GPU_memory_barrier(GPUBarrier barrier)
void GPU_storagebuf_free(blender::gpu::StorageBuf *ssbo)
blender::gpu::StorageBuf * GPU_storagebuf_create_ex(size_t size, const void *data, GPUUsageType usage, const char *name)
void GPU_storagebuf_bind(blender::gpu::StorageBuf *ssbo, int slot)
void GPU_vertbuf_bind_as_ssbo(blender::gpu::VertBuf *verts, int binding)
static bool EvalPatches(SRC_BUFFER *srcBuffer, OpenSubdiv::Osd::BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, GPUComputeEvaluator *instance, void *deviceContext=nullptr)
Generic limit eval function. This function has the same signature as the other device kernels so that ...
GPUComputeEvaluator()
Constructor.
bool Compile(OpenSubdiv::Osd::BufferDescriptor const &srcDesc, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, OpenSubdiv::Osd::BufferDescriptor const &duDesc=OpenSubdiv::Osd::BufferDescriptor(), OpenSubdiv::Osd::BufferDescriptor const &dvDesc=OpenSubdiv::Osd::BufferDescriptor())
static void Synchronize(void *deviceContext)
Wait until the dispatched kernel finishes.
~GPUComputeEvaluator()
Destructor. Note that the GL context must be made current.
static bool EvalStencils(SRC_BUFFER *srcBuffer, OpenSubdiv::Osd::BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, STENCIL_TABLE const *stencilTable, GPUComputeEvaluator *instance, void *deviceContext=nullptr)
Generic static stencil function. This function has the same signature as the other device kernels so t...
GPUStencilTableSSBO(OpenSubdiv::Far::StencilTable const *stencilTable)
#define SHADER_SIZES_BUF_SLOT
#define SHADER_PATCH_ARRAY_BUFFER_BUF_SLOT
#define SHADER_DST_VERTEX_BUFFER_BUF_SLOT
#define SHADER_OFFSETS_BUF_SLOT
#define SHADER_DU_BUFFER_BUF_SLOT
#define SHADER_PATCH_PARAM_BUFFER_BUF_SLOT
#define SHADER_DV_BUFFER_BUF_SLOT
#define SHADER_DV_WEIGHTS_BUF_SLOT
#define SHADER_DU_WEIGHTS_BUF_SLOT
#define SHADER_WEIGHTS_BUF_SLOT
#define SHADER_PATCH_COORDS_BUF_SLOT
#define SHADER_SRC_VERTEX_BUFFER_BUF_SLOT
#define SHADER_INDICES_BUF_SLOT
#define SHADER_PATCH_INDEX_BUFFER_BUF_SLOT
#define BIND_BUF_DESC(uniform, desc)
#define assert(assertion)
float length(VecOp< float, D >) RET
gpu::StorageBuf * create_buffer(std::vector< T > const &src, const char *name)
static blender::gpu::Shader * compile_eval_patches_shader(BufferDescriptor const &srcDesc, BufferDescriptor const &dstDesc, BufferDescriptor const &duDesc, BufferDescriptor const &dvDesc, int workGroupSize)
static blender::gpu::Shader * compile_eval_stencil_shader(BufferDescriptor const &srcDesc, BufferDescriptor const &dstDesc, BufferDescriptor const &duDesc, BufferDescriptor const &dvDesc, int workGroupSize)
static void storage_buffer_free(gpu::StorageBuf **buffer)
Describe inputs & outputs, stage interfaces, resources and sources of a shader. If all data is correc...
Self & compute_source(StringRefNull filename)
Self & push_constant(Type type, StringRefNull name, int array_size=0)
Self & typedef_source(StringRefNull filename)
Self & storage_buf(int slot, Qualifier qualifiers, StringRefNull type_name, StringRefNull name, Frequency freq=Frequency::PASS)
Self & local_group_size(int local_size_x, int local_size_y=1, int local_size_z=1)
Self & define(StringRefNull name, StringRefNull value="")