Blender V4.5
gpu_compute_evaluator.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2025 Blender Foundation
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
5#include <epoxy/gl.h>
6
8
9#include <opensubdiv/far/error.h>
10#include <opensubdiv/far/patchDescriptor.h>
11#include <opensubdiv/far/stencilTable.h>
12#include <opensubdiv/osd/glslPatchShaderSource.h>
13
14#include <cassert>
15#include <cmath>
16#include <sstream>
17#include <string>
18#include <vector>
19
20#include "GPU_capabilities.hh"
21#include "GPU_compute.hh"
22#include "GPU_context.hh"
23#include "GPU_debug.hh"
24#include "GPU_state.hh"
25#include "GPU_vertex_buffer.hh"
27
28using OpenSubdiv::Far::LimitStencilTable;
29using OpenSubdiv::Far::StencilTable;
30using OpenSubdiv::Osd::BufferDescriptor;
31using OpenSubdiv::Osd::PatchArray;
32using OpenSubdiv::Osd::PatchArrayVector;
33
34#define SHADER_SRC_VERTEX_BUFFER_BUF_SLOT 0
35#define SHADER_DST_VERTEX_BUFFER_BUF_SLOT 1
36#define SHADER_DU_BUFFER_BUF_SLOT 2
37#define SHADER_DV_BUFFER_BUF_SLOT 3
38#define SHADER_SIZES_BUF_SLOT 4
39#define SHADER_OFFSETS_BUF_SLOT 5
40#define SHADER_INDICES_BUF_SLOT 6
41#define SHADER_WEIGHTS_BUF_SLOT 7
42#define SHADER_DU_WEIGHTS_BUF_SLOT 8
43#define SHADER_DV_WEIGHTS_BUF_SLOT 9
44
45#define SHADER_PATCH_ARRAY_BUFFER_BUF_SLOT 4
46#define SHADER_PATCH_COORDS_BUF_SLOT 5
47#define SHADER_PATCH_INDEX_BUFFER_BUF_SLOT 6
48#define SHADER_PATCH_PARAM_BUFFER_BUF_SLOT 7
49
50namespace blender::opensubdiv {
51
52template<class T> GPUStorageBuf *create_buffer(std::vector<T> const &src, const char *name)
53{
54 if (src.empty()) {
55 return nullptr;
56 }
57
58 const size_t buffer_size = src.size() * sizeof(T);
59 GPUStorageBuf *storage_buffer = GPU_storagebuf_create_ex(
60 buffer_size, &src.at(0), GPU_USAGE_STATIC, name);
61
62 return storage_buffer;
63}
64
65GPUStencilTableSSBO::GPUStencilTableSSBO(StencilTable const *stencilTable)
66{
67 _numStencils = stencilTable->GetNumStencils();
68 if (_numStencils > 0) {
69 sizes_buf = create_buffer(stencilTable->GetSizes(), "osd_sized");
70 offsets_buf = create_buffer(stencilTable->GetOffsets(), "osd_offsets");
71 indices_buf = create_buffer(stencilTable->GetControlIndices(), "osd_control_indices");
72 weights_buf = create_buffer(stencilTable->GetWeights(), "osd_weights");
73 }
74}
75
76GPUStencilTableSSBO::GPUStencilTableSSBO(LimitStencilTable const *limitStencilTable)
77{
78 _numStencils = limitStencilTable->GetNumStencils();
79 if (_numStencils > 0) {
80 sizes_buf = create_buffer(limitStencilTable->GetSizes(), "osd_sized");
81 offsets_buf = create_buffer(limitStencilTable->GetOffsets(), "osd_offsets");
82 indices_buf = create_buffer(limitStencilTable->GetControlIndices(), "osd_control_indices");
83 weights_buf = create_buffer(limitStencilTable->GetWeights(), "osd_weights");
84 du_weights_buf = create_buffer(limitStencilTable->GetDuWeights(), "osd_du_weights");
85 dv_weights_buf = create_buffer(limitStencilTable->GetDvWeights(), "osd_dv_weights");
86 duu_weights_buf = create_buffer(limitStencilTable->GetDuuWeights(), "osd_duu_weights");
87 duv_weights_buf = create_buffer(limitStencilTable->GetDuvWeights(), "osd_duv_weights");
88 dvv_weights_buf = create_buffer(limitStencilTable->GetDvvWeights(), "osd_dvv_weights");
89 }
90}
91
92static void storage_buffer_free(GPUStorageBuf **buffer)
93{
94 if (*buffer) {
95 GPU_storagebuf_free(*buffer);
96 *buffer = nullptr;
97 }
98}
99
{
  /* Presumably the body of ~GPUStencilTableSSBO() — the signature line is not
   * visible in this extraction; confirm against the upstream file.
   * Releases every SSBO created by the constructors; storage_buffer_free()
   * tolerates buffers that were never allocated and nulls each pointer. */
  storage_buffer_free(&sizes_buf);
  storage_buffer_free(&offsets_buf);
  storage_buffer_free(&indices_buf);
  storage_buffer_free(&weights_buf);
  storage_buffer_free(&du_weights_buf);
  storage_buffer_free(&dv_weights_buf);
  storage_buffer_free(&duu_weights_buf);
  storage_buffer_free(&duv_weights_buf);
  storage_buffer_free(&dvv_weights_buf);
}
112
113// ---------------------------------------------------------------------------
114
/* Construct an evaluator with a fixed work-group size of 64; kernels are
 * compiled later via Compile(). */
GPUComputeEvaluator::GPUComputeEvaluator() : _workGroupSize(64), _patchArraysSSBO(nullptr)
{
  /* Zero-fill the kernel structs so the shader pointers and cached uniform
   * locations start out as null/zero before Compile() runs.
   * NOTE(review): memset over class objects with user-declared destructors
   * (_StencilKernel/_PatchKernel) is formally undefined behavior; default
   * member initializers in the header would be safer — confirm the member
   * layout there. */
  memset((void *)&_stencilKernel, 0, sizeof(_stencilKernel));
  memset((void *)&_patchKernel, 0, sizeof(_patchKernel));
}
120
{
  /* Presumably the body of ~GPUComputeEvaluator() — the signature line is not
   * visible in this extraction; confirm against the upstream file.
   * Frees the patch-array SSBO that EvalPatches() may have created; the GPU
   * context must be current when this runs. */
  if (_patchArraysSSBO) {
    GPU_storagebuf_free(_patchArraysSSBO);
    _patchArraysSSBO = nullptr;
  }
}
128
129bool GPUComputeEvaluator::Compile(BufferDescriptor const &srcDesc,
130 BufferDescriptor const &dstDesc,
131 BufferDescriptor const &duDesc,
132 BufferDescriptor const &dvDesc)
133{
134
135 if (!_stencilKernel.Compile(srcDesc, dstDesc, duDesc, dvDesc, _workGroupSize)) {
136 return false;
137 }
138
139 if (!_patchKernel.Compile(srcDesc, dstDesc, duDesc, dvDesc, _workGroupSize)) {
140 return false;
141 }
142
143 return true;
144}
145
/* static */
/* Block the CPU until all previously submitted GPU work has completed. Part
 * of the OpenSubdiv evaluator interface (the kernel argument is unused). */
void GPUComputeEvaluator::Synchronize(void * /*kernel*/)
{
  // XXX: this is currently just for the performance measuring purpose.
  // need to be reimplemented by fence and sync.
  GPU_finish();
}
153
154int GPUComputeEvaluator::GetDispatchSize(int count) const
155{
156 return (count + _workGroupSize - 1) / _workGroupSize;
157}
158
/* Dispatch `shader` with enough work groups to cover `totalDispatchSize`
 * elements, spilling into a second dispatch dimension when the count exceeds
 * the backend's per-dimension work-group limit. */
void GPUComputeEvaluator::DispatchCompute(GPUShader *shader, int totalDispatchSize) const
{
  const int dispatchSize = GetDispatchSize(totalDispatchSize);
  int dispatchRX = dispatchSize;
  int dispatchRY = 1u;
  if (dispatchRX > GPU_max_work_group_count(0)) {
    /* Since there are some limitations with regards to the maximum work group size (could be as
     * low as 64k elements per call), we split the number elements into a "2d" number, with the
     * final index being computed as `res_x + res_y * max_work_group_size`. Even with a maximum
     * work group size of 64k, that still leaves us with roughly `64k * 64k = 4` billion elements
     * total, which should be enough. If not, we could also use the 3rd dimension. */
    /* TODO(fclem): We could dispatch fewer groups if we compute the prime factorization and
     * get the smallest rect fitting the requirements. */
    dispatchRX = dispatchRY = std::ceil(std::sqrt(dispatchSize));
    /* Avoid a completely empty dispatch line caused by rounding. */
    if ((dispatchRX * (dispatchRY - 1)) >= dispatchSize) {
      dispatchRY -= 1;
    }
  }

  /* X and Y dimensions may have different limits so the above computation may not be right, but
   * even with the standard 64k minimum on all dimensions we still have a lot of room. Therefore,
   * we presume it all fits. */
  assert(dispatchRY < GPU_max_work_group_count(1));
  GPU_compute_dispatch(shader, dispatchRX, dispatchRY, 1);

  /* Next usage of the src/dst buffers will always be a shader storage. Vertices/normals/attributes
   * are copied over to the final buffers using compute shaders. */
  /* NOTE(review): a GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE) call
   * appears to have been dropped from this extraction here — confirm against
   * the upstream file. */
}
189
/* Run the stencil compute kernel over stencils in the range [start, end),
 * reading control vertices from `srcBuffer` and writing refined values (and
 * optional first derivatives) to `dstBuffer` / `duBuffer` / `dvBuffer`.
 * Returns false when the kernel was never compiled; true otherwise, including
 * for an empty range. */
bool GPUComputeEvaluator::EvalStencils(gpu::VertBuf *srcBuffer,
                                       BufferDescriptor const &srcDesc,
                                       gpu::VertBuf *dstBuffer,
                                       BufferDescriptor const &dstDesc,
                                       gpu::VertBuf *duBuffer,
                                       BufferDescriptor const &duDesc,
                                       gpu::VertBuf *dvBuffer,
                                       BufferDescriptor const &dvDesc,
                                       GPUStorageBuf *sizesBuffer,
                                       GPUStorageBuf *offsetsBuffer,
                                       GPUStorageBuf *indicesBuffer,
                                       GPUStorageBuf *weightsBuffer,
                                       GPUStorageBuf *duWeightsBuffer,
                                       GPUStorageBuf *dvWeightsBuffer,
                                       int start,
                                       int end) const
{
  if (_stencilKernel.shader == nullptr) {
    /* Compile() was not called or failed. */
    return false;
  }
  int count = end - start;
  if (count <= 0) {
    /* Nothing to evaluate; not an error. */
    return true;
  }

  GPU_shader_bind(_stencilKernel.shader);
  /* NOTE(review): this extraction appears to have dropped the buffer-binding
   * calls here (src/dst vertex buffers, the sizes/offsets/indices/weights
   * SSBOs, and the optional derivative bindings inside the empty if-blocks
   * below) — confirm against the upstream file. */
  if (duBuffer) {
  }
  if (dvBuffer) {
  }
  if (duWeightsBuffer) {
  }
  if (dvWeightsBuffer) {
  }

  GPU_shader_uniform_int_ex(_stencilKernel.shader, _stencilKernel.uniformStart, 1, 1, &start);
  GPU_shader_uniform_int_ex(_stencilKernel.shader, _stencilKernel.uniformEnd, 1, 1, &end);
  /* NOTE(review): the next two lines are continuations of
   * GPU_shader_uniform_int_ex(...) calls whose opening lines are missing from
   * this extraction. */
      _stencilKernel.shader, _stencilKernel.uniformSrcOffset, 1, 1, &srcDesc.offset);
      _stencilKernel.shader, _stencilKernel.uniformDstOffset, 1, 1, &dstDesc.offset);

// TODO init to -1 and check >= 0 to align with GPU module. Currently we assume that the uniform
// location is not zero as there are other uniforms defined as well.
#define BIND_BUF_DESC(uniform, desc) \
  if (_stencilKernel.uniform > 0) { \
    int value[] = {desc.offset, desc.length, desc.stride}; \
    GPU_shader_uniform_int_ex(_stencilKernel.shader, _stencilKernel.uniform, 3, 1, value); \
  }
  BIND_BUF_DESC(uniformDuDesc, duDesc)
  BIND_BUF_DESC(uniformDvDesc, dvDesc)
#undef BIND_BUF_DESC
  DispatchCompute(_stencilKernel.shader, count);
  // GPU_storagebuf_unbind_all();
  /* NOTE(review): one more line (likely a GPU_shader_unbind() call) appears
   * to be missing from this extraction here — confirm upstream. */

  return true;
}
258
/* Run the patch-evaluation compute kernel: evaluate `numPatchCoords` limit
 * locations (from `patchCoordsBuffer`) against the patch tables, writing
 * results (and optional first derivatives) into the destination buffers.
 * Returns false when the kernel was never compiled. Non-const because it
 * re-creates `_patchArraysSSBO` on every call. */
bool GPUComputeEvaluator::EvalPatches(gpu::VertBuf *srcBuffer,
                                      BufferDescriptor const &srcDesc,
                                      gpu::VertBuf *dstBuffer,
                                      BufferDescriptor const &dstDesc,
                                      gpu::VertBuf *duBuffer,
                                      BufferDescriptor const &duDesc,
                                      gpu::VertBuf *dvBuffer,
                                      BufferDescriptor const &dvDesc,
                                      int numPatchCoords,
                                      gpu::VertBuf *patchCoordsBuffer,
                                      const PatchArrayVector &patchArrays,
                                      GPUStorageBuf *patchIndexBuffer,
                                      GPUStorageBuf *patchParamsBuffer)
{
  if (_patchKernel.shader == nullptr) {
    /* Compile() was not called or failed. */
    return false;
  }

  GPU_shader_bind(_patchKernel.shader);
  /* NOTE(review): this extraction appears to have dropped the buffer-binding
   * calls here (src/dst vertex buffers, patch coords, patch index/param
   * buffers, and the derivative bindings inside the empty if-blocks below) —
   * confirm against the upstream file. */
  if (duBuffer) {
  }
  if (dvBuffer) {
  }
  int patchArraySize = sizeof(PatchArray);
  /* The patch arrays are re-uploaded on every call; release the SSBO from the
   * previous call first. */
  if (_patchArraysSSBO) {
    GPU_storagebuf_free(_patchArraysSSBO);
    _patchArraysSSBO = nullptr;
  }
  /* NOTE(review): a usage-flag argument line (likely GPU_USAGE_STATIC) appears
   * to be missing from this call in the extraction. */
  _patchArraysSSBO = GPU_storagebuf_create_ex(patchArrays.size() * patchArraySize,
                                              static_cast<const void *>(&patchArrays[0]),
                                              "osd_patch_array");

  /* NOTE(review): the next two lines are continuations of
   * GPU_shader_uniform_int_ex(...) calls whose opening lines are missing from
   * this extraction; the binding of _patchArraysSSBO also appears dropped. */
      _patchKernel.shader, _patchKernel.uniformSrcOffset, 1, 1, &srcDesc.offset);
      _patchKernel.shader, _patchKernel.uniformDstOffset, 1, 1, &dstDesc.offset);

// TODO init to -1 and check >= 0 to align with GPU module.
/* NOTE(review): the guard below tests `_stencilKernel.uniform` but then binds
 * `_patchKernel.uniform` — this looks like a copy/paste slip from
 * EvalStencils; it should presumably test `_patchKernel.uniform`. Confirm and
 * fix upstream. */
#define BIND_BUF_DESC(uniform, desc) \
  if (_stencilKernel.uniform > 0) { \
    int value[] = {desc.offset, desc.length, desc.stride}; \
    GPU_shader_uniform_int_ex(_patchKernel.shader, _patchKernel.uniform, 3, 1, value); \
  }
  BIND_BUF_DESC(uniformDuDesc, duDesc)
  BIND_BUF_DESC(uniformDvDesc, dvDesc)
#undef BIND_BUF_DESC

  DispatchCompute(_patchKernel.shader, numPatchCoords);
  /* NOTE(review): one more line (likely a GPU_shader_unbind() call) appears
   * to be missing from this extraction here — confirm upstream. */

  return true;
}
320// ---------------------------------------------------------------------------
321
322GPUComputeEvaluator::_StencilKernel::_StencilKernel() {}
323GPUComputeEvaluator::_StencilKernel::~_StencilKernel()
324{
325 if (shader) {
326 GPU_shader_free(shader);
327 shader = nullptr;
328 }
329}
/* Build and compile the stencil-evaluation compute shader for the given
 * buffer layouts. Buffer strides, element length and work-group size are
 * baked in as preprocessor defines; derivative outputs are only declared when
 * the du/dv descriptors are non-empty. Returns nullptr on compile failure. */
static GPUShader *compile_eval_stencil_shader(BufferDescriptor const &srcDesc,
                                              BufferDescriptor const &dstDesc,
                                              BufferDescriptor const &duDesc,
                                              BufferDescriptor const &dvDesc,
                                              int workGroupSize)
{
  using namespace blender::gpu::shader;
  ShaderCreateInfo info("opensubdiv_compute_eval");
  info.local_group_size(workGroupSize, 1, 1);
  /* NOTE(review): the backend check opening this branch (presumably
   * `if (GPU_backend_get_type() == GPU_BACKEND_METAL) {`) is missing from
   * this extraction — confirm against the upstream file. */
    info.define("OSD_PATCH_BASIS_METAL");
  }
  else {
    info.define("OSD_PATCH_BASIS_GLSL");
  }

  // TODO: use specialization constants for src_stride, dst_stride. Not sure we can use
  // work group size as that requires extensions. This allows us to compile less shaders and
  // improve overall performance. Adding length as specialization constant will not work as it is
  // used to define an array length. This is not supported by Metal.
  std::string length = std::to_string(srcDesc.length);
  std::string src_stride = std::to_string(srcDesc.stride);
  std::string dst_stride = std::to_string(dstDesc.stride);
  std::string work_group_size = std::to_string(workGroupSize);
  info.define("LENGTH", length);
  info.define("SRC_STRIDE", src_stride);
  info.define("DST_STRIDE", dst_stride);
  info.define("WORK_GROUP_SIZE", work_group_size);
  info.typedef_source("osd_patch_basis.glsl");
  info.storage_buf(
      SHADER_SRC_VERTEX_BUFFER_BUF_SLOT, Qualifier::read, "float", "srcVertexBuffer[]");
  info.storage_buf(
      SHADER_DST_VERTEX_BUFFER_BUF_SLOT, Qualifier::write, "float", "dstVertexBuffer[]");
  info.push_constant(Type::int_t, "srcOffset");
  info.push_constant(Type::int_t, "dstOffset");

  /* First-derivative outputs are optional. */
  bool deriv1 = (duDesc.length > 0 || dvDesc.length > 0);
  if (deriv1) {
    info.define("OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES");
    info.storage_buf(SHADER_DU_BUFFER_BUF_SLOT, Qualifier::read_write, "float", "duBuffer[]");
    info.storage_buf(SHADER_DV_BUFFER_BUF_SLOT, Qualifier::read_write, "float", "dvBuffer[]");
    info.push_constant(Type::int3_t, "duDesc");
    info.push_constant(Type::int3_t, "dvDesc");
  }

  info.storage_buf(SHADER_SIZES_BUF_SLOT, Qualifier::read, "int", "sizes_buf[]");
  info.storage_buf(SHADER_OFFSETS_BUF_SLOT, Qualifier::read, "int", "offsets_buf[]");
  info.storage_buf(SHADER_INDICES_BUF_SLOT, Qualifier::read, "int", "indices_buf[]");
  info.storage_buf(SHADER_WEIGHTS_BUF_SLOT, Qualifier::read, "float", "weights_buf[]");
  if (deriv1) {
    info.storage_buf(
        SHADER_DU_WEIGHTS_BUF_SLOT, Qualifier::read_write, "float", "du_weights_buf[]");
    info.storage_buf(
        SHADER_DV_WEIGHTS_BUF_SLOT, Qualifier::read_write, "float", "dv_weights_buf[]");
  }
  info.push_constant(Type::int_t, "batchStart");
  info.push_constant(Type::int_t, "batchEnd");

  info.compute_source("osd_eval_stencils_comp.glsl");
  /* NOTE(review): the opening of this call (presumably
   * `GPUShader *shader = GPU_shader_create_from_info(`) is missing from this
   * extraction — confirm against the upstream file. */
      reinterpret_cast<const GPUShaderCreateInfo *>(&info));
  return shader;
}
393
/* (Re)compile the stencil kernel for the given buffer layouts and cache its
 * uniform locations. Returns false when shader creation fails. */
bool GPUComputeEvaluator::_StencilKernel::Compile(BufferDescriptor const &srcDesc,
                                                  BufferDescriptor const &dstDesc,
                                                  BufferDescriptor const &duDesc,
                                                  BufferDescriptor const &dvDesc,
                                                  int workGroupSize)
{
  if (shader) {
    /* NOTE(review): a `GPU_shader_free(shader);` line appears to have been
     * dropped from this extraction here — recompilation should release the
     * previous shader before overwriting the pointer. Confirm upstream. */
    shader = nullptr;
  }

  shader = compile_eval_stencil_shader(srcDesc, dstDesc, duDesc, dvDesc, workGroupSize);
  if (shader == nullptr) {
    return false;
  }

  // cache uniform locations (TODO: use uniform block)
  uniformStart = GPU_shader_get_uniform(shader, "batchStart");
  uniformEnd = GPU_shader_get_uniform(shader, "batchEnd");
  uniformSrcOffset = GPU_shader_get_uniform(shader, "srcOffset");
  uniformDstOffset = GPU_shader_get_uniform(shader, "dstOffset");
  uniformDuDesc = GPU_shader_get_uniform(shader, "duDesc");
  uniformDvDesc = GPU_shader_get_uniform(shader, "dvDesc");

  return true;
}
420
421// ---------------------------------------------------------------------------
422
423GPUComputeEvaluator::_PatchKernel::_PatchKernel() {}
424GPUComputeEvaluator::_PatchKernel::~_PatchKernel()
425{
426 if (shader) {
427 GPU_shader_free(shader);
428 shader = nullptr;
429 }
430}
431
/* Build and compile the patch-evaluation compute shader for the given buffer
 * layouts. Mirrors compile_eval_stencil_shader() but declares the patch
 * array/coord/index/param SSBOs instead of the stencil-table buffers.
 * Returns nullptr on compile failure. */
static GPUShader *compile_eval_patches_shader(BufferDescriptor const &srcDesc,
                                              BufferDescriptor const &dstDesc,
                                              BufferDescriptor const &duDesc,
                                              BufferDescriptor const &dvDesc,
                                              int workGroupSize)
{
  using namespace blender::gpu::shader;
  ShaderCreateInfo info("opensubdiv_compute_eval");
  info.local_group_size(workGroupSize, 1, 1);
  /* NOTE(review): the backend check opening this branch (presumably
   * `if (GPU_backend_get_type() == GPU_BACKEND_METAL) {`) is missing from
   * this extraction — confirm against the upstream file. */
    info.define("OSD_PATCH_BASIS_METAL");
  }
  else {
    info.define("OSD_PATCH_BASIS_GLSL");
  }

  // TODO: use specialization constants for src_stride, dst_stride. Not sure we can use
  // work group size as that requires extensions. This allows us to compile less shaders and
  // improve overall performance. Adding length as specialization constant will not work as it is
  // used to define an array length. This is not supported by Metal.
  std::string length = std::to_string(srcDesc.length);
  std::string src_stride = std::to_string(srcDesc.stride);
  std::string dst_stride = std::to_string(dstDesc.stride);
  std::string work_group_size = std::to_string(workGroupSize);
  info.define("LENGTH", length);
  info.define("SRC_STRIDE", src_stride);
  info.define("DST_STRIDE", dst_stride);
  info.define("WORK_GROUP_SIZE", work_group_size);
  info.typedef_source("osd_patch_basis.glsl");
  info.storage_buf(
      SHADER_SRC_VERTEX_BUFFER_BUF_SLOT, Qualifier::read, "float", "srcVertexBuffer[]");
  info.storage_buf(
      SHADER_DST_VERTEX_BUFFER_BUF_SLOT, Qualifier::write, "float", "dstVertexBuffer[]");
  info.push_constant(Type::int_t, "srcOffset");
  info.push_constant(Type::int_t, "dstOffset");

  /* First-derivative outputs are optional. */
  bool deriv1 = (duDesc.length > 0 || dvDesc.length > 0);
  if (deriv1) {
    info.define("OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES");
    info.storage_buf(SHADER_DU_BUFFER_BUF_SLOT, Qualifier::read_write, "float", "duBuffer[]");
    info.storage_buf(SHADER_DV_BUFFER_BUF_SLOT, Qualifier::read_write, "float", "dvBuffer[]");
    info.push_constant(Type::int3_t, "duDesc");
    info.push_constant(Type::int3_t, "dvDesc");
  }

  info.storage_buf(
      SHADER_PATCH_ARRAY_BUFFER_BUF_SLOT, Qualifier::read, "OsdPatchArray", "patchArrayBuffer[]");
  info.storage_buf(
      SHADER_PATCH_COORDS_BUF_SLOT, Qualifier::read, "OsdPatchCoord", "patchCoords[]");
  info.storage_buf(
      SHADER_PATCH_INDEX_BUFFER_BUF_SLOT, Qualifier::read, "int", "patchIndexBuffer[]");
  info.storage_buf(
      SHADER_PATCH_PARAM_BUFFER_BUF_SLOT, Qualifier::read, "OsdPatchParam", "patchParamBuffer[]");

  info.compute_source("osd_eval_patches_comp.glsl");
  /* NOTE(review): the opening of this call (presumably
   * `GPUShader *shader = GPU_shader_create_from_info(`) is missing from this
   * extraction — confirm against the upstream file. */
      reinterpret_cast<const GPUShaderCreateInfo *>(&info));
  return shader;
}
491
/* (Re)compile the patch kernel for the given buffer layouts and cache its
 * uniform locations. Returns false when shader creation fails. */
bool GPUComputeEvaluator::_PatchKernel::Compile(BufferDescriptor const &srcDesc,
                                                BufferDescriptor const &dstDesc,
                                                BufferDescriptor const &duDesc,
                                                BufferDescriptor const &dvDesc,
                                                int workGroupSize)
{
  if (shader) {
    /* NOTE(review): a `GPU_shader_free(shader);` line appears to have been
     * dropped from this extraction here — recompilation should release the
     * previous shader before overwriting the pointer. Confirm upstream. */
    shader = nullptr;
  }

  shader = compile_eval_patches_shader(srcDesc, dstDesc, duDesc, dvDesc, workGroupSize);
  if (shader == nullptr) {
    return false;
  }

  // cache uniform locations
  uniformSrcOffset = GPU_shader_get_uniform(shader, "srcOffset");
  uniformDstOffset = GPU_shader_get_uniform(shader, "dstOffset");
  uniformDuDesc = GPU_shader_get_uniform(shader, "duDesc");
  uniformDvDesc = GPU_shader_get_uniform(shader, "dvDesc");

  return true;
}
516
517} // namespace blender::opensubdiv
int GPU_max_work_group_count(int index)
void GPU_compute_dispatch(GPUShader *shader, uint groups_x_len, uint groups_y_len, uint groups_z_len, const blender::gpu::shader::SpecializationConstants *constants_state=nullptr)
eGPUBackendType GPU_backend_get_type()
int GPU_shader_get_uniform(GPUShader *shader, const char *name)
void GPU_shader_uniform_int_ex(GPUShader *shader, int location, int length, int array_size, const int *value)
void GPU_shader_bind(GPUShader *shader, const blender::gpu::shader::SpecializationConstants *constants_state=nullptr)
GPUShader * GPU_shader_create_from_info(const GPUShaderCreateInfo *_info)
void GPU_shader_free(GPUShader *shader)
void GPU_shader_unbind()
void GPU_memory_barrier(eGPUBarrier barrier)
Definition gpu_state.cc:385
void GPU_finish()
Definition gpu_state.cc:310
@ GPU_BARRIER_SHADER_STORAGE
Definition GPU_state.hh:48
void GPU_storagebuf_bind(GPUStorageBuf *ssbo, int slot)
GPUStorageBuf * GPU_storagebuf_create_ex(size_t size, const void *data, GPUUsageType usage, const char *name)
void GPU_storagebuf_free(GPUStorageBuf *ssbo)
void GPU_vertbuf_bind_as_ssbo(blender::gpu::VertBuf *verts, int binding)
@ GPU_USAGE_STATIC
static bool EvalPatches(SRC_BUFFER *srcBuffer, OpenSubdiv::Osd::BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, GPUComputeEvaluator *instance, void *deviceContext=nullptr)
Generic limit eval function. This function has a same signature as other device kernels have so that ...
bool Compile(OpenSubdiv::Osd::BufferDescriptor const &srcDesc, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, OpenSubdiv::Osd::BufferDescriptor const &duDesc=OpenSubdiv::Osd::BufferDescriptor(), OpenSubdiv::Osd::BufferDescriptor const &dvDesc=OpenSubdiv::Osd::BufferDescriptor())
static void Synchronize(void *deviceContext)
Wait the dispatched kernel finishes.
~GPUComputeEvaluator()
Destructor. note that the GL context must be made current.
static bool EvalStencils(SRC_BUFFER *srcBuffer, OpenSubdiv::Osd::BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, STENCIL_TABLE const *stencilTable, GPUComputeEvaluator *instance, void *deviceContext=nullptr)
Generic static stencil function. This function has a same signature as other device kernels have so t...
GPUStencilTableSSBO(OpenSubdiv::Far::StencilTable const *stencilTable)
#define SHADER_SIZES_BUF_SLOT
#define SHADER_PATCH_ARRAY_BUFFER_BUF_SLOT
#define SHADER_DST_VERTEX_BUFFER_BUF_SLOT
#define SHADER_OFFSETS_BUF_SLOT
#define SHADER_DU_BUFFER_BUF_SLOT
#define SHADER_PATCH_PARAM_BUFFER_BUF_SLOT
#define SHADER_DV_BUFFER_BUF_SLOT
#define SHADER_DV_WEIGHTS_BUF_SLOT
#define SHADER_DU_WEIGHTS_BUF_SLOT
#define SHADER_WEIGHTS_BUF_SLOT
#define SHADER_PATCH_COORDS_BUF_SLOT
#define SHADER_SRC_VERTEX_BUFFER_BUF_SLOT
#define SHADER_INDICES_BUF_SLOT
#define SHADER_PATCH_INDEX_BUFFER_BUF_SLOT
#define BIND_BUF_DESC(uniform, desc)
#define assert(assertion)
float length(VecOp< float, D >) RET
int count
#define T
static GPUShader * compile_eval_stencil_shader(BufferDescriptor const &srcDesc, BufferDescriptor const &dstDesc, BufferDescriptor const &duDesc, BufferDescriptor const &dvDesc, int workGroupSize)
GPUStorageBuf * create_buffer(std::vector< T > const &src, const char *name)
static GPUShader * compile_eval_patches_shader(BufferDescriptor const &srcDesc, BufferDescriptor const &dstDesc, BufferDescriptor const &duDesc, BufferDescriptor const &dvDesc, int workGroupSize)
static void storage_buffer_free(GPUStorageBuf **buffer)
Describe inputs & outputs, stage interfaces, resources and sources of a shader. If all data is correc...
Self & compute_source(StringRefNull filename)
Self & push_constant(Type type, StringRefNull name, int array_size=0)
Self & typedef_source(StringRefNull filename)
Self & storage_buf(int slot, Qualifier qualifiers, StringRefNull type_name, StringRefNull name, Frequency freq=Frequency::PASS)
Self & local_group_size(int local_size_x, int local_size_y=1, int local_size_z=1)
Self & define(StringRefNull name, StringRefNull value="")