Blender V5.0
gpu_compute_evaluator.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2025 Blender Foundation
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
#include <epoxy/gl.h>

#include "gpu_compute_evaluator.hh"

#include <opensubdiv/far/error.h>
#include <opensubdiv/far/patchDescriptor.h>
#include <opensubdiv/far/stencilTable.h>

#include <cassert>
#include <cmath>
#include <cstring>
#include <sstream>
#include <string>
#include <vector>

#include "GPU_capabilities.hh"
#include "GPU_compute.hh"
#include "GPU_context.hh"
#include "GPU_debug.hh"
#include "GPU_state.hh"
#include "GPU_vertex_buffer.hh"
26
27using OpenSubdiv::Far::LimitStencilTable;
28using OpenSubdiv::Far::StencilTable;
29using OpenSubdiv::Osd::BufferDescriptor;
30using OpenSubdiv::Osd::PatchArray;
31using OpenSubdiv::Osd::PatchArrayVector;
32
33#define SHADER_SRC_VERTEX_BUFFER_BUF_SLOT 0
34#define SHADER_DST_VERTEX_BUFFER_BUF_SLOT 1
35#define SHADER_DU_BUFFER_BUF_SLOT 2
36#define SHADER_DV_BUFFER_BUF_SLOT 3
37#define SHADER_SIZES_BUF_SLOT 4
38#define SHADER_OFFSETS_BUF_SLOT 5
39#define SHADER_INDICES_BUF_SLOT 6
40#define SHADER_WEIGHTS_BUF_SLOT 7
41#define SHADER_DU_WEIGHTS_BUF_SLOT 8
42#define SHADER_DV_WEIGHTS_BUF_SLOT 9
43
44#define SHADER_PATCH_ARRAY_BUFFER_BUF_SLOT 4
45#define SHADER_PATCH_COORDS_BUF_SLOT 5
46#define SHADER_PATCH_INDEX_BUFFER_BUF_SLOT 6
47#define SHADER_PATCH_PARAM_BUFFER_BUF_SLOT 7
48
49namespace blender::opensubdiv {
50
51template<class T> gpu::StorageBuf *create_buffer(std::vector<T> const &src, const char *name)
52{
53 if (src.empty()) {
54 return nullptr;
55 }
56
57 const size_t buffer_size = src.size() * sizeof(T);
59 buffer_size, &src.at(0), GPU_USAGE_STATIC, name);
60
61 return storage_buffer;
62}
63
64GPUStencilTableSSBO::GPUStencilTableSSBO(StencilTable const *stencilTable)
65{
66 _numStencils = stencilTable->GetNumStencils();
67 if (_numStencils > 0) {
68 sizes_buf = create_buffer(stencilTable->GetSizes(), "osd_sized");
69 offsets_buf = create_buffer(stencilTable->GetOffsets(), "osd_offsets");
70 indices_buf = create_buffer(stencilTable->GetControlIndices(), "osd_control_indices");
71 weights_buf = create_buffer(stencilTable->GetWeights(), "osd_weights");
72 }
73}
74
75GPUStencilTableSSBO::GPUStencilTableSSBO(LimitStencilTable const *limitStencilTable)
76{
77 _numStencils = limitStencilTable->GetNumStencils();
78 if (_numStencils > 0) {
79 sizes_buf = create_buffer(limitStencilTable->GetSizes(), "osd_sized");
80 offsets_buf = create_buffer(limitStencilTable->GetOffsets(), "osd_offsets");
81 indices_buf = create_buffer(limitStencilTable->GetControlIndices(), "osd_control_indices");
82 weights_buf = create_buffer(limitStencilTable->GetWeights(), "osd_weights");
83 du_weights_buf = create_buffer(limitStencilTable->GetDuWeights(), "osd_du_weights");
84 dv_weights_buf = create_buffer(limitStencilTable->GetDvWeights(), "osd_dv_weights");
85 duu_weights_buf = create_buffer(limitStencilTable->GetDuuWeights(), "osd_duu_weights");
86 duv_weights_buf = create_buffer(limitStencilTable->GetDuvWeights(), "osd_duv_weights");
87 dvv_weights_buf = create_buffer(limitStencilTable->GetDvvWeights(), "osd_dvv_weights");
88 }
89}
90
92{
93 if (*buffer) {
94 GPU_storagebuf_free(*buffer);
95 *buffer = nullptr;
96 }
97}
98
100{
101 storage_buffer_free(&sizes_buf);
102 storage_buffer_free(&offsets_buf);
103 storage_buffer_free(&indices_buf);
104 storage_buffer_free(&weights_buf);
105 storage_buffer_free(&du_weights_buf);
106 storage_buffer_free(&dv_weights_buf);
107 storage_buffer_free(&duu_weights_buf);
108 storage_buffer_free(&duv_weights_buf);
109 storage_buffer_free(&dvv_weights_buf);
110}
111
112// ---------------------------------------------------------------------------
113
114GPUComputeEvaluator::GPUComputeEvaluator() : _workGroupSize(64), _patchArraysSSBO(nullptr)
115{
116 memset((void *)&_stencilKernel, 0, sizeof(_stencilKernel));
117 memset((void *)&_patchKernel, 0, sizeof(_patchKernel));
118}
119
121{
122 if (_patchArraysSSBO) {
123 GPU_storagebuf_free(_patchArraysSSBO);
124 _patchArraysSSBO = nullptr;
125 }
126}
127
128bool GPUComputeEvaluator::Compile(BufferDescriptor const &srcDesc,
129 BufferDescriptor const &dstDesc,
130 BufferDescriptor const &duDesc,
131 BufferDescriptor const &dvDesc)
132{
133
134 if (!_stencilKernel.Compile(srcDesc, dstDesc, duDesc, dvDesc, _workGroupSize)) {
135 return false;
136 }
137
138 if (!_patchKernel.Compile(srcDesc, dstDesc, duDesc, dvDesc, _workGroupSize)) {
139 return false;
140 }
141
142 return true;
143}
144
145/* static */
146void GPUComputeEvaluator::Synchronize(void * /*kernel*/)
147{
148 // XXX: this is currently just for the performance measuring purpose.
149 // need to be reimplemented by fence and sync.
150 GPU_finish();
151}
152
153int GPUComputeEvaluator::GetDispatchSize(int count) const
154{
155 return (count + _workGroupSize - 1) / _workGroupSize;
156}
157
158void GPUComputeEvaluator::DispatchCompute(blender::gpu::Shader *shader,
159 int totalDispatchSize) const
160{
161 const int dispatchSize = GetDispatchSize(totalDispatchSize);
162 int dispatchRX = dispatchSize;
163 int dispatchRY = 1u;
164 if (dispatchRX > GPU_max_work_group_count(0)) {
165 /* Since there are some limitations with regards to the maximum work group size (could be as
166 * low as 64k elements per call), we split the number elements into a "2d" number, with the
167 * final index being computed as `res_x + res_y * max_work_group_size`. Even with a maximum
168 * work group size of 64k, that still leaves us with roughly `64k * 64k = 4` billion elements
169 * total, which should be enough. If not, we could also use the 3rd dimension. */
170 /* TODO(fclem): We could dispatch fewer groups if we compute the prime factorization and
171 * get the smallest rect fitting the requirements. */
172 dispatchRX = dispatchRY = std::ceil(std::sqrt(dispatchSize));
173 /* Avoid a completely empty dispatch line caused by rounding. */
174 if ((dispatchRX * (dispatchRY - 1)) >= dispatchSize) {
175 dispatchRY -= 1;
176 }
177 }
178
179 /* X and Y dimensions may have different limits so the above computation may not be right, but
180 * even with the standard 64k minimum on all dimensions we still have a lot of room. Therefore,
181 * we presume it all fits. */
182 assert(dispatchRY < GPU_max_work_group_count(1));
183 GPU_compute_dispatch(shader, dispatchRX, dispatchRY, 1);
184
185 /* Next usage of the src/dst buffers will always be a shader storage. Vertices/normals/attributes
186 * are copied over to the final buffers using compute shaders. */
188}
189
190bool GPUComputeEvaluator::EvalStencils(gpu::VertBuf *srcBuffer,
191 BufferDescriptor const &srcDesc,
192 gpu::VertBuf *dstBuffer,
193 BufferDescriptor const &dstDesc,
194 gpu::VertBuf *duBuffer,
195 BufferDescriptor const &duDesc,
196 gpu::VertBuf *dvBuffer,
197 BufferDescriptor const &dvDesc,
198 gpu::StorageBuf *sizesBuffer,
199 gpu::StorageBuf *offsetsBuffer,
200 gpu::StorageBuf *indicesBuffer,
201 gpu::StorageBuf *weightsBuffer,
202 gpu::StorageBuf *duWeightsBuffer,
203 gpu::StorageBuf *dvWeightsBuffer,
204 int start,
205 int end) const
206{
207 if (_stencilKernel.shader == nullptr) {
208 return false;
209 }
210 int count = end - start;
211 if (count <= 0) {
212 return true;
213 }
214
215 GPU_shader_bind(_stencilKernel.shader);
218 if (duBuffer) {
220 }
221 if (dvBuffer) {
223 }
228 if (duWeightsBuffer) {
230 }
231 if (dvWeightsBuffer) {
233 }
234
235 GPU_shader_uniform_int_ex(_stencilKernel.shader, _stencilKernel.uniformStart, 1, 1, &start);
236 GPU_shader_uniform_int_ex(_stencilKernel.shader, _stencilKernel.uniformEnd, 1, 1, &end);
238 _stencilKernel.shader, _stencilKernel.uniformSrcOffset, 1, 1, &srcDesc.offset);
240 _stencilKernel.shader, _stencilKernel.uniformDstOffset, 1, 1, &dstDesc.offset);
241
242// TODO init to -1 and check >= 0 to align with GPU module. Currently we assume that the uniform
243// location is not zero as there are other uniforms defined as well.
244#define BIND_BUF_DESC(uniform, desc) \
245 if (_stencilKernel.uniform > 0) { \
246 int value[] = {desc.offset, desc.length, desc.stride}; \
247 GPU_shader_uniform_int_ex(_stencilKernel.shader, _stencilKernel.uniform, 3, 1, value); \
248 }
249 BIND_BUF_DESC(uniformDuDesc, duDesc)
250 BIND_BUF_DESC(uniformDvDesc, dvDesc)
251#undef BIND_BUF_DESC
252 DispatchCompute(_stencilKernel.shader, count);
253 // GPU_storagebuf_unbind_all();
255
256 return true;
257}
258
259bool GPUComputeEvaluator::EvalPatches(gpu::VertBuf *srcBuffer,
260 BufferDescriptor const &srcDesc,
261 gpu::VertBuf *dstBuffer,
262 BufferDescriptor const &dstDesc,
263 gpu::VertBuf *duBuffer,
264 BufferDescriptor const &duDesc,
265 gpu::VertBuf *dvBuffer,
266 BufferDescriptor const &dvDesc,
267 int numPatchCoords,
268 gpu::VertBuf *patchCoordsBuffer,
269 const PatchArrayVector &patchArrays,
270 gpu::StorageBuf *patchIndexBuffer,
271 gpu::StorageBuf *patchParamsBuffer)
272{
273 if (_patchKernel.shader == nullptr) {
274 return false;
275 }
276
277 GPU_shader_bind(_patchKernel.shader);
280 if (duBuffer) {
282 }
283 if (dvBuffer) {
285 }
289 int patchArraySize = sizeof(PatchArray);
290 if (_patchArraysSSBO) {
291 GPU_storagebuf_free(_patchArraysSSBO);
292 _patchArraysSSBO = nullptr;
293 }
294 _patchArraysSSBO = GPU_storagebuf_create_ex(patchArrays.size() * patchArraySize,
295 static_cast<const void *>(&patchArrays[0]),
297 "osd_patch_array");
299
301 _patchKernel.shader, _patchKernel.uniformSrcOffset, 1, 1, &srcDesc.offset);
303 _patchKernel.shader, _patchKernel.uniformDstOffset, 1, 1, &dstDesc.offset);
304
305// TODO init to -1 and check >= 0 to align with GPU module.
306#define BIND_BUF_DESC(uniform, desc) \
307 if (_stencilKernel.uniform > 0) { \
308 int value[] = {desc.offset, desc.length, desc.stride}; \
309 GPU_shader_uniform_int_ex(_patchKernel.shader, _patchKernel.uniform, 3, 1, value); \
310 }
311 BIND_BUF_DESC(uniformDuDesc, duDesc)
312 BIND_BUF_DESC(uniformDvDesc, dvDesc)
313#undef BIND_BUF_DESC
314
315 DispatchCompute(_patchKernel.shader, numPatchCoords);
317
318 return true;
319}
320// ---------------------------------------------------------------------------
321
322GPUComputeEvaluator::_StencilKernel::_StencilKernel() {}
323GPUComputeEvaluator::_StencilKernel::~_StencilKernel()
324{
325 if (shader) {
326 GPU_shader_free(shader);
327 shader = nullptr;
328 }
329}
330static blender::gpu::Shader *compile_eval_stencil_shader(BufferDescriptor const &srcDesc,
331 BufferDescriptor const &dstDesc,
332 BufferDescriptor const &duDesc,
333 BufferDescriptor const &dvDesc,
334 int workGroupSize)
335{
336 using namespace blender::gpu::shader;
337 ShaderCreateInfo info("opensubdiv_compute_eval");
338 info.local_group_size(workGroupSize, 1, 1);
339
340 /* Ensure the basis code has access to proper backend specification define: it is not guaranteed
341 * that the code provided by OpenSubdiv specifies it. For example, it doesn't for GLSL but it
342 * does for Metal. Additionally, for Metal OpenSubdiv defines OSD_PATCH_BASIS_METAL as 1, so do
343 * the same here to avoid possible warning about value being re-defined. */
345 info.define("OSD_PATCH_BASIS_METAL", "1");
346 }
347 else {
348 info.define("OSD_PATCH_BASIS_GLSL");
349 }
350
351 // TODO: use specialization constants for src_stride, dst_stride. Not sure we can use
352 // work group size as that requires extensions. This allows us to compile less shaders and
353 // improve overall performance. Adding length as specialization constant will not work as it is
354 // used to define an array length. This is not supported by Metal.
355 std::string length = std::to_string(srcDesc.length);
356 std::string src_stride = std::to_string(srcDesc.stride);
357 std::string dst_stride = std::to_string(dstDesc.stride);
358 std::string work_group_size = std::to_string(workGroupSize);
359 info.define("LENGTH", length);
360 info.define("SRC_STRIDE", src_stride);
361 info.define("DST_STRIDE", dst_stride);
362 info.define("WORK_GROUP_SIZE", work_group_size);
363 info.typedef_source("osd_patch_basis.glsl");
364 info.storage_buf(
365 SHADER_SRC_VERTEX_BUFFER_BUF_SLOT, Qualifier::read, "float", "srcVertexBuffer[]");
366 info.storage_buf(
367 SHADER_DST_VERTEX_BUFFER_BUF_SLOT, Qualifier::write, "float", "dstVertexBuffer[]");
368 info.push_constant(Type::int_t, "srcOffset");
369 info.push_constant(Type::int_t, "dstOffset");
370
371 bool deriv1 = (duDesc.length > 0 || dvDesc.length > 0);
372 if (deriv1) {
373 info.define("OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES");
374 info.storage_buf(SHADER_DU_BUFFER_BUF_SLOT, Qualifier::read_write, "float", "duBuffer[]");
375 info.storage_buf(SHADER_DV_BUFFER_BUF_SLOT, Qualifier::read_write, "float", "dvBuffer[]");
376 info.push_constant(Type::int3_t, "duDesc");
377 info.push_constant(Type::int3_t, "dvDesc");
378 }
379
380 info.storage_buf(SHADER_SIZES_BUF_SLOT, Qualifier::read, "int", "sizes_buf[]");
381 info.storage_buf(SHADER_OFFSETS_BUF_SLOT, Qualifier::read, "int", "offsets_buf[]");
382 info.storage_buf(SHADER_INDICES_BUF_SLOT, Qualifier::read, "int", "indices_buf[]");
383 info.storage_buf(SHADER_WEIGHTS_BUF_SLOT, Qualifier::read, "float", "weights_buf[]");
384 if (deriv1) {
385 info.storage_buf(
386 SHADER_DU_WEIGHTS_BUF_SLOT, Qualifier::read_write, "float", "du_weights_buf[]");
387 info.storage_buf(
388 SHADER_DV_WEIGHTS_BUF_SLOT, Qualifier::read_write, "float", "dv_weights_buf[]");
389 }
390 info.push_constant(Type::int_t, "batchStart");
391 info.push_constant(Type::int_t, "batchEnd");
392
393 info.compute_source("osd_eval_stencils_comp.glsl");
395 reinterpret_cast<const GPUShaderCreateInfo *>(&info));
396 return shader;
397}
398
399bool GPUComputeEvaluator::_StencilKernel::Compile(BufferDescriptor const &srcDesc,
400 BufferDescriptor const &dstDesc,
401 BufferDescriptor const &duDesc,
402 BufferDescriptor const &dvDesc,
403 int workGroupSize)
404{
405 if (shader) {
407 shader = nullptr;
408 }
409
410 shader = compile_eval_stencil_shader(srcDesc, dstDesc, duDesc, dvDesc, workGroupSize);
411 if (shader == nullptr) {
412 return false;
413 }
414
415 // cache uniform locations (TODO: use uniform block)
416 uniformStart = GPU_shader_get_uniform(shader, "batchStart");
417 uniformEnd = GPU_shader_get_uniform(shader, "batchEnd");
418 uniformSrcOffset = GPU_shader_get_uniform(shader, "srcOffset");
419 uniformDstOffset = GPU_shader_get_uniform(shader, "dstOffset");
420 uniformDuDesc = GPU_shader_get_uniform(shader, "duDesc");
421 uniformDvDesc = GPU_shader_get_uniform(shader, "dvDesc");
422
423 return true;
424}
425
426// ---------------------------------------------------------------------------
427
428GPUComputeEvaluator::_PatchKernel::_PatchKernel() {}
429GPUComputeEvaluator::_PatchKernel::~_PatchKernel()
430{
431 if (shader) {
432 GPU_shader_free(shader);
433 shader = nullptr;
434 }
435}
436
437static blender::gpu::Shader *compile_eval_patches_shader(BufferDescriptor const &srcDesc,
438 BufferDescriptor const &dstDesc,
439 BufferDescriptor const &duDesc,
440 BufferDescriptor const &dvDesc,
441 int workGroupSize)
442{
443 using namespace blender::gpu::shader;
444 ShaderCreateInfo info("opensubdiv_compute_eval");
445 info.local_group_size(workGroupSize, 1, 1);
446
447 /* Ensure the basis code has access to proper backend specification define: it is not guaranteed
448 * that the code provided by OpenSubdiv specifies it. For example, it doesn't for GLSL but it
449 * does for Metal. Additionally, for Metal OpenSubdiv defines OSD_PATCH_BASIS_METAL as 1, so do
450 * the same here to avoid possible warning about value being re-defined. */
452 info.define("OSD_PATCH_BASIS_METAL", "1");
453 }
454 else {
455 info.define("OSD_PATCH_BASIS_GLSL");
456 }
457
458 // TODO: use specialization constants for src_stride, dst_stride. Not sure we can use
459 // work group size as that requires extensions. This allows us to compile less shaders and
460 // improve overall performance. Adding length as specialization constant will not work as it is
461 // used to define an array length. This is not supported by Metal.
462 std::string length = std::to_string(srcDesc.length);
463 std::string src_stride = std::to_string(srcDesc.stride);
464 std::string dst_stride = std::to_string(dstDesc.stride);
465 std::string work_group_size = std::to_string(workGroupSize);
466 info.define("LENGTH", length);
467 info.define("SRC_STRIDE", src_stride);
468 info.define("DST_STRIDE", dst_stride);
469 info.define("WORK_GROUP_SIZE", work_group_size);
470 info.typedef_source("osd_patch_basis.glsl");
471 info.storage_buf(
472 SHADER_SRC_VERTEX_BUFFER_BUF_SLOT, Qualifier::read, "float", "srcVertexBuffer[]");
473 info.storage_buf(
474 SHADER_DST_VERTEX_BUFFER_BUF_SLOT, Qualifier::write, "float", "dstVertexBuffer[]");
475 info.push_constant(Type::int_t, "srcOffset");
476 info.push_constant(Type::int_t, "dstOffset");
477
478 bool deriv1 = (duDesc.length > 0 || dvDesc.length > 0);
479 if (deriv1) {
480 info.define("OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES");
481 info.storage_buf(SHADER_DU_BUFFER_BUF_SLOT, Qualifier::read_write, "float", "duBuffer[]");
482 info.storage_buf(SHADER_DV_BUFFER_BUF_SLOT, Qualifier::read_write, "float", "dvBuffer[]");
483 info.push_constant(Type::int3_t, "duDesc");
484 info.push_constant(Type::int3_t, "dvDesc");
485 }
486
487 info.storage_buf(
488 SHADER_PATCH_ARRAY_BUFFER_BUF_SLOT, Qualifier::read, "OsdPatchArray", "patchArrayBuffer[]");
489 info.storage_buf(
490 SHADER_PATCH_COORDS_BUF_SLOT, Qualifier::read, "OsdPatchCoord", "patchCoords[]");
491 info.storage_buf(
492 SHADER_PATCH_INDEX_BUFFER_BUF_SLOT, Qualifier::read, "int", "patchIndexBuffer[]");
493 info.storage_buf(
494 SHADER_PATCH_PARAM_BUFFER_BUF_SLOT, Qualifier::read, "OsdPatchParam", "patchParamBuffer[]");
495
496 info.compute_source("osd_eval_patches_comp.glsl");
498 reinterpret_cast<const GPUShaderCreateInfo *>(&info));
499 return shader;
500}
501
502bool GPUComputeEvaluator::_PatchKernel::Compile(BufferDescriptor const &srcDesc,
503 BufferDescriptor const &dstDesc,
504 BufferDescriptor const &duDesc,
505 BufferDescriptor const &dvDesc,
506 int workGroupSize)
507{
508 if (shader) {
510 shader = nullptr;
511 }
512
513 shader = compile_eval_patches_shader(srcDesc, dstDesc, duDesc, dvDesc, workGroupSize);
514 if (shader == nullptr) {
515 return false;
516 }
517
518 // cache uniform locations
519 uniformSrcOffset = GPU_shader_get_uniform(shader, "srcOffset");
520 uniformDstOffset = GPU_shader_get_uniform(shader, "dstOffset");
521 uniformDuDesc = GPU_shader_get_uniform(shader, "duDesc");
522 uniformDvDesc = GPU_shader_get_uniform(shader, "dvDesc");
523
524 return true;
525}
526
527} // namespace blender::opensubdiv
int GPU_max_work_group_count(int index)
void GPU_compute_dispatch(blender::gpu::Shader *shader, uint groups_x_len, uint groups_y_len, uint groups_z_len, const blender::gpu::shader::SpecializationConstants *constants_state=nullptr)
GPUBackendType GPU_backend_get_type()
void GPU_shader_free(blender::gpu::Shader *shader)
blender::gpu::Shader * GPU_shader_create_from_info(const GPUShaderCreateInfo *_info)
void GPU_shader_bind(blender::gpu::Shader *shader, const blender::gpu::shader::SpecializationConstants *constants_state=nullptr)
void GPU_shader_uniform_int_ex(blender::gpu::Shader *shader, int location, int length, int array_size, const int *value)
int GPU_shader_get_uniform(blender::gpu::Shader *shader, const char *name)
void GPU_shader_unbind()
@ GPU_BARRIER_SHADER_STORAGE
Definition GPU_state.hh:48
void GPU_finish()
Definition gpu_state.cc:310
void GPU_memory_barrier(GPUBarrier barrier)
Definition gpu_state.cc:326
void GPU_storagebuf_free(blender::gpu::StorageBuf *ssbo)
blender::gpu::StorageBuf * GPU_storagebuf_create_ex(size_t size, const void *data, GPUUsageType usage, const char *name)
void GPU_storagebuf_bind(blender::gpu::StorageBuf *ssbo, int slot)
void GPU_vertbuf_bind_as_ssbo(blender::gpu::VertBuf *verts, int binding)
@ GPU_USAGE_STATIC
static bool EvalPatches(SRC_BUFFER *srcBuffer, OpenSubdiv::Osd::BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, GPUComputeEvaluator *instance, void *deviceContext=nullptr)
Generic limit eval function. This function has a same signature as other device kernels have so that ...
bool Compile(OpenSubdiv::Osd::BufferDescriptor const &srcDesc, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, OpenSubdiv::Osd::BufferDescriptor const &duDesc=OpenSubdiv::Osd::BufferDescriptor(), OpenSubdiv::Osd::BufferDescriptor const &dvDesc=OpenSubdiv::Osd::BufferDescriptor())
static void Synchronize(void *deviceContext)
Wait the dispatched kernel finishes.
~GPUComputeEvaluator()
Destructor. note that the GL context must be made current.
static bool EvalStencils(SRC_BUFFER *srcBuffer, OpenSubdiv::Osd::BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, STENCIL_TABLE const *stencilTable, GPUComputeEvaluator *instance, void *deviceContext=nullptr)
Generic static stencil function. This function has a same signature as other device kernels have so t...
GPUStencilTableSSBO(OpenSubdiv::Far::StencilTable const *stencilTable)
#define SHADER_SIZES_BUF_SLOT
#define SHADER_PATCH_ARRAY_BUFFER_BUF_SLOT
#define SHADER_DST_VERTEX_BUFFER_BUF_SLOT
#define SHADER_OFFSETS_BUF_SLOT
#define SHADER_DU_BUFFER_BUF_SLOT
#define SHADER_PATCH_PARAM_BUFFER_BUF_SLOT
#define SHADER_DV_BUFFER_BUF_SLOT
#define SHADER_DV_WEIGHTS_BUF_SLOT
#define SHADER_DU_WEIGHTS_BUF_SLOT
#define SHADER_WEIGHTS_BUF_SLOT
#define SHADER_PATCH_COORDS_BUF_SLOT
#define SHADER_SRC_VERTEX_BUFFER_BUF_SLOT
#define SHADER_INDICES_BUF_SLOT
#define SHADER_PATCH_INDEX_BUFFER_BUF_SLOT
#define BIND_BUF_DESC(uniform, desc)
#define assert(assertion)
float length(VecOp< float, D >) RET
int count
#define T
gpu::StorageBuf * create_buffer(std::vector< T > const &src, const char *name)
static blender::gpu::Shader * compile_eval_patches_shader(BufferDescriptor const &srcDesc, BufferDescriptor const &dstDesc, BufferDescriptor const &duDesc, BufferDescriptor const &dvDesc, int workGroupSize)
static blender::gpu::Shader * compile_eval_stencil_shader(BufferDescriptor const &srcDesc, BufferDescriptor const &dstDesc, BufferDescriptor const &duDesc, BufferDescriptor const &dvDesc, int workGroupSize)
static void storage_buffer_free(gpu::StorageBuf **buffer)
const char * name
Describe inputs & outputs, stage interfaces, resources and sources of a shader. If all data is correc...
Self & compute_source(StringRefNull filename)
Self & push_constant(Type type, StringRefNull name, int array_size=0)
Self & typedef_source(StringRefNull filename)
Self & storage_buf(int slot, Qualifier qualifiers, StringRefNull type_name, StringRefNull name, Frequency freq=Frequency::PASS)
Self & local_group_size(int local_size_x, int local_size_y=1, int local_size_z=1)
Self & define(StringRefNull name, StringRefNull value="")