23#include <opensubdiv/far/error.h>
24#include <opensubdiv/far/patchDescriptor.h>
25#include <opensubdiv/far/stencilTable.h>
26#include <opensubdiv/osd/glslPatchShaderSource.h>
34using OpenSubdiv::Far::LimitStencilTable;
35using OpenSubdiv::Far::StencilTable;
36using OpenSubdiv::Osd::BufferDescriptor;
37using OpenSubdiv::Osd::PatchArray;
38using OpenSubdiv::Osd::PatchArrayVector;
// Uploads the elements of `src` into a newly created GL buffer object with
// GL_STATIC_DRAW usage; the buffer name is stored in `devicePtr`.
// `src.at(0)` throws std::out_of_range on an empty vector, so callers must
// pass non-empty data.
// NOTE(review): this listing omits lines (the declarations of `devicePtr` and
// `prev`, the fallback branch header, the return statement) -- presumably the
// function returns `devicePtr`; confirm against the full file.
44template<
class T> GLuint
createSSBO(std::vector<T>
const &src)
// The DSA entry points are only used when they are known at compile time...
52#if defined(GL_ARB_direct_state_access)
// ...and the extension is reported at run time.
53 if (epoxy_has_gl_extension(
"GL_ARB_direct_state_access")) {
// Direct-state-access path: create and fill the buffer by name, without
// binding it.
54 glCreateBuffers(1, &devicePtr);
55 glNamedBufferData(devicePtr, src.size() *
sizeof(T), &src.at(0), GL_STATIC_DRAW);
// Bind-to-edit path: remember the current GL_SHADER_STORAGE_BUFFER binding,
// upload through that binding point, then restore the previous binding.
61 glGetIntegerv(GL_SHADER_STORAGE_BUFFER_BINDING, &prev);
62 glGenBuffers(1, &devicePtr);
63 glBindBuffer(GL_SHADER_STORAGE_BUFFER, devicePtr);
64 glBufferData(GL_SHADER_STORAGE_BUFFER, src.size() *
sizeof(T), &src.at(0), GL_STATIC_DRAW);
65 glBindBuffer(GL_SHADER_STORAGE_BUFFER, prev);
// Body fragment that fills the SSBO handles from a stencil table.
// NOTE(review): the signature line is not visible here; presumably this is the
// GLStencilTableSSBO constructor taking a Far::StencilTable.
73 _numStencils = stencilTable->GetNumStencils();
// Buffers are only created for a non-empty table.
74 if (_numStencils > 0) {
// NOTE(review): listing line 75 is missing; `_sizes` is presumably created
// there -- confirm.
76 _offsets =
createSSBO(stencilTable->GetOffsets());
77 _indices =
createSSBO(stencilTable->GetControlIndices());
78 _weights =
createSSBO(stencilTable->GetWeights());
// No derivative weight buffers are created on this path; their handles are 0.
79 _duWeights = _dvWeights = 0;
80 _duuWeights = _duvWeights = _dvvWeights = 0;
// NOTE(review): presumably the else-branch for an empty table -- every handle
// is set to 0.
83 _sizes = _offsets = _indices = _weights = 0;
84 _duWeights = _dvWeights = 0;
85 _duuWeights = _duvWeights = _dvvWeights = 0;
// Body fragment that fills the SSBO handles from a limit stencil table,
// including first- and second-derivative weights.
// NOTE(review): the signature line is not visible here; presumably this is the
// GLStencilTableSSBO constructor taking a Far::LimitStencilTable.
91 _numStencils = limitStencilTable->GetNumStencils();
// Buffers are only created for a non-empty table.
92 if (_numStencils > 0) {
// One buffer per table array: sizes, offsets, control indices, weights.
93 _sizes =
createSSBO(limitStencilTable->GetSizes());
94 _offsets =
createSSBO(limitStencilTable->GetOffsets());
95 _indices =
createSSBO(limitStencilTable->GetControlIndices());
96 _weights =
createSSBO(limitStencilTable->GetWeights());
// First-derivative weights.
97 _duWeights =
createSSBO(limitStencilTable->GetDuWeights());
98 _dvWeights =
createSSBO(limitStencilTable->GetDvWeights());
// Second-derivative weights.
99 _duuWeights =
createSSBO(limitStencilTable->GetDuuWeights());
100 _duvWeights =
createSSBO(limitStencilTable->GetDuvWeights());
101 _dvvWeights =
createSSBO(limitStencilTable->GetDvvWeights());
// NOTE(review): presumably the else-branch for an empty table -- every handle
// is set to 0.
104 _sizes = _offsets = _indices = _weights = 0;
105 _duWeights = _dvWeights = 0;
106 _duuWeights = _duvWeights = _dvvWeights = 0;
// Deletes each GL buffer whose name is held in the members below.
// NOTE(review): presumably the GLStencilTableSSBO destructor. The listing
// numbers advance by three per call, so each delete is probably wrapped in a
// guard that is not visible here -- confirm.
113 glDeleteBuffers(1, &_sizes);
116 glDeleteBuffers(1, &_offsets);
119 glDeleteBuffers(1, &_indices);
122 glDeleteBuffers(1, &_weights);
125 glDeleteBuffers(1, &_duWeights);
128 glDeleteBuffers(1, &_dvWeights);
131 glDeleteBuffers(1, &_duuWeights);
134 glDeleteBuffers(1, &_duvWeights);
137 glDeleteBuffers(1, &_dvvWeights);
// Zero-fills the storage of both kernel objects byte-wise.
// NOTE(review): presumably part of the GLComputeEvaluator constructor; the
// enclosing signature is not visible in this listing.
145 memset((
void *)&_stencilKernel, 0,
sizeof(_stencilKernel));
146 memset((
void *)&_patchKernel, 0,
sizeof(_patchKernel));
// Deletes the patch-array SSBO, but only if one was created (non-zero name).
// NOTE(review): presumably part of the GLComputeEvaluator destructor.
151 if (_patchArraysSSBO) {
152 glDeleteBuffers(1, &_patchArraysSSBO);
// Builds a compute shader from a generated `#define` preamble plus the patch
// basis source, then links it into a new program object.
// NOTE(review): the first signature line, the `workGroupSize` parameter, the
// declarations of `linked`/`buffer` and the return statements are not visible
// in this listing.
157 BufferDescriptor
const &dstDesc,
158 BufferDescriptor
const &duDesc,
159 BufferDescriptor
const &dvDesc,
160 BufferDescriptor
const &duuDesc,
161 BufferDescriptor
const &duvDesc,
162 BufferDescriptor
const &dvvDesc,
163 const char *kernelDefine,
166 GLuint program = glCreateProgram();
168 GLuint shader = glCreateShader(GL_COMPUTE_SHADER);
170 std::string patchBasisShaderSource =
171 OpenSubdiv::Osd::GLSLPatchShaderSource::GetPatchBasisShaderSource();
172 const char *patchBasisShaderSourceDefine =
"#define OSD_PATCH_BASIS_GLSL\n";
// Preamble: element length, source/destination strides, work group size, the
// caller-selected kernel define and the patch-basis define.
174 std::ostringstream defines;
175 defines <<
"#define LENGTH " << srcDesc.length <<
"\n"
176 <<
"#define SRC_STRIDE " << srcDesc.stride <<
"\n"
177 <<
"#define DST_STRIDE " << dstDesc.stride <<
"\n"
178 <<
"#define WORK_GROUP_SIZE " << workGroupSize <<
"\n"
179 << kernelDefine <<
"\n"
180 << patchBasisShaderSourceDefine <<
"\n";
// A derivative order counts as requested when any of its descriptors has a
// non-zero length.
182 bool deriv1 = (duDesc.length > 0 || dvDesc.length > 0);
183 bool deriv2 = (duuDesc.length > 0 || duvDesc.length > 0 || dvvDesc.length > 0);
// NOTE(review): the two defines below are presumably guarded by `deriv1` and
// `deriv2`; the guarding lines are not visible in this listing.
185 defines <<
"#define OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES\n";
188 defines <<
"#define OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES\n";
// Keep the string alive in a local so the c_str() pointer used below stays
// valid.
191 std::string defineStr = defines.str();
// Four source strings: [0] GLSL version, [1] defines, [2] patch basis source.
// NOTE(review): the assignment of slot [3] is not visible here (listing line
// 197 is missing) -- presumably the compute kernel source.
193 const char *shaderSources[4] = {
"#version 430\n", 0, 0, 0};
195 shaderSources[1] = defineStr.c_str();
196 shaderSources[2] = patchBasisShaderSource.c_str();
198 glShaderSource(shader, 4, shaderSources,
NULL);
199 glCompileShader(shader);
200 glAttachShader(program, shader);
203 glLinkProgram(program);
204 glGetProgramiv(program, GL_LINK_STATUS, &linked);
// On link failure, report the shader and program info logs (up to 1024 bytes
// each) through Far::Error, then delete the program.
206 if (linked == GL_FALSE) {
208 glGetShaderInfoLog(shader, 1024,
NULL, buffer);
209 OpenSubdiv::Far::Error(OpenSubdiv::Far::FAR_RUNTIME_ERROR, buffer);
211 glGetProgramInfoLog(program, 1024,
NULL, buffer);
212 OpenSubdiv::Far::Error(OpenSubdiv::Far::FAR_RUNTIME_ERROR, buffer);
214 glDeleteProgram(program);
218 glDeleteShader(shader);
// Compiles the stencil and patch kernels for the given buffer descriptors and
// creates the patch-array SSBO name if it does not exist yet.
// NOTE(review): the first signature line and the statements inside the failure
// branches are not visible in this listing.
224 BufferDescriptor
const &dstDesc,
225 BufferDescriptor
const &duDesc,
226 BufferDescriptor
const &dvDesc,
227 BufferDescriptor
const &duuDesc,
228 BufferDescriptor
const &duvDesc,
229 BufferDescriptor
const &dvvDesc)
// Stencil evaluation kernel; the branch is taken when compilation fails.
233 if (!_stencilKernel.Compile(
234 srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, _workGroupSize))
// Patch evaluation kernel; the branch is taken when compilation fails.
240 if (!_patchKernel.Compile(
241 srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, _workGroupSize))
// Generate the buffer name only once (0 means "not created yet").
247 if (!_patchArraysSSBO) {
248 glGenBuffers(1, &_patchArraysSSBO);
// Returns the number of work groups needed to cover `count` items: `count`
// divided by `_workGroupSize`, rounded up using integer arithmetic.
262int GLComputeEvaluator::GetDispatchSize(
int count)
const
264 return (
count + _workGroupSize - 1) / _workGroupSize;
// Dispatches the currently bound compute program with enough work groups for
// `totalDispatchSize` items, switching to a two-dimensional grid when the
// group count exceeds the X-dimension limit.
267void GLComputeEvaluator::DispatchCompute(
int totalDispatchSize)
const
269 int maxWorkGroupCount[2] = {0, 0};
// Query the implementation limits for dimension 0 (X) and dimension 1 (Y).
271 glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, &maxWorkGroupCount[0]);
272 glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 1, &maxWorkGroupCount[1]);
274 const GLuint maxResX =
static_cast<GLuint
>(maxWorkGroupCount[0]);
// Start with a one-dimensional dispatch: all groups along X, a single row in Y.
276 const int dispatchSize = GetDispatchSize(totalDispatchSize);
277 GLuint dispatchRX =
static_cast<GLuint
>(dispatchSize);
278 GLuint dispatchRY = 1u;
// Too many groups for X alone: use a square grid with side
// ceil(sqrt(dispatchSize)).
279 if (dispatchRX > maxResX) {
287 dispatchRX = dispatchRY = std::ceil(std::sqrt(dispatchSize));
// True when the grid minus its last row already covers `dispatchSize` groups.
// NOTE(review): the branch body is not visible in this listing; presumably it
// drops that unused row.
289 if ((dispatchRX * (dispatchRY - 1)) >= dispatchSize) {
// The Y group count is checked against the Y limit in debug builds only.
297 assert(dispatchRY <
static_cast<GLuint
>(maxWorkGroupCount[1]));
299 glDispatchCompute(dispatchRX, dispatchRY, 1);
// Parameter-list fragment: source/destination and first-derivative buffer
// descriptors, followed by the stencil table buffer names.
// NOTE(review): the function name, the remaining parameters and the body are
// not visible in this listing; presumably this is the EvalStencils overload
// without second derivatives.
303 BufferDescriptor
const &srcDesc,
305 BufferDescriptor
const &dstDesc,
307 BufferDescriptor
const &duDesc,
309 BufferDescriptor
const &dvDesc,
311 GLuint offsetsBuffer,
312 GLuint indicesBuffer,
313 GLuint weightsBuffer,
314 GLuint duWeightsBuffer,
315 GLuint dvWeightsBuffer,
// Stencil evaluation with first and second derivatives: binds the data and
// stencil buffers to shader-storage binding points, sets the kernel uniforms,
// dispatches over the [start, end) range, and restores the previous program.
// NOTE(review): the function name, several parameters (the src/dst/derivative
// buffer names, `sizesBuffer`, `start`, `end`) and the early-return bodies are
// not visible in this listing.
348 BufferDescriptor
const &srcDesc,
350 BufferDescriptor
const &dstDesc,
352 BufferDescriptor
const &duDesc,
354 BufferDescriptor
const &dvDesc,
356 BufferDescriptor
const &duuDesc,
358 BufferDescriptor
const &duvDesc,
360 BufferDescriptor
const &dvvDesc,
362 GLuint offsetsBuffer,
363 GLuint indicesBuffer,
364 GLuint weightsBuffer,
365 GLuint duWeightsBuffer,
366 GLuint dvWeightsBuffer,
367 GLuint duuWeightsBuffer,
368 GLuint duvWeightsBuffer,
369 GLuint dvvWeightsBuffer,
// Requires a compiled stencil kernel (non-zero program name).
374 if (!_stencilKernel.program) {
// Number of stencils in the requested batch.
377 int count = end - start;
// Binding points: 0 src, 1 dst, 2 du, 3 dv, 10 duu, 11 duv, 12 dvv.
382 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, srcBuffer);
383 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, dstBuffer);
384 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, duBuffer);
385 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, dvBuffer);
386 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 10, duuBuffer);
387 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, duvBuffer);
388 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 12, dvvBuffer);
// Stencil table: 4 sizes, 5 offsets, 6 indices, 7 weights.
389 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, sizesBuffer);
390 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, offsetsBuffer);
391 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, indicesBuffer);
392 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 7, weightsBuffer);
// Derivative weight buffers are bound only when a non-zero name was passed:
// 8 du, 9 dv, 13 duu, 14 duv, 15 dvv.
393 if (duWeightsBuffer) {
394 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 8, duWeightsBuffer);
396 if (dvWeightsBuffer) {
397 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 9, dvWeightsBuffer);
399 if (duuWeightsBuffer) {
400 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 13, duuWeightsBuffer);
402 if (duvWeightsBuffer) {
403 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 14, duvWeightsBuffer);
405 if (dvvWeightsBuffer) {
406 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 15, dvvWeightsBuffer);
// Remember the caller's program so it can be restored after the dispatch.
410 glGetIntegerv(GL_CURRENT_PROGRAM, &activeProgram);
411 glUseProgram(_stencilKernel.program);
413 glUniform1i(_stencilKernel.uniformStart, start);
414 glUniform1i(_stencilKernel.uniformEnd, end);
415 glUniform1i(_stencilKernel.uniformSrcOffset, srcDesc.offset);
416 glUniform1i(_stencilKernel.uniformDstOffset, dstDesc.offset);
// Each derivative descriptor is uploaded as (offset, length, stride).
// NOTE(review): glGetUniformLocation reports a missing uniform as -1 and 0 is
// a valid location, so `> 0` would skip a uniform placed at location 0 --
// confirm that this is intended.
417 if (_stencilKernel.uniformDuDesc > 0) {
418 glUniform3i(_stencilKernel.uniformDuDesc, duDesc.offset, duDesc.length, duDesc.stride);
420 if (_stencilKernel.uniformDvDesc > 0) {
421 glUniform3i(_stencilKernel.uniformDvDesc, dvDesc.offset, dvDesc.length, dvDesc.stride);
423 if (_stencilKernel.uniformDuuDesc > 0) {
424 glUniform3i(_stencilKernel.uniformDuuDesc, duuDesc.offset, duuDesc.length, duuDesc.stride);
426 if (_stencilKernel.uniformDuvDesc > 0) {
427 glUniform3i(_stencilKernel.uniformDuvDesc, duvDesc.offset, duvDesc.length, duvDesc.stride);
429 if (_stencilKernel.uniformDvvDesc > 0) {
430 glUniform3i(_stencilKernel.uniformDvvDesc, dvvDesc.offset, dvvDesc.length, dvvDesc.stride);
433 DispatchCompute(
count);
435 glUseProgram(activeProgram);
// NOTE(review): the kernel writes through shader storage buffers; confirm that
// GL_TEXTURE_FETCH_BARRIER_BIT is the barrier the consumers of the result need.
437 glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT);
// Unbind shader-storage binding points 0 through 15.
438 for (
int i = 0; i < 16; ++i) {
439 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, i, 0);
// Parameter-list fragment of a const member: source/destination and
// first-derivative descriptors, the patch coordinate buffer, the patch arrays,
// and the patch index/param buffers.
// NOTE(review): the function name, the remaining parameters and the body are
// not visible in this listing; presumably this is the EvalPatches overload
// without second derivatives.
446 BufferDescriptor
const &srcDesc,
448 BufferDescriptor
const &dstDesc,
450 BufferDescriptor
const &duDesc,
452 BufferDescriptor
const &dvDesc,
454 GLuint patchCoordsBuffer,
455 const PatchArrayVector &patchArrays,
456 GLuint patchIndexBuffer,
457 GLuint patchParamsBuffer)
const
// Patch evaluation with first and second derivatives: binds the data and
// patch-table buffers, uploads the patch arrays into `_patchArraysSSBO`, sets
// the kernel uniforms, dispatches one item per patch coordinate, and restores
// the previous program.
// NOTE(review): the function name, several parameters (the buffer names,
// `numPatchCoords`) and the early-return body are not visible in this listing.
482 BufferDescriptor
const &srcDesc,
484 BufferDescriptor
const &dstDesc,
486 BufferDescriptor
const &duDesc,
488 BufferDescriptor
const &dvDesc,
490 BufferDescriptor
const &duuDesc,
492 BufferDescriptor
const &duvDesc,
494 BufferDescriptor
const &dvvDesc,
496 GLuint patchCoordsBuffer,
497 const PatchArrayVector &patchArrays,
498 GLuint patchIndexBuffer,
499 GLuint patchParamsBuffer)
const
// Requires a compiled patch kernel (non-zero program name).
502 if (!_patchKernel.program) {
// Binding points: 0 src, 1 dst, 2 du, 3 dv, 10 duu, 11 duv, 12 dvv.
506 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, srcBuffer);
507 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, dstBuffer);
508 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, duBuffer);
509 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, dvBuffer);
510 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 10, duuBuffer);
511 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, duvBuffer);
512 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 12, dvvBuffer);
// Patch table: 5 patch coordinates, 6 patch indices, 7 patch params.
513 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, patchCoordsBuffer);
514 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, patchIndexBuffer);
515 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 7, patchParamsBuffer);
// Remember the caller's program so it can be restored after the dispatch.
518 glGetIntegerv(GL_CURRENT_PROGRAM, &activeProgram);
519 glUseProgram(_patchKernel.program);
521 glUniform1i(_patchKernel.uniformSrcOffset, srcDesc.offset);
522 glUniform1i(_patchKernel.uniformDstOffset, dstDesc.offset);
// The PatchArray structs are copied verbatim into the SSBO, one
// sizeof(PatchArray) slot per array.
// NOTE(review): the call names for the two argument lists below are on listing
// lines that are not visible; the arguments look like a buffer allocation with
// no initial data followed by a per-element sub-range upload -- confirm.
524 int patchArraySize =
sizeof(PatchArray);
525 glBindBuffer(GL_SHADER_STORAGE_BUFFER, _patchArraysSSBO);
527 GL_SHADER_STORAGE_BUFFER, patchArrays.size() * patchArraySize,
NULL, GL_STATIC_DRAW);
528 for (
int i = 0; i < (
int)patchArrays.size(); ++i) {
530 GL_SHADER_STORAGE_BUFFER, i * patchArraySize,
sizeof(PatchArray), &patchArrays[i]);
// The patch arrays are exposed to the kernel at binding point 4.
532 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, _patchArraysSSBO);
// Each derivative descriptor is uploaded as (offset, length, stride).
// NOTE(review): glGetUniformLocation reports a missing uniform as -1 and 0 is
// a valid location, so `> 0` would skip a uniform placed at location 0 --
// confirm that this is intended.
534 if (_patchKernel.uniformDuDesc > 0) {
535 glUniform3i(_patchKernel.uniformDuDesc, duDesc.offset, duDesc.length, duDesc.stride);
537 if (_patchKernel.uniformDvDesc > 0) {
538 glUniform3i(_patchKernel.uniformDvDesc, dvDesc.offset, dvDesc.length, dvDesc.stride);
540 if (_patchKernel.uniformDuuDesc > 0) {
541 glUniform3i(_patchKernel.uniformDuuDesc, duuDesc.offset, duuDesc.length, duuDesc.stride);
543 if (_patchKernel.uniformDuvDesc > 0) {
544 glUniform3i(_patchKernel.uniformDuvDesc, duvDesc.offset, duvDesc.length, duvDesc.stride);
546 if (_patchKernel.uniformDvvDesc > 0) {
547 glUniform3i(_patchKernel.uniformDvvDesc, dvvDesc.offset, dvvDesc.length, dvvDesc.stride);
550 DispatchCompute(numPatchCoords);
552 glUseProgram(activeProgram);
// Unbind the binding points used above.
// NOTE(review): no unbind of index 7 is visible here (listing line 561 is
// missing) -- confirm that it exists in the full file.
554 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, 0);
555 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, 0);
556 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, 0);
557 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, 0);
558 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, 0);
559 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, 0);
560 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, 0);
562 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 10, 0);
563 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, 0);
564 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 12, 0);
// A new stencil kernel holds no program (name 0).
570GLComputeEvaluator::_StencilKernel::_StencilKernel() : program(0) {}
// Deletes the kernel's program object.
// NOTE(review): the listing skips lines around the call, so the delete may be
// guarded -- confirm.
571GLComputeEvaluator::_StencilKernel::~_StencilKernel()
574 glDeleteProgram(program);
// Rebuilds the stencil evaluation program for the given descriptors and caches
// the locations of the uniforms it uses.
// NOTE(review): the `workGroupSize` parameter, the assignment that receives
// the compileKernel result, the failure check and the return statements are
// not visible in this listing.
578bool GLComputeEvaluator::_StencilKernel::Compile(BufferDescriptor
const &srcDesc,
579 BufferDescriptor
const &dstDesc,
580 BufferDescriptor
const &duDesc,
581 BufferDescriptor
const &dvDesc,
582 BufferDescriptor
const &duuDesc,
583 BufferDescriptor
const &duvDesc,
584 BufferDescriptor
const &dvvDesc,
// Delete the current program object before building a new one.
589 glDeleteProgram(program);
// Selects the stencil-evaluation code path in the kernel source.
592 const char *kernelDefine =
"#define OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS\n";
595 srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, kernelDefine, workGroupSize);
// Cache the uniform locations by name.
601 uniformStart = glGetUniformLocation(program,
"batchStart");
602 uniformEnd = glGetUniformLocation(program,
"batchEnd");
603 uniformSrcOffset = glGetUniformLocation(program,
"srcOffset");
604 uniformDstOffset = glGetUniformLocation(program,
"dstOffset");
605 uniformDuDesc = glGetUniformLocation(program,
"duDesc");
606 uniformDvDesc = glGetUniformLocation(program,
"dvDesc");
607 uniformDuuDesc = glGetUniformLocation(program,
"duuDesc");
608 uniformDuvDesc = glGetUniformLocation(program,
"duvDesc");
609 uniformDvvDesc = glGetUniformLocation(program,
"dvvDesc");
// A new patch kernel holds no program (name 0).
616GLComputeEvaluator::_PatchKernel::_PatchKernel() : program(0) {}
// Deletes the kernel's program object.
// NOTE(review): the listing skips lines around the call, so the delete may be
// guarded -- confirm.
617GLComputeEvaluator::_PatchKernel::~_PatchKernel()
620 glDeleteProgram(program);
// Rebuilds the patch evaluation program for the given descriptors and caches
// the locations of the uniforms it uses.
// NOTE(review): the `workGroupSize` parameter, the assignment that receives
// the compileKernel result, the failure check and the return statements are
// not visible in this listing.
624bool GLComputeEvaluator::_PatchKernel::Compile(BufferDescriptor
const &srcDesc,
625 BufferDescriptor
const &dstDesc,
626 BufferDescriptor
const &duDesc,
627 BufferDescriptor
const &dvDesc,
628 BufferDescriptor
const &duuDesc,
629 BufferDescriptor
const &duvDesc,
630 BufferDescriptor
const &dvvDesc,
// Delete the current program object before building a new one.
635 glDeleteProgram(program);
// Selects the patch-evaluation code path in the kernel source.
638 const char *kernelDefine =
"#define OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_PATCHES\n";
641 srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, kernelDefine, workGroupSize);
// Cache the uniform locations by name.
647 uniformSrcOffset = glGetUniformLocation(program,
"srcOffset");
648 uniformDstOffset = glGetUniformLocation(program,
"dstOffset");
649 uniformPatchArray = glGetUniformLocation(program,
"patchArray");
650 uniformDuDesc = glGetUniformLocation(program,
"duDesc");
651 uniformDvDesc = glGetUniformLocation(program,
"dvDesc");
652 uniformDuuDesc = glGetUniformLocation(program,
"duuDesc");
653 uniformDuvDesc = glGetUniformLocation(program,
"duvDesc");
654 uniformDvvDesc = glGetUniformLocation(program,
"dvvDesc");
bool Compile(OpenSubdiv::Osd::BufferDescriptor const &srcDesc, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, OpenSubdiv::Osd::BufferDescriptor const &duDesc=OpenSubdiv::Osd::BufferDescriptor(), OpenSubdiv::Osd::BufferDescriptor const &dvDesc=OpenSubdiv::Osd::BufferDescriptor(), OpenSubdiv::Osd::BufferDescriptor const &duuDesc=OpenSubdiv::Osd::BufferDescriptor(), OpenSubdiv::Osd::BufferDescriptor const &duvDesc=OpenSubdiv::Osd::BufferDescriptor(), OpenSubdiv::Osd::BufferDescriptor const &dvvDesc=OpenSubdiv::Osd::BufferDescriptor())
static bool EvalPatches(SRC_BUFFER *srcBuffer, OpenSubdiv::Osd::BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, GLComputeEvaluator const *instance, void *deviceContext=NULL)
Generic limit evaluation function. This function has the same signature as the other device kernels so that ...
static bool EvalStencils(SRC_BUFFER *srcBuffer, OpenSubdiv::Osd::BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, STENCIL_TABLE const *stencilTable, GLComputeEvaluator const *instance, void *deviceContext=NULL)
Generic static stencil function. This function has the same signature as the other device kernels so that ...
static void Synchronize(void *deviceContext)
Wait until the dispatched kernel finishes.
~GLComputeEvaluator()
Destructor. Note that the GL context must be made current.
GLComputeEvaluator()
Constructor.
GLStencilTableSSBO(OpenSubdiv::Far::StencilTable const *stencilTable)
draw_view push_constant(Type::INT, "radiance_src") .push_constant(Type capture_info_buf storage_buf(1, Qualifier::READ, "ObjectBounds", "bounds_buf[]") .push_constant(Type draw_view int
char datatoc_glsl_compute_kernel_glsl[]
static GLuint compileKernel(BufferDescriptor const &srcDesc, BufferDescriptor const &dstDesc, BufferDescriptor const &duDesc, BufferDescriptor const &dvDesc, BufferDescriptor const &duuDesc, BufferDescriptor const &duvDesc, BufferDescriptor const &dvvDesc, const char *kernelDefine, int workGroupSize)
GLuint createSSBO(std::vector< T > const &src)