Blender V4.5
gpu_shader_binder.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2025 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
6
7#include <sstream>
8
9#include "MEM_guardedalloc.h"
10
11#include "BLI_assert.h"
12#include "BLI_build_config.h"
13#include "BLI_string_utils.hh"
14#include "BLI_vector.hh"
15
16#include "BKE_colortools.hh"
17
18#include "GPU_immediate.hh"
19#include "GPU_shader.hh"
20#include "GPU_texture.hh"
21#include "GPU_uniform_buffer.hh"
22
24
25#include "OCIO_config.hh"
26
28#include "source_processor.hh"
29#include "white_point.hh"
30
31namespace blender::ocio {
32namespace internal {
33/* -------------------------------------------------------------------- */
36
38{
39 for (GPULutTexture &lut : luts) {
40 GPU_texture_free(lut.texture);
41 }
42 if (dummy) {
44 }
45 if (uniforms_buffer) {
47 }
48}
49
51{
53
54 luts.clear();
55 uniforms.clear();
56
57 return (dummy != nullptr);
58}
59
62/* -------------------------------------------------------------------- */
65
67{
68 if (lut) {
70 }
71
72 if (texture) {
74 }
75 if (buffer) {
77 }
78}
79
81{
82 if (lut) {
84 }
85
86 BKE_curvemapping_init(&curve_mapping);
87 BKE_curvemapping_premultiply(&curve_mapping, false);
88 BKE_curvemapping_table_RGBA(&curve_mapping, &lut, &lut_size);
89}
90
91bool GPUCurveMappping::initialize_common(const bool use_curve_mapping)
92{
93 if (!use_curve_mapping) {
94 return true;
95 }
96
98 "OCIOCurveMap", lut_size, 1, GPU_RGBA16F, GPU_TEXTURE_USAGE_SHADER_READ, nullptr);
101
103
104 if (texture == nullptr || buffer == nullptr) {
105 return false;
106 }
107
108 return true;
109}
110
112
113/* -------------------------------------------------------------------- */
116
126
127bool GPUDisplayShader::matches(const GPUDisplayParameters &display_parameters) const
128{
129 const bool use_curve_mapping = (display_parameters.curve_mapping != nullptr);
130 return (this->from_colorspace == display_parameters.from_colorspace &&
131 this->view == display_parameters.view && this->display == display_parameters.display &&
132 this->look == display_parameters.look && this->use_curve_mapping == use_curve_mapping);
133}
134
136{
137 if (!textures.initialize_common()) {
138 is_valid = false;
139 return false;
140 }
141
142 if (!curve_mapping.initialize_common(use_curve_mapping)) {
143 is_valid = false;
144 return false;
145 }
146
147 return true;
148}
149
151
152/* -------------------------------------------------------------------- */
155
160
162{
163 for (std::list<GPUDisplayShader>::iterator it = cache_.begin(); it != cache_.end(); it++) {
164 if (it->matches(display_parameters)) {
165 /* Move to front of the cache to mark as most recently used. */
166 if (it != cache_.begin()) {
167 cache_.splice(cache_.begin(), cache_, it);
168 }
169 return &(*it);
170 }
171 }
172 return nullptr;
173}
174
176{
177 /* Remove least recently used element from cache. */
178 while (cache_.size() >= MAX_SIZE) {
179 cache_.pop_back();
180 }
181
182 /* Create GPU shader. */
183 cache_.emplace_front();
184
185 return cache_.front();
186}
187
189{
190 cache_.clear();
191}
192
194
195} // namespace internal
196
197namespace {
198
199/* -------------------------------------------------------------------- */
202
209static void process_source(std::string &source)
210{
212 source = GPU_shader_preprocess_source(source);
213
214 /* Comparison operator in Metal returns per-element comparison and returns a vector of booleans.
215 * Need a special syntax to see if two vec3 are matched.
216 *
217 * NOTE: The replacement is optimized for transforming code generated by
218 * GradingPrimaryTransform. A more general approach is possible, but for now prefer processing
219 * speed.
220 *
221 * NOTE: The syntax works for all backends Blender supports. */
223 source, "if ( gamma != vec3(1., 1., 1.) )", "if (! all(equal(gamma, vec3(1., 1., 1.))) )");
224}
225
226static void gpu_curve_mapping_update(internal::GPUCurveMappping &gpu_curve_mapping,
227 CurveMapping &curve_mapping)
228{
229 /* Test if we need to update. */
230 /* TODO(sergey): Use more reliable cache identifier.
231 * Something like monotonously incrementing change counter feels to have less collisions. */
232 const size_t cache_id = size_t(&curve_mapping) + curve_mapping.changed_timestamp;
233 if (gpu_curve_mapping.cache_id == cache_id) {
234 return;
235 }
236 gpu_curve_mapping.cache_id = cache_id;
237
238 /* Update texture. */
239 const int offset[3] = {0, 0, 0};
240 const int extent[3] = {gpu_curve_mapping.lut_size, 0, 0};
241 GPU_texture_update_sub(gpu_curve_mapping.texture,
243 gpu_curve_mapping.lut,
244 UNPACK3(offset),
245 UNPACK3(extent));
246
247 /* Update uniforms. */
248 OCIO_GPUCurveMappingParameters data;
249 for (int i = 0; i < 4; i++) {
250 const CurveMap &curve_map = curve_mapping.cm[i];
251 data.range[i] = curve_map.range;
252 data.mintable[i] = curve_map.mintable;
253 data.ext_in_x[i] = curve_map.ext_in[0];
254 data.ext_in_y[i] = curve_map.ext_in[1];
255 data.ext_out_x[i] = curve_map.ext_out[0];
256 data.ext_out_y[i] = curve_map.ext_out[1];
257 data.first_x[i] = curve_map.table[0].x;
258 data.first_y[i] = curve_map.table[0].y;
259 data.last_x[i] = curve_map.table[CM_TABLE].x;
260 data.last_y[i] = curve_map.table[CM_TABLE].y;
261 }
262 for (int i = 0; i < 3; i++) {
263 data.black[i] = curve_mapping.black[i];
264 data.bwmul[i] = curve_mapping.bwmul[i];
265 }
266 data.lut_size = gpu_curve_mapping.lut_size;
267 data.use_extend_extrapolate = (curve_mapping.flag & CUMA_EXTEND_EXTRAPOLATE) != 0;
268
269 GPU_uniformbuf_update(gpu_curve_mapping.buffer, &data);
270}
271
272static void gpu_display_shader_parameters_update(internal::GPUDisplayShader &display_shader,
273 const GPUDisplayParameters &display_parameters,
274 float4x4 scene_linear_matrix)
275{
276 bool do_update = false;
277 if (display_shader.parameters_buffer == nullptr) {
278 display_shader.parameters_buffer = GPU_uniformbuf_create(sizeof(OCIO_GPUParameters));
279 do_update = true;
280 }
281
282 OCIO_GPUParameters &data = display_shader.parameters;
283 if (data.scene_linear_matrix != scene_linear_matrix) {
284 data.scene_linear_matrix = scene_linear_matrix;
285 do_update = true;
286 }
287 if (data.exponent != display_parameters.exponent) {
288 data.exponent = display_parameters.exponent;
289 do_update = true;
290 }
291 if (data.dither != display_parameters.dither) {
292 data.dither = display_parameters.dither;
293 do_update = true;
294 }
295 if (bool(data.use_predivide) != display_parameters.use_predivide) {
296 data.use_predivide = display_parameters.use_predivide;
297 do_update = true;
298 }
299 if (bool(data.do_overlay_merge) != display_parameters.do_overlay_merge) {
300 data.do_overlay_merge = display_parameters.do_overlay_merge;
301 do_update = true;
302 }
303 if (bool(data.use_hdr) != display_parameters.use_hdr) {
304 data.use_hdr = display_parameters.use_hdr;
305 do_update = true;
306 }
307
308 if (do_update) {
309 GPU_uniformbuf_update(display_shader.parameters_buffer, &data);
310 }
311}
312
313/* Bind the shader and update parameters and uniforms. */
314static bool gpu_shader_bind(const Config &config,
315 internal::GPUDisplayShader &display_shader,
316 const GPUDisplayParameters &display_parameters)
317{
320
321 /* Verify the shader is valid. */
322 if (!display_shader.is_valid) {
323 return false;
324 }
325
326 /* Update and bind curve mapping data. */
327 if (display_parameters.curve_mapping) {
328 gpu_curve_mapping_update(display_shader.curve_mapping, *display_parameters.curve_mapping);
329 GPU_uniformbuf_bind(display_shader.curve_mapping.buffer, UniformBufferSlot::CURVE_MAPPING);
330 GPU_texture_bind(display_shader.curve_mapping.texture, TextureSlot::CURVE_MAPPING);
331 /* TODO(sergey): Can free the curve mapping's lookup table.
332 * Seems minor, maybe not that important. */
333 }
334
335 /* Bind textures to sampler units. Texture 0 is set by caller.
336 * Uniforms have already been set for texture bind points. */
337 if (!display_parameters.do_overlay_merge) {
338 /* Avoid missing binds. */
339 GPU_texture_bind(display_shader.textures.dummy, TextureSlot::OVERLAY);
340 }
341 for (int i = 0; i < display_shader.textures.luts.size(); i++) {
342 GPU_texture_bind(display_shader.textures.luts[i].texture, TextureSlot::LUTS_OFFSET + i);
343 }
344
345 if (display_shader.textures.uniforms_buffer) {
346 GPU_uniformbuf_bind(display_shader.textures.uniforms_buffer, UniformBufferSlot::LUTS);
347 }
348
349 float3x3 matrix = float3x3::identity() * display_parameters.scale;
350 if (display_parameters.use_white_balance) {
352 config, display_parameters.temperature, display_parameters.tint);
353 }
354
355 gpu_display_shader_parameters_update(display_shader, display_parameters, float4x4(matrix));
356 GPU_uniformbuf_bind(display_shader.parameters_buffer, UniformBufferSlot::DISPLAY);
357
358 /* TODO(fclem): remove remains of IMM. */
359 immBindShader(display_shader.shader);
360
361 return true;
362}
363
365
366} // namespace
367
369 : display_cache_(std::make_unique<internal::GPUShaderCache>()),
370 scene_linear_cache_(std::make_unique<internal::GPUShaderCache>()),
371 config_(config)
372{
373}
374
375/* Keep private to the translation unit to allow proper destruction of smart pointers to internal
376 * data. */
378
379bool GPUShaderBinder::display_bind(const GPUDisplayParameters &display_parameters) const
380{
381 /* Attempt to get shader from the cache. */
382 internal::GPUDisplayShader *display_shader = display_cache_->get(display_parameters);
383
384 if (!display_shader) {
385 display_shader = &display_cache_->create_default();
386 BLI_assert(display_shader);
387
388 display_shader->from_colorspace = display_parameters.from_colorspace;
389 display_shader->view = display_parameters.view;
390 display_shader->display = display_parameters.display;
391 display_shader->look = display_parameters.look;
392 display_shader->use_curve_mapping = (display_parameters.curve_mapping != nullptr);
393 display_shader->is_valid = false;
394
395 if (display_parameters.curve_mapping) {
396 /* Rasterize curve mapping early so that texture allocation can know the size of the lookup
397 * table. */
398 display_shader->curve_mapping.rasterize(*display_parameters.curve_mapping);
399 }
400
401 if (display_shader->initialize_common()) {
402 construct_display_shader(*display_shader);
403 }
404 }
405 else if (display_parameters.curve_mapping) {
406 /* Update curve mapping's lookup table. */
407 display_shader->curve_mapping.rasterize(*display_parameters.curve_mapping);
408 }
409
410 return gpu_shader_bind(config_, *display_shader, display_parameters);
411}
412
414 const bool use_predivide) const
415{
416 /* Re-use code and logic with the conversion to the display space. This assumes that empty names
417 * for display, view, and look are not valid for the OpenColorIO configuration, and so they can
418 * be used to indicate that the processor is used to convert from the given space to the linear.
419 */
420
421 GPUDisplayParameters display_parameters;
422 display_parameters.from_colorspace = from_colorspace;
423 display_parameters.use_predivide = use_predivide;
424
425 /* Attempt to get shader from the cache. */
426 internal::GPUDisplayShader *display_shader = scene_linear_cache_->get(display_parameters);
427
428 if (!display_shader) {
429 display_shader = &display_cache_->create_default();
430 BLI_assert(display_shader);
431
432 display_shader->from_colorspace = display_parameters.from_colorspace;
433
434 if (display_shader->initialize_common()) {
435 construct_scene_linear_shader(*display_shader);
436 }
437 }
438
439 return gpu_shader_bind(config_, *display_shader, display_parameters);
440}
441
443{
445}
446
448 internal::GPUDisplayShader &display_shader,
449 StringRefNull fragment_source,
450 const Span<std::array<StringRefNull, 2>> additional_defines)
451{
452 using namespace blender::gpu::shader;
453
454 StageInterfaceInfo iface("OCIO_Interface", "");
455 iface.smooth(Type::float2_t, "texCoord_interp");
456
457 ShaderCreateInfo info("OCIO_Display");
458
459 for (const auto &additional_define : additional_defines) {
460 info.define(additional_define[0], additional_define[1]);
461 }
462
463 /* Work around OpenColorIO not supporting latest GLSL yet. */
464 info.define("texture1D", "texture");
465 info.define("texture2D", "texture");
466 info.define("texture3D", "texture");
467
468 /* Work around unsupported in keyword in Metal GLSL emulation. */
469#if OS_MAC
470 info.define("in", "");
471#endif
472
473 info.typedef_source("ocio_shader_shared.hh");
474 info.sampler(internal::TextureSlot::IMAGE, ImageType::Float2D, "image_texture");
475 info.sampler(internal::TextureSlot::OVERLAY, ImageType::Float2D, "overlay_texture");
476 info.uniform_buf(internal::UniformBufferSlot::DISPLAY, "OCIO_GPUParameters", "parameters");
477 info.push_constant(Type::float4x4_t, "ModelViewProjectionMatrix");
478 info.vertex_in(0, Type::float2_t, "pos");
479 info.vertex_in(1, Type::float2_t, "texCoord");
480 info.vertex_out(iface);
481 info.fragment_out(0, Type::float4_t, "fragColor");
482 info.vertex_source("gpu_shader_display_transform_vert.glsl");
483 info.fragment_source("gpu_shader_display_transform_frag.glsl");
484
485 info.fragment_source_generated = fragment_source;
486 process_source(info.fragment_source_generated);
487
488 /* #96502: Work around for incorrect OCIO GLSL code generation when using
489 * GradingPrimaryTransform. Should be reevaluated when changing to a next version of OCIO.
490 * (currently v2.1.1). */
491 info.define("inf 1e32");
492
493 if (display_shader.use_curve_mapping) {
494 info.define("USE_CURVE_MAPPING");
496 "OCIO_GPUCurveMappingParameters",
497 "curve_mapping");
498 info.sampler(
499 internal::TextureSlot::CURVE_MAPPING, ImageType::Float1D, "curve_mapping_texture");
500 }
501
502 /* Set LUT textures. */
504 for (const internal::GPULutTexture &texture : display_shader.textures.luts) {
505 const int dimensions = GPU_texture_dimensions(texture.texture);
506 const ImageType type = (dimensions == 1) ? ImageType::Float1D :
507 (dimensions == 2) ? ImageType::Float2D :
508 ImageType::Float3D;
509 info.sampler(slot++, type, texture.sampler_name.c_str());
510 }
511
512 /* Set LUT uniforms. */
513#if defined(WITH_OPENCOLORIO)
514 if (!display_shader.textures.uniforms.is_empty()) {
515 /* NOTE: For simplicity, we pad everything to size of vec4 avoiding sorting and alignment
516 * issues. It is unlikely that this becomes a real issue. */
517 const size_t ubo_size = display_shader.textures.uniforms.size() * sizeof(float) * 4;
518 Vector<uint8_t> ubo_data_buf(ubo_size);
519
520 uint32_t *ubo_data = reinterpret_cast<uint32_t *>(ubo_data_buf.data());
521
522 std::stringstream ss;
523 ss << "struct OCIO_GPULutParameters {\n";
524
525 int index = 0;
526 for (internal::GPUUniform &uniform : display_shader.textures.uniforms) {
527 index += 1;
528 const OCIO_NAMESPACE::GpuShaderDesc::UniformData &data = uniform.data;
529 const char *name = uniform.name.c_str();
530 char prefix = ' ';
531 int vec_len;
532 switch (data.m_type) {
533 case OCIO_NAMESPACE::UNIFORM_DOUBLE: {
534 vec_len = 1;
535 float value = float(data.m_getDouble());
536 memcpy(ubo_data, &value, sizeof(float));
537 break;
538 }
539 case OCIO_NAMESPACE::UNIFORM_BOOL: {
540 prefix = 'b';
541 vec_len = 1;
542 int value = int(data.m_getBool());
543 memcpy(ubo_data, &value, sizeof(int));
544 break;
545 }
546 case OCIO_NAMESPACE::UNIFORM_FLOAT3:
547 vec_len = 3;
548 memcpy(ubo_data, data.m_getFloat3().data(), sizeof(float) * 3);
549 break;
550 case OCIO_NAMESPACE::UNIFORM_VECTOR_FLOAT:
551 vec_len = data.m_vectorFloat.m_getSize();
552 memcpy(ubo_data, data.m_vectorFloat.m_getVector(), sizeof(float) * vec_len);
553 break;
554 case OCIO_NAMESPACE::UNIFORM_VECTOR_INT:
555 prefix = 'i';
556 vec_len = data.m_vectorInt.m_getSize();
557 memcpy(ubo_data, data.m_vectorInt.m_getVector(), sizeof(int) * vec_len);
558 break;
559 default:
560 continue;
561 }
562 /* Align every member to 16 bytes. */
563 ubo_data += 4;
564 /* Use a generic variable name because some GLSL compilers can interpret the preprocessor
565 * define as recursive. */
566 ss << " " << prefix << "vec4 var" << index << ";\n";
567 /* Use a define to keep the generated code working. */
568 StringRef suffix = StringRefNull("xyzw").substr(0, vec_len);
569 ss << "#define " << name << " lut_parameters.var" << index << "." << suffix << "\n";
570 }
571 ss << "};\n";
572 info.typedef_source_generated = ss.str();
573
574 info.uniform_buf(internal::UniformBufferSlot::LUTS, "OCIO_GPULutParameters", "lut_parameters");
575
577 ubo_size, ubo_data_buf.data(), "OCIO_LutParameters");
578 }
579#endif
580
581 display_shader.shader = GPU_shader_create_from_info(
582 reinterpret_cast<GPUShaderCreateInfo *>(&info));
583
584 return (display_shader.shader != nullptr);
585}
586
587} // namespace blender::ocio
void BKE_curvemapping_premultiply(CurveMapping *cumap, bool restore)
void BKE_curvemapping_init(CurveMapping *cumap)
void BKE_curvemapping_table_RGBA(const CurveMapping *cumap, float **array, int *size)
#define BLI_assert(a)
Definition BLI_assert.h:46
void BLI_string_replace(std::string &haystack, blender::StringRef needle, blender::StringRef other)
#define UNPACK3(a)
@ CUMA_EXTEND_EXTRAPOLATE
struct CurveMap CurveMap
void immUnbindProgram()
void immBindShader(GPUShader *shader)
std::string GPU_shader_preprocess_source(blender::StringRefNull original)
GPUShader * GPU_shader_create_from_info(const GPUShaderCreateInfo *_info)
void GPU_shader_free(GPUShader *shader)
void GPU_texture_bind(GPUTexture *texture, int unit)
GPUTexture * GPU_texture_create_1d(const char *name, int width, int mip_len, eGPUTextureFormat format, eGPUTextureUsage usage, const float *data)
void GPU_texture_free(GPUTexture *texture)
int GPU_texture_dimensions(const GPUTexture *texture)
GPUTexture * GPU_texture_create_error(int dimension, bool array)
@ GPU_DATA_FLOAT
void GPU_texture_extend_mode(GPUTexture *texture, GPUSamplerExtendMode extend_mode)
@ GPU_TEXTURE_USAGE_SHADER_READ
@ GPU_SAMPLER_EXTEND_MODE_EXTEND
void GPU_texture_update_sub(GPUTexture *texture, eGPUDataFormat data_format, const void *pixels, int offset_x, int offset_y, int offset_z, int width, int height, int depth)
void GPU_texture_filter_mode(GPUTexture *texture, bool use_filter)
@ GPU_RGBA16F
GPUUniformBuf * GPU_uniformbuf_create_ex(size_t size, const void *data, const char *name)
#define GPU_uniformbuf_create(size)
void GPU_uniformbuf_update(GPUUniformBuf *ubo, const void *data)
void GPU_uniformbuf_free(GPUUniformBuf *ubo)
void GPU_uniformbuf_bind(GPUUniformBuf *ubo, int slot)
Read Guarded memory(de)allocation.
bool do_update
Definition WM_types.hh:1008
BMesh const char void * data
constexpr StringRef substr(int64_t start, int64_t size) const
virtual void construct_display_shader(internal::GPUDisplayShader &display_shader) const =0
static bool create_gpu_shader(internal::GPUDisplayShader &display_shader, StringRefNull fragment_source, Span< std::array< StringRefNull, 2 > > additional_defines)
bool to_scene_linear_bind(StringRefNull from_colorspace, bool use_predivide) const
bool display_bind(const GPUDisplayParameters &display_parameters) const
virtual void construct_scene_linear_shader(internal::GPUDisplayShader &display_shader) const =0
GPUShaderBinder(const Config &config)
void rasterize(CurveMapping &curve_mapping)
bool initialize_common(bool use_curve_mapping)
bool matches(const GPUDisplayParameters &display_parameters) const
GPUDisplayShader * get(const GPUDisplayParameters &display_parameters)
TEX_TEMPLATE DataVec texture(T, FltCoord, float=0.0f) RET
#define this
#define CM_TABLE
void MEM_freeN(void *vmemh)
Definition mallocn.cc:113
float3x3 calculate_white_point_matrix(const Config &config, const float temperature, const float tint)
void source_comment_out_uniforms(std::string &source)
MatBase< float, 4, 4 > float4x4
MatBase< float, 3, 3 > float3x3
CurveMapPoint * table
float ext_out[2]
float ext_in[2]
CurveMap cm[4]
Describe inputs & outputs, stage interfaces, resources and sources of a shader. If all data is correc...
Self & fragment_source(StringRefNull filename)
std::string fragment_source_generated
Self & vertex_in(int slot, Type type, StringRefNull name)
Self & push_constant(Type type, StringRefNull name, int array_size=0)
Self & typedef_source(StringRefNull filename)
Self & fragment_out(int slot, Type type, StringRefNull name, DualBlend blend=DualBlend::NONE, int raster_order_group=-1)
Self & vertex_out(StageInterfaceInfo &interface)
Self & vertex_source(StringRefNull filename)
Self & sampler(int slot, ImageType type, StringRefNull name, Frequency freq=Frequency::PASS, GPUSamplerState sampler=GPUSamplerState::internal_sampler())
Self & uniform_buf(int slot, StringRefNull type_name, StringRefNull name, Frequency freq=Frequency::PASS)
Self & define(StringRefNull name, StringRefNull value="")
Self & smooth(Type type, StringRefNull _name)
i
Definition text_draw.cc:230