Blender V4.5
gl_shader.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2020 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
8
9#include <iomanip>
10
11#include "BKE_appdir.hh"
12#include "BKE_global.hh"
13
14#include "BLI_fileops.h"
15#include "BLI_path_utils.hh"
16#include "BLI_string.h"
17#include "BLI_time.h"
18#include "BLI_vector.hh"
19
20#include "BLI_system.h"
21#include BLI_SYSTEM_PID_H
22
23#include "GPU_capabilities.hh"
24#include "GPU_debug.hh"
25#include "GPU_platform.hh"
28
29#include "gl_debug.hh"
30#include "gl_vertex_buffer.hh"
31
33#include "gl_shader.hh"
35
36#include <chrono>
37#include <sstream>
38#include <stdio.h>
39#ifdef WIN32
40# define popen _popen
41# define pclose _pclose
42#endif
43
44using namespace blender;
45using namespace blender::gpu;
46using namespace blender::gpu::shader;
47
49
50/* -------------------------------------------------------------------- */
53
55{
56#if 0 /* Would be nice to have, but for now the Deferred compilation \
57 * does not have a GPUContext. */
58 BLI_assert(GLContext::get() != nullptr);
59#endif
60}
61
63{
64#if 0 /* Would be nice to have, but for now the Deferred compilation \
65 * does not have a GPUContext. */
66 BLI_assert(GLContext::get() != nullptr);
67#endif
68}
69
70void GLShader::init(const shader::ShaderCreateInfo &info, bool is_batch_compilation)
71{
72 async_compilation_ = is_batch_compilation;
73
74 /* Extract the constants names from info and store them locally. */
75 for (const SpecializationConstant &constant : info.specialization_constants_) {
76 specialization_constant_names_.append(constant.name.c_str());
77 }
78
79 /* NOTE: This is not threadsafe with regards to the specialization constants state access.
80 * The shader creation must be externally synchronized. */
81 main_program_ = program_cache_
82 .lookup_or_add_cb(constants->values,
83 []() { return std::make_unique<GLProgram>(); })
84 .get();
85 if (!main_program_->program_id) {
86 main_program_->program_id = glCreateProgram();
87 debug::object_label(GL_PROGRAM, main_program_->program_id, name);
88 }
89}
90
92{
93 main_program_ = program_cache_
94 .lookup_or_add_cb(constants->values,
95 []() { return std::make_unique<GLProgram>(); })
96 .get();
97 if (!main_program_->program_id) {
98 main_program_->program_id = glCreateProgram();
99 debug::object_label(GL_PROGRAM, main_program_->program_id, name);
100 }
101}
102
104
105/* -------------------------------------------------------------------- */
108
109static const char *to_string(const Interpolation &interp)
110{
111 switch (interp) {
113 return "smooth";
115 return "flat";
117 return "noperspective";
118 default:
119 return "unknown";
120 }
121}
122
123static const char *to_string(const Type &type)
124{
125 switch (type) {
126 case Type::float_t:
127 return "float";
128 case Type::float2_t:
129 return "vec2";
130 case Type::float3_t:
131 return "vec3";
132 case Type::float4_t:
133 return "vec4";
134 case Type::float3x3_t:
135 return "mat3";
136 case Type::float4x4_t:
137 return "mat4";
138 case Type::uint_t:
139 return "uint";
140 case Type::uint2_t:
141 return "uvec2";
142 case Type::uint3_t:
143 return "uvec3";
144 case Type::uint4_t:
145 return "uvec4";
146 case Type::int_t:
147 return "int";
148 case Type::int2_t:
149 return "ivec2";
150 case Type::int3_t:
151 return "ivec3";
152 case Type::int4_t:
153 return "ivec4";
154 case Type::bool_t:
155 return "bool";
156 /* Alias special types. */
157 case Type::uchar_t:
158 case Type::ushort_t:
159 return "uint";
160 case Type::uchar2_t:
161 case Type::ushort2_t:
162 return "uvec2";
163 case Type::uchar3_t:
164 case Type::ushort3_t:
165 return "uvec3";
166 case Type::uchar4_t:
167 case Type::ushort4_t:
168 return "uvec4";
169 case Type::char_t:
170 case Type::short_t:
171 return "int";
172 case Type::char2_t:
173 case Type::short2_t:
174 return "ivec2";
175 case Type::char3_t:
176 case Type::short3_t:
177 return "ivec3";
178 case Type::char4_t:
179 case Type::short4_t:
180 return "ivec4";
182 return "vec3";
183 }
185 return "unknown";
186}
187
189{
190 switch (type) {
191 case Type::float_t:
192 case Type::float2_t:
193 case Type::float3_t:
194 case Type::float4_t:
195 case Type::float3x3_t:
196 case Type::float4x4_t:
197 return Type::float_t;
198 case Type::uint_t:
199 case Type::uint2_t:
200 case Type::uint3_t:
201 case Type::uint4_t:
202 return Type::uint_t;
203 case Type::int_t:
204 case Type::int2_t:
205 case Type::int3_t:
206 case Type::int4_t:
207 case Type::bool_t:
208 return Type::int_t;
209 /* Alias special types. */
210 case Type::uchar_t:
211 case Type::uchar2_t:
212 case Type::uchar3_t:
213 case Type::uchar4_t:
214 case Type::ushort_t:
215 case Type::ushort2_t:
216 case Type::ushort3_t:
217 case Type::ushort4_t:
218 return Type::uint_t;
219 case Type::char_t:
220 case Type::char2_t:
221 case Type::char3_t:
222 case Type::char4_t:
223 case Type::short_t:
224 case Type::short2_t:
225 case Type::short3_t:
226 case Type::short4_t:
227 return Type::int_t;
229 return Type::float_t;
230 }
232 return Type::float_t;
233}
234
235static const char *to_string(const eGPUTextureFormat &type)
236{
237 switch (type) {
238 case GPU_RGBA8UI:
239 return "rgba8ui";
240 case GPU_RGBA8I:
241 return "rgba8i";
242 case GPU_RGBA8:
243 return "rgba8";
244 case GPU_RGBA32UI:
245 return "rgba32ui";
246 case GPU_RGBA32I:
247 return "rgba32i";
248 case GPU_RGBA32F:
249 return "rgba32f";
250 case GPU_RGBA16UI:
251 return "rgba16ui";
252 case GPU_RGBA16I:
253 return "rgba16i";
254 case GPU_RGBA16F:
255 return "rgba16f";
256 case GPU_RGBA16:
257 return "rgba16";
258 case GPU_RG8UI:
259 return "rg8ui";
260 case GPU_RG8I:
261 return "rg8i";
262 case GPU_RG8:
263 return "rg8";
264 case GPU_RG32UI:
265 return "rg32ui";
266 case GPU_RG32I:
267 return "rg32i";
268 case GPU_RG32F:
269 return "rg32f";
270 case GPU_RG16UI:
271 return "rg16ui";
272 case GPU_RG16I:
273 return "rg16i";
274 case GPU_RG16F:
275 return "rg16f";
276 case GPU_RG16:
277 return "rg16";
278 case GPU_R8UI:
279 return "r8ui";
280 case GPU_R8I:
281 return "r8i";
282 case GPU_R8:
283 return "r8";
284 case GPU_R32UI:
285 return "r32ui";
286 case GPU_R32I:
287 return "r32i";
288 case GPU_R32F:
289 return "r32f";
290 case GPU_R16UI:
291 return "r16ui";
292 case GPU_R16I:
293 return "r16i";
294 case GPU_R16F:
295 return "r16f";
296 case GPU_R16:
297 return "r16";
299 return "r11f_g11f_b10f";
300 case GPU_RGB10_A2:
301 return "rgb10_a2";
302 default:
303 return "unknown";
304 }
305}
306
307static const char *to_string(const PrimitiveIn &layout)
308{
309 switch (layout) {
311 return "points";
313 return "lines";
315 return "lines_adjacency";
317 return "triangles";
319 return "triangles_adjacency";
320 default:
321 return "unknown";
322 }
323}
324
325static const char *to_string(const PrimitiveOut &layout)
326{
327 switch (layout) {
329 return "points";
331 return "line_strip";
333 return "triangle_strip";
334 default:
335 return "unknown";
336 }
337}
338
339static const char *to_string(const DepthWrite &value)
340{
341 switch (value) {
342 case DepthWrite::ANY:
343 return "depth_any";
345 return "depth_greater";
346 case DepthWrite::LESS:
347 return "depth_less";
348 default:
349 return "depth_unchanged";
350 }
351}
352
353static void print_image_type(std::ostream &os,
354 const ImageType &type,
356{
357 switch (type) {
358 case ImageType::IntBuffer:
359 case ImageType::Int1D:
360 case ImageType::Int1DArray:
361 case ImageType::Int2D:
362 case ImageType::Int2DArray:
363 case ImageType::Int3D:
364 case ImageType::IntCube:
365 case ImageType::IntCubeArray:
366 case ImageType::AtomicInt2D:
367 case ImageType::AtomicInt2DArray:
368 case ImageType::AtomicInt3D:
369 os << "i";
370 break;
371 case ImageType::UintBuffer:
372 case ImageType::Uint1D:
373 case ImageType::Uint1DArray:
374 case ImageType::Uint2D:
375 case ImageType::Uint2DArray:
376 case ImageType::Uint3D:
377 case ImageType::UintCube:
378 case ImageType::UintCubeArray:
379 case ImageType::AtomicUint2D:
380 case ImageType::AtomicUint2DArray:
381 case ImageType::AtomicUint3D:
382 os << "u";
383 break;
384 default:
385 break;
386 }
387
389 os << "image";
390 }
391 else {
392 os << "sampler";
393 }
394
395 switch (type) {
396 case ImageType::FloatBuffer:
397 case ImageType::IntBuffer:
398 case ImageType::UintBuffer:
399 os << "Buffer";
400 break;
401 case ImageType::Float1D:
402 case ImageType::Float1DArray:
403 case ImageType::Int1D:
404 case ImageType::Int1DArray:
405 case ImageType::Uint1D:
406 case ImageType::Uint1DArray:
407 os << "1D";
408 break;
409 case ImageType::Float2D:
410 case ImageType::Float2DArray:
411 case ImageType::Int2D:
412 case ImageType::Int2DArray:
413 case ImageType::AtomicInt2D:
414 case ImageType::AtomicInt2DArray:
415 case ImageType::Uint2D:
416 case ImageType::Uint2DArray:
417 case ImageType::AtomicUint2D:
418 case ImageType::AtomicUint2DArray:
419 case ImageType::Shadow2D:
420 case ImageType::Shadow2DArray:
421 case ImageType::Depth2D:
422 case ImageType::Depth2DArray:
423 os << "2D";
424 break;
425 case ImageType::Float3D:
426 case ImageType::Int3D:
427 case ImageType::Uint3D:
428 case ImageType::AtomicInt3D:
429 case ImageType::AtomicUint3D:
430 os << "3D";
431 break;
432 case ImageType::FloatCube:
433 case ImageType::FloatCubeArray:
434 case ImageType::IntCube:
435 case ImageType::IntCubeArray:
436 case ImageType::UintCube:
437 case ImageType::UintCubeArray:
438 case ImageType::ShadowCube:
439 case ImageType::ShadowCubeArray:
440 case ImageType::DepthCube:
441 case ImageType::DepthCubeArray:
442 os << "Cube";
443 break;
444 default:
445 break;
446 }
447
448 switch (type) {
449 case ImageType::Float1DArray:
450 case ImageType::Float2DArray:
451 case ImageType::FloatCubeArray:
452 case ImageType::Int1DArray:
453 case ImageType::Int2DArray:
454 case ImageType::IntCubeArray:
455 case ImageType::Uint1DArray:
456 case ImageType::Uint2DArray:
457 case ImageType::AtomicUint2DArray:
458 case ImageType::UintCubeArray:
459 case ImageType::Shadow2DArray:
460 case ImageType::ShadowCubeArray:
461 case ImageType::Depth2DArray:
462 case ImageType::DepthCubeArray:
463 os << "Array";
464 break;
465 default:
466 break;
467 }
468
469 switch (type) {
470 case ImageType::Shadow2D:
471 case ImageType::Shadow2DArray:
472 case ImageType::ShadowCube:
473 case ImageType::ShadowCubeArray:
474 os << "Shadow";
475 break;
476 default:
477 break;
478 }
479 os << " ";
480}
481
482static std::ostream &print_qualifier(std::ostream &os, const Qualifier &qualifiers)
483{
484 if (bool(qualifiers & Qualifier::no_restrict) == false) {
485 os << "restrict ";
486 }
487 if (bool(qualifiers & Qualifier::read) == false) {
488 os << "writeonly ";
489 }
490 if (bool(qualifiers & Qualifier::write) == false) {
491 os << "readonly ";
492 }
493 return os;
494}
495
496static void print_resource(std::ostream &os,
498 bool auto_resource_location)
499{
500 if (auto_resource_location && res.bind_type == ShaderCreateInfo::Resource::BindType::SAMPLER) {
501 /* Skip explicit binding location for samplers when not needed, since drivers can usually
502 * handle more sampler declarations this way (as long as they're not actually used by the
503 * shader). See #105661. */
504 }
506 os << "layout(binding = " << res.slot;
508 os << ", " << to_string(res.image.format);
509 }
511 os << ", std140";
512 }
514 os << ", std430";
515 }
516 os << ") ";
517 }
519 os << "layout(std140) ";
520 }
521
522 int64_t array_offset;
523 StringRef name_no_array;
524
525 switch (res.bind_type) {
527 os << "uniform ";
529 os << res.sampler.name << ";\n";
530 break;
532 os << "uniform ";
534 print_image_type(os, res.image.type, res.bind_type);
535 os << res.image.name << ";\n";
536 break;
538 array_offset = res.uniformbuf.name.find_first_of("[");
539 name_no_array = (array_offset == -1) ? res.uniformbuf.name :
540 StringRef(res.uniformbuf.name.c_str(), array_offset);
541 os << "uniform " << name_no_array << " { " << res.uniformbuf.type_name << " _"
542 << res.uniformbuf.name << "; };\n";
543 break;
545 array_offset = res.storagebuf.name.find_first_of("[");
546 name_no_array = (array_offset == -1) ? res.storagebuf.name :
547 StringRef(res.storagebuf.name.c_str(), array_offset);
548 print_qualifier(os, res.storagebuf.qualifiers);
549 os << "buffer ";
550 os << name_no_array << " { " << res.storagebuf.type_name << " _" << res.storagebuf.name
551 << "; };\n";
552 break;
553 }
554}
555
556static void print_resource_alias(std::ostream &os, const ShaderCreateInfo::Resource &res)
557{
558 int64_t array_offset;
559 StringRef name_no_array;
560
561 switch (res.bind_type) {
563 array_offset = res.uniformbuf.name.find_first_of("[");
564 name_no_array = (array_offset == -1) ? res.uniformbuf.name :
565 StringRef(res.uniformbuf.name.c_str(), array_offset);
566 os << "#define " << name_no_array << " (_" << name_no_array << ")\n";
567 break;
569 array_offset = res.storagebuf.name.find_first_of("[");
570 name_no_array = (array_offset == -1) ? res.storagebuf.name :
571 StringRef(res.storagebuf.name.c_str(), array_offset);
572 os << "#define " << name_no_array << " (_" << name_no_array << ")\n";
573 break;
574 default:
575 break;
576 }
577}
578
579static void print_interface(std::ostream &os,
580 const StringRefNull &prefix,
581 const StageInterfaceInfo &iface,
582 const StringRefNull &suffix = "")
583{
584 /* TODO(@fclem): Move that to interface check. */
585 // if (iface.instance_name.is_empty()) {
586 // BLI_assert_msg(0, "Interfaces require an instance name for geometry shader.");
587 // std::cout << iface.name << ": Interfaces require an instance name for geometry shader.\n";
588 // continue;
589 // }
590 os << prefix << " " << iface.name << "{" << std::endl;
591 for (const StageInterfaceInfo::InOut &inout : iface.inouts) {
592 os << " " << to_string(inout.interp) << " " << to_string(inout.type) << " " << inout.name
593 << ";\n";
594 }
595 os << "}";
596 os << (iface.instance_name.is_empty() ? "" : "\n") << iface.instance_name << suffix << ";\n";
597}
598
599std::string GLShader::resources_declare(const ShaderCreateInfo &info) const
600{
601 std::stringstream ss;
602
603 ss << "\n/* Compilation Constants (pass-through). */\n";
604 for (const CompilationConstant &sc : info.compilation_constants_) {
605 ss << "const ";
606 switch (sc.type) {
607 case Type::int_t:
608 ss << "int " << sc.name << "=" << std::to_string(sc.value.i) << ";\n";
609 break;
610 case Type::uint_t:
611 ss << "uint " << sc.name << "=" << std::to_string(sc.value.u) << "u;\n";
612 break;
613 case Type::bool_t:
614 ss << "bool " << sc.name << "=" << (sc.value.u ? "true" : "false") << ";\n";
615 break;
616 default:
618 break;
619 }
620 }
621 /* NOTE: We define macros in GLSL to trigger compilation error if the resource names
622 * are reused for local variables. This is to match other backend behavior which needs accessors
623 * macros. */
624 ss << "\n/* Pass Resources. */\n";
625 for (const ShaderCreateInfo::Resource &res : info.pass_resources_) {
627 }
628 for (const ShaderCreateInfo::Resource &res : info.pass_resources_) {
629 print_resource_alias(ss, res);
630 }
631 ss << "\n/* Batch Resources. */\n";
632 for (const ShaderCreateInfo::Resource &res : info.batch_resources_) {
634 }
635 for (const ShaderCreateInfo::Resource &res : info.batch_resources_) {
636 print_resource_alias(ss, res);
637 }
638 ss << "\n/* Geometry Resources. */\n";
639 for (const ShaderCreateInfo::Resource &res : info.geometry_resources_) {
641 }
642 for (const ShaderCreateInfo::Resource &res : info.geometry_resources_) {
643 print_resource_alias(ss, res);
644 }
645 ss << "\n/* Push Constants. */\n";
646 int location = 0;
647 for (const ShaderCreateInfo::PushConst &uniform : info.push_constants_) {
648 /* See #131227: Work around legacy Intel bug when using layout locations. */
649 if (!info.specialization_constants_.is_empty()) {
650 ss << "layout(location = " << location << ") ";
651 location += std::max(1, uniform.array_size);
652 }
653 ss << "uniform " << to_string(uniform.type) << " " << uniform.name;
654 if (uniform.array_size > 0) {
655 ss << "[" << uniform.array_size << "]";
656 }
657 ss << ";\n";
658 }
659#if 0 /* #95278: This is not be enough to prevent some compilers think it is recursive. */
660 for (const ShaderCreateInfo::PushConst &uniform : info.push_constants_) {
661 /* #95278: Double macro to avoid some compilers think it is recursive. */
662 ss << "#define " << uniform.name << "_ " << uniform.name << "\n";
663 ss << "#define " << uniform.name << " (" << uniform.name << "_)\n";
664 }
665#endif
666 ss << "\n";
667 return ss.str();
668}
669
671 const shader::SpecializationConstants &constants_state) const
672{
673 std::stringstream ss;
674
675 ss << "/* Specialization Constants. */\n";
676 for (int constant_index : IndexRange(constants_state.types.size())) {
677 const StringRefNull name = specialization_constant_names_[constant_index];
678 gpu::shader::Type constant_type = constants_state.types[constant_index];
679 const SpecializationConstant::Value &value = constants_state.values[constant_index];
680
681 switch (constant_type) {
682 case Type::int_t:
683 ss << "const int " << name << "=" << std::to_string(value.i) << ";\n";
684 break;
685 case Type::uint_t:
686 ss << "const uint " << name << "=" << std::to_string(value.u) << "u;\n";
687 break;
688 case Type::bool_t:
689 ss << "const bool " << name << "=" << (value.u ? "true" : "false") << ";\n";
690 break;
691 case Type::float_t:
692 /* Use uint representation to allow exact same bit pattern even if NaN. */
693 ss << "const float " << name << "= uintBitsToFloat(" << std::to_string(value.u) << "u);\n";
694 break;
695 default:
697 break;
698 }
699 }
700 return ss.str();
701}
702
/* Build a GLSL snippet that renames the shader's `main` to `main_function_` and wraps it in a
 * new `main` that runs `pre_main` before and `post_main` after the original body. Used to
 * inject code that reads/writes globals around the user-provided entry point.
 *
 * \param pre_main: statements emitted before the original main runs.
 * \param post_main: statements emitted after the original main returns.
 * \return the wrapper snippet to append to the shader source.
 *
 * Parameters are read-only here, so take them by const reference (the original non-const
 * references never mutated them). */
static std::string main_function_wrapper(const std::string &pre_main,
                                         const std::string &post_main)
{
  std::stringstream ss;
  /* Prototype for the original main. */
  ss << "\n";
  ss << "void main_function_();\n";
  /* Wrapper to the main function in order to inject code processing on globals. */
  ss << "void main() {\n";
  ss << pre_main;
  ss << " main_function_();\n";
  ss << post_main;
  ss << "}\n";
  /* Rename the original main. */
  ss << "#define main main_function_\n";
  ss << "\n";
  return ss.str();
}
720
722{
723 std::stringstream ss;
724 std::string post_main;
725
726 ss << "\n/* Inputs. */\n";
727 for (const ShaderCreateInfo::VertIn &attr : info.vertex_inputs_) {
729 /* Fix issue with AMDGPU-PRO + workbench_prepass_mesh_vert.glsl being quantized. */
731 {
732 ss << "layout(location = " << attr.index << ") ";
733 }
734 ss << "in " << to_string(attr.type) << " " << attr.name << ";\n";
735 }
736 ss << "\n/* Interfaces. */\n";
737 for (const StageInterfaceInfo *iface : info.vertex_out_interfaces_) {
738 print_interface(ss, "out", *iface);
739 }
740 const bool has_geometry_stage = do_geometry_shader_injection(&info) ||
742 const bool do_layer_output = bool(info.builtins_ & BuiltinBits::LAYER);
743 const bool do_viewport_output = bool(info.builtins_ & BuiltinBits::VIEWPORT_INDEX);
744 if (has_geometry_stage) {
745 if (do_layer_output) {
746 ss << "out int gpu_Layer;\n";
747 }
748 if (do_viewport_output) {
749 ss << "out int gpu_ViewportIndex;\n";
750 }
751 }
752 else {
753 if (do_layer_output) {
754 ss << "#define gpu_Layer gl_Layer\n";
755 }
756 if (do_viewport_output) {
757 ss << "#define gpu_ViewportIndex gl_ViewportIndex\n";
758 }
759 }
760 if (bool(info.builtins_ & BuiltinBits::CLIP_CONTROL)) {
761 if (GLContext::clip_control_support && !has_geometry_stage) {
762 /* Assume clip range is set to 0..1 and remap the range just like Vulkan and Metal.
763 * If geometry stage is needed, do that remapping inside the geometry shader stage. */
764 post_main += "gl_Position.z = (gl_Position.z + gl_Position.w) * 0.5;\n";
765 }
766 }
767 if (bool(info.builtins_ & BuiltinBits::BARYCENTRIC_COORD)) {
769 /* Disabled or unsupported. */
770 }
771 else if (epoxy_has_gl_extension("GL_AMD_shader_explicit_vertex_parameter")) {
772 /* Need this for stable barycentric. */
773 ss << "flat out vec4 gpu_pos_flat;\n";
774 ss << "out vec4 gpu_pos;\n";
775
776 post_main += " gpu_pos = gpu_pos_flat = gl_Position;\n";
777 }
778 }
779 ss << "\n";
780
781 if (post_main.empty() == false) {
782 std::string pre_main;
783 ss << main_function_wrapper(pre_main, post_main);
784 }
785 return ss.str();
786}
787
789{
790 std::stringstream ss;
791 std::string pre_main, post_main;
792
793 ss << "\n/* Interfaces. */\n";
794 const Span<StageInterfaceInfo *> in_interfaces = info.geometry_source_.is_empty() ?
797 for (const StageInterfaceInfo *iface : in_interfaces) {
798 print_interface(ss, "in", *iface);
799 }
800 if (bool(info.builtins_ & BuiltinBits::LAYER)) {
801 ss << "#define gpu_Layer gl_Layer\n";
802 }
803 if (bool(info.builtins_ & BuiltinBits::VIEWPORT_INDEX)) {
804 ss << "#define gpu_ViewportIndex gl_ViewportIndex\n";
805 }
806 if (bool(info.builtins_ & BuiltinBits::BARYCENTRIC_COORD)) {
808 ss << "flat in vec4 gpu_pos[3];\n";
809 ss << "smooth in vec3 gpu_BaryCoord;\n";
810 ss << "noperspective in vec3 gpu_BaryCoordNoPersp;\n";
811 }
812 else if (epoxy_has_gl_extension("GL_AMD_shader_explicit_vertex_parameter")) {
813 /* NOTE(fclem): This won't work with geometry shader. Hopefully, we don't need geometry
814 * shader workaround if this extension/feature is detected. */
815 ss << "\n/* Stable Barycentric Coordinates. */\n";
816 ss << "flat in vec4 gpu_pos_flat;\n";
817 ss << "__explicitInterpAMD in vec4 gpu_pos;\n";
818 /* Globals. */
819 ss << "vec3 gpu_BaryCoord;\n";
820 ss << "vec3 gpu_BaryCoordNoPersp;\n";
821 ss << "\n";
822 ss << "vec2 stable_bary_(vec2 in_bary) {\n";
823 ss << " vec3 bary = vec3(in_bary, 1.0 - in_bary.x - in_bary.y);\n";
824 ss << " if (interpolateAtVertexAMD(gpu_pos, 0) == gpu_pos_flat) { return bary.zxy; }\n";
825 ss << " if (interpolateAtVertexAMD(gpu_pos, 2) == gpu_pos_flat) { return bary.yzx; }\n";
826 ss << " return bary.xyz;\n";
827 ss << "}\n";
828 ss << "\n";
829
830 pre_main += " gpu_BaryCoord = stable_bary_(gl_BaryCoordSmoothAMD);\n";
831 pre_main += " gpu_BaryCoordNoPersp = stable_bary_(gl_BaryCoordNoPerspAMD);\n";
832 }
833 }
834 if (info.early_fragment_test_) {
835 ss << "layout(early_fragment_tests) in;\n";
836 }
837 ss << "layout(" << to_string(info.depth_write_) << ") out float gl_FragDepth;\n";
838
839 ss << "\n/* Sub-pass Inputs. */\n";
842 /* Declare as inout but do not write to it. */
843 ss << "layout(location = " << std::to_string(input.index) << ") inout "
844 << to_string(input.type) << " " << input.name << ";\n";
845 }
846 else {
847 std::string image_name = "gpu_subpass_img_";
848 image_name += std::to_string(input.index);
849
850 /* Declare global for input. */
851 ss << to_string(input.type) << " " << input.name << ";\n";
852
853 /* IMPORTANT: We assume that the frame-buffer will be layered or not based on the layer
854 * built-in flag. */
855 bool is_layered_fb = bool(info.builtins_ & BuiltinBits::LAYER);
856 bool is_layered_input = ELEM(
857 input.img_type, ImageType::Uint2DArray, ImageType::Int2DArray, ImageType::Float2DArray);
858
859 /* Declare image. */
860 using Resource = ShaderCreateInfo::Resource;
861 /* NOTE(fclem): Using the attachment index as resource index might be problematic as it might
862 * collide with other resources. */
863 Resource res(Resource::BindType::SAMPLER, input.index);
864 res.sampler.type = input.img_type;
865 res.sampler.sampler = GPUSamplerState::default_sampler();
866 res.sampler.name = image_name;
867 print_resource(ss, res, false);
868
869 char swizzle[] = "xyzw";
870 swizzle[to_component_count(input.type)] = '\0';
871
872 std::string texel_co = (is_layered_input) ?
873 ((is_layered_fb) ? "ivec3(gl_FragCoord.xy, gpu_Layer)" :
874 /* This should fetch the attached layer.
875 * But this is not simple to set. For now
876 * assume it is always the first layer. */
877 "ivec3(gl_FragCoord.xy, 0)") :
878 "ivec2(gl_FragCoord.xy)";
879
880 std::stringstream ss_pre;
881 /* Populate the global before main using imageLoad. */
882 ss_pre << " " << input.name << " = texelFetch(" << image_name << ", " << texel_co << ", 0)."
883 << swizzle << ";\n";
884
885 pre_main += ss_pre.str();
886 }
887 }
888 ss << "\n/* Outputs. */\n";
890 ss << "layout(location = " << output.index;
891 switch (output.blend) {
892 case DualBlend::SRC_0:
893 ss << ", index = 0";
894 break;
895 case DualBlend::SRC_1:
896 ss << ", index = 1";
897 break;
898 default:
899 break;
900 }
901 ss << ") ";
902 ss << "out " << to_string(output.type) << " " << output.name << ";\n";
903 }
904 ss << "\n";
905
906 if (!pre_main.empty() || !post_main.empty()) {
907 ss << main_function_wrapper(pre_main, post_main);
908 }
909 return ss.str();
910}
911
913{
914 int max_verts = info.geometry_layout_.max_vertices;
915 int invocations = info.geometry_layout_.invocations;
916
917 std::stringstream ss;
918 ss << "\n/* Geometry Layout. */\n";
919 ss << "layout(" << to_string(info.geometry_layout_.primitive_in);
920 if (invocations != -1) {
921 ss << ", invocations = " << invocations;
922 }
923 ss << ") in;\n";
924
925 ss << "layout(" << to_string(info.geometry_layout_.primitive_out)
926 << ", max_vertices = " << max_verts << ") out;\n";
927 ss << "\n";
928 return ss.str();
929}
930
932 const StringRefNull &name)
933{
934 for (auto *iface : ifaces) {
935 if (iface->instance_name == name) {
936 return iface;
937 }
938 }
939 return nullptr;
940}
941
943{
944 std::stringstream ss;
945
946 ss << "\n/* Interfaces. */\n";
947 for (const StageInterfaceInfo *iface : info.vertex_out_interfaces_) {
948 bool has_matching_output_iface = find_interface_by_name(info.geometry_out_interfaces_,
949 iface->instance_name) != nullptr;
950 const char *suffix = (has_matching_output_iface) ? "_in[]" : "[]";
951 print_interface(ss, "in", *iface, suffix);
952 }
953 ss << "\n";
954 for (const StageInterfaceInfo *iface : info.geometry_out_interfaces_) {
955 bool has_matching_input_iface = find_interface_by_name(info.vertex_out_interfaces_,
956 iface->instance_name) != nullptr;
957 const char *suffix = (has_matching_input_iface) ? "_out" : "";
958 print_interface(ss, "out", *iface, suffix);
959 }
960 ss << "\n";
961 return ss.str();
962}
963
965{
966 std::stringstream ss;
967 ss << "\n/* Compute Layout. */\n";
968 ss << "layout(";
969 ss << " local_size_x = " << info.compute_layout_.local_size_x;
970 ss << ", local_size_y = " << info.compute_layout_.local_size_y;
971 ss << ", local_size_z = " << info.compute_layout_.local_size_z;
972 ss << ") in;\n";
973 ss << "\n";
974 return ss.str();
975}
976
977
978/* -------------------------------------------------------------------- */
982
983std::string GLShader::workaround_geometry_shader_source_create(
984 const shader::ShaderCreateInfo &info)
985{
986 std::stringstream ss;
987
988 const bool do_layer_output = bool(info.builtins_ & BuiltinBits::LAYER);
989 const bool do_viewport_output = bool(info.builtins_ & BuiltinBits::VIEWPORT_INDEX);
990 const bool do_barycentric_workaround = !GLContext::native_barycentric_support &&
992
993 shader::ShaderCreateInfo info_modified = info;
994 info_modified.geometry_out_interfaces_ = info_modified.vertex_out_interfaces_;
1000
1001 ss << geometry_layout_declare(info_modified);
1002 ss << geometry_interface_declare(info_modified);
1003 if (do_layer_output) {
1004 ss << "in int gpu_Layer[];\n";
1005 }
1006 if (do_viewport_output) {
1007 ss << "in int gpu_ViewportIndex[];\n";
1008 }
1009
1010 if (do_barycentric_workaround) {
1011 ss << "flat out vec4 gpu_pos[3];\n";
1012 ss << "smooth out vec3 gpu_BaryCoord;\n";
1013 ss << "noperspective out vec3 gpu_BaryCoordNoPersp;\n";
1014 }
1015 ss << "\n";
1016
1017 ss << "void main()\n";
1018 ss << "{\n";
1019 if (do_barycentric_workaround) {
1020 ss << " gpu_pos[0] = gl_in[0].gl_Position;\n";
1021 ss << " gpu_pos[1] = gl_in[1].gl_Position;\n";
1022 ss << " gpu_pos[2] = gl_in[2].gl_Position;\n";
1023 }
1024 for (auto i : IndexRange(3)) {
1025 for (const StageInterfaceInfo *iface : info_modified.vertex_out_interfaces_) {
1026 for (auto &inout : iface->inouts) {
1027 ss << " " << iface->instance_name << "_out." << inout.name;
1028 ss << " = " << iface->instance_name << "_in[" << i << "]." << inout.name << ";\n";
1029 }
1030 }
1031 if (do_barycentric_workaround) {
1032 ss << " gpu_BaryCoordNoPersp = gpu_BaryCoord =";
1033 ss << " vec3(" << int(i == 0) << ", " << int(i == 1) << ", " << int(i == 2) << ");\n";
1034 }
1035 ss << " gl_Position = gl_in[" << i << "].gl_Position;\n";
1036 if (bool(info.builtins_ & BuiltinBits::CLIP_CONTROL)) {
1038 /* Assume clip range is set to 0..1 and remap the range just like Vulkan and Metal. */
1039 ss << "gl_Position.z = (gl_Position.z + gl_Position.w) * 0.5;\n";
1040 }
1041 }
1042 if (do_layer_output) {
1043 ss << " gl_Layer = gpu_Layer[" << i << "];\n";
1044 }
1045 if (do_viewport_output) {
1046 ss << " gl_ViewportIndex = gpu_ViewportIndex[" << i << "];\n";
1047 }
1048 ss << " EmitVertex();\n";
1049 }
1050 ss << "}\n";
1051 return ss.str();
1052}
1053
1054bool GLShader::do_geometry_shader_injection(const shader::ShaderCreateInfo *info) const
1055{
1056 BuiltinBits builtins = info->builtins_;
1057 if (!GLContext::native_barycentric_support && bool(builtins & BuiltinBits::BARYCENTRIC_COORD)) {
1058 return true;
1059 }
1060 if (!GLContext::layered_rendering_support && bool(builtins & BuiltinBits::LAYER)) {
1061 return true;
1062 }
1063 if (!GLContext::layered_rendering_support && bool(builtins & BuiltinBits::VIEWPORT_INDEX)) {
1064 return true;
1065 }
1066 return false;
1067}
1068
1070
1071/* -------------------------------------------------------------------- */
1074
1076{
1078 static std::string patch = []() {
1079 std::stringstream ss;
1080 /* Version need to go first. */
1081 ss << "#version 430\n";
1082
1083 /* Enable extensions for features that are not part of our base GLSL version
1084 * don't use an extension for something already available! */
1086 ss << "#extension GL_ARB_shader_draw_parameters : enable\n";
1087 ss << "#define GPU_ARB_shader_draw_parameters\n";
1088 ss << "#define gpu_BaseInstance gl_BaseInstanceARB\n";
1089 }
1091 ss << "#extension GL_ARB_shader_viewport_layer_array: enable\n";
1092 }
1094 ss << "#extension GL_AMD_shader_explicit_vertex_parameter: enable\n";
1095 }
1097 ss << "#define GPU_ARB_clip_control\n";
1098 }
1099
1100 /* Fallbacks. */
1102 ss << "uniform int gpu_BaseInstance;\n";
1103 }
1104
1105 /* Vulkan GLSL compatibility. */
1106 ss << "#define gpu_InstanceIndex (gl_InstanceID + gpu_BaseInstance)\n";
1107
1108 /* Array compatibility. */
1109 ss << "#define gpu_Array(_type) _type[]\n";
1110
1111 /* Needs to have this defined upfront for configuring shader defines. */
1112 ss << "#define GPU_VERTEX_SHADER\n";
1113 /* GLSL Backend Lib. */
1115
1116 return ss.str();
1117 }();
1118 return patch;
1119}
1120
1122{
1124 static std::string patch = []() {
1125 std::stringstream ss;
1126 /* Version need to go first. */
1127 ss << "#version 430\n";
1128
1130 ss << "#extension GL_ARB_shader_viewport_layer_array: enable\n";
1131 }
1133 ss << "#extension GL_AMD_shader_explicit_vertex_parameter: enable\n";
1134 }
1136 ss << "#define GPU_ARB_clip_control\n";
1137 }
1138
1139 /* Array compatibility. */
1140 ss << "#define gpu_Array(_type) _type[]\n";
1141
1142 /* Needs to have this defined upfront for configuring shader defines. */
1143 ss << "#define GPU_GEOMETRY_SHADER\n";
1144 /* GLSL Backend Lib. */
1146
1147 return ss.str();
1148 }();
1149 return patch;
1150}
1151
1153{
1155 static std::string patch = []() {
1156 std::stringstream ss;
1157 /* Version need to go first. */
1158 ss << "#version 430\n";
1159
1161 ss << "#extension GL_ARB_shader_viewport_layer_array: enable\n";
1162 }
1164 ss << "#extension GL_AMD_shader_explicit_vertex_parameter: enable\n";
1165 }
1167 ss << "#extension GL_EXT_shader_framebuffer_fetch: enable\n";
1168 }
1170 ss << "#extension GL_ARB_shader_stencil_export: enable\n";
1171 ss << "#define GPU_ARB_shader_stencil_export\n";
1172 }
1174 ss << "#define GPU_ARB_clip_control\n";
1175 }
1176
1177 /* Array compatibility. */
1178 ss << "#define gpu_Array(_type) _type[]\n";
1179
1180 /* Needs to have this defined upfront for configuring shader defines. */
1181 ss << "#define GPU_FRAGMENT_SHADER\n";
1182 /* GLSL Backend Lib. */
1184
1185 return ss.str();
1186 }();
1187 return patch;
1188}
1189
1191{
1193 static std::string patch = []() {
1194 std::stringstream ss;
1195 /* Version need to go first. */
1196 ss << "#version 430\n";
1197
1198 /* Array compatibility. */
1199 ss << "#define gpu_Array(_type) _type[]\n";
1200
1201 /* Needs to have this defined upfront for configuring shader defines. */
1202 ss << "#define GPU_COMPUTE_SHADER\n";
1203
1205 ss << "#define GPU_ARB_clip_control\n";
1206 }
1207
1209
1210 return ss.str();
1211 }();
1212 return patch;
1213}
1214
/* Return the GLSL patch header (version, extensions, stage define) to prepend to the
 * sources of the given stage. Each per-stage patch is built once and cached statically. */
1215StringRefNull GLShader::glsl_patch_get(GLenum gl_stage)
1216{
1217 if (gl_stage == GL_VERTEX_SHADER) {
1218 return glsl_patch_vertex_get();
1219 }
1220 if (gl_stage == GL_GEOMETRY_SHADER) {
1221 return glsl_patch_geometry_get();
1222 }
1223 if (gl_stage == GL_FRAGMENT_SHADER) {
1224 return glsl_patch_fragment_get();
1225 }
1226 if (gl_stage == GL_COMPUTE_SHADER) {
1227 return glsl_patch_compute_get();
1228 }
/* NOTE(review): original line 1229 appears stripped by the doc extraction —
 * presumably a BLI_assert_unreachable() before this fallback return; verify
 * against the upstream file. */
1230 return "";
1231}
1232
/* Build (and optionally compile) one shader stage.
 *
 * - Patches slot 0 with the stage's GLSL header and slot 1 with the specialization
 *   constants declaration.
 * - When async/batch compilation is enabled, only bakes the sources into `gl_sources`
 *   and returns 0 (no GL object is created here).
 * - Otherwise compiles with GL and returns the shader handle, or 0 on failure
 *   (setting `compilation_failed_`).
 *
 * NOTE(review): the original lines 1256 and 1259 appear stripped by the doc
 * extraction (the second argument of the `std::string(...)` call and a debug-log
 * condition header); verify against the upstream file. */
1233GLuint GLShader::create_shader_stage(GLenum gl_stage,
1234 MutableSpan<StringRefNull> sources,
1235 GLSources &gl_sources,
1236 const shader::SpecializationConstants &constants_state)
1237{
1238 /* Patch the shader sources to include specialization constants. */
1239 std::string constants_source;
1240 Vector<StringRefNull> recreated_sources;
1241 if (has_specialization_constants()) {
1242 constants_source = constants_declare(constants_state);
1243 if (sources.is_empty()) {
/* Empty `sources` means a specialization re-compile: rebuild the span from the stored copies. */
1244 recreated_sources = gl_sources.sources_get();
1245 sources = recreated_sources;
1246 }
1247 }
1248
1249 /* Patch the shader code using the first source slot. */
1250 sources[SOURCES_INDEX_VERSION] = glsl_patch_get(gl_stage);
1251 sources[SOURCES_INDEX_SPECIALIZATION_CONSTANTS] = constants_source;
1252
1253 if (async_compilation_) {
/* Bake the patched slots so a later (subprocess) compile sees the exact same text. */
1254 gl_sources[SOURCES_INDEX_VERSION].source = std::string(sources[SOURCES_INDEX_VERSION]);
1255 gl_sources[SOURCES_INDEX_SPECIALIZATION_CONSTANTS].source = std::string(
1257 }
1258
1260 /* Store the generated source for printing in case the link fails. */
1261 StringRefNull source_type;
1262 switch (gl_stage) {
1263 case GL_VERTEX_SHADER:
1264 source_type = "VertShader";
1265 break;
1266 case GL_GEOMETRY_SHADER:
1267 source_type = "GeomShader";
1268 break;
1269 case GL_FRAGMENT_SHADER:
1270 source_type = "FragShader";
1271 break;
1272 case GL_COMPUTE_SHADER:
1273 source_type = "ComputeShader";
1274 break;
1275 }
1276
1277 debug_source += "\n\n----------" + source_type + "----------\n\n";
1278 for (StringRefNull source : sources) {
1279 debug_source.append(source);
1280 }
1281 }
1282
1283 if (async_compilation_) {
1284 /* Only build the sources. */
1285 return 0;
1286 }
1287
1288 GLuint shader = glCreateShader(gl_stage);
1289 if (shader == 0) {
1290 fprintf(stderr, "GLShader: Error: Could not create shader object.\n");
1291 return 0;
1292 }
1293
/* GL wants an array of raw C strings; lengths are omitted (null-terminated). */
1294 Array<const char *, 16> c_str_sources(sources.size());
1295 for (const int i : sources.index_range()) {
1296 c_str_sources[i] = sources[i].c_str();
1297 }
1298 glShaderSource(shader, c_str_sources.size(), c_str_sources.data(), nullptr);
1299 glCompileShader(shader);
1300
1301 GLint status;
1302 glGetShaderiv(shader, GL_COMPILE_STATUS, &status);
/* Print the info log on failure, or always when GPU debugging is enabled. */
1303 if (!status || (G.debug & G_DEBUG_GPU)) {
1304 char log[5000] = "";
1305 glGetShaderInfoLog(shader, sizeof(log), nullptr, log);
1306 if (log[0] != '\0') {
1307 GLLogParser parser;
1308 switch (gl_stage) {
1309 case GL_VERTEX_SHADER:
1310 this->print_log(sources, log, "VertShader", !status, &parser);
1311 break;
1312 case GL_GEOMETRY_SHADER:
1313 this->print_log(sources, log, "GeomShader", !status, &parser);
1314 break;
1315 case GL_FRAGMENT_SHADER:
1316 this->print_log(sources, log, "FragShader", !status, &parser);
1317 break;
1318 case GL_COMPUTE_SHADER:
1319 this->print_log(sources, log, "ComputeShader", !status, &parser);
1320 break;
1321 }
1322 }
1323 }
1324 if (!status) {
1325 glDeleteShader(shader);
1326 compilation_failed_ = true;
1327 return 0;
1328 }
1329
1330 debug::object_label(gl_stage, shader, name);
1331 return shader;
1332}
1333
1334void GLShader::update_program_and_sources(GLSources &stage_sources,
1335 MutableSpan<StringRefNull> sources)
1336{
1337 const bool store_sources = has_specialization_constants() || async_compilation_;
1338 if (store_sources && stage_sources.is_empty()) {
1339 stage_sources = sources;
1340 }
1341}
1342
1344{
1345 update_program_and_sources(vertex_sources_, sources);
1346 main_program_->vert_shader = create_shader_stage(
1347 GL_VERTEX_SHADER, sources, vertex_sources_, *constants);
1348}
1349
1351{
1352 update_program_and_sources(geometry_sources_, sources);
1353 main_program_->geom_shader = create_shader_stage(
1354 GL_GEOMETRY_SHADER, sources, geometry_sources_, *constants);
1355}
1356
1358{
1359 update_program_and_sources(fragment_sources_, sources);
1360 main_program_->frag_shader = create_shader_stage(
1361 GL_FRAGMENT_SHADER, sources, fragment_sources_, *constants);
1362}
1363
1365{
1366 update_program_and_sources(compute_sources_, sources);
1367 main_program_->compute_shader = create_shader_stage(
1368 GL_COMPUTE_SHADER, sources, compute_sources_, *constants);
1369}
1370
1372{
1373 if (compilation_failed_) {
1374 return false;
1375 }
1376
1377 if (info && do_geometry_shader_injection(info)) {
1378 std::string source = workaround_geometry_shader_source_create(*info);
1379 Vector<StringRefNull> sources;
1380 sources.append("version");
1381 sources.append("/* Specialization Constants. */\n");
1382 sources.append(source);
1384 }
1385
1386 if (async_compilation_) {
1387 return true;
1388 }
1389
1390 main_program_->program_link(name);
1391 return post_finalize(info);
1392}
1393
1395{
1396 GLuint program_id = main_program_->program_id;
1397 GLint status;
1398 glGetProgramiv(program_id, GL_LINK_STATUS, &status);
1399 if (!status) {
1400 char log[5000];
1401 glGetProgramInfoLog(program_id, sizeof(log), nullptr, log);
1402 GLLogParser parser;
1403 print_log({debug_source}, log, "Linking", true, &parser);
1404 return false;
1405 }
1406
1407 /* Reset for specialization constants variations. */
1408 async_compilation_ = false;
1409
1410 if (info != nullptr) {
1411 interface = new GLShaderInterface(main_program_->program_id, *info);
1412 }
1413 else {
1414 interface = new GLShaderInterface(main_program_->program_id);
1415 }
1416
1417 return true;
1418}
1419
1421
1422/* -------------------------------------------------------------------- */
1425
1427{
1428 GLProgram &program = program_get(constants_state);
1429 glUseProgram(program.program_id);
1430}
1431
1433{
1434#ifndef NDEBUG
1435 glUseProgram(0);
1436#endif
1437}
1438
1440
1441/* -------------------------------------------------------------------- */
1444
1445void GLShader::uniform_float(int location, int comp_len, int array_size, const float *data)
1446{
1447 switch (comp_len) {
1448 case 1:
1449 glUniform1fv(location, array_size, data);
1450 break;
1451 case 2:
1452 glUniform2fv(location, array_size, data);
1453 break;
1454 case 3:
1455 glUniform3fv(location, array_size, data);
1456 break;
1457 case 4:
1458 glUniform4fv(location, array_size, data);
1459 break;
1460 case 9:
1461 glUniformMatrix3fv(location, array_size, 0, data);
1462 break;
1463 case 16:
1464 glUniformMatrix4fv(location, array_size, 0, data);
1465 break;
1466 default:
1467 BLI_assert(0);
1468 break;
1469 }
1470}
1471
1472void GLShader::uniform_int(int location, int comp_len, int array_size, const int *data)
1473{
1474 switch (comp_len) {
1475 case 1:
1476 glUniform1iv(location, array_size, data);
1477 break;
1478 case 2:
1479 glUniform2iv(location, array_size, data);
1480 break;
1481 case 3:
1482 glUniform3iv(location, array_size, data);
1483 break;
1484 case 4:
1485 glUniform4iv(location, array_size, data);
1486 break;
1487 default:
1488 BLI_assert(0);
1489 break;
1490 }
1491}
1492
1494
1495/* -------------------------------------------------------------------- */
1499{
1501 source = "";
1502 source_ref = other;
1503 }
1504 else {
1505 source = other;
1506 source_ref = std::nullopt;
1507 }
1508}
1509
1511{
1512 clear();
1513 reserve(other.size());
1514
1515 for (StringRefNull other_source : other) {
1516 /* Don't store empty string as compilers can optimize these away and result in pointing to a
1517 * string that isn't c-str compliant anymore. */
1518 if (other_source.is_empty()) {
1519 continue;
1520 }
1521 append(GLSource(other_source));
1522 }
1523
1524 return *this;
1525}
1526
1528{
1530 result.reserve(size());
1531
1532 for (const GLSource &source : *this) {
1533 if (source.source_ref) {
1534 result.append(*source.source_ref);
1535 }
1536 else {
1537 result.append(source.source);
1538 }
1539 }
1540 return result;
1541}
1542
1543std::string GLSources::to_string() const
1544{
1545 std::string result;
1546 for (const GLSource &source : *this) {
1547 if (source.source_ref) {
1548 result.append(*source.source_ref);
1549 }
1550 else {
1551 result.append(source.source);
1552 }
1553 }
1554 return result;
1555}
1556
1558{
1559 size_t result = 0;
1560 result += comp.empty() ? 0 : comp.size() + sizeof('\0');
1561 result += vert.empty() ? 0 : vert.size() + sizeof('\0');
1562 result += geom.empty() ? 0 : geom.size() + sizeof('\0');
1563 result += frag.empty() ? 0 : frag.size() + sizeof('\0');
1564 return result;
1565}
1566
1568
1569/* -------------------------------------------------------------------- */
1572
/* Release all GL objects owned by this program variation. */
1573GLShader::GLProgram::~GLProgram()
1574{
1575 /* Invalid handles are silently ignored. */
1576 glDeleteShader(vert_shader);
1577 glDeleteShader(geom_shader);
1578 glDeleteShader(frag_shader);
1579 glDeleteShader(compute_shader);
1580 glDeleteProgram(program_id);
1581}
1582
1583void GLShader::GLProgram::program_link(StringRefNull shader_name)
1584{
1585 if (this->program_id == 0) {
1586 this->program_id = glCreateProgram();
1587 debug::object_label(GL_PROGRAM, this->program_id, shader_name.c_str());
1588 }
1589
1590 GLuint program_id = this->program_id;
1591
1592 if (this->vert_shader) {
1593 glAttachShader(program_id, this->vert_shader);
1594 }
1595 if (this->geom_shader) {
1596 glAttachShader(program_id, this->geom_shader);
1597 }
1598 if (this->frag_shader) {
1599 glAttachShader(program_id, this->frag_shader);
1600 }
1601 if (this->compute_shader) {
1602 glAttachShader(program_id, this->compute_shader);
1603 }
1604 glLinkProgram(program_id);
1605}
1606
/* Return the GLProgram matching the given specialization constants, compiling and
 * linking it on first use. Passing nullptr returns the main (default) program.
 *
 * Thread-safety: the cache lookup is guarded by `program_cache_mutex_`; per-program
 * compilation is guarded by the program's own `compilation_mutex`.
 *
 * NOTE(review): original lines 1655-1656 and 1667-1668 appear stripped by the doc
 * extraction (presumably GPU_debug_group begin/end around the link); verify against
 * the upstream file. */
1607GLShader::GLProgram &GLShader::program_get(const shader::SpecializationConstants *constants_state)
1608{
1609 BLI_assert(constants_state == nullptr || this->has_specialization_constants() == true);
1610
1611 if (constants_state == nullptr) {
1612 /* Early exit for shaders that doesn't use specialization constants. */
1613 BLI_assert(main_program_);
1614 return *main_program_;
1615 }
1616
1617 program_cache_mutex_.lock();
1618
1619 GLProgram &program = *program_cache_.lookup_or_add_cb(
1620 constants_state->values, []() { return std::make_unique<GLProgram>(); });
1621
1622 program_cache_mutex_.unlock();
1623
1624 /* Avoid two threads trying to specialize the same shader at the same time. */
1625 std::scoped_lock lock(program.compilation_mutex);
1626
1627 if (program.program_id != 0) {
1628 /* Specialization is already compiled. */
1629 return program;
1630 }
1631
/* Rebuild every present stage from the stored sources with the requested constants. */
1632 if (!vertex_sources_.is_empty()) {
1633 program.vert_shader = create_shader_stage(
1634 GL_VERTEX_SHADER, {}, vertex_sources_, *constants_state);
1635 }
1636 if (!geometry_sources_.is_empty()) {
1637 program.geom_shader = create_shader_stage(
1638 GL_GEOMETRY_SHADER, {}, geometry_sources_, *constants_state);
1639 }
1640 if (!fragment_sources_.is_empty()) {
1641 program.frag_shader = create_shader_stage(
1642 GL_FRAGMENT_SHADER, {}, fragment_sources_, *constants_state);
1643 }
1644 if (!compute_sources_.is_empty()) {
1645 program.compute_shader = create_shader_stage(
1646 GL_COMPUTE_SHADER, {}, compute_sources_, *constants_state);
1647 }
1648
/* Async path: create the empty program object now; linking happens later from a
 * binary produced by the compilation subprocess. */
1649 if (async_compilation_) {
1650 program.program_id = glCreateProgram();
1651 debug::object_label(GL_PROGRAM, program.program_id, name);
1652 return program;
1653 }
1654
1657
1658 program.program_link(name);
1659
1660 /* Ensure the specialization compiled correctly.
1661 * Specialization compilation should never fail, but adding this check seems to bypass an
1662 * internal Nvidia driver issue (See #142046). */
1663 GLint status;
1664 glGetProgramiv(program.program_id, GL_LINK_STATUS, &status);
1665 BLI_assert(status);
1666
1669
1670 return program;
1671}
1672
1674{
1676 result.comp = compute_sources_.to_string();
1677 result.vert = vertex_sources_.to_string();
1678 result.geom = geometry_sources_.to_string();
1679 result.frag = fragment_sources_.to_string();
1680 return result;
1681}
1682
1684
1685/* -------------------------------------------------------------------- */
1688
1690{
1691 dynamic_cast<GLShader *>(unwrap(specialization.shader))->program_get(&specialization.constants);
1692}
1693
1695
1696#if BLI_SUBPROCESS_SUPPORT
1697
1698/* -------------------------------------------------------------------- */
1701
/* Spawn one shader-compilation subprocess and the IPC primitives (shared memory +
 * three named semaphores) used to talk to it. The IPC name must be unique across
 * sessions and workers, hence PID + timestamp + per-worker counter. */
1702GLCompilerWorker::GLCompilerWorker()
1703{
1704 using namespace std::chrono;
1705 /* This function has to be thread-safe. */
1706 static std::atomic<size_t> g_pipe_id = 0;
1707 size_t pipe_id = g_pipe_id++;
1708
1709 /* Use a timestamp on top of the PID.
1710 * If a Blender session crashes without unlinking its shared memory, and the PID is reused, we
1711 * may run into a name collision otherwise. */
1712 static size_t time_id =
1713 duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
1714
1715 std::string name = "BLENDER_SHADER_COMPILER_" + std::to_string(getpid()) + "_" +
1716 std::to_string(time_id) + "_" + std::to_string(pipe_id);
1717
/* IPC objects must exist before the subprocess starts so it can attach to them. */
1718 shared_mem_ = std::make_unique<SharedMemory>(
1719 name, compilation_subprocess_shared_memory_size, true);
1720 start_semaphore_ = std::make_unique<SharedSemaphore>(name + "_START", false);
1721 end_semaphore_ = std::make_unique<SharedSemaphore>(name + "_END", false);
1722 close_semaphore_ = std::make_unique<SharedSemaphore>(name + "_CLOSE", false);
1723
1724 subprocess_.create({"--compilation-subprocess", name.c_str()});
1725}
1726
/* Ask the subprocess to shut down. It waits on the start semaphore, so that one is
 * also flagged to unblock it and let it observe the close request. */
1727GLCompilerWorker::~GLCompilerWorker()
1728{
1729 close_semaphore_->increment();
1730 /* Flag start so the subprocess can reach the close semaphore. */
1731 start_semaphore_->increment();
1732}
1733
/* Hand a baked source bundle to the subprocess through shared memory and signal it to
 * start compiling. Non-blocking; pair with block_until_ready() / load_program_binary(). */
1734void GLCompilerWorker::compile(const GLSourcesBaked &sources)
1735{
1736 BLI_assert(state_ == AVAILABLE);
1737
1738 ShaderSourceHeader *shared_src = reinterpret_cast<ShaderSourceHeader *>(shared_mem_->get_data());
1739 char *next_src = shared_src->sources;
1740
/* Copy each non-empty stage source, including its null terminator, back to back. */
1741 auto add_src = [&](const std::string &src) {
1742 if (!src.empty()) {
1743 const size_t src_size = src.size() + 1;
1744 memcpy(next_src, src.c_str(), src_size);
1745 next_src += src_size;
1746 }
1747 };
1748
1749 add_src(sources.comp);
1750 add_src(sources.vert);
1751 add_src(sources.geom);
1752 add_src(sources.frag);
1753
/* Caller guarantees the bundle fits (checked via GLSourcesBaked::size() beforehand). */
1754 BLI_assert(size_t(next_src) <= size_t(shared_src) + compilation_subprocess_shared_memory_size);
1755
/* Tell the subprocess how to interpret the packed sources. */
1756 if (!sources.comp.empty()) {
1757 BLI_assert(sources.vert.empty() && sources.geom.empty() && sources.frag.empty());
1758 shared_src->type = ShaderSourceHeader::Type::COMPUTE;
1759 }
1760 else {
1761 BLI_assert(sources.comp.empty() && !sources.vert.empty() && !sources.frag.empty());
1762 shared_src->type = sources.geom.empty() ?
1763 ShaderSourceHeader::Type::GRAPHICS :
1764 ShaderSourceHeader::Type::GRAPHICS_WITH_GEOMETRY_STAGE;
1765 }
1766
1767 start_semaphore_->increment();
1768
/* Record the start time so is_lost() can time out a hung subprocess. */
1769 state_ = COMPILATION_REQUESTED;
1770 compilation_start = BLI_time_now_seconds();
1771}
1772
/* Wait until the subprocess reports the compilation finished.
 * Returns false if the subprocess died or timed out; in that case any cached binary
 * it was loading is deleted to avoid repeated crashes on the same file. */
1773bool GLCompilerWorker::block_until_ready()
1774{
1775 BLI_assert(ELEM(state_, COMPILATION_REQUESTED, COMPILATION_READY));
1776 if (state_ == COMPILATION_READY) {
1777 return true;
1778 }
1779
1780 auto delete_cached_binary = [&]() {
1781 /* If the subprocess crashed when loading the binary,
1782 * its name should be stored in shared memory.
1783 * Delete it to prevent more crashes in the future. */
1784 char str_start[] = "SOURCE_HASH:";
1785 char *shared_mem = reinterpret_cast<char *>(shared_mem_->get_data());
1786 if (BLI_str_startswith(shared_mem, str_start)) {
/* sizeof(str_start) - 1 skips the prefix but not its null terminator. */
1787 std::string path = GL_shader_cache_dir_get() + SEP_STR +
1788 std::string(shared_mem + sizeof(str_start) - 1);
1789 if (BLI_exists(path.c_str())) {
1790 BLI_delete(path.c_str(), false, false);
1791 }
1792 }
1793 };
1794
/* Poll in 1s slices so a lost subprocess is detected instead of blocking forever. */
1795 while (!end_semaphore_->try_decrement(1000)) {
1796 if (is_lost()) {
1797 delete_cached_binary();
1798 return false;
1799 }
1800 }
1801
1802 state_ = COMPILATION_READY;
1803 return true;
1804}
1805
1806bool GLCompilerWorker::is_lost()
1807{
1808 /* Use a timeout for hanged processes. */
1809 float max_timeout_seconds = 30.0f;
1810 return !subprocess_.is_running() ||
1811 (state_ == COMPILATION_REQUESTED &&
1812 (BLI_time_now_seconds() - compilation_start) > max_timeout_seconds);
1813}
1814
/* Load the binary produced by the subprocess into `program` via glProgramBinary.
 * Returns false if the subprocess was lost or produced no binary (size == 0).
 *
 * NOTE(review): original line 1828 appears stripped by the doc extraction —
 * presumably the matching GPU_debug_group_end(); verify against the upstream file. */
1815bool GLCompilerWorker::load_program_binary(GLint program)
1816{
1817 if (!block_until_ready()) {
1818 return false;
1819 }
1820
1821 ShaderBinaryHeader *binary = (ShaderBinaryHeader *)shared_mem_->get_data();
1822
1823 state_ = COMPILATION_FINISHED;
1824
1825 if (binary->size > 0) {
1826 GPU_debug_group_begin("Load Binary");
1827 glProgramBinary(program, binary->format, binary->data, binary->size);
1829 return true;
1830 }
1831
1832 return false;
1833}
1834
/* Mark this worker as free so it can accept the next compile() request. */
1835void GLCompilerWorker::release()
1836{
1837 state_ = AVAILABLE;
1838}
1839
1841
1842/* -------------------------------------------------------------------- */
1845
/* Tear down the compiler: stop the compilation thread first, then delete every
 * spawned worker (each worker's destructor signals its subprocess to close). */
1846GLSubprocessShaderCompiler::~GLSubprocessShaderCompiler()
1847{
1848 /* Must be called before we destruct the GLCompilerWorkers. */
1849 destruct_compilation_worker();
1850
1851 for (GLCompilerWorker *worker : workers_) {
1852 delete worker;
1853 }
1854}
1855
/* Return this thread's dedicated worker (one subprocess per calling thread,
 * held in a thread_local). A worker whose subprocess died or hung is replaced
 * with a fresh one; `workers_` only tracks them for destruction. */
1856GLCompilerWorker *GLSubprocessShaderCompiler::get_compiler_worker()
1857{
1858 auto new_worker = [&]() {
1859 GLCompilerWorker *result = new GLCompilerWorker();
1860 std::lock_guard lock(workers_mutex_);
1861 workers_.append(result);
1862 return result;
1863 };
1864
1865 static thread_local GLCompilerWorker *worker = new_worker();
1866
1867 if (worker->is_lost()) {
1868 std::cerr << "ERROR: Compilation subprocess lost\n";
1869 {
1870 std::lock_guard lock(workers_mutex_);
1871 workers_.remove_first_occurrence_and_reorder(worker);
1872 }
1873 delete worker;
1874 worker = new_worker();
1875 }
1876
1877 return worker;
1878}
1879
/* Compile a shader in the subprocess and load the resulting program binary.
 * Falls back to regular in-process compilation when the sources don't fit in the
 * shared memory block or when the subprocess path fails.
 *
 * NOTE(review): original line 1912 appears stripped by the doc extraction —
 * presumably the GPU_debug_group_end() matching the begin above; verify against
 * the upstream file. */
1880Shader *GLSubprocessShaderCompiler::compile_shader(const shader::ShaderCreateInfo &info)
1881{
1882 const_cast<ShaderCreateInfo *>(&info)->finalize();
/* `true` = batch/async: only bakes the sources, no GL compilation yet. */
1883 GLShader *shader = static_cast<GLShader *>(compile(info, true));
1884 GLSourcesBaked sources = shader->get_sources();
1885
1886 size_t required_size = sources.size();
1887 bool do_async_compilation = required_size <= sizeof(ShaderSourceHeader::sources);
1888 if (!do_async_compilation) {
1889 /* TODO: Can't reuse? */
1890 delete shader;
1891 return compile(info, false);
1892 }
1893
1894 GLCompilerWorker *worker = get_compiler_worker();
1895 worker->compile(sources);
1896
1897 GPU_debug_group_begin("Subprocess Compilation");
1898
1899 /* This path is always called for the default shader compilation. Not for specialization.
1900 * Use the default constant template.*/
1901 const shader::SpecializationConstants &constants = GPU_shader_get_default_constant_state(
1902 wrap(shader));
1903
1904 if (!worker->load_program_binary(shader->program_cache_.lookup(constants.values)->program_id) ||
1905 !shader->post_finalize(&info))
1906 {
1907 /* Compilation failed, try to compile it locally. */
1908 delete shader;
1909 shader = nullptr;
1910 }
1911
1913
1914 worker->release();
1915
1916 if (!shader) {
1917 return compile(info, false);
1918 }
1919
1920 return shader;
1921}
1922
/* Compile one specialization-constant variation of a shader in the subprocess.
 * On any failure the empty program object is released, so a later shader bind
 * falls back to regular in-process compilation.
 *
 * NOTE(review): original lines 1952 and 1978 appear stripped by the doc
 * extraction (presumably a comment line and a GPU_debug_group_end()); verify
 * against the upstream file. */
1923void GLSubprocessShaderCompiler::specialize_shader(ShaderSpecialization &specialization)
1924{
/* Serializes cache access across all threads using this compiler. */
1925 static std::mutex mutex;
1926
1927 GLShader *shader = static_cast<GLShader *>(unwrap(specialization.shader));
1928
1929 auto program_get = [&]() -> GLShader::GLProgram * {
1930 if (shader->program_cache_.contains(specialization.constants.values)) {
1931 return shader->program_cache_.lookup(specialization.constants.values).get();
1932 }
1933 return nullptr;
1934 };
1935
1936 auto program_release = [&]() {
1937 /* Compilation failed, local compilation will be tried later on shader bind. */
1938 GLShader::GLProgram *program = program_get();
1939 glDeleteProgram(program->program_id);
1940 program->program_id = 0;
1941 };
1942
1943 GLSourcesBaked sources;
1944 {
1945 std::lock_guard lock(mutex);
1946
1947 if (program_get()) {
1948 /*Already compiled*/
1949 return;
1950 }
1951
/* Temporarily flip async mode so program_get() only bakes sources and creates an
 * empty program object instead of compiling in-process. */
1953 shader->async_compilation_ = true;
1954 shader->program_get(&specialization.constants);
1955 shader->async_compilation_ = false;
1956 sources = shader->get_sources();
1957
1958 size_t required_size = sources.size();
1959 bool do_async_compilation = required_size <= sizeof(ShaderSourceHeader::sources);
1960 if (!do_async_compilation) {
1961 program_release();
1962 return;
1963 }
1964 }
1965
1966 GPU_debug_group_begin("Subprocess Specialization");
1967
1968 GLCompilerWorker *worker = get_compiler_worker();
1969 worker->compile(sources);
1970 worker->block_until_ready();
1971
1972 std::lock_guard lock(mutex);
1973
1974 if (!worker->load_program_binary(program_get()->program_id)) {
1975 program_release();
1976 }
1977
1979
1980 worker->release();
1981}
1982
1984
1985#endif
@ G_DEBUG_GPU
#define BLI_assert_unreachable()
Definition BLI_assert.h:93
#define BLI_assert(a)
Definition BLI_assert.h:46
File and directory operations.
int BLI_exists(const char *path) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL()
Definition storage.cc:373
int BLI_delete(const char *path, bool dir, bool recursive) ATTR_NONNULL()
KDTree *BLI_kdtree_nd_ new(unsigned int nodes_len_capacity)
Definition kdtree_impl.h:97
int bool BLI_str_startswith(const char *__restrict str, const char *__restrict start) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1
Platform independent time functions.
double BLI_time_now_seconds(void)
Definition time.cc:65
#define UNUSED_FUNCTION(x)
#define ELEM(...)
float[3] Vector
bool GPU_stencil_export_support()
void GPU_debug_group_end()
Definition gpu_debug.cc:33
void GPU_debug_group_begin(const char *name)
Definition gpu_debug.cc:22
#define GPU_DEBUG_SHADER_SPECIALIZATION_GROUP
Definition GPU_debug.hh:65
@ GPU_DRIVER_OFFICIAL
@ GPU_OS_ANY
@ GPU_DEVICE_ATI
bool GPU_type_matches(eGPUDeviceType device, eGPUOSType os, eGPUDriverType driver)
const blender::gpu::shader::SpecializationConstants & GPU_shader_get_default_constant_state(GPUShader *sh)
eGPUTextureFormat
@ GPU_R16UI
@ GPU_RG16F
@ GPU_R32F
@ GPU_R16I
@ GPU_RGB10_A2
@ GPU_R32I
@ GPU_RG8UI
@ GPU_R16F
@ GPU_RG8I
@ GPU_RG16I
@ GPU_RG32UI
@ GPU_RGBA32F
@ GPU_RGBA16F
@ GPU_RG8
@ GPU_RG32I
@ GPU_RG16
@ GPU_RGBA32UI
@ GPU_R8I
@ GPU_R16
@ GPU_RG16UI
@ GPU_RGBA8I
@ GPU_RGBA8UI
@ GPU_RGBA16UI
@ GPU_RGBA16I
@ GPU_R8UI
@ GPU_RGBA16
@ GPU_RG32F
@ GPU_R8
@ GPU_R32UI
@ GPU_RGBA32I
@ GPU_R11F_G11F_B10F
@ GPU_RGBA8
volatile int lock
BMesh const char void * data
long long int int64_t
int64_t size() const
void append(const GLSource &value)
bool is_empty() const
void reserve(const int64_t min_capacity)
constexpr bool is_empty() const
Definition BLI_span.hh:509
constexpr int64_t size() const
Definition BLI_span.hh:252
constexpr bool is_empty() const
constexpr const char * c_str() const
void append(const T &value)
static bool layered_rendering_support
Definition gl_context.hh:56
static bool framebuffer_fetch_support
Definition gl_context.hh:55
static bool shader_draw_parameters_support
Definition gl_context.hh:61
static bool explicit_location_support
Definition gl_context.hh:54
static GLContext * get()
static bool clip_control_support
Definition gl_context.hh:51
static bool native_barycentric_support
Definition gl_context.hh:57
virtual void specialize_shader(ShaderSpecialization &specialization) override
std::string geometry_layout_declare(const shader::ShaderCreateInfo &info) const override
Definition gl_shader.cc:912
std::string vertex_interface_declare(const shader::ShaderCreateInfo &info) const override
Definition gl_shader.cc:721
void fragment_shader_from_glsl(MutableSpan< StringRefNull > sources) override
std::string geometry_interface_declare(const shader::ShaderCreateInfo &info) const override
Definition gl_shader.cc:942
void geometry_shader_from_glsl(MutableSpan< StringRefNull > sources) override
void compute_shader_from_glsl(MutableSpan< StringRefNull > sources) override
void unbind() override
void uniform_float(int location, int comp_len, int array_size, const float *data) override
bool post_finalize(const shader::ShaderCreateInfo *info=nullptr)
GLShader(const char *name)
Definition gl_shader.cc:54
void init() override
Definition gl_shader.cc:91
GLSourcesBaked get_sources()
std::string compute_layout_declare(const shader::ShaderCreateInfo &info) const override
Definition gl_shader.cc:964
std::string constants_declare(const shader::SpecializationConstants &constants_state) const
Definition gl_shader.cc:670
std::string resources_declare(const shader::ShaderCreateInfo &info) const override
Definition gl_shader.cc:599
void uniform_int(int location, int comp_len, int array_size, const int *data) override
std::string fragment_interface_declare(const shader::ShaderCreateInfo &info) const override
Definition gl_shader.cc:788
bool finalize(const shader::ShaderCreateInfo *info=nullptr) override
void bind(const shader::SpecializationConstants *constants_state) override
void vertex_shader_from_glsl(MutableSpan< StringRefNull > sources) override
GLSources & operator=(Span< StringRefNull > other)
std::string to_string() const
Vector< StringRefNull > sources_get() const
virtual void specialize_shader(ShaderSpecialization &)
virtual Shader * compile_shader(const shader::ShaderCreateInfo &info)
std::unique_ptr< const shader::SpecializationConstants > constants
void print_log(Span< StringRefNull > sources, const char *log, const char *stage, bool error, GPULogParser *parser)
Shader(const char *name)
Definition gpu_shader.cc:56
static StringRefNull glsl_patch_geometry_get()
static void print_resource_alias(std::ostream &os, const ShaderCreateInfo::Resource &res)
Definition gl_shader.cc:556
static StringRefNull glsl_patch_compute_get()
static StringRefNull glsl_patch_vertex_get()
static StringRefNull glsl_patch_fragment_get()
char datatoc_glsl_shader_defines_glsl[]
Definition gl_shader.cc:48
static Type UNUSED_FUNCTION to_component_type(const Type &type)
Definition gl_shader.cc:188
ThreadMutex mutex
#define inout
#define log
#define input
#define output
#define DEBUG_LOG_SHADER_SRC_ON_ERROR
#define SOURCES_INDEX_SPECIALIZATION_CONSTANTS
#define SOURCES_INDEX_VERSION
ccl_device_inline float interp(const float a, const float b, const float t)
Definition math_base.h:502
#define G(x, y, z)
void object_label(GLenum type, GLuint object, const char *name)
Definition gl_debug.cc:329
BLI_INLINE int to_component_count(const Type &type)
StringRefNull gpu_shader_dependency_get_filename_from_source_string(const StringRef source_string)
Find the name of the file from which the given string was generated.
static void print_image_type(std::ostream &os, const ImageType &type, const ShaderCreateInfo::Resource::BindType bind_type)
Definition vk_shader.cc:207
const char * to_string(ShaderStage stage)
Definition mtl_shader.mm:52
static Context * unwrap(GPUContext *ctx)
static StageInterfaceInfo * find_interface_by_name(const Span< StageInterfaceInfo * > ifaces, const StringRefNull name)
static void print_interface(std::ostream &os, const std::string &prefix, const StageInterfaceInfo &iface, int &location, const StringRefNull &suffix="")
Definition vk_shader.cc:453
static GPUContext * wrap(Context *ctx)
static std::ostream & print_qualifier(std::ostream &os, const Qualifier &qualifiers)
Definition vk_shader.cc:336
static void print_resource(std::ostream &os, const ShaderCreateInfo::Resource &res)
static std::string main_function_wrapper(std::string &pre_main, std::string &post_main)
Definition vk_shader.cc:468
static constexpr GPUSamplerState default_sampler()
blender::gpu::shader::SpecializationConstants constants
std::optional< StringRefNull > source_ref
Definition gl_shader.hh:40
Describe inputs & outputs, stage interfaces, resources and sources of a shader. If all data is correc...
Vector< StageInterfaceInfo * > vertex_out_interfaces_
Self & geometry_layout(PrimitiveIn prim_in, PrimitiveOut prim_out, int max_vertices, int invocations=-1)
Vector< CompilationConstant, 0 > compilation_constants_
Vector< StageInterfaceInfo * > geometry_out_interfaces_
Vector< SpecializationConstant > specialization_constants_
Vector< SpecializationConstant::Value, 8 > values
i
Definition text_draw.cc:230
#define SEP_STR
Definition unit.cc:39