Blender V4.5
gpu_shader_create_info.hh
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2021 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
13
14#pragma once
15
16#if !defined(GPU_SHADER)
17# include "BLI_hash.hh"
18# include "BLI_string_ref.hh"
20# include "BLI_vector.hh"
21# include "GPU_common_types.hh"
22# include "GPU_material.hh"
23# include "GPU_texture.hh"
24
25# include <iostream>
26#endif
27
28/* Force enable `printf` support in release build. */
29#define GPU_FORCE_ENABLE_SHADER_PRINTF 0
30
31#if !defined(NDEBUG) || GPU_FORCE_ENABLE_SHADER_PRINTF
32# define GPU_SHADER_PRINTF_ENABLE 1
33#else
34# define GPU_SHADER_PRINTF_ENABLE 0
35#endif
36#define GPU_SHADER_PRINTF_SLOT 13
37#define GPU_SHADER_PRINTF_MAX_CAPACITY (1024 * 4)
38
39/* Used for primitive expansion. */
40#define GPU_SSBO_INDEX_BUF_SLOT 7
41/* Used for polylines. */
42#define GPU_SSBO_POLYLINE_POS_BUF_SLOT 0
43#define GPU_SSBO_POLYLINE_COL_BUF_SLOT 1
44
45#if defined(GLSL_CPP_STUBS)
46# define GPU_SHADER_NAMED_INTERFACE_INFO(_interface, _inst_name) \
47 namespace interface::_interface { \
48 struct {
49# define GPU_SHADER_NAMED_INTERFACE_END(_inst_name) \
50 } \
51 _inst_name; \
52 }
53
54# define GPU_SHADER_INTERFACE_INFO(_interface) namespace interface::_interface {
55# define GPU_SHADER_INTERFACE_END() }
56
57# define GPU_SHADER_CREATE_INFO(_info) \
58 namespace _info { \
59 namespace gl_VertexShader { \
60 } \
61 namespace gl_FragmentShader { \
62 } \
63 namespace gl_ComputeShader { \
64 }
65# define GPU_SHADER_CREATE_END() }
66
67# define SHADER_LIBRARY_CREATE_INFO(_info) using namespace _info;
68# define VERTEX_SHADER_CREATE_INFO(_info) \
69 using namespace ::gl_VertexShader; \
70 using namespace _info::gl_VertexShader; \
71 using namespace _info;
72# define FRAGMENT_SHADER_CREATE_INFO(_info) \
73 using namespace ::gl_FragmentShader; \
74 using namespace _info::gl_FragmentShader; \
75 using namespace _info;
76# define COMPUTE_SHADER_CREATE_INFO(_info) \
77 using namespace ::gl_ComputeShader; \
78 using namespace _info::gl_ComputeShader; \
79 using namespace _info;
80
81#elif !defined(GPU_SHADER_CREATE_INFO)
82/* Helps intellisense / auto-completion inside info files. */
83# define GPU_SHADER_NAMED_INTERFACE_INFO(_interface, _inst_name) \
84 static inline void autocomplete_helper_interface_##_interface() \
85 { \
86 StageInterfaceInfo _interface(#_interface, _inst_name); \
87 _interface
88# define GPU_SHADER_INTERFACE_INFO(_interface) \
89 static inline void autocomplete_helper_interface_##_interface() \
90 { \
91 StageInterfaceInfo _interface(#_interface); \
92 _interface
93# define GPU_SHADER_CREATE_INFO(_info) \
94 static inline void autocomplete_helper_info_##_info() \
95 { \
96 ShaderCreateInfo _info(#_info); \
97 _info
98
99# define GPU_SHADER_NAMED_INTERFACE_END(_inst_name) \
100 ; \
101 }
102# define GPU_SHADER_INTERFACE_END() \
103 ; \
104 }
105# define GPU_SHADER_CREATE_END() \
106 ; \
107 }
108
109#endif
110
111#ifndef GLSL_CPP_STUBS
112# define SMOOTH(type, name) .smooth(Type::type##_t, #name)
113# define FLAT(type, name) .flat(Type::type##_t, #name)
114# define NO_PERSPECTIVE(type, name) .no_perspective(Type::type##_t, #name)
115
116/* LOCAL_GROUP_SIZE(int size_x, int size_y = 1, int size_z = 1) */
117# define LOCAL_GROUP_SIZE(...) .local_group_size(__VA_ARGS__)
118
119# define VERTEX_IN(slot, type, name) .vertex_in(slot, Type::type##_t, #name)
120# define VERTEX_OUT(stage_interface) .vertex_out(stage_interface)
121/* TO REMOVE. */
122# define GEOMETRY_LAYOUT(...) .geometry_layout(__VA_ARGS__)
123# define GEOMETRY_OUT(stage_interface) .geometry_out(stage_interface)
124
125# define SUBPASS_IN(slot, type, img_type, name, rog) \
126 .subpass_in(slot, Type::type##_t, ImageType::img_type, #name, rog)
127
128# define FRAGMENT_OUT(slot, type, name) .fragment_out(slot, Type::type##_t, #name)
129# define FRAGMENT_OUT_DUAL(slot, type, name, blend) \
130 .fragment_out(slot, Type::type##_t, #name, DualBlend::blend)
131# define FRAGMENT_OUT_ROG(slot, type, name, rog) \
132 .fragment_out(slot, Type::type##_t, #name, DualBlend::NONE, rog)
133
134# define EARLY_FRAGMENT_TEST(enable) .early_fragment_test(enable)
135# define DEPTH_WRITE(value) .depth_write(value)
136
137# define SPECIALIZATION_CONSTANT(type, name, default_value) \
138 .specialization_constant(Type::type##_t, #name, default_value)
139
140# define COMPILATION_CONSTANT(type, name, value) \
141 .compilation_constant(Type::type##_t, #name, value)
142
143# define PUSH_CONSTANT(type, name) .push_constant(Type::type##_t, #name)
144# define PUSH_CONSTANT_ARRAY(type, name, array_size) \
145 .push_constant(Type::type##_t, #name, array_size)
146
147# define UNIFORM_BUF(slot, type_name, name) .uniform_buf(slot, #type_name, #name)
148# define UNIFORM_BUF_FREQ(slot, type_name, name, freq) \
149 .uniform_buf(slot, #type_name, #name, Frequency::freq)
150
151# define STORAGE_BUF(slot, qualifiers, type_name, name) \
152 .storage_buf(slot, Qualifier::qualifiers, STRINGIFY(type_name), #name)
153# define STORAGE_BUF_FREQ(slot, qualifiers, type_name, name, freq) \
154 .storage_buf(slot, Qualifier::qualifiers, STRINGIFY(type_name), #name, Frequency::freq)
155
156# define SAMPLER(slot, type, name) .sampler(slot, ImageType::type, #name)
157# define SAMPLER_FREQ(slot, type, name, freq) \
158 .sampler(slot, ImageType::type, #name, Frequency::freq)
159
160# define IMAGE(slot, format, qualifiers, type, name) \
161 .image(slot, format, Qualifier::qualifiers, ImageReadWriteType::type, #name)
162# define IMAGE_FREQ(slot, format, qualifiers, type, name, freq) \
163 .image(slot, format, Qualifier::qualifiers, ImageReadWriteType::type, #name, Frequency::freq)
164
165# define BUILTINS(builtin) .builtins(builtin)
166
167# define VERTEX_SOURCE(filename) .vertex_source(filename)
168# define FRAGMENT_SOURCE(filename) .fragment_source(filename)
169# define COMPUTE_SOURCE(filename) .compute_source(filename)
170
171# define DEFINE(name) .define(name)
172# define DEFINE_VALUE(name, value) .define(name, value)
173
174# define DO_STATIC_COMPILATION() .do_static_compilation(true)
175# define AUTO_RESOURCE_LOCATION() .auto_resource_location(true)
176
177/* TO REMOVE. */
178# define METAL_BACKEND_ONLY() .metal_backend_only(true)
179
180# define ADDITIONAL_INFO(info_name) .additional_info(#info_name)
181# define TYPEDEF_SOURCE(filename) .typedef_source(filename)
182
183# define MTL_MAX_TOTAL_THREADS_PER_THREADGROUP(value) \
184 .mtl_max_total_threads_per_threadgroup(value)
185
186#else
187
188# define _read const
189# define _write
190# define _read_write
191
192# define SMOOTH(type, name) type name = {};
193# define FLAT(type, name) type name = {};
194# define NO_PERSPECTIVE(type, name) type name = {};
195
196/* LOCAL_GROUP_SIZE(int size_x, int size_y = -1, int size_z = -1) */
197# define LOCAL_GROUP_SIZE(...)
198
199# define VERTEX_IN(slot, type, name) \
200 namespace gl_VertexShader { \
201 const type name = {}; \
202 }
203# define VERTEX_OUT(stage_interface) using namespace interface::stage_interface;
204/* TO REMOVE. */
205# define GEOMETRY_LAYOUT(...)
206# define GEOMETRY_OUT(stage_interface) using namespace interface::stage_interface;
207
208# define SUBPASS_IN(slot, type, img_type, name, rog) const type name = {};
209
210# define FRAGMENT_OUT(slot, type, name) \
211 namespace gl_FragmentShader { \
212 type name; \
213 }
214# define FRAGMENT_OUT_DUAL(slot, type, name, blend) \
215 namespace gl_FragmentShader { \
216 type name; \
217 }
218# define FRAGMENT_OUT_ROG(slot, type, name, rog) \
219 namespace gl_FragmentShader { \
220 type name; \
221 }
222
223# define EARLY_FRAGMENT_TEST(enable)
224# define DEPTH_WRITE(value)
225
226# define SPECIALIZATION_CONSTANT(type, name, default_value) \
227 constexpr type name = type(default_value);
228
229# define COMPILATION_CONSTANT(type, name, value) constexpr type name = type(value);
230
231# define PUSH_CONSTANT(type, name) extern const type name;
232# define PUSH_CONSTANT_ARRAY(type, name, array_size) extern const type name[array_size];
233
234# define UNIFORM_BUF(slot, type_name, name) extern const type_name name;
235# define UNIFORM_BUF_FREQ(slot, type_name, name, freq) extern const type_name name;
236
237# define STORAGE_BUF(slot, qualifiers, type_name, name) extern _##qualifiers type_name name;
238# define STORAGE_BUF_FREQ(slot, qualifiers, type_name, name, freq) \
239 extern _##qualifiers type_name name;
240
241# define SAMPLER(slot, type, name) type name;
242# define SAMPLER_FREQ(slot, type, name, freq) type name;
243
244# define IMAGE(slot, format, qualifiers, type, name) _##qualifiers type name;
245# define IMAGE_FREQ(slot, format, qualifiers, type, name, freq) _##qualifiers type name;
246
247# define BUILTINS(builtin)
248
249# define VERTEX_SOURCE(filename)
250# define GEOMETRY_SOURCE(filename)
251# define FRAGMENT_SOURCE(filename)
252# define COMPUTE_SOURCE(filename)
253
254# define DEFINE(name)
255# define DEFINE_VALUE(name, value)
256
257# define DO_STATIC_COMPILATION()
258# define AUTO_RESOURCE_LOCATION()
259
260/* TO REMOVE. */
261# define METAL_BACKEND_ONLY()
262
263# define ADDITIONAL_INFO(info_name) \
264 using namespace info_name; \
265 using namespace info_name::gl_FragmentShader; \
266 using namespace info_name::gl_VertexShader;
267
268# define TYPEDEF_SOURCE(filename)
269
270# define MTL_MAX_TOTAL_THREADS_PER_THREADGROUP(value)
271#endif
272
273#define _INFO_EXPAND2(a, b) ADDITIONAL_INFO(a) ADDITIONAL_INFO(b)
274#define _INFO_EXPAND3(a, b, c) _INFO_EXPAND2(a, b) ADDITIONAL_INFO(c)
275#define _INFO_EXPAND4(a, b, c, d) _INFO_EXPAND3(a, b, c) ADDITIONAL_INFO(d)
276#define _INFO_EXPAND5(a, b, c, d, e) _INFO_EXPAND4(a, b, c, d) ADDITIONAL_INFO(e)
277#define _INFO_EXPAND6(a, b, c, d, e, f) _INFO_EXPAND5(a, b, c, d, e) ADDITIONAL_INFO(f)
278
279#define ADDITIONAL_INFO_EXPAND(...) VA_NARGS_CALL_OVERLOAD(_INFO_EXPAND, __VA_ARGS__)
280
281#define CREATE_INFO_VARIANT(name, ...) \
282 GPU_SHADER_CREATE_INFO(name) \
283 DO_STATIC_COMPILATION() \
284 ADDITIONAL_INFO_EXPAND(__VA_ARGS__) \
285 GPU_SHADER_CREATE_END()
286
287#if !defined(GLSL_CPP_STUBS)
288
289namespace blender::gpu::shader {
290
/* All of these functions are a bit out of place. */
292static inline Type to_type(const eGPUType type)
293{
294 switch (type) {
295 case GPU_FLOAT:
296 return Type::float_t;
297 case GPU_VEC2:
298 return Type::float2_t;
299 case GPU_VEC3:
300 return Type::float3_t;
301 case GPU_VEC4:
302 return Type::float4_t;
303 case GPU_MAT3:
304 return Type::float3x3_t;
305 case GPU_MAT4:
306 return Type::float4x4_t;
307 default:
308 BLI_assert_msg(0, "Error: Cannot convert eGPUType to shader::Type.");
309 return Type::float_t;
310 }
311}
312
313static inline std::ostream &operator<<(std::ostream &stream, const Type type)
314{
315 switch (type) {
316 case Type::float_t:
317 return stream << "float";
318 case Type::float2_t:
319 return stream << "vec2";
320 case Type::float3_t:
321 return stream << "vec3";
322 case Type::float4_t:
323 return stream << "vec4";
324 case Type::float3x3_t:
325 return stream << "mat3";
326 case Type::float4x4_t:
327 return stream << "mat4";
329 return stream << "vec3_1010102_Inorm";
330 case Type::uchar_t:
331 return stream << "uchar";
332 case Type::uchar2_t:
333 return stream << "uchar2";
334 case Type::uchar3_t:
335 return stream << "uchar3";
336 case Type::uchar4_t:
337 return stream << "uchar4";
338 case Type::char_t:
339 return stream << "char";
340 case Type::char2_t:
341 return stream << "char2";
342 case Type::char3_t:
343 return stream << "char3";
344 case Type::char4_t:
345 return stream << "char4";
346 case Type::int_t:
347 return stream << "int";
348 case Type::int2_t:
349 return stream << "ivec2";
350 case Type::int3_t:
351 return stream << "ivec3";
352 case Type::int4_t:
353 return stream << "ivec4";
354 case Type::uint_t:
355 return stream << "uint";
356 case Type::uint2_t:
357 return stream << "uvec2";
358 case Type::uint3_t:
359 return stream << "uvec3";
360 case Type::uint4_t:
361 return stream << "uvec4";
362 case Type::ushort_t:
363 return stream << "ushort";
364 case Type::ushort2_t:
365 return stream << "ushort2";
366 case Type::ushort3_t:
367 return stream << "ushort3";
368 case Type::ushort4_t:
369 return stream << "ushort4";
370 case Type::short_t:
371 return stream << "short";
372 case Type::short2_t:
373 return stream << "short2";
374 case Type::short3_t:
375 return stream << "short3";
376 case Type::short4_t:
377 return stream << "short4";
378 case Type::bool_t:
379 return stream << "bool";
380 default:
381 BLI_assert(0);
382 return stream;
383 }
384}
385
386static inline std::ostream &operator<<(std::ostream &stream, const eGPUType type)
387{
388 switch (type) {
389 case GPU_CLOSURE:
390 return stream << "Closure";
391 default:
392 return stream << to_type(type);
393 }
394}
395
396enum class BuiltinBits {
397 NONE = 0,
403 FRAG_COORD = (1 << 2),
404 FRONT_FACING = (1 << 4),
406 INSTANCE_ID = (1 << 6),
411 LAYER = (1 << 7),
414 NUM_WORK_GROUP = (1 << 10),
415 POINT_COORD = (1 << 11),
416 POINT_SIZE = (1 << 12),
417 PRIMITIVE_ID = (1 << 13),
418 VERTEX_ID = (1 << 14),
419 WORK_GROUP_ID = (1 << 15),
420 WORK_GROUP_SIZE = (1 << 16),
425 VIEWPORT_INDEX = (1 << 17),
426
427 /* Texture atomics requires usage options to alter compilation flag. */
428 TEXTURE_ATOMIC = (1 << 18),
429
430 /* Enable shader patching on GL to remap clip range to 0..1.
431 * Will do nothing if ClipControl is unsupported. */
432 CLIP_CONTROL = (1 << 19),
433
434 /* Not a builtin but a flag we use to tag shaders that use the debug features. */
435 USE_PRINTF = (1 << 28),
436 USE_DEBUG_DRAW = (1 << 29),
437};
439
444enum class DepthWrite {
445 /* UNCHANGED specified as default to indicate gl_FragDepth is not used. */
450};
451
452/* Samplers & images. */
453enum class ImageType {
455# define TYPES_EXPAND(s) \
456 Float##s, Uint##s, Int##s, sampler##s = Float##s, usampler##s = Uint##s, isampler##s = Int##s
457
459 TYPES_EXPAND(1DArray),
461 TYPES_EXPAND(2DArray),
464 TYPES_EXPAND(CubeArray),
466# undef TYPES_EXPAND
467
468# define TYPES_EXPAND(s) \
469 Shadow##s, Depth##s, sampler##s##Shadow = Shadow##s, sampler##s##Depth = Depth##s
472 TYPES_EXPAND(2DArray),
474 TYPES_EXPAND(CubeArray),
475# undef TYPES_EXPAND
476
477# define TYPES_EXPAND(s) \
478 AtomicUint##s, AtomicInt##s, usampler##s##Atomic = AtomicUint##s, \
479 isampler##s##Atomic = AtomicInt##s
489 TYPES_EXPAND(2DArray),
491# undef TYPES_EXPAND
492};
493
494/* Samplers & images. */
497# define TYPES_EXPAND(s) \
498 Float##s = int(ImageType::Float##s), Uint##s = int(ImageType::Uint##s), \
499 Int##s = int(ImageType::Int##s), image##s = Float##s, uimage##s = Uint##s, iimage##s = Int##s
502 TYPES_EXPAND(1DArray),
504 TYPES_EXPAND(2DArray),
506# undef TYPES_EXPAND
507
508# define TYPES_EXPAND(s) \
509 AtomicUint##s = int(ImageType::AtomicUint##s), AtomicInt##s = int(ImageType::AtomicInt##s), \
510 uimage##s##Atomic = AtomicUint##s, iimage##s##Atomic = AtomicInt##s
520 TYPES_EXPAND(2DArray),
522# undef TYPES_EXPAND
523};
524
525/* Storage qualifiers. */
526enum class Qualifier {
528 no_restrict = (1 << 0),
529 read = (1 << 1),
530 write = (1 << 2),
533 QUALIFIER_MAX = (write << 1) - 1,
534};
536
538enum class Frequency {
539 BATCH = 0,
543};
544
546enum class DualBlend {
547 NONE = 0,
550};
551
558
567
576
583
592
593 StageInterfaceInfo(const char *name_, const char *instance_name_ = "")
594 : name(name_), instance_name(instance_name_){};
596
598
600 {
601 inouts.append({Interpolation::SMOOTH, type, _name});
602 return *(Self *)this;
603 }
604
606 {
607 inouts.append({Interpolation::FLAT, type, _name});
608 return *(Self *)this;
609 }
610
612 {
613 inouts.append({Interpolation::NO_PERSPECTIVE, type, _name});
614 return *(Self *)this;
615 }
616};
617
632 bool finalized_ = false;
657
658# define TEST_EQUAL(a, b, _member) \
659 if (!((a)._member == (b)._member)) { \
660 return false; \
661 }
662
663# define TEST_VECTOR_EQUAL(a, b, _vector) \
664 TEST_EQUAL(a, b, _vector.size()); \
665 for (auto i : _vector.index_range()) { \
666 TEST_EQUAL(a, b, _vector[i]); \
667 }
668
669 struct VertIn {
670 int index;
673
674 bool operator==(const VertIn &b) const
675 {
676 TEST_EQUAL(*this, b, index);
677 TEST_EQUAL(*this, b, type);
678 TEST_EQUAL(*this, b, name);
679 return true;
680 }
681 };
683
689 int max_vertices = -1;
690
692 {
693 TEST_EQUAL(*this, b, primitive_in);
694 TEST_EQUAL(*this, b, invocations);
695 TEST_EQUAL(*this, b, primitive_out);
696 TEST_EQUAL(*this, b, max_vertices);
697 return true;
698 }
699 };
701
703 int local_size_x = -1;
704 int local_size_y = -1;
705 int local_size_z = -1;
706
707 bool operator==(const ComputeStageLayout &b) const
708 {
709 TEST_EQUAL(*this, b, local_size_x);
710 TEST_EQUAL(*this, b, local_size_y);
711 TEST_EQUAL(*this, b, local_size_z);
712 return true;
713 }
714 };
716
717 struct FragOut {
718 int index;
722 /* NOTE: Currently only supported by Metal. */
724
725 bool operator==(const FragOut &b) const
726 {
727 TEST_EQUAL(*this, b, index);
728 TEST_EQUAL(*this, b, type);
729 TEST_EQUAL(*this, b, blend);
730 TEST_EQUAL(*this, b, name);
732 return true;
733 }
734 };
736
737 struct SubpassIn {
738 int index;
742 /* NOTE: Currently only supported by Metal. */
744
745 bool operator==(const SubpassIn &b) const
746 {
747 TEST_EQUAL(*this, b, index);
748 TEST_EQUAL(*this, b, type);
749 TEST_EQUAL(*this, b, img_type);
750 TEST_EQUAL(*this, b, name);
752 return true;
753 }
754 };
756
759
765
772
777
783
784 struct Resource {
791
793 int slot;
794 union {
799 };
800
801 Resource(BindType type, int _slot) : bind_type(type), slot(_slot){};
802
803 bool operator==(const Resource &b) const
804 {
805 TEST_EQUAL(*this, b, bind_type);
806 TEST_EQUAL(*this, b, slot);
807 switch (bind_type) {
808 case UNIFORM_BUFFER:
809 TEST_EQUAL(*this, b, uniformbuf.type_name);
810 TEST_EQUAL(*this, b, uniformbuf.name);
811 break;
812 case STORAGE_BUFFER:
813 TEST_EQUAL(*this, b, storagebuf.qualifiers);
814 TEST_EQUAL(*this, b, storagebuf.type_name);
815 TEST_EQUAL(*this, b, storagebuf.name);
816 break;
817 case SAMPLER:
818 TEST_EQUAL(*this, b, sampler.type);
819 TEST_EQUAL(*this, b, sampler.sampler);
820 TEST_EQUAL(*this, b, sampler.name);
821 break;
822 case IMAGE:
823 TEST_EQUAL(*this, b, image.format);
824 TEST_EQUAL(*this, b, image.type);
825 TEST_EQUAL(*this, b, image.qualifiers);
826 TEST_EQUAL(*this, b, image.name);
827 break;
828 }
829 return true;
830 }
831 };
832
840
842 {
843 switch (freq) {
844 case Frequency::PASS:
845 return pass_resources_;
846 case Frequency::BATCH:
847 return batch_resources_;
849 return geometry_resources_;
850 }
852 return pass_resources_;
853 }
854
855 /* Return all resources regardless of their frequency. */
857 {
858 Vector<Resource> all_resources;
859 all_resources.extend(pass_resources_);
860 all_resources.extend(batch_resources_);
861 all_resources.extend(geometry_resources_);
862 return all_resources;
863 }
864
867
868 struct PushConst {
872
873 bool operator==(const PushConst &b) const
874 {
875 TEST_EQUAL(*this, b, type);
876 TEST_EQUAL(*this, b, name);
877 TEST_EQUAL(*this, b, array_size);
878 return true;
879 }
880 };
881
883
884 /* Sources for resources type definitions. */
886
888
895
896 /* API-specific parameters. */
897# ifdef WITH_METAL_BACKEND
898 ushort mtl_max_threads_per_threadgroup_ = 0;
899# endif
900
901 public:
902 ShaderCreateInfo(const char *name) : name_(name){};
903 ~ShaderCreateInfo() = default;
904
906
907 /* -------------------------------------------------------------------- */
910
911 Self &vertex_in(int slot, Type type, StringRefNull name)
912 {
913 vertex_inputs_.append({slot, type, name});
914 interface_names_size_ += name.size() + 1;
915 return *(Self *)this;
916 }
917
919 {
921 return *(Self *)this;
922 }
923
925 PrimitiveOut prim_out,
926 int max_vertices,
927 int invocations = -1)
928 {
929 geometry_layout_.primitive_in = prim_in;
930 geometry_layout_.primitive_out = prim_out;
931 geometry_layout_.max_vertices = max_vertices;
932 geometry_layout_.invocations = invocations;
933 return *(Self *)this;
934 }
935
936 Self &local_group_size(int local_size_x, int local_size_y = 1, int local_size_z = 1)
937 {
938 compute_layout_.local_size_x = local_size_x;
939 compute_layout_.local_size_y = local_size_y;
940 compute_layout_.local_size_z = local_size_z;
941 return *(Self *)this;
942 }
943
949 {
950 early_fragment_test_ = enable;
951 return *(Self *)this;
952 }
953
961 {
963 return *(Self *)this;
964 }
965
967 Type type,
968 StringRefNull name,
970 int raster_order_group = -1)
971 {
972 fragment_outputs_.append({slot, type, blend, name, raster_order_group});
973 return *(Self *)this;
974 }
975
991 int slot, Type type, ImageType img_type, StringRefNull name, int raster_order_group = -1)
992 {
993 subpass_inputs_.append({slot, type, img_type, name, raster_order_group});
994 return *(Self *)this;
995 }
996
998
999 /* -------------------------------------------------------------------- */
1006
1007 Self &compilation_constant(Type type, StringRefNull name, double default_value)
1008 {
1009 CompilationConstant constant;
1010 constant.type = type;
1011 constant.name = name;
1012 switch (type) {
1013 case Type::int_t:
1014 constant.value.i = int(default_value);
1015 break;
1016 case Type::bool_t:
1017 case Type::uint_t:
1018 constant.value.u = uint(default_value);
1019 break;
1020 default:
1021 BLI_assert_msg(0, "Only scalar integer and bool types can be used as constants");
1022 break;
1023 }
1024 compilation_constants_.append(constant);
1025 interface_names_size_ += name.size() + 1;
1026 return *(Self *)this;
1027 }
1028
1030
1031 /* -------------------------------------------------------------------- */
1034
1035 /* Adds a specialization constant which is a dynamically modifiable value, which will be
1036 * statically compiled into a PSO configuration to provide optimal runtime performance,
1037 * with a reduced re-compilation cost vs Macro's with easier generation of unique permutations
1038 * based on run-time values.
1039 *
1040 * Tip: To evaluate use-cases of where specialization constants can provide a performance
1041 * gain, benchmark a given shader in its default case. Attempt to statically disable branches or
1042 * conditions which rely on uniform look-ups and measure if there is a marked improvement in
1043 * performance and/or reduction in memory bandwidth/register pressure.
1044 *
1045 * NOTE: Specialization constants will incur new compilation of PSOs and thus can incur an
1046 * unexpected cost. Specialization constants should be reserved for infrequently changing
1047 * parameters (e.g. user setting parameters such as toggling of features or quality level
1048 * presets), or those with a low set of possible runtime permutations.
1049 *
1050 * Specialization constants are assigned at runtime using:
1051 * - `GPU_shader_constant_*(shader, name, value)`
1052 * or
1053 * - `DrawPass::specialize_constant(shader, name, value)`
1054 *
1055 * All constants **MUST** be specified before binding a shader.
1056 */
1057 Self &specialization_constant(Type type, StringRefNull name, double default_value)
1058 {
1059 SpecializationConstant constant;
1060 constant.type = type;
1061 constant.name = name;
1062 switch (type) {
1063 case Type::int_t:
1064 constant.value.i = int(default_value);
1065 break;
1066 case Type::bool_t:
1067 case Type::uint_t:
1068 constant.value.u = uint(default_value);
1069 break;
1070 case Type::float_t:
1071 constant.value.f = float(default_value);
1072 break;
1073 default:
1074 BLI_assert_msg(0, "Only scalar types can be used as constants");
1075 break;
1076 }
1077 specialization_constants_.append(constant);
1078 interface_names_size_ += name.size() + 1;
1079 return *(Self *)this;
1080 }
1081
1082 /* TODO: Add API to specify unique specialization config permutations in CreateInfo, allowing
1083 * specialized compilation to be primed and handled in the background at start-up, rather than
1084 * waiting for a given permutation to occur dynamically. */
1085
1087
1088 /* -------------------------------------------------------------------- */
1091
1093 StringRefNull type_name,
1094 StringRefNull name,
1096 {
1098 res.uniformbuf.name = name;
1099 res.uniformbuf.type_name = type_name;
1100 resources_get_(freq).append(res);
1101 interface_names_size_ += name.size() + 1;
1102 return *(Self *)this;
1103 }
1104
1106 Qualifier qualifiers,
1107 StringRefNull type_name,
1108 StringRefNull name,
1110 {
1112 res.storagebuf.qualifiers = qualifiers;
1113 res.storagebuf.type_name = type_name;
1114 res.storagebuf.name = name;
1115 resources_get_(freq).append(res);
1116 interface_names_size_ += name.size() + 1;
1117 return *(Self *)this;
1118 }
1119
1120 Self &image(int slot,
1122 Qualifier qualifiers,
1123 ImageReadWriteType type,
1124 StringRefNull name,
1126 {
1128 res.image.format = format;
1129 res.image.qualifiers = qualifiers;
1130 res.image.type = ImageType(type);
1131 res.image.name = name;
1132 resources_get_(freq).append(res);
1133 interface_names_size_ += name.size() + 1;
1134 return *(Self *)this;
1135 }
1136
1137 Self &sampler(int slot,
1138 ImageType type,
1139 StringRefNull name,
1142 {
1144 res.sampler.type = type;
1145 res.sampler.name = name;
1146 /* Produces ASAN errors for the moment. */
1147 // res.sampler.sampler = sampler;
1149 resources_get_(freq).append(res);
1150 interface_names_size_ += name.size() + 1;
1151 return *(Self *)this;
1152 }
1153
1155
1156 /* -------------------------------------------------------------------- */
1159
1161 {
1162 vertex_source_ = filename;
1163 return *(Self *)this;
1164 }
1165
1167 {
1168 fragment_source_ = filename;
1169 return *(Self *)this;
1170 }
1171
1173 {
1174 compute_source_ = filename;
1175 return *(Self *)this;
1176 }
1177
1179
1180 /* -------------------------------------------------------------------- */
1185
1186 Self &push_constant(Type type, StringRefNull name, int array_size = 0)
1187 {
1188 /* We don't have support for UINT push constants yet, use INT instead. */
1189 BLI_assert(type != Type::uint_t);
1190 BLI_assert_msg(name.find("[") == -1,
1191 "Array syntax is forbidden for push constants."
1192 "Use the array_size parameter instead.");
1193 push_constants_.append({type, name, array_size});
1194 interface_names_size_ += name.size() + 1;
1195 return *(Self *)this;
1196 }
1197
1199
1200 /* -------------------------------------------------------------------- */
1203
1205 {
1206 defines_.append({name, value});
1207 return *(Self *)this;
1208 }
1209
1211
1212 /* -------------------------------------------------------------------- */
1215
1217 {
1218 do_static_compilation_ = value;
1219 return *(Self *)this;
1220 }
1221
1223 {
1224 builtins_ |= builtin;
1225 return *(Self *)this;
1226 }
1227
1228 /* Defines how the fragment shader will write to gl_FragDepth. */
1230 {
1231 depth_write_ = value;
1232 return *(Self *)this;
1233 }
1234
1236 {
1238 return *(Self *)this;
1239 }
1240
1242 {
1244 return *(Self *)this;
1245 }
1246
1248
1249 /* -------------------------------------------------------------------- */
1254
1256 {
1257 additional_infos_.append(info_name);
1258 return *(Self *)this;
1259 }
1260
1261 template<typename... Args> Self &additional_info(StringRefNull info_name, Args... args)
1262 {
1263 additional_info(info_name);
1264 additional_info(args...);
1265 return *(Self *)this;
1266 }
1267
1269
1270 /* -------------------------------------------------------------------- */
1277
1279 {
1280 typedef_sources_.append(filename);
1281 return *(Self *)this;
1282 }
1283
1285
1286 /* -------------------------------------------------------------------- */
1293
1294 /* \name mtl_max_total_threads_per_threadgroup
1295 * \a max_total_threads_per_threadgroup - Provides compiler hint for maximum threadgroup size up
1296 * front. Maximum value is 1024. */
1297 Self &mtl_max_total_threads_per_threadgroup(ushort max_total_threads_per_threadgroup)
1298 {
1299# ifdef WITH_METAL_BACKEND
1300 mtl_max_threads_per_threadgroup_ = max_total_threads_per_threadgroup;
1301# else
1302 UNUSED_VARS(max_total_threads_per_threadgroup);
1303# endif
1304 return *(Self *)this;
1305 }
1306
1308
1309 /* -------------------------------------------------------------------- */
1315
1316 /* WARNING: Recursive evaluation is not thread safe.
1317 * Non-recursive evaluation expects their dependencies to be already finalized.
1318 * (All statically declared CreateInfos are automatically finalized at startup) */
1319 void finalize(const bool recursive = false);
1320
1321 std::string check_error() const;
1322 bool is_vulkan_compatible() const;
1323
1325 void validate_merge(const ShaderCreateInfo &other_info);
1326 void validate_vertex_attributes(const ShaderCreateInfo *other_info = nullptr);
1327
1329
1330 /* -------------------------------------------------------------------- */
1334
1335 /* Comparison operator for GPUPass cache. We only compare if it will create the same shader
1336 * code. So we do not compare name and some other internal stuff. */
1364
1366 friend std::ostream &operator<<(std::ostream &stream, const ShaderCreateInfo &info)
1367 {
1368 /* TODO(@fclem): Complete print. */
1369
1370 auto print_resource = [&](const Resource &res) {
1371 switch (res.bind_type) {
1373 stream << "UNIFORM_BUFFER(" << res.slot << ", " << res.uniformbuf.name << ")"
1374 << std::endl;
1375 break;
1377 stream << "STORAGE_BUFFER(" << res.slot << ", " << res.storagebuf.name << ")"
1378 << std::endl;
1379 break;
1381 stream << "SAMPLER(" << res.slot << ", " << res.sampler.name << ")" << std::endl;
1382 break;
1384 stream << "IMAGE(" << res.slot << ", " << res.image.name << ")" << std::endl;
1385 break;
1386 }
1387 };
1388
1389 /* TODO(@fclem): Order the resources. */
1390 for (const auto &res : info.batch_resources_) {
1391 print_resource(res);
1392 }
1393 for (const auto &res : info.pass_resources_) {
1394 print_resource(res);
1395 }
1396 for (const auto &res : info.geometry_resources_) {
1397 print_resource(res);
1398 }
1399 return stream;
1400 }
1401
1403 {
1404 for (const auto &res : batch_resources_) {
1405 if (res.bind_type == bind_type) {
1406 return true;
1407 }
1408 }
1409 for (const auto &res : pass_resources_) {
1410 if (res.bind_type == bind_type) {
1411 return true;
1412 }
1413 }
1414 for (const auto &res : geometry_resources_) {
1415 if (res.bind_type == bind_type) {
1416 return true;
1417 }
1418 }
1419 return false;
1420 }
1421
1423 {
1425 }
1426
1428
1429# undef TEST_EQUAL
1430# undef TEST_VECTOR_EQUAL
1431};
1432
1433} // namespace blender::gpu::shader
1434
1435namespace blender {
1438 {
1439 uint64_t hash = 0;
1441 hash = hash * 33 ^ uint64_t(value.u);
1442 }
1443 return hash;
1444 }
1445};
1446} // namespace blender
1447
1448#endif
#define D
#define BLI_assert_unreachable()
Definition BLI_assert.h:93
#define BLI_assert(a)
Definition BLI_assert.h:46
#define BLI_assert_msg(a, msg)
Definition BLI_assert.h:53
unsigned int uint
unsigned short ushort
#define UNUSED_VARS(...)
#define ENUM_OPERATORS(_type, _max)
eGPUType
@ GPU_VEC2
@ GPU_MAT4
@ GPU_VEC4
@ GPU_CLOSURE
@ GPU_VEC3
@ GPU_MAT3
@ GPU_FLOAT
eGPUTextureFormat
unsigned long long int uint64_t
static T Cube(const T &x)
constexpr int64_t find(char c, int64_t pos=0) const
constexpr int64_t size() const
void extend(Span< T > array)
#define SMOOTH(type, name)
#define TYPES_EXPAND(s)
#define TEST_VECTOR_EQUAL(a, b, _vector)
#define FLAT(type, name)
#define NO_PERSPECTIVE(type, name)
#define TEST_EQUAL(a, b, _member)
format
static std::ostream & operator<<(std::ostream &stream, const Type type)
static Type to_type(const eGPUType type)
static void print_resource(std::ostream &os, const ShaderCreateInfo::Resource &res)
#define hash
Definition noise_c.cc:154
static constexpr GPUSamplerState internal_sampler()
uint64_t operator()(const Vector< blender::gpu::shader::SpecializationConstant::Value > &key) const
Describe inputs & outputs, stage interfaces, resources and sources of a shader. If all data is correc...
Vector< StageInterfaceInfo * > vertex_out_interfaces_
Self & mtl_max_total_threads_per_threadgroup(ushort max_total_threads_per_threadgroup)
Self & compute_source(StringRefNull filename)
Self & compilation_constant(Type type, StringRefNull name, double default_value)
Self & geometry_layout(PrimitiveIn prim_in, PrimitiveOut prim_out, int max_vertices, int invocations=-1)
Self & fragment_source(StringRefNull filename)
Vector< std::array< StringRefNull, 2 > > defines_
Vector< CompilationConstant, 0 > compilation_constants_
void validate_vertex_attributes(const ShaderCreateInfo *other_info=nullptr)
Vector< Resource > & resources_get_(Frequency freq)
Self & vertex_in(int slot, Type type, StringRefNull name)
void finalize(const bool recursive=false)
Self & image(int slot, eGPUTextureFormat format, Qualifier qualifiers, ImageReadWriteType type, StringRefNull name, Frequency freq=Frequency::PASS)
Self & push_constant(Type type, StringRefNull name, int array_size=0)
Self & geometry_out(StageInterfaceInfo &interface)
bool operator==(const ShaderCreateInfo &b) const
Self & additional_info(StringRefNull info_name)
Self & typedef_source(StringRefNull filename)
Self & fragment_out(int slot, Type type, StringRefNull name, DualBlend blend=DualBlend::NONE, int raster_order_group=-1)
Self & vertex_out(StageInterfaceInfo &interface)
Vector< StageInterfaceInfo * > geometry_out_interfaces_
Self & storage_buf(int slot, Qualifier qualifiers, StringRefNull type_name, StringRefNull name, Frequency freq=Frequency::PASS)
bool has_resource_type(Resource::BindType bind_type) const
Self & sampler(int slot, ImageType type, StringRefNull name, Frequency freq=Frequency::PASS, GPUSamplerState sampler=GPUSamplerState::internal_sampler())
Self & additional_info(StringRefNull info_name, Args... args)
Self & uniform_buf(int slot, StringRefNull type_name, StringRefNull name, Frequency freq=Frequency::PASS)
Self & subpass_in(int slot, Type type, ImageType img_type, StringRefNull name, int raster_order_group=-1)
Self & specialization_constant(Type type, StringRefNull name, double default_value)
friend std::ostream & operator<<(std::ostream &stream, const ShaderCreateInfo &info)
Self & local_group_size(int local_size_x, int local_size_y=1, int local_size_z=1)
void validate_merge(const ShaderCreateInfo &other_info)
Self & define(StringRefNull name, StringRefNull value="")
Vector< SpecializationConstant > specialization_constants_
Self & smooth(Type type, StringRefNull _name)
StageInterfaceInfo(const char *name_, const char *instance_name_="")
Self & no_perspective(Type type, StringRefNull _name)
Self & flat(Type type, StringRefNull _name)
read
static int blend(const Tex *tex, const float texvec[3], TexResult *texres)
uint8_t flag
Definition wm_window.cc:139