Blender V4.3
gpu_codegen.cc
/* SPDX-FileCopyrightText: 2005 Blender Authors
 *
 * SPDX-License-Identifier: GPL-2.0-or-later */

/** \file
 * \ingroup gpu
 *
 * Convert material node-trees to GLSL.
 */
#include "MEM_guardedalloc.h"

#include "DNA_customdata_types.h"
#include "DNA_image_types.h"
#include "DNA_material_types.h"

#include "BLI_ghash.h"
#include "BLI_hash_mm2a.hh"
#include "BLI_link_utils.h"
#include "BLI_listbase.h"
#include "BLI_string.h"
#include "BLI_threads.h"
#include "BLI_time.h"
#include "BLI_utildefines.h"

#include "BKE_cryptomatte.hh"
#include "BKE_material.h"

#include "GPU_capabilities.hh"
#include "GPU_context.hh"
#include "GPU_material.hh"
#include "GPU_shader.hh"
#include "GPU_uniform_buffer.hh"
#include "GPU_vertex_format.hh"

#include "BLI_sys_types.h" /* for intptr_t support */
#include "BLI_vector.hh"

#include "gpu_codegen.hh"
#include "gpu_node_graph.hh"
#include "gpu_shader_dependency_private.hh"

#include <cstdarg>
#include <cstring>

#include <sstream>
#include <string>
using namespace blender::gpu::shader;

/* -------------------------------------------------------------------- */
/** \name GPUCodegenCreateInfo
 * \{ */

struct GPUCodegenCreateInfo : ShaderCreateInfo {
  struct NameBuffer {
    using NameEntry = std::array<char, 32>;

    /** Duplicate attribute names to avoid referencing the GPUNodeGraph directly. */
    char attr_names[16][GPU_MAX_SAFE_ATTR_NAME + 1];
    char var_names[16][8];
    blender::Vector<std::unique_ptr<NameEntry>, 16> sampler_names;

    /* Returns the appended name memory location. */
    const char *append_sampler_name(const char name[32])
    {
      auto index = sampler_names.size();
      sampler_names.append(std::make_unique<NameEntry>());
      char *name_buffer = sampler_names[index]->data();
      memcpy(name_buffer, name, 32);
      return name_buffer;
    }
  };

  /** Optional generated interface. */
  StageInterfaceInfo *interface_generated = nullptr;
  /** Copies of the attribute and sampler names, kept in local storage. */
  NameBuffer name_buffer;

  GPUCodegenCreateInfo(const char *name) : ShaderCreateInfo(name){};
  ~GPUCodegenCreateInfo()
  {
    delete interface_generated;
  }
};
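
/* Usage sketch (illustrative, not part of the original source): the create-info owns
 * copies of all names, so the node graph's strings may be freed while the
 * ShaderCreateInfo keeps valid pointers. `tex` is a hypothetical GPUMaterialTexture:
 *
 *   GPUCodegenCreateInfo info("example");
 *   const char *stable_name = info.name_buffer.append_sampler_name(tex->sampler_name);
 *   info.sampler(0, ImageType::FLOAT_2D, stable_name, Frequency::BATCH);
 */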

/** \} */

/* -------------------------------------------------------------------- */
/** \name GPUPass
 * \{ */

struct GPUPass {
  struct GPUPass *next;

  GPUShader *shader;
  GPUCodegenCreateInfo *create_info;
  /** Orphaned GPUPasses get freed by the garbage collector. */
  uint refcount;
  /** The last time the refcount was greater than 0. */
  int gc_timestamp;
  /** Identity hash generated from all GLSL code. */
  uint32_t hash;
  /** Engine this pass was generated for. */
  eGPUMaterialEngine engine;
  /** Did we already try to compile the attached GPUShader. */
  bool compiled;
  /** Whether this pass is inside the pass cache. */
  bool cached;
  /** Hint that an optimized variant of this pass should be created,
   * based on a complexity heuristic during pass code generation. */
  bool should_optimize;
  /** Whether a compilation (synchronous or batched) has been requested. */
  bool compilation_requested;
  /** Handle of the pending asynchronous compilation batch, -1 if none. */
  BatchHandle async_compilation_handle;
  ThreadMutex shader_creation_mutex;
};

/** \} */
/* -------------------------------------------------------------------- */
/** \name GPUPass Cache
 * \{ */

/* Only use one linklist that contains the GPUPasses grouped by hash. */
static GPUPass *pass_cache = nullptr;
static SpinLock pass_cache_spin;

/* Search by hash only. Return the first pass with the same hash.
 * There is a hash collision if `(pass->next && pass->next->hash == hash)`. */
static GPUPass *gpu_pass_cache_lookup(eGPUMaterialEngine engine, uint32_t hash)
{
  BLI_spin_lock(&pass_cache_spin);
  /* Could be optimized with a Lookup table. */
  for (GPUPass *pass = pass_cache; pass; pass = pass->next) {
    if (pass->hash == hash && pass->engine == engine) {
      BLI_spin_unlock(&pass_cache_spin);
      return pass;
    }
  }
  BLI_spin_unlock(&pass_cache_spin);
  return nullptr;
}

static void gpu_pass_cache_insert_after(GPUPass *node, GPUPass *pass)
{
  BLI_spin_lock(&pass_cache_spin);
  pass->cached = true;
  if (node != nullptr) {
    /* Add after the first pass having the same hash. */
    pass->next = node->next;
    node->next = pass;
  }
  else {
    /* No other pass has the same hash, just prepend to the list. */
    BLI_LINKS_PREPEND(pass_cache, pass);
  }
  BLI_spin_unlock(&pass_cache_spin);
}
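
/* Illustrative note (not part of the original source): passes with equal hashes stay
 * contiguous in the list. Inserting a new pass with hash B into [A, B1, B2, C] after
 * B1 yields [A, B1, B(new), B2, C]. gpu_pass_cache_resolve_collision() relies on this
 * contiguity to stop scanning as soon as the hash changes. */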

/* Check all possible passes with the same hash. */
static GPUPass *gpu_pass_cache_resolve_collision(GPUPass *pass,
                                                 GPUShaderCreateInfo *info,
                                                 uint32_t hash)
{
  eGPUMaterialEngine engine = pass->engine;
  BLI_spin_lock(&pass_cache_spin);
  for (; pass && (pass->hash == hash); pass = pass->next) {
    if (*reinterpret_cast<ShaderCreateInfo *>(info) ==
            *reinterpret_cast<ShaderCreateInfo *>(pass->create_info) &&
        pass->engine == engine)
    {
      BLI_spin_unlock(&pass_cache_spin);
      return pass;
    }
  }
  BLI_spin_unlock(&pass_cache_spin);
  return nullptr;
}

static bool gpu_pass_is_valid(const GPUPass *pass)
{
  /* Shader is not null if compilation is successful. */
  return (pass->compiled == false || pass->shader != nullptr);
}

/** \} */

/* -------------------------------------------------------------------- */
/** \name Type > string conversion
 * \{ */

#if 0
#  define SRC_NAME(io, link, list, type) \
    link->node->name << "_" << io << BLI_findindex(&link->node->list, (const void *)link) << "_" \
                     << type
#else
#  define SRC_NAME(io, list, link, type) type
#endif

static std::ostream &operator<<(std::ostream &stream, const GPUInput *input)
{
  switch (input->source) {
    case GPU_SOURCE_FUNCTION_CALL:
    case GPU_SOURCE_OUTPUT:
      return stream << SRC_NAME("in", input, inputs, "tmp") << input->id;
    case GPU_SOURCE_CONSTANT:
      return stream << SRC_NAME("in", input, inputs, "cons") << input->id;
    case GPU_SOURCE_UNIFORM:
      return stream << "node_tree.u" << input->id;
    case GPU_SOURCE_ATTR:
      return stream << "var_attrs.v" << input->attr->id;
    case GPU_SOURCE_UNIFORM_ATTR:
      return stream << "UNI_ATTR(unf_attrs[resource_id].attr" << input->uniform_attr->id << ")";
    case GPU_SOURCE_LAYER_ATTR:
      return stream << "attr_load_layer(" << input->layer_attr->hash_code << ")";
    case GPU_SOURCE_STRUCT:
      return stream << "strct" << input->id;
    case GPU_SOURCE_TEX:
      return stream << input->texture->sampler_name;
    case GPU_SOURCE_TEX_TILED_MAPPING:
      return stream << input->texture->tiled_mapping_name;
    default:
      BLI_assert(0);
      return stream;
  }
}

static std::ostream &operator<<(std::ostream &stream, const GPUOutput *output)
{
  return stream << SRC_NAME("out", output, outputs, "tmp") << output->id;
}

/* Trick type to change overload and keep a somewhat nice syntax. */
struct GPUConstant : public GPUInput {};

/* Print data constructor (e.g. vec2(1.0f, 1.0f)). */
static std::ostream &operator<<(std::ostream &stream, const GPUConstant *input)
{
  stream << input->type << "(";
  for (int i = 0; i < input->type; i++) {
    char formatted_float[32];
    /* Use the uint representation to get the exact same bit pattern, even for NaN. This works
     * because we can pass uints as floats for constants. */
    const uint32_t *uint_vec = reinterpret_cast<const uint32_t *>(input->vec);
    SNPRINTF(formatted_float, "uintBitsToFloat(%uu)", uint_vec[i]);
    stream << formatted_float;
    if (i < input->type - 1) {
      stream << ", ";
    }
  }
  stream << ")";
  return stream;
}
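
/* Worked example (illustrative): a GPU_VEC2 constant {1.0f, 0.5f} serializes as
 *
 *   vec2(uintBitsToFloat(1065353216u), uintBitsToFloat(1056964608u))
 *
 * 1065353216 == 0x3F800000 is the bit pattern of 1.0f and 1056964608 == 0x3F000000
 * that of 0.5f, so the GLSL constant reproduces the exact float bits, NaNs and
 * uint-packed values included. */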

/** \} */

/* -------------------------------------------------------------------- */
/** \name GPUCodegen
 * \{ */

class GPUCodegen {
 public:
  GPUMaterial &mat;
  GPUNodeGraph &graph;
  GPUCodegenOutput output = {};
  GPUCodegenCreateInfo *create_info = nullptr;

 private:
  uint32_t hash_ = 0;
  BLI_HashMurmur2A hm2a_;
  ListBase ubo_inputs_ = {nullptr, nullptr};
  GPUInput *cryptomatte_input_ = nullptr;

  /** Counters used by the optimization heuristic. */
  uint nodes_total_ = 0;
  uint textures_total_ = 0;
  uint uniforms_total_ = 0;

 public:
  GPUCodegen(GPUMaterial *mat_, GPUNodeGraph *graph_) : mat(*mat_), graph(*graph_)
  {
    BLI_hash_mm2a_init(&hm2a_, GPU_material_uuid_get(&mat));
    BLI_hash_mm2a_add_int(&hm2a_, GPU_material_flag(&mat));
    create_info = new GPUCodegenCreateInfo("codegen");
    output.create_info = reinterpret_cast<GPUShaderCreateInfo *>(
        static_cast<ShaderCreateInfo *>(create_info));
  }

  ~GPUCodegen()
  {
    MEM_SAFE_FREE(cryptomatte_input_);
    delete create_info;
    BLI_freelistN(&ubo_inputs_);
  };

  void generate_graphs();
  void generate_cryptomatte();
  void generate_uniform_buffer();
  void generate_attribs();
  void generate_resources();
  void generate_library();

  uint32_t hash_get() const
  {
    return hash_;
  }

  /* Heuristic determined during pass codegen for whether a
   * more optimal variant of this material should be compiled. */
  bool should_optimize_heuristic() const
  {
    /* Optimize if any of the maximums is exceeded, but only when the baseline of at least
     * one texture, 8 uniforms and 4 nodes is also met. */
    bool do_optimize = (nodes_total_ >= 60 || textures_total_ >= 4 || uniforms_total_ >= 64) &&
                       (textures_total_ >= 1 && uniforms_total_ >= 8 && nodes_total_ >= 4);
    return do_optimize;
  }

 private:
  void set_unique_ids();

  void node_serialize(std::stringstream &eval_ss, const GPUNode *node);
  std::string graph_serialize(eGPUNodeTag tree_tag,
                              GPUNodeLink *output_link,
                              const char *output_default = nullptr);
  std::string graph_serialize(eGPUNodeTag tree_tag);
};
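
/* Worked example (illustrative, not part of the original source): a graph with
 * 70 nodes, 2 textures and 10 uniforms triggers optimization (nodes_total_ >= 60,
 * and the baseline of >= 1 texture, >= 8 uniforms and >= 4 nodes holds), while a
 * 70-node graph without any texture never does, regardless of its size. */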

void GPUCodegen::generate_attribs()
{
  if (BLI_listbase_is_empty(&graph.attributes)) {
    output.attr_load.clear();
    return;
  }

  GPUCodegenCreateInfo &info = *create_info;

  info.interface_generated = new StageInterfaceInfo("codegen_iface", "var_attrs");
  StageInterfaceInfo &iface = *info.interface_generated;
  info.vertex_out(iface);

  /* Input declaration, loading / assignment to interface and geometry shader passthrough. */
  std::stringstream load_ss;

  int slot = GPU_shader_draw_parameters_support() ? 15 : 14;
  LISTBASE_FOREACH (GPUMaterialAttribute *, attr, &graph.attributes) {
    if (slot == -1) {
      BLI_assert_msg(0, "Too many attributes");
      break;
    }
    STRNCPY(info.name_buffer.attr_names[slot], attr->input_name);
    SNPRINTF(info.name_buffer.var_names[slot], "v%d", attr->id);

    blender::StringRefNull attr_name = info.name_buffer.attr_names[slot];
    blender::StringRefNull var_name = info.name_buffer.var_names[slot];

    eGPUType input_type, iface_type;

    load_ss << "var_attrs." << var_name;
    if (attr->is_hair_length) {
      iface_type = input_type = GPU_FLOAT;
      load_ss << " = attr_load_" << input_type << "(" << attr_name << ");\n";
    }
    else {
      switch (attr->type) {
        case CD_ORCO:
          /* Need vec4 to detect usage of default attribute. */
          input_type = GPU_VEC4;
          iface_type = GPU_VEC3;
          load_ss << " = attr_load_orco(" << attr_name << ");\n";
          break;
        case CD_TANGENT:
          iface_type = input_type = GPU_VEC4;
          load_ss << " = attr_load_tangent(" << attr_name << ");\n";
          break;
        default:
          iface_type = input_type = GPU_VEC4;
          load_ss << " = attr_load_" << input_type << "(" << attr_name << ");\n";
          break;
      }
    }

    info.vertex_in(slot--, to_type(input_type), attr_name);
    iface.smooth(to_type(iface_type), var_name);
  }

  output.attr_load = load_ss.str();
}
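
/* Illustrative example of the generated strings (ids hypothetical): a generic vec4
 * attribute with id 3 placed in slot 14 produces a vertex input
 * `vertex_in(14, Type::VEC4, <attr_name>)`, a smooth interface member `v3`, and the
 * load line:
 *
 *   var_attrs.v3 = attr_load_vec4(<attr_name>);
 */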

void GPUCodegen::generate_resources()
{
  GPUCodegenCreateInfo &info = *create_info;

  std::stringstream ss;

  /* Textures. */
  int slot = 0;
  LISTBASE_FOREACH (GPUMaterialTexture *, tex, &graph.textures) {
    if (tex->colorband) {
      const char *name = info.name_buffer.append_sampler_name(tex->sampler_name);
      info.sampler(slot++, ImageType::FLOAT_1D_ARRAY, name, Frequency::BATCH);
    }
    else if (tex->sky) {
      const char *name = info.name_buffer.append_sampler_name(tex->sampler_name);
      info.sampler(slot++, ImageType::FLOAT_2D_ARRAY, name, Frequency::BATCH);
    }
    else if (tex->tiled_mapping_name[0] != '\0') {
      const char *name = info.name_buffer.append_sampler_name(tex->sampler_name);
      info.sampler(slot++, ImageType::FLOAT_2D_ARRAY, name, Frequency::BATCH);

      const char *name_mapping = info.name_buffer.append_sampler_name(tex->tiled_mapping_name);
      info.sampler(slot++, ImageType::FLOAT_1D_ARRAY, name_mapping, Frequency::BATCH);
    }
    else {
      const char *name = info.name_buffer.append_sampler_name(tex->sampler_name);
      info.sampler(slot++, ImageType::FLOAT_2D, name, Frequency::BATCH);
    }
  }

  /* Increment heuristic. */
  textures_total_ = slot;

  if (!BLI_listbase_is_empty(&ubo_inputs_)) {
    /* NOTE: generate_uniform_buffer() should have sorted the inputs before this. */
    ss << "struct NodeTree {\n";
    LISTBASE_FOREACH (LinkData *, link, &ubo_inputs_) {
      GPUInput *input = (GPUInput *)(link->data);
      if (input->source == GPU_SOURCE_CRYPTOMATTE) {
        ss << input->type << " crypto_hash;\n";
      }
      else {
        ss << input->type << " u" << input->id << ";\n";
      }
    }
    ss << "};\n\n";

    info.uniform_buf(GPU_NODE_TREE_UBO_SLOT, "NodeTree", GPU_UBO_BLOCK_NAME, Frequency::BATCH);
  }

  if (!BLI_listbase_is_empty(&graph.uniform_attrs.list)) {
    ss << "struct UniformAttrs {\n";
    LISTBASE_FOREACH (GPUUniformAttr *, attr, &graph.uniform_attrs.list) {
      ss << "vec4 attr" << attr->id << ";\n";
    }
    ss << "};\n\n";

    /* TODO(fclem): Use the macro for the length. Currently not working for EEVEE. */
    /* DRW_RESOURCE_CHUNK_LEN = 512 */
    info.uniform_buf(2, "UniformAttrs", GPU_ATTRIBUTE_UBO_BLOCK_NAME "[512]", Frequency::BATCH);
  }

  if (!BLI_listbase_is_empty(&graph.layer_attrs)) {
    info.additional_info("draw_layer_attributes");
  }

  info.typedef_source_generated = ss.str();
}
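
/* Illustrative example of the generated typedef source (ids hypothetical):
 *
 *   struct NodeTree {
 *     float crypto_hash;
 *     vec4 u7;
 *     float u12;
 *   };
 *
 * bound as the "NodeTree" uniform buffer at GPU_NODE_TREE_UBO_SLOT, with members in
 * the size-sorted order produced by generate_uniform_buffer(). */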

void GPUCodegen::generate_library()
{
  GPUCodegenCreateInfo &info = *create_info;

  void *value;
  blender::Vector<std::string> source_files;

  /* Iterate over libraries. We need to keep this struct intact in case it is required for the
   * optimization pass. The first pass just collects the keys from the GSET. Since items in a
   * GSET are unordered, iteration order can differ between invocations, so we collect the keys
   * first and sort them before doing the actual work, to guarantee stable behavior while keeping
   * insertions into the GSET cheap. */
  GHashIterator *ihash = BLI_ghashIterator_new((GHash *)graph.used_libraries);
  while (!BLI_ghashIterator_done(ihash)) {
    value = BLI_ghashIterator_getKey(ihash);
    source_files.append((const char *)value);
    BLI_ghashIterator_step(ihash);
  }
  BLI_ghashIterator_free(ihash);

  std::sort(source_files.begin(), source_files.end());
  for (auto &key : source_files) {
    auto deps = gpu_shader_dependency_get_resolved_source(key.c_str());
    info.dependencies_generated.extend_non_duplicates(deps);
  }
}

void GPUCodegen::node_serialize(std::stringstream &eval_ss, const GPUNode *node)
{
  /* Declare constants. */
  LISTBASE_FOREACH (GPUInput *, input, &node->inputs) {
    switch (input->source) {
      case GPU_SOURCE_FUNCTION_CALL:
        eval_ss << input->type << " " << input << "; " << input->function_call << input << ");\n";
        break;
      case GPU_SOURCE_STRUCT:
        eval_ss << input->type << " " << input << " = CLOSURE_DEFAULT;\n";
        break;
      case GPU_SOURCE_CONSTANT:
        eval_ss << input->type << " " << input << " = " << (GPUConstant *)input << ";\n";
        break;
      default:
        break;
    }
  }
  /* Declare temporary variables for node output storage. */
  LISTBASE_FOREACH (GPUOutput *, output, &node->outputs) {
    eval_ss << output->type << " " << output << ";\n";
  }

  /* Function call. */
  eval_ss << node->name << "(";
  /* Input arguments. */
  LISTBASE_FOREACH (GPUInput *, input, &node->inputs) {
    switch (input->source) {
      case GPU_SOURCE_OUTPUT:
      case GPU_SOURCE_ATTR: {
        /* These inputs can have non-matching types. Do conversion. */
        eGPUType to = input->type;
        eGPUType from = (input->source == GPU_SOURCE_ATTR) ? input->attr->gputype :
                                                             input->link->output->type;
        if (from != to) {
          /* Use defines declared inside codegen_lib (e.g. `vec4_from_float`). */
          eval_ss << to << "_from_" << from << "(";
        }

        if (input->source == GPU_SOURCE_ATTR) {
          eval_ss << input;
        }
        else {
          eval_ss << input->link->output;
        }

        if (from != to) {
          eval_ss << ")";
        }
        break;
      }
      default:
        eval_ss << input;
        break;
    }
    eval_ss << ", ";
  }
  /* Output arguments. */
  LISTBASE_FOREACH (GPUOutput *, output, &node->outputs) {
    eval_ss << output;
    if (output->next) {
      eval_ss << ", ";
    }
  }
  eval_ss << ");\n\n";

  /* Increment heuristic. */
  nodes_total_++;
}
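
/* Illustrative example of the GLSL emitted for one node (names and ids hypothetical):
 *
 *   vec4 cons5 = vec4(uintBitsToFloat(1065353216u), ...);
 *   Closure tmp6;
 *   node_example(cons5, vec4_from_vec3(var_attrs.v2), tmp6);
 *
 * Inputs are wrapped in the `<to>_from_<from>` conversion helpers when the linked
 * output or attribute type differs from the parameter type; each output gets a
 * freshly declared temporary. */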

std::string GPUCodegen::graph_serialize(eGPUNodeTag tree_tag,
                                        GPUNodeLink *output_link,
                                        const char *output_default)
{
  if (output_link == nullptr && output_default == nullptr) {
    return "";
  }

  std::stringstream eval_ss;
  bool has_nodes = false;
  /* NOTE: The node order is already top to bottom (or left to right in the node editor)
   * because of the evaluation order inside ntreeExecGPUNodes(). */
  LISTBASE_FOREACH (GPUNode *, node, &graph.nodes) {
    if ((node->tag & tree_tag) == 0) {
      continue;
    }
    node_serialize(eval_ss, node);
    has_nodes = true;
  }

  if (!has_nodes) {
    return "";
  }

  if (output_link) {
    eval_ss << "return " << output_link->output << ";\n";
  }
  else {
    /* Default output in case there are only AOVs. */
    eval_ss << "return " << output_default << ";\n";
  }

  std::string str = eval_ss.str();
  BLI_hash_mm2a_add(&hm2a_, reinterpret_cast<const uchar *>(str.c_str()), str.size());
  return str;
}

std::string GPUCodegen::graph_serialize(eGPUNodeTag tree_tag)
{
  std::stringstream eval_ss;
  LISTBASE_FOREACH (GPUNode *, node, &graph.nodes) {
    if (node->tag & tree_tag) {
      node_serialize(eval_ss, node);
    }
  }
  std::string str = eval_ss.str();
  BLI_hash_mm2a_add(&hm2a_, reinterpret_cast<const uchar *>(str.c_str()), str.size());
  return str;
}

void GPUCodegen::generate_cryptomatte()
{
  cryptomatte_input_ = static_cast<GPUInput *>(MEM_callocN(sizeof(GPUInput), __func__));
  cryptomatte_input_->type = GPU_FLOAT;
  cryptomatte_input_->source = GPU_SOURCE_CRYPTOMATTE;

  float material_hash = 0.0f;
  Material *material = GPU_material_get_material(&mat);
  if (material) {
    blender::bke::cryptomatte::CryptomatteHash hash(
        material->id.name + 2, BLI_strnlen(material->id.name + 2, MAX_NAME - 2));
    material_hash = hash.float_encoded();
  }
  cryptomatte_input_->vec[0] = material_hash;

  BLI_addtail(&ubo_inputs_, BLI_genericNodeN(cryptomatte_input_));
}
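
/* Illustrative note (not part of the original source): `id.name + 2` skips the
 * two-character ID code, so a material named "Metal" (id.name "MAMetal") hashes the
 * string "Metal". The float-encoded hash then travels through the node-tree UBO as
 * the `crypto_hash` member declared in generate_resources(). */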

void GPUCodegen::generate_uniform_buffer()
{
  /* Extract uniform inputs. */
  LISTBASE_FOREACH (GPUNode *, node, &graph.nodes) {
    LISTBASE_FOREACH (GPUInput *, input, &node->inputs) {
      if (input->source == GPU_SOURCE_UNIFORM && !input->link) {
        /* We handle the UBO uniforms separately. */
        BLI_addtail(&ubo_inputs_, BLI_genericNodeN(input));
        uniforms_total_++;
      }
    }
  }
  if (!BLI_listbase_is_empty(&ubo_inputs_)) {
    /* This sorts the inputs based on size. */
    GPU_material_uniform_buffer_create(&mat, &ubo_inputs_);
  }
}

/* Sets id for unique names for all inputs, resources and temp variables. */
void GPUCodegen::set_unique_ids()
{
  int id = 1;
  LISTBASE_FOREACH (GPUNode *, node, &graph.nodes) {
    LISTBASE_FOREACH (GPUInput *, input, &node->inputs) {
      input->id = id++;
    }
    LISTBASE_FOREACH (GPUOutput *, output, &node->outputs) {
      output->id = id++;
    }
  }
}

void GPUCodegen::generate_graphs()
{
  set_unique_ids();

  output.surface = graph_serialize(
      GPU_NODE_TAG_SURFACE | GPU_NODE_TAG_AOV, graph.outlink_surface, "CLOSURE_DEFAULT");
  output.volume = graph_serialize(GPU_NODE_TAG_VOLUME, graph.outlink_volume, "CLOSURE_DEFAULT");
  output.displacement = graph_serialize(
      GPU_NODE_TAG_DISPLACEMENT, graph.outlink_displacement, nullptr);
  output.thickness = graph_serialize(GPU_NODE_TAG_THICKNESS, graph.outlink_thickness, nullptr);
  if (!BLI_listbase_is_empty(&graph.outlink_compositor)) {
    output.composite = graph_serialize(GPU_NODE_TAG_COMPOSITOR);
  }

  if (!BLI_listbase_is_empty(&graph.material_functions)) {
    std::stringstream eval_ss;
    eval_ss << "\n/* Generated Functions */\n\n";
    LISTBASE_FOREACH (GPUNodeGraphFunctionLink *, func_link, &graph.material_functions) {
      /* Untag every node in the graph to avoid serializing nodes from other functions. */
      LISTBASE_FOREACH (GPUNode *, node, &graph.nodes) {
        node->tag &= ~GPU_NODE_TAG_FUNCTION;
      }
      /* Tag only the nodes needed for the current function. */
      gpu_nodes_tag(func_link->outlink, GPU_NODE_TAG_FUNCTION);
      const std::string fn = graph_serialize(GPU_NODE_TAG_FUNCTION, func_link->outlink);
      eval_ss << "float " << func_link->name << "() {\n" << fn << "}\n\n";
    }
    output.material_functions = eval_ss.str();
    /* Leave the function tags as they were before serialization. */
    LISTBASE_FOREACH (GPUNodeGraphFunctionLink *, funclink, &graph.material_functions) {
      gpu_nodes_tag(funclink->outlink, GPU_NODE_TAG_FUNCTION);
    }
  }

  LISTBASE_FOREACH (GPUMaterialAttribute *, attr, &graph.attributes) {
    BLI_hash_mm2a_add(&hm2a_, (uchar *)attr->name, strlen(attr->name));
  }

  hash_ = BLI_hash_mm2a_end(&hm2a_);
}
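
/* Illustrative example (names and ids hypothetical): a material function link named
 * "nodetree_fn0" ends up in output.material_functions as
 *
 *   float nodetree_fn0() {
 *     ...nodes tagged GPU_NODE_TAG_FUNCTION...
 *     return tmp12;
 *   }
 */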

/** \} */

/* -------------------------------------------------------------------- */
/** \name Pass create/free
 * \{ */

GPUPass *GPU_generate_pass(GPUMaterial *material,
                           GPUNodeGraph *graph,
                           eGPUMaterialEngine engine,
                           GPUCodegenCallbackFn finalize_source_cb,
                           void *thunk,
                           bool optimize_graph)
{
  gpu_node_graph_prune_unused(graph);

  /* If the optimize_graph flag is passed in, we are generating an optimized
   * variant of the GPUMaterial's GPUPass. */
  if (optimize_graph) {
    gpu_node_graph_optimize(graph);
  }

  /* Extract attributes before compiling so the generated VBOs are ready to accept the future
   * shader. */
  gpu_node_graph_finalize_uniform_attrs(graph);

  GPUCodegen codegen(material, graph);
  codegen.generate_graphs();
  codegen.generate_cryptomatte();

  GPUPass *pass_hash = nullptr;

  if (!optimize_graph) {
    /* The optimized version of the shader should not re-generate a UBO.
     * The UBO will not be used for this variant. */
    codegen.generate_uniform_buffer();

    /* Cache lookup: Reuse shaders already compiled. */
    pass_hash = gpu_pass_cache_lookup(engine, codegen.hash_get());

    /* FIXME(fclem): This is broken. Since we only check for the hash and not the full source
     * there is currently no way to detect a collision. Some advocated to only use a bigger
     * hash. */
    if (pass_hash && (pass_hash->next == nullptr || pass_hash->next->hash != codegen.hash_get())) {
      if (!gpu_pass_is_valid(pass_hash)) {
        /* Shader has already been created but failed to compile. */
        return nullptr;
      }
      /* No collision, just return the pass. */
      BLI_spin_lock(&pass_cache_spin);
      pass_hash->refcount += 1;
      BLI_spin_unlock(&pass_cache_spin);
      return pass_hash;
    }
  }

  /* Either the shader is not compiled or there is a hash collision...
   * continue generating the shader strings. */
  codegen.generate_attribs();
  codegen.generate_resources();
  codegen.generate_library();

  /* Make engine add its own code and implement the generated functions. */
  finalize_source_cb(thunk, material, &codegen.output);

  GPUPass *pass = nullptr;
  if (pass_hash) {
    /* Cache lookup: Reuse shaders already compiled. */
    pass = gpu_pass_cache_resolve_collision(
        pass_hash, codegen.output.create_info, codegen.hash_get());
  }

  if (pass) {
    /* Cache hit. Reuse the same GPUPass and GPUShader. */
    if (!gpu_pass_is_valid(pass)) {
      /* Shader has already been created but failed to compile. */
      return nullptr;
    }
    BLI_spin_lock(&pass_cache_spin);
    pass->refcount += 1;
    BLI_spin_unlock(&pass_cache_spin);
  }
  else {
    /* We still create a pass even if shader compilation
     * fails, to avoid trying to compile again and again. */
    pass = (GPUPass *)MEM_callocN(sizeof(GPUPass), "GPUPass");
    pass->shader = nullptr;
    pass->refcount = 1;
    pass->create_info = codegen.create_info;
    /* Finalize before adding the pass to the cache, to prevent race conditions. */
    pass->create_info->finalize();
    pass->engine = engine;
    pass->hash = codegen.hash_get();
    pass->compiled = false;
    pass->compilation_requested = false;
    pass->cached = false;
    /* Only flag the pass optimization hint if this is the first generated pass for a material.
     * Optimized passes cannot be optimized further, even if the heuristic is still not
     * favorable. */
    pass->should_optimize = (!optimize_graph) && codegen.should_optimize_heuristic();
    pass->async_compilation_handle = -1;
    BLI_mutex_init(&pass->shader_creation_mutex);

    /* Ownership of the create-info is transferred to the pass. */
    codegen.create_info = nullptr;

    /* Only insert non-optimized graphs into the cache.
     * Optimized graphs will continuously be recompiled with new unique source during material
     * editing, thus causing the cache to fill up quickly with materials offering minimal
     * re-use. */
    if (!optimize_graph) {
      gpu_pass_cache_insert_after(pass_hash, pass);
    }
  }
  return pass;
}
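
/* Usage sketch (illustrative, not part of the original source; the callback and
 * engine value are hypothetical):
 *
 *   static void finalize_cb(void *thunk, GPUMaterial *mat, GPUCodegenOutput *codegen)
 *   {
 *     // Wrap codegen->surface / volume / etc. in engine-specific sources.
 *   }
 *   GPUPass *pass = GPU_generate_pass(mat, &graph, engine, finalize_cb, thunk, false);
 *
 * A nullptr return means an identical shader was already generated and failed to
 * compile, so the caller should not retry. */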

bool GPU_pass_should_optimize(GPUPass *pass)
{
  /* Returns the optimization heuristic prepared during initial codegen.
   * NOTE: Optimization is currently limited to the Metal backend, as the repeated compilations
   * required for material specialization cause impactful CPU stalls on OpenGL platforms. */
  return (GPU_backend_get_type() == GPU_BACKEND_METAL) && pass->should_optimize;
}

/** \} */

/* -------------------------------------------------------------------- */
/** \name Pass compilation
 * \{ */

static int count_active_texture_sampler(GPUPass *pass, GPUShader *shader)
{
  int num_samplers = 0;

  for (const ShaderCreateInfo::Resource &res : pass->create_info->pass_resources_) {
    if (res.bind_type == ShaderCreateInfo::Resource::BindType::SAMPLER) {
      if (GPU_shader_get_uniform(shader, res.sampler.name.c_str()) != -1) {
        num_samplers += 1;
      }
    }
  }

  return num_samplers;
}

static bool gpu_pass_shader_validate(GPUPass *pass, GPUShader *shader)
{
  if (shader == nullptr) {
    return false;
  }

  /* NOTE: The only drawback of this method is that it will count a sampler
   * used in the fragment shader and only declared (but not used) in the vertex
   * shader as used by both. But this corner case does not happen in practice. */
  int active_samplers_len = count_active_texture_sampler(pass, shader);

  /* Validate against the per-stage OpenGL limits. */
  if ((active_samplers_len > GPU_max_textures_frag()) ||
      (active_samplers_len > GPU_max_textures_vert()))
  {
    return false;
  }

  if (pass->create_info->geometry_source_.is_empty() == false) {
    if (active_samplers_len > GPU_max_textures_geom()) {
      return false;
    }
  }

  return (active_samplers_len * 3 <= GPU_max_textures());
}
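
/* Illustrative note (not part of the original source): the final check uses `* 3`
 * because the same sampler list may count against the combined GPU_max_textures()
 * limit once per stage (vertex, geometry and fragment), which is the worst case. */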

GPUShaderCreateInfo *GPU_pass_begin_compilation(GPUPass *pass, const char *shname)
{
  if (!pass->compilation_requested) {
    pass->compilation_requested = true;
    pass->create_info->name_ = shname;
    GPUShaderCreateInfo *info = reinterpret_cast<GPUShaderCreateInfo *>(
        static_cast<ShaderCreateInfo *>(pass->create_info));
    return info;
  }
  return nullptr;
}

bool GPU_pass_finalize_compilation(GPUPass *pass, GPUShader *shader)
{
  bool success = true;
  if (!pass->compiled) {
    /* NOTE: Some drivers / GPUs allow more active samplers than the OpenGL limit.
     * We need to make sure to count active samplers to avoid undefined behavior. */
    if (!gpu_pass_shader_validate(pass, shader)) {
      success = false;
      if (shader != nullptr) {
        fprintf(stderr, "GPUShader: error: too many samplers in shader.\n");
        GPU_shader_free(shader);
        shader = nullptr;
      }
    }
    pass->shader = shader;
    pass->compiled = true;
  }
  return success;
}

void GPU_pass_begin_async_compilation(GPUPass *pass, const char *shname)
{
  BLI_mutex_lock(&pass->shader_creation_mutex);

  if (pass->async_compilation_handle == -1) {
    if (GPUShaderCreateInfo *info = GPU_pass_begin_compilation(pass, shname)) {
      pass->async_compilation_handle = GPU_shader_batch_create_from_infos({info});
    }
    else {
      /* The pass has already been compiled synchronously. */
      BLI_assert(pass->compiled);
      pass->async_compilation_handle = 0;
    }
  }

  BLI_mutex_unlock(&pass->shader_creation_mutex);
}

bool GPU_pass_async_compilation_try_finalize(GPUPass *pass)
{
  BLI_mutex_lock(&pass->shader_creation_mutex);

  BLI_assert(pass->async_compilation_handle != -1);
  if (pass->async_compilation_handle) {
    if (GPU_shader_batch_is_ready(pass->async_compilation_handle)) {
      /* NOTE: GPU_shader_batch_finalize() takes the handle by reference and resets it to 0. */
      GPU_pass_finalize_compilation(
          pass, GPU_shader_batch_finalize(pass->async_compilation_handle).first());
    }
  }

  BLI_mutex_unlock(&pass->shader_creation_mutex);

  return pass->async_compilation_handle == 0;
}

bool GPU_pass_compile(GPUPass *pass, const char *shname)
{
  BLI_mutex_lock(&pass->shader_creation_mutex);

  bool success = true;
  if (pass->async_compilation_handle > 0) {
    /* We're trying to compile this pass synchronously, but a pending asynchronous
     * compilation has already been started. Finalize it instead. */
    success = GPU_pass_finalize_compilation(
        pass, GPU_shader_batch_finalize(pass->async_compilation_handle).first());
  }
  else if (GPUShaderCreateInfo *info = GPU_pass_begin_compilation(pass, shname)) {
    GPUShader *shader = GPU_shader_create_from_info(info);
    success = GPU_pass_finalize_compilation(pass, shader);
  }

  BLI_mutex_unlock(&pass->shader_creation_mutex);
  return success;
}

GPUShader *GPU_pass_shader_get(GPUPass *pass)
{
  return pass->shader;
}

static void gpu_pass_free(GPUPass *pass)
{
  BLI_assert(pass->refcount == 0);
  BLI_mutex_end(&pass->shader_creation_mutex);
  if (pass->shader) {
    GPU_shader_free(pass->shader);
  }
  delete pass->create_info;
  MEM_freeN(pass);
}

void GPU_pass_acquire(GPUPass *pass)
{
  BLI_spin_lock(&pass_cache_spin);
  BLI_assert(pass->refcount > 0);
  pass->refcount++;
  BLI_spin_unlock(&pass_cache_spin);
}

void GPU_pass_release(GPUPass *pass)
{
  BLI_spin_lock(&pass_cache_spin);
  BLI_assert(pass->refcount > 0);
  pass->refcount--;
  /* Un-cached passes will not be picked up by garbage collection, so free here. */
  if (pass->refcount == 0 && !pass->cached) {
    gpu_pass_free(pass);
  }
  BLI_spin_unlock(&pass_cache_spin);
}

void GPU_pass_cache_garbage_collect()
{
  const int shadercollectrate = 60; /* Hard-coded for now. */
  int ctime = int(BLI_time_now_seconds());

  BLI_spin_lock(&pass_cache_spin);
  GPUPass *next, **prev_pass = &pass_cache;
  for (GPUPass *pass = pass_cache; pass; pass = next) {
    next = pass->next;
    if (pass->refcount > 0) {
      pass->gc_timestamp = ctime;
    }
    else if (pass->gc_timestamp + shadercollectrate < ctime) {
      /* Remove from the list. */
      *prev_pass = next;
      gpu_pass_free(pass);
      continue;
    }
    prev_pass = &pass->next;
  }
  BLI_spin_unlock(&pass_cache_spin);
}

void GPU_pass_cache_init()
{
  BLI_spin_init(&pass_cache_spin);
}

void GPU_pass_cache_free()
{
  BLI_spin_end(&pass_cache_spin);
  while (pass_cache) {
    GPUPass *next = pass_cache->next;
    gpu_pass_free(pass_cache);
    pass_cache = next;
  }
}

/** \} */

/* -------------------------------------------------------------------- */
/** \name Module init/exit
 * \{ */

void gpu_codegen_init() {}

void gpu_codegen_exit()
{
  BKE_material_defaults_free_gpu();
  GPU_shader_free_builtin_shaders();
}

/** \} */