Blender V5.0
gpu_pass.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2025 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
10
11#include "MEM_guardedalloc.h"
12
13#include "BLI_map.hh"
14#include "BLI_span.hh"
15#include "BLI_time.h"
16#include "BLI_vector.hh"
17
18#include "GPU_capabilities.hh"
19#include "GPU_context.hh"
20#include "GPU_pass.hh"
21#include "GPU_vertex_format.hh"
22#include "gpu_codegen.hh"
23
24#include <mutex>
25#include <string>
26
27using namespace blender;
28using namespace blender::gpu::shader;
29
30static bool gpu_pass_validate(GPUCodegenCreateInfo *create_info);
31
32/* -------------------------------------------------------------------- */
35
36struct GPUPass {
37 static inline std::atomic<uint64_t> compilation_counts = 0;
38
41 std::atomic<blender::gpu::Shader *> shader = nullptr;
42 std::atomic<GPUPassStatus> status = GPU_PASS_QUEUED;
43 /* Orphaned GPUPasses get freed by the garbage collector. */
44 std::atomic<int> refcount = 1;
45 double creation_timestamp = 0.0f;
46 /* The last time the refcount was greater than 0. */
47 double gc_timestamp = 0.0f;
48
50
53 bool should_optimize = false;
55
56 /* Number of seconds after creation required before compiling an optimization pass. */
57 static constexpr float optimization_delay = 10.0f;
58
60 bool deferred_compilation,
62 bool should_optimize)
63 : create_info(info),
67 {
69 if (is_optimization_pass && deferred_compilation) {
70 // Defer until all non optimization passes are compiled.
71 return;
72 }
73
74 GPUShaderCreateInfo *base_info = reinterpret_cast<GPUShaderCreateInfo *>(create_info);
75
76 if (deferred_compilation) {
79 }
80 else {
83 }
84 }
85
97
102
104 {
105 BLI_assert_msg(create_info, "GPUPass::finalize_compilation() called more than once.");
106
107 if (compilation_handle) {
109 }
110
112
114 fprintf(stderr, "blender::gpu::Shader: error: too many samplers in shader.\n");
115 }
116
118
119 MEM_delete(create_info);
120 create_info = nullptr;
121 }
122
123 void update(double timestamp)
124 {
125 update_compilation(timestamp);
126 update_gc_timestamp(timestamp);
127 }
128
129 void update_compilation(double timestamp)
130 {
131 if (compilation_handle) {
134 }
135 }
136 else if (status == GPU_PASS_QUEUED && refcount > 0 &&
137 ((creation_timestamp + optimization_delay) <= timestamp))
138 {
140 GPUShaderCreateInfo *base_info = reinterpret_cast<GPUShaderCreateInfo *>(create_info);
143 }
144 }
145
146 void update_gc_timestamp(double timestamp)
147 {
148 if (refcount != 0 || gc_timestamp == 0.0f) {
149 gc_timestamp = timestamp;
150 }
151 }
152
153 bool should_gc(int gc_collect_rate, double timestamp)
154 {
155 BLI_assert(gc_timestamp != 0.0f);
157 (timestamp - gc_timestamp) >= gc_collect_rate;
158 }
159};
160
162{
163 return pass->status;
164}
165
167{
168 /* Returns optimization heuristic prepared during
169 * initial codegen.
170 * NOTE: Only enabled on Metal, since it doesn't seem to yield any performance improvements for
171 * other backends. */
173}
174
176{
177 return pass->shader;
178}
179
181{
182 int previous_refcount = pass->refcount++;
183 UNUSED_VARS_NDEBUG(previous_refcount);
184 BLI_assert(previous_refcount > 0);
185}
186
188{
189 int previous_refcount = pass->refcount--;
190 UNUSED_VARS_NDEBUG(previous_refcount);
191 BLI_assert(previous_refcount > 0);
192}
193
198
203
205
206/* -------------------------------------------------------------------- */
213
215
217 static constexpr float gc_collect_rate_ = 60.0f;
218 static constexpr float optimization_gc_collect_rate_ = 1.0f;
219
220 Map<uint32_t, std::unique_ptr<GPUPass>> passes_[GPU_MAT_ENGINE_MAX][2 /*is_optimization_pass*/];
221 std::mutex mutex_;
222
223 public:
225 GPUCodegen &codegen,
226 bool deferred_compilation,
227 bool is_optimization_pass)
228 {
229 std::lock_guard lock(mutex_);
230
231 passes_[engine][is_optimization_pass].add(
232 codegen.hash_get(),
233 std::make_unique<GPUPass>(codegen.create_info,
234 deferred_compilation,
235 is_optimization_pass,
236 codegen.should_optimize_heuristic()));
237 };
238
240 size_t hash,
241 bool allow_deferred,
242 bool is_optimization_pass)
243 {
244 std::lock_guard lock(mutex_);
245 std::unique_ptr<GPUPass> *pass = passes_[engine][is_optimization_pass].lookup_ptr(hash);
246 if (!allow_deferred && pass && pass->get()->status == GPU_PASS_QUEUED) {
247 pass->get()->finalize_compilation();
248 }
249 return pass ? pass->get() : nullptr;
250 }
251
252 void update()
253 {
254 std::lock_guard lock(mutex_);
255
256 double timestamp = BLI_time_now_seconds();
257
258 /* Base Passes. */
259 for (auto &engine_passes : passes_) {
260 for (std::unique_ptr<GPUPass> &pass : engine_passes[false].values()) {
261 pass->update(timestamp);
262 }
263
264 engine_passes[false].remove_if(
265 [&](auto item) { return item.value->should_gc(gc_collect_rate_, timestamp); });
266 }
267
268 /* Optimization Passes */
269 for (auto &engine_passes : passes_) {
270 for (std::unique_ptr<GPUPass> &pass : engine_passes[true].values()) {
271 pass->update(timestamp);
272 }
273
274 engine_passes[true].remove_if([&](auto item) {
275 return item.value->should_gc(optimization_gc_collect_rate_, timestamp);
276 });
277 }
278 }
279
280 std::mutex &get_mutex()
281 {
282 return mutex_;
283 }
284};
285
/* File-local pointer to the single pass cache. Starts out null; it is assigned with
 * MEM_new<GPUPassCache>() and released with MEM_SAFE_DELETE() further down in this file. */
286static GPUPassCache *g_cache = nullptr;
287
289{
290 if (pass->status == GPU_PASS_QUEUED) {
291 std::lock_guard lock(g_cache->get_mutex());
292 if (pass->status == GPU_PASS_QUEUED) {
293 pass->finalize_compilation();
294 }
295 }
296}
297
299{
300 g_cache = MEM_new<GPUPassCache>(__func__);
301}
302
304{
305 g_cache->update();
306}
307
313
315{
316 MEM_SAFE_DELETE(g_cache);
317}
318
320
321/* -------------------------------------------------------------------- */
324
326{
327 int samplers_len = 0;
328 for (const ShaderCreateInfo::Resource &res : create_info->resources_get_all_()) {
330 samplers_len++;
331 }
332 }
333
334 /* Validate against GPU limit. */
335 if ((samplers_len > GPU_max_textures_frag()) || (samplers_len > GPU_max_textures_vert())) {
336 return false;
337 }
338
339 return (samplers_len * 2 <= GPU_max_textures());
340}
341
343 GPUNodeGraph *graph,
344 const char *debug_name,
345 eGPUMaterialEngine engine,
346 bool deferred_compilation,
347 GPUCodegenCallbackFn finalize_source_cb,
348 void *thunk,
349 bool optimize_graph)
350{
352
353 /* If Optimize flag is passed in, we are generating an optimized
354 * variant of the GPUMaterial's GPUPass. */
355 if (optimize_graph) {
357 }
358
359 /* Extract attributes before compiling so the generated VBOs are ready to accept the future
360 * shader. */
362
363 GPUCodegen codegen(material, graph, debug_name);
364 codegen.generate_graphs();
365 codegen.generate_cryptomatte();
366
367 GPUPass *pass = nullptr;
368
369 if (!optimize_graph) {
370 /* The optimized version of the shader should not re-generate a UBO.
371 * The UBO will not be used for this variant. */
372 codegen.generate_uniform_buffer();
373 }
374
375 /* Cache lookup: Reuse shaders already compiled. */
376 pass = g_cache->get(engine, codegen.hash_get(), deferred_compilation, optimize_graph);
377
378 if (pass) {
379 pass->refcount++;
380 return pass;
381 }
382
383 /* The shader is not compiled, continue generating the shader strings. */
384 codegen.generate_attribs();
385 codegen.generate_resources();
386
387 /* Make engine add its own code and implement the generated functions. */
388 finalize_source_cb(thunk, material, &codegen.output);
389
390 codegen.create_info->finalize();
391 g_cache->add(engine, codegen, deferred_compilation, optimize_graph);
392 codegen.create_info = nullptr;
393
394 return g_cache->get(engine, codegen.hash_get(), deferred_compilation, optimize_graph);
395}
396
#define BLI_assert(a)
Definition BLI_assert.h:46
#define BLI_assert_msg(a, msg)
Definition BLI_assert.h:53
Platform independent time functions.
double BLI_time_now_seconds(void)
Definition time.cc:113
#define UNUSED_VARS_NDEBUG(...)
int GPU_max_textures()
int GPU_max_textures_frag()
int GPU_max_textures_vert()
GPUBackendType GPU_backend_get_type()
eGPUMaterialEngine
@ GPU_MAT_ENGINE_MAX
void(*)(void *thunk, GPUMaterial *mat, struct GPUCodegenOutput *codegen) GPUCodegenCallbackFn
GPUPassStatus
Definition GPU_pass.hh:20
@ GPU_PASS_FAILED
Definition GPU_pass.hh:21
@ GPU_PASS_QUEUED
Definition GPU_pass.hh:22
@ GPU_PASS_SUCCESS
Definition GPU_pass.hh:23
#define GPU_SHADER_FREE_SAFE(shader)
blender::gpu::Shader * GPU_shader_create_from_info(const GPUShaderCreateInfo *_info)
void GPU_shader_batch_wait_for_all()
int64_t BatchHandle
Definition GPU_shader.hh:82
blender::Vector< blender::gpu::Shader * > GPU_shader_batch_finalize(BatchHandle &handle)
bool GPU_shader_batch_is_ready(BatchHandle handle)
CompilationPriority
Definition GPU_shader.hh:80
void GPU_shader_batch_cancel(BatchHandle &handle)
BatchHandle GPU_shader_batch_create_from_infos(blender::Span< const GPUShaderCreateInfo * > infos, CompilationPriority priority=CompilationPriority::High)
Read Guarded memory(de)allocation.
volatile int lock
unsigned long long int uint64_t
void update()
Definition gpu_pass.cc:252
std::mutex & get_mutex()
Definition gpu_pass.cc:280
GPUPass * get(eGPUMaterialEngine engine, size_t hash, bool allow_deferred, bool is_optimization_pass)
Definition gpu_pass.cc:239
void add(eGPUMaterialEngine engine, GPUCodegen &codegen, bool deferred_compilation, bool is_optimization_pass)
Definition gpu_pass.cc:224
const T & first() const
GPUCodegenCreateInfo * create_info
void gpu_node_graph_finalize_uniform_attrs(GPUNodeGraph *graph)
void gpu_node_graph_optimize(GPUNodeGraph *graph)
void gpu_node_graph_prune_unused(GPUNodeGraph *graph)
blender::gpu::Shader * GPU_pass_shader_get(GPUPass *pass)
Definition gpu_pass.cc:175
uint64_t GPU_pass_global_compilation_count()
Definition gpu_pass.cc:194
static bool gpu_pass_validate(GPUCodegenCreateInfo *create_info)
Definition gpu_pass.cc:325
void GPU_pass_ensure_its_ready(GPUPass *pass)
Definition gpu_pass.cc:288
static GPUPassCache * g_cache
Definition gpu_pass.cc:286
void GPU_pass_cache_wait_for_all()
Definition gpu_pass.cc:308
GPUPassStatus GPU_pass_status(GPUPass *pass)
Definition gpu_pass.cc:161
void GPU_pass_cache_init()
Definition gpu_pass.cc:298
void GPU_pass_release(GPUPass *pass)
Definition gpu_pass.cc:187
void GPU_pass_acquire(GPUPass *pass)
Definition gpu_pass.cc:180
void GPU_pass_cache_update()
Definition gpu_pass.cc:303
GPUPass * GPU_generate_pass(GPUMaterial *material, GPUNodeGraph *graph, const char *debug_name, eGPUMaterialEngine engine, bool deferred_compilation, GPUCodegenCallbackFn finalize_source_cb, void *thunk, bool optimize_graph)
Definition gpu_pass.cc:342
uint64_t GPU_pass_compilation_timestamp(GPUPass *pass)
Definition gpu_pass.cc:199
bool GPU_pass_should_optimize(GPUPass *pass)
Definition gpu_pass.cc:166
void GPU_pass_cache_free()
Definition gpu_pass.cc:314
#define hash
Definition noise_c.cc:154
GPUPass(GPUCodegenCreateInfo *info, bool deferred_compilation, bool is_optimization_pass, bool should_optimize)
Definition gpu_pass.cc:59
void finalize_compilation()
Definition gpu_pass.cc:103
bool is_optimization_pass
Definition gpu_pass.cc:54
std::atomic< GPUPassStatus > status
Definition gpu_pass.cc:42
static constexpr float optimization_delay
Definition gpu_pass.cc:57
void update_gc_timestamp(double timestamp)
Definition gpu_pass.cc:146
~GPUPass()
Definition gpu_pass.cc:86
static std::atomic< uint64_t > compilation_counts
Definition gpu_pass.cc:37
std::atomic< blender::gpu::Shader * > shader
Definition gpu_pass.cc:41
BatchHandle compilation_handle
Definition gpu_pass.cc:40
double creation_timestamp
Definition gpu_pass.cc:45
void update(double timestamp)
Definition gpu_pass.cc:123
double gc_timestamp
Definition gpu_pass.cc:47
bool should_optimize
Definition gpu_pass.cc:53
bool should_gc(int gc_collect_rate, double timestamp)
Definition gpu_pass.cc:153
std::atomic< int > refcount
Definition gpu_pass.cc:44
void update_compilation(double timestamp)
Definition gpu_pass.cc:129
GPUCodegenCreateInfo * create_info
Definition gpu_pass.cc:39
uint64_t compilation_timestamp
Definition gpu_pass.cc:49
CompilationPriority compilation_priority()
Definition gpu_pass.cc:98
void finalize(const bool recursive=false)