Blender V5.0
device_impl.mm
1/* SPDX-FileCopyrightText: 2021-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#ifdef WITH_METAL
6
7# include <map>
8# include <mutex>
9
10# include "device/metal/device.h"
12
13# include "scene/scene.h"
14
16
17# include "util/debug.h"
18# include "util/md5.h"
19# include "util/path.h"
20# include "util/time.h"
21
22# include <TargetConditionals.h>
23# include <crt_externs.h>
24
25CCL_NAMESPACE_BEGIN
26
27class MetalDevice;
28
29thread_mutex MetalDevice::existing_devices_mutex;
30std::map<int, MetalDevice *> MetalDevice::active_device_ids;
31
32/* Thread-safe device access for async work. Calling code must pass an appropriately scoped lock
33 * to existing_devices_mutex to safeguard against destruction of the returned instance. */
34MetalDevice *MetalDevice::get_device_by_ID(const int ID,
35 thread_scoped_lock & /*existing_devices_mutex_lock*/)
36{
37 auto it = active_device_ids.find(ID);
38 if (it != active_device_ids.end()) {
39 return it->second;
40 }
41 return nullptr;
42}
43
44bool MetalDevice::is_device_cancelled(const int ID)
45{
46 thread_scoped_lock lock(existing_devices_mutex);
47 return get_device_by_ID(ID, lock) == nullptr;
48}
49
50BVHLayoutMask MetalDevice::get_bvh_layout_mask(uint /*kernel_features*/) const
51{
52 return use_metalrt ? BVH_LAYOUT_METAL : BVH_LAYOUT_BVH2;
53}
54
55void MetalDevice::set_error(const string &error)
56{
57 static std::mutex s_error_mutex;
58 std::lock_guard<std::mutex> lock(s_error_mutex);
59
60 Device::set_error(error);
61
62 if (!has_error) {
63 LOG_ERROR << "Refer to the Cycles GPU rendering documentation for possible solutions:\n"
64 "https://docs.blender.org/manual/en/latest/render/cycles/gpu_rendering.html\n";
65 has_error = true;
66 }
67}
68
69MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
70 : Device(info, stats, profiler, headless), texture_info(this, "texture_info", MEM_GLOBAL)
71{
72 @autoreleasepool {
73 {
74 /* Assign an ID for this device which we can use to query whether async shader compilation
75 * requests are still relevant. */
76 thread_scoped_lock lock(existing_devices_mutex);
77 static int existing_devices_counter = 1;
78 device_id = existing_devices_counter++;
79 active_device_ids[device_id] = this;
80 }
81
82 mtlDevId = info.num;
83
84 /* select chosen device */
85 auto usable_devices = MetalInfo::get_usable_devices();
86 assert(mtlDevId < usable_devices.size());
87 mtlDevice = usable_devices[mtlDevId];
88 metal_printf("Creating new Cycles Metal device: %s", info.description.c_str());
89
90 /* Ensure that back-compatibility helpers for getting gpuAddress & gpuResourceID are set up. */
91 metal_gpu_address_helper_init(mtlDevice);
92
93 /* Enable increased concurrent shader compiler limit.
94 * This is also done by MTLContext::MTLContext, but only in GUI mode. */
95 if (@available(macOS 13.3, *)) {
96 [mtlDevice setShouldMaximizeConcurrentCompilation:YES];
97 }
98
99 max_threads_per_threadgroup = 512;
100
101 use_metalrt = info.use_hardware_raytracing;
102 if (const char *metalrt = getenv("CYCLES_METALRT")) {
103 use_metalrt = (atoi(metalrt) != 0);
104 }
105
106 if (const char *str = getenv("CYCLES_METALRT_EXTENDED_LIMITS")) {
107 use_metalrt_extended_limits = (atoi(str) != 0);
108 }
109
110# if defined(MAC_OS_VERSION_15_0)
111 /* Use "Ray tracing with per component motion interpolation" if available.
112 * Requires Apple9 support (https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf). */
113 if (use_metalrt && [mtlDevice supportsFamily:MTLGPUFamilyApple9]) {
114 /* Concave motion paths weren't correctly bounded prior to macOS 15.6 (#136253). */
115 if (@available(macos 15.6, *)) {
116 use_pcmi = DebugFlags().metal.use_metalrt_pcmi;
117 }
118 }
119# endif
120
121 if (getenv("CYCLES_DEBUG_METAL_CAPTURE_KERNEL")) {
122 capture_enabled = true;
123 }
124
125 /* Create a global counter sampling buffer when kernel profiling is enabled.
126 * There's a limit to the number of concurrent counter sampling buffers per device, so we
127 * create one that can be reused by successive device queues. */
128 if (auto str = getenv("CYCLES_METAL_PROFILING")) {
129 if (atoi(str) && [mtlDevice supportsCounterSampling:MTLCounterSamplingPointAtStageBoundary])
130 {
131 NSArray<id<MTLCounterSet>> *counterSets = [mtlDevice counterSets];
132
133 NSError *error = nil;
134 MTLCounterSampleBufferDescriptor *desc = [[MTLCounterSampleBufferDescriptor alloc] init];
135 [desc setStorageMode:MTLStorageModeShared];
136 [desc setLabel:@"CounterSampleBuffer"];
137 [desc setSampleCount:MAX_SAMPLE_BUFFER_LENGTH];
138 [desc setCounterSet:counterSets[0]];
139 mtlCounterSampleBuffer = [mtlDevice newCounterSampleBufferWithDescriptor:desc
140 error:&error];
141 [mtlCounterSampleBuffer retain];
142 }
143 }
144
145 /* Set kernel_specialization_level based on user preferences. */
146 switch (info.kernel_optimization_level) {
147 case KERNEL_OPTIMIZATION_LEVEL_OFF:
148 kernel_specialization_level = PSO_GENERIC;
149 break;
150 default:
151 case KERNEL_OPTIMIZATION_LEVEL_INTERSECT:
152 kernel_specialization_level = PSO_SPECIALIZED_INTERSECT;
153 break;
154 case KERNEL_OPTIMIZATION_LEVEL_FULL:
155 kernel_specialization_level = PSO_SPECIALIZED_SHADE;
156 break;
157 }
158
159 if (auto *envstr = getenv("CYCLES_METAL_SPECIALIZATION_LEVEL")) {
160 kernel_specialization_level = (MetalPipelineType)atoi(envstr);
161 }
162 metal_printf("kernel_specialization_level = %s",
163 kernel_type_as_string(
164 (MetalPipelineType)min((int)kernel_specialization_level, (int)PSO_NUM - 1)));
165
166 texture_bindings = [mtlDevice newBufferWithLength:8192 options:MTLResourceStorageModeShared];
167 stats.mem_alloc(texture_bindings.allocatedSize);
168
169 launch_params_buffer = [mtlDevice newBufferWithLength:sizeof(KernelParamsMetal)
170 options:MTLResourceStorageModeShared];
171 stats.mem_alloc(sizeof(KernelParamsMetal));
172
173 /* Cache unified pointer so we can write kernel params directly in place. */
174 launch_params = (KernelParamsMetal *)launch_params_buffer.contents;
175
176 /* Command queue for path-tracing work on the GPU. In a situation where multiple
177 * MetalDeviceQueues are spawned from one MetalDevice, they share the same MTLCommandQueue.
178 * This is thread safe and just as performant as each having their own instance. It also
179 * adheres to best practices of maximizing the lifetime of each MTLCommandQueue. */
180 mtlComputeCommandQueue = [mtlDevice newCommandQueue];
181
182 /* Command queue for non-tracing work on the GPU. */
183 mtlGeneralCommandQueue = [mtlDevice newCommandQueue];
184 }
185}
186
187MetalDevice::~MetalDevice()
188{
189 /* Cancel any async shader compilations that are in flight. */
190 cancel();
191
192 /* This lock safeguards against destruction during use (see other uses of
193 * existing_devices_mutex). */
194 thread_scoped_lock lock(existing_devices_mutex);
195
196 /* Release textures that weren't already freed by tex_free. */
197 for (int res = 0; res < texture_info.size(); res++) {
198 [texture_slot_map[res] release];
199 texture_slot_map[res] = nil;
200 }
201
202 free_bvh();
203 flush_delayed_free_list();
204
205 stats.mem_free(sizeof(KernelParamsMetal));
206 [launch_params_buffer release];
207
208 stats.mem_free(texture_bindings.allocatedSize);
209 [texture_bindings release];
210
211 [mtlComputeCommandQueue release];
212 [mtlGeneralCommandQueue release];
213 if (mtlCounterSampleBuffer) {
214 [mtlCounterSampleBuffer release];
215 }
216 [mtlDevice release];
217
218 texture_info.free();
219}
220
221bool MetalDevice::support_device(const uint /*kernel_features*/)
222{
223 return true;
224}
225
226bool MetalDevice::check_peer_access(Device * /*peer_device*/)
227{
228 assert(0);
229 /* does peer access make sense? */
230 return false;
231}
232
233bool MetalDevice::use_adaptive_compilation()
234{
235 return DebugFlags().metal.adaptive_compile;
236}
237
238bool MetalDevice::use_local_atomic_sort() const
239{
240 return DebugFlags().metal.use_local_atomic_sort;
241}
242
243string MetalDevice::preprocess_source(MetalPipelineType pso_type,
244 const uint kernel_features,
245 string *source)
246{
247 string global_defines;
248 if (use_adaptive_compilation()) {
249 global_defines += "#define __KERNEL_FEATURES__ " + to_string(kernel_features) + "\n";
250 }
251
252 if (use_local_atomic_sort()) {
253 global_defines += "#define __KERNEL_LOCAL_ATOMIC_SORT__\n";
254 }
255
256 if (use_metalrt) {
257 global_defines += "#define __METALRT__\n";
258 if (motion_blur) {
259 global_defines += "#define __METALRT_MOTION__\n";
260 }
261 if (use_metalrt_extended_limits) {
262 global_defines += "#define __METALRT_EXTENDED_LIMITS__\n";
263 }
264 }
265
266# ifdef WITH_CYCLES_DEBUG
267 global_defines += "#define WITH_CYCLES_DEBUG\n";
268# endif
269
270 global_defines += "#define __KERNEL_METAL_APPLE__\n";
271 if (@available(macos 14.0, *)) {
272 /* Use Program Scope Global Built-ins, when available. */
273 global_defines += "#define __METAL_GLOBAL_BUILTINS__\n";
274 }
275# ifdef WITH_NANOVDB
276 /* Compiling in NanoVDB results in a marginal drop in render performance,
277 * so disable it for specialized PSOs when no textures are using it. */
278 if ((pso_type == PSO_GENERIC || using_nanovdb) && DebugFlags().metal.use_nanovdb) {
279 global_defines += "#define WITH_NANOVDB\n";
280 }
281# endif
282
283 NSProcessInfo *processInfo = [NSProcessInfo processInfo];
284 NSOperatingSystemVersion macos_ver = [processInfo operatingSystemVersion];
285 global_defines += "#define __KERNEL_METAL_MACOS__ " + to_string(macos_ver.majorVersion) + "\n";
286
287# if TARGET_CPU_ARM64
288 global_defines += "#define __KERNEL_METAL_TARGET_CPU_ARM64__\n";
289# endif
290
291 /* Replace specific KernelData "dot" dereferences with a Metal function_constant identifier of
292 * the same character length. Build a string of all active constant values which is then hashed
293 * in order to identify the PSO.
294 */
295 if (pso_type != PSO_GENERIC) {
296 if (source) {
297 const double starttime = time_dt();
298
299# define KERNEL_STRUCT_BEGIN(name, parent) \
300 string_replace_same_length(*source, "kernel_data." #parent ".", "kernel_data_" #parent "_");
301
302 bool next_member_is_specialized = true;
303
304# define KERNEL_STRUCT_MEMBER_DONT_SPECIALIZE next_member_is_specialized = false;
305
306# define KERNEL_STRUCT_MEMBER(parent, _type, name) \
307 if (!next_member_is_specialized) { \
308 string_replace( \
309 *source, "kernel_data_" #parent "_" #name, "kernel_data." #parent ".__unused_" #name); \
310 next_member_is_specialized = true; \
311 }
312
313# include "kernel/data_template.h"
314
315# undef KERNEL_STRUCT_MEMBER
316# undef KERNEL_STRUCT_MEMBER_DONT_SPECIALIZE
317# undef KERNEL_STRUCT_BEGIN
318
319 /* Replace "kernel_data.kernel_features" memory fetches with a function constant. */
320 string_replace_same_length(
321 *source, "kernel_data.kernel_features", "kernel_data_kernel_features");
322
323 metal_printf("KernelData patching took %.1f ms", (time_dt() - starttime) * 1000.0);
324 }
325
326 /* Opt in to all of available specializations. This can be made more granular for the
327 * PSO_SPECIALIZED_INTERSECT case in order to minimize the number of specialization requests,
328 * but the overhead should be negligible as these are very quick to (re)build and aren't
329 * serialized to disk via MTLBinaryArchives.
330 */
331 global_defines += "#define __KERNEL_USE_DATA_CONSTANTS__\n";
332 }
333
334 if (source) {
335 *source = global_defines + *source;
336 }
337
338 MD5Hash md5;
339 md5.append(global_defines);
340 return md5.get_hex();
341}
342
343void MetalDevice::make_source(MetalPipelineType pso_type, const uint kernel_features)
344{
345 string &source = this->source[pso_type];
346 source = "\n#include \"kernel/device/metal/kernel.metal\"\n";
347 source = path_source_replace_includes(source, path_get("source"));
348
349 /* Perform any required specialization on the source.
350 * With Metal function constants we can generate a single variant of the kernel source which can
351 * be repeatedly respecialized.
352 */
353 global_defines_md5[pso_type] = preprocess_source(pso_type, kernel_features, &source);
354}
355
356bool MetalDevice::load_kernels(const uint _kernel_features)
357{
358 @autoreleasepool {
359 kernel_features |= _kernel_features;
360
361 /* check if GPU is supported */
362 if (!support_device(kernel_features)) {
363 return false;
364 }
365
366 /* Keep track of whether motion blur is enabled, so as to enable/disable motion in BVH builds.
367 * This is necessary since objects may be reported to have motion if the Vector pass is
368 * active, but may still need to be rendered without motion blur if that pass isn't active as well.
369 */
370 motion_blur = motion_blur || (kernel_features & KERNEL_FEATURE_OBJECT_MOTION);
371
372 /* Only request generic kernels if they aren't cached in memory. */
373 refresh_source_and_kernels_md5(PSO_GENERIC);
374 if (MetalDeviceKernels::should_load_kernels(this, PSO_GENERIC)) {
375 /* If needed, load them asynchronously in order to responsively message progress to the user.
376 */
377 int this_device_id = this->device_id;
378 auto compile_kernels_fn = ^() {
379 compile_and_load(this_device_id, PSO_GENERIC);
380 };
381
382 dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0),
383 compile_kernels_fn);
384 }
385 }
386 return true;
387}
388
389void MetalDevice::refresh_source_and_kernels_md5(MetalPipelineType pso_type)
390{
391 string defines_md5 = preprocess_source(pso_type, kernel_features);
392
393 /* Rebuild the source string if the injected block of #defines has changed. */
394 if (global_defines_md5[pso_type] != defines_md5) {
395 make_source(pso_type, kernel_features);
396 }
397
398 string constant_values;
399 if (pso_type != PSO_GENERIC) {
400 bool next_member_is_specialized = true;
401
402# define KERNEL_STRUCT_MEMBER_DONT_SPECIALIZE next_member_is_specialized = false;
403
404 /* Add specialization constants to md5 so that 'get_best_pipeline' is able to return a suitable
405 * match. */
406# define KERNEL_STRUCT_MEMBER(parent, _type, name) \
407 if (next_member_is_specialized) { \
408 constant_values += string(#parent "." #name "=") + \
409 to_string(_type(launch_params->data.parent.name)) + "\n"; \
410 } \
411 else { \
412 next_member_is_specialized = true; \
413 }
414
415# include "kernel/data_template.h"
416
417# undef KERNEL_STRUCT_MEMBER
418# undef KERNEL_STRUCT_MEMBER_DONT_SPECIALIZE
419 }
420
421 MD5Hash md5;
422 md5.append(constant_values);
423 md5.append(source[pso_type]);
424 if (use_metalrt) {
425 md5.append(string_printf("metalrt_features=%d", kernel_features & METALRT_FEATURE_MASK));
426 }
427 kernels_md5[pso_type] = md5.get_hex();
428}
429
430void MetalDevice::compile_and_load(const int device_id, MetalPipelineType pso_type)
431{
432 @autoreleasepool {
433 /* Thread-safe front-end compilation. Typically the MSL->AIR compilation can take a few
434 * seconds, so we avoid blocking device tear-down if the user cancels a render immediately. */
435
436 id<MTLDevice> mtlDevice;
437 string source;
438
439 /* Safely gather any state required for the MSL->AIR compilation. */
440 {
441 thread_scoped_lock lock(existing_devices_mutex);
442
443 /* Check whether the device still exists. */
444 MetalDevice *instance = get_device_by_ID(device_id, lock);
445 if (!instance) {
446 metal_printf("Ignoring %s compilation request - device no longer exists",
447 kernel_type_as_string(pso_type));
448 return;
449 }
450
451 if (!MetalDeviceKernels::should_load_kernels(instance, pso_type)) {
452 /* We already have a full set of matching pipelines which are cached or queued. Return
453 * early to avoid redundant MTLLibrary compilation. */
454 metal_printf("Ignoring %s compilation request - kernels already requested",
455 kernel_type_as_string(pso_type));
456 return;
457 }
458
459 mtlDevice = instance->mtlDevice;
460 source = instance->source[pso_type];
461 }
462
463 /* Perform the actual compilation using our cached context. The MetalDevice can safely destruct
464 * in this time. */
465
466 MTLCompileOptions *options = [[MTLCompileOptions alloc] init];
467
468 options.fastMathEnabled = YES;
469 if (@available(macos 12.0, *)) {
470 options.languageVersion = MTLLanguageVersion2_4;
471 }
472# if defined(MAC_OS_VERSION_13_0)
473 if (@available(macos 13.0, *)) {
474 options.languageVersion = MTLLanguageVersion3_0;
475 }
476# endif
477# if defined(MAC_OS_VERSION_14_0)
478 if (@available(macos 14.0, *)) {
479 options.languageVersion = MTLLanguageVersion3_1;
480 }
481# endif
482# if defined(MAC_OS_VERSION_15_0)
483 if (@available(macos 15.0, *)) {
484 options.languageVersion = MTLLanguageVersion3_2;
485 if (const char *loglevel = getenv("MTL_LOG_LEVEL")) {
486 if (strcmp(loglevel, "MTLLogLevelDebug") == 0) {
487 options.enableLogging = true;
488 }
489 }
490 }
491# endif
492
493 if (getenv("CYCLES_METAL_PROFILING") || getenv("CYCLES_METAL_DEBUG")) {
494 path_write_text(path_cache_get(string_printf("%s.metal", kernel_type_as_string(pso_type))),
495 source);
496 }
497
498 double starttime = time_dt();
499
500 NSError *error = nullptr;
501 id<MTLLibrary> mtlLibrary = [mtlDevice newLibraryWithSource:@(source.c_str())
502 options:options
503 error:&error];
504
505 metal_printf("Front-end compilation finished in %.1f seconds (%s)",
506 time_dt() - starttime,
507 kernel_type_as_string(pso_type));
508
509 [options release];
510
511 bool blocking_pso_build = (getenv("CYCLES_METAL_PROFILING") ||
512 MetalDeviceKernels::is_benchmark_warmup());
513 if (blocking_pso_build) {
514 MetalDeviceKernels::wait_for_all();
515 starttime = 0.0;
516 }
517
518 /* Save the compiled MTLLibrary and trigger the AIR->PSO builds (if the MetalDevice still
519 * exists). */
520 {
521 thread_scoped_lock lock(existing_devices_mutex);
522 if (MetalDevice *instance = get_device_by_ID(device_id, lock)) {
523 if (mtlLibrary) {
524 if (error && [error localizedDescription]) {
525 LOG_WARNING << "MSL compilation messages: "
526 << [[error localizedDescription] UTF8String];
527 }
528
529 instance->mtlLibrary[pso_type] = mtlLibrary;
530
531 starttime = time_dt();
532 MetalDeviceKernels::load(instance, pso_type);
533 }
534 else {
535 NSString *err = [error localizedDescription];
536 instance->set_error(string_printf("Failed to compile library:\n%s", [err UTF8String]));
537 }
538 }
539 }
540
541 if (starttime && blocking_pso_build) {
542 MetalDeviceKernels::wait_for_all();
543
544 metal_printf("Back-end compilation finished in %.1f seconds (%s)",
545 time_dt() - starttime,
546 kernel_type_as_string(pso_type));
547 }
548 }
549}
550
551bool MetalDevice::is_texture(const TextureInfo &tex)
552{
553 return tex.height > 0;
554}
555
556void MetalDevice::load_texture_info() {}
557
558void MetalDevice::erase_allocation(device_memory &mem)
559{
560 stats.mem_free(mem.device_size);
561 mem.device_pointer = 0;
562 mem.device_size = 0;
563
564 auto it = metal_mem_map.find(&mem);
565 if (it != metal_mem_map.end()) {
566 MetalMem *mmem = it->second.get();
567
568 /* blank out reference to resource in the launch params (fixes crash #94736) */
569 if (mmem->pointer_index >= 0) {
570 device_ptr *pointers = (device_ptr *)launch_params;
571 pointers[mmem->pointer_index] = 0;
572 }
573 metal_mem_map.erase(it);
574 }
575}
576
577bool MetalDevice::max_working_set_exceeded(const size_t safety_margin) const
578{
579 /* We're allowed to allocate beyond the safe working set size, but then if all resources are made
580 * resident we will get command buffer failures at render time. */
581 size_t available = [mtlDevice recommendedMaxWorkingSetSize] - safety_margin;
582 return (stats.mem_used > available);
583}
584
585MetalDevice::MetalMem *MetalDevice::generic_alloc(device_memory &mem)
586{
587 @autoreleasepool {
588 size_t size = mem.memory_size();
589
590 mem.device_pointer = 0;
591
592 id<MTLBuffer> metal_buffer = nil;
593 MTLResourceOptions options = MTLResourceStorageModeShared;
594
595 if (size > 0) {
596 if (mem.type == MEM_DEVICE_ONLY && !capture_enabled) {
597 options = MTLResourceStorageModePrivate;
598 }
599
600 metal_buffer = [mtlDevice newBufferWithLength:size options:options];
601
602 if (!metal_buffer) {
603 set_error("System is out of GPU memory");
604 return nullptr;
605 }
606 }
607
608 if (mem.name) {
609 LOG_DEBUG << "Buffer allocate: " << mem.name << ", "
610 << string_human_readable_number(mem.memory_size()) << " bytes. ("
611 << string_human_readable_size(mem.memory_size()) << ")";
612 }
613
614 mem.device_size = metal_buffer.allocatedSize;
615 stats.mem_alloc(mem.device_size);
616
617 metal_buffer.label = [NSString stringWithFormat:@"%s", mem.name];
618
619 std::lock_guard<std::recursive_mutex> lock(metal_mem_map_mutex);
620
621 assert(metal_mem_map.count(&mem) == 0); /* assert against double-alloc */
622 unique_ptr<MetalMem> mmem = make_unique<MetalMem>();
623
624 mmem->mem = &mem;
625 mmem->mtlBuffer = metal_buffer;
626 mmem->offset = 0;
627 mmem->size = size;
628 if (options != MTLResourceStorageModePrivate) {
629 mmem->hostPtr = [metal_buffer contents];
630 }
631 else {
632 mmem->hostPtr = nullptr;
633 }
634
635 /* encode device_pointer as (MetalMem*) in order to handle resource relocation and device
636 * pointer recalculation */
637 mem.device_pointer = device_ptr(mmem.get());
638
639 if (metal_buffer.storageMode == MTLStorageModeShared) {
640 /* Replace host pointer with our host allocation. */
641 if (mem.host_pointer && mem.host_pointer != mmem->hostPtr) {
642 memcpy(mmem->hostPtr, mem.host_pointer, size);
643
644 host_free(mem.type, mem.host_pointer, mem.memory_size());
645 mem.host_pointer = mmem->hostPtr;
646 }
647 mem.shared_pointer = mmem->hostPtr;
648 mem.shared_counter++;
649 }
650
651 MetalMem *mmem_ptr = mmem.get();
652 metal_mem_map[&mem] = std::move(mmem);
653
654 if (max_working_set_exceeded()) {
655 set_error("System is out of GPU memory");
656 return nullptr;
657 }
658
659 return mmem_ptr;
660 }
661}
662
663void MetalDevice::generic_copy_to(device_memory &)
664{
665 /* No need to copy - Apple Silicon has Unified Memory Architecture. */
666}
667
668void MetalDevice::generic_free(device_memory &mem)
669{
670 if (!mem.device_pointer) {
671 return;
672 }
673
674 /* Host pointer should already have been freed at this point. If not we might
675 * end up freeing shared memory and can't recover original host memory. */
676 assert(mem.host_pointer == nullptr);
677
678 std::lock_guard<std::recursive_mutex> lock(metal_mem_map_mutex);
679 MetalMem &mmem = *metal_mem_map.at(&mem);
680 size_t size = mmem.size;
681
682 bool free_mtlBuffer = true;
683
684 /* If this is shared, reference counting is used to safely free memory. */
685 if (mem.shared_pointer) {
686 assert(mem.shared_counter > 0);
687 if (--mem.shared_counter > 0) {
688 free_mtlBuffer = false;
689 }
690 }
691
692 if (free_mtlBuffer) {
693 if (mem.host_pointer && mem.host_pointer == mem.shared_pointer) {
694 /* Safely move the device-side data back to the host before it is freed.
695 * We should actually never reach this code as it is inefficient, but
696 * better than to crash if there is a bug. */
697 assert(!"Metal device should not copy memory back to host");
698 mem.host_pointer = mem.host_alloc(size);
699 memcpy(mem.host_pointer, mem.shared_pointer, size);
700 }
701
702 mem.shared_pointer = nullptr;
703
704 /* Free device memory. */
705 delayed_free_list.push_back(mmem.mtlBuffer);
706 mmem.mtlBuffer = nil;
707 }
708
709 erase_allocation(mem);
710}
711
712void MetalDevice::mem_alloc(device_memory &mem)
713{
714 if (mem.type == MEM_TEXTURE) {
715 assert(!"mem_alloc not supported for textures.");
716 }
717 else if (mem.type == MEM_GLOBAL) {
718 generic_alloc(mem);
719 }
720 else {
721 generic_alloc(mem);
722 }
723}
724
725void MetalDevice::mem_copy_to(device_memory &mem)
726{
727 if (!mem.device_pointer) {
728 if (mem.type == MEM_GLOBAL) {
729 global_alloc(mem);
730 }
731 else if (mem.type == MEM_TEXTURE) {
732 tex_alloc((device_texture &)mem);
733 }
734 else {
735 generic_alloc(mem);
736 generic_copy_to(mem);
737 }
738 }
739 else if (mem.is_resident(this)) {
740 if (mem.type == MEM_GLOBAL) {
741 generic_copy_to(mem);
742 }
743 else if (mem.type == MEM_TEXTURE) {
744 tex_copy_to((device_texture &)mem);
745 }
746 else {
747 generic_copy_to(mem);
748 }
749 }
750}
751
752void MetalDevice::mem_move_to_host(device_memory & /*mem*/)
753{
754 /* Metal implements its own mechanism for moving host memory. */
755 assert(!"Metal does not support mem_move_to_host");
756}
757
758void MetalDevice::mem_copy_from(device_memory &, const size_t, size_t, const size_t, size_t)
759{
760 /* No need to copy - Apple Silicon has Unified Memory Architecture. */
761}
762
763void MetalDevice::mem_zero(device_memory &mem)
764{
765 if (!mem.device_pointer) {
766 mem_alloc(mem);
767 }
769 memset(mem.shared_pointer, 0, mem.memory_size());
770}
771
772void MetalDevice::mem_free(device_memory &mem)
773{
774 if (mem.type == MEM_GLOBAL) {
775 global_free(mem);
776 }
777 else if (mem.type == MEM_TEXTURE) {
778 tex_free((device_texture &)mem);
779 }
780 else {
781 generic_free(mem);
782 }
783}
784
785device_ptr MetalDevice::mem_alloc_sub_ptr(device_memory & /*mem*/,
786 size_t /*offset*/,
787 size_t /*size*/)
788{
789 /* METAL_WIP - revive if necessary */
790 assert(0);
791 return 0;
792}
793
794void MetalDevice::cancel()
795{
796 /* Remove this device's ID from the list of active devices. Any pending compilation requests
797 * originating from this session will be cancelled. */
798 thread_scoped_lock lock(existing_devices_mutex);
799 if (device_id) {
800 active_device_ids.erase(device_id);
801 device_id = 0;
802 }
803}
804
805bool MetalDevice::is_ready(string &status) const
806{
807 if (!error_msg.empty()) {
808 /* Avoid hanging if we had an error. */
809 return true;
810 }
811
812 int num_loaded = MetalDeviceKernels::get_loaded_kernel_count(this, PSO_GENERIC);
813 if (num_loaded < DEVICE_KERNEL_NUM) {
814 status = string_printf("%d / %d render kernels loaded (may take a few minutes the first time)",
815 num_loaded,
816 DEVICE_KERNEL_NUM);
817 return false;
818 }
819
820 if (int num_requests = MetalDeviceKernels::num_incomplete_specialization_requests()) {
821 status = string_printf("%d kernels to optimize", num_requests);
822 }
823 else if (kernel_specialization_level == PSO_SPECIALIZED_INTERSECT) {
824 status = "Using optimized intersection kernels";
825 }
826 else if (kernel_specialization_level == PSO_SPECIALIZED_SHADE) {
827 status = "Using optimized kernels";
828 }
829
830 metal_printf("MetalDevice::is_ready(...) --> true");
831 return true;
832}
833
834bool MetalDevice::set_bvh_limits(size_t instance_count, size_t max_prim_count)
835{
836 /* For object & primitive counts above a certain limit, MetalRT requires extended limits to be
837 * built into the kernels, and when building BVHs. Following best practices, this should only
838 * be enabled when necessary. See
839 * https://developer.apple.com/documentation/metal/mtlaccelerationstructureusage/mtlaccelerationstructureusageextendedlimits?language=objc
840 */
841
842 const int standard_limits_max_prim_count = (1 << 28);
843 const int standard_limits_max_instance_count = (1 << 24);
844
845 bool using_metalrt_extended_limits_before = use_metalrt_extended_limits;
846
847 /* Enable extended limits if object count exceeds max supported by standard limits.
848 * Once enabled, it remains enabled for the lifetime of the device. */
849 if (instance_count > standard_limits_max_instance_count ||
850 max_prim_count > standard_limits_max_prim_count)
851 {
852 use_metalrt_extended_limits = true;
853 metal_printf("Enabling MetalRT extended limits (max_prim_count = %zu, instance_count = %zu)",
854 max_prim_count,
855 instance_count);
856 }
857
858 /* All BVHs need to be rebuilt if the extended limits state changes. */
859 return using_metalrt_extended_limits_before != use_metalrt_extended_limits;
860}
861
862void MetalDevice::optimize_for_scene(Scene *scene)
863{
864 MetalPipelineType specialization_level = kernel_specialization_level;
865
866 if (!scene->params.background) {
867 /* In live viewport, don't specialize beyond intersection kernels for responsiveness. */
868 specialization_level = (MetalPipelineType)min(specialization_level, PSO_SPECIALIZED_INTERSECT);
869 }
870
871 /* For responsive rendering, specialize the kernels in the background, and only if there isn't an
872 * existing "optimize_for_scene" request in flight. */
873 int this_device_id = this->device_id;
874 auto specialize_kernels_fn = ^() {
875 for (int level = 1; level <= int(specialization_level); level++) {
876 compile_and_load(this_device_id, MetalPipelineType(level));
877 }
878 };
879
880 /* In normal use, we always compile the specialized kernels in the background. */
881 bool specialize_in_background = true;
882
883 /* Block if a per-kernel profiling is enabled (ensure steady rendering rate). */
884 if (getenv("CYCLES_METAL_PROFILING") != nullptr) {
885 specialize_in_background = false;
886 }
887
888 /* Block during benchmark warm-up to ensure kernels are cached prior to the observed run. */
889 if (MetalDeviceKernels::is_benchmark_warmup()) {
890 specialize_in_background = false;
891 }
892
893 if (specialize_in_background) {
894 if (MetalDeviceKernels::num_incomplete_specialization_requests() == 0) {
895 dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0),
896 specialize_kernels_fn);
897 }
898 else {
899 metal_printf("\"optimize_for_scene\" request already in flight - dropping request");
900 }
901 }
902 else {
903 specialize_kernels_fn();
904 }
905}
906
907void MetalDevice::const_copy_to(const char *name, void *host, const size_t size)
908{
909 if (strcmp(name, "data") == 0) {
910 assert(size == sizeof(KernelData));
911 memcpy((uint8_t *)&launch_params->data, host, sizeof(KernelData));
912
913 /* Refresh the kernels_md5 checksums for specialized kernel sets. */
914 for (int level = 1; level <= int(kernel_specialization_level); level++) {
915 refresh_source_and_kernels_md5(MetalPipelineType(level));
916 }
917 return;
918 }
919
920 auto update_launch_pointers = [&](size_t offset, void *data, const size_t pointers_size) {
921 uint64_t *addresses = (uint64_t *)((uint8_t *)launch_params + offset);
922
923 MetalMem **mmem = (MetalMem **)data;
924 int pointer_count = pointers_size / sizeof(device_ptr);
925 int pointer_index = offset / sizeof(device_ptr);
926 for (int i = 0; i < pointer_count; i++) {
927 addresses[i] = 0;
928 if (mmem[i]) {
929 mmem[i]->pointer_index = pointer_index + i;
930 if (mmem[i]->mtlBuffer) {
931 if (@available(macOS 13.0, *)) {
932 addresses[i] = metal_gpuAddress(mmem[i]->mtlBuffer);
933 }
934 }
935 }
936 }
937 };
938
939 /* Update data storage pointers in launch parameters. */
940 if (strcmp(name, "integrator_state") == 0) {
941 /* IntegratorStateGPU is contiguous pointers up until sort_partition_divisor. */
942 const size_t pointer_block_size = offsetof(IntegratorStateGPU, sort_partition_divisor);
943 update_launch_pointers(
944 offsetof(KernelParamsMetal, integrator_state), host, pointer_block_size);
945
946 /* Ensure the non-pointers part of IntegratorStateGPU is copied (this is the proper fix for
947 * #144713). */
948 memcpy((uint8_t *)&launch_params->integrator_state + pointer_block_size,
949 (uint8_t *)host + pointer_block_size,
950 sizeof(IntegratorStateGPU) - pointer_block_size);
951 }
952# define KERNEL_DATA_ARRAY(data_type, tex_name) \
953 else if (strcmp(name, #tex_name) == 0) { \
954 update_launch_pointers(offsetof(KernelParamsMetal, tex_name), host, size); \
955 }
956# include "kernel/data_arrays.h"
957# undef KERNEL_DATA_ARRAY
958}
959
960void MetalDevice::global_alloc(device_memory &mem)
961{
962 if (mem.is_resident(this)) {
963 generic_alloc(mem);
964 generic_copy_to(mem);
965 }
966
967 const_copy_to(mem.name, &mem.device_pointer, sizeof(mem.device_pointer));
968}
969
970void MetalDevice::global_free(device_memory &mem)
971{
972 if (mem.is_resident(this) && mem.device_pointer) {
973 generic_free(mem);
974 }
975}
976
977void MetalDevice::tex_alloc_as_buffer(device_texture &mem)
978{
979 MetalDevice::MetalMem *mmem = generic_alloc(mem);
980 generic_copy_to(mem);
981
982 /* Resize once */
983 const uint slot = mem.slot;
984 if (slot >= texture_info.size()) {
985 /* Allocate some slots in advance, to reduce amount
986 * of re-allocations. */
987 texture_info.resize(round_up(slot + 1, 128));
988 texture_slot_map.resize(round_up(slot + 1, 128));
989 }
990
991 texture_info[slot] = mem.info;
992 texture_slot_map[slot] = mmem->mtlBuffer;
993
994 if (is_nanovdb_type(mem.info.data_type)) {
995 using_nanovdb = true;
996 }
997}
998
999void MetalDevice::tex_alloc(device_texture &mem)
1000{
1001 @autoreleasepool {
1002 /* Check that dimensions fit within maximum allowable size.
1003 * If 1D texture is allocated, use 1D buffer.
1004 * See: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf */
1005 if (mem.data_height > 0) {
1006 if (mem.data_width > 16384 || mem.data_height > 16384) {
1007 set_error(string_printf(
1008 "Texture exceeds maximum allowed size of 16384 x 16384 (requested: %zu x %zu)",
1009 mem.data_width,
1010 mem.data_height));
1011 return;
1012 }
1013 }
1014
1015 /* General variables for both architectures */
1016 size_t size = mem.memory_size();
1017
1018 /* sampler_index maps into the GPU's constant 'metal_samplers' array */
1019 uint64_t sampler_index = mem.info.extension;
1020 if (mem.info.interpolation != INTERPOLATION_CLOSEST) {
1021 sampler_index += 4;
1022 }
1023
1024 /* Image Texture Storage */
1025 MTLPixelFormat format;
1026 switch (mem.data_type) {
1027 case TYPE_UCHAR: {
1028 MTLPixelFormat formats[] = {MTLPixelFormatR8Unorm,
1029 MTLPixelFormatRG8Unorm,
1030 MTLPixelFormatInvalid,
1031 MTLPixelFormatRGBA8Unorm};
1032 format = formats[mem.data_elements - 1];
1033 } break;
1034 case TYPE_UINT16: {
1035 MTLPixelFormat formats[] = {MTLPixelFormatR16Unorm,
1036 MTLPixelFormatRG16Unorm,
1037 MTLPixelFormatInvalid,
1038 MTLPixelFormatRGBA16Unorm};
1039 format = formats[mem.data_elements - 1];
1040 } break;
1041 case TYPE_UINT: {
1042 MTLPixelFormat formats[] = {MTLPixelFormatR32Uint,
1043 MTLPixelFormatRG32Uint,
1044 MTLPixelFormatInvalid,
1045 MTLPixelFormatRGBA32Uint};
1046 format = formats[mem.data_elements - 1];
1047 } break;
1048 case TYPE_INT: {
1049 MTLPixelFormat formats[] = {MTLPixelFormatR32Sint,
1050 MTLPixelFormatRG32Sint,
1051 MTLPixelFormatInvalid,
1052 MTLPixelFormatRGBA32Sint};
1053 format = formats[mem.data_elements - 1];
1054 } break;
1055 case TYPE_FLOAT: {
1056 MTLPixelFormat formats[] = {MTLPixelFormatR32Float,
1057 MTLPixelFormatRG32Float,
1058 MTLPixelFormatInvalid,
1059 MTLPixelFormatRGBA32Float};
1060 format = formats[mem.data_elements - 1];
1061 } break;
1062 case TYPE_HALF: {
1063 MTLPixelFormat formats[] = {MTLPixelFormatR16Float,
1064 MTLPixelFormatRG16Float,
1065 MTLPixelFormatInvalid,
1066 MTLPixelFormatRGBA16Float};
1067 format = formats[mem.data_elements - 1];
1068 } break;
1069 default:
1070 assert(0);
1071 return;
1072 }
1073
1074 assert(format != MTLPixelFormatInvalid);
1075
1076 id<MTLTexture> mtlTexture = nil;
1077 size_t src_pitch = mem.data_width * datatype_size(mem.data_type) * mem.data_elements;
1078
1079 if (mem.data_height > 0) {
1080 /* 2D texture */
1081 MTLTextureDescriptor *desc;
1082
1083 desc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:format
1084 width:mem.data_width
1085 height:mem.data_height
1086 mipmapped:NO];
1087
1088 desc.storageMode = MTLStorageModeShared;
1089 desc.usage = MTLTextureUsageShaderRead;
1090
1091 /* Disallow lossless texture compression. Path-tracing texture access patterns are very
1092 * random, and cache reuse gains are typically too low to offset the decompression overheads.
1093 */
1094 desc.allowGPUOptimizedContents = false;
1095
1096 LOG_DEBUG << "Texture 2D allocate: " << mem.name << ", "
1097 << string_human_readable_number(mem.memory_size()) << " bytes. ("
1098 << string_human_readable_size(mem.memory_size()) << ")";
1099
1100 mtlTexture = [mtlDevice newTextureWithDescriptor:desc];
1101 if (!mtlTexture) {
1102 set_error("System is out of GPU memory");
1103 return;
1104 }
1105
1106 [mtlTexture replaceRegion:MTLRegionMake2D(0, 0, mem.data_width, mem.data_height)
1107 mipmapLevel:0
1108 withBytes:mem.host_pointer
1109 bytesPerRow:src_pitch];
1110 }
1111 else {
1112 /* 1D texture, using linear memory. */
1113 tex_alloc_as_buffer(mem);
1114 return;
1115 }
1116
1117 mem.device_pointer = (device_ptr)mtlTexture;
1118 mem.device_size = size;
1119 stats.mem_alloc(size);
1120
1121 std::lock_guard<std::recursive_mutex> lock(metal_mem_map_mutex);
1122 unique_ptr<MetalMem> mmem = make_unique<MetalMem>();
1123 mmem->mem = &mem;
1124 mmem->mtlTexture = mtlTexture;
1125 metal_mem_map[&mem] = std::move(mmem);
1126
1127 /* Resize once */
1128 const uint slot = mem.slot;
1129 if (slot >= texture_info.size()) {
1130 /* Allocate some slots in advance, to reduce amount
1131 * of re-allocations. */
1132 texture_info.resize(slot + 128);
1133 texture_slot_map.resize(slot + 128);
1134
1135 ssize_t min_buffer_length = sizeof(void *) * texture_info.size();
1136 if (!texture_bindings || (texture_bindings.length < min_buffer_length)) {
1137 if (texture_bindings) {
1138 delayed_free_list.push_back(texture_bindings);
1139 stats.mem_free(texture_bindings.allocatedSize);
1140 }
1141 texture_bindings = [mtlDevice newBufferWithLength:min_buffer_length
1142 options:MTLResourceStorageModeShared];
1143
1144 stats.mem_alloc(texture_bindings.allocatedSize);
1145 }
1146 }
1147
1148 /* Set Mapping. */
1149 texture_slot_map[slot] = mtlTexture;
1150 texture_info[slot] = mem.info;
1151 texture_info[slot].data = uint64_t(slot) | (sampler_index << 32);
1152
1153 if (max_working_set_exceeded()) {
1154 set_error("System is out of GPU memory");
1155 }
1156 }
1157}
1158
1159void MetalDevice::tex_copy_to(device_texture &mem)
1160{
1161 if (mem.is_resident(this)) {
1162 const size_t src_pitch = mem.data_width * datatype_size(mem.data_type) * mem.data_elements;
1163
1164 if (mem.data_height > 0) {
1165 id<MTLTexture> mtlTexture;
1166 {
1167 std::lock_guard<std::recursive_mutex> lock(metal_mem_map_mutex);
1168 mtlTexture = metal_mem_map.at(&mem)->mtlTexture;
1169 }
1170 [mtlTexture replaceRegion:MTLRegionMake2D(0, 0, mem.data_width, mem.data_height)
1171 mipmapLevel:0
1172 withBytes:mem.host_pointer
1173 bytesPerRow:src_pitch];
1174 }
1175 else {
1176 generic_copy_to(mem);
1177 }
1178 }
1179}
1180
1181void MetalDevice::tex_free(device_texture &mem)
1182{
1183 int slot = mem.slot;
1184 if (mem.data_height == 0) {
1185 generic_free(mem);
1186 }
1187 else if (metal_mem_map.count(&mem)) {
1188 std::lock_guard<std::recursive_mutex> lock(metal_mem_map_mutex);
1189 MetalMem &mmem = *metal_mem_map.at(&mem);
1190
1191 /* Free bindless texture. */
1192 delayed_free_list.push_back(mmem.mtlTexture);
1193 mmem.mtlTexture = nil;
1194 erase_allocation(mem);
1195 }
1196 texture_slot_map[slot] = nil;
1197}
1198
1199unique_ptr<DeviceQueue> MetalDevice::gpu_queue_create()
1200{
1201 return make_unique<MetalDeviceQueue>(this);
1202}
1203
1204bool MetalDevice::should_use_graphics_interop(const GraphicsInteropDevice &interop_device,
1205 const bool /*log*/)
1206{
1207 /* Always supported with unified memory. */
1208 return interop_device.type == GraphicsInteropDevice::METAL;
1209}
1210
1211void *MetalDevice::get_native_buffer(device_ptr ptr)
1212{
1213 return ((MetalMem *)ptr)->mtlBuffer;
1214}
1215
1216void MetalDevice::flush_delayed_free_list()
1217{
1218 /* Free any Metal buffers that may have been freed by the host while a command
1219 * buffer was being generated. This function should be called after each
1220 * command buffer completes. */
1221 std::lock_guard<std::recursive_mutex> lock(metal_mem_map_mutex);
1222 for (auto &it : delayed_free_list) {
1223 [it release];
1224 }
1225 delayed_free_list.clear();
1226}
1227
1228void MetalDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
1229{
1230 @autoreleasepool {
1231 if (bvh->params.bvh_layout == BVH_LAYOUT_BVH2) {
1232 Device::build_bvh(bvh, progress, refit);
1233 return;
1234 }
1235
1236 BVHMetal *bvh_metal = static_cast<BVHMetal *>(bvh);
1237 bvh_metal->motion_blur = motion_blur;
1238 bvh_metal->use_pcmi = use_pcmi;
1239 bvh_metal->extended_limits = use_metalrt_extended_limits;
1240 if (bvh_metal->build(progress, mtlDevice, mtlGeneralCommandQueue, refit)) {
1241
1242 if (bvh->params.top_level) {
1243 update_bvh(bvh_metal);
1244 }
1245 }
1246
1247 if (max_working_set_exceeded()) {
1248 set_error("System is out of GPU memory");
1249 }
1250 }
1251}
1252
1253void MetalDevice::free_bvh()
1254{
1255 for (id<MTLAccelerationStructure> &blas : unique_blas_array) {
1256 [blas release];
1257 }
1258 unique_blas_array.clear();
1259 blas_array.clear();
1260
1261 if (blas_buffer) {
1262 [blas_buffer release];
1263 blas_buffer = nil;
1264 }
1265
1266 if (accel_struct) {
1267 [accel_struct release];
1268 accel_struct = nil;
1269 }
1270}
1271
1272void MetalDevice::update_bvh(BVHMetal *bvh_metal)
1273{
1274 free_bvh();
1275
1276 if (!bvh_metal) {
1277 return;
1278 }
1279
1280 accel_struct = bvh_metal->accel_struct;
1281 unique_blas_array = bvh_metal->unique_blas_array;
1282 blas_array = bvh_metal->blas_array;
1283
1284 [accel_struct retain];
1285 for (id<MTLAccelerationStructure> &blas : unique_blas_array) {
1286 [blas retain];
1287 }
1288
1289 // Allocate required buffers for BLAS array.
1290 uint64_t buffer_size = blas_array.size() * sizeof(uint64_t);
1291 blas_buffer = [mtlDevice newBufferWithLength:buffer_size options:MTLResourceStorageModeShared];
1292 stats.mem_alloc(blas_buffer.allocatedSize);
1293}
1294
1294
1295CCL_NAMESPACE_END
1296
1297#endif
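
For reference, the front-end (MSL to AIR) step in MetalDevice::compile_and_load above ultimately reduces to a newLibraryWithSource:options:error: call on the MTLDevice, followed by per-kernel pipeline (PSO) builds. Below is a minimal, self-contained sketch of that flow using only the public Metal API; the kernel source, function name, and error handling are illustrative placeholders and are not part of Cycles.

// Minimal sketch (not Blender code): compile an MSL source string into an
// MTLLibrary and build one compute pipeline, mirroring the front-end
// compilation step used by MetalDevice::compile_and_load().
#import <Foundation/Foundation.h>
#import <Metal/Metal.h>

int main()
{
  @autoreleasepool {
    id<MTLDevice> device = MTLCreateSystemDefaultDevice();
    if (!device) {
      return 1;
    }

    /* Placeholder kernel source; Cycles would supply the preprocessed
     * kernel.metal source string here instead. */
    NSString *source = @"#include <metal_stdlib>\n"
                        "using namespace metal;\n"
                        "kernel void noop(uint tid [[thread_position_in_grid]]) {}\n";

    MTLCompileOptions *options = [[MTLCompileOptions alloc] init];
    options.fastMathEnabled = YES;

    NSError *error = nil;
    id<MTLLibrary> library = [device newLibraryWithSource:source
                                                  options:options
                                                    error:&error];
    if (!library) {
      NSLog(@"MSL compilation failed: %@", [error localizedDescription]);
      return 1;
    }

    /* Build a compute pipeline (PSO) for one kernel function, analogous to
     * the AIR->PSO builds that Cycles triggers via MetalDeviceKernels::load(). */
    id<MTLFunction> fn = [library newFunctionWithName:@"noop"];
    id<MTLComputePipelineState> pso = [device newComputePipelineStateWithFunction:fn
                                                                            error:&error];
    if (!pso) {
      NSLog(@"PSO build failed: %@", [error localizedDescription]);
      return 1;
    }
    NSLog(@"Compiled PSO, max threads per threadgroup: %lu",
          (unsigned long)pso.maxTotalThreadsPerThreadgroup);
  }
  return 0;
}

In Cycles, the corresponding AIR->PSO builds are triggered through MetalDeviceKernels::load(), as seen in compile_and_load above, with the resulting MTLLibrary stored per pipeline type on the MetalDevice instance.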