Blender V4.3
device/device.cpp
/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
 *
 * SPDX-License-Identifier: Apache-2.0 */

#include <stdlib.h>
#include <string.h>

#include "bvh/bvh2.h"

#include "device/device.h"
#include "device/queue.h"

#include "device/cpu/device.h"
#include "device/cpu/kernel.h"
#include "device/cuda/device.h"
#include "device/dummy/device.h"
#include "device/hip/device.h"
#include "device/hiprt/device.h"
#include "device/metal/device.h"
#include "device/multi/device.h"
#include "device/oneapi/device.h"
#include "device/optix/device.h"

#include "util/foreach.h"
#include "util/half.h"
#include "util/log.h"
#include "util/math.h"
#include "util/string.h"
#include "util/system.h"
#include "util/task.h"
#include "util/time.h"
#include "util/types.h"
#include "util/vector.h"

CCL_NAMESPACE_BEGIN

bool Device::need_types_update = true;
bool Device::need_devices_update = true;
thread_mutex Device::device_mutex;
vector<DeviceInfo> Device::cuda_devices;
vector<DeviceInfo> Device::optix_devices;
vector<DeviceInfo> Device::cpu_devices;
vector<DeviceInfo> Device::hip_devices;
vector<DeviceInfo> Device::metal_devices;
vector<DeviceInfo> Device::oneapi_devices;
uint Device::devices_initialized_mask = 0;

/* Device */

Device::~Device() noexcept(false) {}

void Device::build_bvh(BVH *bvh, Progress &progress, bool refit)
{
  assert(bvh->params.bvh_layout == BVH_LAYOUT_BVH2);

  BVH2 *const bvh2 = static_cast<BVH2 *>(bvh);
  if (refit) {
    bvh2->refit(progress);
  }
  else {
    bvh2->build(progress, &stats);
  }
}

Device *Device::create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
{
  if (!info.multi_devices.empty()) {
    /* Always create a multi device when info contains multiple devices.
     * This is done so that the type can still be e.g. DEVICE_CPU to indicate
     * that it is a homogeneous collection of devices, which simplifies checks. */
    return device_multi_create(info, stats, profiler, headless);
  }

  Device *device = NULL;

  switch (info.type) {
    case DEVICE_CPU:
      device = device_cpu_create(info, stats, profiler, headless);
      break;
#ifdef WITH_CUDA
    case DEVICE_CUDA:
      if (device_cuda_init()) {
        device = device_cuda_create(info, stats, profiler, headless);
      }
      break;
#endif
#ifdef WITH_OPTIX
    case DEVICE_OPTIX:
      if (device_optix_init())
        device = device_optix_create(info, stats, profiler, headless);
      break;
#endif

#ifdef WITH_HIP
    case DEVICE_HIP:
      if (device_hip_init())
        device = device_hip_create(info, stats, profiler, headless);
      break;
#endif

#ifdef WITH_METAL
    case DEVICE_METAL:
      if (device_metal_init())
        device = device_metal_create(info, stats, profiler, headless);
      break;
#endif

#ifdef WITH_ONEAPI
    case DEVICE_ONEAPI:
      device = device_oneapi_create(info, stats, profiler, headless);
      break;
#endif

    default:
      break;
  }

  if (device == NULL) {
    device = device_dummy_create(info, stats, profiler, headless);
  }

  return device;
}
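
/* Illustrative usage sketch (assumes the caller owns the Stats and Profiler
 * instances and frees the returned device):
 *
 *   vector<DeviceInfo> devices = Device::available_devices(DEVICE_MASK_CPU);
 *   if (!devices.empty()) {
 *     Device *device = Device::create(devices.front(), stats, profiler, true);
 *     ...
 *     delete device;
 *   }
 */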

DeviceType Device::type_from_string(const char *name)
{
  if (strcmp(name, "CPU") == 0) {
    return DEVICE_CPU;
  }
  else if (strcmp(name, "CUDA") == 0) {
    return DEVICE_CUDA;
  }
  else if (strcmp(name, "OPTIX") == 0) {
    return DEVICE_OPTIX;
  }
  else if (strcmp(name, "MULTI") == 0) {
    return DEVICE_MULTI;
  }
  else if (strcmp(name, "HIP") == 0) {
    return DEVICE_HIP;
  }
  else if (strcmp(name, "METAL") == 0) {
    return DEVICE_METAL;
  }
  else if (strcmp(name, "ONEAPI") == 0) {
    return DEVICE_ONEAPI;
  }
  else if (strcmp(name, "HIPRT") == 0) {
    return DEVICE_HIPRT;
  }

  return DEVICE_NONE;
}

string Device::string_from_type(DeviceType type)
{
  if (type == DEVICE_CPU) {
    return "CPU";
  }
  else if (type == DEVICE_CUDA) {
    return "CUDA";
  }
  else if (type == DEVICE_OPTIX) {
    return "OPTIX";
  }
  else if (type == DEVICE_MULTI) {
    return "MULTI";
  }
  else if (type == DEVICE_HIP) {
    return "HIP";
  }
  else if (type == DEVICE_METAL) {
    return "METAL";
  }
  else if (type == DEVICE_ONEAPI) {
    return "ONEAPI";
  }
  else if (type == DEVICE_HIPRT) {
    return "HIPRT";
  }

  return "";
}

vector<DeviceType> Device::available_types()
{
  vector<DeviceType> types;
  types.push_back(DEVICE_CPU);
#ifdef WITH_CUDA
  types.push_back(DEVICE_CUDA);
#endif
#ifdef WITH_OPTIX
  types.push_back(DEVICE_OPTIX);
#endif
#ifdef WITH_HIP
  types.push_back(DEVICE_HIP);
#endif
#ifdef WITH_METAL
  types.push_back(DEVICE_METAL);
#endif
#ifdef WITH_ONEAPI
  types.push_back(DEVICE_ONEAPI);
#endif
#ifdef WITH_HIPRT
  if (hiprtewInit())
    types.push_back(DEVICE_HIPRT);
#endif
  return types;
}

vector<DeviceInfo> Device::available_devices(uint mask)
{
  /* Lazy initialize devices. On some platforms OpenCL or CUDA drivers can
   * be broken and cause crashes when only trying to get device info, so
   * we don't want to do any initialization until the user chooses to. */
  thread_scoped_lock lock(device_mutex);
  vector<DeviceInfo> devices;

#if defined(WITH_CUDA) || defined(WITH_OPTIX)
  if (mask & (DEVICE_MASK_CUDA | DEVICE_MASK_OPTIX)) {
    if (!(devices_initialized_mask & DEVICE_MASK_CUDA)) {
      if (device_cuda_init()) {
        device_cuda_info(cuda_devices);
      }
      devices_initialized_mask |= DEVICE_MASK_CUDA;
    }
    if (mask & DEVICE_MASK_CUDA) {
      foreach (DeviceInfo &info, cuda_devices) {
        devices.push_back(info);
      }
    }
  }
#endif

#ifdef WITH_OPTIX
  if (mask & DEVICE_MASK_OPTIX) {
    if (!(devices_initialized_mask & DEVICE_MASK_OPTIX)) {
      if (device_optix_init()) {
        device_optix_info(cuda_devices, optix_devices);
      }
      devices_initialized_mask |= DEVICE_MASK_OPTIX;
    }
    foreach (DeviceInfo &info, optix_devices) {
      devices.push_back(info);
    }
  }
#endif

#ifdef WITH_HIP
  if (mask & DEVICE_MASK_HIP) {
    if (!(devices_initialized_mask & DEVICE_MASK_HIP)) {
      if (device_hip_init()) {
        device_hip_info(hip_devices);
      }
      devices_initialized_mask |= DEVICE_MASK_HIP;
    }
    foreach (DeviceInfo &info, hip_devices) {
      devices.push_back(info);
    }
  }
#endif

#ifdef WITH_ONEAPI
  if (mask & DEVICE_MASK_ONEAPI) {
    if (!(devices_initialized_mask & DEVICE_MASK_ONEAPI)) {
      if (device_oneapi_init()) {
        device_oneapi_info(oneapi_devices);
      }
      devices_initialized_mask |= DEVICE_MASK_ONEAPI;
    }
    foreach (DeviceInfo &info, oneapi_devices) {
      devices.push_back(info);
    }
  }
#endif

  if (mask & DEVICE_MASK_CPU) {
    if (!(devices_initialized_mask & DEVICE_MASK_CPU)) {
      device_cpu_info(cpu_devices);
      devices_initialized_mask |= DEVICE_MASK_CPU;
    }
    foreach (DeviceInfo &info, cpu_devices) {
      devices.push_back(info);
    }
  }

#ifdef WITH_METAL
  if (mask & DEVICE_MASK_METAL) {
    if (!(devices_initialized_mask & DEVICE_MASK_METAL)) {
      if (device_metal_init()) {
        device_metal_info(metal_devices);
      }
      devices_initialized_mask |= DEVICE_MASK_METAL;
    }
    foreach (DeviceInfo &info, metal_devices) {
      devices.push_back(info);
    }
  }
#endif

  return devices;
}
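
/* Illustrative sketch: the device masks are bit flags and can be combined to
 * query several backends in one call, e.g.:
 *
 *   vector<DeviceInfo> devices = Device::available_devices(DEVICE_MASK_CPU | DEVICE_MASK_CUDA);
 *   foreach (const DeviceInfo &info, devices) {
 *     VLOG_INFO << info.description << " (" << Device::string_from_type(info.type) << ")";
 *   }
 */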

DeviceInfo Device::dummy_device(const string &error_msg)
{
  DeviceInfo info;
  info.type = DEVICE_DUMMY;
  info.error_msg = error_msg;
  return info;
}

string Device::device_capabilities(uint mask)
{
  thread_scoped_lock lock(device_mutex);
  string capabilities = "";

  if (mask & DEVICE_MASK_CPU) {
    capabilities += "\nCPU device capabilities: ";
    capabilities += device_cpu_capabilities() + "\n";
  }

#ifdef WITH_CUDA
  if (mask & DEVICE_MASK_CUDA) {
    if (device_cuda_init()) {
      string device_capabilities = device_cuda_capabilities();
      if (!device_capabilities.empty()) {
        capabilities += "\nCUDA device capabilities:\n";
        capabilities += device_capabilities;
      }
    }
  }
#endif

#ifdef WITH_HIP
  if (mask & DEVICE_MASK_HIP) {
    if (device_hip_init()) {
      string device_capabilities = device_hip_capabilities();
      if (!device_capabilities.empty()) {
        capabilities += "\nHIP device capabilities:\n";
        capabilities += device_capabilities;
      }
    }
  }
#endif

#ifdef WITH_ONEAPI
  if (mask & DEVICE_MASK_ONEAPI) {
    if (device_oneapi_init()) {
      string device_capabilities = device_oneapi_capabilities();
      if (!device_capabilities.empty()) {
        capabilities += "\noneAPI device capabilities:\n";
        capabilities += device_capabilities;
      }
    }
  }
#endif

#ifdef WITH_METAL
  if (mask & DEVICE_MASK_METAL) {
    if (device_metal_init()) {
      string device_capabilities = device_metal_capabilities();
      if (!device_capabilities.empty()) {
        capabilities += "\nMetal device capabilities:\n";
        capabilities += device_capabilities;
      }
    }
  }
#endif

  return capabilities;
}

DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
                                    int threads,
                                    bool background)
{
  assert(subdevices.size() > 0);

  if (subdevices.size() == 1) {
    /* No multi device needed. */
    return subdevices.front();
  }

  DeviceInfo info;
  info.type = DEVICE_NONE;
  info.id = "MULTI";
  info.description = "Multi Device";
  info.num = 0;

  info.has_nanovdb = true;
  info.has_mnee = true;
  info.has_osl = true;
  info.has_guiding = true;
  info.has_profiling = true;
  info.has_peer_memory = false;
  info.use_hardware_raytracing = false;
  info.denoisers = DENOISER_ALL;

  foreach (const DeviceInfo &device, subdevices) {
    /* Ensure CPU device does not slow down GPU. */
    if (device.type == DEVICE_CPU && subdevices.size() > 1) {
      if (background) {
        int orig_cpu_threads = (threads) ? threads : TaskScheduler::max_concurrency();
        int cpu_threads = max(orig_cpu_threads - (subdevices.size() - 1), size_t(0));

        VLOG_INFO << "CPU render threads reduced from " << orig_cpu_threads << " to "
                  << cpu_threads << ", to dedicate to GPU.";

        if (cpu_threads >= 1) {
          DeviceInfo cpu_device = device;
          cpu_device.cpu_threads = cpu_threads;
          info.multi_devices.push_back(cpu_device);
        }
        else {
          continue;
        }
      }
      else {
        VLOG_INFO << "CPU render threads disabled for interactive render.";
        continue;
      }
    }
    else {
      info.multi_devices.push_back(device);
    }

    /* Create unique ID for this combination of devices. */
    info.id += device.id;

    /* Set device type to MULTI if subdevices are not of a common type. */
    if (info.type == DEVICE_NONE) {
      info.type = device.type;
    }
    else if (device.type != info.type) {
      info.type = DEVICE_MULTI;
    }

    /* Accumulate device info. */
    info.has_nanovdb &= device.has_nanovdb;
    info.has_mnee &= device.has_mnee;
    info.has_osl &= device.has_osl;
    info.has_guiding &= device.has_guiding;
    info.has_profiling &= device.has_profiling;
    info.has_peer_memory |= device.has_peer_memory;
    info.use_hardware_raytracing |= device.use_hardware_raytracing;
    info.denoisers &= device.denoisers;
  }

  return info;
}
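
/* Worked example of the CPU thread reduction in get_multi_device() above:
 * with threads == 0, TaskScheduler::max_concurrency() == 8 and three
 * sub-devices (two GPUs plus the CPU), orig_cpu_threads is 8 and cpu_threads
 * becomes max(8 - 2, 0) = 6, so the CPU keeps contributing with 6 threads in
 * background renders. */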

void Device::tag_update()
{
  free_memory();
}

void Device::free_memory()
{
  devices_initialized_mask = 0;
  cuda_devices.free_memory();
  optix_devices.free_memory();
  hip_devices.free_memory();
  oneapi_devices.free_memory();
  cpu_devices.free_memory();
  metal_devices.free_memory();
}

unique_ptr<DeviceQueue> Device::gpu_queue_create()
{
  LOG(FATAL) << "Device does not support queues.";
  return nullptr;
}

const CPUKernels &Device::get_cpu_kernels()
{
  /* Initialize CPU kernels once and reuse. */
  static CPUKernels kernels;
  return kernels;
}

void Device::get_cpu_kernel_thread_globals(
    vector<CPUKernelThreadGlobals> & /*kernel_thread_globals*/)
{
  LOG(FATAL) << "Device does not support CPU kernels.";
}

void *Device::get_cpu_osl_memory()
{
  return nullptr;
}

GPUDevice::~GPUDevice() noexcept(false) {}

bool GPUDevice::load_texture_info()
{
  if (need_texture_info) {
    /* Unset flag before copying, so this does not loop indefinitely if the copy below calls
     * into 'move_textures_to_host' (which calls 'load_texture_info' again). */
    need_texture_info = false;
    texture_info.copy_to_device();
    return true;
  }
  else {
    return false;
  }
}

void GPUDevice::init_host_memory(size_t preferred_texture_headroom,
                                 size_t preferred_working_headroom)
{
  /* Limit amount of host mapped memory, because allocating too much can
   * cause system instability. Leave at least half or 4 GB of system
   * memory free, whichever is smaller. */
  size_t default_limit = 4 * 1024 * 1024 * 1024LL;
  size_t system_ram = system_physical_ram();

  if (system_ram > 0) {
    if (system_ram / 2 > default_limit) {
      map_host_limit = system_ram - default_limit;
    }
    else {
      map_host_limit = system_ram / 2;
    }
  }
  else {
    VLOG_WARNING << "Mapped host memory disabled, failed to get system RAM";
    map_host_limit = 0;
  }

  /* Amount of device memory to keep free after texture memory
   * and working memory allocations respectively. The texture headroom
   * is set higher than the working headroom so that space is left
   * for working memory after textures are allocated. */
  device_working_headroom = preferred_working_headroom > 0 ? preferred_working_headroom :
                                                             32 * 1024 * 1024LL; /* 32 MB. */
  device_texture_headroom = preferred_texture_headroom > 0 ? preferred_texture_headroom :
                                                             128 * 1024 * 1024LL; /* 128 MB. */

  VLOG_INFO << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit)
            << " bytes. (" << string_human_readable_size(map_host_limit) << ")";
}
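
/* Worked example of the mapped host memory limit above: with 32 GB of system
 * RAM, half of it (16 GB) exceeds the 4 GB default limit, so map_host_limit
 * becomes 32 GB - 4 GB = 28 GB; with 6 GB of RAM, half (3 GB) is below 4 GB,
 * so map_host_limit is 3 GB. */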

void GPUDevice::move_textures_to_host(size_t size, bool for_texture)
{
  /* Break out of recursive call, which can happen when moving memory on a multi device. */
  static bool any_device_moving_textures_to_host = false;
  if (any_device_moving_textures_to_host) {
    return;
  }

  /* Signal to reallocate textures in host memory only. */
  move_texture_to_host = true;

  while (size > 0) {
    /* Find suitable memory allocation to move. */
    device_memory *max_mem = NULL;
    size_t max_size = 0;
    bool max_is_image = false;

    thread_scoped_lock lock(device_mem_map_mutex);
    foreach (MemMap::value_type &pair, device_mem_map) {
      device_memory &mem = *pair.first;
      Mem *cmem = &pair.second;

      /* Can only move textures allocated on this device (and not those from peer devices).
       * And need to ignore memory that is already on the host. */
      if (!mem.is_resident(this) || cmem->use_mapped_host) {
        continue;
      }

      bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) &&
                        (&mem != &texture_info);
      bool is_image = is_texture && (mem.data_height > 1);

      /* Can't move this type of memory. */
      if (!is_texture || cmem->array) {
        continue;
      }

      /* For other textures, only move image textures. */
      if (for_texture && !is_image) {
        continue;
      }

      /* Try to move largest allocation, prefer moving images. */
      if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) {
        max_is_image = is_image;
        max_size = mem.device_size;
        max_mem = &mem;
      }
    }
    lock.unlock();

    /* Move to host memory. This part is mutex protected since
     * multiple backend devices could be moving the memory. The
     * first one will do it, and the rest will adopt the pointer. */
    if (max_mem) {
      VLOG_WORK << "Move memory from device to host: " << max_mem->name;

      static thread_mutex move_mutex;
      thread_scoped_lock lock(move_mutex);

      any_device_moving_textures_to_host = true;

      /* Potentially need to call back into multi device, so pointer mapping
       * and peer devices are updated. This is also necessary since the device
       * pointer may just be a key here, so cannot be accessed and freed directly.
       * Unfortunately it does mean that memory is reallocated on all other
       * devices as well, which is potentially dangerous when still in use (since
       * a thread rendering on another device would only be caught in this mutex
       * if it so happens to do an allocation at the same time as well). */
      max_mem->device_copy_to();
      size = (max_size >= size) ? 0 : size - max_size;

      any_device_moving_textures_to_host = false;
    }
    else {
      break;
    }
  }

  /* Unset flag before texture info is reloaded, since it should stay in device memory. */
  move_texture_to_host = false;

  /* Update texture info array with new pointers. */
  load_texture_info();
}
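
/* Note on the selection loop above: candidates are ranked with image textures
 * before other textures, and by size within the same kind. For example, when
 * for_texture is false, a 512 MB 2D image texture is moved before a 1 GB
 * non-image global array, because is_image takes precedence over size. */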

GPUDevice::Mem *GPUDevice::generic_alloc(device_memory &mem, size_t pitch_padding)
{
  void *device_pointer = 0;
  size_t size = mem.memory_size() + pitch_padding;

  bool mem_alloc_result = false;
  const char *status = "";

  /* First try allocating in device memory, respecting headroom. We make
   * an exception for texture info. It is small and frequently accessed,
   * so treat it as working memory.
   *
   * If there is not enough room for working memory, we will try to move
   * textures to host memory, assuming the performance impact would have
   * been worse for working memory. */
  bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) && (&mem != &texture_info);
  bool is_image = is_texture && (mem.data_height > 1);

  size_t headroom = (is_texture) ? device_texture_headroom : device_working_headroom;

  size_t total = 0, free = 0;
  get_device_memory_info(total, free);

  /* Move textures to host memory if needed. */
  if (!move_texture_to_host && !is_image && (size + headroom) >= free && can_map_host) {
    move_textures_to_host(size + headroom - free, is_texture);
    get_device_memory_info(total, free);
  }

  /* Allocate in device memory. */
  if (!move_texture_to_host && (size + headroom) < free) {
    mem_alloc_result = alloc_device(device_pointer, size);
    if (mem_alloc_result) {
      device_mem_in_use += size;
      status = " in device memory";
    }
  }

  /* Fall back to mapped host memory if needed and possible. */

  void *shared_pointer = 0;

  if (!mem_alloc_result && can_map_host && mem.type != MEM_DEVICE_ONLY) {
    if (mem.shared_pointer) {
      /* Another device already allocated host memory. */
      mem_alloc_result = true;
      shared_pointer = mem.shared_pointer;
    }
    else if (map_host_used + size < map_host_limit) {
      /* Allocate host memory ourselves. */
      mem_alloc_result = alloc_host(shared_pointer, size);

      assert((mem_alloc_result && shared_pointer != 0) ||
             (!mem_alloc_result && shared_pointer == 0));
    }

    if (mem_alloc_result) {
      transform_host_pointer(device_pointer, shared_pointer);
      map_host_used += size;
      status = " in host memory";
    }
  }

  if (!mem_alloc_result) {
    if (mem.type == MEM_DEVICE_ONLY) {
      status = " failed, out of device memory";
      set_error("System is out of GPU memory");
    }
    else {
      status = " failed, out of device and host memory";
      set_error("System is out of GPU and shared host memory");
    }
  }

  if (mem.name) {
    VLOG_WORK << "Buffer allocate: " << mem.name << ", "
              << string_human_readable_number(mem.memory_size()) << " bytes. ("
              << string_human_readable_size(mem.memory_size()) << ")" << status;
  }

  mem.device_pointer = (device_ptr)device_pointer;
  mem.device_size = size;
  stats.mem_alloc(size);

  if (!mem.device_pointer) {
    return NULL;
  }

  /* Insert into map of allocations. */
  thread_scoped_lock lock(device_mem_map_mutex);
  Mem *cmem = &device_mem_map[&mem];
  if (shared_pointer != 0) {
    /* Replace host pointer with our host allocation. Only works if
     * memory layout is the same and has no pitch padding. Also
     * does not work if we move textures to host during a render,
     * since other devices might be using the memory. */

    if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer &&
        mem.host_pointer != shared_pointer)
    {
      memcpy(shared_pointer, mem.host_pointer, size);

      /* A call to device_memory::host_free() should be preceded by
       * a call to device_memory::device_free() for host memory
       * allocated by a device to be handled properly. Two exceptions
       * are here and a call in OptiXDevice::generic_alloc(), where
       * the current host memory can be assumed to be allocated by
       * device_memory::host_alloc(), not by a device. */

      mem.host_free();
      mem.host_pointer = shared_pointer;
    }
    mem.shared_pointer = shared_pointer;
    mem.shared_counter++;
    cmem->use_mapped_host = true;
  }
  else {
    cmem->use_mapped_host = false;
  }

  return cmem;
}
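
/* Summary of the fallback order in generic_alloc() above: device memory
 * (respecting headroom), then mapped host memory, then failure. For example,
 * a 2 GB MEM_GLOBAL allocation with only 1 GB of free device memory first
 * triggers move_textures_to_host() for roughly the missing amount plus
 * headroom; if that still does not free enough, the buffer is placed in
 * mapped host memory, provided can_map_host is set, the type is not
 * MEM_DEVICE_ONLY, and map_host_limit is not exceeded. */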

void GPUDevice::generic_free(device_memory &mem)
{
  if (mem.device_pointer) {
    thread_scoped_lock lock(device_mem_map_mutex);
    DCHECK(device_mem_map.find(&mem) != device_mem_map.end());
    const Mem &cmem = device_mem_map[&mem];

    /* If cmem.use_mapped_host is true, reference counting is used
     * to safely free a mapped host memory. */

    if (cmem.use_mapped_host) {
      assert(mem.shared_pointer);
      if (mem.shared_pointer) {
        assert(mem.shared_counter > 0);
        if (--mem.shared_counter == 0) {
          if (mem.host_pointer == mem.shared_pointer) {
            mem.host_pointer = 0;
          }
          free_host(mem.shared_pointer);
          mem.shared_pointer = 0;
        }
      }
      map_host_used -= mem.device_size;
    }
    else {
      /* Free device memory. */
      free_device((void *)mem.device_pointer);
      device_mem_in_use -= mem.device_size;
    }

    stats.mem_free(mem.device_size);
    mem.device_pointer = 0;
    mem.device_size = 0;

    device_mem_map.erase(device_mem_map.find(&mem));
  }
}

void GPUDevice::generic_copy_to(device_memory &mem)
{
  if (!mem.host_pointer || !mem.device_pointer) {
    return;
  }

  /* If use_mapped_host of mem is false, the current device only uses device memory allocated by
   * backend device allocation regardless of mem.host_pointer and mem.shared_pointer, and should
   * copy data from mem.host_pointer. */
  thread_scoped_lock lock(device_mem_map_mutex);
  if (!device_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
    copy_host_to_device((void *)mem.device_pointer, mem.host_pointer, mem.memory_size());
  }
}

/* DeviceInfo */
