14#include "device/cpu/kernel.h"
37bool Device::need_types_update =
true;
38bool Device::need_devices_update =
true;
46uint Device::devices_initialized_mask = 0;
65 BVH2 *
const bvh2 =
static_cast<BVH2 *
>(bvh);
67 bvh2->
refit(progress);
79 if (!
info.multi_devices.empty()) {
133 if (device ==
nullptr) {
142 if (strcmp(
name,
"CPU") == 0) {
145 if (strcmp(
name,
"CUDA") == 0) {
148 if (strcmp(
name,
"OPTIX") == 0) {
151 if (strcmp(
name,
"MULTI") == 0) {
154 if (strcmp(
name,
"HIP") == 0) {
157 if (strcmp(
name,
"METAL") == 0) {
160 if (strcmp(
name,
"ONEAPI") == 0) {
163 if (strcmp(
name,
"HIPRT") == 0) {
235#if defined(WITH_CUDA) || defined(WITH_OPTIX)
245 devices.push_back(
info);
260 devices.push_back(
info);
274 devices.push_back(
info);
288 devices.push_back(
info);
299 devices.push_back(
info);
312 devices.push_back(
info);
334 capabilities +=
"\nCPU device capabilities: ";
343 capabilities +=
"\nCUDA device capabilities:\n";
355 capabilities +=
"\nHIP device capabilities:\n";
367 capabilities +=
"\noneAPI device capabilities:\n";
379 capabilities +=
"\nMetal device capabilities:\n";
393 assert(!subdevices.empty());
395 if (subdevices.size() == 1) {
397 return subdevices.front();
403 info.description =
"Multi Device";
406 info.has_nanovdb =
true;
407 info.has_mnee =
true;
409 info.has_guiding =
true;
410 info.has_profiling =
true;
411 info.has_peer_memory =
false;
412 info.use_hardware_raytracing =
false;
417 if (device.type ==
DEVICE_CPU && subdevices.size() > 1) {
420 const int cpu_threads =
max(orig_cpu_threads - (subdevices.size() - 1),
size_t(0));
422 LOG_INFO <<
"CPU render threads reduced from " << orig_cpu_threads <<
" to " << cpu_threads
423 <<
", to dedicate to GPU.";
425 if (cpu_threads >= 1) {
428 info.multi_devices.push_back(cpu_device);
435 LOG_INFO <<
"CPU render threads disabled for interactive render.";
440 info.multi_devices.push_back(device);
444 info.id += device.id;
448 info.type = device.type;
450 else if (device.type !=
info.type) {
455 info.has_nanovdb &= device.has_nanovdb;
456 info.has_mnee &= device.has_mnee;
457 info.has_osl &= device.has_osl;
458 info.has_guiding &= device.has_guiding;
459 info.has_profiling &= device.has_profiling;
460 info.has_peer_memory |= device.has_peer_memory;
461 info.use_hardware_raytracing |= device.use_hardware_raytracing;
462 info.denoisers &= device.denoisers;
475 devices_initialized_mask = 0;
476 cuda_devices.free_memory();
477 optix_devices.free_memory();
478 hip_devices.free_memory();
479 oneapi_devices.free_memory();
480 cpu_devices.free_memory();
481 metal_devices.free_memory();
486 LOG_FATAL <<
"Device does not support queues.";
500 LOG_FATAL <<
"Device does not support CPU kernels.";
510 LOG_ERROR <<
"Request guiding field from a device which does not support it.";
539 const size_t preferred_working_headroom)
544 const size_t default_limit = 4 * 1024 * 1024 * 1024LL;
547 if (system_ram > 0) {
548 if (system_ram / 2 > default_limit) {
556 LOG_WARNING <<
"Mapped host memory disabled, failed to get system RAM";
591 bool max_is_image =
false;
596 Mem *cmem = &pair.second;
606 const bool is_image = is_texture && (mem.
data_height > 1);
609 if (!is_texture || cmem->
array) {
614 if (for_texture && !is_image) {
619 if (is_image > max_is_image || (is_image == max_is_image && mem.
device_size > max_size)) {
620 max_is_image = is_image;
631 LOG_DEBUG <<
"Move memory from device to host: " << max_mem->
name;
656 void *device_pointer =
nullptr;
659 bool mem_alloc_result =
false;
671 const bool is_image = is_texture && (mem.
data_height > 1);
687 if (mem_alloc_result) {
689 status =
" in device memory";
695 void *shared_pointer =
nullptr;
700 mem_alloc_result =
true;
707 assert((mem_alloc_result && shared_pointer !=
nullptr) ||
708 (!mem_alloc_result && shared_pointer ==
nullptr));
711 if (mem_alloc_result) {
714 status =
" in host memory";
718 if (!mem_alloc_result) {
720 status =
" failed, out of device memory";
721 set_error(
"System is out of GPU memory");
724 status =
" failed, out of device and host memory";
725 set_error(
"System is out of GPU and shared host memory");
746 if (shared_pointer !=
nullptr) {
787 assert(!
"GPU device should not copy memory back to host");
828 return (shared_pointer && device_pointer &&
void BLI_kdtree_nd_ free(KDTree *tree)
CCL_NAMESPACE_BEGIN void * util_aligned_malloc(const size_t size, const int alignment)
void util_aligned_free(void *ptr, const size_t size)
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
void refit(btStridingMeshInterface *triangles, const btVector3 &aabbMin, const btVector3 &aabbMax)
void refit(Progress &progress)
void build(Progress &progress, Stats *stats)
virtual void host_free(const MemoryType type, void *host_pointer, const size_t size)
virtual void * get_guiding_device() const
static void free_memory()
static DeviceInfo dummy_device(const string &error_msg="")
Device(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool headless_)
static DeviceInfo get_multi_device(const vector< DeviceInfo > &subdevices, const int threads, bool background)
static const CPUKernels & get_cpu_kernels()
virtual ~Device() noexcept(false)
virtual unique_ptr< DeviceQueue > gpu_queue_create()
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit)
static DeviceType type_from_string(const char *name)
virtual void set_error(const string &error)
virtual void get_cpu_kernel_thread_globals(vector< ThreadKernelGlobalsCPU > &)
static string device_capabilities(const uint device_type_mask=DEVICE_MASK_ALL)
static vector< DeviceType > available_types()
static string string_from_type(DeviceType type)
virtual OSLGlobals * get_cpu_osl_memory()
static vector< DeviceInfo > available_devices(const uint device_type_mask=DEVICE_MASK_ALL)
static unique_ptr< Device > create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
virtual void * host_alloc(const MemoryType type, const size_t size)
static int max_concurrency()
bool is_resident(Device *sub_device) const
void * host_alloc(const size_t size)
bool is_shared(Device *sub_device) const
device_ptr device_pointer
void device_move_to_host()
#define MIN_ALIGNMENT_DEVICE_MEMORY
void device_cpu_info(vector< DeviceInfo > &devices)
string device_cpu_capabilities()
CCL_NAMESPACE_BEGIN unique_ptr< Device > device_cpu_create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
#define CCL_NAMESPACE_END
void device_cuda_info(vector< DeviceInfo > &devices)
string device_cuda_capabilities()
CCL_NAMESPACE_BEGIN bool device_cuda_init()
unique_ptr< Device > device_cuda_create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
unique_ptr< Device > device_dummy_create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
void device_hip_info(vector< DeviceInfo > &devices)
string device_hip_capabilities()
unique_ptr< Device > device_hip_create(const DeviceInfo &info, Stats &stats, Profiler &profiler, const bool headless)
CCL_NAMESPACE_BEGIN bool device_hip_init()
unique_ptr< Device > device_multi_create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
unique_ptr< Device > device_oneapi_create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
void device_oneapi_info(vector< DeviceInfo > &devices)
CCL_NAMESPACE_BEGIN bool device_oneapi_init()
string device_oneapi_capabilities()
CCL_NAMESPACE_BEGIN bool device_optix_init()
void device_optix_info(const vector< DeviceInfo > &cuda_devices, vector< DeviceInfo > &devices)
unique_ptr< Device > device_optix_create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
#define assert(assertion)
#define DCHECK(expression)
ccl_device_inline float2 mask(const MaskType mask, const float2 a)
static void error(const char *str)
string string_human_readable_size(size_t size)
string string_human_readable_number(size_t num)
bool is_shared(const void *shared_pointer, const device_ptr device_pointer, Device *sub_device) override
virtual bool shared_alloc(void *&shared_pointer, const size_t size)=0
virtual void shared_free(void *shared_pointer)=0
GPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool headless_)
size_t device_texture_headroom
virtual void get_device_memory_info(size_t &total, size_t &free)=0
virtual bool alloc_device(void *&device_pointer, const size_t size)=0
size_t device_working_headroom
friend class device_memory
virtual void * shared_to_device_pointer(const void *shared_pointer)=0
virtual void generic_copy_to(device_memory &mem)
virtual void move_textures_to_host(const size_t size, const size_t headroom, const bool for_texture)
virtual void copy_host_to_device(void *device_pointer, void *host_pointer, const size_t size)=0
virtual bool load_texture_info()
virtual void free_device(void *device_pointer)=0
thread_mutex device_mem_map_mutex
virtual void generic_free(device_memory &mem)
virtual void init_host_memory(const size_t preferred_texture_headroom=0, const size_t preferred_working_headroom=0)
virtual GPUDevice::Mem * generic_alloc(device_memory &mem, const size_t pitch_padding=0)
device_vector< TextureInfo > texture_info
~GPUDevice() noexcept(false) override
size_t system_physical_ram()
std::unique_lock< std::mutex > thread_scoped_lock