14#include "device/cpu/kernel.h"
37bool Device::need_types_update =
true;
38bool Device::need_devices_update =
true;
46uint Device::devices_initialized_mask = 0;
56 BVH2 *
const bvh2 =
static_cast<BVH2 *
>(bvh);
58 bvh2->
refit(progress);
118 if (device ==
NULL) {
127 if (strcmp(name,
"CPU") == 0) {
130 else if (strcmp(name,
"CUDA") == 0) {
133 else if (strcmp(name,
"OPTIX") == 0) {
136 else if (strcmp(name,
"MULTI") == 0) {
139 else if (strcmp(name,
"HIP") == 0) {
142 else if (strcmp(name,
"METAL") == 0) {
145 else if (strcmp(name,
"ONEAPI") == 0) {
148 else if (strcmp(name,
"HIPRT") == 0) {
219#if defined(WITH_CUDA) || defined(WITH_OPTIX)
229 devices.push_back(
info);
244 devices.push_back(
info);
258 devices.push_back(
info);
272 devices.push_back(
info);
283 devices.push_back(
info);
296 devices.push_back(
info);
315 string capabilities =
"";
318 capabilities +=
"\nCPU device capabilities: ";
327 capabilities +=
"\nCUDA device capabilities:\n";
339 capabilities +=
"\nHIP device capabilities:\n";
351 capabilities +=
"\noneAPI device capabilities:\n";
363 capabilities +=
"\nMetal device capabilities:\n";
377 assert(subdevices.size() > 0);
379 if (subdevices.size() == 1) {
381 return subdevices.front();
399 foreach (
const DeviceInfo &device, subdevices) {
404 int cpu_threads =
max(orig_cpu_threads - (subdevices.size() - 1),
size_t(0));
406 VLOG_INFO <<
"CPU render threads reduced from " << orig_cpu_threads <<
" to "
407 << cpu_threads <<
", to dedicate to GPU.";
409 if (cpu_threads >= 1) {
419 VLOG_INFO <<
"CPU render threads disabled for interactive render.";
459 devices_initialized_mask = 0;
470 LOG(FATAL) <<
"Device does not support queues.";
484 LOG(FATAL) <<
"Device does not support CPU kernels.";
509 size_t preferred_working_headroom)
514 size_t default_limit = 4 * 1024 * 1024 * 1024LL;
517 if (system_ram > 0) {
518 if (system_ram / 2 > default_limit) {
526 VLOG_WARNING <<
"Mapped host memory disabled, failed to get system RAM";
546 static bool any_device_moving_textures_to_host =
false;
547 if (any_device_moving_textures_to_host) {
558 bool max_is_image =
false;
563 Mem *cmem = &pair.second;
573 bool is_image = is_texture && (mem.
data_height > 1);
576 if (!is_texture || cmem->
array) {
581 if (for_texture && !is_image) {
586 if (is_image > max_is_image || (is_image == max_is_image && mem.
device_size > max_size)) {
587 max_is_image = is_image;
598 VLOG_WORK <<
"Move memory from device to host: " << max_mem->
name;
603 any_device_moving_textures_to_host =
true;
613 size = (max_size >=
size) ? 0 : size - max_size;
615 any_device_moving_textures_to_host =
false;
631 void *device_pointer = 0;
634 bool mem_alloc_result =
false;
635 const char *status =
"";
645 bool is_image = is_texture && (mem.
data_height > 1);
649 size_t total = 0,
free = 0;
661 if (mem_alloc_result) {
663 status =
" in device memory";
669 void *shared_pointer = 0;
674 mem_alloc_result =
true;
679 mem_alloc_result =
alloc_host(shared_pointer, size);
681 assert((mem_alloc_result && shared_pointer != 0) ||
682 (!mem_alloc_result && shared_pointer == 0));
685 if (mem_alloc_result) {
688 status =
" in host memory";
692 if (!mem_alloc_result) {
694 status =
" failed, out of device memory";
695 set_error(
"System is out of GPU memory");
698 status =
" failed, out of device and host memory";
699 set_error(
"System is out of GPU and shared host memory");
720 if (shared_pointer != 0) {
void BLI_kdtree_nd_ free(KDTree *tree)
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
void refit(btStridingMeshInterface *triangles, const btVector3 &aabbMin, const btVector3 &aabbMax)
void refit(Progress &progress)
void build(Progress &progress, Stats *stats)
vector< DeviceInfo > multi_devices
DenoiserTypeMask denoisers
bool use_hardware_raytracing
static void free_memory()
static DeviceInfo dummy_device(const string &error_msg="")
static vector< DeviceInfo > available_devices(uint device_type_mask=DEVICE_MASK_ALL)
static const CPUKernels & get_cpu_kernels()
virtual void get_cpu_kernel_thread_globals(vector< CPUKernelThreadGlobals > &)
virtual ~Device() noexcept(false)
virtual unique_ptr< DeviceQueue > gpu_queue_create()
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit)
static DeviceType type_from_string(const char *name)
virtual void * get_cpu_osl_memory()
virtual void set_error(const string &error)
static string device_capabilities(uint device_type_mask=DEVICE_MASK_ALL)
static vector< DeviceType > available_types()
static string string_from_type(DeviceType type)
static DeviceInfo get_multi_device(const vector< DeviceInfo > &subdevices, int threads, bool background)
static Device * create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
void mem_free(size_t size)
void mem_alloc(size_t size)
static int max_concurrency()
bool is_resident(Device *sub_device) const
device_ptr device_pointer
CCL_NAMESPACE_BEGIN Device * device_cpu_create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
void device_cpu_info(vector< DeviceInfo > &devices)
string device_cpu_capabilities()
#define CCL_NAMESPACE_END
void device_cuda_info(vector< DeviceInfo > &devices)
string device_cuda_capabilities()
CCL_NAMESPACE_BEGIN bool device_cuda_init()
Device * device_cuda_create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
Device * device_dummy_create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
void device_hip_info(vector< DeviceInfo > &devices)
string device_hip_capabilities()
Device * device_hip_create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
CCL_NAMESPACE_BEGIN bool device_hip_init()
Device * device_multi_create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
void device_oneapi_info(vector< DeviceInfo > &devices)
CCL_NAMESPACE_BEGIN bool device_oneapi_init()
Device * device_oneapi_create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
string device_oneapi_capabilities()
CCL_NAMESPACE_BEGIN bool device_optix_init()
void device_optix_info(const vector< DeviceInfo > &cuda_devices, vector< DeviceInfo > &devices)
Device * device_optix_create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
#define DCHECK(expression)
string string_human_readable_size(size_t size)
string string_human_readable_number(size_t num)
virtual GPUDevice::Mem * generic_alloc(device_memory &mem, size_t pitch_padding=0)
size_t device_texture_headroom
virtual void get_device_memory_info(size_t &total, size_t &free)=0
size_t device_working_headroom
virtual void init_host_memory(size_t preferred_texture_headroom=0, size_t preferred_working_headroom=0)
virtual bool alloc_host(void *&shared_pointer, size_t size)=0
virtual void transform_host_pointer(void *&device_pointer, void *&shared_pointer)=0
virtual void copy_host_to_device(void *device_pointer, void *host_pointer, size_t size)=0
virtual void generic_copy_to(device_memory &mem)
virtual void free_host(void *shared_pointer)=0
virtual ~GPUDevice() noexcept(false)
virtual bool load_texture_info()
virtual void free_device(void *device_pointer)=0
thread_mutex device_mem_map_mutex
virtual bool alloc_device(void *&device_pointer, size_t size)=0
virtual void generic_free(device_memory &mem)
bool move_texture_to_host
device_vector< TextureInfo > texture_info
virtual void move_textures_to_host(size_t size, bool for_texture)
size_t system_physical_ram()
std::unique_lock< std::mutex > thread_scoped_lock
CCL_NAMESPACE_BEGIN typedef std::mutex thread_mutex