9# include <sycl/sycl.hpp>
17# ifdef WITH_EMBREE_GPU
21# if defined(WITH_OPENIMAGEDENOISE)
22# include <OpenImageDenoise/config.h>
23# if OIDN_VERSION >= 20300
31# if defined(WITH_EMBREE_GPU) && defined(EMBREE_SYCL_SUPPORT) && !defined(SYCL_LANGUAGE_VERSION)
34extern "C" RTCDevice rtcNewSYCLDevice(sycl::context context,
const char *config);
35extern "C" bool rtcIsSYCLDeviceSupported(
const sycl::device sycl_device);
40static std::vector<sycl::device> available_sycl_devices();
41static int parse_driver_build_version(
const sycl::device &device);
43static void queue_error_cb(
const char *message,
void *user_ptr)
46 *
reinterpret_cast<std::string *
>(user_ptr) = message;
51 :
GPUDevice(info, stats, profiler, headless),
52 device_queue_(nullptr),
53# ifdef WITH_EMBREE_GPU
54 embree_device(nullptr),
55 embree_scene(nullptr),
58 kg_memory_device_(nullptr),
62 static_assert(
sizeof(texMemObject) ==
sizeof(
void *));
63 static_assert(
sizeof(arrayMemObject) ==
sizeof(
void *));
67 oneapi_set_error_cb(queue_error_cb, &oneapi_error_string_);
69 bool is_finished_ok = create_queue(device_queue_,
71# ifdef WITH_EMBREE_GPU
72 use_hardware_raytracing ? &embree_device : nullptr
78 if (is_finished_ok ==
false) {
79 set_error(
"oneAPI queue initialization error: got runtime exception \"" +
80 oneapi_error_string_ +
"\"");
83 VLOG_DEBUG <<
"oneAPI queue has been successfully created for the device \""
85 assert(device_queue_);
88# ifdef WITH_EMBREE_GPU
89 use_hardware_raytracing = use_hardware_raytracing && (embree_device !=
nullptr);
91 use_hardware_raytracing =
false;
94 if (use_hardware_raytracing) {
95 VLOG_INFO <<
"oneAPI will use hardware ray tracing for intersection acceleration.";
98 size_t globals_segment_size;
99 is_finished_ok = kernel_globals_size(globals_segment_size);
100 if (is_finished_ok ==
false) {
101 set_error(
"oneAPI constant memory initialization got runtime exception \"" +
102 oneapi_error_string_ +
"\"");
105 VLOG_DEBUG <<
"Successfully created global/constant memory segment (kernel globals object)";
108 kg_memory_ = usm_aligned_alloc_host(device_queue_, globals_segment_size, 16);
109 usm_memset(device_queue_, kg_memory_, 0, globals_segment_size);
111 kg_memory_device_ = usm_alloc_device(device_queue_, globals_segment_size);
113 kg_memory_size_ = globals_segment_size;
115 max_memory_on_device_ = get_memcapacity();
117 move_texture_to_host =
false;
120 const char *headroom_str = getenv(
"CYCLES_ONEAPI_MEMORY_HEADROOM");
121 if (headroom_str !=
nullptr) {
122 const long long override_headroom = (
float)atoll(headroom_str);
123 device_working_headroom = override_headroom;
124 device_texture_headroom = override_headroom;
130OneapiDevice::~OneapiDevice()
132# ifdef WITH_EMBREE_GPU
134 rtcReleaseDevice(embree_device);
138 usm_free(device_queue_, kg_memory_);
139 usm_free(device_queue_, kg_memory_device_);
141 for (ConstMemMap::iterator mt = const_mem_map_.begin(); mt != const_mem_map_.end(); mt++)
145 free_queue(device_queue_);
148bool OneapiDevice::check_peer_access(
Device * )
153bool OneapiDevice::can_use_hardware_raytracing_for_features(
uint requested_features)
const
157# if defined(RTC_VERSION) && RTC_VERSION < 40100
160 (void)requested_features;
167 return (use_hardware_raytracing &&
168 can_use_hardware_raytracing_for_features(requested_features)) ?
173# ifdef WITH_EMBREE_GPU
177 BVHEmbree *
const bvh_embree =
static_cast<BVHEmbree *
>(bvh);
179 bvh_embree->refit(progress);
182 bvh_embree->build(progress, &stats, embree_device,
true);
185# if RTC_VERSION >= 40302
187 all_embree_scenes.push_back(bvh_embree->scene);
191 embree_scene = bvh_embree->scene;
192# if RTC_VERSION >= 40302
193 RTCError error_code = bvh_embree->offload_scenes_to_gpu(all_embree_scenes);
194 if (error_code != RTC_ERROR_NONE) {
196 string_printf(
"BVH failed to migrate to the GPU due to Embree library error (%s)",
197 bvh_embree->get_error_string(error_code)));
199 all_embree_scenes.clear();
209size_t OneapiDevice::get_free_mem()
const
215 const sycl::device &device =
reinterpret_cast<sycl::queue *
>(device_queue_)->get_device();
216 const bool is_integrated_gpu = device.get_info<sycl::info::device::host_unified_memory>();
217 if (device.has(sycl::aspect::ext_intel_free_memory) && is_integrated_gpu ==
false) {
218 return device.get_info<sycl::ext::intel::info::device::free_memory>();
221 else if (device_mem_in_use < max_memory_on_device_) {
222 return max_memory_on_device_ - device_mem_in_use;
229bool OneapiDevice::load_kernels(
const uint requested_features)
231 assert(device_queue_);
239 kernel_features |= requested_features;
241 bool is_finished_ok = oneapi_run_test_kernel(device_queue_);
242 if (is_finished_ok ==
false) {
243 set_error(
"oneAPI test kernel execution: got a runtime exception \"" + oneapi_error_string_ +
249 assert(device_queue_);
252 if (use_hardware_raytracing && !can_use_hardware_raytracing_for_features(requested_features)) {
254 <<
"Hardware ray tracing disabled, not supported yet by oneAPI for requested features.";
255 use_hardware_raytracing =
false;
258 is_finished_ok = oneapi_load_kernels(
259 device_queue_, (
const unsigned int)requested_features, use_hardware_raytracing);
260 if (is_finished_ok ==
false) {
261 set_error(
"oneAPI kernels loading: got a runtime exception \"" + oneapi_error_string_ +
"\"");
267 if (is_finished_ok) {
268 reserve_private_memory(requested_features);
269 is_finished_ok = !have_error();
272 return is_finished_ok;
275void OneapiDevice::reserve_private_memory(
const uint kernel_features)
277 size_t free_before = get_free_mem();
287 unique_ptr<DeviceQueue> queue = gpu_queue_create();
294 queue->init_execution();
298 queue->enqueue(test_kernel, 1, args);
299 queue->synchronize();
302 size_t free_after = get_free_mem();
304 VLOG_INFO <<
"For kernel execution were reserved "
309void OneapiDevice::get_device_memory_info(
size_t &total,
size_t &
free)
311 free = get_free_mem();
312 total = max_memory_on_device_;
315bool OneapiDevice::alloc_device(
void *&device_pointer,
size_t size)
317 bool allocation_success =
false;
318 device_pointer = usm_alloc_device(device_queue_, size);
319 if (device_pointer !=
nullptr) {
320 allocation_success =
true;
323 if (!oneapi_zero_memory_on_device(device_queue_, device_pointer, size)) {
324 set_error(
"oneAPI memory operation error: got runtime exception \"" + oneapi_error_string_ +
326 usm_free(device_queue_, device_pointer);
328 device_pointer =
nullptr;
329 allocation_success =
false;
333 return allocation_success;
336void OneapiDevice::free_device(
void *device_pointer)
338 usm_free(device_queue_, device_pointer);
341bool OneapiDevice::alloc_host(
void *&shared_pointer,
size_t size)
343 shared_pointer = usm_aligned_alloc_host(device_queue_, size, 64);
344 return shared_pointer !=
nullptr;
347void OneapiDevice::free_host(
void *shared_pointer)
349 usm_free(device_queue_, shared_pointer);
352void OneapiDevice::transform_host_pointer(
void *&device_pointer,
void *&shared_pointer)
356 device_pointer = shared_pointer;
359void OneapiDevice::copy_host_to_device(
void *device_pointer,
void *host_pointer,
size_t size)
361 usm_memcpy(device_queue_, device_pointer, host_pointer, size);
365SyclQueue *OneapiDevice::sycl_queue()
367 return device_queue_;
370string OneapiDevice::oneapi_error_message()
372 return string(oneapi_error_string_);
375int OneapiDevice::scene_max_shaders()
377 return scene_max_shaders_;
380void *OneapiDevice::kernel_globals_device_pointer()
382 return kg_memory_device_;
388 assert(!
"mem_alloc not supported for textures.");
391 assert(!
"mem_alloc not supported for global memory.");
406 VLOG_DEBUG <<
"OneapiDevice::mem_copy_to: \"" << mem.
name <<
"\", "
429 generic_copy_to(mem);
433void OneapiDevice::mem_copy_from(
device_memory &mem,
size_t y,
size_t w,
size_t h,
size_t elem)
436 assert(!
"mem_copy_from not supported for textures.");
439 const size_t size = (
w > 0 || h > 0 || elem > 0) ? (elem *
w * h) : mem.
memory_size();
440 const size_t offset = elem * y *
w;
443 VLOG_DEBUG <<
"OneapiDevice::mem_copy_from: \"" << mem.
name <<
"\" object of "
446 <<
" data " << size <<
" bytes";
455 assert(device_queue_);
459 char *shifted_host =
reinterpret_cast<char *
>(mem.
host_pointer) + offset;
460 char *shifted_device =
reinterpret_cast<char *
>(mem.
device_pointer) + offset;
461 bool is_finished_ok = usm_memcpy(device_queue_, shifted_host, shifted_device, size);
462 if (is_finished_ok ==
false) {
463 set_error(
"oneAPI memory operation error: got runtime exception \"" +
464 oneapi_error_string_ +
"\"");
491 assert(device_queue_);
492 bool is_finished_ok = usm_memset(
494 if (is_finished_ok ==
false) {
495 set_error(
"oneAPI memory operation error: got runtime exception \"" + oneapi_error_string_ +
525void OneapiDevice::const_copy_to(
const char *name,
void *host,
size_t size)
529 VLOG_DEBUG <<
"OneapiDevice::const_copy_to \"" << name <<
"\" object "
533# ifdef WITH_EMBREE_GPU
534 if (embree_scene !=
nullptr && strcmp(name,
"data") == 0) {
539 data->device_bvh = embree_scene;
542 scene_max_shaders_ = data->max_shaders;
546 ConstMemMap::iterator i = const_mem_map_.find(name);
549 if (i == const_mem_map_.end()) {
552 const_mem_map_.insert(ConstMemMap::value_type(name, data));
558 assert(data->memory_size() <= size);
559 memcpy(data->data(), host, size);
560 data->copy_to_device();
562 set_global_memory(device_queue_, kg_memory_, name, (
void *)data->device_pointer);
564 usm_memcpy(device_queue_, kg_memory_device_, kg_memory_, kg_memory_size_);
572 VLOG_DEBUG <<
"OneapiDevice::global_alloc \"" << mem.
name <<
"\" object "
577 generic_copy_to(mem);
581 usm_memcpy(device_queue_, kg_memory_device_, kg_memory_, kg_memory_size_);
594 generic_copy_to(mem);
598 if (slot >= texture_info.size()) {
599 texture_info.resize(slot + 128);
602 texture_info[slot] = mem.
info;
603 need_texture_info =
true;
616unique_ptr<DeviceQueue> OneapiDevice::gpu_queue_create()
618 return make_unique<OneapiDeviceQueue>(
this);
621bool OneapiDevice::should_use_graphics_interop()
628void *OneapiDevice::usm_aligned_alloc_host(
size_t memory_size,
size_t alignment)
630 assert(device_queue_);
631 return usm_aligned_alloc_host(device_queue_, memory_size, alignment);
634void OneapiDevice::usm_free(
void *usm_ptr)
636 assert(device_queue_);
637 return usm_free(device_queue_, usm_ptr);
640void OneapiDevice::check_usm(SyclQueue *queue_,
const void *usm_ptr,
bool allow_host =
false)
643 sycl::queue *queue =
reinterpret_cast<sycl::queue *
>(queue_);
644 sycl::info::device_type device_type =
645 queue->get_device().get_info<sycl::info::device::device_type>();
646 sycl::usm::alloc usm_type =
get_pointer_type(usm_ptr, queue->get_context());
648# ifndef WITH_ONEAPI_SYCL_HOST_TASK
649 const sycl::usm::alloc main_memory_type = sycl::usm::alloc::device;
651 const sycl::usm::alloc main_memory_type = sycl::usm::alloc::host;
653 assert(usm_type == main_memory_type ||
654 (usm_type == sycl::usm::alloc::host &&
655 (allow_host || device_type == sycl::info::device_type::cpu)) ||
656 usm_type == sycl::usm::alloc::unknown);
665bool OneapiDevice::create_queue(SyclQueue *&external_queue,
667 void *embree_device_pointer)
669 bool finished_correct =
true;
671 std::vector<sycl::device>
devices = available_sycl_devices();
672 if (device_index < 0 || device_index >=
devices.size()) {
675 sycl::queue *created_queue =
new sycl::queue(devices[device_index],
676 sycl::property::queue::in_order());
677 external_queue =
reinterpret_cast<SyclQueue *
>(created_queue);
678# ifdef WITH_EMBREE_GPU
679 if (embree_device_pointer) {
680 RTCDevice *device_object_ptr =
reinterpret_cast<RTCDevice *
>(embree_device_pointer);
681 *device_object_ptr = rtcNewSYCLDevice(created_queue->get_context(),
"");
682 if (*device_object_ptr ==
nullptr) {
683 finished_correct =
false;
684 oneapi_error_string_ =
685 "Hardware Raytracing is not available; please install "
686 "\"intel-level-zero-gpu-raytracing\" to enable it or disable Embree on GPU.";
690 (void)embree_device_pointer;
693 catch (sycl::exception
const &
e) {
694 finished_correct =
false;
695 oneapi_error_string_ =
e.what();
697 return finished_correct;
700void OneapiDevice::free_queue(SyclQueue *queue_)
703 sycl::queue *queue =
reinterpret_cast<sycl::queue *
>(queue_);
707void *OneapiDevice::usm_aligned_alloc_host(SyclQueue *queue_,
size_t memory_size,
size_t alignment)
710 sycl::queue *queue =
reinterpret_cast<sycl::queue *
>(queue_);
711 return sycl::aligned_alloc_host(alignment, memory_size, *queue);
714void *OneapiDevice::usm_alloc_device(SyclQueue *queue_,
size_t memory_size)
717 sycl::queue *queue =
reinterpret_cast<sycl::queue *
>(queue_);
728# ifndef WITH_ONEAPI_SYCL_HOST_TASK
729 return sycl::malloc_device(memory_size, *queue);
731 return sycl::malloc_host(memory_size, *queue);
735void OneapiDevice::usm_free(SyclQueue *queue_,
void *usm_ptr)
738 sycl::queue *queue =
reinterpret_cast<sycl::queue *
>(queue_);
739 OneapiDevice::check_usm(queue_, usm_ptr,
true);
740 sycl::free(usm_ptr, *queue);
743bool OneapiDevice::usm_memcpy(SyclQueue *queue_,
void *dest,
void *src,
size_t num_bytes)
752 sycl::queue *queue =
reinterpret_cast<sycl::queue *
>(queue_);
753 OneapiDevice::check_usm(queue_, dest,
true);
754 OneapiDevice::check_usm(queue_, src,
true);
759 if ((dest_type == sycl::usm::alloc::host || dest_type == sycl::usm::alloc::unknown) &&
760 (src_type == sycl::usm::alloc::host || src_type == sycl::usm::alloc::unknown))
762 memcpy(dest, src, num_bytes);
767 sycl::event mem_event = queue->memcpy(dest, src, num_bytes);
768# ifdef WITH_CYCLES_DEBUG
772 mem_event.wait_and_throw();
775 bool from_device_to_host = dest_type == sycl::usm::alloc::host &&
776 src_type == sycl::usm::alloc::device;
777 bool host_or_device_memop_with_offset = dest_type == sycl::usm::alloc::unknown ||
778 src_type == sycl::usm::alloc::unknown;
782 if (from_device_to_host || host_or_device_memop_with_offset)
787 catch (sycl::exception
const &
e) {
788 oneapi_error_string_ =
e.what();
793bool OneapiDevice::usm_memset(SyclQueue *queue_,
805 sycl::queue *queue =
reinterpret_cast<sycl::queue *
>(queue_);
806 OneapiDevice::check_usm(queue_, usm_ptr,
true);
808 sycl::event mem_event = queue->memset(usm_ptr, value, num_bytes);
809# ifdef WITH_CYCLES_DEBUG
813 mem_event.wait_and_throw();
819 catch (sycl::exception
const &
e) {
820 oneapi_error_string_ =
e.what();
825bool OneapiDevice::queue_synchronize(SyclQueue *queue_)
828 sycl::queue *queue =
reinterpret_cast<sycl::queue *
>(queue_);
830 queue->wait_and_throw();
833 catch (sycl::exception
const &
e) {
834 oneapi_error_string_ =
e.what();
839bool OneapiDevice::kernel_globals_size(
size_t &kernel_global_size)
846void OneapiDevice::set_global_memory(SyclQueue *queue_,
847 void *kernel_globals,
848 const char *memory_name,
849 void *memory_device_pointer)
852 assert(kernel_globals);
854 assert(memory_device_pointer);
856 OneapiDevice::check_usm(queue_, memory_device_pointer,
true);
857 OneapiDevice::check_usm(queue_, kernel_globals,
true);
859 std::string matched_name(memory_name);
862# define KERNEL_DATA_ARRAY(type, name) \
863 else if (#name == matched_name) { \
864 globals->__##name = (type *)memory_device_pointer; \
869 else if (
"integrator_state" == matched_name) {
874# include "kernel/data_arrays.h"
876 std::cerr <<
"Can't found global/constant memory with name \"" << matched_name <<
"\"!"
880# undef KERNEL_DATA_ARRAY
883bool OneapiDevice::enqueue_kernel(
884 KernelContext *kernel_context,
int kernel,
size_t global_size,
size_t local_size,
void **args)
886 return oneapi_enqueue_kernel(kernel_context,
891 use_hardware_raytracing,
895void OneapiDevice::get_adjusted_global_and_local_sizes(SyclQueue *queue,
897 size_t &kernel_global_size,
898 size_t &kernel_local_size)
901 const static size_t preferred_work_group_size_intersect = 128;
902 const static size_t preferred_work_group_size_shading = 256;
903 const static size_t preferred_work_group_size_shading_simd8 = 64;
906 const static size_t preferred_work_group_size_shader_evaluation = 256;
909 const static size_t preferred_work_group_size_cryptomatte = 512;
910 const static size_t preferred_work_group_size_default = 1024;
912 const sycl::device &device =
reinterpret_cast<sycl::queue *
>(queue)->get_device();
913 const size_t max_work_group_size = device.get_info<sycl::info::device::max_work_group_size>();
915 size_t preferred_work_group_size = 0;
924 preferred_work_group_size = preferred_work_group_size_intersect;
935 const bool device_is_simd8 =
936 (device.has(sycl::aspect::ext_intel_gpu_eu_simd_width) &&
937 device.get_info<sycl::ext::intel::info::device::gpu_eu_simd_width>() == 8);
938 preferred_work_group_size = (device_is_simd8) ? preferred_work_group_size_shading_simd8 :
939 preferred_work_group_size_shading;
943 preferred_work_group_size = preferred_work_group_size_cryptomatte;
949 preferred_work_group_size = preferred_work_group_size_shader_evaluation;
959 if (preferred_work_group_size == 0) {
960 preferred_work_group_size = oneapi_suggested_gpu_kernel_size((
::DeviceKernel)kernel);
964 if (preferred_work_group_size == 0) {
965 preferred_work_group_size = preferred_work_group_size_default;
968 kernel_local_size = std::min(max_work_group_size, preferred_work_group_size);
972 kernel_global_size =
round_up(kernel_global_size, kernel_local_size);
974# ifdef WITH_ONEAPI_SYCL_HOST_TASK
985 kernel_global_size = 1;
986 kernel_local_size = 1;
990 assert(kernel_global_size % kernel_local_size == 0);
995static const int lowest_supported_driver_version_win = 1015730;
999static const int lowest_supported_driver_version_neo = 29550;
1001static const int lowest_supported_driver_version_neo = 29735;
1004int parse_driver_build_version(
const sycl::device &device)
1006 const std::string &driver_version = device.get_info<sycl::info::device::driver_version>();
1007 int driver_build_version = 0;
1009 size_t second_dot_position = driver_version.find(
'.', driver_version.find(
'.') + 1);
1010 if (second_dot_position == std::string::npos) {
1011 std::cerr <<
"Unable to parse unknown Intel GPU driver version \"" << driver_version
1012 <<
"\" does not match xx.xx.xxxxx (Linux), x.x.xxxx (L0),"
1013 <<
" xx.xx.xxx.xxxx (Windows) for device \""
1014 << device.get_info<sycl::info::device::name>() <<
"\"." << std::endl;
1018 size_t third_dot_position = driver_version.find(
'.', second_dot_position + 1);
1019 if (third_dot_position != std::string::npos) {
1020 const std::string &third_number_substr = driver_version.substr(
1021 second_dot_position + 1, third_dot_position - second_dot_position - 1);
1022 const std::string &forth_number_substr = driver_version.substr(third_dot_position + 1);
1023 if (third_number_substr.length() == 3 && forth_number_substr.length() == 4)
1024 driver_build_version = std::stoi(third_number_substr) * 10000 +
1025 std::stoi(forth_number_substr);
1028 const std::string &third_number_substr = driver_version.substr(second_dot_position + 1);
1029 driver_build_version = std::stoi(third_number_substr);
1032 catch (std::invalid_argument &) {
1033 std::cerr <<
"Unable to parse unknown Intel GPU driver version \"" << driver_version
1034 <<
"\" does not match xx.xx.xxxxx (Linux), x.x.xxxx (L0),"
1035 <<
" xx.xx.xxx.xxxx (Windows) for device \""
1036 << device.get_info<sycl::info::device::name>() <<
"\"." << std::endl;
1040 return driver_build_version;
1043std::vector<sycl::device> available_sycl_devices()
1045 bool allow_all_devices =
false;
1046 if (getenv(
"CYCLES_ONEAPI_ALL_DEVICES") !=
nullptr) {
1047 allow_all_devices =
true;
1050 const std::vector<sycl::platform> &oneapi_platforms = sycl::platform::get_platforms();
1052 std::vector<sycl::device> available_devices;
1053 for (
const sycl::platform &platform : oneapi_platforms) {
1056 if (platform.get_backend() == sycl::backend::opencl) {
1060 const std::vector<sycl::device> &oneapi_devices =
1061 (allow_all_devices) ? platform.get_devices(sycl::info::device_type::all) :
1062 platform.get_devices(sycl::info::device_type::gpu);
1064 for (
const sycl::device &device : oneapi_devices) {
1065 bool filter_out =
false;
1066 if (!allow_all_devices) {
1070 if (!device.is_gpu() || platform.get_backend() != sycl::backend::ext_oneapi_level_zero) {
1075 int number_of_eus = 96;
1076 int threads_per_eu = 7;
1077 if (device.has(sycl::aspect::ext_intel_gpu_eu_count)) {
1078 number_of_eus = device.get_info<sycl::ext::intel::info::device::gpu_eu_count>();
1080 if (device.has(sycl::aspect::ext_intel_gpu_hw_threads_per_eu)) {
1082 device.get_info<sycl::ext::intel::info::device::gpu_hw_threads_per_eu>();
1085 if (number_of_eus <= 96 && threads_per_eu == 7) {
1089 bool check_driver_version = !filter_out;
1091 if (check_driver_version &&
1092 device.get_info<sycl::info::device::vendor>().find(
"Intel") == std::string::npos)
1094 check_driver_version =
false;
1102# if __LIBSYCL_MAJOR_VERSION < 8
1103 if (check_driver_version &&
1104 !
string_startswith(device.get_info<sycl::info::device::driver_version>(),
"1.3."))
1106 check_driver_version =
false;
1109 if (check_driver_version) {
1110 int driver_build_version = parse_driver_build_version(device);
1111 if ((driver_build_version > 100000 &&
1112 driver_build_version < lowest_supported_driver_version_win) ||
1113 driver_build_version < lowest_supported_driver_version_neo)
1121 available_devices.push_back(device);
1126 return available_devices;
1129char *OneapiDevice::device_capabilities()
1131 std::stringstream capabilities;
1133 const std::vector<sycl::device> &oneapi_devices = available_sycl_devices();
1134 for (
const sycl::device &device : oneapi_devices) {
1135# ifndef WITH_ONEAPI_SYCL_HOST_TASK
1136 const std::string &name = device.get_info<sycl::info::device::name>();
1138 const std::string &name =
"SYCL Host Task (Debug)";
1141 capabilities << std::string(
"\t") << name <<
"\n";
1142 capabilities <<
"\t\tsycl::info::platform::name\t\t\t"
1143 << device.get_platform().get_info<sycl::info::platform::name>() <<
"\n";
1145# define WRITE_ATTR(attribute_name, attribute_variable) \
1146 capabilities << "\t\tsycl::info::device::" #attribute_name "\t\t\t" << attribute_variable \
1148# define GET_ATTR(attribute) \
1150 capabilities << "\t\tsycl::info::device::" #attribute "\t\t\t" \
1151 << device.get_info<sycl::info::device ::attribute>() << "\n"; \
1153# define GET_INTEL_ATTR(attribute) \
1155 if (device.has(sycl::aspect::ext_intel_##attribute)) { \
1156 capabilities << "\t\tsycl::ext::intel::info::device::" #attribute "\t\t\t" \
1157 << device.get_info<sycl::ext::intel::info::device ::attribute>() << "\n"; \
1160# define GET_ASPECT(aspect_) \
1162 capabilities << "\t\tdevice::has(" #aspect_ ")\t\t\t" << device.has(sycl::aspect ::aspect_) \
1167 GET_ATTR(driver_version)
1168 GET_ATTR(max_compute_units)
1169 GET_ATTR(max_clock_frequency)
1170 GET_ATTR(global_mem_size)
1171 GET_INTEL_ATTR(pci_address)
1172 GET_INTEL_ATTR(gpu_eu_simd_width)
1173 GET_INTEL_ATTR(gpu_eu_count)
1174 GET_INTEL_ATTR(gpu_slices)
1175 GET_INTEL_ATTR(gpu_subslices_per_slice)
1176 GET_INTEL_ATTR(gpu_eu_count_per_subslice)
1177 GET_INTEL_ATTR(gpu_hw_threads_per_eu)
1178 GET_INTEL_ATTR(max_mem_bandwidth)
1179 GET_ATTR(max_work_group_size)
1180 GET_ATTR(max_work_item_dimensions)
1181 sycl::id<3> max_work_item_sizes =
1182 device.get_info<sycl::info::device::max_work_item_sizes<3>>();
1183 WRITE_ATTR(max_work_item_sizes[0], max_work_item_sizes.get(0))
1184 WRITE_ATTR(max_work_item_sizes[1], max_work_item_sizes.get(1))
1185 WRITE_ATTR(max_work_item_sizes[2], max_work_item_sizes.get(2))
1187 GET_ATTR(max_num_sub_groups)
1188 for (
size_t sub_group_size : device.get_info<sycl::info::device::sub_group_sizes>()) {
1189 WRITE_ATTR(sub_group_size[], sub_group_size)
1191 GET_ATTR(sub_group_independent_forward_progress)
1193 GET_ATTR(preferred_vector_width_char)
1194 GET_ATTR(preferred_vector_width_short)
1195 GET_ATTR(preferred_vector_width_int)
1196 GET_ATTR(preferred_vector_width_long)
1197 GET_ATTR(preferred_vector_width_float)
1198 GET_ATTR(preferred_vector_width_double)
1199 GET_ATTR(preferred_vector_width_half)
1201 GET_ATTR(address_bits)
1202 GET_ATTR(max_mem_alloc_size)
1203 GET_ATTR(mem_base_addr_align)
1204 GET_ATTR(error_correction_support)
1205 GET_ATTR(is_available)
1210 GET_ASPECT(atomic64)
1211 GET_ASPECT(usm_host_allocations)
1212 GET_ASPECT(usm_device_allocations)
1213 GET_ASPECT(usm_shared_allocations)
1214 GET_ASPECT(usm_system_allocations)
1216# ifdef __SYCL_ANY_DEVICE_HAS_ext_oneapi_non_uniform_groups__
1217 GET_ASPECT(ext_oneapi_non_uniform_groups)
1219# ifdef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_images__
1220 GET_ASPECT(ext_oneapi_bindless_images)
1222# ifdef __SYCL_ANY_DEVICE_HAS_ext_oneapi_interop_semaphore_import__
1223 GET_ASPECT(ext_oneapi_interop_semaphore_import)
1225# ifdef __SYCL_ANY_DEVICE_HAS_ext_oneapi_interop_semaphore_export__
1226 GET_ASPECT(ext_oneapi_interop_semaphore_export)
1229# undef GET_INTEL_ATTR
1233 capabilities <<
"\n";
1236 return ::strdup(capabilities.str().c_str());
1239void OneapiDevice::iterate_devices(OneAPIDeviceIteratorCallback cb,
void *user_ptr)
1242 std::vector<sycl::device>
devices = available_sycl_devices();
1243 for (sycl::device &device :
devices) {
1244 const std::string &platform_name =
1245 device.get_platform().get_info<sycl::info::platform::name>();
1246# ifndef WITH_ONEAPI_SYCL_HOST_TASK
1247 std::string name = device.get_info<sycl::info::device::name>();
1249 std::string name =
"SYCL Host Task (Debug)";
1251# ifdef WITH_EMBREE_GPU
1252 bool hwrt_support = rtcIsSYCLDeviceSupported(device);
1254 bool hwrt_support =
false;
1256# if defined(WITH_OPENIMAGEDENOISE) && OIDN_VERSION >= 20300
1257 bool oidn_support = oidnIsSYCLDeviceSupported(&device);
1259 bool oidn_support =
false;
1261 std::string
id =
"ONEAPI_" + platform_name +
"_" + name;
1262 if (device.has(sycl::aspect::ext_intel_pci_address)) {
1263 id.append(
"_" + device.get_info<sycl::ext::intel::info::device::pci_address>());
1265 (cb)(
id.c_str(), name.c_str(), num, hwrt_support, oidn_support, user_ptr);
1270size_t OneapiDevice::get_memcapacity()
1272 return reinterpret_cast<sycl::queue *
>(device_queue_)
1274 .get_info<sycl::info::device::global_mem_size>();
1277int OneapiDevice::get_num_multiprocessors()
1279 const sycl::device &device =
reinterpret_cast<sycl::queue *
>(device_queue_)->get_device();
1280 if (device.has(sycl::aspect::ext_intel_gpu_eu_count)) {
1281 return device.get_info<sycl::ext::intel::info::device::gpu_eu_count>();
1287int OneapiDevice::get_max_num_threads_per_multiprocessor()
1289 const sycl::device &device =
reinterpret_cast<sycl::queue *
>(device_queue_)->get_device();
1290 if (device.has(sycl::aspect::ext_intel_gpu_eu_simd_width) &&
1291 device.has(sycl::aspect::ext_intel_gpu_hw_threads_per_eu))
1293 return device.get_info<sycl::ext::intel::info::device::gpu_eu_simd_width>() *
1294 device.get_info<sycl::ext::intel::info::device::gpu_hw_threads_per_eu>();
void BLI_kdtree_nd_ free(KDTree *tree)
ATTR_WARN_UNUSED_RESULT const BMVert const BMEdge * e
void refit(btStridingMeshInterface *triangles, const btVector3 &aabbMin, const btVector3 &aabbMax)
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
static PointerRNA * get_pointer_type(ButsContextPath *path, StructRNA *type)
bool use_hardware_raytracing
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit)
size_t memory_elements_size(int elements)
device_ptr device_pointer
#define KERNEL_DATA_ARRAY(type, name)
#define CCL_NAMESPACE_END
draw_view in_light_buf[] float
#define KERNEL_FEATURE_NODE_RAYTRACE
#define KERNEL_FEATURE_MNEE
@ DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT
@ DEVICE_KERNEL_INTEGRATOR_SHADE_DEDICATED_LIGHT
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE
@ DEVICE_KERNEL_SHADER_EVAL_DISPLACE
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE
@ DEVICE_KERNEL_INTEGRATOR_QUEUED_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW
@ DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK
@ DEVICE_KERNEL_SHADER_EVAL_BACKGROUND
@ DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_DEDICATED_LIGHT
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE
@ DEVICE_KERNEL_INTEGRATOR_TERMINATED_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_INIT_FROM_CAMERA
@ DEVICE_KERNEL_CRYPTOMATTE_POSTPROCESS
@ DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_INIT_FROM_BAKE
@ DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY
@ DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST
@ DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND
Vector< CPUDevice > devices
list of all CPUDevices. for every hardware thread an instance of CPUDevice is created
unsigned __int64 uint64_t
string string_human_readable_size(size_t size)
string string_human_readable_number(size_t num)
bool string_startswith(const string_view s, const string_view start)
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
IntegratorStateGPU * integrator_state
std::unique_lock< std::mutex > thread_scoped_lock
ccl_device_inline size_t round_up(size_t x, size_t multiple)