/* Blender 4.3 — Cycles oneAPI device implementation (oneapi/device_impl.cpp).
 * Extracted from the generated source documentation of this file. */
1/* SPDX-FileCopyrightText: 2021-2022 Intel Corporation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#ifdef WITH_ONEAPI
6
7/* <algorithm> is needed until included upstream in sycl/detail/property_list_base.hpp */
8# include <algorithm>
9# include <sycl/sycl.hpp>
10
12
13# include "util/debug.h"
14# include "util/foreach.h"
15# include "util/log.h"
16
17# ifdef WITH_EMBREE_GPU
18# include "bvh/embree.h"
19# endif
20
21# if defined(WITH_OPENIMAGEDENOISE)
22# include <OpenImageDenoise/config.h>
23# if OIDN_VERSION >= 20300
25# endif
26# endif
27
30
31# if defined(WITH_EMBREE_GPU) && defined(EMBREE_SYCL_SUPPORT) && !defined(SYCL_LANGUAGE_VERSION)
32/* These declarations are missing from embree headers when compiling from a compiler that doesn't
33 * support SYCL. */
34extern "C" RTCDevice rtcNewSYCLDevice(sycl::context context, const char *config);
35extern "C" bool rtcIsSYCLDeviceSupported(const sycl::device sycl_device);
36# endif
37
39
40static std::vector<sycl::device> available_sycl_devices();
41static int parse_driver_build_version(const sycl::device &device);
42
/* Error callback registered with the oneAPI backend: copies the runtime error
 * message into the std::string pointed to by `user_ptr` (when provided). */
static void queue_error_cb(const char *message, void *user_ptr)
{
  if (user_ptr == nullptr) {
    return;
  }
  std::string *error_out = reinterpret_cast<std::string *>(user_ptr);
  *error_out = message;
}
49
50OneapiDevice::OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
51 : GPUDevice(info, stats, profiler, headless),
52 device_queue_(nullptr),
53# ifdef WITH_EMBREE_GPU
54 embree_device(nullptr),
55 embree_scene(nullptr),
56# endif
57 kg_memory_(nullptr),
58 kg_memory_device_(nullptr),
59 kg_memory_size_(0)
60{
61 /* Verify that base class types can be used with specific backend types */
62 static_assert(sizeof(texMemObject) == sizeof(void *));
63 static_assert(sizeof(arrayMemObject) == sizeof(void *));
64
65 use_hardware_raytracing = info.use_hardware_raytracing;
66
67 oneapi_set_error_cb(queue_error_cb, &oneapi_error_string_);
68
69 bool is_finished_ok = create_queue(device_queue_,
70 info.num,
71# ifdef WITH_EMBREE_GPU
72 use_hardware_raytracing ? &embree_device : nullptr
73# else
74 nullptr
75# endif
76 );
77
78 if (is_finished_ok == false) {
79 set_error("oneAPI queue initialization error: got runtime exception \"" +
80 oneapi_error_string_ + "\"");
81 }
82 else {
83 VLOG_DEBUG << "oneAPI queue has been successfully created for the device \""
84 << info.description << "\"";
85 assert(device_queue_);
86 }
87
88# ifdef WITH_EMBREE_GPU
89 use_hardware_raytracing = use_hardware_raytracing && (embree_device != nullptr);
90# else
91 use_hardware_raytracing = false;
92# endif
93
94 if (use_hardware_raytracing) {
95 VLOG_INFO << "oneAPI will use hardware ray tracing for intersection acceleration.";
96 }
97
98 size_t globals_segment_size;
99 is_finished_ok = kernel_globals_size(globals_segment_size);
100 if (is_finished_ok == false) {
101 set_error("oneAPI constant memory initialization got runtime exception \"" +
102 oneapi_error_string_ + "\"");
103 }
104 else {
105 VLOG_DEBUG << "Successfully created global/constant memory segment (kernel globals object)";
106 }
107
108 kg_memory_ = usm_aligned_alloc_host(device_queue_, globals_segment_size, 16);
109 usm_memset(device_queue_, kg_memory_, 0, globals_segment_size);
110
111 kg_memory_device_ = usm_alloc_device(device_queue_, globals_segment_size);
112
113 kg_memory_size_ = globals_segment_size;
114
115 max_memory_on_device_ = get_memcapacity();
116 init_host_memory();
117 move_texture_to_host = false;
118 can_map_host = true;
119
120 const char *headroom_str = getenv("CYCLES_ONEAPI_MEMORY_HEADROOM");
121 if (headroom_str != nullptr) {
122 const long long override_headroom = (float)atoll(headroom_str);
123 device_working_headroom = override_headroom;
124 device_texture_headroom = override_headroom;
125 }
126 VLOG_DEBUG << "oneAPI memory headroom size: "
127 << string_human_readable_size(device_working_headroom);
128}
129
130OneapiDevice::~OneapiDevice()
131{
132# ifdef WITH_EMBREE_GPU
133 if (embree_device)
134 rtcReleaseDevice(embree_device);
135# endif
136
137 texture_info.free();
138 usm_free(device_queue_, kg_memory_);
139 usm_free(device_queue_, kg_memory_device_);
140
141 for (ConstMemMap::iterator mt = const_mem_map_.begin(); mt != const_mem_map_.end(); mt++)
142 delete mt->second;
143
144 if (device_queue_)
145 free_queue(device_queue_);
146}
147
148bool OneapiDevice::check_peer_access(Device * /*peer_device*/)
149{
150 return false;
151}
152
153bool OneapiDevice::can_use_hardware_raytracing_for_features(uint requested_features) const
154{
155 /* MNEE and Ray-trace kernels work correctly with Hardware Ray-tracing starting with Embree 4.1.
156 */
157# if defined(RTC_VERSION) && RTC_VERSION < 40100
158 return !(requested_features & (KERNEL_FEATURE_MNEE | KERNEL_FEATURE_NODE_RAYTRACE));
159# else
160 (void)requested_features;
161 return true;
162# endif
163}
164
BVHLayoutMask OneapiDevice::get_bvh_layout_mask(uint requested_features) const
{
  /* Choose the hardware (Embree-GPU) BVH layout only when hardware ray-tracing
   * is enabled and usable for the requested features; otherwise fall back.
   * NOTE(review): the two result operands of this conditional expression are
   * missing from this extracted view of the file (presumably the Embree-GPU
   * layout vs. the software fallback layout) — confirm against the original. */
  return (use_hardware_raytracing &&
          can_use_hardware_raytracing_for_features(requested_features)) ?
}
172
#  ifdef WITH_EMBREE_GPU
/* Build or refit a BVH. For the Embree-on-GPU layout (and an available Embree
 * device) the build runs through Embree; with recent Embree versions the
 * per-object scenes are collected and offloaded to GPU memory in one batch
 * once the top-level BVH is built. Any other layout defers to the generic
 * Device implementation. */
void OneapiDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
{
  if (embree_device && bvh->params.bvh_layout == BVH_LAYOUT_EMBREEGPU) {
    BVHEmbree *const bvh_embree = static_cast<BVHEmbree *>(bvh);
    if (refit) {
      bvh_embree->refit(progress);
    }
    else {
      bvh_embree->build(progress, &stats, embree_device, true);
    }

#    if RTC_VERSION >= 40302
    /* Collect every built scene so they can be migrated together later; the
     * list is guarded since BVH builds may run from multiple threads. */
    thread_scoped_lock lock(scene_data_mutex);
    all_embree_scenes.push_back(bvh_embree->scene);
#    endif

    if (bvh->params.top_level) {
      embree_scene = bvh_embree->scene;
#    if RTC_VERSION >= 40302
      /* Migrate all collected scenes to GPU memory in a single batch. */
      RTCError error_code = bvh_embree->offload_scenes_to_gpu(all_embree_scenes);
      if (error_code != RTC_ERROR_NONE) {
        set_error(
            string_printf("BVH failed to migrate to the GPU due to Embree library error (%s)",
                          bvh_embree->get_error_string(error_code)));
      }
      all_embree_scenes.clear();
#    endif
    }
  }
  else {
    Device::build_bvh(bvh, progress, refit);
  }
}
#  endif
208
209size_t OneapiDevice::get_free_mem() const
210{
211 /* Accurate: Use device info, which is practically useful only on dGPU.
212 * This is because for non-discrete GPUs, all GPU memory allocations would
213 * be in the RAM, thus having the same performance for device and host pointers,
214 * so there is no need to be very accurate about what would end where. */
215 const sycl::device &device = reinterpret_cast<sycl::queue *>(device_queue_)->get_device();
216 const bool is_integrated_gpu = device.get_info<sycl::info::device::host_unified_memory>();
217 if (device.has(sycl::aspect::ext_intel_free_memory) && is_integrated_gpu == false) {
218 return device.get_info<sycl::ext::intel::info::device::free_memory>();
219 }
220 /* Estimate: Capacity - in use. */
221 else if (device_mem_in_use < max_memory_on_device_) {
222 return max_memory_on_device_ - device_mem_in_use;
223 }
224 else {
225 return 0;
226 }
227}
228
/* Load (compile) GPU kernels for the cumulative set of requested features.
 * Runs a trivial smoke-test kernel first; returns false and records a device
 * error on any failure. */
bool OneapiDevice::load_kernels(const uint requested_features)
{
  assert(device_queue_);

  /* Kernel loading is expected to be a cumulative operation; for example, if
   * a device is asked to load kernel A and then kernel B, then after these
   * operations, both A and B should be available for use. So we need to store
   * and use a cumulative mask of the requested kernel features, and not just
   * the latest requested features.
   */
  kernel_features |= requested_features;

  /* Smoke-test the queue with a trivial kernel before the expensive load. */
  bool is_finished_ok = oneapi_run_test_kernel(device_queue_);
  if (is_finished_ok == false) {
    set_error("oneAPI test kernel execution: got a runtime exception \"" + oneapi_error_string_ +
              "\"");
    return false;
  }
  else {
    VLOG_INFO << "Test kernel has been executed successfully for \"" << info.description << "\"";
    assert(device_queue_);
  }

  if (use_hardware_raytracing && !can_use_hardware_raytracing_for_features(requested_features)) {
    /* NOTE(review): the logging-stream object of the next statement (a VLOG
     * line) is missing from this extracted view of the file. */
        << "Hardware ray tracing disabled, not supported yet by oneAPI for requested features.";
    use_hardware_raytracing = false;
  }

  is_finished_ok = oneapi_load_kernels(
      device_queue_, (const unsigned int)requested_features, use_hardware_raytracing);
  if (is_finished_ok == false) {
    set_error("oneAPI kernels loading: got a runtime exception \"" + oneapi_error_string_ + "\"");
  }
  else {
    VLOG_INFO << "Kernels loading (compilation) has been done for \"" << info.description << "\"";
  }

  if (is_finished_ok) {
    /* Pre-reserve per-kernel private memory; may itself set a device error. */
    reserve_private_memory(requested_features);
    is_finished_ok = !have_error();
  }

  return is_finished_ok;
}
274
/* Reserve the device-side private/scratch memory that the runtime allocates
 * per kernel, by launching the largest kernel once with a minimal work size.
 * Logs how much device memory the reservation consumed. */
void OneapiDevice::reserve_private_memory(const uint kernel_features)
{
  size_t free_before = get_free_mem();

  /* Use the biggest kernel for estimation.
   * NOTE(review): the selected kernel enumerators of this nested conditional
   * are missing from this extracted view of the file (expected: ray-trace /
   * MNEE shading kernels with a plain shading fallback) — confirm against the
   * original source. */
  const DeviceKernel test_kernel = (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ?
                                       (kernel_features & KERNEL_FEATURE_MNEE) ?

  {
    unique_ptr<DeviceQueue> queue = gpu_queue_create();

    /* Dummy arguments; only the launch itself matters for the reservation. */
    device_ptr d_path_index = 0;
    device_ptr d_render_buffer = 0;
    int d_work_size = 0;
    DeviceKernelArguments args(&d_path_index, &d_render_buffer, &d_work_size);

    queue->init_execution();
    /* Launch of the kernel seems to be sufficient to reserve all
     * needed memory regardless of the execution global size.
     * So, the smallest possible size is used here. */
    queue->enqueue(test_kernel, 1, args);
    queue->synchronize();
  }

  size_t free_after = get_free_mem();

  VLOG_INFO << "For kernel execution were reserved "
            << string_human_readable_number(free_before - free_after) << " bytes. ("
            << string_human_readable_size(free_before - free_after) << ")";
}
308
309void OneapiDevice::get_device_memory_info(size_t &total, size_t &free)
310{
311 free = get_free_mem();
312 total = max_memory_on_device_;
313}
314
315bool OneapiDevice::alloc_device(void *&device_pointer, size_t size)
316{
317 bool allocation_success = false;
318 device_pointer = usm_alloc_device(device_queue_, size);
319 if (device_pointer != nullptr) {
320 allocation_success = true;
321 /* Due to lazy memory initialization in GPU runtime we will force memory to
322 * appear in device memory via execution of a kernel using this memory. */
323 if (!oneapi_zero_memory_on_device(device_queue_, device_pointer, size)) {
324 set_error("oneAPI memory operation error: got runtime exception \"" + oneapi_error_string_ +
325 "\"");
326 usm_free(device_queue_, device_pointer);
327
328 device_pointer = nullptr;
329 allocation_success = false;
330 }
331 }
332
333 return allocation_success;
334}
335
336void OneapiDevice::free_device(void *device_pointer)
337{
338 usm_free(device_queue_, device_pointer);
339}
340
341bool OneapiDevice::alloc_host(void *&shared_pointer, size_t size)
342{
343 shared_pointer = usm_aligned_alloc_host(device_queue_, size, 64);
344 return shared_pointer != nullptr;
345}
346
347void OneapiDevice::free_host(void *shared_pointer)
348{
349 usm_free(device_queue_, shared_pointer);
350}
351
352void OneapiDevice::transform_host_pointer(void *&device_pointer, void *&shared_pointer)
353{
354 /* Device and host pointer are in the same address space
355 * as we're using Unified Shared Memory. */
356 device_pointer = shared_pointer;
357}
358
359void OneapiDevice::copy_host_to_device(void *device_pointer, void *host_pointer, size_t size)
360{
361 usm_memcpy(device_queue_, device_pointer, host_pointer, size);
362}
363
364/* TODO: Make sycl::queue part of OneapiQueue and avoid using pointers to sycl::queue. */
365SyclQueue *OneapiDevice::sycl_queue()
366{
367 return device_queue_;
368}
369
370string OneapiDevice::oneapi_error_message()
371{
372 return string(oneapi_error_string_);
373}
374
375int OneapiDevice::scene_max_shaders()
376{
377 return scene_max_shaders_;
378}
379
380void *OneapiDevice::kernel_globals_device_pointer()
381{
382 return kg_memory_device_;
383}
384
/* Allocate generic device memory. Texture and global memory must go through
 * tex_alloc()/global_alloc() instead (asserted below). */
void OneapiDevice::mem_alloc(device_memory &mem)
{
  if (mem.type == MEM_TEXTURE) {
    assert(!"mem_alloc not supported for textures.");
  }
  else if (mem.type == MEM_GLOBAL) {
    assert(!"mem_alloc not supported for global memory.");
  }
  else {
    if (mem.name) {
      /* NOTE(review): the final stream line of this log statement is missing
       * from this extracted view (likely the human-readable size + ")"). */
      VLOG_DEBUG << "OneapiDevice::mem_alloc: \"" << mem.name << "\", "
                 << string_human_readable_number(mem.memory_size()) << " bytes. ("
    }
    generic_alloc(mem);
  }
}
402
/* Upload memory to the device. Global/texture memory is re-created wholesale;
 * generic memory is allocated lazily on first copy. */
void OneapiDevice::mem_copy_to(device_memory &mem)
{
  if (mem.name) {
    /* NOTE(review): the final stream line of this log statement is missing
     * from this extracted view (likely the human-readable size + ")"). */
    VLOG_DEBUG << "OneapiDevice::mem_copy_to: \"" << mem.name << "\", "
               << string_human_readable_number(mem.memory_size()) << " bytes. ("
  }

  /* After getting runtime errors we need to avoid performing oneAPI runtime operations
   * because the associated GPU context may be in an invalid state at this point. */
  if (have_error()) {
    return;
  }

  if (mem.type == MEM_GLOBAL) {
    /* Re-create the global entry: free, then alloc + copy + patch globals. */
    global_free(mem);
    global_alloc(mem);
  }
  else if (mem.type == MEM_TEXTURE) {
    tex_free((device_texture &)mem);
    tex_alloc((device_texture &)mem);
  }
  else {
    if (!mem.device_pointer)
      generic_alloc(mem);

    generic_copy_to(mem);
  }
}
432
/* Copy data from device to host. When w/h/elem are all zero the whole buffer
 * is copied; otherwise a row-slice of `elem * w * h` bytes starting at byte
 * offset `elem * y * w` is copied. Not supported for texture/global memory. */
void OneapiDevice::mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem)
{
  if (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) {
    assert(!"mem_copy_from not supported for textures.");
  }
  else if (mem.host_pointer) {
    const size_t size = (w > 0 || h > 0 || elem > 0) ? (elem * w * h) : mem.memory_size();
    const size_t offset = elem * y * w;

    if (mem.name) {
      VLOG_DEBUG << "OneapiDevice::mem_copy_from: \"" << mem.name << "\" object of "
                 << string_human_readable_number(mem.memory_size()) << " bytes. ("
                 << string_human_readable_size(mem.memory_size()) << ") from offset " << offset
                 << " data " << size << " bytes";
    }

    /* After getting runtime errors we need to avoid performing oneAPI runtime operations
     * because the associated GPU context may be in an invalid state at this point. */
    if (have_error()) {
      return;
    }

    assert(device_queue_);

    assert(size != 0);
    if (mem.device_pointer) {
      /* Apply the same byte offset on both sides of the transfer. */
      char *shifted_host = reinterpret_cast<char *>(mem.host_pointer) + offset;
      char *shifted_device = reinterpret_cast<char *>(mem.device_pointer) + offset;
      bool is_finished_ok = usm_memcpy(device_queue_, shifted_host, shifted_device, size);
      if (is_finished_ok == false) {
        set_error("oneAPI memory operation error: got runtime exception \"" +
                  oneapi_error_string_ + "\"");
      }
    }
  }
}
469
470void OneapiDevice::mem_zero(device_memory &mem)
471{
472 if (mem.name) {
473 VLOG_DEBUG << "OneapiDevice::mem_zero: \"" << mem.name << "\", "
474 << string_human_readable_number(mem.memory_size()) << " bytes. ("
475 << string_human_readable_size(mem.memory_size()) << ")\n";
476 }
477
478 /* After getting runtime errors we need to avoid performing oneAPI runtime operations
479 * because the associated GPU context may be in an invalid state at this point. */
480 if (have_error()) {
481 return;
482 }
483
484 if (!mem.device_pointer) {
485 mem_alloc(mem);
486 }
487 if (!mem.device_pointer) {
488 return;
489 }
490
491 assert(device_queue_);
492 bool is_finished_ok = usm_memset(
493 device_queue_, (void *)mem.device_pointer, 0, mem.memory_size());
494 if (is_finished_ok == false) {
495 set_error("oneAPI memory operation error: got runtime exception \"" + oneapi_error_string_ +
496 "\"");
497 }
498}
499
/* Free device memory, dispatching on the memory type. */
void OneapiDevice::mem_free(device_memory &mem)
{
  if (mem.name) {
    /* NOTE(review): the final stream line of this log statement is missing
     * from this extracted view (likely the human-readable size + ")"). */
    VLOG_DEBUG << "OneapiDevice::mem_free: \"" << mem.name << "\", "
               << string_human_readable_number(mem.device_size) << " bytes. ("
  }

  if (mem.type == MEM_GLOBAL) {
    global_free(mem);
  }
  else if (mem.type == MEM_TEXTURE) {
    tex_free((device_texture &)mem);
  }
  else {
    generic_free(mem);
  }
}
518
519device_ptr OneapiDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/)
520{
521 return reinterpret_cast<device_ptr>(reinterpret_cast<char *>(mem.device_pointer) +
522 mem.memory_elements_size(offset));
523}
524
/* Upload a named constant-memory buffer, patch its device pointer into the
 * kernel-globals segment and refresh the device copy of the globals. The
 * backing device_vector is cached per name and reused across updates. */
void OneapiDevice::const_copy_to(const char *name, void *host, size_t size)
{
  assert(name);

  VLOG_DEBUG << "OneapiDevice::const_copy_to \"" << name << "\" object "
             << string_human_readable_number(size) << " bytes. ("
             << string_human_readable_size(size) << ")";

#  ifdef WITH_EMBREE_GPU
  if (embree_scene != nullptr && strcmp(name, "data") == 0) {
    assert(size <= sizeof(KernelData));

    /* Update scene handle(since it is different for each device on multi devices) */
    KernelData *const data = (KernelData *)host;
    data->device_bvh = embree_scene;

    /* We need this number later for proper local memory allocation. */
    scene_max_shaders_ = data->max_shaders;
  }
#  endif

  /* NOTE(review): the declaration of `data` used below (presumably
   * `device_vector<uchar> *data = nullptr;`) is missing from this extracted
   * view of the file — confirm against the original source. */
  ConstMemMap::iterator i = const_mem_map_.find(name);

  if (i == const_mem_map_.end()) {
    data = new device_vector<uchar>(this, name, MEM_READ_ONLY);
    data->alloc(size);
    const_mem_map_.insert(ConstMemMap::value_type(name, data));
  }
  else {
    data = i->second;
  }

  assert(data->memory_size() <= size);
  memcpy(data->data(), host, size);
  data->copy_to_device();

  set_global_memory(device_queue_, kg_memory_, name, (void *)data->device_pointer);

  /* Push the updated host-side globals to the device-resident copy. */
  usm_memcpy(device_queue_, kg_memory_device_, kg_memory_, kg_memory_size_);
}
566
567void OneapiDevice::global_alloc(device_memory &mem)
568{
569 assert(mem.name);
570
571 size_t size = mem.memory_size();
572 VLOG_DEBUG << "OneapiDevice::global_alloc \"" << mem.name << "\" object "
573 << string_human_readable_number(size) << " bytes. ("
574 << string_human_readable_size(size) << ")";
575
576 generic_alloc(mem);
577 generic_copy_to(mem);
578
579 set_global_memory(device_queue_, kg_memory_, mem.name, (void *)mem.device_pointer);
580
581 usm_memcpy(device_queue_, kg_memory_device_, kg_memory_, kg_memory_size_);
582}
583
584void OneapiDevice::global_free(device_memory &mem)
585{
586 if (mem.device_pointer) {
587 generic_free(mem);
588 }
589}
590
591void OneapiDevice::tex_alloc(device_texture &mem)
592{
593 generic_alloc(mem);
594 generic_copy_to(mem);
595
596 /* Resize if needed. Also, in case of resize - allocate in advance for future allocations. */
597 const uint slot = mem.slot;
598 if (slot >= texture_info.size()) {
599 texture_info.resize(slot + 128);
600 }
601
602 texture_info[slot] = mem.info;
603 need_texture_info = true;
604
605 texture_info[slot].data = (uint64_t)mem.device_pointer;
606}
607
608void OneapiDevice::tex_free(device_texture &mem)
609{
610 /* There is no texture memory in SYCL. */
611 if (mem.device_pointer) {
612 generic_free(mem);
613 }
614}
615
616unique_ptr<DeviceQueue> OneapiDevice::gpu_queue_create()
617{
618 return make_unique<OneapiDeviceQueue>(this);
619}
620
621bool OneapiDevice::should_use_graphics_interop()
622{
623 /* NOTE(@nsirgien): oneAPI doesn't yet support direct writing into graphics API objects, so
624 * return false. */
625 return false;
626}
627
628void *OneapiDevice::usm_aligned_alloc_host(size_t memory_size, size_t alignment)
629{
630 assert(device_queue_);
631 return usm_aligned_alloc_host(device_queue_, memory_size, alignment);
632}
633
634void OneapiDevice::usm_free(void *usm_ptr)
635{
636 assert(device_queue_);
637 return usm_free(device_queue_, usm_ptr);
638}
639
/* Debug-only sanity check that `usm_ptr` has the USM allocation kind expected
 * for this queue's device: device USM normally (host USM in the SYCL host-task
 * build), host USM when `allow_host` is set or the SYCL device is a CPU, and
 * `unknown` for plain C++ allocations. Compiled out in release (NDEBUG). */
void OneapiDevice::check_usm(SyclQueue *queue_, const void *usm_ptr, bool allow_host = false)
{
#  ifndef NDEBUG
  sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
  sycl::info::device_type device_type =
      queue->get_device().get_info<sycl::info::device::device_type>();
  sycl::usm::alloc usm_type = get_pointer_type(usm_ptr, queue->get_context());
  (void)usm_type;
#    ifndef WITH_ONEAPI_SYCL_HOST_TASK
  const sycl::usm::alloc main_memory_type = sycl::usm::alloc::device;
#    else
  const sycl::usm::alloc main_memory_type = sycl::usm::alloc::host;
#    endif
  assert(usm_type == main_memory_type ||
         (usm_type == sycl::usm::alloc::host &&
          (allow_host || device_type == sycl::info::device_type::cpu)) ||
         usm_type == sycl::usm::alloc::unknown);
#  else
  /* Silence warning about unused arguments. */
  (void)queue_;
  (void)usm_ptr;
  (void)allow_host;
#  endif
}
664
665bool OneapiDevice::create_queue(SyclQueue *&external_queue,
666 int device_index,
667 void *embree_device_pointer)
668{
669 bool finished_correct = true;
670 try {
671 std::vector<sycl::device> devices = available_sycl_devices();
672 if (device_index < 0 || device_index >= devices.size()) {
673 return false;
674 }
675 sycl::queue *created_queue = new sycl::queue(devices[device_index],
676 sycl::property::queue::in_order());
677 external_queue = reinterpret_cast<SyclQueue *>(created_queue);
678# ifdef WITH_EMBREE_GPU
679 if (embree_device_pointer) {
680 RTCDevice *device_object_ptr = reinterpret_cast<RTCDevice *>(embree_device_pointer);
681 *device_object_ptr = rtcNewSYCLDevice(created_queue->get_context(), "");
682 if (*device_object_ptr == nullptr) {
683 finished_correct = false;
684 oneapi_error_string_ =
685 "Hardware Raytracing is not available; please install "
686 "\"intel-level-zero-gpu-raytracing\" to enable it or disable Embree on GPU.";
687 }
688 }
689# else
690 (void)embree_device_pointer;
691# endif
692 }
693 catch (sycl::exception const &e) {
694 finished_correct = false;
695 oneapi_error_string_ = e.what();
696 }
697 return finished_correct;
698}
699
700void OneapiDevice::free_queue(SyclQueue *queue_)
701{
702 assert(queue_);
703 sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
704 delete queue;
705}
706
707void *OneapiDevice::usm_aligned_alloc_host(SyclQueue *queue_, size_t memory_size, size_t alignment)
708{
709 assert(queue_);
710 sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
711 return sycl::aligned_alloc_host(alignment, memory_size, *queue);
712}
713
/* Allocate device-side USM (host USM in the SYCL host-task build). */
void *OneapiDevice::usm_alloc_device(SyclQueue *queue_, size_t memory_size)
{
  assert(queue_);
  sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
  /* NOTE(@nsirgien): There are three types of Unified Shared Memory (USM) in oneAPI: host, device
   * and shared. For new project it could more beneficial to use USM shared memory, because it
   * provides automatic migration mechanism in order to allow to use the same pointer on host and
   * on device, without need to worry about explicit memory transfer operations, although usage of
   * USM shared imply some documented limitations on the memory usage in regards of parallel access
   * from different threads. But for Blender/Cycles this type of memory is not very suitable in
   * current application architecture, because Cycles is multi-thread application and already uses
   * two different pointer for host activity and device activity, and also has to perform all
   * needed memory transfer operations. So, USM device memory type has been used for oneAPI device
   * in order to better fit in Cycles architecture. */
#  ifndef WITH_ONEAPI_SYCL_HOST_TASK
  return sycl::malloc_device(memory_size, *queue);
#  else
  return sycl::malloc_host(memory_size, *queue);
#  endif
}
734
735void OneapiDevice::usm_free(SyclQueue *queue_, void *usm_ptr)
736{
737 assert(queue_);
738 sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
739 OneapiDevice::check_usm(queue_, usm_ptr, true);
740 sycl::free(usm_ptr, *queue);
741}
742
/* Copy `num_bytes` between USM (or plain C++) allocations. Pure host-side
 * copies go through memcpy(); device-involving copies are submitted to the
 * queue and, where required for correctness, waited upon. Returns false and
 * records the error message if the runtime throws. */
bool OneapiDevice::usm_memcpy(SyclQueue *queue_, void *dest, void *src, size_t num_bytes)
{
  assert(queue_);
  /* sycl::queue::memcpy may crash if the queue is in an invalid state due to previous
   * runtime errors. It's better to avoid running memory operations in that case.
   * The render will be canceled and the queue will be destroyed anyway. */
  if (have_error())
    return false;

  sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
  OneapiDevice::check_usm(queue_, dest, true);
  OneapiDevice::check_usm(queue_, src, true);
  sycl::usm::alloc dest_type = get_pointer_type(dest, queue->get_context());
  sycl::usm::alloc src_type = get_pointer_type(src, queue->get_context());
  /* Unknown here means, that this is not an USM allocation, which implies that this is
   * some generic C++ allocation, so we could use C++ memcpy directly with USM host. */
  if ((dest_type == sycl::usm::alloc::host || dest_type == sycl::usm::alloc::unknown) &&
      (src_type == sycl::usm::alloc::host || src_type == sycl::usm::alloc::unknown))
  {
    memcpy(dest, src, num_bytes);
    return true;
  }

  try {
    sycl::event mem_event = queue->memcpy(dest, src, num_bytes);
#  ifdef WITH_CYCLES_DEBUG
    /* NOTE(@nsirgien) Waiting on memory operation may give more precise error
     * messages. Due to impact on occupancy, it makes sense to enable it only during Cycles debug.
     */
    mem_event.wait_and_throw();
    return true;
#  else
    bool from_device_to_host = dest_type == sycl::usm::alloc::host &&
                               src_type == sycl::usm::alloc::device;
    bool host_or_device_memop_with_offset = dest_type == sycl::usm::alloc::unknown ||
                                            src_type == sycl::usm::alloc::unknown;
    /* NOTE(@sirgienko) Host-side blocking wait on this operation is mandatory, otherwise the host
     * may not wait until the end of the transfer before using the memory.
     */
    if (from_device_to_host || host_or_device_memop_with_offset)
      mem_event.wait();
    return true;
#  endif
  }
  catch (sycl::exception const &e) {
    oneapi_error_string_ = e.what();
    return false;
  }
}
792
/* Fill `num_bytes` of a USM allocation with `value` via the queue. The fill
 * is asynchronous in release builds (the in-order queue preserves ordering);
 * debug builds wait so runtime errors surface here. Returns false and records
 * the error message if the runtime throws. */
bool OneapiDevice::usm_memset(SyclQueue *queue_,
                              void *usm_ptr,
                              unsigned char value,
                              size_t num_bytes)
{
  assert(queue_);
  /* sycl::queue::memset may crash if the queue is in an invalid state due to previous
   * runtime errors. It's better to avoid running memory operations in that case.
   * The render will be canceled and the queue will be destroyed anyway. */
  if (have_error())
    return false;

  sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
  OneapiDevice::check_usm(queue_, usm_ptr, true);
  try {
    sycl::event mem_event = queue->memset(usm_ptr, value, num_bytes);
#  ifdef WITH_CYCLES_DEBUG
    /* NOTE(@nsirgien) Waiting on memory operation may give more precise error
     * messages. Due to impact on occupancy, it makes sense to enable it only during Cycles debug.
     */
    mem_event.wait_and_throw();
#  else
    (void)mem_event;
#  endif
    return true;
  }
  catch (sycl::exception const &e) {
    oneapi_error_string_ = e.what();
    return false;
  }
}
824
825bool OneapiDevice::queue_synchronize(SyclQueue *queue_)
826{
827 assert(queue_);
828 sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
829 try {
830 queue->wait_and_throw();
831 return true;
832 }
833 catch (sycl::exception const &e) {
834 oneapi_error_string_ = e.what();
835 return false;
836 }
837}
838
839bool OneapiDevice::kernel_globals_size(size_t &kernel_global_size)
840{
841 kernel_global_size = sizeof(KernelGlobalsGPU);
842
843 return true;
844}
845
/* Patch one named pointer inside the host-side kernel-globals struct. The
 * name is matched against "integrator_state" and every array declared in
 * kernel/data_arrays.h (via the KERNEL_DATA_ARRAY macro expansion below);
 * an unknown name is a programming error. */
void OneapiDevice::set_global_memory(SyclQueue *queue_,
                                     void *kernel_globals,
                                     const char *memory_name,
                                     void *memory_device_pointer)
{
  assert(queue_);
  assert(kernel_globals);
  assert(memory_name);
  assert(memory_device_pointer);
  KernelGlobalsGPU *globals = (KernelGlobalsGPU *)kernel_globals;
  OneapiDevice::check_usm(queue_, memory_device_pointer, true);
  OneapiDevice::check_usm(queue_, kernel_globals, true);

  std::string matched_name(memory_name);

/* This macro will change global ptr of KernelGlobals via name matching. */
#  define KERNEL_DATA_ARRAY(type, name) \
    else if (#name == matched_name) { \
      globals->__##name = (type *)memory_device_pointer; \
      return; \
    }
  /* `if (false)` anchors the else-if chain generated by the macro above. */
  if (false) {
  }
  else if ("integrator_state" == matched_name) {
    globals->integrator_state = (IntegratorStateGPU *)memory_device_pointer;
    return;
  }
#  include "kernel/data_arrays.h"
  else {
    std::cerr << "Can't found global/constant memory with name \"" << matched_name << "\"!"
              << std::endl;
    assert(false);
  }
#  undef KERNEL_DATA_ARRAY
}
882
883bool OneapiDevice::enqueue_kernel(
884 KernelContext *kernel_context, int kernel, size_t global_size, size_t local_size, void **args)
885{
886 return oneapi_enqueue_kernel(kernel_context,
887 kernel,
888 global_size,
889 local_size,
890 kernel_features,
891 use_hardware_raytracing,
892 args);
893}
894
/* Choose a work-group (local) size for the given kernel — preferring
 * per-kernel-class defaults, then the backend's suggestion, then a generic
 * default — clamp it to the device maximum, and round the global size up to a
 * multiple of the local size (non-uniform work-groups are not reliable on
 * most oneAPI devices). */
void OneapiDevice::get_adjusted_global_and_local_sizes(SyclQueue *queue,
                                                       const DeviceKernel kernel,
                                                       size_t &kernel_global_size,
                                                       size_t &kernel_local_size)
{
  assert(queue);
  const static size_t preferred_work_group_size_intersect = 128;
  const static size_t preferred_work_group_size_shading = 256;
  const static size_t preferred_work_group_size_shading_simd8 = 64;
  /* Shader evaluation kernels seems to use some amount of shared memory, so better
   * to avoid usage of maximum work group sizes for them. */
  const static size_t preferred_work_group_size_shader_evaluation = 256;
  /* NOTE(@nsirgien): 1024 currently may lead to issues with cryptomatte kernels, so
   * for now their work-group size is restricted to 512. */
  const static size_t preferred_work_group_size_cryptomatte = 512;
  const static size_t preferred_work_group_size_default = 1024;

  const sycl::device &device = reinterpret_cast<sycl::queue *>(queue)->get_device();
  const size_t max_work_group_size = device.get_info<sycl::info::device::max_work_group_size>();

  size_t preferred_work_group_size = 0;
  switch (kernel) {
    /* NOTE(review): the case labels of this switch (intersection kernels for
     * this branch) are missing from this extracted view of the file. */
      preferred_work_group_size = preferred_work_group_size_intersect;
      break;

    /* NOTE(review): shading-kernel case labels (and the opening brace of this
     * scoped case) are missing from this extracted view of the file. */
      const bool device_is_simd8 =
          (device.has(sycl::aspect::ext_intel_gpu_eu_simd_width) &&
           device.get_info<sycl::ext::intel::info::device::gpu_eu_simd_width>() == 8);
      preferred_work_group_size = (device_is_simd8) ? preferred_work_group_size_shading_simd8 :
                                                      preferred_work_group_size_shading;
    } break;

    /* NOTE(review): cryptomatte case labels missing from this view. */
      preferred_work_group_size = preferred_work_group_size_cryptomatte;
      break;

    /* NOTE(review): shader-evaluation case labels missing from this view. */
      preferred_work_group_size = preferred_work_group_size_shader_evaluation;
      break;

    default:
      /* Do nothing and keep initial zero value. */
      break;
  }

  /* Such order of logic allow us to override Blender default values, if needed,
   * yet respect them otherwise. */
  if (preferred_work_group_size == 0) {
    preferred_work_group_size = oneapi_suggested_gpu_kernel_size((::DeviceKernel)kernel);
  }

  /* If there is no recommendation, then use manual default value. */
  if (preferred_work_group_size == 0) {
    preferred_work_group_size = preferred_work_group_size_default;
  }

  kernel_local_size = std::min(max_work_group_size, preferred_work_group_size);

  /* NOTE(@nsirgien): As for now non-uniform work-groups don't work on most oneAPI devices,
   * we extend work size to fit uniformity requirements. */
  kernel_global_size = round_up(kernel_global_size, kernel_local_size);

#  ifdef WITH_ONEAPI_SYCL_HOST_TASK
  /* Kernels listed below need a specific number of work groups.
   * NOTE(review): the `if` condition enumerating those kernels is missing from
   * this extracted view of the file. */
  {
    /* Path array implementation is serial in case of SYCL Host Task execution. */
    kernel_global_size = 1;
    kernel_local_size = 1;
  }
#  endif

  assert(kernel_global_size % kernel_local_size == 0);
}
992
/* Compute-runtime (ie. NEO) version is what gets returned by sycl/L0 on Windows
 * since Windows driver 101.3268. */
/* Minimum supported Intel GPU driver build on Windows (101.xxxx scheme, encoded
 * as the two trailing numeric groups). */
static const int lowest_supported_driver_version_win = 1015730;
#  ifdef _WIN32
/* For Windows driver 101.5730, compute-runtime version is 29550.
 * This information is returned by `ocloc query OCL_DRIVER_VERSION`.*/
static const int lowest_supported_driver_version_neo = 29550;
#  else
/* Minimum supported compute-runtime (NEO) build on Linux. */
static const int lowest_supported_driver_version_neo = 29735;
#  endif
1003
1004int parse_driver_build_version(const sycl::device &device)
1005{
1006 const std::string &driver_version = device.get_info<sycl::info::device::driver_version>();
1007 int driver_build_version = 0;
1008
1009 size_t second_dot_position = driver_version.find('.', driver_version.find('.') + 1);
1010 if (second_dot_position == std::string::npos) {
1011 std::cerr << "Unable to parse unknown Intel GPU driver version \"" << driver_version
1012 << "\" does not match xx.xx.xxxxx (Linux), x.x.xxxx (L0),"
1013 << " xx.xx.xxx.xxxx (Windows) for device \""
1014 << device.get_info<sycl::info::device::name>() << "\"." << std::endl;
1015 }
1016 else {
1017 try {
1018 size_t third_dot_position = driver_version.find('.', second_dot_position + 1);
1019 if (third_dot_position != std::string::npos) {
1020 const std::string &third_number_substr = driver_version.substr(
1021 second_dot_position + 1, third_dot_position - second_dot_position - 1);
1022 const std::string &forth_number_substr = driver_version.substr(third_dot_position + 1);
1023 if (third_number_substr.length() == 3 && forth_number_substr.length() == 4)
1024 driver_build_version = std::stoi(third_number_substr) * 10000 +
1025 std::stoi(forth_number_substr);
1026 }
1027 else {
1028 const std::string &third_number_substr = driver_version.substr(second_dot_position + 1);
1029 driver_build_version = std::stoi(third_number_substr);
1030 }
1031 }
1032 catch (std::invalid_argument &) {
1033 std::cerr << "Unable to parse unknown Intel GPU driver version \"" << driver_version
1034 << "\" does not match xx.xx.xxxxx (Linux), x.x.xxxx (L0),"
1035 << " xx.xx.xxx.xxxx (Windows) for device \""
1036 << device.get_info<sycl::info::device::name>() << "\"." << std::endl;
1037 }
1038 }
1039
1040 return driver_build_version;
1041}
1042
/* Enumerate the SYCL devices Cycles may render on.
 *
 * By default only GPUs exposed through the Level-Zero backend that pass the
 * generation heuristic and driver-version checks below are kept. Setting the
 * CYCLES_ONEAPI_ALL_DEVICES environment variable bypasses all filtering and
 * returns every device from every non-OpenCL platform. */
std::vector<sycl::device> available_sycl_devices()
{
  /* Escape hatch for testing/debugging: expose every device, unfiltered. */
  bool allow_all_devices = false;
  if (getenv("CYCLES_ONEAPI_ALL_DEVICES") != nullptr) {
    allow_all_devices = true;
  }

  const std::vector<sycl::platform> &oneapi_platforms = sycl::platform::get_platforms();

  std::vector<sycl::device> available_devices;
  for (const sycl::platform &platform : oneapi_platforms) {
    /* ignore OpenCL platforms to avoid using the same devices through both Level-Zero and OpenCL.
     */
    if (platform.get_backend() == sycl::backend::opencl) {
      continue;
    }

    /* With filtering active only GPU devices are queried; otherwise all device types. */
    const std::vector<sycl::device> &oneapi_devices =
        (allow_all_devices) ? platform.get_devices(sycl::info::device_type::all) :
                              platform.get_devices(sycl::info::device_type::gpu);

    for (const sycl::device &device : oneapi_devices) {
      bool filter_out = false;
      if (!allow_all_devices) {
        /* For now we support all Intel(R) Arc(TM) devices and likely any future GPU,
         * assuming they have either more than 96 Execution Units or not 7 threads per EU.
         * Official support can be broaden to older and smaller GPUs once ready. */
        if (!device.is_gpu() || platform.get_backend() != sycl::backend::ext_oneapi_level_zero) {
          filter_out = true;
        }
        else {
          /* Filtered-out defaults in-case these values aren't available. */
          int number_of_eus = 96;
          int threads_per_eu = 7;
          if (device.has(sycl::aspect::ext_intel_gpu_eu_count)) {
            number_of_eus = device.get_info<sycl::ext::intel::info::device::gpu_eu_count>();
          }
          if (device.has(sycl::aspect::ext_intel_gpu_hw_threads_per_eu)) {
            threads_per_eu =
                device.get_info<sycl::ext::intel::info::device::gpu_hw_threads_per_eu>();
          }
          /* This filters out all Level-Zero supported GPUs from older generation than Arc. */
          if (number_of_eus <= 96 && threads_per_eu == 7) {
            filter_out = true;
          }
          /* if not already filtered out, check driver version. */
          bool check_driver_version = !filter_out;
          /* We don't know how to check driver version strings for non-Intel GPUs. */
          if (check_driver_version &&
              device.get_info<sycl::info::device::vendor>().find("Intel") == std::string::npos)
          {
            check_driver_version = false;
          }
          /* Because of https://github.com/oneapi-src/unified-runtime/issues/1777, future drivers
           * may break parsing done by a SYCL runtime from before the fix we expect in major
           * version 8. Parsed driver version would start with something different than current
           * "1.3.". To avoid blocking a device by mistake in the case of new driver / old SYCL
           * runtime, we disable driver version check in case LIBSYCL_MAJOR_VERSION is below 8 and
           * actual driver version doesn't start with 1.3. */
# if __LIBSYCL_MAJOR_VERSION < 8
          if (check_driver_version &&
              !string_startswith(device.get_info<sycl::info::device::driver_version>(), "1.3."))
          {
            check_driver_version = false;
          }
# endif
          if (check_driver_version) {
            /* Reject drivers below the minimum supported build. Values above 100000 come
             * from the Windows xx.xx.xxx.xxxx scheme; smaller ones are compute-runtime
             * (NEO) build numbers — see the thresholds declared above. */
            int driver_build_version = parse_driver_build_version(device);
            if ((driver_build_version > 100000 &&
                 driver_build_version < lowest_supported_driver_version_win) ||
                driver_build_version < lowest_supported_driver_version_neo)
            {
              filter_out = true;
            }
          }
        }
      }
      if (!filter_out) {
        available_devices.push_back(device);
      }
    }
  }

  return available_devices;
}
1128
/* Build a multi-line, tab-separated report of attributes and aspects of every
 * available SYCL device, for display and debugging purposes.
 * The returned string is allocated with strdup(); the caller owns it and must free() it. */
char *OneapiDevice::device_capabilities()
{
  std::stringstream capabilities;

  const std::vector<sycl::device> &oneapi_devices = available_sycl_devices();
  for (const sycl::device &device : oneapi_devices) {
# ifndef WITH_ONEAPI_SYCL_HOST_TASK
    const std::string &name = device.get_info<sycl::info::device::name>();
# else
    const std::string &name = "SYCL Host Task (Debug)";
# endif

    capabilities << std::string("\t") << name << "\n";
    capabilities << "\t\tsycl::info::platform::name\t\t\t"
                 << device.get_platform().get_info<sycl::info::platform::name>() << "\n";

    /* Local helper macros (undefined again at the end of the loop body):
     * WRITE_ATTR: print an already-computed value under a given label.
     * GET_ATTR: query and print a standard device-info descriptor.
     * GET_INTEL_ATTR: query and print an Intel extension descriptor, if the
     *   device reports the matching aspect.
     * GET_ASPECT: print whether the device has a given aspect. */
# define WRITE_ATTR(attribute_name, attribute_variable) \
  capabilities << "\t\tsycl::info::device::" #attribute_name "\t\t\t" << attribute_variable \
               << "\n";
# define GET_ATTR(attribute) \
  { \
    capabilities << "\t\tsycl::info::device::" #attribute "\t\t\t" \
                 << device.get_info<sycl::info::device ::attribute>() << "\n"; \
  }
# define GET_INTEL_ATTR(attribute) \
  { \
    if (device.has(sycl::aspect::ext_intel_##attribute)) { \
      capabilities << "\t\tsycl::ext::intel::info::device::" #attribute "\t\t\t" \
                   << device.get_info<sycl::ext::intel::info::device ::attribute>() << "\n"; \
    } \
  }
# define GET_ASPECT(aspect_) \
  { \
    capabilities << "\t\tdevice::has(" #aspect_ ")\t\t\t" << device.has(sycl::aspect ::aspect_) \
                 << "\n"; \
  }

    GET_ATTR(vendor)
    GET_ATTR(driver_version)
    GET_ATTR(max_compute_units)
    GET_ATTR(max_clock_frequency)
    GET_ATTR(global_mem_size)
    GET_INTEL_ATTR(pci_address)
    GET_INTEL_ATTR(gpu_eu_simd_width)
    GET_INTEL_ATTR(gpu_eu_count)
    GET_INTEL_ATTR(gpu_slices)
    GET_INTEL_ATTR(gpu_subslices_per_slice)
    GET_INTEL_ATTR(gpu_eu_count_per_subslice)
    GET_INTEL_ATTR(gpu_hw_threads_per_eu)
    GET_INTEL_ATTR(max_mem_bandwidth)
    GET_ATTR(max_work_group_size)
    GET_ATTR(max_work_item_dimensions)
    /* max_work_item_sizes is an id<3>, so its components are written individually. */
    sycl::id<3> max_work_item_sizes =
        device.get_info<sycl::info::device::max_work_item_sizes<3>>();
    WRITE_ATTR(max_work_item_sizes[0], max_work_item_sizes.get(0))
    WRITE_ATTR(max_work_item_sizes[1], max_work_item_sizes.get(1))
    WRITE_ATTR(max_work_item_sizes[2], max_work_item_sizes.get(2))

    GET_ATTR(max_num_sub_groups)
    /* One line per supported sub-group size. */
    for (size_t sub_group_size : device.get_info<sycl::info::device::sub_group_sizes>()) {
      WRITE_ATTR(sub_group_size[], sub_group_size)
    }
    GET_ATTR(sub_group_independent_forward_progress)

    GET_ATTR(preferred_vector_width_char)
    GET_ATTR(preferred_vector_width_short)
    GET_ATTR(preferred_vector_width_int)
    GET_ATTR(preferred_vector_width_long)
    GET_ATTR(preferred_vector_width_float)
    GET_ATTR(preferred_vector_width_double)
    GET_ATTR(preferred_vector_width_half)

    GET_ATTR(address_bits)
    GET_ATTR(max_mem_alloc_size)
    GET_ATTR(mem_base_addr_align)
    GET_ATTR(error_correction_support)
    GET_ATTR(is_available)

    GET_ASPECT(cpu)
    GET_ASPECT(gpu)
    GET_ASPECT(fp16)
    GET_ASPECT(atomic64)
    GET_ASPECT(usm_host_allocations)
    GET_ASPECT(usm_device_allocations)
    GET_ASPECT(usm_shared_allocations)
    GET_ASPECT(usm_system_allocations)

    /* These aspect enumerators only exist when the SYCL runtime was built with the
     * corresponding extensions, hence the feature-test guards. */
# ifdef __SYCL_ANY_DEVICE_HAS_ext_oneapi_non_uniform_groups__
    GET_ASPECT(ext_oneapi_non_uniform_groups)
# endif
# ifdef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_images__
    GET_ASPECT(ext_oneapi_bindless_images)
# endif
# ifdef __SYCL_ANY_DEVICE_HAS_ext_oneapi_interop_semaphore_import__
    GET_ASPECT(ext_oneapi_interop_semaphore_import)
# endif
# ifdef __SYCL_ANY_DEVICE_HAS_ext_oneapi_interop_semaphore_export__
    GET_ASPECT(ext_oneapi_interop_semaphore_export)
# endif

# undef GET_INTEL_ATTR
# undef GET_ASPECT
# undef GET_ATTR
# undef WRITE_ATTR
    capabilities << "\n";
  }

  return ::strdup(capabilities.str().c_str());
}
1238
1239void OneapiDevice::iterate_devices(OneAPIDeviceIteratorCallback cb, void *user_ptr)
1240{
1241 int num = 0;
1242 std::vector<sycl::device> devices = available_sycl_devices();
1243 for (sycl::device &device : devices) {
1244 const std::string &platform_name =
1245 device.get_platform().get_info<sycl::info::platform::name>();
1246# ifndef WITH_ONEAPI_SYCL_HOST_TASK
1247 std::string name = device.get_info<sycl::info::device::name>();
1248# else
1249 std::string name = "SYCL Host Task (Debug)";
1250# endif
1251# ifdef WITH_EMBREE_GPU
1252 bool hwrt_support = rtcIsSYCLDeviceSupported(device);
1253# else
1254 bool hwrt_support = false;
1255# endif
1256# if defined(WITH_OPENIMAGEDENOISE) && OIDN_VERSION >= 20300
1257 bool oidn_support = oidnIsSYCLDeviceSupported(&device);
1258# else
1259 bool oidn_support = false;
1260# endif
1261 std::string id = "ONEAPI_" + platform_name + "_" + name;
1262 if (device.has(sycl::aspect::ext_intel_pci_address)) {
1263 id.append("_" + device.get_info<sycl::ext::intel::info::device::pci_address>());
1264 }
1265 (cb)(id.c_str(), name.c_str(), num, hwrt_support, oidn_support, user_ptr);
1266 num++;
1267 }
1268}
1269
1270size_t OneapiDevice::get_memcapacity()
1271{
1272 return reinterpret_cast<sycl::queue *>(device_queue_)
1273 ->get_device()
1274 .get_info<sycl::info::device::global_mem_size>();
1275}
1276
1277int OneapiDevice::get_num_multiprocessors()
1278{
1279 const sycl::device &device = reinterpret_cast<sycl::queue *>(device_queue_)->get_device();
1280 if (device.has(sycl::aspect::ext_intel_gpu_eu_count)) {
1281 return device.get_info<sycl::ext::intel::info::device::gpu_eu_count>();
1282 }
1283 else
1284 return 0;
1285}
1286
1287int OneapiDevice::get_max_num_threads_per_multiprocessor()
1288{
1289 const sycl::device &device = reinterpret_cast<sycl::queue *>(device_queue_)->get_device();
1290 if (device.has(sycl::aspect::ext_intel_gpu_eu_simd_width) &&
1291 device.has(sycl::aspect::ext_intel_gpu_hw_threads_per_eu))
1292 {
1293 return device.get_info<sycl::ext::intel::info::device::gpu_eu_simd_width>() *
1294 device.get_info<sycl::ext::intel::info::device::gpu_hw_threads_per_eu>();
1295 }
1296 else
1297 return 0;
1298}
1299
1301
1302#endif
void BLI_kdtree_nd_ free(KDTree *tree)
unsigned int uint
volatile int lock
ATTR_WARN_UNUSED_RESULT const BMVert const BMEdge * e
void refit(btStridingMeshInterface *triangles, const btVector3 &aabbMin, const btVector3 &aabbMax)
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
Definition btQuadWord.h:119
static PointerRNA * get_pointer_type(ButsContextPath *path, StructRNA *type)
BVHLayout bvh_layout
Definition params.h:84
bool top_level
Definition params.h:81
Definition bvh/bvh.h:66
BVHParams params
Definition bvh/bvh.h:68
string description
bool use_hardware_raytracing
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit)
size_t memory_elements_size(int elements)
@ MEM_TEXTURE
@ MEM_READ_ONLY
#define KERNEL_DATA_ARRAY(type, name)
Definition data_arrays.h:6
#define CCL_NAMESPACE_END
struct KernelGlobalsGPU KernelGlobalsGPU
draw_view in_light_buf[] float
KernelData
@ BVH_LAYOUT_BVH2
@ BVH_LAYOUT_EMBREEGPU
#define KERNEL_FEATURE_NODE_RAYTRACE
#define KERNEL_FEATURE_MNEE
DeviceKernel
@ DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT
@ DEVICE_KERNEL_INTEGRATOR_SHADE_DEDICATED_LIGHT
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE
@ DEVICE_KERNEL_SHADER_EVAL_DISPLACE
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE
@ DEVICE_KERNEL_INTEGRATOR_QUEUED_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW
@ DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK
@ DEVICE_KERNEL_SHADER_EVAL_BACKGROUND
@ DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_DEDICATED_LIGHT
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE
@ DEVICE_KERNEL_INTEGRATOR_TERMINATED_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_INIT_FROM_CAMERA
@ DEVICE_KERNEL_CRYPTOMATTE_POSTPROCESS
@ DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_INIT_FROM_BAKE
@ DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY
@ DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST
@ DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND
#define VLOG_INFO
Definition log.h:72
#define VLOG_DEBUG
Definition log.h:81
Vector< CPUDevice > devices
list of all CPUDevices. for every hardware thread an instance of CPUDevice is created
int BVHLayoutMask
Definition params.h:51
unsigned __int64 uint64_t
Definition stdint.h:90
string string_human_readable_size(size_t size)
Definition string.cpp:234
string string_human_readable_number(size_t num)
Definition string.cpp:255
bool string_startswith(const string_view s, const string_view start)
Definition string.cpp:103
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
Definition string.cpp:23
IntegratorStateGPU * integrator_state
uint64_t data
std::unique_lock< std::mutex > thread_scoped_lock
Definition thread.h:30
ccl_device_inline size_t round_up(size_t x, size_t multiple)
Definition util/types.h:58
uint64_t device_ptr
Definition util/types.h:45