Blender V5.0
oneapi/queue.cpp
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2021-2025 Intel Corporation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#ifdef WITH_ONEAPI
6
7# include "device/oneapi/queue.h"
10# include "util/log.h"
11
13
15
/* Bookkeeping record pairing a running elapsed-time total with an enqueue counter.
 * Both members start at zero.
 * NOTE(review): the unit of `elapsed_summary` is not established in this file - confirm at the
 * point of use. */
struct KernelExecutionInfo {
  /* Sum of elapsed times accumulated so far. */
  double elapsed_summary{0.0};
  /* Number of enqueues accumulated so far. */
  int enqueue_count{0};
};
20
21/* OneapiDeviceQueue */
22
23OneapiDeviceQueue::OneapiDeviceQueue(OneapiDevice *device)
24 : DeviceQueue(device), oneapi_device_(device)
25{
26}
27
28int OneapiDeviceQueue::num_concurrent_states(const size_t state_size) const
29{
30 int num_states = 4 * num_concurrent_busy_states(state_size);
31
32 LOG_TRACE << "GPU queue concurrent states: " << num_states << ", using up to "
34
35 return num_states;
36}
37
38int OneapiDeviceQueue::num_concurrent_busy_states(const size_t /*state_size*/) const
39{
40 const int max_num_threads = oneapi_device_->get_num_multiprocessors() *
41 oneapi_device_->get_max_num_threads_per_multiprocessor();
42
43 return 4 * max(8 * max_num_threads, 65536);
44}
45
46int OneapiDeviceQueue::num_sort_partitions(int max_num_paths, uint /*max_scene_shaders*/) const
47{
48 int sort_partition_elements = (oneapi_device_->get_max_num_threads_per_multiprocessor() >= 128) ?
49 65536 :
50 8192;
51 /* Sort partitioning with local sorting on Intel GPUs is currently the most effective solution no
52 * matter the number of shaders. */
53 return max(max_num_paths / sort_partition_elements, 1);
54}
55
56void OneapiDeviceQueue::init_execution()
57{
58 oneapi_device_->load_texture_info();
59
60 SyclQueue *device_queue = oneapi_device_->sycl_queue();
61 void *kg_dptr = oneapi_device_->kernel_globals_device_pointer();
62 assert(device_queue);
63 assert(kg_dptr);
64 kernel_context_ = make_unique<KernelContext>();
65 kernel_context_->queue = device_queue;
66 kernel_context_->kernel_globals = kg_dptr;
67
68 debug_init_execution();
69}
70
71bool OneapiDeviceQueue::enqueue(DeviceKernel kernel,
72 const int signed_kernel_work_size,
73 const DeviceKernelArguments &_args)
74{
75 if (oneapi_device_->have_error()) {
76 return false;
77 }
78
79 /* Update texture info in case memory moved to host. */
80 if (oneapi_device_->load_texture_info()) {
81 if (!synchronize()) {
82 return false;
83 }
84 }
85
86 void **args = const_cast<void **>(_args.values);
87
88 debug_enqueue_begin(kernel, signed_kernel_work_size);
89 assert(signed_kernel_work_size >= 0);
90 size_t kernel_global_size = (size_t)signed_kernel_work_size;
91 size_t kernel_local_size;
92
93 assert(kernel_context_);
94 kernel_context_->scene_max_shaders = oneapi_device_->scene_max_shaders();
95
96 oneapi_device_->get_adjusted_global_and_local_sizes(
97 kernel_context_->queue, kernel, kernel_global_size, kernel_local_size);
98
99 /* Call the oneAPI kernel DLL to launch the requested kernel. */
100 bool is_finished_ok = oneapi_device_->enqueue_kernel(
101 kernel_context_.get(), kernel, kernel_global_size, kernel_local_size, args);
102
103 if (is_finished_ok == false) {
104 oneapi_device_->set_error("oneAPI kernel \"" + std::string(device_kernel_as_string(kernel)) +
105 "\" execution error: got runtime exception \"" +
106 oneapi_device_->oneapi_error_message() + "\"");
107 }
108
109 debug_enqueue_end();
110
111 return is_finished_ok;
112}
113
114bool OneapiDeviceQueue::synchronize()
115{
116 if (oneapi_device_->have_error()) {
117 return false;
118 }
119
120 bool is_finished_ok = oneapi_device_->queue_synchronize(oneapi_device_->sycl_queue());
121 if (is_finished_ok == false) {
122 oneapi_device_->set_error("oneAPI unknown kernel execution error: got runtime exception \"" +
123 oneapi_device_->oneapi_error_message() + "\"");
124 }
125
126 debug_synchronize();
127
128 return !(oneapi_device_->have_error());
129}
130
131void OneapiDeviceQueue::zero_to_device(device_memory &mem)
132{
133 oneapi_device_->mem_zero(mem);
134}
135
136void OneapiDeviceQueue::copy_to_device(device_memory &mem)
137{
138 oneapi_device_->mem_copy_to(mem);
139}
140
141void OneapiDeviceQueue::copy_from_device(device_memory &mem)
142{
143 oneapi_device_->mem_copy_from(mem);
144}
145
146# ifdef SYCL_LINEAR_MEMORY_INTEROP_AVAILABLE
147unique_ptr<DeviceGraphicsInterop> OneapiDeviceQueue::graphics_interop_create()
148{
149 return make_unique<OneapiDeviceGraphicsInterop>(this);
150}
151# endif
152
154
155#endif /* WITH_ONEAPI */
unsigned int uint
#define CCL_NAMESPACE_END
const char * device_kernel_as_string(DeviceKernel kernel)
#define assert(assertion)
const int num_states
DeviceKernel
#define LOG_TRACE
Definition log.h:108
string string_human_readable_size(size_t size)
Definition string.cpp:257
void * values[MAX_ARGS]
max
Definition text_draw.cc:251