Blender V4.3
oneapi/queue.cpp
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2021-2022 Intel Corporation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#ifdef WITH_ONEAPI
6
7# include "device/oneapi/queue.h"
9# include "util/log.h"
10# include "util/time.h"
11# include <iomanip>
12# include <vector>
13
15
17
/* Per-kernel execution bookkeeping: an elapsed-time accumulator and an enqueue counter.
 * Both members start at zero. */
struct KernelExecutionInfo {
  /* Accumulated elapsed value; starts at zero. */
  double elapsed_summary{0.0};
  /* Number of enqueues recorded; starts at zero. */
  int enqueue_count{0};
};
22
23/* OneapiDeviceQueue */
24
25OneapiDeviceQueue::OneapiDeviceQueue(OneapiDevice *device)
26 : DeviceQueue(device), oneapi_device_(device), kernel_context_(nullptr)
27{
28}
29
30OneapiDeviceQueue::~OneapiDeviceQueue()
31{
32 delete kernel_context_;
33}
34
35int OneapiDeviceQueue::num_concurrent_states(const size_t state_size) const
36{
37 int num_states = 4 * num_concurrent_busy_states(state_size);
38
39 VLOG_DEVICE_STATS << "GPU queue concurrent states: " << num_states << ", using up to "
41
42 return num_states;
43}
44
45int OneapiDeviceQueue::num_concurrent_busy_states(const size_t /*state_size*/) const
46{
47 const int max_num_threads = oneapi_device_->get_num_multiprocessors() *
48 oneapi_device_->get_max_num_threads_per_multiprocessor();
49
50 return 4 * max(8 * max_num_threads, 65536);
51}
52
53int OneapiDeviceQueue::num_sort_partition_elements() const
54{
55 return (oneapi_device_->get_max_num_threads_per_multiprocessor() >= 128) ? 65536 : 8192;
56}
57
58void OneapiDeviceQueue::init_execution()
59{
60 oneapi_device_->load_texture_info();
61
62 SyclQueue *device_queue = oneapi_device_->sycl_queue();
63 void *kg_dptr = (void *)oneapi_device_->kernel_globals_device_pointer();
64 assert(device_queue);
65 assert(kg_dptr);
66 kernel_context_ = new KernelContext{device_queue, kg_dptr, 0};
67
68 debug_init_execution();
69}
70
71bool OneapiDeviceQueue::enqueue(DeviceKernel kernel,
72 const int signed_kernel_work_size,
73 DeviceKernelArguments const &_args)
74{
75 if (oneapi_device_->have_error()) {
76 return false;
77 }
78
79 void **args = const_cast<void **>(_args.values);
80
81 debug_enqueue_begin(kernel, signed_kernel_work_size);
82 assert(signed_kernel_work_size >= 0);
83 size_t kernel_global_size = (size_t)signed_kernel_work_size;
84 size_t kernel_local_size;
85
86 assert(kernel_context_);
87 kernel_context_->scene_max_shaders = oneapi_device_->scene_max_shaders();
88
89 oneapi_device_->get_adjusted_global_and_local_sizes(
90 kernel_context_->queue, kernel, kernel_global_size, kernel_local_size);
91
92 /* Call the oneAPI kernel DLL to launch the requested kernel. */
93 bool is_finished_ok = oneapi_device_->enqueue_kernel(
94 kernel_context_, kernel, kernel_global_size, kernel_local_size, args);
95
96 if (is_finished_ok == false) {
97 oneapi_device_->set_error("oneAPI kernel \"" + std::string(device_kernel_as_string(kernel)) +
98 "\" execution error: got runtime exception \"" +
99 oneapi_device_->oneapi_error_message() + "\"");
100 }
101
102 debug_enqueue_end();
103
104 return is_finished_ok;
105}
106
107bool OneapiDeviceQueue::synchronize()
108{
109 if (oneapi_device_->have_error()) {
110 return false;
111 }
112
113 bool is_finished_ok = oneapi_device_->queue_synchronize(oneapi_device_->sycl_queue());
114 if (is_finished_ok == false)
115 oneapi_device_->set_error("oneAPI unknown kernel execution error: got runtime exception \"" +
116 oneapi_device_->oneapi_error_message() + "\"");
117
118 debug_synchronize();
119
120 return !(oneapi_device_->have_error());
121}
122
123void OneapiDeviceQueue::zero_to_device(device_memory &mem)
124{
125 oneapi_device_->mem_zero(mem);
126}
127
128void OneapiDeviceQueue::copy_to_device(device_memory &mem)
129{
130 oneapi_device_->mem_copy_to(mem);
131}
132
133void OneapiDeviceQueue::copy_from_device(device_memory &mem)
134{
135 oneapi_device_->mem_copy_from(mem);
136}
137
139
140#endif /* WITH_ONEAPI */
#define CCL_NAMESPACE_END
const char * device_kernel_as_string(DeviceKernel kernel)
DeviceKernel
#define VLOG_DEVICE_STATS
Definition log.h:78
string string_human_readable_size(size_t size)
Definition string.cpp:234
void * values[MAX_ARGS]
float max