/* Per-kernel bookkeeping record. Only two members are visible in this excerpt:
 * a double `elapsed_summary` and an int `enqueue_count`, both default-initialized to zero.
 * NOTE(review): presumably these accumulate total elapsed time and number of launches
 * per kernel; confirm against the code that updates them (not visible here). */
15struct KernelExecutionInfo {
16 double elapsed_summary = 0.0;
17 int enqueue_count = 0;
/* Constructor: takes a pointer to the OneapiDevice this queue works with.
 * The initializer list and body are not visible in this excerpt. */
22OneapiDeviceQueue::OneapiDeviceQueue(OneapiDevice *device)
/* Number of concurrent states for a given per-state size.
 * The visible part seeds `num_states` with four times the result of
 * num_concurrent_busy_states(state_size). Any later adjustment and the return
 * statement are not visible in this excerpt. */
27int OneapiDeviceQueue::num_concurrent_states(
const size_t state_size)
const
29 int num_states = 4 * num_concurrent_busy_states(state_size);
/* Number of states expected to be busy at once.
 * The size_t parameter is unnamed, so the state size does not influence the result.
 * The value is derived from the device's thread capacity
 * (multiprocessor count times maximum threads per multiprocessor). */
37int OneapiDeviceQueue::num_concurrent_busy_states(
const size_t )
const
39 const int max_num_threads = oneapi_device_->get_num_multiprocessors() *
40 oneapi_device_->get_max_num_threads_per_multiprocessor();
/* Eight states per hardware thread with a floor of 65536, then scaled by four:
 * the result is therefore never below 262144. */
42 return 4 *
max(8 * max_num_threads, 65536);
/* Number of partitions used when sorting `max_num_paths` paths.
 * The second (uint) parameter is unnamed and therefore unused. */
45int OneapiDeviceQueue::num_sort_partitions(
int max_num_paths,
uint )
const
/* Partition size is selected by whether the device reports at least 128 threads per
 * multiprocessor; the two candidate sizes are not visible in this excerpt. */
47 int sort_partition_elements = (oneapi_device_->get_max_num_threads_per_multiprocessor() >= 128) ?
/* Integer division, clamped so that at least one partition is always returned. */
52 return max(max_num_paths / sort_partition_elements, 1);
/* Prepares the queue for kernel execution: asks the device to load texture info,
 * builds a fresh KernelContext holding the device's SYCL queue and kernel-globals
 * device pointer, then calls debug_init_execution(). */
55void OneapiDeviceQueue::init_execution()
57 oneapi_device_->load_texture_info();
59 SyclQueue *device_queue = oneapi_device_->sycl_queue();
60 void *kg_dptr = oneapi_device_->kernel_globals_device_pointer();
/* Any previously held context is replaced by the newly created one. */
63 kernel_context_ = make_unique<KernelContext>();
64 kernel_context_->queue = device_queue;
65 kernel_context_->kernel_globals = kg_dptr;
67 debug_init_execution();
/* Fragment of the kernel-enqueue routine. Its header starts above this excerpt; only the
 * `signed_kernel_work_size` parameter is visible. The visible flow is:
 *   - check the device error state and call load_texture_info() (branch bodies not visible),
 *   - compute global/local work sizes and submit the kernel through the device,
 *   - return whether the device reported the submission as successful. */
71 const int signed_kernel_work_size,
74 if (oneapi_device_->have_error()) {
79 if (oneapi_device_->load_texture_info()) {
/* The argument array is exposed through `_args.values`; constness is cast away to
 * obtain the `void **` form used by the enqueue_kernel() call below. */
85 void **args =
const_cast<void **
>(_args.
values);
87 debug_enqueue_begin(kernel, signed_kernel_work_size);
/* The work size arrives as a signed int; it is asserted non-negative before being
 * converted to size_t. */
88 assert(signed_kernel_work_size >= 0);
89 size_t kernel_global_size = (size_t)signed_kernel_work_size;
/* Left uninitialized here. NOTE(review): presumably filled in by
 * get_adjusted_global_and_local_sizes() below; confirm against its signature. */
90 size_t kernel_local_size;
93 kernel_context_->scene_max_shaders = oneapi_device_->scene_max_shaders();
95 oneapi_device_->get_adjusted_global_and_local_sizes(
96 kernel_context_->queue, kernel, kernel_global_size, kernel_local_size);
99 bool is_finished_ok = oneapi_device_->enqueue_kernel(
100 kernel_context_.get(), kernel, kernel_global_size, kernel_local_size, args);
/* On failure an error string is assembled that embeds the device's
 * oneapi_error_message(); the statement that consumes it is not visible here. */
102 if (is_finished_ok ==
false) {
104 "\" execution error: got runtime exception \"" +
105 oneapi_device_->oneapi_error_message() +
"\"");
110 return is_finished_ok;
/* Synchronizes the device's SYCL queue.
 * Returns true when the device has no recorded error after the attempt. */
113bool OneapiDeviceQueue::synchronize()
/* The device error state is checked first; the body of this branch is not visible
 * in this excerpt. */
115 if (oneapi_device_->have_error()) {
119 bool is_finished_ok = oneapi_device_->queue_synchronize(oneapi_device_->sycl_queue());
/* A failed synchronization is recorded on the device as an error whose text embeds
 * the device's oneapi_error_message(). */
120 if (is_finished_ok ==
false) {
121 oneapi_device_->set_error(
"oneAPI unknown kernel execution error: got runtime exception \"" +
122 oneapi_device_->oneapi_error_message() +
"\"");
/* The result reflects the device error state rather than `is_finished_ok` directly. */
127 return !(oneapi_device_->have_error());
/* Three separate memory operations, each forwarding `mem` to the device: zero-fill,
 * copy to the device, and copy back from the device. The enclosing function headers
 * are not visible in this excerpt. */
132 oneapi_device_->mem_zero(mem);
137 oneapi_device_->mem_copy_to(mem);
142 oneapi_device_->mem_copy_from(mem);
/* Macros defined with empty replacement text, and function declarations without bodies.
 * NOTE(review): these look like placeholder stubs for names defined elsewhere in the
 * project; confirm before relying on them. */
#define CCL_NAMESPACE_END
/* Declaration only: takes a DeviceKernel and returns a C string. */
const char * device_kernel_as_string(DeviceKernel kernel)
/* With this definition, assert(...) expands to nothing, so asserted expressions are
 * never evaluated. */
#define assert(assertion)
#define VLOG_DEVICE_STATS
/* Declaration only: takes a size_t and returns a string. */
string string_human_readable_size(size_t size)