14# include <sycl/sycl.hpp>
24static OneAPIErrorCallback s_error_cb =
nullptr;
25static void *s_error_user_ptr =
nullptr;
27# ifdef WITH_EMBREE_GPU
28static const RTCFeatureFlags CYCLES_ONEAPI_EMBREE_BASIC_FEATURES = (
const RTCFeatureFlags)(
29 RTC_FEATURE_FLAG_TRIANGLE | RTC_FEATURE_FLAG_INSTANCE |
30 RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS | RTC_FEATURE_FLAG_POINT |
31 RTC_FEATURE_FLAG_MOTION_BLUR);
32static const RTCFeatureFlags CYCLES_ONEAPI_EMBREE_ALL_FEATURES = (
const RTCFeatureFlags)(
33 CYCLES_ONEAPI_EMBREE_BASIC_FEATURES | RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE |
34 RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE);
37void oneapi_set_error_cb(OneAPIErrorCallback cb,
void *user_ptr)
40 s_error_user_ptr = user_ptr;
43size_t oneapi_suggested_gpu_kernel_size(
const DeviceKernel kernel)
76bool oneapi_run_test_kernel(SyclQueue *queue_)
79 sycl::queue *queue =
reinterpret_cast<sycl::queue *
>(queue_);
81 const size_t memory_byte_size =
sizeof(
int) *
N;
83 bool is_computation_correct =
true;
85 int *A_host = (
int *)sycl::aligned_alloc_host(16, memory_byte_size, *queue);
87 for (
size_t i = (
size_t)0; i <
N; i++) {
88 A_host[i] = rand() % 32;
91 int *A_device = (
int *)sycl::malloc_device(memory_byte_size, *queue);
92 int *B_device = (
int *)sycl::malloc_device(memory_byte_size, *queue);
94 queue->memcpy(A_device, A_host, memory_byte_size);
95 queue->wait_and_throw();
97 queue->submit([&](sycl::handler &cgh) {
98 cgh.parallel_for(
N, [=](sycl::id<1> idx) { B_device[idx] = A_device[idx] + idx.get(0); });
100 queue->wait_and_throw();
102 int *B_host = (
int *)sycl::aligned_alloc_host(16, memory_byte_size, *queue);
104 queue->memcpy(B_host, B_device, memory_byte_size);
105 queue->wait_and_throw();
107 for (
size_t i = (
size_t)0; i <
N; i++) {
108 const int expected_result = i + A_host[i];
109 if (B_host[i] != expected_result) {
110 is_computation_correct =
false;
112 s_error_cb((
"Incorrect result in test kernel execution - expected " +
113 std::to_string(expected_result) +
", got " + std::to_string(B_host[i]))
120 sycl::free(A_host, *queue);
121 sycl::free(B_host, *queue);
122 sycl::free(A_device, *queue);
123 sycl::free(B_device, *queue);
124 queue->wait_and_throw();
126 catch (sycl::exception
const &
e) {
128 s_error_cb(
e.what(), s_error_user_ptr);
133 return is_computation_correct;
136bool oneapi_zero_memory_on_device(SyclQueue *queue_,
void *device_pointer,
size_t num_bytes)
139 sycl::queue *queue =
reinterpret_cast<sycl::queue *
>(queue_);
141 queue->memset(device_pointer, 0, num_bytes);
142 queue->wait_and_throw();
145 catch (sycl::exception
const &
e) {
147 s_error_cb(
e.what(), s_error_user_ptr);
153bool oneapi_kernel_is_required_for_features(
const std::string &kernel_name,
154 const uint kernel_features)
157 if (kernel_name.find(
"oneapi_kernel_") == std::string::npos) {
184 std::string::npos) ||
186 std::string::npos) ||
188 std::string::npos) ||
198bool oneapi_kernel_is_compatible_with_hardware_raytracing(
const std::string &kernel_name)
202# if defined(RTC_VERSION) && RTC_VERSION < 40100
204 std::string::npos) &&
212bool oneapi_kernel_has_intersections(
const std::string &kernel_name)
225bool oneapi_load_kernels(SyclQueue *queue_,
226 const uint kernel_features,
227 bool use_hardware_raytracing)
230 sycl::queue *queue =
reinterpret_cast<sycl::queue *
>(queue_);
232# ifdef WITH_EMBREE_GPU
234 if (use_hardware_raytracing) {
236 sycl::kernel_bundle<sycl::bundle_state::input> all_kernels_bundle =
237 sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(),
238 {queue->get_device()});
240 for (
const sycl::kernel_id &kernel_id : all_kernels_bundle.get_kernel_ids()) {
241 const std::string &kernel_name = kernel_id.get_name();
243 if (!oneapi_kernel_is_required_for_features(kernel_name, kernel_features) ||
244 !(oneapi_kernel_has_intersections(kernel_name) &&
245 oneapi_kernel_is_compatible_with_hardware_raytracing(kernel_name)))
250 sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle_input =
251 sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(), {kernel_id});
255 one_kernel_bundle_input
256 .set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
257 CYCLES_ONEAPI_EMBREE_ALL_FEATURES);
258 sycl::build(one_kernel_bundle_input);
261 one_kernel_bundle_input
262 .set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
263 CYCLES_ONEAPI_EMBREE_BASIC_FEATURES);
264 sycl::build(one_kernel_bundle_input);
268 catch (sycl::exception
const &
e) {
270 s_error_cb(
e.what(), s_error_user_ptr);
278 sycl::kernel_bundle<sycl::bundle_state::input> all_kernels_bundle =
279 sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(),
280 {queue->get_device()});
282 for (
const sycl::kernel_id &kernel_id : all_kernels_bundle.get_kernel_ids()) {
283 const std::string &kernel_name = kernel_id.get_name();
287 if (!oneapi_kernel_is_required_for_features(kernel_name, kernel_features) ||
288 (use_hardware_raytracing && oneapi_kernel_has_intersections(kernel_name) &&
289 oneapi_kernel_is_compatible_with_hardware_raytracing(kernel_name)))
294# ifdef WITH_EMBREE_GPU
295 if (oneapi_kernel_has_intersections(kernel_name)) {
296 sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle_input =
297 sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(), {kernel_id});
298 one_kernel_bundle_input
299 .set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
300 RTC_FEATURE_FLAG_NONE);
301 sycl::build(one_kernel_bundle_input);
307 (void)sycl::get_kernel_bundle<sycl::bundle_state::executable>(queue->get_context(),
311 catch (sycl::exception
const &
e) {
313 s_error_cb(
e.what(), s_error_user_ptr);
320bool oneapi_enqueue_kernel(KernelContext *kernel_context,
324 const uint kernel_features,
325 bool use_hardware_raytracing,
331 sycl::queue *queue =
reinterpret_cast<sycl::queue *
>(kernel_context->queue);
339# pragma warning(error : 4062)
340# elif defined(__GNUC__)
341# pragma GCC diagnostic push
342# pragma GCC diagnostic error "-Wswitch"
350 max_shaders = (kernel_context->scene_max_shaders);
354 queue->submit([&](sycl::handler &cgh) {
355# ifdef WITH_EMBREE_GPU
359 const RTCFeatureFlags used_embree_features = !use_hardware_raytracing ?
360 RTC_FEATURE_FLAG_NONE :
362 CYCLES_ONEAPI_EMBREE_BASIC_FEATURES :
363 CYCLES_ONEAPI_EMBREE_ALL_FEATURES;
364 cgh.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
365 used_embree_features);
368 (void)kernel_features;
370 switch (device_kernel) {
372 oneapi_call(kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_reset);
377 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_init_from_camera);
382 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_init_from_bake);
387 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_intersect_closest);
392 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_intersect_shadow);
401 oneapi_kernel_integrator_intersect_subsurface);
410 oneapi_kernel_integrator_intersect_volume_stack);
419 oneapi_kernel_integrator_intersect_dedicated_light);
424 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_shade_background);
429 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_shade_light);
434 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_shade_shadow);
439 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_shade_surface);
448 oneapi_kernel_integrator_shade_surface_raytrace);
453 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_shade_surface_mnee);
458 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_shade_volume);
467 oneapi_kernel_integrator_shade_dedicated_light);
472 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_queued_paths_array);
481 oneapi_kernel_integrator_queued_shadow_paths_array);
486 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_active_paths_array);
495 oneapi_kernel_integrator_terminated_paths_array);
504 oneapi_kernel_integrator_terminated_shadow_paths_array);
509 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_sorted_paths_array);
513 sycl::local_accessor<int> local_mem(max_shaders, cgh);
514 oneapi_kernel_integrator_sort_bucket_pass(kg,
527 sycl::local_accessor<int> local_mem(max_shaders, cgh);
528 oneapi_kernel_integrator_sort_write_pass(kg,
546 oneapi_kernel_integrator_compact_paths_array);
555 oneapi_kernel_integrator_compact_shadow_paths_array);
564 oneapi_kernel_adaptive_sampling_convergence_check);
569 kg, cgh, global_size, local_size, args, oneapi_kernel_adaptive_sampling_filter_x);
574 kg, cgh, global_size, local_size, args, oneapi_kernel_adaptive_sampling_filter_y);
578 oneapi_call(kg, cgh, global_size, local_size, args, oneapi_kernel_shader_eval_displace);
583 kg, cgh, global_size, local_size, args, oneapi_kernel_shader_eval_background);
592 oneapi_kernel_shader_eval_curve_shadow_transparency);
596 oneapi_call(kg, cgh, global_size, local_size, args, oneapi_kernel_prefix_sum);
601 # define DEVICE_KERNEL_FILM_CONVERT_PARTIAL(VARIANT, variant) \
602 case DEVICE_KERNEL_FILM_CONVERT_##VARIANT: { \
603 oneapi_call(kg, cgh, \
607 oneapi_kernel_film_convert_##variant); \
611# define DEVICE_KERNEL_FILM_CONVERT(variant, VARIANT) \
612 DEVICE_KERNEL_FILM_CONVERT_PARTIAL(VARIANT, variant) \
613 DEVICE_KERNEL_FILM_CONVERT_PARTIAL(VARIANT##_HALF_RGBA, variant##_half_rgba)
615 DEVICE_KERNEL_FILM_CONVERT(depth, DEPTH);
616 DEVICE_KERNEL_FILM_CONVERT(mist, MIST);
617 DEVICE_KERNEL_FILM_CONVERT(sample_count, SAMPLE_COUNT);
618 DEVICE_KERNEL_FILM_CONVERT(
float,
FLOAT);
619 DEVICE_KERNEL_FILM_CONVERT(light_path, LIGHT_PATH);
621 DEVICE_KERNEL_FILM_CONVERT(motion, MOTION);
622 DEVICE_KERNEL_FILM_CONVERT(cryptomatte, CRYPTOMATTE);
623 DEVICE_KERNEL_FILM_CONVERT(shadow_catcher, SHADOW_CATCHER);
624 DEVICE_KERNEL_FILM_CONVERT(shadow_catcher_matte_with_shadow,
625 SHADOW_CATCHER_MATTE_WITH_SHADOW);
626 DEVICE_KERNEL_FILM_CONVERT(combined, COMBINED);
627 DEVICE_KERNEL_FILM_CONVERT(float4,
FLOAT4);
629# undef DEVICE_KERNEL_FILM_CONVERT
630# undef DEVICE_KERNEL_FILM_CONVERT_PARTIAL
635 kg, cgh, global_size, local_size, args, oneapi_kernel_filter_guiding_preprocess);
644 oneapi_kernel_filter_guiding_set_fake_albedo);
649 kg, cgh, global_size, local_size, args, oneapi_kernel_filter_color_preprocess);
654 kg, cgh, global_size, local_size, args, oneapi_kernel_filter_color_postprocess);
659 kg, cgh, global_size, local_size, args, oneapi_kernel_cryptomatte_postprocess);
664 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_compact_states);
673 oneapi_kernel_integrator_compact_shadow_states);
682 oneapi_kernel_integrator_shadow_catcher_count_possible_splits);
693 catch (sycl::exception
const &
e) {
695 s_error_cb(
e.what(), s_error_user_ptr);
701# pragma warning(default : 4062)
702# elif defined(__GNUC__)
703# pragma GCC diagnostic pop
ATTR_WARN_UNUSED_RESULT const BMVert const BMEdge * e
#define kernel_assert(cond)
bool device_kernel_has_intersection(DeviceKernel kernel)
const char * device_kernel_as_string(DeviceKernel kernel)
draw_view push_constant(Type::INT, "radiance_src") .push_constant(Type capture_info_buf storage_buf(1, Qualifier::READ, "ObjectBounds", "bounds_buf[]") .push_constant(Type draw_view int
#define KERNEL_FEATURE_VOLUME
#define KERNEL_FEATURE_PATH_TRACING
#define KERNEL_FEATURE_HAIR
#define KERNEL_FEATURE_NODE_RAYTRACE
#define KERNEL_FEATURE_BAKING
#define KERNEL_FEATURE_MNEE
@ DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_CHECK
@ DEVICE_KERNEL_INTEGRATOR_RESET
@ DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT
@ DEVICE_KERNEL_INTEGRATOR_SHADE_DEDICATED_LIGHT
@ DEVICE_KERNEL_FILTER_COLOR_PREPROCESS
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE
@ DEVICE_KERNEL_INTEGRATOR_SORT_WRITE_PASS
@ DEVICE_KERNEL_SHADER_EVAL_DISPLACE
@ DEVICE_KERNEL_INTEGRATOR_SHADOW_CATCHER_COUNT_POSSIBLE_SPLITS
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE
@ DEVICE_KERNEL_INTEGRATOR_QUEUED_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO
@ DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW
@ DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK
@ DEVICE_KERNEL_SHADER_EVAL_BACKGROUND
@ DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_STATES
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE
@ DEVICE_KERNEL_INTEGRATOR_SORT_BUCKET_PASS
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_DEDICATED_LIGHT
@ DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS
@ DEVICE_KERNEL_INTEGRATOR_COMPACT_STATES
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE
@ DEVICE_KERNEL_INTEGRATOR_TERMINATED_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL
@ DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_INIT_FROM_CAMERA
@ DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_FILTER_Y
@ DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_FILTER_X
@ DEVICE_KERNEL_CRYPTOMATTE_POSTPROCESS
@ DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_INIT_FROM_BAKE
@ DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY
@ DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST
@ DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND
@ DEVICE_KERNEL_PREFIX_SUM
#define GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE
#define GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE
#define GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE
#define GPU_PARALLEL_SORT_BLOCK_SIZE