14# include <sycl/sycl.hpp>
24static OneAPIErrorCallback s_error_cb =
nullptr;
25static void *s_error_user_ptr =
nullptr;
27# ifdef WITH_EMBREE_GPU
28static RTCFeatureFlags oneapi_embree_features_from_kernel_features(
const uint kernel_features)
30 unsigned int feature_flags = RTC_FEATURE_FLAG_TRIANGLE | RTC_FEATURE_FLAG_INSTANCE |
31 RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS;
34 feature_flags |= RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE |
35 RTC_FEATURE_FLAG_ROUND_LINEAR_CURVE;
38 feature_flags |= RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE;
41 feature_flags |= RTC_FEATURE_FLAG_POINT;
44 feature_flags |= RTC_FEATURE_FLAG_MOTION_BLUR;
47 return (RTCFeatureFlags)feature_flags;
51void oneapi_set_error_cb(OneAPIErrorCallback cb,
void *user_ptr)
54 s_error_user_ptr = user_ptr;
57size_t oneapi_suggested_gpu_kernel_size(
const DeviceKernel kernel)
90bool oneapi_run_test_kernel(SyclQueue *queue_)
93 sycl::queue *queue =
reinterpret_cast<sycl::queue *
>(queue_);
95 const size_t memory_byte_size =
sizeof(int) *
N;
97 bool is_computation_correct =
true;
99 int *A_host = (
int *)sycl::aligned_alloc_host(16, memory_byte_size, *queue);
101 for (
size_t i = (
size_t)0;
i <
N;
i++) {
102 A_host[
i] = rand() % 32;
105 int *A_device = (
int *)sycl::malloc_device(memory_byte_size, *queue);
106 int *B_device = (
int *)sycl::malloc_device(memory_byte_size, *queue);
108 queue->memcpy(A_device, A_host, memory_byte_size);
109 queue->wait_and_throw();
111 queue->submit([&](sycl::handler &cgh) {
112 cgh.parallel_for(
N, [=](sycl::id<1> idx) { B_device[idx] = A_device[idx] + idx.get(0); });
114 queue->wait_and_throw();
116 int *B_host = (
int *)sycl::aligned_alloc_host(16, memory_byte_size, *queue);
118 queue->memcpy(B_host, B_device, memory_byte_size);
119 queue->wait_and_throw();
121 for (
size_t i = (
size_t)0;
i <
N;
i++) {
122 const int expected_result =
i + A_host[
i];
123 if (B_host[
i] != expected_result) {
124 is_computation_correct =
false;
126 s_error_cb((
"Incorrect result in test kernel execution - expected " +
127 std::to_string(expected_result) +
", got " + std::to_string(B_host[
i]))
134 sycl::free(A_host, *queue);
135 sycl::free(B_host, *queue);
136 sycl::free(A_device, *queue);
137 sycl::free(B_device, *queue);
138 queue->wait_and_throw();
140 catch (
const sycl::exception &
e) {
142 s_error_cb(
e.what(), s_error_user_ptr);
147 return is_computation_correct;
150bool oneapi_zero_memory_on_device(SyclQueue *queue_,
void *device_pointer,
const size_t num_bytes)
153 sycl::queue *queue =
reinterpret_cast<sycl::queue *
>(queue_);
155 queue->memset(device_pointer, 0, num_bytes);
156 queue->wait_and_throw();
159 catch (
const sycl::exception &
e) {
161 s_error_cb(
e.what(), s_error_user_ptr);
167bool oneapi_kernel_is_required_for_features(
const std::string &kernel_name,
168 const uint kernel_features)
171 if (kernel_name.find(
"oneapi_kernel_") == std::string::npos) {
198 std::string::npos) ||
200 std::string::npos) ||
202 std::string::npos) ||
212bool oneapi_kernel_is_compatible_with_hardware_raytracing(
const std::string &kernel_name)
216# if defined(RTC_VERSION) && RTC_VERSION < 40100
218 std::string::npos) &&
226bool oneapi_kernel_has_intersections(
const std::string &kernel_name)
239bool oneapi_load_kernels(SyclQueue *queue_,
240 const uint kernel_features,
241 bool use_hardware_raytracing)
244 sycl::queue *queue =
reinterpret_cast<sycl::queue *
>(queue_);
246# ifdef WITH_EMBREE_GPU
248 if (use_hardware_raytracing) {
250 sycl::kernel_bundle<sycl::bundle_state::input> all_kernels_bundle =
251 sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(),
252 {queue->get_device()});
254 for (
const sycl::kernel_id &kernel_id : all_kernels_bundle.get_kernel_ids()) {
255 const std::string &kernel_name = kernel_id.get_name();
257 if (!oneapi_kernel_is_required_for_features(kernel_name, kernel_features) ||
258 !(oneapi_kernel_has_intersections(kernel_name) &&
259 oneapi_kernel_is_compatible_with_hardware_raytracing(kernel_name)))
264 sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle_input =
265 sycl::get_kernel_bundle<sycl::bundle_state::input>(
266 queue->get_context(), {queue->get_device()}, {kernel_id});
268 const RTCFeatureFlags embree_features = oneapi_embree_features_from_kernel_features(
270 one_kernel_bundle_input
271 .set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
273 sycl::build(one_kernel_bundle_input);
276 catch (
const sycl::exception &
e) {
278 s_error_cb(
e.what(), s_error_user_ptr);
286 sycl::kernel_bundle<sycl::bundle_state::input> all_kernels_bundle =
287 sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(),
288 {queue->get_device()});
290 for (
const sycl::kernel_id &kernel_id : all_kernels_bundle.get_kernel_ids()) {
291 const std::string &kernel_name = kernel_id.get_name();
295 if (!oneapi_kernel_is_required_for_features(kernel_name, kernel_features) ||
296 (use_hardware_raytracing && oneapi_kernel_has_intersections(kernel_name) &&
297 oneapi_kernel_is_compatible_with_hardware_raytracing(kernel_name)))
302# ifdef WITH_EMBREE_GPU
303 if (oneapi_kernel_has_intersections(kernel_name)) {
304 sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle_input =
305 sycl::get_kernel_bundle<sycl::bundle_state::input>(
306 queue->get_context(), {queue->get_device()}, {kernel_id});
307 one_kernel_bundle_input
308 .set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
309 RTC_FEATURE_FLAG_NONE);
310 sycl::build(one_kernel_bundle_input);
316 (void)sycl::get_kernel_bundle<sycl::bundle_state::executable>(
317 queue->get_context(), {queue->get_device()}, {kernel_id});
320 catch (
const sycl::exception &
e) {
322 s_error_cb(
e.what(), s_error_user_ptr);
329bool oneapi_enqueue_kernel(KernelContext *kernel_context,
331 const size_t global_size,
332 const size_t local_size,
333 const uint kernel_features,
334 bool use_hardware_raytracing,
340 sycl::queue *queue =
reinterpret_cast<sycl::queue *
>(kernel_context->queue);
348# pragma warning(error : 4062)
349# elif defined(__GNUC__)
350# pragma GCC diagnostic push
351# pragma GCC diagnostic error "-Wswitch"
359 max_shaders = (kernel_context->scene_max_shaders);
363 queue->submit([&](sycl::handler &cgh) {
364# ifdef WITH_EMBREE_GPU
368 const RTCFeatureFlags embree_features = use_hardware_raytracing ?
369 oneapi_embree_features_from_kernel_features(
371 RTC_FEATURE_FLAG_NONE;
372 cgh.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
376 (void)kernel_features;
378 switch (device_kernel) {
380 oneapi_call(kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_reset);
385 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_init_from_camera);
390 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_init_from_bake);
395 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_intersect_closest);
400 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_intersect_shadow);
409 oneapi_kernel_integrator_intersect_subsurface);
418 oneapi_kernel_integrator_intersect_volume_stack);
427 oneapi_kernel_integrator_intersect_dedicated_light);
432 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_shade_background);
437 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_shade_light);
442 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_shade_shadow);
447 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_shade_surface);
456 oneapi_kernel_integrator_shade_surface_raytrace);
461 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_shade_surface_mnee);
466 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_shade_volume);
475 oneapi_kernel_integrator_shade_volume_ray_marching);
484 oneapi_kernel_integrator_shade_dedicated_light);
489 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_queued_paths_array);
498 oneapi_kernel_integrator_queued_shadow_paths_array);
503 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_active_paths_array);
512 oneapi_kernel_integrator_terminated_paths_array);
521 oneapi_kernel_integrator_terminated_shadow_paths_array);
526 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_sorted_paths_array);
530 sycl::local_accessor<int> local_mem(max_shaders, cgh);
531 oneapi_kernel_integrator_sort_bucket_pass(kg,
544 sycl::local_accessor<int> local_mem(max_shaders, cgh);
545 oneapi_kernel_integrator_sort_write_pass(kg,
563 oneapi_kernel_integrator_compact_paths_array);
572 oneapi_kernel_integrator_compact_shadow_paths_array);
581 oneapi_kernel_adaptive_sampling_convergence_check);
586 kg, cgh, global_size, local_size, args, oneapi_kernel_adaptive_sampling_filter_x);
591 kg, cgh, global_size, local_size, args, oneapi_kernel_adaptive_sampling_filter_y);
595 oneapi_call(kg, cgh, global_size, local_size, args, oneapi_kernel_shader_eval_displace);
600 kg, cgh, global_size, local_size, args, oneapi_kernel_shader_eval_background);
609 oneapi_kernel_shader_eval_curve_shadow_transparency);
614 kg, cgh, global_size, local_size, args, oneapi_kernel_shader_eval_volume_density);
618 oneapi_call(kg, cgh, global_size, local_size, args, oneapi_kernel_prefix_sum);
623 kg, cgh, global_size, local_size, args, oneapi_kernel_volume_guiding_filter_x);
628 kg, cgh, global_size, local_size, args, oneapi_kernel_volume_guiding_filter_y);
633 # define DEVICE_KERNEL_FILM_CONVERT_PARTIAL(VARIANT, variant) \
634 case DEVICE_KERNEL_FILM_CONVERT_##VARIANT: { \
635 oneapi_call(kg, cgh, \
639 oneapi_kernel_film_convert_##variant); \
643# define DEVICE_KERNEL_FILM_CONVERT(variant, VARIANT) \
644 DEVICE_KERNEL_FILM_CONVERT_PARTIAL(VARIANT, variant) \
645 DEVICE_KERNEL_FILM_CONVERT_PARTIAL(VARIANT##_HALF_RGBA, variant##_half_rgba)
647 DEVICE_KERNEL_FILM_CONVERT(depth, DEPTH);
648 DEVICE_KERNEL_FILM_CONVERT(mist, MIST);
649 DEVICE_KERNEL_FILM_CONVERT(volume_majorant, VOLUME_MAJORANT);
650 DEVICE_KERNEL_FILM_CONVERT(sample_count, SAMPLE_COUNT);
651 DEVICE_KERNEL_FILM_CONVERT(
float,
FLOAT);
652 DEVICE_KERNEL_FILM_CONVERT(light_path, LIGHT_PATH);
653 DEVICE_KERNEL_FILM_CONVERT(rgbe,
RGBE);
655 DEVICE_KERNEL_FILM_CONVERT(motion, MOTION);
656 DEVICE_KERNEL_FILM_CONVERT(cryptomatte, CRYPTOMATTE);
657 DEVICE_KERNEL_FILM_CONVERT(shadow_catcher, SHADOW_CATCHER);
658 DEVICE_KERNEL_FILM_CONVERT(shadow_catcher_matte_with_shadow,
659 SHADOW_CATCHER_MATTE_WITH_SHADOW);
660 DEVICE_KERNEL_FILM_CONVERT(combined, COMBINED);
663# undef DEVICE_KERNEL_FILM_CONVERT
664# undef DEVICE_KERNEL_FILM_CONVERT_PARTIAL
669 kg, cgh, global_size, local_size, args, oneapi_kernel_filter_guiding_preprocess);
678 oneapi_kernel_filter_guiding_set_fake_albedo);
683 kg, cgh, global_size, local_size, args, oneapi_kernel_filter_color_preprocess);
688 kg, cgh, global_size, local_size, args, oneapi_kernel_filter_color_postprocess);
692 oneapi_call(kg, cgh, global_size, local_size, args, oneapi_kernel_filter_color_flip_y);
697 kg, cgh, global_size, local_size, args, oneapi_kernel_cryptomatte_postprocess);
702 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_compact_states);
711 oneapi_kernel_integrator_compact_shadow_states);
720 oneapi_kernel_integrator_shadow_catcher_count_possible_splits);
731 catch (
const sycl::exception &
e) {
733 s_error_cb(
e.what(), s_error_user_ptr);
739# pragma warning(default : 4062)
740# elif defined(__GNUC__)
741# pragma GCC diagnostic pop
#define GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE
#define GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE
#define GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE
#define GPU_PARALLEL_SORT_BLOCK_SIZE
ATTR_WARN_UNUSED_RESULT const BMVert const BMEdge * e
#define kernel_assert(cond)
#define KERNEL_FEATURE_VOLUME
#define KERNEL_FEATURE_OBJECT_MOTION
#define KERNEL_FEATURE_HAIR_THICK
#define KERNEL_FEATURE_PATH_TRACING
#define KERNEL_FEATURE_HAIR
#define KERNEL_FEATURE_NODE_RAYTRACE
#define KERNEL_FEATURE_BAKING
#define KERNEL_FEATURE_MNEE
#define KERNEL_FEATURE_POINTCLOUD
bool device_kernel_has_intersection(DeviceKernel kernel)
const char * device_kernel_as_string(DeviceKernel kernel)
#define assert(assertion)
@ DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_CHECK
@ DEVICE_KERNEL_INTEGRATOR_RESET
@ DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT
@ DEVICE_KERNEL_INTEGRATOR_SHADE_DEDICATED_LIGHT
@ DEVICE_KERNEL_FILTER_COLOR_PREPROCESS
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE
@ DEVICE_KERNEL_INTEGRATOR_SORT_WRITE_PASS
@ DEVICE_KERNEL_SHADER_EVAL_DISPLACE
@ DEVICE_KERNEL_INTEGRATOR_SHADOW_CATCHER_COUNT_POSSIBLE_SPLITS
@ DEVICE_KERNEL_SHADER_EVAL_VOLUME_DENSITY
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE
@ DEVICE_KERNEL_VOLUME_GUIDING_FILTER_X
@ DEVICE_KERNEL_INTEGRATOR_QUEUED_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO
@ DEVICE_KERNEL_FILTER_COLOR_FLIP_Y
@ DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW
@ DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK
@ DEVICE_KERNEL_SHADER_EVAL_BACKGROUND
@ DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_STATES
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE
@ DEVICE_KERNEL_INTEGRATOR_SORT_BUCKET_PASS
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_DEDICATED_LIGHT
@ DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS
@ DEVICE_KERNEL_INTEGRATOR_COMPACT_STATES
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE
@ DEVICE_KERNEL_INTEGRATOR_TERMINATED_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL
@ DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_INIT_FROM_CAMERA
@ DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_FILTER_Y
@ DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME_RAY_MARCHING
@ DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_FILTER_X
@ DEVICE_KERNEL_CRYPTOMATTE_POSTPROCESS
@ DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_INIT_FROM_BAKE
@ DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY
@ DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST
@ DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND
@ DEVICE_KERNEL_VOLUME_GUIDING_FILTER_Y
@ DEVICE_KERNEL_PREFIX_SUM