Blender V5.0
optix/queue.cpp
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#ifdef WITH_OPTIX
6
7# include "device/optix/queue.h"
9
10# define __KERNEL_OPTIX__
12
14
/* OptiXDeviceQueue */
16
17OptiXDeviceQueue::OptiXDeviceQueue(OptiXDevice *device) : CUDADeviceQueue(device) {}
18
19void OptiXDeviceQueue::init_execution()
20{
21 CUDADeviceQueue::init_execution();
22}
23
24static bool is_optix_specific_kernel(DeviceKernel kernel, bool osl_shading, bool osl_camera)
25{
26# ifdef WITH_OSL
27 /* OSL uses direct callables to execute, so shading needs to be done in OptiX if OSL is used. */
28 if (osl_shading && device_kernel_has_shading(kernel)) {
29 return true;
30 }
31 if (osl_camera && kernel == DEVICE_KERNEL_INTEGRATOR_INIT_FROM_CAMERA) {
32 return true;
33 }
34# else
35 (void)osl_shading;
36 (void)osl_camera;
37# endif
38
39 return device_kernel_has_intersection(kernel);
40}
41
/* Enqueue `kernel` for `work_size` work items.
 *
 * Kernels for which is_optix_specific_kernel() returns false are forwarded unchanged to
 * CUDADeviceQueue::enqueue(). All other kernels get their arguments copied into the OptiX launch
 * parameter buffer and are started with optixLaunch() on this queue's CUDA stream.
 *
 * Returns false when the device already has an error, when the kernel is not handled by the
 * switch below, or when the device reports an error after the launch; true otherwise (for
 * forwarded kernels the result of the CUDA queue is returned).
 *
 * NOTE(review): the embedded listing numbers skip in several places (79->81, 82->87, 90->92 and
 * before every group in the switch), so the `if` conditions guarding the launch-parameter copies
 * and the `case` labels appear to be missing from this listing. Confirm against the original
 * file before relying on the control flow shown here. */
42bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
43 const int work_size,
44 const DeviceKernelArguments &args)
45{
46 OptiXDevice *const optix_device = static_cast<OptiXDevice *>(cuda_device_);
47
48# ifdef WITH_OSL
 /* Read from the OSL globals whether shading and/or the camera are using OSL. */
49 const OSLGlobals *og = static_cast<const OSLGlobals *>(optix_device->get_cpu_osl_memory());
50 const bool osl_shading = og->use_shading;
51 const bool osl_camera = og->use_camera;
52# else
53 const bool osl_shading = false;
54 const bool osl_camera = false;
55# endif
56
 /* Anything that does not need OptiX goes through the regular CUDA queue. */
57 if (!is_optix_specific_kernel(kernel, osl_shading, osl_camera)) {
58 return CUDADeviceQueue::enqueue(kernel, work_size, args);
59 }
60
 /* Do not enqueue more work once the device is in an error state. */
61 if (cuda_device_->have_error()) {
62 return false;
63 }
64
65 debug_enqueue_begin(kernel, work_size);
66
67 const CUDAContextScope scope(cuda_device_);
68
69 const device_ptr sbt_data_ptr = optix_device->sbt_data.device_pointer;
70 const device_ptr launch_params_ptr = optix_device->launch_params.device_pointer;
71
 /* Helper: asynchronously copy `size` bytes from the host value `args.values[arg]` into the
  * launch parameter buffer at byte `offset`, on this queue's stream. */
72 auto set_launch_param = [&](size_t offset, size_t size, int arg) {
73 cuda_device_assert(
74 cuda_device_,
75 cuMemcpyHtoDAsync(launch_params_ptr + offset, args.values[arg], size, cuda_stream_));
76 };
77
 /* Argument 0 is copied into `path_index_array`; no guarding condition is visible for it. */
78 set_launch_param(offsetof(KernelParamsOptiX, path_index_array), sizeof(device_ptr), 0);
79
 /* NOTE(review): the condition opening this block is not visible in this listing. */
81 set_launch_param(offsetof(KernelParamsOptiX, render_buffer), sizeof(device_ptr), 1);
82 }
 /* NOTE(review): the (apparently multi-line) condition for this block is not visible either. */
87 {
88 set_launch_param(offsetof(KernelParamsOptiX, offset), sizeof(int32_t), 2);
89 }
90
 /* NOTE(review): the condition opening this block is not visible in this listing. Here
  * arguments 1, 2 and 3 are copied into `num_tiles`, `render_buffer` and `max_tile_work_size`. */
92 set_launch_param(offsetof(KernelParamsOptiX, num_tiles), sizeof(int32_t), 1);
93 set_launch_param(offsetof(KernelParamsOptiX, render_buffer), sizeof(device_ptr), 2);
94 set_launch_param(offsetof(KernelParamsOptiX, max_tile_work_size), sizeof(int32_t), 3);
95 }
96
 /* Block until the stream is idle, which includes the asynchronous parameter copies above. */
97 cuda_device_assert(cuda_device_, cuStreamSynchronize(cuda_stream_));
98
99 OptixPipeline pipeline = nullptr;
100 OptixShaderBindingTable sbt_params = {};
101
 /* Pick the pipeline and the ray generation record inside the SBT buffer for this kernel.
  * NOTE(review): the `case` labels are missing from this listing; each group presumably belongs
  * to the kernel matching its PG_RGEN_* name - confirm against the original file. */
102 switch (kernel) {
104 pipeline = optix_device->pipelines[PIP_SHADE];
105 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_BACKGROUND * sizeof(SbtRecord);
106 break;
108 pipeline = optix_device->pipelines[PIP_SHADE];
109 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_LIGHT * sizeof(SbtRecord);
110 break;
112 pipeline = optix_device->pipelines[PIP_SHADE];
113 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE * sizeof(SbtRecord);
114 break;
116 pipeline = optix_device->pipelines[PIP_SHADE];
117 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_RAYTRACE * sizeof(SbtRecord);
118 break;
120 pipeline = optix_device->pipelines[PIP_SHADE];
121 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_MNEE * sizeof(SbtRecord);
122 break;
124 pipeline = optix_device->pipelines[PIP_SHADE];
125 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_VOLUME * sizeof(SbtRecord);
126 break;
128 pipeline = optix_device->pipelines[PIP_SHADE];
129 sbt_params.raygenRecord = sbt_data_ptr +
130 PG_RGEN_SHADE_VOLUME_RAY_MARCHING * sizeof(SbtRecord);
131 break;
133 pipeline = optix_device->pipelines[PIP_SHADE];
134 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SHADOW * sizeof(SbtRecord);
135 break;
137 pipeline = optix_device->pipelines[PIP_SHADE];
138 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_DEDICATED_LIGHT * sizeof(SbtRecord);
139 break;
140
 /* The following groups use the intersection pipeline instead of the shading one. */
142 pipeline = optix_device->pipelines[PIP_INTERSECT];
143 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_CLOSEST * sizeof(SbtRecord);
144 break;
146 pipeline = optix_device->pipelines[PIP_INTERSECT];
147 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_SHADOW * sizeof(SbtRecord);
148 break;
150 pipeline = optix_device->pipelines[PIP_INTERSECT];
151 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_SUBSURFACE * sizeof(SbtRecord);
152 break;
154 pipeline = optix_device->pipelines[PIP_INTERSECT];
155 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_VOLUME_STACK * sizeof(SbtRecord);
156 break;
158 pipeline = optix_device->pipelines[PIP_INTERSECT];
159 sbt_params.raygenRecord = sbt_data_ptr +
160 PG_RGEN_INTERSECT_DEDICATED_LIGHT * sizeof(SbtRecord);
161 break;
162
 /* The evaluation groups below run on the shading pipeline. */
164 pipeline = optix_device->pipelines[PIP_SHADE];
165 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_EVAL_DISPLACE * sizeof(SbtRecord);
166 break;
168 pipeline = optix_device->pipelines[PIP_SHADE];
169 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_EVAL_BACKGROUND * sizeof(SbtRecord);
170 break;
172 pipeline = optix_device->pipelines[PIP_SHADE];
173 sbt_params.raygenRecord = sbt_data_ptr +
174 PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY * sizeof(SbtRecord);
175 break;
177 pipeline = optix_device->pipelines[PIP_SHADE];
178 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_EVAL_VOLUME_DENSITY * sizeof(SbtRecord);
179 break;
180
 /* The camera init group also uses the shading pipeline. */
182 pipeline = optix_device->pipelines[PIP_SHADE];
183 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INIT_FROM_CAMERA * sizeof(SbtRecord);
184 break;
185
 /* Any kernel without an entry above is rejected with an error log. */
186 default:
187 LOG_ERROR << "Invalid kernel " << device_kernel_as_string(kernel)
188 << " is attempted to be enqueued.";
189 return false;
190 }
191
 /* Miss, hit group and callable records are all addressed inside the same SBT buffer, each
  * with a stride of one SbtRecord. */
192 sbt_params.missRecordBase = sbt_data_ptr + MISS_PROGRAM_GROUP_OFFSET * sizeof(SbtRecord);
193 sbt_params.missRecordStrideInBytes = sizeof(SbtRecord);
194 sbt_params.missRecordCount = NUM_MISS_PROGRAM_GROUPS;
195 sbt_params.hitgroupRecordBase = sbt_data_ptr + HIT_PROGAM_GROUP_OFFSET * sizeof(SbtRecord);
196 sbt_params.hitgroupRecordStrideInBytes = sizeof(SbtRecord);
197 sbt_params.hitgroupRecordCount = NUM_HIT_PROGRAM_GROUPS;
198 sbt_params.callablesRecordBase = sbt_data_ptr + CALLABLE_PROGRAM_GROUPS_BASE * sizeof(SbtRecord);
199 sbt_params.callablesRecordCount = NUM_CALLABLE_PROGRAM_GROUPS;
200 sbt_params.callablesRecordStrideInBytes = sizeof(SbtRecord);
201
202# ifdef WITH_OSL
 /* With OSL in use, extend the callable record count by the number of OSL groups. */
203 if (osl_shading || osl_camera) {
204 sbt_params.callablesRecordCount += static_cast<unsigned int>(optix_device->osl_groups.size());
205 }
206# endif
207
208 /* Launch the ray generation program. */
 /* The launch dimensions are work_size x 1 x 1. */
209 optix_device_assert(optix_device,
210 optixLaunch(pipeline,
211 cuda_stream_,
212 launch_params_ptr,
213 optix_device->launch_params.data_elements,
214 &sbt_params,
215 work_size,
216 1,
217 1));
218
219 debug_enqueue_end();
220
 /* Report success only if the device has not recorded an error. */
221 return !(optix_device->have_error());
222}
223
225
226#endif /* WITH_OPTIX */
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition btDbvt.cpp:52
#define CCL_NAMESPACE_END
CCL_NAMESPACE_BEGIN bool device_kernel_has_shading(DeviceKernel kernel)
bool device_kernel_has_intersection(DeviceKernel kernel)
const char * device_kernel_as_string(DeviceKernel kernel)
#define offsetof(t, d)
ccl_gpu_kernel_postfix ccl_global KernelWorkTile const int num_tiles
ccl_gpu_kernel_postfix const ccl_global int ccl_global float const int work_size
ccl_gpu_kernel_postfix ccl_global KernelWorkTile const int ccl_global float const int max_tile_work_size
ccl_gpu_kernel_postfix const ccl_global int * path_index_array
ccl_gpu_kernel_postfix ccl_global KernelWorkTile const int ccl_global float * render_buffer
DeviceKernel
@ DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT
@ DEVICE_KERNEL_INTEGRATOR_SHADE_DEDICATED_LIGHT
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE
@ DEVICE_KERNEL_SHADER_EVAL_DISPLACE
@ DEVICE_KERNEL_SHADER_EVAL_VOLUME_DENSITY
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK
@ DEVICE_KERNEL_SHADER_EVAL_BACKGROUND
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_DEDICATED_LIGHT
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE
@ DEVICE_KERNEL_INTEGRATOR_INIT_FROM_CAMERA
@ DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME_RAY_MARCHING
@ DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY
@ DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST
@ DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND
#define LOG_ERROR
Definition log.h:101
void * values[MAX_ARGS]
uint64_t device_ptr
Definition types_base.h:44