Blender V4.3
optix/queue.cpp
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#ifdef WITH_OPTIX
6
7# include "device/optix/queue.h"
9
10# include "util/time.h"
11
12# define __KERNEL_OPTIX__
14
16
17/* OptiXDeviceQueue */
18
19OptiXDeviceQueue::OptiXDeviceQueue(OptiXDevice *device) : CUDADeviceQueue(device) {}
20
21void OptiXDeviceQueue::init_execution()
22{
23 CUDADeviceQueue::init_execution();
24}
25
26static bool is_optix_specific_kernel(DeviceKernel kernel, bool use_osl)
27{
28# ifdef WITH_OSL
29 /* OSL uses direct callables to execute, so shading needs to be done in OptiX if OSL is used. */
30 if (use_osl && device_kernel_has_shading(kernel)) {
31 return true;
32 }
33# else
34 (void)use_osl;
35# endif
36
37 return device_kernel_has_intersection(kernel);
38}
39
/* Enqueue `kernel` for `work_size` work items.
 *
 * Kernels for which is_optix_specific_kernel() returns false are forwarded unchanged to
 * CUDADeviceQueue::enqueue(). For the remaining kernels, entries of `args.values` are copied
 * into the device-side launch parameters, a pipeline and ray generation record are selected for
 * the kernel, and the work is submitted with optixLaunch() on this queue's CUDA stream.
 *
 * Returns false when the device already has an error, when the kernel falls through to the
 * `default` branch of the switch, or when the device has an error after the launch; returns
 * true otherwise.
 *
 * NOTE(review): in this listing the `case` labels of the switch and the conditions guarding the
 * `render_buffer` and `offset` copies are not visible. Confirm them before relying on the
 * per-branch notes below. */
40bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
41 const int work_size,
42 DeviceKernelArguments const &args)
43{
44 OptiXDevice *const optix_device = static_cast<OptiXDevice *>(cuda_device_);
45
 /* OSL usage is read from the device's OSL globals; without OSL support it is always off. */
46# ifdef WITH_OSL
47 const bool use_osl = static_cast<OSLGlobals *>(optix_device->get_cpu_osl_memory())->use;
48# else
49 const bool use_osl = false;
50# endif
51
 /* Everything that does not need OptiX takes the regular CUDA queue path. */
52 if (!is_optix_specific_kernel(kernel, use_osl)) {
53 return CUDADeviceQueue::enqueue(kernel, work_size, args);
54 }
55
 /* Do not submit more work once the device is in an error state. */
56 if (cuda_device_->have_error()) {
57 return false;
58 }
59
60 debug_enqueue_begin(kernel, work_size);
61
62 const CUDAContextScope scope(cuda_device_);
63
 /* Device addresses of the shader binding table records and of the launch parameters. */
64 const device_ptr sbt_data_ptr = optix_device->sbt_data.device_pointer;
65 const device_ptr launch_params_ptr = optix_device->launch_params.device_pointer;
66
 /* Patch the `path_index_array` field of the launch parameters from the first argument. */
67 cuda_device_assert(
68 cuda_device_,
69 cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, path_index_array),
70 args.values[0], // &d_path_index
71 sizeof(device_ptr),
72 cuda_stream_));
73
 /* Patch the `render_buffer` field from the second argument.
  * NOTE(review): the condition that opens this scope is missing from the listing. */
75 cuda_device_assert(
76 cuda_device_,
77 cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, render_buffer),
78 args.values[1], // &d_render_buffer
79 sizeof(device_ptr),
80 cuda_stream_));
81 }
 /* Patch the 32-bit `offset` field from the third argument.
  * NOTE(review): the condition preceding this scope is missing from the listing. */
85 {
86 cuda_device_assert(cuda_device_,
87 cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, offset),
88 args.values[2], // &d_offset
89 sizeof(int32_t),
90 cuda_stream_));
91 }
92
 /* Block until the stream has finished the work queued so far, including the asynchronous
  * copies above. Presumably needed because the copies read from host memory owned by the
  * caller -- TODO confirm. */
93 cuda_device_assert(cuda_device_, cuStreamSynchronize(cuda_stream_));
94
95 OptixPipeline pipeline = nullptr;
96 OptixShaderBindingTable sbt_params = {};
97
 /* Select the pipeline and the ray generation record for this kernel. Each branch addresses its
  * record as an index into the SBT buffer, scaled by sizeof(SbtRecord).
  * NOTE(review): the `case` labels are missing from the listing; each branch presumably handles
  * the DEVICE_KERNEL_* value matching the PG_RGEN_* name it uses -- confirm. */
98 switch (kernel) {
 /* Branches using the shading pipeline (PIP_SHADE). */
100 pipeline = optix_device->pipelines[PIP_SHADE];
101 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_BACKGROUND * sizeof(SbtRecord);
102 break;
104 pipeline = optix_device->pipelines[PIP_SHADE];
105 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_LIGHT * sizeof(SbtRecord);
106 break;
108 pipeline = optix_device->pipelines[PIP_SHADE];
109 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE * sizeof(SbtRecord);
110 break;
112 pipeline = optix_device->pipelines[PIP_SHADE];
113 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_RAYTRACE * sizeof(SbtRecord);
114 break;
116 pipeline = optix_device->pipelines[PIP_SHADE];
117 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_MNEE * sizeof(SbtRecord);
118 break;
120 pipeline = optix_device->pipelines[PIP_SHADE];
121 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_VOLUME * sizeof(SbtRecord);
122 break;
124 pipeline = optix_device->pipelines[PIP_SHADE];
125 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SHADOW * sizeof(SbtRecord);
126 break;
128 pipeline = optix_device->pipelines[PIP_SHADE];
129 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_DEDICATED_LIGHT * sizeof(SbtRecord);
130 break;
131
 /* Branches using the intersection pipeline (PIP_INTERSECT). */
133 pipeline = optix_device->pipelines[PIP_INTERSECT];
134 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_CLOSEST * sizeof(SbtRecord);
135 break;
137 pipeline = optix_device->pipelines[PIP_INTERSECT];
138 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_SHADOW * sizeof(SbtRecord);
139 break;
141 pipeline = optix_device->pipelines[PIP_INTERSECT];
142 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_SUBSURFACE * sizeof(SbtRecord);
143 break;
145 pipeline = optix_device->pipelines[PIP_INTERSECT];
146 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_VOLUME_STACK * sizeof(SbtRecord);
147 break;
149 pipeline = optix_device->pipelines[PIP_INTERSECT];
150 sbt_params.raygenRecord = sbt_data_ptr +
151 PG_RGEN_INTERSECT_DEDICATED_LIGHT * sizeof(SbtRecord);
152 break;
153
 /* Evaluation branches; these also run on the shading pipeline (PIP_SHADE). */
155 pipeline = optix_device->pipelines[PIP_SHADE];
156 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_EVAL_DISPLACE * sizeof(SbtRecord);
157 break;
159 pipeline = optix_device->pipelines[PIP_SHADE];
160 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_EVAL_BACKGROUND * sizeof(SbtRecord);
161 break;
163 pipeline = optix_device->pipelines[PIP_SHADE];
164 sbt_params.raygenRecord = sbt_data_ptr +
165 PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY * sizeof(SbtRecord);
166 break;
167
 /* Any other kernel is rejected with an error log.
  * NOTE(review): this path returns after debug_enqueue_begin() without a matching
  * debug_enqueue_end() -- confirm that is intended. */
168 default:
169 LOG(ERROR) << "Invalid kernel " << device_kernel_as_string(kernel)
170 << " is attempted to be enqueued.";
171 return false;
172 }
173
 /* The miss, hit group and callable records live in the same SBT buffer as the ray generation
  * records; every group uses a stride of one SbtRecord. */
174 sbt_params.missRecordBase = sbt_data_ptr + MISS_PROGRAM_GROUP_OFFSET * sizeof(SbtRecord);
175 sbt_params.missRecordStrideInBytes = sizeof(SbtRecord);
176 sbt_params.missRecordCount = NUM_MISS_PROGRAM_GROUPS;
177 sbt_params.hitgroupRecordBase = sbt_data_ptr + HIT_PROGAM_GROUP_OFFSET * sizeof(SbtRecord);
178 sbt_params.hitgroupRecordStrideInBytes = sizeof(SbtRecord);
179 sbt_params.hitgroupRecordCount = NUM_HIT_PROGRAM_GROUPS;
180 sbt_params.callablesRecordBase = sbt_data_ptr + CALLABLE_PROGRAM_GROUPS_BASE * sizeof(SbtRecord);
181 sbt_params.callablesRecordCount = NUM_CALLABLE_PROGRAM_GROUPS;
182 sbt_params.callablesRecordStrideInBytes = sizeof(SbtRecord);
183
 /* With OSL in use, the callable record count is extended by the number of OSL groups. */
184# ifdef WITH_OSL
185 if (use_osl) {
186 sbt_params.callablesRecordCount += static_cast<unsigned int>(optix_device->osl_groups.size());
187 }
188# endif
189
190 /* Launch the ray generation program. */
 /* The launch dimensions are work_size x 1 x 1. */
191 optix_device_assert(optix_device,
192 optixLaunch(pipeline,
193 cuda_stream_,
194 launch_params_ptr,
195 optix_device->launch_params.data_elements,
196 &sbt_params,
197 work_size,
198 1,
199 1));
200
201 debug_enqueue_end();
202
 /* Report success only if the device has no error at this point. */
203 return !(optix_device->have_error());
204}
205
207
208#endif /* WITH_OPTIX */
#define CCL_NAMESPACE_END
CCL_NAMESPACE_BEGIN bool device_kernel_has_shading(DeviceKernel kernel)
bool device_kernel_has_intersection(DeviceKernel kernel)
const char * device_kernel_as_string(DeviceKernel kernel)
#define offsetof(t, d)
ccl_gpu_kernel_postfix ccl_global const int * path_index_array
ccl_gpu_kernel_postfix ccl_global const int ccl_global float const int work_size
ccl_gpu_kernel_postfix ccl_global KernelWorkTile const int ccl_global float * render_buffer
DeviceKernel
@ DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT
@ DEVICE_KERNEL_INTEGRATOR_SHADE_DEDICATED_LIGHT
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE
@ DEVICE_KERNEL_SHADER_EVAL_DISPLACE
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK
@ DEVICE_KERNEL_SHADER_EVAL_BACKGROUND
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_DEDICATED_LIGHT
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE
@ DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY
@ DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST
@ DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND
#define LOG(severity)
Definition log.h:33
signed int int32_t
Definition stdint.h:77
void * values[MAX_ARGS]
uint64_t device_ptr
Definition util/types.h:45