Blender V4.3
denoiser_optix.cpp
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#ifdef WITH_OPTIX
6
9
11# include "device/optix/queue.h"
12
13# include <optix_denoiser_tiling.h>
14
16
17# if OPTIX_ABI_VERSION >= 60
18using ::optixUtilDenoiserInvokeTiled;
19# else
// A minimal copy of the functionality in `optix_denoiser_tiling.h`, which makes it possible to
// fix integer overflow issues without bumping the SDK or driver requirement.
//
// The original code is Copyright NVIDIA Corporation, BSD-3-Clause.
24static OptixResult optixUtilDenoiserSplitImage(const OptixImage2D &input,
25 const OptixImage2D &output,
26 unsigned int overlapWindowSizeInPixels,
27 unsigned int tileWidth,
28 unsigned int tileHeight,
29 std::vector<OptixUtilDenoiserImageTile> &tiles)
30{
31 if (tileWidth == 0 || tileHeight == 0)
32 return OPTIX_ERROR_INVALID_VALUE;
33
34 unsigned int inPixelStride = optixUtilGetPixelStride(input);
35 unsigned int outPixelStride = optixUtilGetPixelStride(output);
36
37 int inp_w = std::min(tileWidth + 2 * overlapWindowSizeInPixels, input.width);
38 int inp_h = std::min(tileHeight + 2 * overlapWindowSizeInPixels, input.height);
39 int inp_y = 0, copied_y = 0;
40
41 do {
42 int inputOffsetY = inp_y == 0 ? 0 :
43 std::max((int)overlapWindowSizeInPixels,
44 inp_h - ((int)input.height - inp_y));
45 int copy_y = inp_y == 0 ? std::min(input.height, tileHeight + overlapWindowSizeInPixels) :
46 std::min(tileHeight, input.height - copied_y);
47
48 int inp_x = 0, copied_x = 0;
49 do {
50 int inputOffsetX = inp_x == 0 ? 0 :
51 std::max((int)overlapWindowSizeInPixels,
52 inp_w - ((int)input.width - inp_x));
53 int copy_x = inp_x == 0 ? std::min(input.width, tileWidth + overlapWindowSizeInPixels) :
54 std::min(tileWidth, input.width - copied_x);
55
56 OptixUtilDenoiserImageTile tile;
57 tile.input.data = input.data + (size_t)(inp_y - inputOffsetY) * input.rowStrideInBytes +
58 +(size_t)(inp_x - inputOffsetX) * inPixelStride;
59 tile.input.width = inp_w;
60 tile.input.height = inp_h;
61 tile.input.rowStrideInBytes = input.rowStrideInBytes;
62 tile.input.pixelStrideInBytes = input.pixelStrideInBytes;
63 tile.input.format = input.format;
64
65 tile.output.data = output.data + (size_t)inp_y * output.rowStrideInBytes +
66 (size_t)inp_x * outPixelStride;
67 tile.output.width = copy_x;
68 tile.output.height = copy_y;
69 tile.output.rowStrideInBytes = output.rowStrideInBytes;
70 tile.output.pixelStrideInBytes = output.pixelStrideInBytes;
71 tile.output.format = output.format;
72
73 tile.inputOffsetX = inputOffsetX;
74 tile.inputOffsetY = inputOffsetY;
75 tiles.push_back(tile);
76
77 inp_x += inp_x == 0 ? tileWidth + overlapWindowSizeInPixels : tileWidth;
78 copied_x += copy_x;
79 } while (inp_x < static_cast<int>(input.width));
80
81 inp_y += inp_y == 0 ? tileHeight + overlapWindowSizeInPixels : tileHeight;
82 copied_y += copy_y;
83 } while (inp_y < static_cast<int>(input.height));
84
85 return OPTIX_SUCCESS;
86}
87
88static OptixResult optixUtilDenoiserInvokeTiled(OptixDenoiser denoiser,
89 CUstream stream,
90 const OptixDenoiserParams *params,
91 CUdeviceptr denoiserState,
92 size_t denoiserStateSizeInBytes,
93 const OptixDenoiserGuideLayer *guideLayer,
94 const OptixDenoiserLayer *layers,
95 unsigned int numLayers,
96 CUdeviceptr scratch,
97 size_t scratchSizeInBytes,
98 unsigned int overlapWindowSizeInPixels,
99 unsigned int tileWidth,
100 unsigned int tileHeight)
101{
102 if (!guideLayer || !layers)
103 return OPTIX_ERROR_INVALID_VALUE;
104
105 std::vector<std::vector<OptixUtilDenoiserImageTile>> tiles(numLayers);
106 std::vector<std::vector<OptixUtilDenoiserImageTile>> prevTiles(numLayers);
107 for (unsigned int l = 0; l < numLayers; l++) {
108 if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(layers[l].input,
109 layers[l].output,
110 overlapWindowSizeInPixels,
111 tileWidth,
112 tileHeight,
113 tiles[l]))
114 return res;
115
116 if (layers[l].previousOutput.data) {
117 OptixImage2D dummyOutput = layers[l].previousOutput;
118 if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(layers[l].previousOutput,
119 dummyOutput,
120 overlapWindowSizeInPixels,
121 tileWidth,
122 tileHeight,
123 prevTiles[l]))
124 return res;
125 }
126 }
127
128 std::vector<OptixUtilDenoiserImageTile> albedoTiles;
129 if (guideLayer->albedo.data) {
130 OptixImage2D dummyOutput = guideLayer->albedo;
131 if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(guideLayer->albedo,
132 dummyOutput,
133 overlapWindowSizeInPixels,
134 tileWidth,
135 tileHeight,
136 albedoTiles))
137 return res;
138 }
139
140 std::vector<OptixUtilDenoiserImageTile> normalTiles;
141 if (guideLayer->normal.data) {
142 OptixImage2D dummyOutput = guideLayer->normal;
143 if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(guideLayer->normal,
144 dummyOutput,
145 overlapWindowSizeInPixels,
146 tileWidth,
147 tileHeight,
148 normalTiles))
149 return res;
150 }
151 std::vector<OptixUtilDenoiserImageTile> flowTiles;
152 if (guideLayer->flow.data) {
153 OptixImage2D dummyOutput = guideLayer->flow;
154 if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(guideLayer->flow,
155 dummyOutput,
156 overlapWindowSizeInPixels,
157 tileWidth,
158 tileHeight,
159 flowTiles))
160 return res;
161 }
162
163 for (size_t t = 0; t < tiles[0].size(); t++) {
164 std::vector<OptixDenoiserLayer> tlayers;
165 for (unsigned int l = 0; l < numLayers; l++) {
166 OptixDenoiserLayer layer = {};
167 layer.input = (tiles[l])[t].input;
168 layer.output = (tiles[l])[t].output;
169 if (layers[l].previousOutput.data)
170 layer.previousOutput = (prevTiles[l])[t].input;
171 tlayers.push_back(layer);
172 }
173
174 OptixDenoiserGuideLayer gl = {};
175 if (guideLayer->albedo.data)
176 gl.albedo = albedoTiles[t].input;
177
178 if (guideLayer->normal.data)
179 gl.normal = normalTiles[t].input;
180
181 if (guideLayer->flow.data)
182 gl.flow = flowTiles[t].input;
183
184 if (const OptixResult res = optixDenoiserInvoke(denoiser,
185 stream,
186 params,
187 denoiserState,
188 denoiserStateSizeInBytes,
189 &gl,
190 &tlayers[0],
191 numLayers,
192 (tiles[0])[t].inputOffsetX,
193 (tiles[0])[t].inputOffsetY,
194 scratch,
195 scratchSizeInBytes))
196 return res;
197 }
198 return OPTIX_SUCCESS;
199}
200# endif
201
202OptiXDenoiser::OptiXDenoiser(Device *denoiser_device, const DenoiseParams &params)
203 : DenoiserGPU(denoiser_device, params), state_(denoiser_device, "__denoiser_state", true)
204{
205}
206
207OptiXDenoiser::~OptiXDenoiser()
208{
209 /* It is important that the OptixDenoiser handle is destroyed before the OptixDeviceContext
210 * handle, which is guaranteed since the local denoising device owning the OptiX device context
211 * is deleted as part of the Denoiser class destructor call after this. */
212 if (optix_denoiser_ != nullptr) {
213 optixDenoiserDestroy(optix_denoiser_);
214 }
215}
216
217uint OptiXDenoiser::get_device_type_mask() const
218{
219 return DEVICE_MASK_OPTIX;
220}
221
222bool OptiXDenoiser::is_device_supported(const DeviceInfo &device)
223{
224 if (device.type == DEVICE_OPTIX) {
225 return device.denoisers & DENOISER_OPTIX;
226 }
227 return false;
228}
229
230bool OptiXDenoiser::denoise_buffer(const DenoiseTask &task)
231{
232 OptiXDevice *const optix_device = static_cast<OptiXDevice *>(denoiser_device_);
233
234 const CUDAContextScope scope(optix_device);
235
236 return DenoiserGPU::denoise_buffer(task);
237}
238
239bool OptiXDenoiser::denoise_create_if_needed(DenoiseContext &context)
240{
241 const bool recreate_denoiser = (optix_denoiser_ == nullptr) ||
242 (use_pass_albedo_ != context.use_pass_albedo) ||
243 (use_pass_normal_ != context.use_pass_normal) ||
244 (use_pass_motion_ != context.use_pass_motion);
245 if (!recreate_denoiser) {
246 return true;
247 }
248
249 /* Destroy existing handle before creating new one. */
250 if (optix_denoiser_) {
251 optixDenoiserDestroy(optix_denoiser_);
252 }
253
254 /* Create OptiX denoiser handle on demand when it is first used. */
255 OptixDenoiserOptions denoiser_options = {};
256 denoiser_options.guideAlbedo = context.use_pass_albedo;
257 denoiser_options.guideNormal = context.use_pass_normal;
258
259 OptixDenoiserModelKind model = OPTIX_DENOISER_MODEL_KIND_HDR;
260 if (context.use_pass_motion) {
261 model = OPTIX_DENOISER_MODEL_KIND_TEMPORAL;
262 }
263
264 const OptixResult result = optixDenoiserCreate(
265 static_cast<OptiXDevice *>(denoiser_device_)->context,
266 model,
267 &denoiser_options,
268 &optix_denoiser_);
269
270 if (result != OPTIX_SUCCESS) {
271 set_error("Failed to create OptiX denoiser");
272 return false;
273 }
274
275 /* OptiX denoiser handle was created with the requested number of input passes. */
276 use_pass_albedo_ = context.use_pass_albedo;
277 use_pass_normal_ = context.use_pass_normal;
278 use_pass_motion_ = context.use_pass_motion;
279
280 /* OptiX denoiser has been created, but it needs configuration. */
281 is_configured_ = false;
282
283 return true;
284}
285
286bool OptiXDenoiser::denoise_configure_if_needed(DenoiseContext &context)
287{
288 /* Limit maximum tile size denoiser can be invoked with. */
289 const int2 tile_size = make_int2(min(context.buffer_params.width, 4096),
290 min(context.buffer_params.height, 4096));
291
292 if (is_configured_ && (configured_size_.x == tile_size.x && configured_size_.y == tile_size.y)) {
293 return true;
294 }
295
296 optix_device_assert(
297 denoiser_device_,
298 optixDenoiserComputeMemoryResources(optix_denoiser_, tile_size.x, tile_size.y, &sizes_));
299
300 const bool tiled = tile_size.x < context.buffer_params.width ||
301 tile_size.y < context.buffer_params.height;
302
303 /* Allocate denoiser state if tile size has changed since last setup. */
304 state_.device = denoiser_device_;
305 state_.alloc_to_device(sizes_.stateSizeInBytes + sizes_.withOverlapScratchSizeInBytes);
306
307 /* Initialize denoiser state for the current tile size. */
308 const OptixResult result = optixDenoiserSetup(
309 optix_denoiser_,
310 0, /* Work around bug in r495 drivers that causes artifacts when denoiser setup is called
311 * on a stream that is not the default stream. */
312 tile_size.x + (tiled ? sizes_.overlapWindowSizeInPixels * 2 : 0),
313 tile_size.y + (tiled ? sizes_.overlapWindowSizeInPixels * 2 : 0),
314 state_.device_pointer,
315 sizes_.stateSizeInBytes,
316 state_.device_pointer + sizes_.stateSizeInBytes,
317 sizes_.withOverlapScratchSizeInBytes);
318 if (result != OPTIX_SUCCESS) {
319 set_error("Failed to set up OptiX denoiser");
320 return false;
321 }
322
323 cuda_device_assert(denoiser_device_, cuCtxSynchronize());
324
325 is_configured_ = true;
326 configured_size_ = tile_size;
327
328 return true;
329}
330
331bool OptiXDenoiser::denoise_run(const DenoiseContext &context, const DenoisePass &pass)
332{
333 const BufferParams &buffer_params = context.buffer_params;
334 const int width = buffer_params.width;
335 const int height = buffer_params.height;
336
337 /* Set up input and output layer information. */
338 OptixImage2D color_layer = {0};
339 OptixImage2D albedo_layer = {0};
340 OptixImage2D normal_layer = {0};
341 OptixImage2D flow_layer = {0};
342
343 OptixImage2D output_layer = {0};
344 OptixImage2D prev_output_layer = {0};
345
346 /* Color pass. */
347 {
348 const int pass_denoised = pass.denoised_offset;
349 const int64_t pass_stride_in_bytes = context.buffer_params.pass_stride * sizeof(float);
350
351 color_layer.data = context.render_buffers->buffer.device_pointer +
352 pass_denoised * sizeof(float);
353 color_layer.width = width;
354 color_layer.height = height;
355 color_layer.rowStrideInBytes = pass_stride_in_bytes * context.buffer_params.stride;
356 color_layer.pixelStrideInBytes = pass_stride_in_bytes;
357 color_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
358 }
359
360 /* Previous output. */
361 if (context.prev_output.offset != PASS_UNUSED) {
362 const int64_t pass_stride_in_bytes = context.prev_output.pass_stride * sizeof(float);
363
364 prev_output_layer.data = context.prev_output.device_pointer +
365 context.prev_output.offset * sizeof(float);
366 prev_output_layer.width = width;
367 prev_output_layer.height = height;
368 prev_output_layer.rowStrideInBytes = pass_stride_in_bytes * context.prev_output.stride;
369 prev_output_layer.pixelStrideInBytes = pass_stride_in_bytes;
370 prev_output_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
371 }
372
373 /* Optional albedo and color passes. */
374 if (context.num_input_passes > 1) {
375 const device_ptr d_guiding_buffer = context.guiding_params.device_pointer;
376 const int64_t pixel_stride_in_bytes = context.guiding_params.pass_stride * sizeof(float);
377 const int64_t row_stride_in_bytes = context.guiding_params.stride * pixel_stride_in_bytes;
378
379 if (context.use_pass_albedo) {
380 albedo_layer.data = d_guiding_buffer + context.guiding_params.pass_albedo * sizeof(float);
381 albedo_layer.width = width;
382 albedo_layer.height = height;
383 albedo_layer.rowStrideInBytes = row_stride_in_bytes;
384 albedo_layer.pixelStrideInBytes = pixel_stride_in_bytes;
385 albedo_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
386 }
387
388 if (context.use_pass_normal) {
389 normal_layer.data = d_guiding_buffer + context.guiding_params.pass_normal * sizeof(float);
390 normal_layer.width = width;
391 normal_layer.height = height;
392 normal_layer.rowStrideInBytes = row_stride_in_bytes;
393 normal_layer.pixelStrideInBytes = pixel_stride_in_bytes;
394 normal_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
395 }
396
397 if (context.use_pass_motion) {
398 flow_layer.data = d_guiding_buffer + context.guiding_params.pass_flow * sizeof(float);
399 flow_layer.width = width;
400 flow_layer.height = height;
401 flow_layer.rowStrideInBytes = row_stride_in_bytes;
402 flow_layer.pixelStrideInBytes = pixel_stride_in_bytes;
403 flow_layer.format = OPTIX_PIXEL_FORMAT_FLOAT2;
404 }
405 }
406
407 /* Denoise in-place of the noisy input in the render buffers. */
408 output_layer = color_layer;
409
410 OptixDenoiserGuideLayer guide_layers = {};
411 guide_layers.albedo = albedo_layer;
412 guide_layers.normal = normal_layer;
413 guide_layers.flow = flow_layer;
414
415 OptixDenoiserLayer image_layers = {};
416 image_layers.input = color_layer;
417 image_layers.previousOutput = prev_output_layer;
418 image_layers.output = output_layer;
419
420 /* Finally run denoising. */
421 OptixDenoiserParams params = {}; /* All parameters are disabled/zero. */
422
423 optix_device_assert(denoiser_device_,
424 ccl::optixUtilDenoiserInvokeTiled(
425 optix_denoiser_,
426 static_cast<OptiXDeviceQueue *>(denoiser_queue_.get())->stream(),
427 &params,
428 state_.device_pointer,
429 sizes_.stateSizeInBytes,
430 &guide_layers,
431 &image_layers,
432 1,
433 state_.device_pointer + sizes_.stateSizeInBytes,
434 sizes_.withOverlapScratchSizeInBytes,
435 sizes_.overlapWindowSizeInPixels,
436 configured_size_.x,
437 configured_size_.y));
438
439 return true;
440}
441
443
444#endif
unsigned int uint
ATTR_WARN_UNUSED_RESULT const BMLoop * l
NODE_DECLARE int width
Definition buffers.h:72
virtual bool denoise_buffer(const BufferParams &buffer_params, RenderBuffers *render_buffers, const int num_samples, bool allow_inplace_modification) override
DenoiserTypeMask denoisers
DeviceType type
@ DENOISER_OPTIX
Definition denoise.h:14
#define CCL_NAMESPACE_END
@ DEVICE_MASK_OPTIX
@ DEVICE_OPTIX
ccl_device_forceinline int2 make_int2(const int x, const int y)
draw_view in_light_buf[] float
uiWidgetBaseParameters params[MAX_WIDGET_BASE_BATCH]
ccl_gpu_kernel_postfix ccl_global KernelWorkTile * tiles
ccl_global const KernelWorkTile * tile
#define PASS_UNUSED
#define min(a, b)
Definition sort.c:32
__int64 int64_t
Definition stdint.h:89
int x
Definition types_int2.h:15
int y
Definition types_int2.h:15
uint64_t device_ptr
Definition util/types.h:45