Blender V5.0
denoiser_gpu.cpp
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
6
7#include "device/denoise.h"
8#include "device/device.h"
9#include "device/memory.h"
10#include "device/queue.h"
11
13
14#include "session/buffers.h"
15
16#include "util/log.h"
17
19
21 : Denoiser(denoiser_device, params)
22{
23 denoiser_queue_ = denoiser_device->gpu_queue_create();
25}
26
28{
29 /* Explicit implementation, to allow forward declaration of Device in the header. */
30}
31
/* Denoise the given render buffers using the denoiser device.
 *
 * Returns false when no denoiser device is available; otherwise returns the result of the
 * task-based denoise_buffer() overload.
 *
 * When the render buffers belong to a different device than the denoiser, the data is staged
 * through a temporary RenderBuffers constructed for the denoiser device, and the original
 * render buffers are uploaded again after denoising.
 *
 * NOTE(review): this listing has gaps in its own numbering inside this function (listing lines
 * 80 and 88 are absent), so the statements on those lines are not visible here. */
32bool DenoiserGPU::denoise_buffer(const BufferParams &buffer_params,
33 RenderBuffers *render_buffers,
34 const int num_samples,
35 bool allow_inplace_modification)
36{
37 Device *denoiser_device = get_denoiser_device();
38 if (!denoiser_device) {
39 return false;
40 }
41
/* Describe the work for the task-based overload called below. */
42 DenoiseTask task;
43 task.params = params_;
44 task.num_samples = num_samples;
45 task.buffer_params = buffer_params;
46 task.allow_inplace_modification = allow_inplace_modification;
47
/* Temporary buffers constructed for the denoiser device; they are only reset and used in the
 * cross-device branch below. */
48 RenderBuffers local_render_buffers(denoiser_device);
49 bool local_buffer_used = false;
50
51 if (denoiser_device == render_buffers->buffer.device) {
52 /* The device can access an existing buffer pointer. */
/* The flag is already false at this point; the assignment only makes the branch explicit. */
53 local_buffer_used = false;
54 task.render_buffers = render_buffers;
55 }
56 else {
57 LOG_DEBUG << "Creating temporary buffer on denoiser device.";
58
59 /* Create buffer which is available by the device used by denoiser. */
60
61 /* TODO(sergey): Optimize data transfers. For example, only copy denoising related passes,
62 * ignoring other light and data passes. */
63
64 local_buffer_used = true;
65
/* Presumably refreshes the host-side copy of the render buffers, which the memcpy() below
 * reads through buffer.data() - confirm against RenderBuffers. */
66 render_buffers->copy_from_device();
67
68 local_render_buffers.reset(buffer_params);
69
70 /* NOTE: The local buffer is allocated for an exact size of the effective render size, while
71 * the input render buffer is allocated for the lowest resolution divider possible. So it is
72 * important to only copy actually needed part of the input buffer. */
73 memcpy(local_render_buffers.buffer.data(),
74 render_buffers->buffer.data(),
75 sizeof(float) * local_render_buffers.buffer.size());
76
/* Upload the staged data through the denoiser queue. */
77 denoiser_queue_->copy_to_device(local_render_buffers.buffer);
78
79 task.render_buffers = &local_render_buffers;
/* NOTE(review): listing line 80 is missing here; a statement may have been dropped by the
 * extraction - confirm against the original file. */
81 }
82
83 const bool denoise_result = denoise_buffer(task);
84
85 if (local_buffer_used) {
86 local_render_buffers.copy_from_device();
87
/* NOTE(review): the callee name (listing line 88) is missing from this listing. The argument
 * list below matches render_buffers_host_copy_denoised(dst, dst_params, src, src_params) from
 * the index at the end of the listing - confirm against the original file. */
89 render_buffers, buffer_params, &local_render_buffers, local_render_buffers.params);
90
/* Push the updated render buffers back to their own device. */
91 render_buffers->copy_to_device();
92 }
93
94 return denoise_result;
95}
96
98{
99 DenoiseContext context(denoiser_device_, task);
100
101 if (!denoise_ensure(context)) {
102 return false;
103 }
104
105 if (!denoise_filter_guiding_preprocess(context)) {
106 LOG_ERROR << "Error preprocessing guiding passes.";
107 return false;
108 }
109
110 /* Passes which will use real albedo when it is available. */
111 denoise_pass(context, PASS_COMBINED);
113
114 /* Passes which do not need albedo: if real albedo is present, it needs to be replaced with a fake one. */
116
117 return true;
118}
119
121{
122 if (!denoise_create_if_needed(context)) {
123 LOG_ERROR << "GPU denoiser creation has failed.";
124 return false;
125 }
126
127 if (!denoise_configure_if_needed(context)) {
128 LOG_ERROR << "GPU denoiser configuration has failed.";
129 return false;
130 }
131
132 return true;
133}
134
136{
137 const BufferParams &buffer_params = context.buffer_params;
138
139 const int work_size = buffer_params.width * buffer_params.height;
140
141 const DeviceKernelArguments args(&context.guiding_params.device_pointer,
142 &context.guiding_params.pass_stride,
143 &context.guiding_params.pass_albedo,
144 &context.guiding_params.pass_normal,
145 &context.guiding_params.pass_flow,
146 &context.render_buffers->buffer.device_pointer,
147 &buffer_params.offset,
148 &buffer_params.stride,
149 &buffer_params.pass_stride,
150 &context.pass_sample_count,
151 &context.pass_denoising_albedo,
152 &context.pass_denoising_normal,
153 &context.pass_motion,
154 &buffer_params.full_x,
155 &buffer_params.full_y,
156 &buffer_params.width,
157 &buffer_params.height,
158 &context.num_samples);
159
162}
163
165 : denoise_params(task.params),
168 guiding_buffer(device, "denoiser guiding passes buffer", true),
170{
172 if (denoise_params.use_pass_albedo) {
173 num_input_passes += 1;
174 use_pass_albedo = true;
175 pass_denoising_albedo = buffer_params.get_pass_offset(PASS_DENOISING_ALBEDO);
176 if (denoise_params.use_pass_normal) {
177 num_input_passes += 1;
178 use_pass_normal = true;
179 pass_denoising_normal = buffer_params.get_pass_offset(PASS_DENOISING_NORMAL);
180 }
181 }
182
183 if (denoise_params.temporally_stable) {
184 prev_output.device_pointer = render_buffers->buffer.device_pointer;
185
186 prev_output.offset = buffer_params.get_pass_offset(PASS_DENOISING_PREVIOUS);
187
188 prev_output.stride = buffer_params.stride;
189 prev_output.pass_stride = buffer_params.pass_stride;
190
191 num_input_passes += 1;
192 use_pass_motion = true;
193 pass_motion = buffer_params.get_pass_offset(PASS_MOTION);
194 }
195
196 use_guiding_passes = (num_input_passes - 1) > 0;
197
198 if (use_guiding_passes) {
199 if (task.allow_inplace_modification) {
200 guiding_params.device_pointer = render_buffers->buffer.device_pointer;
201
202 guiding_params.pass_albedo = pass_denoising_albedo;
203 guiding_params.pass_normal = pass_denoising_normal;
204 guiding_params.pass_flow = pass_motion;
205
206 guiding_params.stride = buffer_params.stride;
207 guiding_params.pass_stride = buffer_params.pass_stride;
208 }
209 else {
210 guiding_params.pass_stride = 0;
211 if (use_pass_albedo) {
212 guiding_params.pass_albedo = guiding_params.pass_stride;
213 guiding_params.pass_stride += 3;
214 }
215 if (use_pass_normal) {
216 guiding_params.pass_normal = guiding_params.pass_stride;
217 guiding_params.pass_stride += 3;
218 }
219 if (use_pass_motion) {
220 guiding_params.pass_flow = guiding_params.pass_stride;
221 guiding_params.pass_stride += 2;
222 }
223
224 guiding_params.stride = buffer_params.width;
225
226 guiding_buffer.alloc_to_device(buffer_params.width * buffer_params.height *
227 guiding_params.pass_stride);
228 guiding_params.device_pointer = guiding_buffer.device_pointer;
229 }
230 }
231
232 pass_sample_count = buffer_params.get_pass_offset(PASS_SAMPLE_COUNT);
233}
234
236 const DenoisePass &pass)
237{
238 if (!denoise_filter_color_flip_y(context, pass)) {
239 return false;
240 }
241
242 const BufferParams &buffer_params = context.buffer_params;
243
244 const int work_size = buffer_params.width * buffer_params.height;
245
246 const DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
247 &buffer_params.full_x,
248 &buffer_params.full_y,
249 &buffer_params.width,
250 &buffer_params.height,
251 &buffer_params.offset,
252 &buffer_params.stride,
253 &buffer_params.pass_stride,
254 &context.num_samples,
255 &pass.noisy_offset,
256 &pass.denoised_offset,
257 &context.pass_sample_count,
258 &pass.num_components,
259 &pass.use_compositing);
260
262}
263
265 const DenoisePass &pass)
266{
267 if (context.denoise_params.type != DENOISER_OPTIX) {
268 /* Pass preprocessing is used to clamp values for the OptiX denoiser.
269 * Clamping is not necessary for other denoisers, so just skip this preprocess step. */
270 return true;
271 }
272
273 if (!denoise_filter_color_flip_y(context, pass)) {
274 return false;
275 }
276
277 const BufferParams &buffer_params = context.buffer_params;
278
279 const int work_size = buffer_params.width * buffer_params.height;
280
281 const DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
282 &buffer_params.full_x,
283 &buffer_params.full_y,
284 &buffer_params.width,
285 &buffer_params.height,
286 &buffer_params.offset,
287 &buffer_params.stride,
288 &buffer_params.pass_stride,
289 &pass.denoised_offset);
290
292}
293
295 const DenoisePass &pass)
296{
297 if (context.denoise_params.type != DENOISER_OPTIX || context.denoise_params.temporally_stable) {
298 /* Flipping the image is used to improve result quality with the OptiX denoiser.
299 * It is not necessary for other denoisers, so just skip this preprocess step. */
300 return true;
301 }
302
303 const BufferParams &buffer_params = context.buffer_params;
304
305 const int work_size = buffer_params.width * buffer_params.height / 2;
306
307 const DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
308 &buffer_params.full_x,
309 &buffer_params.full_y,
310 &buffer_params.width,
311 &buffer_params.height,
312 &buffer_params.offset,
313 &buffer_params.stride,
314 &buffer_params.pass_stride,
315 &pass.denoised_offset);
316
318}
319
321{
322 if (context.denoise_params.type != DENOISER_OPTIX || context.denoise_params.temporally_stable) {
323 /* Flipping the image is used to improve result quality with the OptiX denoiser.
324 * It is not necessary for other denoisers, so just skip this preprocess step. */
325 return true;
326 }
327
328 const BufferParams &buffer_params = context.buffer_params;
329
330 const int guiding_offset = 0;
331
332 const int work_size = buffer_params.width * buffer_params.height / 2;
333
334 const int guiding_passes[] = {context.guiding_params.pass_albedo,
335 context.guiding_params.pass_normal};
336 for (const int guiding_pass : guiding_passes) {
337 if (guiding_pass == PASS_UNUSED) {
338 continue;
339 }
340
341 const DeviceKernelArguments args(&context.guiding_params.device_pointer,
342 &guiding_offset,
343 &guiding_offset,
344 &buffer_params.width,
345 &buffer_params.height,
346 &guiding_offset,
347 &context.guiding_params.stride,
348 &context.guiding_params.pass_stride,
349 &guiding_pass);
350
352 return false;
353 }
354 }
355 return true;
356}
357
359{
360 const BufferParams &buffer_params = context.buffer_params;
361
362 const int work_size = buffer_params.width * buffer_params.height;
363
364 const DeviceKernelArguments args(&context.guiding_params.device_pointer,
365 &context.guiding_params.pass_stride,
366 &context.guiding_params.pass_albedo,
367 &buffer_params.width,
368 &buffer_params.height);
369
371}
372
374{
375 PassAccessor::PassAccessInfo pass_access_info;
376 pass_access_info.type = pass.type;
377 pass_access_info.mode = PassMode::NOISY;
378 pass_access_info.offset = pass.noisy_offset;
379
380 /* Denoiser operates on passes which are used to calculate the approximation, and is never used
381 * on the approximation. The latter is not even possible because OptiX does not support
382 * denoising of semi-transparent pixels. */
383 pass_access_info.use_approximate_shadow_catcher = false;
384 pass_access_info.use_approximate_shadow_catcher_background = false;
385 pass_access_info.show_active_pixels = false;
386
387 /* TODO(sergey): Consider adding support of actual exposure, to avoid clamping in extreme cases.
388 */
389 const PassAccessorGPU pass_accessor(
390 denoiser_queue_.get(), pass_access_info, 1.0f, context.num_samples);
391
392 PassAccessor::Destination destination(pass_access_info.type, pass_access_info.mode);
393 destination.d_pixels = context.render_buffers->buffer.device_pointer;
394 destination.num_components = 3;
395 destination.pixel_offset = pass.denoised_offset;
396 destination.pixel_stride = context.buffer_params.pass_stride;
397
398 BufferParams buffer_params = context.buffer_params;
399 buffer_params.window_x = 0;
400 buffer_params.window_y = 0;
401 buffer_params.window_width = buffer_params.width;
402 buffer_params.window_height = buffer_params.height;
403
404 pass_accessor.get_render_tile_pixels(context.render_buffers, buffer_params, destination);
405}
406
408{
409 const BufferParams &buffer_params = context.buffer_params;
410
411 const DenoisePass pass(pass_type, buffer_params);
412
413 if (pass.noisy_offset == PASS_UNUSED) {
414 return;
415 }
416 if (pass.denoised_offset == PASS_UNUSED) {
417 LOG_DFATAL << "Missing denoised pass " << pass_type_as_string(pass_type);
418 return;
419 }
420
421 if (pass.use_denoising_albedo) {
422 if (context.albedo_replaced_with_fake) {
423 LOG_ERROR << "Pass which requires albedo is denoised after fake albedo has been set.";
424 return;
425 }
426 }
427 else if (context.use_guiding_passes && !context.albedo_replaced_with_fake) {
428 context.albedo_replaced_with_fake = true;
430 LOG_ERROR << "Error replacing real albedo with the fake one.";
431 return;
432 }
433 }
434
435 /* Read and preprocess noisy color input pass. */
436 denoise_color_read(context, pass);
437 if (!denoise_filter_color_preprocess(context, pass)) {
438 LOG_ERROR << "Error converting denoising passes to RGB buffer.";
439 return;
440 }
441
442 if (!denoise_run(context, pass)) {
443 LOG_ERROR << "Error running denoiser.";
444 return;
445 }
446
447 /* Store result in the combined pass of the render buffer.
448 *
449 * This will scale the denoiser result up to match the number of, possibly per-pixel, samples. */
450 if (!denoise_filter_color_postprocess(context, pass)) {
451 LOG_ERROR << "Error copying denoiser result to the denoised pass.";
452 return;
453 }
454
455 denoiser_queue_->synchronize();
456}
457
return true
void render_buffers_host_copy_denoised(RenderBuffers *dst, const BufferParams &dst_params, const RenderBuffers *src, const BufferParams &src_params, const size_t src_offset)
Definition buffers.cpp:304
int pass_stride
Definition buffers.h:92
int window_y
Definition buffers.h:78
int window_height
Definition buffers.h:80
int window_width
Definition buffers.h:79
NODE_DECLARE int width
Definition buffers.h:70
int window_x
Definition buffers.h:77
DenoiseContext(Device *device, const DenoiseTask &task)
const BufferParams & buffer_params
const DenoiseParams & denoise_params
device_only_memory< float > guiding_buffer
RenderBuffers * render_buffers
RenderBuffers * render_buffers
bool denoise_filter_guiding_set_fake_albedo(const DenoiseContext &context)
virtual bool denoise_run(const DenoiseContext &context, const DenoisePass &pass)=0
DenoiserGPU(Device *denoiser_device, const DenoiseParams &params)
virtual bool denoise_ensure(DenoiseContext &context)
bool denoise_filter_color_postprocess(const DenoiseContext &context, const DenoisePass &pass)
~DenoiserGPU() override
bool denoise_filter_color_preprocess(const DenoiseContext &context, const DenoisePass &pass)
bool denoise_filter_guiding_preprocess(const DenoiseContext &context)
virtual bool denoise_create_if_needed(DenoiseContext &context)=0
bool denoise_filter_color_flip_y(const DenoiseContext &context, const DenoisePass &pass)
bool denoise_buffer(const BufferParams &buffer_params, RenderBuffers *render_buffers, const int num_samples, bool allow_inplace_modification) override
unique_ptr< DeviceQueue > denoiser_queue_
bool denoise_filter_guiding_flip_y(const DenoiseContext &context)
void denoise_pass(DenoiseContext &context, PassType pass_type)
void denoise_color_read(const DenoiseContext &context, const DenoisePass &pass)
virtual bool denoise_configure_if_needed(DenoiseContext &context)=0
DenoiseParams params_
Definition denoiser.h:133
Denoiser(Device *denoiser_device, const DenoiseParams &params)
Definition denoiser.cpp:211
Device * get_denoiser_device() const
Definition denoiser.cpp:268
Device * denoiser_device_
Definition denoiser.h:131
virtual unique_ptr< DeviceQueue > gpu_queue_create()
bool get_render_tile_pixels(const RenderBuffers *render_buffers, const Destination &destination) const
device_vector< float > buffer
Definition buffers.h:158
BufferParams params
Definition buffers.h:155
bool copy_from_device()
Definition buffers.cpp:286
void copy_to_device()
Definition buffers.cpp:299
void reset(const BufferParams &params)
Definition buffers.cpp:271
size_t size() const
@ DENOISER_OPTIX
Definition denoise.h:12
#define PASS_UNUSED
#define CCL_NAMESPACE_END
uiWidgetBaseParameters params[MAX_WIDGET_BASE_BATCH]
ccl_gpu_kernel_postfix const ccl_global int ccl_global float const int work_size
PassType
@ PASS_SHADOW_CATCHER_MATTE
@ PASS_SHADOW_CATCHER
@ PASS_COMBINED
@ PASS_SAMPLE_COUNT
@ DEVICE_KERNEL_FILTER_COLOR_PREPROCESS
@ DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO
@ DEVICE_KERNEL_FILTER_COLOR_FLIP_Y
@ DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS
@ DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS
#define DCHECK(expression)
Definition log.h:135
#define LOG_DFATAL
Definition log.h:100
#define LOG_DEBUG
Definition log.h:107
#define LOG_ERROR
Definition log.h:101
CCL_NAMESPACE_BEGIN const char * pass_type_as_string(const PassType type)
Definition pass.cpp:12
@ NOISY
Definition pass.h:21