Blender V4.3
denoiser_gpu.cpp
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
6
7#include "device/denoise.h"
8#include "device/device.h"
9#include "device/memory.h"
10#include "device/queue.h"
12#include "session/buffers.h"
13#include "util/log.h"
14#include "util/progress.h"
15
17
19 : Denoiser(denoiser_device, params)
20{
21 denoiser_queue_ = denoiser_device->gpu_queue_create();
23}
24
/* NOTE(review): the declarator line that belongs in front of this body (listing line 25) is
 * missing from this extract. An empty body with the comment below looks like the out-of-line
 * DenoiserGPU destructor -- TODO confirm against the original file. */
26{
27 /* Explicit implementation, to allow forward declaration of Device in the header. */
28}
29
30bool DenoiserGPU::denoise_buffer(const BufferParams &buffer_params,
31 RenderBuffers *render_buffers,
32 const int num_samples,
33 bool allow_inplace_modification)
34{
35 Device *denoiser_device = get_denoiser_device();
36 if (!denoiser_device) {
37 return false;
38 }
39
40 DenoiseTask task;
41 task.params = params_;
42 task.num_samples = num_samples;
43 task.buffer_params = buffer_params;
44 task.allow_inplace_modification = allow_inplace_modification;
45
46 RenderBuffers local_render_buffers(denoiser_device);
47 bool local_buffer_used = false;
48
49 if (denoiser_device == render_buffers->buffer.device) {
50 /* The device can access an existing buffer pointer. */
51 local_buffer_used = false;
52 task.render_buffers = render_buffers;
53 }
54 else {
55 VLOG_WORK << "Creating temporary buffer on denoiser device.";
56
57 /* Create buffer which is available by the device used by denoiser. */
58
59 /* TODO(sergey): Optimize data transfers. For example, only copy denoising related passes,
60 * ignoring other light ad data passes. */
61
62 local_buffer_used = true;
63
64 render_buffers->copy_from_device();
65
66 local_render_buffers.reset(buffer_params);
67
68 /* NOTE: The local buffer is allocated for an exact size of the effective render size, while
69 * the input render buffer is allocated for the lowest resolution divider possible. So it is
70 * important to only copy actually needed part of the input buffer. */
71 memcpy(local_render_buffers.buffer.data(),
72 render_buffers->buffer.data(),
73 sizeof(float) * local_render_buffers.buffer.size());
74
75 denoiser_queue_->copy_to_device(local_render_buffers.buffer);
76
77 task.render_buffers = &local_render_buffers;
78 task.allow_inplace_modification = true;
79 }
80
81 const bool denoise_result = denoise_buffer(task);
82
83 if (local_buffer_used) {
84 local_render_buffers.copy_from_device();
85
87 render_buffers, buffer_params, &local_render_buffers, local_render_buffers.params);
88
89 render_buffers->copy_to_device();
90 }
91
92 return denoise_result;
93}
94
/* NOTE(review): the function header (listing line 95) and two statements (listing lines 110 and
 * 113) are missing from this extract. The body reads a variable named `task` and returns a
 * boolean, so it is presumably the task-based denoise_buffer() overload that the buffer-based
 * overload calls -- TODO confirm against the original file.
 *
 * Visible behavior: build a DenoiseContext for the task, make sure the denoiser is ready,
 * pre-process the guiding passes, denoise passes one at a time, and return true. Failures of
 * the first two steps return false; denoise_pass() has no result that is checked here. */
96{
97 DenoiseContext context(denoiser_device_, task);
98
99 if (!denoise_ensure(context)) {
100 return false;
101 }
102
103 if (!denoise_filter_guiding_preprocess(context)) {
104 LOG(ERROR) << "Error preprocessing guiding passes.";
105 return false;
106 }
107
108 /* Passes which will use real albedo when it is available. */
109 denoise_pass(context, PASS_COMBINED);
/* NOTE(review): listing line 110 is missing from this extract. */
111
112 /* Passes which do not need albedo and hence if real is present it needs to become fake. */
/* NOTE(review): listing line 113 is missing from this extract. */
114
115 return true;
116}
117
119{
120 if (!denoise_create_if_needed(context)) {
121 LOG(ERROR) << "GPU denoiser creation has failed.";
122 return false;
123 }
124
125 if (!denoise_configure_if_needed(context)) {
126 LOG(ERROR) << "GPU denoiser configuration has failed.";
127 return false;
128 }
129
130 return true;
131}
132
/* NOTE(review): the function header (listing line 133) and the final statement (listing line
 * 158) are missing from this extract. Judging by the argument list this is presumably
 * denoise_filter_guiding_preprocess(), which the index of this listing declares as returning
 * bool -- TODO confirm against the original file.
 *
 * Visible behavior: collect the kernel arguments describing the guiding buffer (destination),
 * the render buffer (source) and the pass offsets, with one work item per pixel of the
 * buffer. The statement that consumes `work_size` and `args` is not visible here. */
134{
135 const BufferParams &buffer_params = context.buffer_params;
136
/* One work item per pixel of the buffer. */
137 const int work_size = buffer_params.width * buffer_params.height;
138
/* Guiding buffer description first, then the render buffer description and pass offsets. */
139 DeviceKernelArguments args(&context.guiding_params.device_pointer,
140 &context.guiding_params.pass_stride,
141 &context.guiding_params.pass_albedo,
142 &context.guiding_params.pass_normal,
143 &context.guiding_params.pass_flow,
144 &context.render_buffers->buffer.device_pointer,
145 &buffer_params.offset,
146 &buffer_params.stride,
147 &buffer_params.pass_stride,
148 &context.pass_sample_count,
149 &context.pass_denoising_albedo,
150 &context.pass_denoising_normal,
151 &context.pass_motion,
152 &buffer_params.full_x,
153 &buffer_params.full_y,
154 &buffer_params.width,
155 &buffer_params.height,
156 &context.num_samples);
157
/* NOTE(review): listing line 158 (the statement using `work_size` and `args`) is missing. */
159}
160
/* NOTE(review): this is the member-initializer list and body of the DenoiseContext
 * constructor (the index of this listing declares DenoiseContext(Device *device,
 * const DenoiseTask &task)), but the header line (listing line 161) and many statements are
 * missing from this extract, including every condition guarding the first blocks. The
 * brace structure below is therefore incomplete; consult the original file before editing.
 *
 * Visible behavior: copy parameters, render buffers, buffer parameters and sample count from
 * the task, count the input passes while recording which guiding passes are used, and set up
 * `guiding_params` either from the render buffer layout (in-place modification allowed) or as
 * a separate packed layout. */
162 : denoise_params(task.params),
163 render_buffers(task.render_buffers),
164 buffer_params(task.buffer_params),
165 guiding_buffer(device, "denoiser guiding passes buffer", true),
166 num_samples(task.num_samples)
167{
/* NOTE(review): listing lines 168-169 are missing (presumably the albedo condition). */
170 num_input_passes += 1;
171 use_pass_albedo = true;
/* NOTE(review): listing lines 172-173 are missing (presumably the normal condition). */
174 num_input_passes += 1;
175 use_pass_normal = true;
/* NOTE(review): listing line 176 is missing. */
177 }
178 }
179
/* NOTE(review): listing lines 180-181, 183 and 185-186 are missing. */
182
184
187
188 num_input_passes += 1;
189 use_pass_motion = true;
/* NOTE(review): listing line 190 is missing. */
191 }
192
/* NOTE(review): listing line 193 is missing; `use_guiding_passes` is presumably set there. */
194
195 if (use_guiding_passes) {
196 if (task.allow_inplace_modification) {
/* NOTE(review): listing lines 197, 199-200 and 203-204 are missing from this branch. */
198
201 guiding_params.pass_flow = pass_motion;
202
205 }
206 else {
/* Build a packed layout: each used pass gets the current stride as its offset, then the
 * stride grows by 3 entries for albedo, 3 for normal and 2 for flow. */
207 guiding_params.pass_stride = 0;
208 if (use_pass_albedo) {
209 guiding_params.pass_albedo = guiding_params.pass_stride;
210 guiding_params.pass_stride += 3;
211 }
212 if (use_pass_normal) {
213 guiding_params.pass_normal = guiding_params.pass_stride;
214 guiding_params.pass_stride += 3;
215 }
216 if (use_pass_motion) {
217 guiding_params.pass_flow = guiding_params.pass_stride;
218 guiding_params.pass_stride += 2;
219 }
220
/* NOTE(review): listing lines 221, 223 and 225 are missing; the line below is the tail of a
 * statement whose beginning is not visible. */
222
224 guiding_params.pass_stride);
226 }
227 }
228
/* NOTE(review): listing line 229 is missing. */
230}
231
/* NOTE(review): the first line of the function header (listing line 232) and the final
 * statement (listing line 254) are missing from this extract. The index of this listing
 * declares denoise_filter_color_preprocess() and denoise_filter_color_postprocess() with a
 * (context, pass) parameter list; which of the two this body belongs to cannot be determined
 * from the extract -- TODO confirm against the original file.
 *
 * Visible behavior: collect kernel arguments describing the render buffer, the sample count
 * and the noisy/denoised offsets, component count and compositing flag of `pass`, with one
 * work item per pixel of the buffer. */
233 const DenoisePass &pass)
234{
235 const BufferParams &buffer_params = context.buffer_params;
236
/* One work item per pixel of the buffer. */
237 const int work_size = buffer_params.width * buffer_params.height;
238
239 DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
240 &buffer_params.full_x,
241 &buffer_params.full_y,
242 &buffer_params.width,
243 &buffer_params.height,
244 &buffer_params.offset,
245 &buffer_params.stride,
246 &buffer_params.pass_stride,
247 &context.num_samples,
248 &pass.noisy_offset,
249 &pass.denoised_offset,
250 &context.pass_sample_count,
251 &pass.num_components,
252 &pass.use_compositing);
253
/* NOTE(review): listing line 254 (the statement using `work_size` and `args`) is missing. */
255}
256
/* NOTE(review): the first line of the function header (listing line 257) and the final
 * statement (listing line 274) are missing from this extract. The index of this listing
 * declares denoise_filter_color_preprocess() and denoise_filter_color_postprocess() with a
 * (context, pass) parameter list; which of the two this body belongs to cannot be determined
 * from the extract -- TODO confirm against the original file.
 *
 * Visible behavior: collect kernel arguments describing the render buffer and the denoised
 * offset of `pass`, with one work item per pixel of the buffer. */
258 const DenoisePass &pass)
259{
260 const BufferParams &buffer_params = context.buffer_params;
261
/* One work item per pixel of the buffer. */
262 const int work_size = buffer_params.width * buffer_params.height;
263
264 DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
265 &buffer_params.full_x,
266 &buffer_params.full_y,
267 &buffer_params.width,
268 &buffer_params.height,
269 &buffer_params.offset,
270 &buffer_params.stride,
271 &buffer_params.pass_stride,
272 &pass.denoised_offset);
273
/* NOTE(review): listing line 274 (the statement using `work_size` and `args`) is missing. */
275}
276
/* NOTE(review): the function header (listing line 277) and the final statement (listing line
 * 289) are missing from this extract. The arguments only touch the albedo slot of the guiding
 * buffer, so this is presumably denoise_filter_guiding_set_fake_albedo() from the index of
 * this listing -- TODO confirm against the original file.
 *
 * Visible behavior: collect kernel arguments describing the guiding buffer, its stride, the
 * albedo offset and the buffer size, with one work item per pixel of the buffer. */
278{
279 const BufferParams &buffer_params = context.buffer_params;
280
/* One work item per pixel of the buffer. */
281 const int work_size = buffer_params.width * buffer_params.height;
282
283 DeviceKernelArguments args(&context.guiding_params.device_pointer,
284 &context.guiding_params.pass_stride,
285 &context.guiding_params.pass_albedo,
286 &buffer_params.width,
287 &buffer_params.height);
288
/* NOTE(review): listing line 289 (the statement using `work_size` and `args`) is missing. */
290}
291
293{
294 PassAccessor::PassAccessInfo pass_access_info;
295 pass_access_info.type = pass.type;
296 pass_access_info.mode = PassMode::NOISY;
297 pass_access_info.offset = pass.noisy_offset;
298
299 /* Denoiser operates on passes which are used to calculate the approximation, and is never used
300 * on the approximation. The latter is not even possible because OptiX does not support
301 * denoising of semi-transparent pixels. */
302 pass_access_info.use_approximate_shadow_catcher = false;
303 pass_access_info.use_approximate_shadow_catcher_background = false;
304 pass_access_info.show_active_pixels = false;
305
306 /* TODO(sergey): Consider adding support of actual exposure, to avoid clamping in extreme cases.
307 */
308 const PassAccessorGPU pass_accessor(
309 denoiser_queue_.get(), pass_access_info, 1.0f, context.num_samples);
310
311 PassAccessor::Destination destination(pass_access_info.type);
312 destination.d_pixels = context.render_buffers->buffer.device_pointer;
313 destination.num_components = 3;
314 destination.pixel_offset = pass.denoised_offset;
315 destination.pixel_stride = context.buffer_params.pass_stride;
316
317 BufferParams buffer_params = context.buffer_params;
318 buffer_params.window_x = 0;
319 buffer_params.window_y = 0;
320 buffer_params.window_width = buffer_params.width;
321 buffer_params.window_height = buffer_params.height;
322
323 pass_accessor.get_render_tile_pixels(context.render_buffers, buffer_params, destination);
324}
325
327{
328 const BufferParams &buffer_params = context.buffer_params;
329
330 const DenoisePass pass(pass_type, buffer_params);
331
332 if (pass.noisy_offset == PASS_UNUSED) {
333 return;
334 }
335 if (pass.denoised_offset == PASS_UNUSED) {
336 LOG(DFATAL) << "Missing denoised pass " << pass_type_as_string(pass_type);
337 return;
338 }
339
340 if (pass.use_denoising_albedo) {
341 if (context.albedo_replaced_with_fake) {
342 LOG(ERROR) << "Pass which requires albedo is denoised after fake albedo has been set.";
343 return;
344 }
345 }
346 else if (context.use_guiding_passes && !context.albedo_replaced_with_fake) {
347 context.albedo_replaced_with_fake = true;
349 LOG(ERROR) << "Error replacing real albedo with the fake one.";
350 return;
351 }
352 }
353
354 /* Read and preprocess noisy color input pass. */
355 denoise_color_read(context, pass);
356 if (!denoise_filter_color_preprocess(context, pass)) {
357 LOG(ERROR) << "Error converting denoising passes to RGB buffer.";
358 return;
359 }
360
361 if (!denoise_run(context, pass)) {
362 LOG(ERROR) << "Error running denoiser.";
363 return;
364 }
365
366 /* Store result in the combined pass of the render buffer.
367 *
368 * This will scale the denoiser result up to match the number of, possibly per-pixel, samples. */
369 if (!denoise_filter_color_postprocess(context, pass)) {
370 LOG(ERROR) << "Error copying denoiser result to the denoised pass.";
371 return;
372 }
373
374 denoiser_queue_->synchronize();
375}
376
void render_buffers_host_copy_denoised(RenderBuffers *dst, const BufferParams &dst_params, const RenderBuffers *src, const BufferParams &src_params, const size_t src_offset)
Definition buffers.cpp:307
int pass_stride
Definition buffers.h:94
int get_pass_offset(PassType type, PassMode mode=PassMode::NOISY) const
Definition buffers.cpp:167
int window_y
Definition buffers.h:80
int window_height
Definition buffers.h:82
int window_width
Definition buffers.h:81
NODE_DECLARE int width
Definition buffers.h:72
int window_x
Definition buffers.h:79
bool temporally_stable
Definition denoise.h:71
bool use_pass_normal
Definition denoise.h:68
bool use_pass_albedo
Definition denoise.h:67
DenoiseContext(Device *device, const DenoiseTask &task)
const BufferParams & buffer_params
const DenoiseParams & denoise_params
struct DenoiserGPU::DenoiseContext::@1421 guiding_params
struct DenoiserGPU::DenoiseContext::@1420 prev_output
device_only_memory< float > guiding_buffer
RenderBuffers * render_buffers
bool denoise_filter_guiding_set_fake_albedo(const DenoiseContext &context)
virtual bool denoise_run(const DenoiseContext &context, const DenoisePass &pass)=0
DenoiserGPU(Device *denoiser_device, const DenoiseParams &params)
virtual bool denoise_ensure(DenoiseContext &context)
bool denoise_filter_color_postprocess(const DenoiseContext &context, const DenoisePass &pass)
bool denoise_filter_color_preprocess(const DenoiseContext &context, const DenoisePass &pass)
bool denoise_filter_guiding_preprocess(const DenoiseContext &context)
virtual bool denoise_create_if_needed(DenoiseContext &context)=0
virtual bool denoise_buffer(const BufferParams &buffer_params, RenderBuffers *render_buffers, const int num_samples, bool allow_inplace_modification) override
unique_ptr< DeviceQueue > denoiser_queue_
void denoise_pass(DenoiseContext &context, PassType pass_type)
void denoise_color_read(const DenoiseContext &context, const DenoisePass &pass)
virtual bool denoise_configure_if_needed(DenoiseContext &context)=0
DenoiseParams params_
Definition denoiser.h:128
Device * get_denoiser_device() const
Definition denoiser.cpp:256
Device * denoiser_device_
Definition denoiser.h:126
virtual unique_ptr< DeviceQueue > gpu_queue_create()
bool get_render_tile_pixels(const RenderBuffers *render_buffers, const Destination &destination) const
device_vector< float > buffer
Definition buffers.h:160
BufferParams params
Definition buffers.h:157
bool copy_from_device()
Definition buffers.cpp:289
void copy_to_device()
Definition buffers.cpp:302
void reset(const BufferParams &params)
Definition buffers.cpp:274
void alloc_to_device(size_t num, bool shrink_to_fit=true)
size_t size() const
#define CCL_NAMESPACE_END
uiWidgetBaseParameters params[MAX_WIDGET_BASE_BATCH]
ccl_gpu_kernel_postfix ccl_global const int ccl_global float const int work_size
#define PASS_UNUSED
PassType
@ PASS_SHADOW_CATCHER_MATTE
@ PASS_SHADOW_CATCHER
@ PASS_DENOISING_NORMAL
@ PASS_MOTION
@ PASS_COMBINED
@ PASS_SAMPLE_COUNT
@ PASS_DENOISING_ALBEDO
@ PASS_DENOISING_PREVIOUS
@ DEVICE_KERNEL_FILTER_COLOR_PREPROCESS
@ DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO
@ DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS
@ DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS
#define DCHECK(expression)
Definition log.h:51
#define VLOG_WORK
Definition log.h:75
#define LOG(severity)
Definition log.h:33
CCL_NAMESPACE_BEGIN const char * pass_type_as_string(const PassType type)
Definition pass.cpp:12