Blender V4.3
denoiser_oidn_gpu.cpp
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#if defined(WITH_OPENIMAGEDENOISE)
6
8
9# include <array>
10
11# include "device/device.h"
13# include "device/queue.h"
15# include "session/buffers.h"
16# include "util/array.h"
17# include "util/log.h"
18# include "util/path.h"
19
22
23# if OIDN_VERSION_MAJOR < 2
24# define oidnSetFilterBool oidnSetFilter1b
25# define oidnSetFilterInt oidnSetFilter1i
26# define oidnExecuteFilterAsync oidnExecuteFilter
27# endif
28
30
31# if OIDN_VERSION < 20300
32static const char *oidn_device_type_to_string(const OIDNDeviceType type)
33{
34 switch (type) {
35 case OIDN_DEVICE_TYPE_DEFAULT:
36 return "DEFAULT";
37 case OIDN_DEVICE_TYPE_CPU:
38 return "CPU";
39
40 /* The initial GPU support was added in OIDN 2.0. */
41# if OIDN_VERSION_MAJOR >= 2
42 case OIDN_DEVICE_TYPE_SYCL:
43 return "SYCL";
44 case OIDN_DEVICE_TYPE_CUDA:
45 return "CUDA";
46 case OIDN_DEVICE_TYPE_HIP:
47 return "HIP";
48# endif
49
50 /* The Metal support was added in OIDN 2.2. */
51# if (OIDN_VERSION_MAJOR > 2) || ((OIDN_VERSION_MAJOR == 2) && (OIDN_VERSION_MINOR >= 2))
52 case OIDN_DEVICE_TYPE_METAL:
53 return "METAL";
54# endif
55 }
56 return "UNKNOWN";
57}
58# endif
59
60bool OIDNDenoiserGPU::is_device_supported(const DeviceInfo &device)
61{
62# if OIDN_VERSION >= 20300
63 if (device.type == DEVICE_MULTI) {
64 for (const DeviceInfo &multi_device : device.multi_devices) {
65 if (multi_device.type != DEVICE_CPU && multi_device.denoisers & DENOISER_OPENIMAGEDENOISE) {
66 return true;
67 }
68 }
69
70 return false;
71 }
72
74# else
75 if (device.type == DEVICE_MULTI) {
76 for (const DeviceInfo &multi_device : device.multi_devices) {
77 if (multi_device.type != DEVICE_CPU && is_device_supported(multi_device)) {
78 return true;
79 }
80 }
81
82 return false;
83 }
84
85 VLOG_DEBUG << "Checking device " << device.description << " (" << device.id
86 << ") for OIDN GPU support";
87
88 int device_type = OIDN_DEVICE_TYPE_DEFAULT;
89 switch (device.type) {
90# ifdef OIDN_DEVICE_SYCL
91 case DEVICE_ONEAPI:
92 device_type = OIDN_DEVICE_TYPE_SYCL;
93 break;
94# endif
95# ifdef OIDN_DEVICE_HIP
96 case DEVICE_HIP:
97 device_type = OIDN_DEVICE_TYPE_HIP;
98 break;
99# endif
100# ifdef OIDN_DEVICE_CUDA
101 case DEVICE_CUDA:
102 case DEVICE_OPTIX:
103 device_type = OIDN_DEVICE_TYPE_CUDA;
104 break;
105# endif
106# ifdef OIDN_DEVICE_METAL
107 case DEVICE_METAL: {
108 const int num_devices = oidnGetNumPhysicalDevices();
109 VLOG_DEBUG << "Found " << num_devices << " OIDN device(s)";
110 for (int i = 0; i < num_devices; i++) {
111 const int type = oidnGetPhysicalDeviceInt(i, "type");
112 const char *name = oidnGetPhysicalDeviceString(i, "name");
113 VLOG_DEBUG << "OIDN device " << i << ": name=\"" << name
114 << "\", type=" << oidn_device_type_to_string(OIDNDeviceType(type));
115 if (type == OIDN_DEVICE_TYPE_METAL) {
116 if (device.id.find(name) != std::string::npos) {
117 VLOG_DEBUG << "OIDN device name matches the Cycles device name";
118 return true;
119 }
120 }
121 }
122 VLOG_DEBUG << "No matched OIDN device found";
123 return false;
124 }
125# endif
126 case DEVICE_CPU:
127 /* This is the GPU denoiser - CPU devices shouldn't end up here. */
128 assert(0);
129 default:
130 return false;
131 }
132
133 /* Match GPUs by their PCI ID. */
134 const int num_devices = oidnGetNumPhysicalDevices();
135 VLOG_DEBUG << "Found " << num_devices << " OIDN device(s)";
136 for (int i = 0; i < num_devices; i++) {
137 const int type = oidnGetPhysicalDeviceInt(i, "type");
138 const char *name = oidnGetPhysicalDeviceString(i, "name");
139 VLOG_DEBUG << "OIDN device " << i << ": name=\"" << name
140 << "\" type=" << oidn_device_type_to_string(OIDNDeviceType(type));
141 if (type == device_type) {
142 if (oidnGetPhysicalDeviceBool(i, "pciAddressSupported")) {
143 unsigned int pci_domain = oidnGetPhysicalDeviceInt(i, "pciDomain");
144 unsigned int pci_bus = oidnGetPhysicalDeviceInt(i, "pciBus");
145 unsigned int pci_device = oidnGetPhysicalDeviceInt(i, "pciDevice");
146 string pci_id = string_printf("%04x:%02x:%02x", pci_domain, pci_bus, pci_device);
147 VLOG_INFO << "OIDN device PCI-e identifier: " << pci_id;
148 if (device.id.find(pci_id) != string::npos) {
149 VLOG_DEBUG << "OIDN device PCI-e identifier matches the Cycles device ID";
150 return true;
151 }
152 }
153 else {
154 VLOG_DEBUG << "Device does not support pciAddressSupported";
155 }
156 }
157 }
158 VLOG_DEBUG << "No matched OIDN device found";
159 return false;
160# endif
161}
162
163OIDNDenoiserGPU::OIDNDenoiserGPU(Device *denoiser_device, const DenoiseParams &params)
164 : DenoiserGPU(denoiser_device, params)
165{
167}
168
169OIDNDenoiserGPU::~OIDNDenoiserGPU()
170{
171 release_all_resources();
172}
173
174bool OIDNDenoiserGPU::denoise_buffer(const BufferParams &buffer_params,
175 RenderBuffers *render_buffers,
176 const int num_samples,
177 bool allow_inplace_modification)
178{
180 buffer_params, render_buffers, num_samples, allow_inplace_modification);
181}
182
183uint OIDNDenoiserGPU::get_device_type_mask() const
184{
185 uint device_mask = 0;
186# ifdef OIDN_DEVICE_SYCL
187 device_mask |= DEVICE_MASK_ONEAPI;
188# endif
189# ifdef OIDN_DEVICE_METAL
190 device_mask |= DEVICE_MASK_METAL;
191# endif
192# ifdef OIDN_DEVICE_CUDA
193 device_mask |= DEVICE_MASK_CUDA;
194 device_mask |= DEVICE_MASK_OPTIX;
195# endif
196# ifdef OIDN_DEVICE_HIP
197 device_mask |= DEVICE_MASK_HIP;
198# endif
199 return device_mask;
200}
201
202OIDNFilter OIDNDenoiserGPU::create_filter()
203{
204 const char *error_message = nullptr;
205 OIDNFilter filter = oidnNewFilter(oidn_device_, "RT");
206 if (filter == nullptr) {
207 OIDNError err = oidnGetDeviceError(oidn_device_, (const char **)&error_message);
208 if (OIDN_ERROR_NONE != err) {
209 LOG(ERROR) << "OIDN error: " << error_message;
210 set_error(error_message);
211 }
212 }
213
214# if OIDN_VERSION_MAJOR >= 2
215 switch (quality_) {
217# if OIDN_VERSION >= 20300
218 oidnSetFilterInt(filter, "quality", OIDN_QUALITY_FAST);
219 break;
220# endif
222 oidnSetFilterInt(filter, "quality", OIDN_QUALITY_BALANCED);
223 break;
225 default:
226 oidnSetFilterInt(filter, "quality", OIDN_QUALITY_HIGH);
227 }
228# endif
229
230 return filter;
231}
232
233bool OIDNDenoiserGPU::commit_and_execute_filter(OIDNFilter filter, ExecMode mode)
234{
235 const char *error_message = nullptr;
236 OIDNError err = OIDN_ERROR_NONE;
237
238 for (;;) {
239 oidnCommitFilter(filter);
240 if (mode == ExecMode::ASYNC) {
241 oidnExecuteFilterAsync(filter);
242 }
243 else {
244 oidnExecuteFilter(filter);
245 }
246
247 /* If OIDN runs out of memory, reduce mem limit and retry */
248 err = oidnGetDeviceError(oidn_device_, (const char **)&error_message);
249 if (err != OIDN_ERROR_OUT_OF_MEMORY || max_mem_ < 200) {
250 break;
251 }
252 max_mem_ = max_mem_ / 2;
253 oidnSetFilterInt(filter, "maxMemoryMB", max_mem_);
254 }
255
256 if (err != OIDN_ERROR_NONE) {
257 if (error_message == nullptr) {
258 error_message = "Unspecified OIDN error";
259 }
260 LOG(ERROR) << "OIDN error: " << error_message;
261 set_error(error_message);
262 return false;
263 }
264 return true;
265}
266
267bool OIDNDenoiserGPU::denoise_create_if_needed(DenoiseContext &context)
268{
269 const bool recreate_denoiser = (oidn_device_ == nullptr) || (oidn_filter_ == nullptr) ||
270 (use_pass_albedo_ != context.use_pass_albedo) ||
271 (use_pass_normal_ != context.use_pass_normal) ||
272 (quality_ != params_.quality);
273 if (!recreate_denoiser) {
274 return true;
275 }
276
277 /* Destroy existing handles before creating new ones. */
278 release_all_resources();
279
280 switch (denoiser_device_->info.type) {
281# if defined(OIDN_DEVICE_SYCL) && defined(WITH_ONEAPI)
282 case DEVICE_ONEAPI:
283 oidn_device_ = oidnNewSYCLDevice(
284 (const sycl::queue *)reinterpret_cast<OneapiDevice *>(denoiser_device_)->sycl_queue(),
285 1);
286 break;
287# endif
288# if defined(OIDN_DEVICE_METAL) && defined(WITH_METAL)
289 case DEVICE_METAL: {
290 denoiser_queue_->init_execution();
291 const MTLCommandQueue_id queue = (const MTLCommandQueue_id)denoiser_queue_->native_queue();
292 oidn_device_ = oidnNewMetalDevice(&queue, 1);
293 } break;
294# endif
295# if defined(OIDN_DEVICE_CUDA) && defined(WITH_CUDA)
296 case DEVICE_CUDA:
297 case DEVICE_OPTIX: {
298 /* Directly using the stream from the DeviceQueue returns "invalid resource handle". */
299 cudaStream_t stream = nullptr;
300 oidn_device_ = oidnNewCUDADevice(&denoiser_device_->info.num, &stream, 1);
301 break;
302 }
303# endif
304# if defined(OIDN_DEVICE_HIP) && defined(WITH_HIP)
305 case DEVICE_HIP: {
306 hipStream_t stream = nullptr;
307 oidn_device_ = oidnNewHIPDevice(&denoiser_device_->info.num, &stream, 1);
308 break;
309 }
310# endif
311 default:
312 break;
313 }
314
315 if (!oidn_device_) {
316 set_error("Failed to create OIDN device");
317 return false;
318 }
319
320 if (denoiser_queue_) {
321 denoiser_queue_->init_execution();
322 }
323
324 oidnCommitDevice(oidn_device_);
325
326 quality_ = params_.quality;
327
328 oidn_filter_ = create_filter();
329 if (oidn_filter_ == nullptr) {
330 return false;
331 }
332
333 oidnSetFilterBool(oidn_filter_, "hdr", true);
334 oidnSetFilterBool(oidn_filter_, "srgb", false);
335
336 const char *custom_weight_path = getenv("CYCLES_OIDN_CUSTOM_WEIGHTS");
337 if (custom_weight_path) {
338 if (path_read_binary(custom_weight_path, custom_weights)) {
339 oidnSetSharedFilterData(
340 oidn_filter_, "weights", custom_weights.data(), custom_weights.size());
341 }
342 else {
343 fprintf(stderr, "Cycles: Failed to load custom OIDN weights!");
344 }
345 }
346
347 if (context.use_pass_albedo) {
348 albedo_filter_ = create_filter();
349 if (albedo_filter_ == nullptr) {
350 return false;
351 }
352 }
353
354 if (context.use_pass_normal) {
355 normal_filter_ = create_filter();
356 if (normal_filter_ == nullptr) {
357 return false;
358 }
359 }
360
361 /* OIDN denoiser handle was created with the requested number of input passes. */
362 use_pass_albedo_ = context.use_pass_albedo;
363 use_pass_normal_ = context.use_pass_normal;
364
365 /* OIDN denoiser has been created, but it needs configuration. */
366 is_configured_ = false;
367 return true;
368}
369
370bool OIDNDenoiserGPU::denoise_configure_if_needed(DenoiseContext &context)
371{
372 /* Limit maximum tile size denoiser can be invoked with. */
373 const int2 size = make_int2(context.buffer_params.width, context.buffer_params.height);
374
375 if (is_configured_ && (configured_size_.x == size.x && configured_size_.y == size.y)) {
376 return true;
377 }
378
379 is_configured_ = true;
380 configured_size_ = size;
381
382 return true;
383}
384
385bool OIDNDenoiserGPU::denoise_run(const DenoiseContext &context, const DenoisePass &pass)
386{
387 /* Color pass. */
388 const int64_t pass_stride_in_bytes = context.buffer_params.pass_stride * sizeof(float);
389
390 set_filter_pass(oidn_filter_,
391 "color",
392 context.render_buffers->buffer.device_pointer,
393 OIDN_FORMAT_FLOAT3,
394 context.buffer_params.width,
395 context.buffer_params.height,
396 pass.denoised_offset * sizeof(float),
397 pass_stride_in_bytes,
398 pass_stride_in_bytes * context.buffer_params.stride);
399
400 set_filter_pass(oidn_filter_,
401 "output",
402 context.render_buffers->buffer.device_pointer,
403 OIDN_FORMAT_FLOAT3,
404 context.buffer_params.width,
405 context.buffer_params.height,
406 pass.denoised_offset * sizeof(float),
407 pass_stride_in_bytes,
408 pass_stride_in_bytes * context.buffer_params.stride);
409
410 /* Optional albedo and color passes. */
411 if (context.num_input_passes > 1) {
412 const device_ptr d_guiding_buffer = context.guiding_params.device_pointer;
413 const int64_t pixel_stride_in_bytes = context.guiding_params.pass_stride * sizeof(float);
414 const int64_t row_stride_in_bytes = context.guiding_params.stride * pixel_stride_in_bytes;
415
416 if (context.use_pass_albedo) {
417 set_filter_pass(oidn_filter_,
418 "albedo",
419 d_guiding_buffer,
420 OIDN_FORMAT_FLOAT3,
421 context.buffer_params.width,
422 context.buffer_params.height,
423 context.guiding_params.pass_albedo * sizeof(float),
424 pixel_stride_in_bytes,
425 row_stride_in_bytes);
426
427 if (params_.prefilter == DENOISER_PREFILTER_ACCURATE) {
428 set_filter_pass(albedo_filter_,
429 "albedo",
430 d_guiding_buffer,
431 OIDN_FORMAT_FLOAT3,
432 context.buffer_params.width,
433 context.buffer_params.height,
434 context.guiding_params.pass_albedo * sizeof(float),
435 pixel_stride_in_bytes,
436 row_stride_in_bytes);
437
438 set_filter_pass(albedo_filter_,
439 "output",
440 d_guiding_buffer,
441 OIDN_FORMAT_FLOAT3,
442 context.buffer_params.width,
443 context.buffer_params.height,
444 context.guiding_params.pass_albedo * sizeof(float),
445 pixel_stride_in_bytes,
446 row_stride_in_bytes);
447
448 if (!commit_and_execute_filter(albedo_filter_, ExecMode::ASYNC)) {
449 return false;
450 }
451 }
452 }
453
454 if (context.use_pass_normal) {
455 set_filter_pass(oidn_filter_,
456 "normal",
457 d_guiding_buffer,
458 OIDN_FORMAT_FLOAT3,
459 context.buffer_params.width,
460 context.buffer_params.height,
461 context.guiding_params.pass_normal * sizeof(float),
462 pixel_stride_in_bytes,
463 row_stride_in_bytes);
464
465 if (params_.prefilter == DENOISER_PREFILTER_ACCURATE) {
466 set_filter_pass(normal_filter_,
467 "normal",
468 d_guiding_buffer,
469 OIDN_FORMAT_FLOAT3,
470 context.buffer_params.width,
471 context.buffer_params.height,
472 context.guiding_params.pass_normal * sizeof(float),
473 pixel_stride_in_bytes,
474 row_stride_in_bytes);
475
476 set_filter_pass(normal_filter_,
477 "output",
478 d_guiding_buffer,
479 OIDN_FORMAT_FLOAT3,
480 context.buffer_params.width,
481 context.buffer_params.height,
482 context.guiding_params.pass_normal * sizeof(float),
483 pixel_stride_in_bytes,
484 row_stride_in_bytes);
485
486 if (!commit_and_execute_filter(normal_filter_, ExecMode::ASYNC)) {
487 return false;
488 }
489 }
490 }
491 }
492
493 oidnSetFilterInt(oidn_filter_, "cleanAux", params_.prefilter != DENOISER_PREFILTER_FAST);
494 return commit_and_execute_filter(oidn_filter_);
495}
496
497void OIDNDenoiserGPU::set_filter_pass(OIDNFilter filter,
498 const char *name,
500 int format,
501 int width,
502 int height,
503 size_t offset_in_bytes,
504 size_t pixel_stride_in_bytes,
505 size_t row_stride_in_bytes)
506{
507# if defined(OIDN_DEVICE_METAL) && defined(WITH_METAL)
508 if (denoiser_device_->info.type == DEVICE_METAL) {
509 void *mtl_buffer = denoiser_device_->get_native_buffer(ptr);
510 OIDNBuffer oidn_buffer = oidnNewSharedBufferFromMetal(oidn_device_, mtl_buffer);
511
512 oidnSetFilterImage(filter,
513 name,
514 oidn_buffer,
515 (OIDNFormat)format,
516 width,
517 height,
518 offset_in_bytes,
519 pixel_stride_in_bytes,
520 row_stride_in_bytes);
521
522 oidnReleaseBuffer(oidn_buffer);
523 }
524 else
525# endif
526 {
527 oidnSetSharedFilterImage(filter,
528 name,
529 (void *)ptr,
530 (OIDNFormat)format,
531 width,
532 height,
533 offset_in_bytes,
534 pixel_stride_in_bytes,
535 row_stride_in_bytes);
536 }
537}
538
539void OIDNDenoiserGPU::release_all_resources()
540{
541 if (albedo_filter_) {
542 oidnReleaseFilter(albedo_filter_);
543 albedo_filter_ = nullptr;
544 }
545 if (normal_filter_) {
546 oidnReleaseFilter(normal_filter_);
547 normal_filter_ = nullptr;
548 }
549 if (oidn_filter_) {
550 oidnReleaseFilter(oidn_filter_);
551 oidn_filter_ = nullptr;
552 }
553 if (oidn_device_) {
554 oidnReleaseDevice(oidn_device_);
555 oidn_device_ = nullptr;
556 }
557}
558
560
561#endif
unsigned int uint
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition btDbvt.cpp:52
virtual bool denoise_buffer(const BufferParams &buffer_params, RenderBuffers *render_buffers, const int num_samples, bool allow_inplace_modification) override
DenoiserTypeMask denoisers
DeviceType type
string description
@ DENOISER_QUALITY_FAST
Definition denoise.h:46
@ DENOISER_QUALITY_BALANCED
Definition denoise.h:45
@ DENOISER_QUALITY_HIGH
Definition denoise.h:44
@ DENOISER_PREFILTER_FAST
Definition denoise.h:34
@ DENOISER_PREFILTER_ACCURATE
Definition denoise.h:38
@ DENOISER_OPENIMAGEDENOISE
Definition denoise.h:15
#define CCL_NAMESPACE_END
@ DEVICE_MASK_OPTIX
@ DEVICE_MASK_HIP
@ DEVICE_MASK_CUDA
@ DEVICE_MASK_METAL
@ DEVICE_MASK_ONEAPI
@ DEVICE_METAL
@ DEVICE_MULTI
@ DEVICE_CUDA
@ DEVICE_CPU
@ DEVICE_OPTIX
@ DEVICE_HIP
@ DEVICE_ONEAPI
ccl_device_forceinline int2 make_int2(const int x, const int y)
draw_view in_light_buf[] float
DO_INLINE void filter(lfVector *V, fmatrix3x3 *S)
uiWidgetBaseParameters params[MAX_WIDGET_BASE_BATCH]
format
#define VLOG_INFO
Definition log.h:72
#define DCHECK_EQ(a, b)
Definition log.h:59
#define LOG(severity)
Definition log.h:33
#define VLOG_DEBUG
Definition log.h:81
bool path_read_binary(const string &path, vector< uint8_t > &binary)
Definition path.cpp:682
__int64 int64_t
Definition stdint.h:89
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
Definition string.cpp:23
uint64_t device_ptr
Definition util/types.h:45
PointerRNA * ptr
Definition wm_files.cc:4126