Blender V5.0
denoiser_oidn_gpu.cpp
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#if defined(WITH_OPENIMAGEDENOISE)
6
8
9# include "device/device.h"
11# include "device/queue.h"
12
13# include "session/buffers.h"
14
15# include "util/log.h"
16# include "util/path.h"
17
18# if OIDN_VERSION_MAJOR < 2
19# define oidnSetFilterBool oidnSetFilter1b
20# define oidnSetFilterInt oidnSetFilter1i
21# define oidnExecuteFilterAsync oidnExecuteFilter
22# endif
23
25
26# if OIDN_VERSION < 20300
27static const char *oidn_device_type_to_string(const OIDNDeviceType type)
28{
29 switch (type) {
30 case OIDN_DEVICE_TYPE_DEFAULT:
31 return "DEFAULT";
32 case OIDN_DEVICE_TYPE_CPU:
33 return "CPU";
34
35 /* The initial GPU support was added in OIDN 2.0. */
36# if OIDN_VERSION_MAJOR >= 2
37 case OIDN_DEVICE_TYPE_SYCL:
38 return "SYCL";
39 case OIDN_DEVICE_TYPE_CUDA:
40 return "CUDA";
41 case OIDN_DEVICE_TYPE_HIP:
42 return "HIP";
43# endif
44
45 /* The Metal support was added in OIDN 2.2. */
46# if (OIDN_VERSION_MAJOR > 2) || ((OIDN_VERSION_MAJOR == 2) && (OIDN_VERSION_MINOR >= 2))
47 case OIDN_DEVICE_TYPE_METAL:
48 return "METAL";
49# endif
50 }
51 return "UNKNOWN";
52}
53# endif
54
55bool OIDNDenoiserGPU::is_device_supported(const DeviceInfo &device)
56{
57# if OIDN_VERSION >= 20300
58 if (device.type == DEVICE_MULTI) {
59 for (const DeviceInfo &multi_device : device.multi_devices) {
60 if (multi_device.type != DEVICE_CPU && multi_device.denoisers & DENOISER_OPENIMAGEDENOISE) {
61 return true;
62 }
63 }
64
65 return false;
66 }
67
69# else
70 if (device.type == DEVICE_MULTI) {
71 for (const DeviceInfo &multi_device : device.multi_devices) {
72 if (multi_device.type != DEVICE_CPU && is_device_supported(multi_device)) {
73 return true;
74 }
75 }
76
77 return false;
78 }
79
80 LOG_TRACE << "Checking device " << device.description << " (" << device.id
81 << ") for OIDN GPU support";
82
83 int device_type = OIDN_DEVICE_TYPE_DEFAULT;
84 switch (device.type) {
85# ifdef OIDN_DEVICE_SYCL
86 case DEVICE_ONEAPI:
87 device_type = OIDN_DEVICE_TYPE_SYCL;
88 break;
89# endif
90# ifdef OIDN_DEVICE_HIP
91 case DEVICE_HIP:
92 device_type = OIDN_DEVICE_TYPE_HIP;
93 break;
94# endif
95# ifdef OIDN_DEVICE_CUDA
96 case DEVICE_CUDA:
97 case DEVICE_OPTIX:
98 device_type = OIDN_DEVICE_TYPE_CUDA;
99 break;
100# endif
101# ifdef OIDN_DEVICE_METAL
102 case DEVICE_METAL: {
103 const int num_devices = oidnGetNumPhysicalDevices();
104 LOG_TRACE << "Found " << num_devices << " OIDN device(s)";
105 for (int i = 0; i < num_devices; i++) {
106 const int type = oidnGetPhysicalDeviceInt(i, "type");
107 const char *name = oidnGetPhysicalDeviceString(i, "name");
108 LOG_TRACE << "OIDN device " << i << ": name=\"" << name
109 << "\", type=" << oidn_device_type_to_string(OIDNDeviceType(type));
110 if (type == OIDN_DEVICE_TYPE_METAL) {
111 if (device.id.find(name) != std::string::npos) {
112 LOG_TRACE << "OIDN device name matches the Cycles device name";
113 return true;
114 }
115 }
116 }
117 LOG_TRACE << "No matched OIDN device found";
118 return false;
119 }
120# endif
121 case DEVICE_CPU:
122 /* This is the GPU denoiser - CPU devices shouldn't end up here. */
123 assert(0);
124 default:
125 return false;
126 }
127
128 /* Match GPUs by their PCI ID. */
129 const int num_devices = oidnGetNumPhysicalDevices();
130 LOG_TRACE << "Found " << num_devices << " OIDN device(s)";
131 for (int i = 0; i < num_devices; i++) {
132 const int type = oidnGetPhysicalDeviceInt(i, "type");
133 const char *name = oidnGetPhysicalDeviceString(i, "name");
134 LOG_TRACE << "OIDN device " << i << ": name=\"" << name
135 << "\" type=" << oidn_device_type_to_string(OIDNDeviceType(type));
136 if (type == device_type) {
137 if (oidnGetPhysicalDeviceBool(i, "pciAddressSupported")) {
138 unsigned int pci_domain = oidnGetPhysicalDeviceInt(i, "pciDomain");
139 unsigned int pci_bus = oidnGetPhysicalDeviceInt(i, "pciBus");
140 unsigned int pci_device = oidnGetPhysicalDeviceInt(i, "pciDevice");
141 string pci_id = string_printf("%04x:%02x:%02x", pci_domain, pci_bus, pci_device);
142 LOG_INFO << "OIDN device PCI-e identifier: " << pci_id;
143 if (device.id.find(pci_id) != string::npos) {
144 LOG_TRACE << "OIDN device PCI-e identifier matches the Cycles device ID";
145 return true;
146 }
147 }
148 else {
149 LOG_TRACE << "Device does not support pciAddressSupported";
150 }
151 }
152 }
153 LOG_TRACE << "No matched OIDN device found";
154 return false;
155# endif
156}
157
158OIDNDenoiserGPU::OIDNDenoiserGPU(Device *denoiser_device, const DenoiseParams &params)
159 : DenoiserGPU(denoiser_device, params)
160{
162}
163
164OIDNDenoiserGPU::~OIDNDenoiserGPU()
165{
166 release_all_resources();
167}
168
169bool OIDNDenoiserGPU::denoise_buffer(const BufferParams &buffer_params,
170 RenderBuffers *render_buffers,
171 const int num_samples,
172 bool allow_inplace_modification)
173{
175 buffer_params, render_buffers, num_samples, allow_inplace_modification);
176}
177
178uint OIDNDenoiserGPU::get_device_type_mask() const
179{
180 uint device_mask = 0;
181# ifdef OIDN_DEVICE_SYCL
182 device_mask |= DEVICE_MASK_ONEAPI;
183# endif
184# ifdef OIDN_DEVICE_METAL
185 device_mask |= DEVICE_MASK_METAL;
186# endif
187# ifdef OIDN_DEVICE_CUDA
188 device_mask |= DEVICE_MASK_CUDA;
189 device_mask |= DEVICE_MASK_OPTIX;
190# endif
191# ifdef OIDN_DEVICE_HIP
192 device_mask |= DEVICE_MASK_HIP;
193# endif
194 return device_mask;
195}
196
197OIDNFilter OIDNDenoiserGPU::create_filter()
198{
199 const char *error_message = nullptr;
200 OIDNFilter filter = oidnNewFilter(oidn_device_, "RT");
201 if (filter == nullptr) {
202 const OIDNError err = oidnGetDeviceError(oidn_device_, &error_message);
203 if (OIDN_ERROR_NONE != err) {
204 LOG_ERROR << "OIDN error: " << error_message;
205 set_error(error_message);
206 }
207 }
208
209# if OIDN_VERSION_MAJOR >= 2
210 switch (quality_) {
212# if OIDN_VERSION >= 20300
213 oidnSetFilterInt(filter, "quality", OIDN_QUALITY_FAST);
214 break;
215# endif
217 oidnSetFilterInt(filter, "quality", OIDN_QUALITY_BALANCED);
218 break;
220 default:
221 oidnSetFilterInt(filter, "quality", OIDN_QUALITY_HIGH);
222 }
223# endif
224
225 return filter;
226}
227
228bool OIDNDenoiserGPU::commit_and_execute_filter(OIDNFilter filter, ExecMode mode)
229{
230 const char *error_message = nullptr;
231 OIDNError err = OIDN_ERROR_NONE;
232
233 for (;;) {
234 oidnCommitFilter(filter);
235 if (mode == ExecMode::ASYNC) {
236 oidnExecuteFilterAsync(filter);
237 }
238 else {
239 oidnExecuteFilter(filter);
240 }
241
242 /* If OIDN runs out of memory, reduce mem limit and retry */
243 err = oidnGetDeviceError(oidn_device_, &error_message);
244 if (err != OIDN_ERROR_OUT_OF_MEMORY || max_mem_ < 200) {
245 break;
246 }
247 max_mem_ = max_mem_ / 2;
248 oidnSetFilterInt(filter, "maxMemoryMB", max_mem_);
249 }
250
251 if (err != OIDN_ERROR_NONE) {
252 if (error_message == nullptr) {
253 error_message = "Unspecified OIDN error";
254 }
255 LOG_ERROR << "OIDN error: " << error_message;
256 set_error(error_message);
257 return false;
258 }
259 return true;
260}
261
262bool OIDNDenoiserGPU::denoise_create_if_needed(DenoiseContext &context)
263{
264 const bool recreate_denoiser = (oidn_device_ == nullptr) || (oidn_filter_ == nullptr) ||
265 (use_pass_albedo_ != context.use_pass_albedo) ||
266 (use_pass_normal_ != context.use_pass_normal) ||
267 (quality_ != params_.quality);
268 if (!recreate_denoiser) {
269 return true;
270 }
271
272 /* Destroy existing handles before creating new ones. */
273 release_all_resources();
274
275 switch (denoiser_device_->info.type) {
276# if defined(OIDN_DEVICE_SYCL) && defined(WITH_ONEAPI)
277 case DEVICE_ONEAPI:
278 oidn_device_ = oidnNewSYCLDevice(
279 (const sycl::queue *)reinterpret_cast<OneapiDevice *>(denoiser_device_)->sycl_queue(),
280 1);
281 break;
282# endif
283# if defined(OIDN_DEVICE_METAL) && defined(WITH_METAL)
284 case DEVICE_METAL: {
285 denoiser_queue_->init_execution();
286 const MTLCommandQueue_id queue = (const MTLCommandQueue_id)denoiser_queue_->native_queue();
287 oidn_device_ = oidnNewMetalDevice(&queue, 1);
288 } break;
289# endif
290# if defined(OIDN_DEVICE_CUDA) && defined(WITH_CUDA)
291 case DEVICE_CUDA:
292 case DEVICE_OPTIX: {
293 /* Directly using the stream from the DeviceQueue returns "invalid resource handle". */
294 cudaStream_t stream = nullptr;
295 oidn_device_ = oidnNewCUDADevice(&denoiser_device_->info.num, &stream, 1);
296 break;
297 }
298# endif
299# if defined(OIDN_DEVICE_HIP) && defined(WITH_HIP)
300 case DEVICE_HIP: {
301 hipStream_t stream = nullptr;
302 oidn_device_ = oidnNewHIPDevice(&denoiser_device_->info.num, &stream, 1);
303 break;
304 }
305# endif
306 default:
307 break;
308 }
309
310 if (!oidn_device_) {
311 set_error("Failed to create OIDN device");
312 return false;
313 }
314
315 if (denoiser_queue_) {
316 denoiser_queue_->init_execution();
317 }
318
319 oidnCommitDevice(oidn_device_);
320
321 quality_ = params_.quality;
322
323 oidn_filter_ = create_filter();
324 if (oidn_filter_ == nullptr) {
325 return false;
326 }
327
328 oidnSetFilterBool(oidn_filter_, "hdr", true);
329 oidnSetFilterBool(oidn_filter_, "srgb", false);
330
331 const char *custom_weight_path = getenv("CYCLES_OIDN_CUSTOM_WEIGHTS");
332 if (custom_weight_path) {
333 if (path_read_binary(custom_weight_path, custom_weights)) {
334 oidnSetSharedFilterData(
335 oidn_filter_, "weights", custom_weights.data(), custom_weights.size());
336 }
337 else {
338 LOG_ERROR << "Failed to load custom OpenImageDenoise weights";
339 }
340 }
341
342 if (context.use_pass_albedo) {
343 albedo_filter_ = create_filter();
344 if (albedo_filter_ == nullptr) {
345 return false;
346 }
347 }
348
349 if (context.use_pass_normal) {
350 normal_filter_ = create_filter();
351 if (normal_filter_ == nullptr) {
352 return false;
353 }
354 }
355
356 /* OIDN denoiser handle was created with the requested number of input passes. */
357 use_pass_albedo_ = context.use_pass_albedo;
358 use_pass_normal_ = context.use_pass_normal;
359
360 /* OIDN denoiser has been created, but it needs configuration. */
361 is_configured_ = false;
362 return true;
363}
364
365bool OIDNDenoiserGPU::denoise_configure_if_needed(DenoiseContext &context)
366{
367 /* Limit maximum tile size denoiser can be invoked with. */
368 const int2 size = make_int2(context.buffer_params.width, context.buffer_params.height);
369
370 if (is_configured_ && (configured_size_.x == size.x && configured_size_.y == size.y)) {
371 return true;
372 }
373
374 is_configured_ = true;
375 configured_size_ = size;
376
377 return true;
378}
379
380bool OIDNDenoiserGPU::denoise_run(const DenoiseContext &context, const DenoisePass &pass)
381{
382 /* Color pass. */
383 const int64_t pass_stride_in_bytes = context.buffer_params.pass_stride * sizeof(float);
384
385 set_filter_pass(oidn_filter_,
386 "color",
387 context.render_buffers->buffer.device_pointer,
388 OIDN_FORMAT_FLOAT3,
389 context.buffer_params.width,
390 context.buffer_params.height,
391 pass.denoised_offset * sizeof(float),
392 pass_stride_in_bytes,
393 pass_stride_in_bytes * context.buffer_params.stride);
394
395 set_filter_pass(oidn_filter_,
396 "output",
397 context.render_buffers->buffer.device_pointer,
398 OIDN_FORMAT_FLOAT3,
399 context.buffer_params.width,
400 context.buffer_params.height,
401 pass.denoised_offset * sizeof(float),
402 pass_stride_in_bytes,
403 pass_stride_in_bytes * context.buffer_params.stride);
404
405 /* Optional albedo and color passes. */
406 if (context.num_input_passes > 1) {
407 const device_ptr d_guiding_buffer = context.guiding_params.device_pointer;
408 const int64_t pixel_stride_in_bytes = context.guiding_params.pass_stride * sizeof(float);
409 const int64_t row_stride_in_bytes = context.guiding_params.stride * pixel_stride_in_bytes;
410
411 if (context.use_pass_albedo) {
412 set_filter_pass(oidn_filter_,
413 "albedo",
414 d_guiding_buffer,
415 OIDN_FORMAT_FLOAT3,
416 context.buffer_params.width,
417 context.buffer_params.height,
418 context.guiding_params.pass_albedo * sizeof(float),
419 pixel_stride_in_bytes,
420 row_stride_in_bytes);
421
422 if (params_.prefilter == DENOISER_PREFILTER_ACCURATE) {
423 set_filter_pass(albedo_filter_,
424 "albedo",
425 d_guiding_buffer,
426 OIDN_FORMAT_FLOAT3,
427 context.buffer_params.width,
428 context.buffer_params.height,
429 context.guiding_params.pass_albedo * sizeof(float),
430 pixel_stride_in_bytes,
431 row_stride_in_bytes);
432
433 set_filter_pass(albedo_filter_,
434 "output",
435 d_guiding_buffer,
436 OIDN_FORMAT_FLOAT3,
437 context.buffer_params.width,
438 context.buffer_params.height,
439 context.guiding_params.pass_albedo * sizeof(float),
440 pixel_stride_in_bytes,
441 row_stride_in_bytes);
442
443 if (!commit_and_execute_filter(albedo_filter_, ExecMode::ASYNC)) {
444 return false;
445 }
446 }
447 }
448
449 if (context.use_pass_normal) {
450 set_filter_pass(oidn_filter_,
451 "normal",
452 d_guiding_buffer,
453 OIDN_FORMAT_FLOAT3,
454 context.buffer_params.width,
455 context.buffer_params.height,
456 context.guiding_params.pass_normal * sizeof(float),
457 pixel_stride_in_bytes,
458 row_stride_in_bytes);
459
460 if (params_.prefilter == DENOISER_PREFILTER_ACCURATE) {
461 set_filter_pass(normal_filter_,
462 "normal",
463 d_guiding_buffer,
464 OIDN_FORMAT_FLOAT3,
465 context.buffer_params.width,
466 context.buffer_params.height,
467 context.guiding_params.pass_normal * sizeof(float),
468 pixel_stride_in_bytes,
469 row_stride_in_bytes);
470
471 set_filter_pass(normal_filter_,
472 "output",
473 d_guiding_buffer,
474 OIDN_FORMAT_FLOAT3,
475 context.buffer_params.width,
476 context.buffer_params.height,
477 context.guiding_params.pass_normal * sizeof(float),
478 pixel_stride_in_bytes,
479 row_stride_in_bytes);
480
481 if (!commit_and_execute_filter(normal_filter_, ExecMode::ASYNC)) {
482 return false;
483 }
484 }
485 }
486 }
487
488 oidnSetFilterInt(oidn_filter_, "cleanAux", params_.prefilter != DENOISER_PREFILTER_FAST);
489 return commit_and_execute_filter(oidn_filter_);
490}
491
492void OIDNDenoiserGPU::set_filter_pass(OIDNFilter filter,
493 const char *name,
495 const int format,
496 const int width,
497 const int height,
498 const size_t offset_in_bytes,
499 const size_t pixel_stride_in_bytes,
500 const size_t row_stride_in_bytes)
501{
502# if defined(OIDN_DEVICE_METAL) && defined(WITH_METAL)
503 if (denoiser_device_->info.type == DEVICE_METAL) {
504 void *mtl_buffer = denoiser_device_->get_native_buffer(ptr);
505 OIDNBuffer oidn_buffer = oidnNewSharedBufferFromMetal(oidn_device_, mtl_buffer);
506
507 oidnSetFilterImage(filter,
508 name,
509 oidn_buffer,
510 (OIDNFormat)format,
511 width,
512 height,
513 offset_in_bytes,
514 pixel_stride_in_bytes,
515 row_stride_in_bytes);
516
517 oidnReleaseBuffer(oidn_buffer);
518 }
519 else
520# endif
521 {
522 oidnSetSharedFilterImage(filter,
523 name,
524 (void *)ptr,
525 (OIDNFormat)format,
526 width,
527 height,
528 offset_in_bytes,
529 pixel_stride_in_bytes,
530 row_stride_in_bytes);
531 }
532}
533
534void OIDNDenoiserGPU::release_all_resources()
535{
536 if (albedo_filter_) {
537 oidnReleaseFilter(albedo_filter_);
538 albedo_filter_ = nullptr;
539 }
540 if (normal_filter_) {
541 oidnReleaseFilter(normal_filter_);
542 normal_filter_ = nullptr;
543 }
544 if (oidn_filter_) {
545 oidnReleaseFilter(oidn_filter_);
546 oidn_filter_ = nullptr;
547 }
548 if (oidn_device_) {
549 oidnReleaseDevice(oidn_device_);
550 oidn_device_ = nullptr;
551 }
552}
553
555
556#endif
unsigned int uint
long long int int64_t
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition btDbvt.cpp:52
bool denoise_buffer(const BufferParams &buffer_params, RenderBuffers *render_buffers, const int num_samples, bool allow_inplace_modification) override
vector< DeviceInfo > multi_devices
DenoiserTypeMask denoisers
DeviceType type
string description
nullptr float
@ DENOISER_QUALITY_FAST
Definition denoise.h:44
@ DENOISER_QUALITY_BALANCED
Definition denoise.h:43
@ DENOISER_QUALITY_HIGH
Definition denoise.h:42
@ DENOISER_PREFILTER_FAST
Definition denoise.h:32
@ DENOISER_PREFILTER_ACCURATE
Definition denoise.h:36
@ DENOISER_OPENIMAGEDENOISE
Definition denoise.h:13
#define CCL_NAMESPACE_END
@ DEVICE_MASK_OPTIX
@ DEVICE_MASK_HIP
@ DEVICE_MASK_CUDA
@ DEVICE_MASK_METAL
@ DEVICE_MASK_ONEAPI
@ DEVICE_METAL
@ DEVICE_MULTI
@ DEVICE_CUDA
@ DEVICE_CPU
@ DEVICE_OPTIX
@ DEVICE_HIP
@ DEVICE_ONEAPI
ccl_device_forceinline int2 make_int2(const int x, const int y)
#define assert(assertion)
#define filter
uiWidgetBaseParameters params[MAX_WIDGET_BASE_BATCH]
format
#define DCHECK_EQ(a, b)
Definition log.h:144
#define LOG_ERROR
Definition log.h:101
#define LOG_INFO
Definition log.h:106
#define LOG_TRACE
Definition log.h:108
int context(const bContext *C, const char *member, bContextDataResult *result)
bool path_read_binary(const string &path, vector< uint8_t > &binary)
Definition path.cpp:681
const char * name
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
Definition string.cpp:23
i
Definition text_draw.cc:230
uint64_t device_ptr
Definition types_base.h:44
PointerRNA * ptr
Definition wm_files.cc:4238