13# include <optix_denoiser_tiling.h>
17# if OPTIX_ABI_VERSION >= 60
18using ::optixUtilDenoiserInvokeTiled;
// Computes the tile layout for running the denoiser on `input` / `output` in
// pieces of `tileWidth` x `tileHeight` pixels with an overlap border of
// `overlapWindowSizeInPixels`, describing each piece as an
// OptixUtilDenoiserImageTile.
//
// Returns OPTIX_ERROR_INVALID_VALUE when either tile dimension is zero.
//
// NOTE(review): this listing is a lossy extraction (line numbers are fused to
// the code and several lines are missing). The openers of the two loops, the
// statement that stores `tile` into `tiles`, the updates of `copied_x` /
// `copied_y` and the final return are not visible here; the comments below
// describe only the code that is visible.
24static OptixResult optixUtilDenoiserSplitImage(
const OptixImage2D &input,
25 const OptixImage2D &output,
26 unsigned int overlapWindowSizeInPixels,
27 unsigned int tileWidth,
28 unsigned int tileHeight,
29 std::vector<OptixUtilDenoiserImageTile> &
tiles)
// A zero-sized tile is rejected up front.
31 if (tileWidth == 0 || tileHeight == 0)
32 return OPTIX_ERROR_INVALID_VALUE;
// Per-pixel byte strides of both images, as reported by optixUtilGetPixelStride.
34 unsigned int inPixelStride = optixUtilGetPixelStride(input);
35 unsigned int outPixelStride = optixUtilGetPixelStride(output);
// Size of the region read per tile: the tile plus an overlap border on both
// sides, clamped to the image size.
37 int inp_w = std::min(tileWidth + 2 * overlapWindowSizeInPixels, input.width);
38 int inp_h = std::min(tileHeight + 2 * overlapWindowSizeInPixels, input.height);
39 int inp_y = 0, copied_y = 0;
// Vertical offset of the tile's output rows inside the region that is read:
// zero for the first row of tiles, otherwise at least the overlap size, and
// larger when fewer than `inp_h` rows remain below `inp_y`.
42 int inputOffsetY = inp_y == 0 ? 0 :
43 std::max((
int)overlapWindowSizeInPixels,
44 inp_h - ((
int)input.height - inp_y));
// Number of output rows for this tile row. The first row is `tileHeight +
// overlap` rows tall (clamped to the image height); later rows are
// `tileHeight` tall, or whatever is left after `copied_y` rows.
45 int copy_y = inp_y == 0 ? std::min(input.height, tileHeight + overlapWindowSizeInPixels) :
46 std::
min(tileHeight, input.height - copied_y);
48 int inp_x = 0, copied_x = 0;
// Horizontal counterparts of `inputOffsetY` / `copy_y` above.
50 int inputOffsetX = inp_x == 0 ? 0 :
51 std::max((
int)overlapWindowSizeInPixels,
52 inp_w - ((
int)input.width - inp_x));
53 int copy_x = inp_x == 0 ? std::min(input.width, tileWidth + overlapWindowSizeInPixels) :
54 std::
min(tileWidth, input.width - copied_x);
56 OptixUtilDenoiserImageTile
tile;
// The input view starts `inputOffsetY` rows and `inputOffsetX` pixels before
// (inp_x, inp_y). The coordinates are cast to size_t before being multiplied
// by the strides, so the byte offset is computed in size_t arithmetic.
57 tile.input.data = input.data + (size_t)(inp_y - inputOffsetY) * input.rowStrideInBytes +
58 +(size_t)(inp_x - inputOffsetX) * inPixelStride;
59 tile.input.width = inp_w;
60 tile.input.height = inp_h;
61 tile.input.rowStrideInBytes = input.rowStrideInBytes;
62 tile.input.pixelStrideInBytes = input.pixelStrideInBytes;
63 tile.input.format = input.format;
// The output view starts exactly at (inp_x, inp_y) and covers copy_x x copy_y pixels.
65 tile.output.data = output.data + (size_t)inp_y * output.rowStrideInBytes +
66 (
size_t)inp_x * outPixelStride;
67 tile.output.width = copy_x;
68 tile.output.height = copy_y;
69 tile.output.rowStrideInBytes = output.rowStrideInBytes;
70 tile.output.pixelStrideInBytes = output.pixelStrideInBytes;
71 tile.output.format = output.format;
// Record where the output region sits inside the input view.
73 tile.inputOffsetX = inputOffsetX;
74 tile.inputOffsetY = inputOffsetY;
// Advance to the next tile column: the first step also skips the overlap
// that the first column covered.
77 inp_x += inp_x == 0 ? tileWidth + overlapWindowSizeInPixels : tileWidth;
// Presumably the tail of a do-while over tile columns (opener not visible).
79 }
while (inp_x <
static_cast<int>(input.width));
// Advance to the next tile row, analogous to the column step above.
81 inp_y += inp_y == 0 ? tileHeight + overlapWindowSizeInPixels : tileHeight;
// Presumably the tail of a do-while over tile rows (opener not visible).
83 }
while (inp_y <
static_cast<int>(input.height));
// Runs the denoiser tile by tile: every image involved (layer inputs, previous
// outputs and the albedo / normal / flow guide images that are present) is
// split with ccl::optixUtilDenoiserSplitImage, then optixDenoiserInvoke is
// called once per tile.
//
// Returns OPTIX_ERROR_INVALID_VALUE when `guideLayer` or `layers` is null, and
// OPTIX_SUCCESS at the end of the visible body.
//
// NOTE(review): this listing is a lossy extraction. Some parameters, most
// arguments of the calls below, the handling of `res` and the closing braces
// are not visible here; comments describe only the visible code.
88static OptixResult optixUtilDenoiserInvokeTiled(OptixDenoiser denoiser,
90 const OptixDenoiserParams *
params,
91 CUdeviceptr denoiserState,
92 size_t denoiserStateSizeInBytes,
93 const OptixDenoiserGuideLayer *guideLayer,
94 const OptixDenoiserLayer *layers,
95 unsigned int numLayers,
97 size_t scratchSizeInBytes,
98 unsigned int overlapWindowSizeInPixels,
99 unsigned int tileWidth,
100 unsigned int tileHeight)
// Both pointers are dereferenced below, so null is rejected.
102 if (!guideLayer || !layers)
103 return OPTIX_ERROR_INVALID_VALUE;
// One tile list per layer for the layer input, and one for its previous output.
105 std::vector<std::vector<OptixUtilDenoiserImageTile>>
tiles(numLayers);
106 std::vector<std::vector<OptixUtilDenoiserImageTile>> prevTiles(numLayers);
107 for (
unsigned int l = 0;
l < numLayers;
l++) {
// Split the layer's input image (remaining arguments not visible here).
108 if (
const OptixResult res = ccl::optixUtilDenoiserSplitImage(layers[
l].input,
110 overlapWindowSizeInPixels,
// The previous output is only split when the layer actually has one.
// `dummyOutput` is a copy of the same image descriptor; presumably it is
// passed as the `output` argument of the split — TODO confirm, the argument
// lines are not visible.
116 if (layers[
l].previousOutput.data) {
117 OptixImage2D dummyOutput = layers[
l].previousOutput;
118 if (
const OptixResult res = ccl::optixUtilDenoiserSplitImage(layers[
l].previousOutput,
120 overlapWindowSizeInPixels,
// Guide images are split the same way, each only when its data pointer is set.
128 std::vector<OptixUtilDenoiserImageTile> albedoTiles;
129 if (guideLayer->albedo.data) {
130 OptixImage2D dummyOutput = guideLayer->albedo;
131 if (
const OptixResult res = ccl::optixUtilDenoiserSplitImage(guideLayer->albedo,
133 overlapWindowSizeInPixels,
140 std::vector<OptixUtilDenoiserImageTile> normalTiles;
141 if (guideLayer->normal.data) {
142 OptixImage2D dummyOutput = guideLayer->normal;
143 if (
const OptixResult res = ccl::optixUtilDenoiserSplitImage(guideLayer->normal,
145 overlapWindowSizeInPixels,
151 std::vector<OptixUtilDenoiserImageTile> flowTiles;
152 if (guideLayer->flow.data) {
153 OptixImage2D dummyOutput = guideLayer->flow;
154 if (
const OptixResult res = ccl::optixUtilDenoiserSplitImage(guideLayer->flow,
156 overlapWindowSizeInPixels,
// Iterate over the tiles of the first layer; every other tile list is indexed
// with the same `t`.
// NOTE(review): `tiles[0]` requires numLayers >= 1; no such check is visible
// in this excerpt — confirm against the full source.
163 for (
size_t t = 0; t <
tiles[0].size(); t++) {
// Build the per-tile layer descriptors from the t-th tile of each layer.
164 std::vector<OptixDenoiserLayer> tlayers;
165 for (
unsigned int l = 0;
l < numLayers;
l++) {
166 OptixDenoiserLayer layer = {};
167 layer.input = (
tiles[
l])[t].input;
168 layer.output = (
tiles[
l])[t].output;
// The previous output is passed as the tile's input view (with overlap).
169 if (layers[
l].previousOutput.data)
170 layer.previousOutput = (prevTiles[
l])[t].input;
171 tlayers.push_back(layer);
// Per-tile guide layer; images that are absent stay zero-initialized.
174 OptixDenoiserGuideLayer gl = {};
175 if (guideLayer->albedo.data)
176 gl.albedo = albedoTiles[t].input;
178 if (guideLayer->normal.data)
179 gl.normal = normalTiles[t].input;
181 if (guideLayer->flow.data)
182 gl.flow = flowTiles[t].input;
// Denoise this tile. The input offsets of the first layer's tile are passed
// along (other arguments are not visible here).
184 if (
const OptixResult res = optixDenoiserInvoke(denoiser,
188 denoiserStateSizeInBytes,
192 (
tiles[0])[t].inputOffsetX,
193 (
tiles[0])[t].inputOffsetY,
198 return OPTIX_SUCCESS;
// Destructor: destroys the OptiX denoiser handle if one exists.
// NOTE(review): braces and any further cleanup are not visible in this excerpt.
207OptiXDenoiser::~OptiXDenoiser()
212 if (optix_denoiser_ !=
nullptr) {
213 optixDenoiserDestroy(optix_denoiser_);
217uint OptiXDenoiser::get_device_type_mask()
const
222bool OptiXDenoiser::is_device_supported(
const DeviceInfo &device)
// Denoises the buffer described by `task`.
// NOTE(review): only the first two statements of the body are visible in this
// excerpt; the actual denoising calls and the return value are not shown.
230bool OptiXDenoiser::denoise_buffer(
const DenoiseTask &task)
// The denoiser device is treated as an OptiXDevice.
232 OptiXDevice *
const optix_device =
static_cast<OptiXDevice *
>(denoiser_device_);
// Scope object constructed from the device; presumably it keeps the device's
// CUDA context current for the rest of the function — TODO confirm against
// CUDAContextScope.
234 const CUDAContextScope scope(optix_device);
// (Re)creates the OptiX denoiser object when none exists yet or when the set
// of guiding passes requested by `context` differs from the one the current
// denoiser was created with.
// NOTE(review): return statements, closing braces and most arguments of
// optixDenoiserCreate are not visible in this excerpt.
239bool OptiXDenoiser::denoise_create_if_needed(DenoiseContext &context)
// A new denoiser is needed if there is none, or any guiding-pass flag changed.
241 const bool recreate_denoiser = (optix_denoiser_ ==
nullptr) ||
242 (use_pass_albedo_ != context.use_pass_albedo) ||
243 (use_pass_normal_ != context.use_pass_normal) ||
244 (use_pass_motion_ != context.use_pass_motion);
// Nothing to do when the existing denoiser matches (body not visible here).
245 if (!recreate_denoiser) {
// Destroy the outdated denoiser before creating a new one.
250 if (optix_denoiser_) {
251 optixDenoiserDestroy(optix_denoiser_);
// Enable albedo / normal guiding according to the requested passes.
255 OptixDenoiserOptions denoiser_options = {};
256 denoiser_options.guideAlbedo = context.use_pass_albedo;
257 denoiser_options.guideNormal = context.use_pass_normal;
// HDR model by default; the temporal model is selected when the motion pass is used.
259 OptixDenoiserModelKind model = OPTIX_DENOISER_MODEL_KIND_HDR;
260 if (context.use_pass_motion) {
261 model = OPTIX_DENOISER_MODEL_KIND_TEMPORAL;
// Create the denoiser in the OptiX context of the denoiser device.
264 const OptixResult result = optixDenoiserCreate(
265 static_cast<OptiXDevice *
>(denoiser_device_)->context,
270 if (result != OPTIX_SUCCESS) {
271 set_error(
"Failed to create OptiX denoiser");
// Remember which passes the denoiser was created for, so the comparison at
// the top can detect changes on the next call.
276 use_pass_albedo_ = context.use_pass_albedo;
277 use_pass_normal_ = context.use_pass_normal;
278 use_pass_motion_ = context.use_pass_motion;
// A freshly created denoiser has not been set up yet.
281 is_configured_ =
false;
// Sets up the denoiser (state and scratch memory) for the tile size derived
// from `context.buffer_params`, unless it is already configured for that size.
// NOTE(review): the start of the `tile_size` computation, return statements,
// closing braces and some arguments of optixDenoiserSetup are not visible in
// this excerpt.
286bool OptiXDenoiser::denoise_configure_if_needed(DenoiseContext &context)
// Tail of the tile size computation: the height component is limited to 4096.
290 min(context.buffer_params.height, 4096));
// Already configured for exactly this tile size (body not visible here).
292 if (is_configured_ && (configured_size_.x == tile_size.
x && configured_size_.y == tile_size.
y)) {
// Query the memory requirements for this tile size into `sizes_`.
298 optixDenoiserComputeMemoryResources(optix_denoiser_, tile_size.
x, tile_size.
y, &sizes_));
// Tiling is in effect when the tile is smaller than the buffer in either dimension.
300 const bool tiled = tile_size.
x < context.buffer_params.width ||
301 tile_size.
y < context.buffer_params.height;
// One allocation holds the denoiser state followed by the scratch memory.
304 state_.device = denoiser_device_;
305 state_.alloc_to_device(sizes_.stateSizeInBytes + sizes_.withOverlapScratchSizeInBytes);
// When tiled, the setup dimensions include the overlap border on both sides.
// The state occupies the start of the allocation; the scratch area begins
// right after it.
308 const OptixResult result = optixDenoiserSetup(
312 tile_size.
x + (tiled ? sizes_.overlapWindowSizeInPixels * 2 : 0),
313 tile_size.y + (tiled ? sizes_.overlapWindowSizeInPixels * 2 : 0),
314 state_.device_pointer,
315 sizes_.stateSizeInBytes,
316 state_.device_pointer + sizes_.stateSizeInBytes,
317 sizes_.withOverlapScratchSizeInBytes);
318 if (result != OPTIX_SUCCESS) {
319 set_error(
"Failed to set up OptiX denoiser");
// Synchronize the CUDA context; the result is checked through cuda_device_assert.
323 cuda_device_assert(denoiser_device_, cuCtxSynchronize());
// Remember the configuration so the check at the top can skip repeated setup.
325 is_configured_ =
true;
326 configured_size_ = tile_size;
// Describes the color, previous-output and guiding images as OptixImage2D
// views into the render / guiding buffers and runs the tiled denoiser on them.
// NOTE(review): several scopes, conditions, call arguments and the return
// statement are not visible in this excerpt; comments describe only the
// visible code.
331bool OptiXDenoiser::denoise_run(
const DenoiseContext &context,
const DenoisePass &pass)
333 const BufferParams &buffer_params = context.buffer_params;
334 const int width = buffer_params.
width;
335 const int height = buffer_params.
height;
// All image descriptors start out zeroed; only the ones that are used get
// filled in below.
338 OptixImage2D color_layer = {0};
339 OptixImage2D albedo_layer = {0};
340 OptixImage2D normal_layer = {0};
341 OptixImage2D flow_layer = {0};
343 OptixImage2D output_layer = {0};
344 OptixImage2D prev_output_layer = {0};
// Color input: a view into the render buffer starting at the denoised pass
// offset. Offsets and strides are counted in floats and converted to bytes.
348 const int pass_denoised = pass.denoised_offset;
349 const int64_t pass_stride_in_bytes = context.buffer_params.pass_stride *
sizeof(
float);
351 color_layer.data = context.render_buffers->buffer.device_pointer +
352 pass_denoised *
sizeof(
float);
353 color_layer.width = width;
354 color_layer.height = height;
355 color_layer.rowStrideInBytes = pass_stride_in_bytes * context.buffer_params.stride;
356 color_layer.pixelStrideInBytes = pass_stride_in_bytes;
357 color_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
// Previous output, described with the strides of `context.prev_output`.
// Presumably inside its own scope guarded by a condition that is not visible
// here (the stride variable is declared a second time).
362 const int64_t pass_stride_in_bytes = context.prev_output.pass_stride *
sizeof(
float);
364 prev_output_layer.data = context.prev_output.device_pointer +
365 context.prev_output.offset *
sizeof(
float);
366 prev_output_layer.width = width;
367 prev_output_layer.height = height;
368 prev_output_layer.rowStrideInBytes = pass_stride_in_bytes * context.prev_output.stride;
369 prev_output_layer.pixelStrideInBytes = pass_stride_in_bytes;
370 prev_output_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
// Guiding passes are only described when there is more than one input pass.
// They all live in the guiding buffer and share its pixel and row strides.
374 if (context.num_input_passes > 1) {
375 const device_ptr d_guiding_buffer = context.guiding_params.device_pointer;
376 const int64_t pixel_stride_in_bytes = context.guiding_params.pass_stride *
sizeof(
float);
377 const int64_t row_stride_in_bytes = context.guiding_params.stride * pixel_stride_in_bytes;
379 if (context.use_pass_albedo) {
380 albedo_layer.data = d_guiding_buffer + context.guiding_params.pass_albedo *
sizeof(
float);
381 albedo_layer.width = width;
382 albedo_layer.height = height;
383 albedo_layer.rowStrideInBytes = row_stride_in_bytes;
384 albedo_layer.pixelStrideInBytes = pixel_stride_in_bytes;
385 albedo_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
388 if (context.use_pass_normal) {
389 normal_layer.data = d_guiding_buffer + context.guiding_params.pass_normal *
sizeof(
float);
390 normal_layer.width = width;
391 normal_layer.height = height;
392 normal_layer.rowStrideInBytes = row_stride_in_bytes;
393 normal_layer.pixelStrideInBytes = pixel_stride_in_bytes;
394 normal_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
// Unlike the other images, the flow image uses a two-component format.
397 if (context.use_pass_motion) {
398 flow_layer.data = d_guiding_buffer + context.guiding_params.pass_flow *
sizeof(
float);
399 flow_layer.width = width;
400 flow_layer.height = height;
401 flow_layer.rowStrideInBytes = row_stride_in_bytes;
402 flow_layer.pixelStrideInBytes = pixel_stride_in_bytes;
403 flow_layer.format = OPTIX_PIXEL_FORMAT_FLOAT2;
// The output descriptor is a copy of the color descriptor, so the denoised
// result is written to the same memory the color input is read from.
408 output_layer = color_layer;
410 OptixDenoiserGuideLayer guide_layers = {};
411 guide_layers.albedo = albedo_layer;
412 guide_layers.normal = normal_layer;
413 guide_layers.flow = flow_layer;
// A single image layer is denoised.
415 OptixDenoiserLayer image_layers = {};
416 image_layers.input = color_layer;
417 image_layers.previousOutput = prev_output_layer;
418 image_layers.output = output_layer;
// Denoiser parameters are left zero-initialized by the visible code.
421 OptixDenoiserParams
params = {};
// Run the tiled denoiser on the denoiser queue's stream. The state occupies
// the start of `state_` and the scratch area follows it.
423 optix_device_assert(denoiser_device_,
424 ccl::optixUtilDenoiserInvokeTiled(
426 static_cast<OptiXDeviceQueue *
>(denoiser_queue_.get())->stream(),
428 state_.device_pointer,
429 sizes_.stateSizeInBytes,
433 state_.device_pointer + sizes_.stateSizeInBytes,
434 sizes_.withOverlapScratchSizeInBytes,
435 sizes_.overlapWindowSizeInPixels,
437 configured_size_.y));
ATTR_WARN_UNUSED_RESULT const BMLoop * l
virtual bool denoise_buffer(const BufferParams &buffer_params, RenderBuffers *render_buffers, const int num_samples, bool allow_inplace_modification) override
DenoiserTypeMask denoisers
#define CCL_NAMESPACE_END
draw_view in_light_buf[] float
ccl_gpu_kernel_postfix ccl_global KernelWorkTile * tiles
ccl_global const KernelWorkTile * tile