Blender V4.3
kernel/film/adaptive_sampling.h
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2019-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#pragma once
6
7#include "kernel/film/write.h"
8
10
11/* Check whether the pixel still needs sampling: returns false once the pixel has converged and should not be sampled anymore. */
12
16{
 /* Returns true when the pixel should receive more samples, false once it is flagged as
  * converged. */

 /* Without an adaptive auxiliary pass there is no convergence flag to consult, so the pixel is
  * always reported as needing samples. */
17 if (kernel_data.film.pass_adaptive_aux_buffer == PASS_UNUSED) {
18 return true;
19 }
20
22
 /* NOTE(review): `buffer` is used below, but its declaration is not visible in this listing
  * (line 22 is empty here) - confirm against the original header. */

 /* The flag lives in the fourth float of the adaptive auxiliary pass; 0.0f means the pixel has
  * not been marked as converged. */
23 const uint aux_w_offset = kernel_data.film.pass_adaptive_aux_buffer + 3;
24 return buffer[aux_w_offset] == 0.0f;
25}
26
27/* Determines whether to continue sampling a given pixel or if it has sufficiently converged. */
28
31 int x,
32 int y,
33 float threshold,
34 int reset,
35 int offset,
36 int stride)
37{
 /* Compares the pixel's combined pass against its adaptive auxiliary pass, derives a normalized
  * error, stores the resulting convergence flag in the fourth float of the auxiliary pass and
  * returns it (true means converged). Both the auxiliary pass and the sample count pass must be
  * in use. */
38 kernel_assert(kernel_data.film.pass_adaptive_aux_buffer != PASS_UNUSED);
39 kernel_assert(kernel_data.film.pass_sample_count != PASS_UNUSED);
40
 /* The pixel index is computed in `int`; only the multiplication by the pass stride is widened
  * to 64 bits. */
41 const int render_pixel_index = offset + x + y * stride;
42 ccl_global float *buffer = render_buffer +
43 (uint64_t)render_pixel_index * kernel_data.film.pass_stride;
44
45 /* TODO(Stefan): Is this better in linear, sRGB or something else? */
46
47 const float4 A = kernel_read_pass_float4(buffer + kernel_data.film.pass_adaptive_aux_buffer);
48 if (!reset && A.w != 0.0f) {
49 /* If the pixel was already flagged as converged and no reset was requested, its state will
50 * not change in this kernel. Return early before doing any math.
51 *
52 * TODO(sergey): On a GPU it might be better to keep thread alive for better coherency? */
53 return true;
54 }
55
56 const float4 I = kernel_read_pass_float4(buffer + kernel_data.film.pass_combined);
57
 /* Presumably the sample count is stored as the bit pattern of an unsigned integer inside the
  * float buffer - confirm against the definition of __float_as_uint.
  * NOTE(review): there is no guard against a sample count of zero before the division below;
  * confirm that callers only run this after at least one sample has been accumulated. */
58 const float sample = __float_as_uint(buffer[kernel_data.film.pass_sample_count]);
59 const float intensity_scale = kernel_data.film.exposure / sample;
60
61 /* The per-pixel error as seen in section 2.1 of
62 * "A hierarchical automatic stopping condition for Monte Carlo global illumination" */
63 const float error_difference = (fabsf(I.x - A.x) + fabsf(I.y - A.y) + fabsf(I.z - A.z)) *
64 intensity_scale;
65 const float intensity = (I.x + I.y + I.z) * intensity_scale;
66
67 /* Anything with R+G+B > 1 is highly exposed - even in sRGB it's a range that
68 * some displays aren't even able to display without significant losses in
69 * detail. Everything with R+G+B > 3 is overexposed and should receive
70 * even fewer samples. Filmic-like curves need maximum sampling rate at
71 * intensity near 0.1-0.2, so threshold of 1 for R+G+B leaves an additional
72 * fstop in case it is needed for compositing.
73 */
74 float error_normalize;
75 if (intensity < 1.0f) {
76 error_normalize = sqrtf(intensity);
77 }
78 else {
79 error_normalize = intensity;
80 }
81
82 /* A small epsilon is added to the divisor to prevent division by zero. */
83 const float error = error_difference / (0.0001f + error_normalize);
84 const bool did_converge = (error < threshold);
85
 /* Persist the flag in the fourth float of the auxiliary pass; the bool is stored as 0.0f or
  * 1.0f. */
86 const uint aux_w_offset = kernel_data.film.pass_adaptive_aux_buffer + 3;
87 buffer[aux_w_offset] = did_converge;
88
89 return did_converge;
90}
91
92/* This is a simple box filter in two passes.
93 * When a pixel demands more adaptive samples, let its neighboring pixels draw more samples too. */
94
97 int y,
98 int start_x,
99 int width,
100 int offset,
101 int stride)
102{
 /* Horizontal pass: walks the pixels x in [start_x, start_x + width) of row y and writes 0.0f
  * into the fourth float of the adaptive auxiliary pass of every pixel that is directly left or
  * right of a pixel already holding 0.0f there. */
103 kernel_assert(kernel_data.film.pass_adaptive_aux_buffer != PASS_UNUSED);
104
 /* `prev` records whether the previously visited pixel held 0.0f on its own, i.e. not because
  * this pass just cleared it. */
105 bool prev = false;
106 for (int x = start_x; x < start_x + width; ++x) {
107 int index = offset + x + y * stride;
108 ccl_global float *buffer = render_buffer + (uint64_t)index * kernel_data.film.pass_stride;
109 const uint aux_w_offset = kernel_data.film.pass_adaptive_aux_buffer + 3;
110
111 if (buffer[aux_w_offset] == 0.0f) {
 /* The current pixel holds 0.0f: also clear its left neighbor, unless this is the first pixel
  * of the range or the neighbor already held 0.0f. */
112 if (x > start_x && !prev) {
113 index = index - 1;
114 buffer = render_buffer + (uint64_t)index * kernel_data.film.pass_stride;
115 buffer[aux_w_offset] = 0.0f;
116 }
117 prev = true;
118 }
119 else {
 /* The current pixel holds a non-zero flag: clear it if its left neighbor held 0.0f. `prev` is
  * then reset to false so this clear does not spread further to the right. */
120 if (prev) {
121 buffer[aux_w_offset] = 0.0f;
122 }
123 prev = false;
124 }
125 }
126}
127
130 int x,
131 int start_y,
132 int height,
133 int offset,
134 int stride)
135{
 /* Vertical pass: walks the pixels y in [start_y, start_y + height) of column x and writes 0.0f
  * into the fourth float of the adaptive auxiliary pass of every pixel that is directly above or
  * below a pixel already holding 0.0f there. */
136 kernel_assert(kernel_data.film.pass_adaptive_aux_buffer != PASS_UNUSED);
137
 /* `prev` records whether the previously visited pixel held 0.0f on its own, i.e. not because
  * this pass just cleared it. */
138 bool prev = false;
139 for (int y = start_y; y < start_y + height; ++y) {
140 int index = offset + x + y * stride;
141 ccl_global float *buffer = render_buffer + (uint64_t)index * kernel_data.film.pass_stride;
142 const uint aux_w_offset = kernel_data.film.pass_adaptive_aux_buffer + 3;
143
144 if (buffer[aux_w_offset] == 0.0f) {
 /* The current pixel holds 0.0f: also clear the pixel one row earlier (index - stride), unless
  * this is the first row of the range or that pixel already held 0.0f. */
145 if (y > start_y && !prev) {
146 index = index - stride;
147 buffer = render_buffer + (uint64_t)index * kernel_data.film.pass_stride;
148 buffer[aux_w_offset] = 0.0f;
149 }
150 prev = true;
151 }
152 else {
 /* The current pixel holds a non-zero flag: clear it if the pixel one row earlier held 0.0f.
  * `prev` is then reset to false so this clear does not spread to further rows. */
153 if (prev) {
154 buffer[aux_w_offset] = 0.0f;
155 }
156 prev = false;
157 }
158 }
159}
160
unsigned int uint
void reset()
clear internal cached data and reset random seed
#define kernel_assert(cond)
#define kernel_data
const KernelGlobalsCPU *ccl_restrict KernelGlobals
#define ccl_device_forceinline
#define ccl_device
#define ccl_global
#define CCL_NAMESPACE_END
#define fabsf(x)
#define __float_as_uint(x)
#define sqrtf(x)
ccl_gpu_kernel_postfix ccl_global KernelWorkTile const int ccl_global float * render_buffer
ccl_device bool film_adaptive_sampling_convergence_check(KernelGlobals kg, ccl_global float *render_buffer, int x, int y, float threshold, int reset, int offset, int stride)
ccl_device void film_adaptive_sampling_filter_x(KernelGlobals kg, ccl_global float *render_buffer, int y, int start_x, int width, int offset, int stride)
CCL_NAMESPACE_BEGIN ccl_device_forceinline bool film_need_sample_pixel(KernelGlobals kg, ConstIntegratorState state, ccl_global float *render_buffer)
ccl_device void film_adaptive_sampling_filter_y(KernelGlobals kg, ccl_global float *render_buffer, int x, int start_y, int height, int offset, int stride)
#define PASS_UNUSED
static ulong state[N]
static void error(const char *str)
#define I
const IntegratorStateCPU *ccl_restrict ConstIntegratorState
Definition state.h:229
unsigned __int64 uint64_t
Definition stdint.h:90
CCL_NAMESPACE_BEGIN ccl_device_forceinline ccl_global float * film_pass_pixel_render_buffer(KernelGlobals kg, ConstIntegratorState state, ccl_global float *ccl_restrict render_buffer)
Definition write.h:17
ccl_device_inline float4 kernel_read_pass_float4(ccl_global float *ccl_restrict buffer)
Definition write.h:120