/* Doxygen page header (not part of the source file):
 * Blender V4.3 — COM_GlareBloomOperation.cc — Go to the documentation of this file. */
/* SPDX-FileCopyrightText: 2024 Blender Authors
 *
 * SPDX-License-Identifier: GPL-2.0-or-later */

#include <cmath>
#include <cstring>
#include <memory>

#include "BLI_array.hh"
#include "BLI_index_range.hh"
#include "BLI_math_base.hh"
#include "BLI_math_vector.h"
#include "BLI_math_vector.hh"
#include "BLI_task.hh"

/* NOTE(review): a project include was lost at this position during HTML extraction (doxygen line
 * 16). The operation's own header is required for the GlareBloomOperation class declaration,
 * MemoryBuffer, DataType and NodeGlare — confirm against the upstream file. */
#include "COM_GlareBloomOperation.h"

18#define MAX_GLARE_SIZE 9
19
20namespace blender::compositor {
21
22static void upsample(const MemoryBuffer &input, MemoryBuffer &output)
23{
24 const int2 output_size = int2(output.get_width(), output.get_height());
25
26 /* All the offsets in the following code section are in the normalized pixel space of the output
27 * image, so compute its normalized pixel size. */
28 float2 pixel_size = 1.0f / float2(output_size);
29
30 threading::parallel_for(IndexRange(output_size.y), 1, [&](const IndexRange sub_y_range) {
31 for (const int64_t y : sub_y_range) {
32 for (const int64_t x : IndexRange(output_size.x)) {
33 /* Each invocation corresponds to one output pixel, where the output has twice the size of
34 * the input. */
35 int2 texel = int2(x, y);
36
37 /* Add 0.5 to evaluate the buffer at the center of the pixel and divide by the image size
38 * to get the coordinates into the buffer's expected [0, 1] range. */
39 float2 coordinates = (float2(texel) + float2(0.5)) / float2(output_size);
40
41 /* Upsample by applying a 3x3 tent filter on the bi-linearly interpolated values evaluated
42 * at the center of neighboring output pixels. As more tent filter upsampling passes are
43 * applied, the result approximates a large sized Gaussian filter. This upsampling strategy
44 * is described in the talk:
45 *
46 * Next Generation Post Processing in Call of Duty: Advanced Warfare
47 * https://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare
48 *
49 * In particular, the upsampling strategy is described and illustrated in slide 162 titled
50 * "Upsampling - Our Solution". */
51 float4 upsampled = float4(0.0f);
52 upsampled += (4.0f / 16.0f) * input.texture_bilinear_extend(coordinates);
53 upsampled += (2.0f / 16.0f) *
54 input.texture_bilinear_extend(coordinates + pixel_size * float2(-1.0f, 0.0f));
55 upsampled += (2.0f / 16.0f) *
56 input.texture_bilinear_extend(coordinates + pixel_size * float2(0.0f, 1.0f));
57 upsampled += (2.0f / 16.0f) *
58 input.texture_bilinear_extend(coordinates + pixel_size * float2(1.0f, 0.0f));
59 upsampled += (2.0f / 16.0f) *
60 input.texture_bilinear_extend(coordinates + pixel_size * float2(0.0f, -1.0f));
61 upsampled += (1.0f / 16.0f) * input.texture_bilinear_extend(
62 coordinates + pixel_size * float2(-1.0f, -1.0f));
63 upsampled += (1.0f / 16.0f) *
64 input.texture_bilinear_extend(coordinates + pixel_size * float2(-1.0f, 1.0f));
65 upsampled += (1.0f / 16.0f) *
66 input.texture_bilinear_extend(coordinates + pixel_size * float2(1.0f, -1.0f));
67 upsampled += (1.0f / 16.0f) *
68 input.texture_bilinear_extend(coordinates + pixel_size * float2(1.0f, 1.0f));
69
70 const float4 original_value = output.get_elem(texel.x, texel.y);
71 copy_v4_v4(output.get_elem(texel.x, texel.y), original_value + upsampled);
72 }
73 }
74 });
75}
76
77/* Computes the weighted average of the given four colors, which are assumed to the colors of
78 * spatially neighboring pixels. The weights are computed so as to reduce the contributions of
79 * fireflies on the result by applying a form of local tone mapping as described by Brian Karis in
80 * the article "Graphic Rants: Tone Mapping".
81 *
82 * https://graphicrants.blogspot.com/2013/12/tone-mapping.html */
84 float4 color2,
85 float4 color3,
86 float4 color4)
87{
88 const float4 brightness = float4(math::reduce_max(color1.xyz()),
89 math::reduce_max(color2.xyz()),
90 math::reduce_max(color3.xyz()),
91 math::reduce_max(color4.xyz()));
92 const float4 weights = 1.0f / (brightness + 1.0);
93 const float weights_sum = math::reduce_add(weights);
94 const float4 sum = color1 * weights[0] + color2 * weights[1] + color3 * weights[2] +
95 color4 * weights[3];
97}
98
99static void downsample(const MemoryBuffer &input, MemoryBuffer &output, bool use_karis_average)
100{
101 const int2 input_size = int2(input.get_width(), input.get_height());
102 const int2 output_size = int2(output.get_width(), output.get_height());
103
104 /* All the offsets in the following code section are in the normalized pixel space of the
105 * input.texture_bilinear_extend, so compute its normalized pixel size. */
106 float2 pixel_size = 1.0f / float2(input_size);
107
108 threading::parallel_for(IndexRange(output_size.y), 1, [&](const IndexRange sub_y_range) {
109 for (const int64_t y : sub_y_range) {
110 for (const int64_t x : IndexRange(output_size.x)) {
111 /* Each invocation corresponds to one output pixel, where the output has half the size of
112 * the input. */
113 int2 texel = int2(x, y);
114
115 /* Add 0.5 to evaluate the buffer at the center of the pixel and divide by the image size
116 * to get the coordinates into the buffer's expected [0, 1] range. */
117 float2 coordinates = (float2(texel) + float2(0.5f)) / float2(output_size);
118
119 /* Each invocation downsamples a 6x6 area of pixels around the center of the corresponding
120 * output pixel, but instead of sampling each of the 36 pixels in the area, we only sample
121 * 13 positions using bilinear fetches at the center of a number of overlapping square
122 * 4-pixel groups. This downsampling strategy is described in the talk:
123 *
124 * Next Generation Post Processing in Call of Duty: Advanced Warfare
125 * https://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare
126 *
127 * In particular, the downsampling strategy is described and illustrated in slide 153
128 * titled "Downsampling - Our Solution". This is employed as it significantly improves the
129 * stability of the glare as can be seen in the videos in the talk. */
130 float4 center = input.texture_bilinear_extend(coordinates);
131 float4 upper_left_near = input.texture_bilinear_extend(coordinates +
132 pixel_size * float2(-1.0f, 1.0f));
133 float4 upper_right_near = input.texture_bilinear_extend(coordinates +
134 pixel_size * float2(1.0f, 1.0f));
135 float4 lower_left_near = input.texture_bilinear_extend(coordinates +
136 pixel_size * float2(-1.0f, -1.0f));
137 float4 lower_right_near = input.texture_bilinear_extend(coordinates +
138 pixel_size * float2(1.0f, -1.0f));
139 float4 left_far = input.texture_bilinear_extend(coordinates +
140 pixel_size * float2(-2.0f, 0.0f));
141 float4 right_far = input.texture_bilinear_extend(coordinates +
142 pixel_size * float2(2.0f, 0.0f));
143 float4 upper_far = input.texture_bilinear_extend(coordinates +
144 pixel_size * float2(0.0f, 2.0f));
145 float4 lower_far = input.texture_bilinear_extend(coordinates +
146 pixel_size * float2(0.0f, -2.0f));
147 float4 upper_left_far = input.texture_bilinear_extend(coordinates +
148 pixel_size * float2(-2.0f, 2.0f));
149 float4 upper_right_far = input.texture_bilinear_extend(coordinates +
150 pixel_size * float2(2.0f, 2.0f));
151 float4 lower_left_far = input.texture_bilinear_extend(coordinates +
152 pixel_size * float2(-2.0f, -2.0f));
153 float4 lower_right_far = input.texture_bilinear_extend(coordinates +
154 pixel_size * float2(2.0f, -2.0f));
155
156 if (!use_karis_average) {
157 /* The original weights equation mentioned in slide 153 is:
158 * 0.5 + 0.125 + 0.125 + 0.125 + 0.125 = 1
159 * The 0.5 corresponds to the center group of pixels and the 0.125 corresponds to the
160 * other groups of pixels. The center is sampled 4 times, the far non corner pixels are
161 * sampled 2 times, the near corner pixels are sampled only once; but their weight is
162 * quadruple the weights of other groups; so they count as sampled 4 times, finally the
163 * far corner pixels are sampled only once, essentially totaling 32 samples. So the
164 * weights are as used in the following code section. */
165 float4 result = (4.0f / 32.0f) * center +
166 (4.0f / 32.0f) * (upper_left_near + upper_right_near + lower_left_near +
167 lower_right_near) +
168 (2.0f / 32.0f) * (left_far + right_far + upper_far + lower_far) +
169 (1.0f / 32.0f) * (upper_left_far + upper_right_far + lower_left_far +
170 lower_right_far);
171 copy_v4_v4(output.get_elem(texel.x, texel.y), result);
172 }
173 else {
174 /* Reduce the contributions of fireflies on the result by reducing each group of pixels
175 * using a Karis brightness weighted sum. This is described in slide 168 titled
176 * "Fireflies - Partial Karis Average".
177 *
178 * This needn't be done on all downsampling passes, but only the first one, since
179 * fireflies will not survive the first pass, later passes can use the weighted average.
180 */
181 float4 center_weighted_sum = karis_brightness_weighted_sum(
182 upper_left_near, upper_right_near, lower_right_near, lower_left_near);
183 float4 upper_left_weighted_sum = karis_brightness_weighted_sum(
184 upper_left_far, upper_far, center, left_far);
185 float4 upper_right_weighted_sum = karis_brightness_weighted_sum(
186 upper_far, upper_right_far, right_far, center);
187 float4 lower_right_weighted_sum = karis_brightness_weighted_sum(
188 center, right_far, lower_right_far, lower_far);
189 float4 lower_left_weighted_sum = karis_brightness_weighted_sum(
190 left_far, center, lower_far, lower_left_far);
191
192 /* The original weights equation mentioned in slide 153 is:
193 * 0.5 + 0.125 + 0.125 + 0.125 + 0.125 = 1
194 * Multiply both sides by 8 and you get:
195 * 4 + 1 + 1 + 1 + 1 = 8
196 * So the weights are as used in the following code section. */
197 float4 result = (4.0f / 8.0f) * center_weighted_sum +
198 (1.0f / 8.0f) * (upper_left_weighted_sum + upper_right_weighted_sum +
199 lower_left_weighted_sum + lower_right_weighted_sum);
200 copy_v4_v4(output.get_elem(texel.x, texel.y), result);
201 }
202 }
203 }
204 });
205}
206
207/* Progressively down-sample the given buffer into a buffer with half the size for the given
208 * chain length, returning an array containing the chain of down-sampled buffers. The first
209 * buffer of the chain is the given buffer itself for easier handling. The chain length is
210 * expected not to exceed the binary logarithm of the smaller dimension of the given buffer,
211 * because that would buffer in down-sampling passes that produce useless textures with just
212 * one pixel. */
214 MemoryBuffer &highlights, int chain_length)
215{
216 Array<std::unique_ptr<MemoryBuffer>> downsample_chain(chain_length);
217
218 /* We append the original highlights buffer to the first buffer of the chain to make the code
219 * easier. In turn, the number of passes is one less than the chain length, because the first
220 * buffer needn't be computed. */
221 downsample_chain[0] = std::make_unique<MemoryBuffer>(highlights);
222 const IndexRange downsample_passes_range(chain_length - 1);
223
224 for (const int i : downsample_passes_range) {
225 const MemoryBuffer &input = *downsample_chain[i];
226
227 const int2 input_size = int2(input.get_width(), input.get_height());
228 const int2 output_size = input_size / 2;
229
230 rcti output_rect;
231 BLI_rcti_init(&output_rect, 0, output_size.x, 0, output_size.y);
232 downsample_chain[i + 1] = std::make_unique<MemoryBuffer>(DataType::Color, output_rect, false);
233 MemoryBuffer &output = *downsample_chain[i + 1];
234
235 /* For the first down-sample pass, we use a special "Karis" down-sample pass that applies a
236 * form of local tone mapping to reduce the contributions of fireflies, see the shader for
237 * more information. Later passes use a simple average down-sampling filter because fireflies
238 * doesn't service the first pass. */
239 const bool use_karis_average = i == downsample_passes_range.first();
240 downsample(input, output, use_karis_average);
241 }
242
243 return downsample_chain;
244}
245
246/* The size of the bloom relative to its maximum possible size, see the
247 * compute_bloom_size_halving_count() method for more information. */
248static int get_bloom_size(const NodeGlare *settings)
249{
250 return settings->size;
251}
252
253/* The bloom has a maximum possible size when the bloom size is equal to MAX_GLARE_SIZE and
254 * halves for every unit decrement of the bloom size. This method computes the number of halving
255 * that should take place, which is simply the difference to MAX_GLARE_SIZE. */
257{
258 return MAX_GLARE_SIZE - get_bloom_size(settings);
259}
260
261/* Bloom is computed by first progressively half-down-sampling the highlights down to a certain
262 * size, then progressively double-up-sampling the last down-sampled buffer up to the original size
263 * of the highlights, adding the down-sampled buffer of the same size in each up-sampling step.
264 * This can be illustrated as follows:
265 *
266 * Highlights ---+---> Bloom
267 * | |
268 * Down-sampled ---+---> Up-sampled
269 * | |
270 * Down-sampled ---+---> Up-sampled
271 * | |
272 * Down-sampled ---+---> Up-sampled
273 * | ^
274 * ... |
275 * Down-sampled ------------'
276 *
277 * The smooth down-sampling followed by smooth up-sampling can be thought of as a cheap way to
278 * approximate a large radius blur, and adding the corresponding down-sampled buffer while
279 * up-sampling is done to counter the attenuation that happens during down-sampling.
280 *
281 * Smaller down-sampled buffers contribute to larger glare size, so controlling the size can be
282 * done by stopping down-sampling down to a certain size, where the maximum possible size is
283 * achieved when down-sampling happens down to the smallest size of 2. */
284void GlareBloomOperation::generate_glare(float *output,
285 MemoryBuffer *highlights,
286 const NodeGlare *settings)
287{
288 /* The maximum possible glare size is achieved when we down-sampled down to the smallest size
289 * of 2, which would buffer in a down-sampling chain length of the binary logarithm of the
290 * smaller dimension of the size of the highlights.
291 *
292 * However, as users might want a smaller glare size, we reduce the chain length by the halving
293 * count supplied by the user. */
294 const int2 size = int2(highlights->get_width(), highlights->get_height());
295 const int smaller_glare_dimension = math::min(size.x, size.y);
296 const int chain_length = int(std::log2(smaller_glare_dimension)) -
298
299 /* If the chain length is less than 2, that means no down-sampling will happen, so we just copy
300 * the highlights to the output. This is a sanitization of a corner case, so no need to worry
301 * about optimizing the copy away. */
302 if (chain_length < 2) {
303 memcpy(output,
304 highlights->get_buffer(),
305 size.x * size.y * COM_DATA_TYPE_COLOR_CHANNELS * sizeof(float));
306 return;
307 }
308
310 *highlights, chain_length);
311
312 /* Notice that for a chain length of n, we need (n - 1) up-sampling passes. */
313 const IndexRange upsample_passes_range(chain_length - 1);
314
315 for (const int i : upsample_passes_range) {
316 const MemoryBuffer &input = *downsample_chain[upsample_passes_range.last() - i + 1];
317 MemoryBuffer &output = *downsample_chain[upsample_passes_range.last() - i];
318 upsample(input, output);
319 }
320
321 memcpy(output,
322 downsample_chain[0]->get_buffer(),
323 size.x * size.y * COM_DATA_TYPE_COLOR_CHANNELS * sizeof(float));
324}
325
}  // namespace blender::compositor
/* Doxygen cross-reference tooltips captured with the page (not part of the source file):
void BLI_rcti_init(struct rcti *rect, int xmin, int xmax, int ymin, int ymax)
Definition rct.c:418
#define MAX_GLARE_SIZE
static T sum(const btAlignedObjectArray< T > &items)
constexpr int64_t first() const
constexpr int64_t last(const int64_t n=0) const
a MemoryBuffer contains access to the data
draw_view push_constant(Type::INT, "radiance_src") .push_constant(Type capture_info_buf storage_buf(1, Qualifier::READ, "ObjectBounds", "bounds_buf[]") .push_constant(Type draw_view int
BLI_INLINE float weights_sum(const float weights[8])
constexpr int COM_DATA_TYPE_COLOR_CHANNELS
Definition COM_defines.h:58
static Array< std::unique_ptr< MemoryBuffer > > compute_bloom_downsample_chain(MemoryBuffer &highlights, int chain_length)
static int get_bloom_size(const NodeGlare *settings)
static int compute_bloom_size_halving_count(const NodeGlare *settings)
static void downsample(const MemoryBuffer &input, MemoryBuffer &output, bool use_karis_average)
static void upsample(const MemoryBuffer &input, MemoryBuffer &output)
static float4 karis_brightness_weighted_sum(float4 color1, float4 color2, float4 color3, float4 color4)
T safe_divide(const T &a, const T &b)
T reduce_max(const VecBase< T, Size > &a)
T min(const T &a, const T &b)
T reduce_add(const VecBase< T, Size > &a)
void parallel_for(const IndexRange range, const int64_t grain_size, const Function &function, const TaskSizeHints &size_hints=detail::TaskSizeHints_Static(1))
Definition BLI_task.hh:95
VecBase< int32_t, 2 > int2
VecBase< float, 2 > float2
VecBase< T, 3 > xyz() const
*/