Blender V4.3
COM_VectorBlurOperation.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2024 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
5#include <cmath>
6#include <cstring>
7#include <memory>
8
9#include "BLI_array.hh"
10#include "BLI_index_range.hh"
11#include "BLI_math_base.hh"
12#include "BLI_math_vector.h"
13#include "BLI_math_vector.hh"
14#include "BLI_task.hh"
15
17
18/* This is identical to the compositor implementation in compositor_motion_blur_info.hh and its
19 * related files with the necessary adjustments to make it work for the CPU. */
20
21#define MOTION_BLUR_TILE_SIZE 32
22#define DEPTH_SCALE 100.0f
23
24namespace blender::compositor {
25
34
35/* Returns the input velocity that has the larger magnitude. */
36static float2 max_velocity(const float2 &a, const float2 &b)
37{
39}
40
41/* Identical to motion_blur_tile_indirection_pack_payload, encodes the value and its texel such
42 * that the integer length of the value is encoded in the most significant bits, then the x value
43 * of the texel are encoded in the middle bits, then the y value of the texel is stored in the
44 * least significant bits. */
45static uint32_t velocity_atomic_max_value(const float2 &value, const int2 &texel)
46{
47 const uint32_t length_bits = math::min(uint32_t(math::ceil(math::length(value))), 0x3FFFu);
48 return (length_bits << 18u) | ((texel.x & 0x1FFu) << 9u) | (texel.y & 0x1FFu);
49}
50
51/* Returns the input velocity that has the larger integer magnitude, and if equal the larger x
52 * texel coordinates, and if equal, the larger y texel coordinates. It might be weird that we use
53 * an approximate comparison, but this is used for compatibility with the GPU code, which uses
54 * atomic integer operations, hence the limited precision. See velocity_atomic_max_value for more
55 * information. */
57 const float2 &b,
58 const int2 &a_texel,
59 const int2 &b_texel)
60{
61 return velocity_atomic_max_value(a, a_texel) > velocity_atomic_max_value(b, b_texel) ? a : b;
62}
63
64/* Reduces each 32x32 block of velocity pixels into a single velocity whose magnitude is largest.
65 * Each of the previous and next velocities are reduces independently. */
67{
68 const int2 tile_size = int2(MOTION_BLUR_TILE_SIZE);
69 const int2 velocity_size = int2(velocity_buffer->get_width(), velocity_buffer->get_height());
70 const int2 tiles_count = math::divide_ceil(velocity_size, tile_size);
71 MemoryBuffer output(DataType::Color, tiles_count.x, tiles_count.y);
72
73 threading::parallel_for(IndexRange(tiles_count.y), 1, [&](const IndexRange sub_y_range) {
74 for (const int64_t y : sub_y_range) {
75 for (const int64_t x : IndexRange(tiles_count.x)) {
76 const int2 texel = int2(x, y);
77
78 float2 max_previous_velocity = float2(0.0f);
79 float2 max_next_velocity = float2(0.0f);
80
81 for (int j = 0; j < tile_size.y; j++) {
82 for (int i = 0; i < tile_size.x; i++) {
83 int2 sub_texel = texel * tile_size + int2(i, j);
84 const float4 velocity = velocity_buffer->get_elem_clamped(sub_texel.x, sub_texel.y);
85 max_previous_velocity = max_velocity(velocity.xy(), max_previous_velocity);
86 max_next_velocity = max_velocity(velocity.zw(), max_next_velocity);
87 }
88 }
89
90 const float4 max_velocity = float4(max_previous_velocity, max_next_velocity);
91 copy_v4_v4(output.get_elem(texel.x, texel.y), max_velocity);
92 }
93 }
94 });
95
96 return output;
97}
98
103
105{
106 /* `ceil()` to number of tile touched. */
107 int2 point1 = tile + int2(math::sign(motion) *
108 math::ceil(math::abs(motion) / float(MOTION_BLUR_TILE_SIZE)));
109 int2 point2 = tile;
110
111 int2 max_point = math::max(point1, point2);
112 int2 min_point = math::min(point1, point2);
113 /* Clamp to bounds. */
114 max_point = math::min(max_point, size - 1);
115 min_point = math::max(min_point, int2(0));
116
117 MotionRect rect;
118 rect.bottom_left = min_point;
119 rect.extent = 1 + max_point - min_point;
120 return rect;
121}
122
129
131{
132 float magnitude = math::length(motion);
133 float2 dir = magnitude != 0.0f ? motion / magnitude : motion;
134
135 MotionLine line;
136 line.origin = float2(tile);
137 /* Rotate 90 degrees counter-clockwise. */
138 line.normal = float2(-dir.y, dir.x);
139 return line;
140}
141
142static bool is_inside_motion_line(int2 tile, MotionLine motion_line)
143{
144 /* NOTE: Everything in is tile unit. */
145 float distance_to_line = math::dot(motion_line.normal, motion_line.origin - float2(tile));
146 /* In order to be conservative and for simplicity, we use the tiles bounding circles.
147 * Consider that both the tile and the line have bounding radius of M_SQRT1_2. */
148 return math::abs(distance_to_line) < math::numbers::sqrt2_v<float>;
149}
150
151/* The max tile velocity image computes the maximum within 32x32 blocks, while the velocity can
152 * in fact extend beyond such a small block. So we dilate the max blocks by taking the maximum
153 * along the path of each of the max velocity tiles. Since the shader uses custom max atomics,
154 * the output will be an indirection buffer that points to a particular tile in the original max
155 * tile velocity image. This is done as a form of performance optimization, see the shader for
156 * more information. */
157static MemoryBuffer dilate_max_velocity(MemoryBuffer &max_tile_velocity, float shutter_speed)
158{
159 const int2 size = int2(max_tile_velocity.get_width(), max_tile_velocity.get_height());
160 MemoryBuffer output(DataType::Color, size.x, size.y);
161 const float4 zero_value = float4(0.0f);
162 output.fill(output.get_rect(), zero_value);
163
164 for (const int64_t y : IndexRange(size.y)) {
165 for (const int64_t x : IndexRange(size.x)) {
166 const int2 src_tile = int2(x, y);
167
168 float4 max_motion = float4(max_tile_velocity.get_elem(x, y)) *
169 float4(float2(shutter_speed), float2(-shutter_speed));
170
171 {
172 /* Rectangular area (in tiles) where the motion vector spreads. */
173 MotionRect motion_rect = compute_motion_rect(src_tile, max_motion.xy(), size);
174 MotionLine motion_line = compute_motion_line(src_tile, max_motion.xy());
175 /* Do a conservative rasterization of the line of the motion vector line. */
176 for (int j = 0; j < motion_rect.extent.y; j++) {
177 for (int i = 0; i < motion_rect.extent.x; i++) {
178 int2 tile = motion_rect.bottom_left + int2(i, j);
179 if (is_inside_motion_line(tile, motion_line)) {
180 float *pixel = output.get_elem(tile.x, tile.y);
181 copy_v2_v2(pixel + 2,
182 max_velocity_approximate(pixel + 2, max_motion.zw(), tile, src_tile));
183 copy_v2_v2(pixel, max_velocity_approximate(pixel, max_motion.xy(), tile, src_tile));
184 }
185 }
186 }
187 }
188
189 {
190 /* Rectangular area (in tiles) where the motion vector spreads. */
191 MotionRect motion_rect = compute_motion_rect(src_tile, max_motion.zw(), size);
192 MotionLine motion_line = compute_motion_line(src_tile, max_motion.zw());
193 /* Do a conservative rasterization of the line of the motion vector line. */
194 for (int j = 0; j < motion_rect.extent.y; j++) {
195 for (int i = 0; i < motion_rect.extent.x; i++) {
196 int2 tile = motion_rect.bottom_left + int2(i, j);
197 if (is_inside_motion_line(tile, motion_line)) {
198 float *pixel = output.get_elem(tile.x, tile.y);
199 copy_v2_v2(pixel, max_velocity_approximate(pixel, max_motion.xy(), tile, src_tile));
200 copy_v2_v2(pixel + 2,
201 max_velocity_approximate(pixel + 2, max_motion.zw(), tile, src_tile));
202 }
203 }
204 }
205 }
206 }
207 }
208
209 return output;
210}
211
212/* Interleaved gradient noise by Jorge Jimenez
213 * http://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare. */
215{
216 return math::fract(52.9829189f * math::fract(0.06711056f * p.x + 0.00583715f * p.y));
217}
218
219static float2 spread_compare(float center_motion_length,
220 float sample_motion_length,
221 float offset_length)
222{
223 return math::clamp(
224 float2(center_motion_length, sample_motion_length) - offset_length + 1.0f, 0.0f, 1.0f);
225}
226
227static float2 depth_compare(float center_depth, float sample_depth)
228{
229 float2 depth_scale = float2(DEPTH_SCALE, -DEPTH_SCALE);
230 return math::clamp(0.5f + depth_scale * (sample_depth - center_depth), 0.0f, 1.0f);
231}
232
233/* Kill contribution if not going the same direction. */
234static float dir_compare(float2 offset, float2 sample_motion, float sample_motion_length)
235{
236 if (sample_motion_length < 0.5f) {
237 return 1.0f;
238 }
239 return (math::dot(offset, sample_motion) > 0.0f) ? 1.0f : 0.0f;
240}
241
242/* Return background (x) and foreground (y) weights. */
243static float2 sample_weights(float center_depth,
244 float sample_depth,
245 float center_motion_length,
246 float sample_motion_length,
247 float offset_length)
248{
249 /* Classify foreground/background. */
250 float2 depth_weight = depth_compare(center_depth, sample_depth);
251 /* Weight if sample is overlapping or under the center pixel. */
252 float2 spread_weight = spread_compare(center_motion_length, sample_motion_length, offset_length);
253 return depth_weight * spread_weight;
254}
255
262
263static void gather_sample(MemoryBuffer *image_buffer,
264 MemoryBuffer *depth_buffer,
265 MemoryBuffer *velocity_buffer,
266 int2 size,
267 float2 screen_uv,
268 float center_depth,
269 float center_motion_len,
270 float2 offset,
271 float offset_len,
272 const bool next,
273 float shutter_speed,
274 Accumulator &accum)
275{
276 float2 sample_uv = screen_uv - offset / float2(size);
277 float4 sample_vectors = velocity_buffer->texture_bilinear_extend(sample_uv) *
278 float4(float2(shutter_speed), float2(-shutter_speed));
279 float2 sample_motion = (next) ? sample_vectors.zw() : sample_vectors.xy();
280 float sample_motion_len = math::length(sample_motion);
281 float sample_depth = depth_buffer->texture_bilinear_extend(sample_uv).x;
282 float4 sample_color = image_buffer->texture_bilinear_extend(sample_uv);
283
284 float2 direct_weights = sample_weights(
285 center_depth, sample_depth, center_motion_len, sample_motion_len, offset_len);
286
287 float3 weights;
288 weights.x = direct_weights.x;
289 weights.y = direct_weights.y;
290 weights.z = dir_compare(offset, sample_motion, sample_motion_len);
291 weights.x *= weights.z;
292 weights.y *= weights.z;
293
294 accum.fg += sample_color * weights.y;
295 accum.bg += sample_color * weights.x;
296 accum.weight += weights;
297}
298
299static void gather_blur(MemoryBuffer *image_buffer,
300 MemoryBuffer *depth_buffer,
301 MemoryBuffer *velocity_buffer,
302 int2 size,
303 float2 screen_uv,
304 float2 center_motion,
305 float center_depth,
306 float2 max_motion,
307 float ofs,
308 const bool next,
309 int samples_count,
310 float shutter_speed,
311 Accumulator &accum)
312{
313 float center_motion_len = math::length(center_motion);
314 float max_motion_len = math::length(max_motion);
315
316 /* Tile boundaries randomization can fetch a tile where there is less motion than this pixel.
317 * Fix this by overriding the max_motion. */
318 if (max_motion_len < center_motion_len) {
319 max_motion_len = center_motion_len;
320 max_motion = center_motion;
321 }
322
323 if (max_motion_len < 0.5f) {
324 return;
325 }
326
327 int i;
328 float t, inc = 1.0f / float(samples_count);
329 for (i = 0, t = ofs * inc; i < samples_count; i++, t += inc) {
330 gather_sample(image_buffer,
331 depth_buffer,
332 velocity_buffer,
333 size,
334 screen_uv,
335 center_depth,
336 center_motion_len,
337 max_motion * t,
338 max_motion_len * t,
339 next,
340 shutter_speed,
341 accum);
342 }
343
344 if (center_motion_len < 0.5f) {
345 return;
346 }
347
348 for (i = 0, t = ofs * inc; i < samples_count; i++, t += inc) {
349 /* Also sample in center motion direction.
350 * Allow recovering motion where there is conflicting
351 * motion between foreground and background. */
352 gather_sample(image_buffer,
353 depth_buffer,
354 velocity_buffer,
355 size,
356 screen_uv,
357 center_depth,
358 center_motion_len,
359 center_motion * t,
360 center_motion_len * t,
361 next,
362 shutter_speed,
363 accum);
364 }
365}
366
367static void motion_blur(MemoryBuffer *image_buffer,
368 MemoryBuffer *depth_buffer,
369 MemoryBuffer *velocity_buffer,
370 MemoryBuffer *max_velocity_buffer,
371 MemoryBuffer *output,
372 int samples_count,
373 float shutter_speed)
374{
375 const int2 size = int2(image_buffer->get_width(), image_buffer->get_height());
376 threading::parallel_for(IndexRange(size.y), 1, [&](const IndexRange sub_y_range) {
377 for (const int64_t y : sub_y_range) {
378 for (const int64_t x : IndexRange(size.x)) {
379 const int2 texel = int2(x, y);
380 float2 uv = (float2(texel) + 0.5f) / float2(size);
381
382 /* Data of the center pixel of the gather (target). */
383 float center_depth = *depth_buffer->get_elem(x, y);
384 float4 center_motion = float4(velocity_buffer->get_elem(x, y)) *
385 float4(float2(shutter_speed), float2(-shutter_speed));
386 float4 center_color = image_buffer->get_elem(x, y);
387
388 /* Randomize tile boundary to avoid ugly discontinuities. Randomize 1/4th of the tile.
389 * Note this randomize only in one direction but in practice it's enough. */
390 float rand = interleaved_gradient_noise(texel);
391 int2 tile = (texel + int2(rand * 2.0f - 1.0f * float(MOTION_BLUR_TILE_SIZE) * 0.25f)) /
392 MOTION_BLUR_TILE_SIZE;
393
394 /* No need to multiply by the shutter speed and invert the next velocities since this was
395 * already done in dilate_max_velocity. */
396 float4 max_motion = max_velocity_buffer->get_elem(tile.x, tile.y);
397
398 Accumulator accum;
399 accum.weight = float3(0.0f, 0.0f, 1.0f);
400 accum.bg = float4(0.0f);
401 accum.fg = float4(0.0f);
402 /* First linear gather. time = [T - delta, T] */
403 gather_blur(image_buffer,
404 depth_buffer,
405 velocity_buffer,
406 size,
407 uv,
408 center_motion.xy(),
409 center_depth,
410 max_motion.xy(),
411 rand,
412 false,
413 samples_count,
414 shutter_speed,
415 accum);
416 /* Second linear gather. time = [T, T + delta] */
417 gather_blur(image_buffer,
418 depth_buffer,
419 velocity_buffer,
420 size,
421 uv,
422 center_motion.zw(),
423 center_depth,
424 max_motion.zw(),
425 rand,
426 true,
427 samples_count,
428 shutter_speed,
429 accum);
430
431#if 1 /* Own addition. Not present in reference implementation. */
432 /* Avoid division by 0.0. */
433 float w = 1.0f / (50.0f * float(samples_count) * 4.0f);
434 accum.bg += center_color * w;
435 accum.weight.x += w;
436 /* NOTE: In Jimenez's presentation, they used center sample.
437 * We use background color as it contains more information for foreground
438 * elements that have not enough weights.
439 * Yield better blur in complex motion. */
440 center_color = accum.bg / accum.weight.x;
441#endif
442 /* Merge background. */
443 accum.fg += accum.bg;
444 accum.weight.y += accum.weight.x;
445 /* Balance accumulation for failed samples.
446 * We replace the missing foreground by the background. */
447 float blend_fac = math::clamp(1.0f - accum.weight.y / accum.weight.z, 0.0f, 1.0f);
448 float4 out_color = (accum.fg / accum.weight.z) + center_color * blend_fac;
449
450 copy_v4_v4(output->get_elem(x, y), out_color);
451 }
452 }
453 });
454}
455
456void VectorBlurOperation::update_memory_buffer(MemoryBuffer *output,
457 const rcti & /*area*/,
459{
460 MemoryBuffer *image = inputs[IMAGE_INPUT_INDEX];
461 MemoryBuffer *depth = inputs[DEPTH_INPUT_INDEX];
462 MemoryBuffer *velocity = inputs[VELOCITY_INPUT_INDEX];
463
464 const bool image_needs_inflation = image->is_a_single_elem();
465 const bool depth_needs_inflation = depth->is_a_single_elem();
466 const bool velocity_needs_inflation = velocity->is_a_single_elem();
467
468 MemoryBuffer *image_buffer = image_needs_inflation ? image->inflate() : image;
469 MemoryBuffer *depth_buffer = depth_needs_inflation ? depth->inflate() : depth;
470 MemoryBuffer *velocity_buffer = velocity_needs_inflation ? velocity->inflate() : velocity;
471
472 MemoryBuffer max_tile_velocity = compute_max_tile_velocity(velocity_buffer);
473 MemoryBuffer max_velocity = dilate_max_velocity(max_tile_velocity, settings_->fac);
474 motion_blur(image_buffer,
475 depth_buffer,
476 velocity_buffer,
478 output,
479 settings_->samples,
480 settings_->fac);
481
482 if (image_needs_inflation) {
483 delete image_buffer;
484 }
485
486 if (depth_needs_inflation) {
487 delete depth_buffer;
488 }
489
490 if (velocity_needs_inflation) {
491 delete velocity_buffer;
492 }
493}
494
495void VectorBlurOperation::get_area_of_interest(const int /*input_idx*/,
496 const rcti & /*output_area*/,
497 rcti &r_input_area)
498{
499 r_input_area = this->get_canvas();
500}
501
502} // namespace blender::compositor
MINLINE void copy_v2_v2(float r[2], const float a[2])
#define MOTION_BLUR_TILE_SIZE
#define DEPTH_SCALE
#define output
a MemoryBuffer contains access to the data
const int get_width() const
get the width of this MemoryBuffer
const int get_height() const
get the height of this MemoryBuffer
float4 texture_bilinear_extend(float2 coordinates) const
void add_output_socket(DataType datatype)
void add_input_socket(DataType datatype, ResizeMode resize_mode=ResizeMode::Center)
input_tx image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "preview_img") .compute_source("compositor_compute_preview.glsl") .do_static_compilation(true)
local_group_size(16, 16) .push_constant(Type b
draw_view in_light_buf[] float
ccl_global const KernelWorkTile * tile
static ulong * next
static MemoryBuffer compute_max_tile_velocity(MemoryBuffer *velocity_buffer)
static float2 depth_compare(float center_depth, float sample_depth)
static float2 spread_compare(float center_motion_length, float sample_motion_length, float offset_length)
static float2 max_velocity_approximate(const float2 &a, const float2 &b, const int2 &a_texel, const int2 &b_texel)
static float2 sample_weights(float center_depth, float sample_depth, float center_motion_length, float sample_motion_length, float offset_length)
static void gather_blur(MemoryBuffer *image_buffer, MemoryBuffer *depth_buffer, MemoryBuffer *velocity_buffer, int2 size, float2 screen_uv, float2 center_motion, float center_depth, float2 max_motion, float ofs, const bool next, int samples_count, float shutter_speed, Accumulator &accum)
static float dir_compare(float2 offset, float2 sample_motion, float sample_motion_length)
static uint32_t velocity_atomic_max_value(const float2 &value, const int2 &texel)
static float interleaved_gradient_noise(int2 p)
static void motion_blur(MemoryBuffer *image_buffer, MemoryBuffer *depth_buffer, MemoryBuffer *velocity_buffer, MemoryBuffer *max_velocity_buffer, MemoryBuffer *output, int samples_count, float shutter_speed)
static MemoryBuffer dilate_max_velocity(MemoryBuffer &max_tile_velocity, float shutter_speed)
static bool is_inside_motion_line(int2 tile, MotionLine motion_line)
static float2 max_velocity(const float2 &a, const float2 &b)
static void gather_sample(MemoryBuffer *image_buffer, MemoryBuffer *depth_buffer, MemoryBuffer *velocity_buffer, int2 size, float2 screen_uv, float center_depth, float center_motion_len, float2 offset, float offset_len, const bool next, float shutter_speed, Accumulator &accum)
static MotionLine compute_motion_line(int2 tile, float2 motion)
static MotionRect compute_motion_rect(int2 tile, float2 motion, int2 size)
T length_squared(const VecBase< T, Size > &a)
T clamp(const T &a, const T &min, const T &max)
T sign(const T &a)
VecBase< T, Size > divide_ceil(const VecBase< T, Size > &a, const VecBase< T, Size > &b)
T length(const VecBase< T, Size > &a)
T dot(const QuaternionBase< T > &a, const QuaternionBase< T > &b)
T min(const T &a, const T &b)
T fract(const T &a)
T ceil(const T &a)
T max(const T &a, const T &b)
T abs(const T &a)
void parallel_for(const IndexRange range, const int64_t grain_size, const Function &function, const TaskSizeHints &size_hints=detail::TaskSizeHints_Static(1))
Definition BLI_task.hh:95
VecBase< float, 4 > float4
VecBase< int32_t, 2 > int2
VecBase< float, 2 > float2
unsigned int uint32_t
Definition stdint.h:80
__int64 int64_t
Definition stdint.h:89
VecBase< T, 2 > zw() const
VecBase< T, 2 > xy() const