Blender V4.5
node_composite_vec_blur.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2006 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
8
9#include <cstdint>
10
11#include "BLI_index_range.hh"
12#include "BLI_math_base.hh"
13#include "BLI_math_vector.hh"
15
16#include "UI_interface.hh"
17#include "UI_resources.hh"
18
19#include "GPU_compute.hh"
20#include "GPU_shader.hh"
21#include "GPU_state.hh"
22#include "GPU_storage_buffer.hh"
23#include "GPU_vertex_buffer.hh"
24
25#include "COM_node_operation.hh"
26#include "COM_result.hh"
27#include "COM_utilities.hh"
28
30
31/* **************** VECTOR BLUR ******************** */
32
34
36{
37 b.add_input<decl::Color>("Image")
38 .default_value({1.0f, 1.0f, 1.0f, 1.0f})
39 .compositor_domain_priority(0);
40 b.add_input<decl::Float>("Z").default_value(0.0f).min(0.0f).compositor_domain_priority(2);
41 b.add_input<decl::Vector>("Speed")
42 .dimensions(4)
43 .default_value({0.0f, 0.0f, 0.0f})
44 .min(0.0f)
45 .max(1.0f)
46 .subtype(PROP_VELOCITY)
47 .compositor_domain_priority(1);
48 b.add_input<decl::Int>("Samples")
49 .default_value(32)
50 .min(1)
51 .max(256)
52 .description("The number of samples used to approximate the motion blur")
53 .compositor_expects_single_value();
54 b.add_input<decl::Float>("Shutter")
55 .default_value(0.5f)
56 .min(0.0f)
57 .description("Time between shutter opening and closing in frames")
58 .compositor_expects_single_value();
59
60 b.add_output<decl::Color>("Image");
61}
62
63using namespace blender::compositor;
64
/* Side length, in pixels, of the square tiles over which the maximum velocity is reduced. */
#define MOTION_BLUR_TILE_SIZE 32
/* Factor applied to depth differences when classifying samples as foreground or background. */
#define DEPTH_SCALE 100.0f
67
68/* Returns the input velocity that has the larger magnitude. */
69static float2 max_velocity(const float2 &a, const float2 &b)
70{
72}
73
74/* Identical to motion_blur_tile_indirection_pack_payload, encodes the value and its texel such
75 * that the integer length of the value is encoded in the most significant bits, then the x value
76 * of the texel are encoded in the middle bits, then the y value of the texel is stored in the
77 * least significant bits. */
78static uint32_t velocity_atomic_max_value(const float2 &value, const int2 &texel)
79{
80 const uint32_t length_bits = math::min(uint32_t(math::ceil(math::length(value))), 0x3FFFu);
81 return (length_bits << 18u) | ((texel.x & 0x1FFu) << 9u) | (texel.y & 0x1FFu);
82}
83
84/* Returns the input velocity that has the larger integer magnitude, and if equal the larger x
85 * texel coordinates, and if equal, the larger y texel coordinates. It might be weird that we use
86 * an approximate comparison, but this is used for compatibility with the GPU code, which uses
87 * atomic integer operations, hence the limited precision. See velocity_atomic_max_value for more
88 * information. */
90 const float2 &b,
91 const int2 &a_texel,
92 const int2 &b_texel)
93{
94 return velocity_atomic_max_value(a, a_texel) > velocity_atomic_max_value(b, b_texel) ? a : b;
95}
96
97/* Reduces each 32x32 block of velocity pixels into a single velocity whose magnitude is largest.
98 * Each of the previous and next velocities are reduces independently. */
99static Result compute_max_tile_velocity_cpu(Context &context, const Result &velocity_image)
100{
101 if (velocity_image.is_single_value()) {
102 Result output = context.create_result(ResultType::Float4);
103 output.allocate_single_value();
104 output.set_single_value(velocity_image.get_single_value<float4>());
105 return output;
106 }
107
108 const int2 tile_size = int2(MOTION_BLUR_TILE_SIZE);
109 const int2 velocity_size = velocity_image.domain().size;
110 const int2 tiles_count = math::divide_ceil(velocity_size, tile_size);
111 Result output = context.create_result(ResultType::Float4);
112 output.allocate_texture(Domain(tiles_count));
113
114 parallel_for(tiles_count, [&](const int2 texel) {
115 float2 max_previous_velocity = float2(0.0f);
116 float2 max_next_velocity = float2(0.0f);
117
118 for (int j = 0; j < tile_size.y; j++) {
119 for (int i = 0; i < tile_size.x; i++) {
120 int2 sub_texel = texel * tile_size + int2(i, j);
121 const float4 velocity = velocity_image.load_pixel_extended<float4>(sub_texel);
122 max_previous_velocity = max_velocity(velocity.xy(), max_previous_velocity);
123 max_next_velocity = max_velocity(velocity.zw(), max_next_velocity);
124 }
125 }
126
127 const float4 max_velocity = float4(max_previous_velocity, max_next_velocity);
128 output.store_pixel(texel, max_velocity);
129 });
130
131 return output;
132}
133
138
139static MotionRect compute_motion_rect(const int2 &tile, const float2 &motion, const int2 &size)
140{
141 /* `ceil()` to number of tile touched. */
142 int2 point1 = tile + int2(math::sign(motion) *
143 math::ceil(math::abs(motion) / float(MOTION_BLUR_TILE_SIZE)));
144 int2 point2 = tile;
145
146 int2 max_point = math::max(point1, point2);
147 int2 min_point = math::min(point1, point2);
148 /* Clamp to bounds. */
149 max_point = math::min(max_point, size - 1);
150 min_point = math::max(min_point, int2(0));
151
152 MotionRect rect;
153 rect.bottom_left = min_point;
154 rect.extent = 1 + max_point - min_point;
155 return rect;
156}
157
164
165static MotionLine compute_motion_line(const int2 &tile, const float2 &motion)
166{
167 float magnitude = math::length(motion);
168 float2 dir = magnitude != 0.0f ? motion / magnitude : motion;
169
170 MotionLine line;
171 line.origin = float2(tile);
172 /* Rotate 90 degrees counter-clockwise. */
173 line.normal = float2(-dir.y, dir.x);
174 return line;
175}
176
177static bool is_inside_motion_line(const int2 &tile, const MotionLine &motion_line)
178{
179 /* NOTE: Everything in is tile unit. */
180 float distance_to_line = math::dot(motion_line.normal, motion_line.origin - float2(tile));
181 /* In order to be conservative and for simplicity, we use the tiles bounding circles.
182 * Consider that both the tile and the line have bounding radius of M_SQRT1_2. */
183 return math::abs(distance_to_line) < math::numbers::sqrt2_v<float>;
184}
185
186/* The max tile velocity image computes the maximum within 32x32 blocks, while the velocity can
187 * in fact extend beyond such a small block. So we dilate the max blocks by taking the maximum
188 * along the path of each of the max velocity tiles. */
190 const Result &max_tile_velocity,
191 const float shutter_speed)
192{
193 if (max_tile_velocity.is_single_value()) {
194 Result output = context.create_result(ResultType::Float4);
195 output.allocate_single_value();
196 output.set_single_value(max_tile_velocity.get_single_value<float4>());
197 return output;
198 }
199
200 const int2 size = max_tile_velocity.domain().size;
201 Result output = context.create_result(ResultType::Float4);
202 output.allocate_texture(Domain(size));
203
204 parallel_for(size, [&](const int2 texel) { output.store_pixel(texel, float4(0.0f)); });
205
206 for (const int64_t y : IndexRange(size.y)) {
207 for (const int64_t x : IndexRange(size.x)) {
208 const int2 src_tile = int2(x, y);
209
210 const float4 max_motion = max_tile_velocity.load_pixel<float4>(src_tile);
211 const float2 max_previous_velocity = max_motion.xy() * shutter_speed;
212 const float2 max_next_velocity = max_motion.zw() * -shutter_speed;
213
214 {
215 /* Rectangular area (in tiles) where the motion vector spreads. */
216 MotionRect motion_rect = compute_motion_rect(src_tile, max_previous_velocity, size);
217 MotionLine motion_line = compute_motion_line(src_tile, max_previous_velocity);
218 /* Do a conservative rasterization of the line of the motion vector line. */
219 for (int j = 0; j < motion_rect.extent.y; j++) {
220 for (int i = 0; i < motion_rect.extent.x; i++) {
221 int2 tile = motion_rect.bottom_left + int2(i, j);
222 if (is_inside_motion_line(tile, motion_line)) {
223 const float4 current_max_velocity = output.load_pixel<float4>(tile);
224 const float2 new_max_previous_velocity = max_velocity_approximate(
225 current_max_velocity.xy(), max_previous_velocity, tile, src_tile);
226 const float2 new_max_next_velocity = max_velocity_approximate(
227 current_max_velocity.zw(), max_next_velocity, tile, src_tile);
228 output.store_pixel(tile, float4(new_max_previous_velocity, new_max_next_velocity));
229 }
230 }
231 }
232 }
233
234 {
235 /* Rectangular area (in tiles) where the motion vector spreads. */
236 MotionRect motion_rect = compute_motion_rect(src_tile, max_next_velocity, size);
237 MotionLine motion_line = compute_motion_line(src_tile, max_next_velocity);
238 /* Do a conservative rasterization of the line of the motion vector line. */
239 for (int j = 0; j < motion_rect.extent.y; j++) {
240 for (int i = 0; i < motion_rect.extent.x; i++) {
241 int2 tile = motion_rect.bottom_left + int2(i, j);
242 if (is_inside_motion_line(tile, motion_line)) {
243 const float4 current_max_velocity = output.load_pixel<float4>(tile);
244 const float2 new_max_previous_velocity = max_velocity_approximate(
245 current_max_velocity.xy(), max_previous_velocity, tile, src_tile);
246 const float2 new_max_next_velocity = max_velocity_approximate(
247 current_max_velocity.zw(), max_next_velocity, tile, src_tile);
248 output.store_pixel(tile, float4(new_max_previous_velocity, new_max_next_velocity));
249 }
250 }
251 }
252 }
253 }
254 }
255
256 return output;
257}
258
259/* Interleaved gradient noise by Jorge Jimenez
260 * http://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare. */
261static float interleaved_gradient_noise(const int2 &p)
262{
263 return math::fract(52.9829189f * math::fract(0.06711056f * p.x + 0.00583715f * p.y));
264}
265
266static float2 spread_compare(const float center_motion_length,
267 const float sample_motion_length,
268 const float offset_length)
269{
270 return math::clamp(
271 float2(center_motion_length, sample_motion_length) - offset_length + 1.0f, 0.0f, 1.0f);
272}
273
274static float2 depth_compare(const float center_depth, const float sample_depth)
275{
276 float2 depth_scale = float2(DEPTH_SCALE, -DEPTH_SCALE);
277 return math::clamp(0.5f + depth_scale * (sample_depth - center_depth), 0.0f, 1.0f);
278}
279
280/* Kill contribution if not going the same direction. */
281static float dir_compare(const float2 &offset,
282 const float2 &sample_motion,
283 const float &sample_motion_length)
284{
285 if (sample_motion_length < 0.5f) {
286 return 1.0f;
287 }
288 return (math::dot(offset, sample_motion) > 0.0f) ? 1.0f : 0.0f;
289}
290
291/* Return background (x) and foreground (y) weights. */
292static float2 sample_weights(const float center_depth,
293 const float sample_depth,
294 const float center_motion_length,
295 const float sample_motion_length,
296 const float offset_length)
297{
298 /* Classify foreground/background. */
299 float2 depth_weight = depth_compare(center_depth, sample_depth);
300 /* Weight if sample is overlapping or under the center pixel. */
301 float2 spread_weight = spread_compare(center_motion_length, sample_motion_length, offset_length);
302 return depth_weight * spread_weight;
303}
304
311
312static void gather_sample(const Result &input_image,
313 const Result &input_depth,
314 const Result &input_velocity,
315 const int2 &size,
316 const float2 &screen_uv,
317 const float center_depth,
318 const float center_motion_len,
319 const float2 &offset,
320 const float offset_len,
321 const bool next,
322 const float shutter_speed,
323 Accumulator &accum)
324{
325 float2 sample_uv = screen_uv - offset / float2(size);
326 float4 sample_vectors = input_velocity.sample_bilinear_extended(sample_uv) *
327 float4(float2(shutter_speed), float2(-shutter_speed));
328 float2 sample_motion = (next) ? sample_vectors.zw() : sample_vectors.xy();
329 float sample_motion_len = math::length(sample_motion);
330 float sample_depth = input_depth.sample_bilinear_extended(sample_uv).x;
331 float4 sample_color = input_image.sample_bilinear_extended(sample_uv);
332
333 float2 direct_weights = sample_weights(
334 center_depth, sample_depth, center_motion_len, sample_motion_len, offset_len);
335
336 float3 weights;
337 weights.x = direct_weights.x;
338 weights.y = direct_weights.y;
339 weights.z = dir_compare(offset, sample_motion, sample_motion_len);
340 weights.x *= weights.z;
341 weights.y *= weights.z;
342
343 accum.fg += sample_color * weights.y;
344 accum.bg += sample_color * weights.x;
345 accum.weight += weights;
346}
347
348static void gather_blur(const Result &input_image,
349 const Result &input_depth,
350 const Result &input_velocity,
351 const int2 &size,
352 const float2 &screen_uv,
353 const float2 &center_motion,
354 const float center_depth,
355 const float2 &max_motion,
356 const float ofs,
357 const bool next,
358 const int samples_count,
359 const float shutter_speed,
360 Accumulator &accum)
361{
362 float center_motion_len = math::length(center_motion);
363 float max_motion_len = math::length(max_motion);
364
365 /* Tile boundaries randomization can fetch a tile where there is less motion than this pixel.
366 * Fix this by overriding the max_motion. */
367 float2 sanitized_max_motion = max_motion;
368 if (max_motion_len < center_motion_len) {
369 max_motion_len = center_motion_len;
370 sanitized_max_motion = center_motion;
371 }
372
373 if (max_motion_len < 0.5f) {
374 return;
375 }
376
377 int i;
378 float t, inc = 1.0f / float(samples_count);
379 for (i = 0, t = ofs * inc; i < samples_count; i++, t += inc) {
380 gather_sample(input_image,
381 input_depth,
382 input_velocity,
383 size,
384 screen_uv,
385 center_depth,
386 center_motion_len,
387 sanitized_max_motion * t,
388 max_motion_len * t,
389 next,
390 shutter_speed,
391 accum);
392 }
393
394 if (center_motion_len < 0.5f) {
395 return;
396 }
397
398 for (i = 0, t = ofs * inc; i < samples_count; i++, t += inc) {
399 /* Also sample in center motion direction.
400 * Allow recovering motion where there is conflicting
401 * motion between foreground and background. */
402 gather_sample(input_image,
403 input_depth,
404 input_velocity,
405 size,
406 screen_uv,
407 center_depth,
408 center_motion_len,
409 center_motion * t,
410 center_motion_len * t,
411 next,
412 shutter_speed,
413 accum);
414 }
415}
416
417static void motion_blur_cpu(const Result &input_image,
418 const Result &input_depth,
419 const Result &input_velocity,
420 const Result &max_velocity,
421 Result &output,
422 const int samples_count,
423 const float shutter_speed)
424{
425 const int2 size = input_image.domain().size;
426 threading::parallel_for(IndexRange(size.y), 1, [&](const IndexRange sub_y_range) {
427 for (const int64_t y : sub_y_range) {
428 for (const int64_t x : IndexRange(size.x)) {
429 const int2 texel = int2(x, y);
430 float2 uv = (float2(texel) + 0.5f) / float2(size);
431
432 /* Data of the center pixel of the gather (target). */
433 float center_depth = input_depth.load_pixel<float, true>(texel);
434 float4 center_motion = input_velocity.load_pixel<float4, true>(texel);
435 float2 center_previous_motion = center_motion.xy() * shutter_speed;
436 float2 center_next_motion = center_motion.zw() * -shutter_speed;
437 float4 center_color = input_image.load_pixel<float4>(texel);
438
439 /* Randomize tile boundary to avoid ugly discontinuities. Randomize 1/4th of the tile.
440 * Note this randomize only in one direction but in practice it's enough. */
441 float rand = interleaved_gradient_noise(texel);
442 int2 tile = (texel + int2(rand * 2.0f - 1.0f * float(MOTION_BLUR_TILE_SIZE) * 0.25f)) /
443 MOTION_BLUR_TILE_SIZE;
444
445 /* No need to multiply by the shutter speed and invert the next velocities since this was
446 * already done in dilate_max_velocity. */
447 float4 max_motion = max_velocity.load_pixel<float4, true>(tile);
448
449 Accumulator accum;
450 accum.weight = float3(0.0f, 0.0f, 1.0f);
451 accum.bg = float4(0.0f);
452 accum.fg = float4(0.0f);
453 /* First linear gather. time = [T - delta, T] */
454 gather_blur(input_image,
455 input_depth,
456 input_velocity,
457 size,
458 uv,
459 center_previous_motion,
460 center_depth,
461 max_motion.xy(),
462 rand,
463 false,
464 samples_count,
465 shutter_speed,
466 accum);
467 /* Second linear gather. time = [T, T + delta] */
468 gather_blur(input_image,
469 input_depth,
470 input_velocity,
471 size,
472 uv,
473 center_next_motion,
474 center_depth,
475 max_motion.zw(),
476 rand,
477 true,
478 samples_count,
479 shutter_speed,
480 accum);
481
482#if 1 /* Own addition. Not present in reference implementation. */
483 /* Avoid division by 0.0. */
484 float w = 1.0f / (50.0f * float(samples_count) * 4.0f);
485 accum.bg += center_color * w;
486 accum.weight.x += w;
487 /* NOTE: In Jimenez's presentation, they used center sample.
488 * We use background color as it contains more information for foreground
489 * elements that have not enough weights.
490 * Yield better blur in complex motion. */
491 center_color = accum.bg / accum.weight.x;
492#endif
493 /* Merge background. */
494 accum.fg += accum.bg;
495 accum.weight.y += accum.weight.x;
496 /* Balance accumulation for failed samples.
497 * We replace the missing foreground by the background. */
498 float blend_fac = math::clamp(1.0f - accum.weight.y / accum.weight.z, 0.0f, 1.0f);
499 float4 out_color = (accum.fg / accum.weight.z) + center_color * blend_fac;
500
501 output.store_pixel(texel, out_color);
502 }
503 }
504 });
505}
506
508 public:
513
514 void execute() override
515 {
516 const Result &input = this->get_input("Image");
517 if (input.is_single_value()) {
518 Result &output = this->get_result("Image");
519 output.share_data(input);
520 return;
521 }
522
523 if (this->context().use_gpu()) {
524 this->execute_gpu();
525 }
526 else {
527 this->execute_cpu();
528 }
529 }
530
532 {
533 Result max_tile_velocity = this->compute_max_tile_velocity();
534 GPUStorageBuf *tile_indirection_buffer = this->dilate_max_velocity(max_tile_velocity);
535 this->compute_motion_blur(max_tile_velocity, tile_indirection_buffer);
536 max_tile_velocity.release();
537 GPU_storagebuf_free(tile_indirection_buffer);
538 }
539
540 /* Reduces each 32x32 block of velocity pixels into a single velocity whose magnitude is largest.
541 * Each of the previous and next velocities are reduces independently. */
543 {
544 GPUShader *shader = context().get_shader("compositor_max_velocity");
545 GPU_shader_bind(shader);
546
547 GPU_shader_uniform_1b(shader, "is_initial_reduction", true);
548
549 Result &input = get_input("Speed");
550 input.bind_as_texture(shader, "input_tx");
551
552 Result output = context().create_result(ResultType::Float4);
553 const int2 tiles_count = math::divide_ceil(input.domain().size, int2(32));
554 output.allocate_texture(Domain(tiles_count));
555 output.bind_as_image(shader, "output_img");
556
557 GPU_compute_dispatch(shader, tiles_count.x, tiles_count.y, 1);
558
560 input.unbind_as_texture();
561 output.unbind_as_image();
562
563 return output;
564 }
565
566 /* The max tile velocity image computes the maximum within 32x32 blocks, while the velocity can
567 * in fact extend beyond such a small block. So we dilate the max blocks by taking the maximum
568 * along the path of each of the max velocity tiles. Since the shader uses custom max atomics,
569 * the output will be an indirection buffer that points to a particular tile in the original max
570 * tile velocity image. This is done as a form of performance optimization, see the shader for
571 * more information. */
572 GPUStorageBuf *dilate_max_velocity(Result &max_tile_velocity)
573 {
574 GPUShader *shader = context().get_shader("compositor_motion_blur_max_velocity_dilate");
575 GPU_shader_bind(shader);
576
577 GPU_shader_uniform_1f(shader, "shutter_speed", this->get_shutter());
578
579 max_tile_velocity.bind_as_texture(shader, "input_tx");
580
581 /* The shader assumes a maximum input size of 16k, and since the max tile velocity image is
582 * composed of blocks of 32, we get 16k / 32 = 512. So the table is 512x512, but we store two
583 * tables for the previous and next velocities, so we double that. */
584 const int size = sizeof(uint32_t) * 512 * 512 * 2;
585 GPUStorageBuf *tile_indirection_buffer = GPU_storagebuf_create_ex(
586 size, nullptr, GPU_USAGE_DEVICE_ONLY, __func__);
587 GPU_storagebuf_clear_to_zero(tile_indirection_buffer);
588 const int slot = GPU_shader_get_ssbo_binding(shader, "tile_indirection_buf");
589 GPU_storagebuf_bind(tile_indirection_buffer, slot);
590
591 compute_dispatch_threads_at_least(shader, max_tile_velocity.domain().size);
592
594 max_tile_velocity.unbind_as_texture();
595 GPU_storagebuf_unbind(tile_indirection_buffer);
596
597 return tile_indirection_buffer;
598 }
599
600 void compute_motion_blur(Result &max_tile_velocity, GPUStorageBuf *tile_indirection_buffer)
601 {
602 GPUShader *shader = context().get_shader("compositor_motion_blur");
603 GPU_shader_bind(shader);
604
605 GPU_shader_uniform_1i(shader, "samples_count", this->get_samples_count());
606 GPU_shader_uniform_1f(shader, "shutter_speed", this->get_shutter());
607
608 Result &input = get_input("Image");
609 input.bind_as_texture(shader, "input_tx");
610
611 Result &depth = get_input("Z");
612 depth.bind_as_texture(shader, "depth_tx");
613
614 Result &velocity = get_input("Speed");
615 velocity.bind_as_texture(shader, "velocity_tx");
616
617 max_tile_velocity.bind_as_texture(shader, "max_velocity_tx");
618
620 const int slot = GPU_shader_get_ssbo_binding(shader, "tile_indirection_buf");
621 GPU_storagebuf_bind(tile_indirection_buffer, slot);
622
623 Result &output = get_result("Image");
624 const Domain domain = compute_domain();
625 output.allocate_texture(domain);
626 output.bind_as_image(shader, "output_img");
627
628 compute_dispatch_threads_at_least(shader, output.domain().size);
629
631 input.unbind_as_texture();
632 depth.unbind_as_texture();
633 velocity.unbind_as_texture();
634 max_tile_velocity.unbind_as_texture();
635 output.unbind_as_image();
636 }
637
639 {
640 const float shutter_speed = this->get_shutter();
641 const int samples_count = this->get_samples_count();
642
643 const Result &input_image = get_input("Image");
644 const Result &input_depth = get_input("Z");
645 const Result &input_velocity = get_input("Speed");
646
647 Result &output = get_result("Image");
648 const Domain domain = compute_domain();
649 output.allocate_texture(domain);
650
651 Result max_tile_velocity = compute_max_tile_velocity_cpu(this->context(), input_velocity);
653 this->context(), max_tile_velocity, shutter_speed);
654 max_tile_velocity.release();
655 motion_blur_cpu(input_image,
656 input_depth,
657 input_velocity,
659 output,
660 samples_count,
661 shutter_speed);
662 max_velocity.release();
663 }
664
666 {
667 return math::clamp(this->get_input("Samples").get_single_value_default(32), 1, 256);
668 }
669
671 {
672 /* Divide by two since the motion blur algorithm expects shutter per motion step and has two
673 * motion steps, while the user inputs the entire shutter across all steps. */
674 return math::max(0.0f, this->get_input("Shutter").get_single_value_default(0.5f)) / 2.0f;
675 }
676};
677
679{
680 return new VectorBlurOperation(context, node);
681}
682
683} // namespace blender::nodes::node_composite_vec_blur_cc
684
686{
688
689 static blender::bke::bNodeType ntype;
690
691 cmp_node_type_base(&ntype, "CompositorNodeVecBlur", CMP_NODE_VECBLUR);
692 ntype.ui_name = "Vector Blur";
693 ntype.ui_description = "Uses the vector speed render pass to blur the image pixels in 2D";
694 ntype.enum_name_legacy = "VECBLUR";
696 ntype.declare = file_ns::cmp_node_vec_blur_declare;
697 ntype.get_compositor_operation = file_ns::get_compositor_operation;
698
700}
#define NODE_CLASS_OP_FILTER
Definition BKE_node.hh:437
#define CMP_NODE_VECBLUR
void GPU_compute_dispatch(GPUShader *shader, uint groups_x_len, uint groups_y_len, uint groups_z_len, const blender::gpu::shader::SpecializationConstants *constants_state=nullptr)
void GPU_shader_uniform_1i(GPUShader *sh, const char *name, int value)
void GPU_shader_uniform_1f(GPUShader *sh, const char *name, float value)
void GPU_shader_bind(GPUShader *shader, const blender::gpu::shader::SpecializationConstants *constants_state=nullptr)
int GPU_shader_get_ssbo_binding(GPUShader *shader, const char *name)
void GPU_shader_uniform_1b(GPUShader *sh, const char *name, bool value)
void GPU_shader_unbind()
void GPU_memory_barrier(eGPUBarrier barrier)
Definition gpu_state.cc:385
@ GPU_BARRIER_SHADER_STORAGE
Definition GPU_state.hh:48
void GPU_storagebuf_bind(GPUStorageBuf *ssbo, int slot)
GPUStorageBuf * GPU_storagebuf_create_ex(size_t size, const void *data, GPUUsageType usage, const char *name)
void GPU_storagebuf_unbind(GPUStorageBuf *ssbo)
void GPU_storagebuf_clear_to_zero(GPUStorageBuf *ssbo)
void GPU_storagebuf_free(GPUStorageBuf *ssbo)
@ GPU_USAGE_DEVICE_ONLY
#define NOD_REGISTER_NODE(REGISTER_FUNC)
@ PROP_VELOCITY
Definition RNA_types.hh:251
long long int int64_t
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition btDbvt.cpp:52
NodeOperation(Context &context, DNode node)
Result & get_result(StringRef identifier)
Definition operation.cc:39
virtual Domain compute_domain()
Definition operation.cc:56
InputDescriptor & get_input_descriptor(StringRef identifier)
Definition operation.cc:158
void unbind_as_texture() const
Definition result.cc:389
void bind_as_texture(GPUShader *shader, const char *texture_name) const
Definition result.cc:365
T load_pixel_extended(const int2 &texel) const
const Domain & domain() const
T load_pixel(const int2 &texel) const
float4 sample_bilinear_extended(const float2 &coordinates) const
bool is_single_value() const
Definition result.cc:625
const T & get_single_value() const
void compute_motion_blur(Result &max_tile_velocity, GPUStorageBuf *tile_indirection_buffer)
#define MOTION_BLUR_TILE_SIZE
#define input
#define output
const ccl_global KernelWorkTile * tile
static ulong * next
void node_register_type(bNodeType &ntype)
Definition node.cc:2748
void compute_dispatch_threads_at_least(GPUShader *shader, int2 threads_range, int2 local_size=int2(16))
Definition utilities.cc:170
void parallel_for(const int2 range, const Function &function)
T length_squared(const VecBase< T, Size > &a)
T clamp(const T &a, const T &min, const T &max)
T sign(const T &a)
VecBase< T, Size > divide_ceil(const VecBase< T, Size > &a, const VecBase< T, Size > &b)
T length(const VecBase< T, Size > &a)
T dot(const QuaternionBase< T > &a, const QuaternionBase< T > &b)
T min(const T &a, const T &b)
T fract(const T &a)
T ceil(const T &a)
T max(const T &a, const T &b)
T abs(const T &a)
static void gather_blur(const Result &input_image, const Result &input_depth, const Result &input_velocity, const int2 &size, const float2 &screen_uv, const float2 &center_motion, const float center_depth, const float2 &max_motion, const float ofs, const bool next, const int samples_count, const float shutter_speed, Accumulator &accum)
static uint32_t velocity_atomic_max_value(const float2 &value, const int2 &texel)
static void motion_blur_cpu(const Result &input_image, const Result &input_depth, const Result &input_velocity, const Result &max_velocity, Result &output, const int samples_count, const float shutter_speed)
static NodeOperation * get_compositor_operation(Context &context, DNode node)
static MotionRect compute_motion_rect(const int2 &tile, const float2 &motion, const int2 &size)
static bool is_inside_motion_line(const int2 &tile, const MotionLine &motion_line)
static float2 spread_compare(const float center_motion_length, const float sample_motion_length, const float offset_length)
static float2 max_velocity_approximate(const float2 &a, const float2 &b, const int2 &a_texel, const int2 &b_texel)
static float dir_compare(const float2 &offset, const float2 &sample_motion, const float &sample_motion_length)
static void gather_sample(const Result &input_image, const Result &input_depth, const Result &input_velocity, const int2 &size, const float2 &screen_uv, const float center_depth, const float center_motion_len, const float2 &offset, const float offset_len, const bool next, const float shutter_speed, Accumulator &accum)
static Result dilate_max_velocity_cpu(Context &context, const Result &max_tile_velocity, const float shutter_speed)
static Result compute_max_tile_velocity_cpu(Context &context, const Result &velocity_image)
static float2 depth_compare(const float center_depth, const float sample_depth)
static void cmp_node_vec_blur_declare(NodeDeclarationBuilder &b)
static MotionLine compute_motion_line(const int2 &tile, const float2 &motion)
static float2 sample_weights(const float center_depth, const float sample_depth, const float center_motion_length, const float sample_motion_length, const float offset_length)
static float2 max_velocity(const float2 &a, const float2 &b)
void parallel_for(const IndexRange range, const int64_t grain_size, const Function &function, const TaskSizeHints &size_hints=detail::TaskSizeHints_Static(1))
Definition BLI_task.hh:93
VecBase< float, 4 > float4
VecBase< int32_t, 2 > int2
VecBase< float, 2 > float2
VecBase< float, 3 > float3
void cmp_node_type_base(blender::bke::bNodeType *ntype, std::string idname, const std::optional< int16_t > legacy_type)
static void register_node_type_cmp_vecblur()
#define DEPTH_SCALE
#define min(a, b)
Definition sort.cc:36
VecBase< T, 2 > zw() const
VecBase< T, 2 > xy() const
Defines a node type.
Definition BKE_node.hh:226
std::string ui_description
Definition BKE_node.hh:232
NodeGetCompositorOperationFunction get_compositor_operation
Definition BKE_node.hh:336
const char * enum_name_legacy
Definition BKE_node.hh:235
NodeDeclareFunction declare
Definition BKE_node.hh:355
i
Definition text_draw.cc:230
static pxr::UsdShadeInput get_input(const pxr::UsdShadeShader &usd_shader, const pxr::TfToken &input_name)