Blender V5.0
node_composite_vec_blur.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2006 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
8
9#include <cstdint>
10
11#include "BLI_index_range.hh"
12#include "BLI_math_base.hh"
13#include "BLI_math_vector.hh"
15
16#include "UI_resources.hh"
17
18#include "GPU_compute.hh"
19#include "GPU_shader.hh"
20#include "GPU_state.hh"
21#include "GPU_storage_buffer.hh"
22#include "GPU_vertex_buffer.hh"
23
24#include "COM_node_operation.hh"
25#include "COM_result.hh"
26#include "COM_utilities.hh"
27
29
30/* **************** VECTOR BLUR ******************** */
31
33
35{
36 b.use_custom_socket_order();
37 b.allow_any_socket_order();
38 b.add_input<decl::Color>("Image")
39 .default_value({1.0f, 1.0f, 1.0f, 1.0f})
40 .hide_value()
41 .structure_type(StructureType::Dynamic);
42 b.add_output<decl::Color>("Image").structure_type(StructureType::Dynamic).align_with_previous();
43
44 b.add_input<decl::Vector>("Speed")
45 .dimensions(4)
46 .default_value({0.0f, 0.0f, 0.0f})
47 .min(0.0f)
48 .max(1.0f)
49 .subtype(PROP_VELOCITY)
50 .structure_type(StructureType::Dynamic);
51 b.add_input<decl::Float>("Z").default_value(0.0f).min(0.0f).structure_type(
52 StructureType::Dynamic);
53 b.add_input<decl::Int>("Samples").default_value(32).min(1).max(256).description(
54 "The number of samples used to approximate the motion blur");
55 b.add_input<decl::Float>("Shutter").default_value(0.5f).min(0.0f).description(
56 "Time between shutter opening and closing in frames");
57}
58
59using namespace blender::compositor;
60
61#define MOTION_BLUR_TILE_SIZE 32
62#define DEPTH_SCALE 100.0f
63
64/* Returns the input velocity that has the larger magnitude. */
65static float2 max_velocity(const float2 &a, const float2 &b)
66{
68}
69
70/* Identical to motion_blur_tile_indirection_pack_payload, encodes the value and its texel such
71 * that the integer length of the value is encoded in the most significant bits, then the x value
72 * of the texel are encoded in the middle bits, then the y value of the texel is stored in the
73 * least significant bits. */
74static uint32_t velocity_atomic_max_value(const float2 &value, const int2 &texel)
75{
76 const uint32_t length_bits = math::min(uint32_t(math::ceil(math::length(value))), 0x3FFFu);
77 return (length_bits << 18u) | ((texel.x & 0x1FFu) << 9u) | (texel.y & 0x1FFu);
78}
79
80/* Returns the input velocity that has the larger integer magnitude, and if equal the larger x
81 * texel coordinates, and if equal, the larger y texel coordinates. It might be weird that we use
82 * an approximate comparison, but this is used for compatibility with the GPU code, which uses
83 * atomic integer operations, hence the limited precision. See velocity_atomic_max_value for more
84 * information. */
86 const float2 &b,
87 const int2 &a_texel,
88 const int2 &b_texel)
89{
90 return velocity_atomic_max_value(a, a_texel) > velocity_atomic_max_value(b, b_texel) ? a : b;
91}
92
93/* Reduces each 32x32 block of velocity pixels into a single velocity whose magnitude is largest.
94 * Each of the previous and next velocities are reduces independently. */
95static Result compute_max_tile_velocity_cpu(Context &context, const Result &velocity_image)
96{
97 if (velocity_image.is_single_value()) {
98 Result output = context.create_result(ResultType::Float4);
99 output.allocate_single_value();
100 output.set_single_value(velocity_image.get_single_value<float4>());
101 return output;
102 }
103
104 const int2 tile_size = int2(MOTION_BLUR_TILE_SIZE);
105 const int2 velocity_size = velocity_image.domain().size;
106 const int2 tiles_count = math::divide_ceil(velocity_size, tile_size);
107 Result output = context.create_result(ResultType::Float4);
108 output.allocate_texture(Domain(tiles_count));
109
110 parallel_for(tiles_count, [&](const int2 texel) {
111 float2 max_previous_velocity = float2(0.0f);
112 float2 max_next_velocity = float2(0.0f);
113
114 for (int j = 0; j < tile_size.y; j++) {
115 for (int i = 0; i < tile_size.x; i++) {
116 int2 sub_texel = texel * tile_size + int2(i, j);
117 const float4 velocity = velocity_image.load_pixel_extended<float4>(sub_texel);
118 max_previous_velocity = max_velocity(velocity.xy(), max_previous_velocity);
119 max_next_velocity = max_velocity(velocity.zw(), max_next_velocity);
120 }
121 }
122
123 const float4 max_velocity = float4(max_previous_velocity, max_next_velocity);
124 output.store_pixel(texel, max_velocity);
125 });
126
127 return output;
128}
129
134
135static MotionRect compute_motion_rect(const int2 &tile, const float2 &motion, const int2 &size)
136{
137 /* `ceil()` to number of tile touched. */
138 int2 point1 = tile + int2(math::sign(motion) *
139 math::ceil(math::abs(motion) / float(MOTION_BLUR_TILE_SIZE)));
140 int2 point2 = tile;
141
142 int2 max_point = math::max(point1, point2);
143 int2 min_point = math::min(point1, point2);
144 /* Clamp to bounds. */
145 max_point = math::min(max_point, size - 1);
146 min_point = math::max(min_point, int2(0));
147
148 MotionRect rect;
149 rect.bottom_left = min_point;
150 rect.extent = 1 + max_point - min_point;
151 return rect;
152}
153
160
161static MotionLine compute_motion_line(const int2 &tile, const float2 &motion)
162{
163 float magnitude = math::length(motion);
164 float2 dir = magnitude != 0.0f ? motion / magnitude : motion;
165
166 MotionLine line;
167 line.origin = float2(tile);
168 /* Rotate 90 degrees counter-clockwise. */
169 line.normal = float2(-dir.y, dir.x);
170 return line;
171}
172
173static bool is_inside_motion_line(const int2 &tile, const MotionLine &motion_line)
174{
175 /* NOTE: Everything in is tile unit. */
176 float distance_to_line = math::dot(motion_line.normal, motion_line.origin - float2(tile));
177 /* In order to be conservative and for simplicity, we use the tiles bounding circles.
178 * Consider that both the tile and the line have bounding radius of M_SQRT1_2. */
179 return math::abs(distance_to_line) < math::numbers::sqrt2_v<float>;
180}
181
182/* The max tile velocity image computes the maximum within 32x32 blocks, while the velocity can
183 * in fact extend beyond such a small block. So we dilate the max blocks by taking the maximum
184 * along the path of each of the max velocity tiles. */
186 const Result &max_tile_velocity,
187 const float shutter_speed)
188{
189 if (max_tile_velocity.is_single_value()) {
190 Result output = context.create_result(ResultType::Float4);
191 output.allocate_single_value();
192 output.set_single_value(max_tile_velocity.get_single_value<float4>());
193 return output;
194 }
195
196 const int2 size = max_tile_velocity.domain().size;
197 Result output = context.create_result(ResultType::Float4);
198 output.allocate_texture(Domain(size));
199
200 parallel_for(size, [&](const int2 texel) { output.store_pixel(texel, float4(0.0f)); });
201
202 for (const int64_t y : IndexRange(size.y)) {
203 for (const int64_t x : IndexRange(size.x)) {
204 const int2 src_tile = int2(x, y);
205
206 const float4 max_motion = max_tile_velocity.load_pixel<float4>(src_tile);
207 const float2 max_previous_velocity = max_motion.xy() * shutter_speed;
208 const float2 max_next_velocity = max_motion.zw() * -shutter_speed;
209
210 {
211 /* Rectangular area (in tiles) where the motion vector spreads. */
212 MotionRect motion_rect = compute_motion_rect(src_tile, max_previous_velocity, size);
213 MotionLine motion_line = compute_motion_line(src_tile, max_previous_velocity);
214 /* Do a conservative rasterization of the line of the motion vector line. */
215 for (int j = 0; j < motion_rect.extent.y; j++) {
216 for (int i = 0; i < motion_rect.extent.x; i++) {
217 int2 tile = motion_rect.bottom_left + int2(i, j);
218 if (is_inside_motion_line(tile, motion_line)) {
219 const float4 current_max_velocity = output.load_pixel<float4>(tile);
220 const float2 new_max_previous_velocity = max_velocity_approximate(
221 current_max_velocity.xy(), max_previous_velocity, tile, src_tile);
222 const float2 new_max_next_velocity = max_velocity_approximate(
223 current_max_velocity.zw(), max_next_velocity, tile, src_tile);
224 output.store_pixel(tile, float4(new_max_previous_velocity, new_max_next_velocity));
225 }
226 }
227 }
228 }
229
230 {
231 /* Rectangular area (in tiles) where the motion vector spreads. */
232 MotionRect motion_rect = compute_motion_rect(src_tile, max_next_velocity, size);
233 MotionLine motion_line = compute_motion_line(src_tile, max_next_velocity);
234 /* Do a conservative rasterization of the line of the motion vector line. */
235 for (int j = 0; j < motion_rect.extent.y; j++) {
236 for (int i = 0; i < motion_rect.extent.x; i++) {
237 int2 tile = motion_rect.bottom_left + int2(i, j);
238 if (is_inside_motion_line(tile, motion_line)) {
239 const float4 current_max_velocity = output.load_pixel<float4>(tile);
240 const float2 new_max_previous_velocity = max_velocity_approximate(
241 current_max_velocity.xy(), max_previous_velocity, tile, src_tile);
242 const float2 new_max_next_velocity = max_velocity_approximate(
243 current_max_velocity.zw(), max_next_velocity, tile, src_tile);
244 output.store_pixel(tile, float4(new_max_previous_velocity, new_max_next_velocity));
245 }
246 }
247 }
248 }
249 }
250 }
251
252 return output;
253}
254
255/* Interleaved gradient noise by Jorge Jimenez
256 * http://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare. */
257static float interleaved_gradient_noise(const int2 &p)
258{
259 return math::fract(52.9829189f * math::fract(0.06711056f * p.x + 0.00583715f * p.y));
260}
261
262static float2 spread_compare(const float center_motion_length,
263 const float sample_motion_length,
264 const float offset_length)
265{
266 return math::clamp(
267 float2(center_motion_length, sample_motion_length) - offset_length + 1.0f, 0.0f, 1.0f);
268}
269
270static float2 depth_compare(const float center_depth, const float sample_depth)
271{
272 float2 depth_scale = float2(DEPTH_SCALE, -DEPTH_SCALE);
273 return math::clamp(0.5f + depth_scale * (sample_depth - center_depth), 0.0f, 1.0f);
274}
275
276/* Kill contribution if not going the same direction. */
277static float dir_compare(const float2 &offset,
278 const float2 &sample_motion,
279 const float &sample_motion_length)
280{
281 if (sample_motion_length < 0.5f) {
282 return 1.0f;
283 }
284 return (math::dot(offset, sample_motion) > 0.0f) ? 1.0f : 0.0f;
285}
286
287/* Return background (x) and foreground (y) weights. */
288static float2 sample_weights(const float center_depth,
289 const float sample_depth,
290 const float center_motion_length,
291 const float sample_motion_length,
292 const float offset_length)
293{
294 /* Classify foreground/background. */
295 float2 depth_weight = depth_compare(center_depth, sample_depth);
296 /* Weight if sample is overlapping or under the center pixel. */
297 float2 spread_weight = spread_compare(center_motion_length, sample_motion_length, offset_length);
298 return depth_weight * spread_weight;
299}
300
307
308static void gather_sample(const Result &input_image,
309 const Result &input_depth,
310 const Result &input_velocity,
311 const int2 &size,
312 const float2 &screen_uv,
313 const float center_depth,
314 const float center_motion_len,
315 const float2 &offset,
316 const float offset_len,
317 const bool next,
318 const float shutter_speed,
319 Accumulator &accum)
320{
321 float2 sample_uv = screen_uv - offset / float2(size);
322 float4 sample_vectors = input_velocity.sample_bilinear_extended(sample_uv) *
323 float4(float2(shutter_speed), float2(-shutter_speed));
324 float2 sample_motion = (next) ? sample_vectors.zw() : sample_vectors.xy();
325 float sample_motion_len = math::length(sample_motion);
326 float sample_depth = input_depth.sample_bilinear_extended(sample_uv).x;
327 float4 sample_color = input_image.sample_bilinear_extended(sample_uv);
328
329 float2 direct_weights = sample_weights(
330 center_depth, sample_depth, center_motion_len, sample_motion_len, offset_len);
331
332 float3 weights;
333 weights.x = direct_weights.x;
334 weights.y = direct_weights.y;
335 weights.z = dir_compare(offset, sample_motion, sample_motion_len);
336 weights.x *= weights.z;
337 weights.y *= weights.z;
338
339 accum.fg += sample_color * weights.y;
340 accum.bg += sample_color * weights.x;
341 accum.weight += weights;
342}
343
344static void gather_blur(const Result &input_image,
345 const Result &input_depth,
346 const Result &input_velocity,
347 const int2 &size,
348 const float2 &screen_uv,
349 const float2 &center_motion,
350 const float center_depth,
351 const float2 &max_motion,
352 const float ofs,
353 const bool next,
354 const int samples_count,
355 const float shutter_speed,
356 Accumulator &accum)
357{
358 float center_motion_len = math::length(center_motion);
359 float max_motion_len = math::length(max_motion);
360
361 /* Tile boundaries randomization can fetch a tile where there is less motion than this pixel.
362 * Fix this by overriding the max_motion. */
363 float2 sanitized_max_motion = max_motion;
364 if (max_motion_len < center_motion_len) {
365 max_motion_len = center_motion_len;
366 sanitized_max_motion = center_motion;
367 }
368
369 if (max_motion_len < 0.5f) {
370 return;
371 }
372
373 int i;
374 float t, inc = 1.0f / float(samples_count);
375 for (i = 0, t = ofs * inc; i < samples_count; i++, t += inc) {
376 gather_sample(input_image,
377 input_depth,
378 input_velocity,
379 size,
380 screen_uv,
381 center_depth,
382 center_motion_len,
383 sanitized_max_motion * t,
384 max_motion_len * t,
385 next,
386 shutter_speed,
387 accum);
388 }
389
390 if (center_motion_len < 0.5f) {
391 return;
392 }
393
394 for (i = 0, t = ofs * inc; i < samples_count; i++, t += inc) {
395 /* Also sample in center motion direction.
396 * Allow recovering motion where there is conflicting
397 * motion between foreground and background. */
398 gather_sample(input_image,
399 input_depth,
400 input_velocity,
401 size,
402 screen_uv,
403 center_depth,
404 center_motion_len,
405 center_motion * t,
406 center_motion_len * t,
407 next,
408 shutter_speed,
409 accum);
410 }
411}
412
413static void motion_blur_cpu(const Result &input_image,
414 const Result &input_depth,
415 const Result &input_velocity,
416 const Result &max_velocity,
417 Result &output,
418 const int samples_count,
419 const float shutter_speed)
420{
421 const int2 size = input_image.domain().size;
422 threading::parallel_for(IndexRange(size.y), 1, [&](const IndexRange sub_y_range) {
423 for (const int64_t y : sub_y_range) {
424 for (const int64_t x : IndexRange(size.x)) {
425 const int2 texel = int2(x, y);
426 float2 uv = (float2(texel) + 0.5f) / float2(size);
427
428 /* Data of the center pixel of the gather (target). */
429 float center_depth = input_depth.load_pixel<float, true>(texel);
430 float4 center_motion = input_velocity.load_pixel<float4, true>(texel);
431 float2 center_previous_motion = center_motion.xy() * shutter_speed;
432 float2 center_next_motion = center_motion.zw() * -shutter_speed;
433 float4 center_color = input_image.load_pixel<float4>(texel);
434
435 /* Randomize tile boundary to avoid ugly discontinuities. Randomize 1/4th of the tile.
436 * Note this randomize only in one direction but in practice it's enough. */
437 float rand = interleaved_gradient_noise(texel);
438 int2 tile = (texel + int2(rand * 2.0f - 1.0f * float(MOTION_BLUR_TILE_SIZE) * 0.25f)) /
439 MOTION_BLUR_TILE_SIZE;
440
441 /* No need to multiply by the shutter speed and invert the next velocities since this was
442 * already done in dilate_max_velocity. */
443 float4 max_motion = max_velocity.load_pixel<float4, true>(tile);
444
445 Accumulator accum;
446 accum.weight = float3(0.0f, 0.0f, 1.0f);
447 accum.bg = float4(0.0f);
448 accum.fg = float4(0.0f);
449 /* First linear gather. time = [T - delta, T] */
450 gather_blur(input_image,
451 input_depth,
452 input_velocity,
453 size,
454 uv,
455 center_previous_motion,
456 center_depth,
457 max_motion.xy(),
458 rand,
459 false,
460 samples_count,
461 shutter_speed,
462 accum);
463 /* Second linear gather. time = [T, T + delta] */
464 gather_blur(input_image,
465 input_depth,
466 input_velocity,
467 size,
468 uv,
469 center_next_motion,
470 center_depth,
471 max_motion.zw(),
472 rand,
473 true,
474 samples_count,
475 shutter_speed,
476 accum);
477
478#if 1 /* Own addition. Not present in reference implementation. */
479 /* Avoid division by 0.0. */
480 float w = 1.0f / (50.0f * float(samples_count) * 4.0f);
481 accum.bg += center_color * w;
482 accum.weight.x += w;
483 /* NOTE: In Jimenez's presentation, they used center sample.
484 * We use background color as it contains more information for foreground
485 * elements that have not enough weights.
486 * Yield better blur in complex motion. */
487 center_color = accum.bg / accum.weight.x;
488#endif
489 /* Merge background. */
490 accum.fg += accum.bg;
491 accum.weight.y += accum.weight.x;
492 /* Balance accumulation for failed samples.
493 * We replace the missing foreground by the background. */
494 float blend_fac = math::clamp(1.0f - accum.weight.y / accum.weight.z, 0.0f, 1.0f);
495 float4 out_color = (accum.fg / accum.weight.z) + center_color * blend_fac;
496
497 output.store_pixel(texel, out_color);
498 }
499 }
500 });
501}
502
504 public:
506
507 void execute() override
508 {
509 const Result &input = this->get_input("Image");
510 if (input.is_single_value()) {
511 Result &output = this->get_result("Image");
512 output.share_data(input);
513 return;
514 }
515
516 if (this->context().use_gpu()) {
517 this->execute_gpu();
518 }
519 else {
520 this->execute_cpu();
521 }
522 }
523
525 {
526 Result max_tile_velocity = this->compute_max_tile_velocity();
527 gpu::StorageBuf *tile_indirection_buffer = this->dilate_max_velocity(max_tile_velocity);
528 this->compute_motion_blur(max_tile_velocity, tile_indirection_buffer);
529 max_tile_velocity.release();
530 GPU_storagebuf_free(tile_indirection_buffer);
531 }
532
533 /* Reduces each 32x32 block of velocity pixels into a single velocity whose magnitude is largest.
534 * The previous and next velocities are each reduced independently. */
536 {
537 gpu::Shader *shader = context().get_shader("compositor_max_velocity");
538 GPU_shader_bind(shader);
539
540 GPU_shader_uniform_1b(shader, "is_initial_reduction", true);
541
542 Result &input = get_input("Speed");
543 input.bind_as_texture(shader, "input_tx");
544
545 Result output = context().create_result(ResultType::Float4);
546 const int2 tiles_count = math::divide_ceil(input.domain().size, int2(32));
547 output.allocate_texture(Domain(tiles_count));
548 output.bind_as_image(shader, "output_img");
549
550 GPU_compute_dispatch(shader, tiles_count.x, tiles_count.y, 1);
551
553 input.unbind_as_texture();
554 output.unbind_as_image();
555
556 return output;
557 }
558
559 /* The max tile velocity image computes the maximum within 32x32 blocks, while the velocity can
560 * in fact extend beyond such a small block. So we dilate the max blocks by taking the maximum
561 * along the path of each of the max velocity tiles. Since the shader uses custom max atomics,
562 * the output will be an indirection buffer that points to a particular tile in the original max
563 * tile velocity image. This is done as a form of performance optimization, see the shader for
564 * more information. */
566 {
567 gpu::Shader *shader = context().get_shader("compositor_motion_blur_max_velocity_dilate");
568 GPU_shader_bind(shader);
569
570 GPU_shader_uniform_1f(shader, "shutter_speed", this->get_shutter());
571
572 max_tile_velocity.bind_as_texture(shader, "input_tx");
573
574 /* The shader assumes a maximum input size of 16k, and since the max tile velocity image is
575 * composed of blocks of 32, we get 16k / 32 = 512. So the table is 512x512, but we store two
576 * tables for the previous and next velocities, so we double that. */
577 const int size = sizeof(uint32_t) * 512 * 512 * 2;
578 gpu::StorageBuf *tile_indirection_buffer = GPU_storagebuf_create_ex(
579 size, nullptr, GPU_USAGE_DEVICE_ONLY, __func__);
580 GPU_storagebuf_clear_to_zero(tile_indirection_buffer);
581 const int slot = GPU_shader_get_ssbo_binding(shader, "tile_indirection_buf");
582 GPU_storagebuf_bind(tile_indirection_buffer, slot);
583
584 compute_dispatch_threads_at_least(shader, max_tile_velocity.domain().size);
585
587 max_tile_velocity.unbind_as_texture();
588 GPU_storagebuf_unbind(tile_indirection_buffer);
589
590 return tile_indirection_buffer;
591 }
592
593 void compute_motion_blur(Result &max_tile_velocity, gpu::StorageBuf *tile_indirection_buffer)
594 {
595 gpu::Shader *shader = context().get_shader("compositor_motion_blur");
596 GPU_shader_bind(shader);
597
598 GPU_shader_uniform_1i(shader, "samples_count", this->get_samples_count());
599 GPU_shader_uniform_1f(shader, "shutter_speed", this->get_shutter());
600
601 Result &input = get_input("Image");
602 input.bind_as_texture(shader, "input_tx");
603
604 Result &depth = get_input("Z");
605 depth.bind_as_texture(shader, "depth_tx");
606
607 Result &velocity = get_input("Speed");
608 velocity.bind_as_texture(shader, "velocity_tx");
609
610 max_tile_velocity.bind_as_texture(shader, "max_velocity_tx");
611
613 const int slot = GPU_shader_get_ssbo_binding(shader, "tile_indirection_buf");
614 GPU_storagebuf_bind(tile_indirection_buffer, slot);
615
616 Result &output = get_result("Image");
617 const Domain domain = compute_domain();
618 output.allocate_texture(domain);
619 output.bind_as_image(shader, "output_img");
620
621 compute_dispatch_threads_at_least(shader, output.domain().size);
622
624 input.unbind_as_texture();
625 depth.unbind_as_texture();
626 velocity.unbind_as_texture();
627 max_tile_velocity.unbind_as_texture();
628 output.unbind_as_image();
629 }
630
632 {
633 const float shutter_speed = this->get_shutter();
634 const int samples_count = this->get_samples_count();
635
636 const Result &input_image = get_input("Image");
637 const Result &input_depth = get_input("Z");
638 const Result &input_velocity = get_input("Speed");
639
640 Result &output = get_result("Image");
641 const Domain domain = compute_domain();
642 output.allocate_texture(domain);
643
644 Result max_tile_velocity = compute_max_tile_velocity_cpu(this->context(), input_velocity);
646 this->context(), max_tile_velocity, shutter_speed);
647 max_tile_velocity.release();
648 motion_blur_cpu(input_image,
649 input_depth,
650 input_velocity,
652 output,
653 samples_count,
654 shutter_speed);
655 max_velocity.release();
656 }
657
659 {
660 return math::clamp(this->get_input("Samples").get_single_value_default(32), 1, 256);
661 }
662
664 {
665 /* Divide by two since the motion blur algorithm expects shutter per motion step and has two
666 * motion steps, while the user inputs the entire shutter across all steps. */
667 return math::max(0.0f, this->get_input("Shutter").get_single_value_default(0.5f)) / 2.0f;
668 }
669};
670
672{
673 return new VectorBlurOperation(context, node);
674}
675
676} // namespace blender::nodes::node_composite_vec_blur_cc
677
679{
681
682 static blender::bke::bNodeType ntype;
683
684 cmp_node_type_base(&ntype, "CompositorNodeVecBlur", CMP_NODE_VECBLUR);
685 ntype.ui_name = "Vector Blur";
686 ntype.ui_description = "Uses the vector speed render pass to blur the image pixels in 2D";
687 ntype.enum_name_legacy = "VECBLUR";
689 ntype.declare = file_ns::cmp_node_vec_blur_declare;
690 ntype.get_compositor_operation = file_ns::get_compositor_operation;
691
693}
#define NODE_CLASS_OP_FILTER
Definition BKE_node.hh:451
#define CMP_NODE_VECBLUR
void GPU_compute_dispatch(blender::gpu::Shader *shader, uint groups_x_len, uint groups_y_len, uint groups_z_len, const blender::gpu::shader::SpecializationConstants *constants_state=nullptr)
void GPU_shader_uniform_1b(blender::gpu::Shader *sh, const char *name, bool value)
void GPU_shader_uniform_1f(blender::gpu::Shader *sh, const char *name, float value)
void GPU_shader_bind(blender::gpu::Shader *shader, const blender::gpu::shader::SpecializationConstants *constants_state=nullptr)
int GPU_shader_get_ssbo_binding(blender::gpu::Shader *shader, const char *name)
void GPU_shader_uniform_1i(blender::gpu::Shader *sh, const char *name, int value)
void GPU_shader_unbind()
@ GPU_BARRIER_SHADER_STORAGE
Definition GPU_state.hh:48
void GPU_memory_barrier(GPUBarrier barrier)
Definition gpu_state.cc:326
void GPU_storagebuf_free(blender::gpu::StorageBuf *ssbo)
blender::gpu::StorageBuf * GPU_storagebuf_create_ex(size_t size, const void *data, GPUUsageType usage, const char *name)
void GPU_storagebuf_clear_to_zero(blender::gpu::StorageBuf *ssbo)
void GPU_storagebuf_bind(blender::gpu::StorageBuf *ssbo, int slot)
void GPU_storagebuf_unbind(blender::gpu::StorageBuf *ssbo)
@ GPU_USAGE_DEVICE_ONLY
#define NOD_REGISTER_NODE(REGISTER_FUNC)
@ PROP_VELOCITY
Definition RNA_types.hh:263
long long int int64_t
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition btDbvt.cpp:52
NodeOperation(Context &context, DNode node)
Result & get_result(StringRef identifier)
Definition operation.cc:39
virtual Domain compute_domain()
Definition operation.cc:56
void unbind_as_texture() const
Definition result.cc:511
void bind_as_texture(gpu::Shader *shader, const char *texture_name) const
Definition result.cc:487
T load_pixel_extended(const int2 &texel) const
const Domain & domain() const
T load_pixel(const int2 &texel) const
float4 sample_bilinear_extended(const float2 &coordinates) const
bool is_single_value() const
Definition result.cc:758
const T & get_single_value() const
void compute_motion_blur(Result &max_tile_velocity, gpu::StorageBuf *tile_indirection_buffer)
nullptr float
#define MOTION_BLUR_TILE_SIZE
#define input
#define output
const ccl_global KernelWorkTile * tile
static ulong * next
void node_register_type(bNodeType &ntype)
Definition node.cc:2416
void compute_dispatch_threads_at_least(gpu::Shader *shader, int2 threads_range, int2 local_size=int2(16))
Definition utilities.cc:196
void parallel_for(const int2 range, const Function &function)
T length_squared(const VecBase< T, Size > &a)
T clamp(const T &a, const T &min, const T &max)
T sign(const T &a)
VecBase< T, Size > divide_ceil(const VecBase< T, Size > &a, const VecBase< T, Size > &b)
T length(const VecBase< T, Size > &a)
T dot(const QuaternionBase< T > &a, const QuaternionBase< T > &b)
T min(const T &a, const T &b)
T fract(const T &a)
T ceil(const T &a)
T max(const T &a, const T &b)
T abs(const T &a)
static void gather_blur(const Result &input_image, const Result &input_depth, const Result &input_velocity, const int2 &size, const float2 &screen_uv, const float2 &center_motion, const float center_depth, const float2 &max_motion, const float ofs, const bool next, const int samples_count, const float shutter_speed, Accumulator &accum)
static uint32_t velocity_atomic_max_value(const float2 &value, const int2 &texel)
static void motion_blur_cpu(const Result &input_image, const Result &input_depth, const Result &input_velocity, const Result &max_velocity, Result &output, const int samples_count, const float shutter_speed)
static NodeOperation * get_compositor_operation(Context &context, DNode node)
static MotionRect compute_motion_rect(const int2 &tile, const float2 &motion, const int2 &size)
static bool is_inside_motion_line(const int2 &tile, const MotionLine &motion_line)
static float2 spread_compare(const float center_motion_length, const float sample_motion_length, const float offset_length)
static float2 max_velocity_approximate(const float2 &a, const float2 &b, const int2 &a_texel, const int2 &b_texel)
static float dir_compare(const float2 &offset, const float2 &sample_motion, const float &sample_motion_length)
static void gather_sample(const Result &input_image, const Result &input_depth, const Result &input_velocity, const int2 &size, const float2 &screen_uv, const float center_depth, const float center_motion_len, const float2 &offset, const float offset_len, const bool next, const float shutter_speed, Accumulator &accum)
static Result dilate_max_velocity_cpu(Context &context, const Result &max_tile_velocity, const float shutter_speed)
static Result compute_max_tile_velocity_cpu(Context &context, const Result &velocity_image)
static float2 depth_compare(const float center_depth, const float sample_depth)
static void cmp_node_vec_blur_declare(NodeDeclarationBuilder &b)
static MotionLine compute_motion_line(const int2 &tile, const float2 &motion)
static float2 sample_weights(const float center_depth, const float sample_depth, const float center_motion_length, const float sample_motion_length, const float offset_length)
static float2 max_velocity(const float2 &a, const float2 &b)
void parallel_for(const IndexRange range, const int64_t grain_size, const Function &function, const TaskSizeHints &size_hints=detail::TaskSizeHints_Static(1))
Definition BLI_task.hh:93
VecBase< float, 4 > float4
VecBase< int32_t, 2 > int2
VecBase< float, 2 > float2
VecBase< float, 3 > float3
void cmp_node_type_base(blender::bke::bNodeType *ntype, std::string idname, const std::optional< int16_t > legacy_type)
static void register_node_type_cmp_vecblur()
#define DEPTH_SCALE
#define min(a, b)
Definition sort.cc:36
VecBase< T, 2 > zw() const
VecBase< T, 2 > xy() const
Defines a node type.
Definition BKE_node.hh:238
std::string ui_description
Definition BKE_node.hh:244
NodeGetCompositorOperationFunction get_compositor_operation
Definition BKE_node.hh:348
const char * enum_name_legacy
Definition BKE_node.hh:247
NodeDeclareFunction declare
Definition BKE_node.hh:362
i
Definition text_draw.cc:230
static pxr::UsdShadeInput get_input(const pxr::UsdShadeShader &usd_shader, const pxr::TfToken &input_name)