21#define MOTION_BLUR_TILE_SIZE 32
22#define DEPTH_SCALE 100.0f
48 return (length_bits << 18u) | ((texel.x & 0x1FFu) << 9u) | (texel.y & 0x1FFu);
74 for (const int64_t y : sub_y_range) {
75 for (const int64_t x : IndexRange(tiles_count.x)) {
76 const int2 texel = int2(x, y);
78 float2 max_previous_velocity = float2(0.0f);
79 float2 max_next_velocity = float2(0.0f);
81 for (int j = 0; j < tile_size.y; j++) {
82 for (int i = 0; i < tile_size.x; i++) {
83 int2 sub_texel = texel * tile_size + int2(i, j);
84 const float4 velocity = velocity_buffer->get_elem_clamped(sub_texel.x, sub_texel.y);
85 max_previous_velocity = max_velocity(velocity.xy(), max_previous_velocity);
86 max_next_velocity = max_velocity(velocity.zw(), max_next_velocity);
90 const float4 max_velocity = float4(max_previous_velocity, max_next_velocity);
91 copy_v4_v4(output.get_elem(texel.x, texel.y), max_velocity);
114 max_point =
math::min(max_point, size - 1);
119 rect.
extent = 1 + max_point - min_point;
133 float2 dir = magnitude != 0.0f ? motion / magnitude : motion;
138 line.normal =
float2(-dir.y, dir.x);
162 output.fill(output.get_rect(), zero_value);
176 for (
int j = 0; j < motion_rect.
extent.y; j++) {
177 for (
int i = 0; i < motion_rect.
extent.x; i++) {
180 float *pixel = output.get_elem(
tile.
x,
tile.
y);
194 for (
int j = 0; j < motion_rect.
extent.y; j++) {
195 for (
int i = 0; i < motion_rect.
extent.x; i++) {
198 float *pixel = output.get_elem(
tile.
x,
tile.
y);
220 float sample_motion_length,
224 float2(center_motion_length, sample_motion_length) - offset_length + 1.0f, 0.0f, 1.0f);
230 return math::clamp(0.5f + depth_scale * (sample_depth - center_depth), 0.0f, 1.0f);
236 if (sample_motion_length < 0.5f) {
239 return (
math::dot(offset, sample_motion) > 0.0f) ? 1.0f : 0.0f;
245 float center_motion_length,
246 float sample_motion_length,
252 float2 spread_weight =
spread_compare(center_motion_length, sample_motion_length, offset_length);
253 return depth_weight * spread_weight;
269 float center_motion_len,
279 float2 sample_motion = (
next) ? sample_vectors.
zw() : sample_vectors.
xy();
285 center_depth, sample_depth, center_motion_len, sample_motion_len, offset_len);
288 weights.x = direct_weights.x;
289 weights.y = direct_weights.y;
290 weights.z =
dir_compare(offset, sample_motion, sample_motion_len);
291 weights.x *= weights.z;
292 weights.y *= weights.z;
294 accum.
fg += sample_color * weights.y;
295 accum.
bg += sample_color * weights.x;
318 if (max_motion_len < center_motion_len) {
319 max_motion_len = center_motion_len;
320 max_motion = center_motion;
323 if (max_motion_len < 0.5f) {
328 float t, inc = 1.0f /
float(samples_count);
329 for (i = 0, t = ofs * inc; i < samples_count; i++, t += inc) {
344 if (center_motion_len < 0.5f) {
348 for (i = 0, t = ofs * inc; i < samples_count; i++, t += inc) {
360 center_motion_len * t,
377 for (const int64_t y : sub_y_range) {
378 for (const int64_t x : IndexRange(size.x)) {
379 const int2 texel = int2(x, y);
380 float2 uv = (float2(texel) + 0.5f) / float2(size);
383 float center_depth = *depth_buffer->get_elem(x, y);
384 float4 center_motion = float4(velocity_buffer->get_elem(x, y)) *
385 float4(float2(shutter_speed), float2(-shutter_speed));
386 float4 center_color = image_buffer->get_elem(x, y);
390 float rand = interleaved_gradient_noise(texel);
391 int2 tile = (texel + int2(rand * 2.0f - 1.0f * float(MOTION_BLUR_TILE_SIZE) * 0.25f)) /
392 MOTION_BLUR_TILE_SIZE;
396 float4 max_motion = max_velocity_buffer->get_elem(tile.x, tile.y);
399 accum.weight = float3(0.0f, 0.0f, 1.0f);
400 accum.bg = float4(0.0f);
401 accum.fg = float4(0.0f);
403 gather_blur(image_buffer,
417 gather_blur(image_buffer,
433 float w = 1.0f / (50.0f * float(samples_count) * 4.0f);
434 accum.bg += center_color * w;
440 center_color = accum.bg / accum.weight.x;
443 accum.fg += accum.bg;
444 accum.weight.y += accum.weight.x;
447 float blend_fac = math::clamp(1.0f - accum.weight.y / accum.weight.z, 0.0f, 1.0f);
448 float4 out_color = (accum.fg / accum.weight.z) + center_color * blend_fac;
450 copy_v4_v4(output->get_elem(x, y), out_color);
465 const bool depth_needs_inflation = depth->is_a_single_elem();
482 if (image_needs_inflation) {
486 if (depth_needs_inflation) {
490 if (velocity_needs_inflation) {
491 delete velocity_buffer;
495void VectorBlurOperation::get_area_of_interest(
const int ,
499 r_input_area = this->get_canvas();
MINLINE void copy_v2_v2(float r[2], const float a[2])
#define MOTION_BLUR_TILE_SIZE
a MemoryBuffer contains access to the data
MemoryBuffer * inflate() const
const int get_width() const
get the width of this MemoryBuffer
const int get_height() const
get the height of this MemoryBuffer
bool is_a_single_elem() const
float * get_elem(int x, int y)
float4 texture_bilinear_extend(float2 coordinates) const
void add_output_socket(DataType datatype)
void add_input_socket(DataType datatype, ResizeMode resize_mode=ResizeMode::Center)
input_tx image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "preview_img") .compute_source("compositor_compute_preview.glsl") .do_static_compilation(true)
local_group_size(16, 16) .push_constant(Type b
draw_view in_light_buf[] float
ccl_global const KernelWorkTile * tile
constexpr int IMAGE_INPUT_INDEX
static MemoryBuffer compute_max_tile_velocity(MemoryBuffer *velocity_buffer)
static float2 depth_compare(float center_depth, float sample_depth)
static float2 spread_compare(float center_motion_length, float sample_motion_length, float offset_length)
static float2 max_velocity_approximate(const float2 &a, const float2 &b, const int2 &a_texel, const int2 &b_texel)
static float2 sample_weights(float center_depth, float sample_depth, float center_motion_length, float sample_motion_length, float offset_length)
static void gather_blur(MemoryBuffer *image_buffer, MemoryBuffer *depth_buffer, MemoryBuffer *velocity_buffer, int2 size, float2 screen_uv, float2 center_motion, float center_depth, float2 max_motion, float ofs, const bool next, int samples_count, float shutter_speed, Accumulator &accum)
static float dir_compare(float2 offset, float2 sample_motion, float sample_motion_length)
static uint32_t velocity_atomic_max_value(const float2 &value, const int2 &texel)
static float interleaved_gradient_noise(int2 p)
static void motion_blur(MemoryBuffer *image_buffer, MemoryBuffer *depth_buffer, MemoryBuffer *velocity_buffer, MemoryBuffer *max_velocity_buffer, MemoryBuffer *output, int samples_count, float shutter_speed)
static MemoryBuffer dilate_max_velocity(MemoryBuffer &max_tile_velocity, float shutter_speed)
static bool is_inside_motion_line(int2 tile, MotionLine motion_line)
static float2 max_velocity(const float2 &a, const float2 &b)
static void gather_sample(MemoryBuffer *image_buffer, MemoryBuffer *depth_buffer, MemoryBuffer *velocity_buffer, int2 size, float2 screen_uv, float center_depth, float center_motion_len, float2 offset, float offset_len, const bool next, float shutter_speed, Accumulator &accum)
static MotionLine compute_motion_line(int2 tile, float2 motion)
static MotionRect compute_motion_rect(int2 tile, float2 motion, int2 size)
T length_squared(const VecBase< T, Size > &a)
T clamp(const T &a, const T &min, const T &max)
VecBase< T, Size > divide_ceil(const VecBase< T, Size > &a, const VecBase< T, Size > &b)
T length(const VecBase< T, Size > &a)
T dot(const QuaternionBase< T > &a, const QuaternionBase< T > &b)
T min(const T &a, const T &b)
T max(const T &a, const T &b)
void parallel_for(const IndexRange range, const int64_t grain_size, const Function &function, const TaskSizeHints &size_hints=detail::TaskSizeHints_Static(1))
VecBase< float, 4 > float4
VecBase< int32_t, 2 > int2
VecBase< float, 2 > float2
VecBase< T, 2 > zw() const
VecBase< T, 2 > xy() const