36 b.use_custom_socket_order();
37 b.allow_any_socket_order();
39 .default_value({1.0f, 1.0f, 1.0f, 1.0f})
42 b.add_output<
decl::Color>(
"Image").structure_type(StructureType::Dynamic).align_with_previous();
50 .structure_type(StructureType::Dynamic);
52 StructureType::Dynamic);
54 "The number of samples used to approximate the motion blur");
56 "Time between shutter opening and closing in frames");
61#define MOTION_BLUR_TILE_SIZE 32
62#define DEPTH_SCALE 100.0f
77 return (length_bits << 18u) | ((texel.x & 0x1FFu) << 9u) | (texel.y & 0x1FFu);
99 output.allocate_single_value();
114 for (
int j = 0; j < tile_size.y; j++) {
115 for (
int i = 0;
i < tile_size.x;
i++) {
116 int2 sub_texel = texel * tile_size +
int2(
i, j);
118 max_previous_velocity =
max_velocity(velocity.
xy(), max_previous_velocity);
119 max_next_velocity =
max_velocity(velocity.
zw(), max_next_velocity);
150 rect.
extent = 1 + max_point - min_point;
164 float2 dir = magnitude != 0.0f ? motion / magnitude : motion;
186 const Result &max_tile_velocity,
187 const float shutter_speed)
191 output.allocate_single_value();
207 const float2 max_previous_velocity = max_motion.
xy() * shutter_speed;
208 const float2 max_next_velocity = max_motion.
zw() * -shutter_speed;
215 for (
int j = 0; j < motion_rect.
extent.y; j++) {
216 for (
int i = 0;
i < motion_rect.
extent.x;
i++) {
221 current_max_velocity.
xy(), max_previous_velocity,
tile, src_tile);
223 current_max_velocity.
zw(), max_next_velocity,
tile, src_tile);
224 output.store_pixel(
tile,
float4(new_max_previous_velocity, new_max_next_velocity));
235 for (
int j = 0; j < motion_rect.
extent.y; j++) {
236 for (
int i = 0;
i < motion_rect.
extent.x;
i++) {
241 current_max_velocity.
xy(), max_previous_velocity,
tile, src_tile);
243 current_max_velocity.
zw(), max_next_velocity,
tile, src_tile);
244 output.store_pixel(
tile,
float4(new_max_previous_velocity, new_max_next_velocity));
263 const float sample_motion_length,
264 const float offset_length)
267 float2(center_motion_length, sample_motion_length) - offset_length + 1.0f, 0.0f, 1.0f);
273 return math::clamp(0.5f + depth_scale * (sample_depth - center_depth), 0.0f, 1.0f);
278 const float2 &sample_motion,
279 const float &sample_motion_length)
281 if (sample_motion_length < 0.5f) {
284 return (
math::dot(offset, sample_motion) > 0.0f) ? 1.0f : 0.0f;
289 const float sample_depth,
290 const float center_motion_length,
291 const float sample_motion_length,
292 const float offset_length)
297 float2 spread_weight =
spread_compare(center_motion_length, sample_motion_length, offset_length);
298 return depth_weight * spread_weight;
309 const Result &input_depth,
310 const Result &input_velocity,
313 const float center_depth,
314 const float center_motion_len,
316 const float offset_len,
318 const float shutter_speed,
324 float2 sample_motion = (
next) ? sample_vectors.
zw() : sample_vectors.
xy();
330 center_depth, sample_depth, center_motion_len, sample_motion_len, offset_len);
333 weights.x = direct_weights.x;
334 weights.y = direct_weights.y;
335 weights.z =
dir_compare(offset, sample_motion, sample_motion_len);
336 weights.x *= weights.z;
337 weights.y *= weights.z;
339 accum.
fg += sample_color * weights.y;
340 accum.
bg += sample_color * weights.x;
345 const Result &input_depth,
346 const Result &input_velocity,
349 const float2 &center_motion,
350 const float center_depth,
354 const int samples_count,
355 const float shutter_speed,
363 float2 sanitized_max_motion = max_motion;
364 if (max_motion_len < center_motion_len) {
365 max_motion_len = center_motion_len;
366 sanitized_max_motion = center_motion;
369 if (max_motion_len < 0.5f) {
374 float t, inc = 1.0f /
float(samples_count);
375 for (
i = 0, t = ofs * inc;
i < samples_count;
i++, t += inc) {
383 sanitized_max_motion * t,
390 if (center_motion_len < 0.5f) {
394 for (
i = 0, t = ofs * inc;
i < samples_count;
i++, t += inc) {
406 center_motion_len * t,
414 const Result &input_depth,
415 const Result &input_velocity,
418 const int samples_count,
419 const float shutter_speed)
423 for (const int64_t y : sub_y_range) {
424 for (const int64_t x : IndexRange(size.x)) {
425 const int2 texel = int2(x, y);
426 float2 uv = (float2(texel) + 0.5f) / float2(size);
429 float center_depth = input_depth.load_pixel<float, true>(texel);
430 float4 center_motion = input_velocity.load_pixel<float4, true>(texel);
431 float2 center_previous_motion = center_motion.xy() * shutter_speed;
432 float2 center_next_motion = center_motion.zw() * -shutter_speed;
433 float4 center_color = input_image.load_pixel<float4>(texel);
437 float rand = interleaved_gradient_noise(texel);
438 int2 tile = (texel + int2(rand * 2.0f - 1.0f * float(MOTION_BLUR_TILE_SIZE) * 0.25f)) /
439 MOTION_BLUR_TILE_SIZE;
443 float4 max_motion = max_velocity.load_pixel<float4, true>(tile);
446 accum.weight = float3(0.0f, 0.0f, 1.0f);
447 accum.bg = float4(0.0f);
448 accum.fg = float4(0.0f);
450 gather_blur(input_image,
455 center_previous_motion,
464 gather_blur(input_image,
480 float w = 1.0f / (50.0f * float(samples_count) * 4.0f);
481 accum.bg += center_color * w;
487 center_color = accum.bg / accum.weight.x;
490 accum.fg += accum.bg;
491 accum.weight.y += accum.weight.x;
494 float blend_fac = math::clamp(1.0f - accum.weight.y / accum.weight.z, 0.0f, 1.0f);
495 float4 out_color = (accum.fg / accum.weight.z) + center_color * blend_fac;
497 output.store_pixel(texel, out_color);
516 if (this->
context().use_gpu()) {
543 input.bind_as_texture(shader,
"input_tx");
548 output.bind_as_image(shader,
"output_img");
553 input.unbind_as_texture();
567 gpu::Shader *shader =
context().get_shader(
"compositor_motion_blur_max_velocity_dilate");
577 const int size =
sizeof(uint32_t) * 512 * 512 * 2;
590 return tile_indirection_buffer;
602 input.bind_as_texture(shader,
"input_tx");
618 output.allocate_texture(domain);
619 output.bind_as_image(shader,
"output_img");
624 input.unbind_as_texture();
642 output.allocate_texture(domain);
646 this->
context(), max_tile_velocity, shutter_speed);
667 return math::max(0.0f, this->
get_input(
"Shutter").get_single_value_default(0.5f)) / 2.0f;
686 ntype.
ui_description =
"Uses the vector speed render pass to blur the image pixels in 2D";
689 ntype.
declare = file_ns::cmp_node_vec_blur_declare;
#define NODE_CLASS_OP_FILTER
void GPU_compute_dispatch(blender::gpu::Shader *shader, uint groups_x_len, uint groups_y_len, uint groups_z_len, const blender::gpu::shader::SpecializationConstants *constants_state=nullptr)
void GPU_shader_uniform_1b(blender::gpu::Shader *sh, const char *name, bool value)
void GPU_shader_uniform_1f(blender::gpu::Shader *sh, const char *name, float value)
void GPU_shader_bind(blender::gpu::Shader *shader, const blender::gpu::shader::SpecializationConstants *constants_state=nullptr)
int GPU_shader_get_ssbo_binding(blender::gpu::Shader *shader, const char *name)
void GPU_shader_uniform_1i(blender::gpu::Shader *sh, const char *name, int value)
@ GPU_BARRIER_SHADER_STORAGE
void GPU_memory_barrier(GPUBarrier barrier)
void GPU_storagebuf_free(blender::gpu::StorageBuf *ssbo)
blender::gpu::StorageBuf * GPU_storagebuf_create_ex(size_t size, const void *data, GPUUsageType usage, const char *name)
void GPU_storagebuf_clear_to_zero(blender::gpu::StorageBuf *ssbo)
void GPU_storagebuf_bind(blender::gpu::StorageBuf *ssbo, int slot)
void GPU_storagebuf_unbind(blender::gpu::StorageBuf *ssbo)
#define NOD_REGISTER_NODE(REGISTER_FUNC)
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
NodeOperation(Context &context, DNode node)
Result & get_result(StringRef identifier)
Context & context() const
virtual Domain compute_domain()
void unbind_as_texture() const
void bind_as_texture(gpu::Shader *shader, const char *texture_name) const
T load_pixel_extended(const int2 &texel) const
const Domain & domain() const
T load_pixel(const int2 &texel) const
float4 sample_bilinear_extended(const float2 &coordinates) const
bool is_single_value() const
const T & get_single_value() const
StructureType structure_type
Result compute_max_tile_velocity()
gpu::StorageBuf * dilate_max_velocity(Result &max_tile_velocity)
NodeOperation(Context &context, DNode node)
void compute_motion_blur(Result &max_tile_velocity, gpu::StorageBuf *tile_indirection_buffer)
#define MOTION_BLUR_TILE_SIZE
const ccl_global KernelWorkTile * tile
void node_register_type(bNodeType &ntype)
void compute_dispatch_threads_at_least(gpu::Shader *shader, int2 threads_range, int2 local_size=int2(16))
void parallel_for(const int2 range, const Function &function)
T length_squared(const VecBase< T, Size > &a)
T clamp(const T &a, const T &min, const T &max)
VecBase< T, Size > divide_ceil(const VecBase< T, Size > &a, const VecBase< T, Size > &b)
T length(const VecBase< T, Size > &a)
T dot(const QuaternionBase< T > &a, const QuaternionBase< T > &b)
T min(const T &a, const T &b)
T max(const T &a, const T &b)
static void gather_blur(const Result &input_image, const Result &input_depth, const Result &input_velocity, const int2 &size, const float2 &screen_uv, const float2 &center_motion, const float center_depth, const float2 &max_motion, const float ofs, const bool next, const int samples_count, const float shutter_speed, Accumulator &accum)
static uint32_t velocity_atomic_max_value(const float2 &value, const int2 &texel)
static void motion_blur_cpu(const Result &input_image, const Result &input_depth, const Result &input_velocity, const Result &max_velocity, Result &output, const int samples_count, const float shutter_speed)
static NodeOperation * get_compositor_operation(Context &context, DNode node)
static MotionRect compute_motion_rect(const int2 &tile, const float2 &motion, const int2 &size)
static bool is_inside_motion_line(const int2 &tile, const MotionLine &motion_line)
static float2 spread_compare(const float center_motion_length, const float sample_motion_length, const float offset_length)
static float interleaved_gradient_noise(const int2 &p)
static float2 max_velocity_approximate(const float2 &a, const float2 &b, const int2 &a_texel, const int2 &b_texel)
static float dir_compare(const float2 &offset, const float2 &sample_motion, const float &sample_motion_length)
static void gather_sample(const Result &input_image, const Result &input_depth, const Result &input_velocity, const int2 &size, const float2 &screen_uv, const float center_depth, const float center_motion_len, const float2 &offset, const float offset_len, const bool next, const float shutter_speed, Accumulator &accum)
static Result dilate_max_velocity_cpu(Context &context, const Result &max_tile_velocity, const float shutter_speed)
static Result compute_max_tile_velocity_cpu(Context &context, const Result &velocity_image)
static float2 depth_compare(const float center_depth, const float sample_depth)
static void cmp_node_vec_blur_declare(NodeDeclarationBuilder &b)
static MotionLine compute_motion_line(const int2 &tile, const float2 &motion)
static float2 sample_weights(const float center_depth, const float sample_depth, const float center_motion_length, const float sample_motion_length, const float offset_length)
static float2 max_velocity(const float2 &a, const float2 &b)
void parallel_for(const IndexRange range, const int64_t grain_size, const Function &function, const TaskSizeHints &size_hints=detail::TaskSizeHints_Static(1))
VecBase< float, 4 > float4
VecBase< int32_t, 2 > int2
VecBase< float, 2 > float2
VecBase< float, 3 > float3
void cmp_node_type_base(blender::bke::bNodeType *ntype, std::string idname, const std::optional< int16_t > legacy_type)
static void register_node_type_cmp_vecblur()
VecBase< T, 2 > zw() const
VecBase< T, 2 > xy() const
std::string ui_description
NodeGetCompositorOperationFunction get_compositor_operation
const char * enum_name_legacy
NodeDeclareFunction declare
static pxr::UsdShadeInput get_input(const pxr::UsdShadeShader &usd_shader, const pxr::TfToken &input_name)