24 const Result &causal_input,
25 const Result &non_causal_input,
28 gpu::Shader *shader = context.get_shader(
"compositor_deriche_gaussian_blur_sum");
36 output.allocate_texture(transposed_domain);
37 output.bind_as_image(shader,
"output_img");
49 const Result &non_causal_input,
54 output.allocate_texture(transposed_domain);
59 float4 filter_output = causal_input.load_pixel<float4>(texel) +
60 non_causal_input.load_pixel<float4>(texel);
64 output.store_pixel(int2(texel.y, texel.x), filter_output);
81 const Result &causal_input,
82 const Result &non_causal_input,
85 if (context.use_gpu()) {
99 gpu::Shader *shader = context.get_shader(
"compositor_deriche_gaussian_blur");
103 context.cache_manager().deriche_gaussian_coefficients.get(context, sigma);
106 "causal_feedforward_coefficients",
109 "non_causal_feedforward_coefficients",
116 "non_causal_boundary_coefficient",
119 input.bind_as_texture(shader,
"input_tx");
125 non_causal_result.
bind_as_image(shader,
"non_causal_output_img");
132 input.unbind_as_texture();
140 Result &non_causal_output,
144 context.cache_manager().deriche_gaussian_coefficients.get(context, sigma);
148 const float4 non_causal_feedforward_coefficients =
float4(
152 const float non_causal_boundary_coefficient =
float(
173 int y = invocation.y;
174 int width =
input.domain().size.x;
178 bool is_causal = invocation.x == 0;
179 float4 feedforward_coefficients = is_causal ? causal_feedforward_coefficients :
180 non_causal_feedforward_coefficients;
181 float boundary_coefficient = is_causal ? causal_boundary_coefficient :
182 non_causal_boundary_coefficient;
187 int2 boundary_texel = is_causal ?
int2(0,
y) :
int2(width - 1,
y);
190 input_boundary, input_boundary, input_boundary, input_boundary, input_boundary};
197 float4 output_boundary = input_boundary * boundary_coefficient;
199 output_boundary, output_boundary, output_boundary, output_boundary, output_boundary};
201 for (
int x = 0;
x < width;
x++) {
211 int first_input_index = is_causal ? 0 : 1;
213 outputs[0] += feedforward_coefficients[
i] *
inputs[first_input_index +
i];
248 if (context.use_gpu()) {
266 "Deriche filter is slower and less accurate than direct convolution for sigma "
267 "values less 3. Use direct convolution blur instead.");
269 "Deriche filter is not accurate nor numerically stable for sigma values larger "
270 "than 32. Use Van Vliet filter instead.");
275 horizontal_pass_result.
release();
#define BLI_assert_msg(a, msg)
void GPU_shader_uniform_1f(blender::gpu::Shader *sh, const char *name, float value)
void GPU_shader_uniform_4fv(blender::gpu::Shader *sh, const char *name, const float data[4])
void GPU_shader_bind(blender::gpu::Shader *shader, const blender::gpu::shader::SpecializationConstants *constants_state=nullptr)
double non_causal_boundary_coefficient() const
const double4 & causal_feedforward_coefficients() const
double causal_boundary_coefficient() const
const double4 & feedback_coefficients() const
const double4 & non_causal_feedforward_coefficients() const
Domain transposed() const
void store_pixel(const int2 &texel, const T &pixel_value)
void allocate_texture(const Domain domain, const bool from_pool=true, const std::optional< ResultStorageType > storage_type=std::nullopt)
void unbind_as_texture() const
void bind_as_texture(gpu::Shader *shader, const char *texture_name) const
const Domain & domain() const
void unbind_as_image() const
void bind_as_image(gpu::Shader *shader, const char *image_name, bool read=false) const
static void sum_causal_and_non_causal_results_cpu(const Result &causal_input, const Result &non_causal_input, Result &output)
void compute_dispatch_threads_at_least(gpu::Shader *shader, int2 threads_range, int2 local_size=int2(16))
static void sum_causal_and_non_causal_results(Context &context, const Result &causal_input, const Result &non_causal_input, Result &output)
static void blur_pass_cpu(Context &context, const Result &input, Result &causal_output, Result &non_causal_output, const float sigma)
void deriche_gaussian_blur(Context &context, const Result &input, Result &output, const float2 &sigma)
void parallel_for(const int2 range, const Function &function)
static void blur_pass_gpu(Context &context, const Result &input, Result &causal_result, Result &non_causal_result, const float sigma)
static void sum_causal_and_non_causal_results_gpu(Context &context, const Result &causal_input, const Result &non_causal_input, Result &output)
static void blur_pass(Context &context, const Result &input, Result &output, const float sigma)
T reduce_max(const VecBase< T, Size > &a)
VecBase< float, 4 > float4
VecBase< int32_t, 2 > int2
VecBase< float, 2 > float2
static blender::bke::bNodeSocketTemplate outputs[]
static blender::bke::bNodeSocketTemplate inputs[]