Blender V4.3
deriche_gaussian_blur.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2024 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
5#include "BLI_assert.h"
6#include "BLI_math_base.hh"
7#include "BLI_math_vector.hh"
8
9#include "GPU_shader.hh"
10
11#include "COM_context.hh"
12#include "COM_result.hh"
13#include "COM_utilities.hh"
14
17
19
20/* Sum the causal and non causal outputs of the filter and write the sum to the output. This is
21 * because the Deriche filter is a parallel interconnection filter, meaning its output is the sum
22 * of its causal and non causal filters. The output is expected not to be allocated as it will be
23 * allocated internally.
24 *
25 * The output is allocated and written transposed, that is, with a height equivalent to the width
26 * of the input and vice versa. This is done as a performance optimization. The blur pass will
27 * blur the image horizontally and write it to the intermediate output transposed. Then the
28 * vertical pass will execute the same horizontal blur shader, but since its input is transposed,
29 * it will effectively do a vertical blur and write to the output transposed, effectively undoing
30 * the transposition in the horizontal pass. This is done to improve spatial cache locality in the
31 * shader and to avoid having two separate shaders for each blur pass. */
33 Result &causal_input,
34 Result &non_causal_input,
35 Result &output)
36{
37 GPUShader *shader = context.get_shader("compositor_deriche_gaussian_blur_sum");
38 GPU_shader_bind(shader);
39
40 causal_input.bind_as_texture(shader, "causal_input_tx");
41 non_causal_input.bind_as_texture(shader, "non_causal_input_tx");
42
43 const Domain domain = causal_input.domain();
44 const int2 transposed_domain = int2(domain.size.y, domain.size.x);
45 output.allocate_texture(transposed_domain);
46 output.bind_as_image(shader, "output_img");
47
48 compute_dispatch_threads_at_least(shader, domain.size);
49
51 causal_input.unbind_as_texture();
52 non_causal_input.unbind_as_texture();
53 output.unbind_as_image();
54}
55
56static void blur_pass(Context &context, Result &input, Result &output, float sigma)
57{
58 GPUShader *shader = context.get_shader("compositor_deriche_gaussian_blur");
59 GPU_shader_bind(shader);
60
61 const DericheGaussianCoefficients &coefficients =
62 context.cache_manager().deriche_gaussian_coefficients.get(context, sigma);
63
65 "causal_feedforward_coefficients",
68 "non_causal_feedforward_coefficients",
71 shader, "feedback_coefficients", float4(coefficients.feedback_coefficients()));
73 shader, "causal_boundary_coefficient", float(coefficients.causal_boundary_coefficient()));
75 "non_causal_boundary_coefficient",
76 float(coefficients.non_causal_boundary_coefficient()));
77
78 input.bind_as_texture(shader, "input_tx");
79
80 const Domain domain = input.domain();
81
82 Result causal_result = context.create_result(ResultType::Color);
83 causal_result.allocate_texture(domain);
84 causal_result.bind_as_image(shader, "causal_output_img");
85
86 Result non_causal_result = context.create_result(ResultType::Color);
87 non_causal_result.allocate_texture(domain);
88 non_causal_result.bind_as_image(shader, "non_causal_output_img");
89
90 /* The second dispatch dimension is two dispatches, one for the causal filter and one for the non
91 * causal one. */
92 compute_dispatch_threads_at_least(shader, int2(domain.size.y, 2), int2(128, 2));
93
95 input.unbind_as_texture();
96 causal_result.unbind_as_image();
97 non_causal_result.unbind_as_image();
98
99 sum_causal_and_non_causal_results(context, causal_result, non_causal_result, output);
100 causal_result.release();
101 non_causal_result.release();
102}
103
104void deriche_gaussian_blur(Context &context, Result &input, Result &output, float2 sigma)
105{
106 BLI_assert_msg(math::reduce_max(sigma) >= 3.0f,
107 "Deriche filter is slower and less accurate than direct convolution for sigma "
108 "values less 3. Use direct convolution blur instead.");
109 BLI_assert_msg(math::reduce_max(sigma) < 32.0f,
110 "Deriche filter is not accurate nor numerically stable for sigma values larger "
111 "than 32. Use Van Vliet filter instead.");
112
113 Result horizontal_pass_result = context.create_result(ResultType::Color);
114 blur_pass(context, input, horizontal_pass_result, sigma.x);
115 blur_pass(context, horizontal_pass_result, output, sigma.y);
116 horizontal_pass_result.release();
117}
118
119} // namespace blender::realtime_compositor
#define BLI_assert_msg(a, msg)
Definition BLI_assert.h:57
void GPU_shader_uniform_1f(GPUShader *sh, const char *name, float value)
void GPU_shader_bind(GPUShader *shader)
void GPU_shader_uniform_4fv(GPUShader *sh, const char *name, const float data[4])
void GPU_shader_unbind()
struct GPUShader GPUShader
void bind_as_image(GPUShader *shader, const char *image_name, bool read=false) const
Definition result.cc:264
const Domain & domain() const
Definition result.cc:712
void allocate_texture(Domain domain, bool from_pool=true)
Definition result.cc:204
void bind_as_texture(GPUShader *shader, const char *texture_name) const
Definition result.cc:253
T reduce_max(const VecBase< T, Size > &a)
static void blur_pass(Context &context, Result &input, Result &output, float sigma)
static void sum_causal_and_non_causal_results(Context &context, Result &causal_input, Result &non_causal_input, Result &output)
void deriche_gaussian_blur(Context &context, Result &input, Result &output, float2 sigma)
void compute_dispatch_threads_at_least(GPUShader *shader, int2 threads_range, int2 local_size=int2(16))
Definition utilities.cc:131
VecBase< float, 4 > float4
VecBase< int32_t, 2 > int2