Blender V5.0
symmetric_separable_blur.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2023 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
5#include "BLI_assert.h"
7
8#include "GPU_shader.hh"
9
10#include "COM_context.hh"
11#include "COM_result.hh"
12#include "COM_utilities.hh"
13
15
17
18namespace blender::compositor {
19
20template<typename T>
21static void blur_pass(const Result &input, const Result &weights, Result &output)
22{
23 /* Notice that the size is transposed, see the note on the horizontal pass method for more
24 * information on the reasoning behind this. */
25 const int2 size = int2(output.domain().size.y, output.domain().size.x);
26 parallel_for(size, [&](const int2 texel) {
27 T accumulated_color = T(0);
28
29 /* First, compute the contribution of the center pixel. */
30 T center_color = input.load_pixel_extended<T>(texel);
31 accumulated_color += center_color * weights.load_pixel<float>(int2(0));
32
33 /* Then, compute the contributions of the pixel to the right and left, noting that the
34 * weights texture only stores the weights for the positive half, but since the filter is
35 * symmetric, the same weight is used for the negative half and we add both of their
36 * contributions. */
37 for (int i = 1; i < weights.domain().size.x; i++) {
38 float weight = weights.load_pixel<float>(int2(i, 0));
39 accumulated_color += input.load_pixel_extended<T>(texel + int2(i, 0)) * weight;
40 accumulated_color += input.load_pixel_extended<T>(texel + int2(-i, 0)) * weight;
41 }
42
43 /* Write the color using the transposed texel. See the horizontal_pass method for more
44 * information on the rational behind this. */
45 output.store_pixel(int2(texel.y, texel.x), accumulated_color);
46 });
47}
48
49static const char *get_blur_shader(const ResultType type)
50{
51 switch (type) {
53 return "compositor_symmetric_separable_blur_float";
55 return "compositor_symmetric_separable_blur_float4";
56 default:
57 break;
58 }
59
61 return nullptr;
62}
63
65 const Result &input,
66 const float radius,
67 const int filter_type)
68{
69 gpu::Shader *shader = context.get_shader(get_blur_shader(input.type()));
70 GPU_shader_bind(shader);
71
72 input.bind_as_texture(shader, "input_tx");
73
74 const Result &weights = context.cache_manager().symmetric_separable_blur_weights.get(
75 context, filter_type, radius);
76 weights.bind_as_texture(shader, "weights_tx");
77
78 /* We allocate an output image of a transposed size, that is, with a height equivalent to the
79 * width of the input and vice versa. This is done as a performance optimization. The shader
80 * will blur the image horizontally and write it to the intermediate output transposed. Then
81 * the vertical pass will execute the same horizontal blur shader, but since its input is
82 * transposed, it will effectively do a vertical blur and write to the output transposed,
83 * effectively undoing the transposition in the horizontal pass. This is done to improve
84 * spatial cache locality in the shader and to avoid having two separate shaders for each blur
85 * pass. */
86 Domain domain = input.domain();
87 const int2 transposed_domain = int2(domain.size.y, domain.size.x);
88
89 Result output = context.create_result(input.type());
90 output.allocate_texture(transposed_domain);
91 output.bind_as_image(shader, "output_img");
92
94
96 input.unbind_as_texture();
97 weights.unbind_as_texture();
98 output.unbind_as_image();
99
100 return output;
101}
102
104 const Result &input,
105 const float radius,
106 const int filter_type)
107{
108 const Result &weights = context.cache_manager().symmetric_separable_blur_weights.get(
109 context, filter_type, radius);
110
111 /* We allocate an output image of a transposed size, that is, with a height equivalent to the
112 * width of the input and vice versa. This is done as a performance optimization. The shader
113 * will blur the image horizontally and write it to the intermediate output transposed. Then
114 * the vertical pass will execute the same horizontal blur shader, but since its input is
115 * transposed, it will effectively do a vertical blur and write to the output transposed,
116 * effectively undoing the transposition in the horizontal pass. This is done to improve
117 * spatial cache locality in the shader and to avoid having two separate shaders for each blur
118 * pass. */
119 const Domain domain = input.domain();
120 const int2 transposed_domain = int2(domain.size.y, domain.size.x);
121
122 Result output = context.create_result(input.type());
123 output.allocate_texture(transposed_domain);
124
125 switch (input.type()) {
127 blur_pass<float>(input, weights, output);
128 break;
130 blur_pass<float4>(input, weights, output);
131 break;
132 default:
134 break;
135 }
136
137 return output;
138}
139
141 const Result &input,
142 const float radius,
143 const int filter_type)
144{
145 if (context.use_gpu()) {
146 return horizontal_pass_gpu(context, input, radius, filter_type);
147 }
148 return horizontal_pass_cpu(context, input, radius, filter_type);
149}
150
151static void vertical_pass_gpu(Context &context,
152 const Result &original_input,
153 const Result &horizontal_pass_result,
154 Result &output,
155 const float2 &radius,
156 const int filter_type)
157{
158 gpu::Shader *shader = context.get_shader(get_blur_shader(original_input.type()));
159 GPU_shader_bind(shader);
160
161 horizontal_pass_result.bind_as_texture(shader, "input_tx");
162
163 const Result &weights = context.cache_manager().symmetric_separable_blur_weights.get(
164 context, filter_type, radius.y);
165 weights.bind_as_texture(shader, "weights_tx");
166
167 const Domain domain = original_input.domain();
168 output.allocate_texture(domain);
169 output.bind_as_image(shader, "output_img");
170
171 /* Notice that the domain is transposed, see the note on the horizontal pass method for more
172 * information on the reasoning behind this. */
173 compute_dispatch_threads_at_least(shader, int2(domain.size.y, domain.size.x));
174
176 horizontal_pass_result.unbind_as_texture();
177 output.unbind_as_image();
178 weights.unbind_as_texture();
179}
180
181static void vertical_pass_cpu(Context &context,
182 const Result &original_input,
183 const Result &horizontal_pass_result,
184 Result &output,
185 const float2 &radius,
186 const int filter_type)
187{
188 const Result &weights = context.cache_manager().symmetric_separable_blur_weights.get(
189 context, filter_type, radius.y);
190
191 output.allocate_texture(original_input.domain());
192
193 switch (original_input.type()) {
195 blur_pass<float>(horizontal_pass_result, weights, output);
196 break;
198 blur_pass<float4>(horizontal_pass_result, weights, output);
199 break;
200 default:
202 break;
203 }
204}
205
206static void vertical_pass(Context &context,
207 const Result &original_input,
208 const Result &horizontal_pass_result,
209 Result &output,
210 const float2 &radius,
211 const int filter_type)
212{
213 if (context.use_gpu()) {
215 context, original_input, horizontal_pass_result, output, radius, filter_type);
216 }
217 else {
219 context, original_input, horizontal_pass_result, output, radius, filter_type);
220 }
221}
222
224 const Result &input,
225 Result &output,
226 const float2 &radius,
227 const int filter_type)
228{
229 Result horizontal_pass_result = horizontal_pass(context, input, radius.x, filter_type);
230 vertical_pass(context, input, horizontal_pass_result, output, radius, filter_type);
231 horizontal_pass_result.release();
232}
233
234} // namespace blender::compositor
#define BLI_assert_unreachable()
Definition BLI_assert.h:93
void GPU_shader_bind(blender::gpu::Shader *shader, const blender::gpu::shader::SpecializationConstants *constants_state=nullptr)
void GPU_shader_unbind()
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition btDbvt.cpp:52
void unbind_as_texture() const
Definition result.cc:511
static ResultType type(blender::gpu::TextureFormat format)
Definition result.cc:261
void bind_as_texture(gpu::Shader *shader, const char *texture_name) const
Definition result.cc:487
const Domain & domain() const
T load_pixel(const int2 &texel) const
#define input
#define output
#define T
void symmetric_separable_blur(Context &context, const Result &input, Result &output, const float2 &radius, const int filter_type=R_FILTER_GAUSS)
static void vertical_pass_cpu(Context &context, const Result &original_input, const Result &horizontal_pass_result, Result &output, const int distance, const int falloff_type)
void compute_dispatch_threads_at_least(gpu::Shader *shader, int2 threads_range, int2 local_size=int2(16))
Definition utilities.cc:196
static void vertical_pass(Context &context, const Result &original_input, const Result &horizontal_pass_result, Result &output, const int distance, const int falloff_type)
static Result horizontal_pass_cpu(Context &context, const Result &input, const int distance, const int falloff_type)
static const char * get_blur_shader(const ResultType type)
static Result horizontal_pass_gpu(Context &context, const Result &input, const int distance, const int falloff_type)
static Result horizontal_pass(Context &context, const Result &input, const int distance, const int falloff_type)
void parallel_for(const int2 range, const Function &function)
static void vertical_pass_gpu(Context &context, const Result &original_input, const Result &horizontal_pass_result, Result &output, const int distance, const int falloff_type)
static void blur_pass(Context &context, const Result &input, Result &output, const float sigma)
VecBase< int32_t, 2 > int2
VecBase< float, 2 > float2
i
Definition text_draw.cc:230