Blender V4.5
symmetric_separable_blur.cc
Go to the documentation of this file.
/* SPDX-FileCopyrightText: 2023 Blender Authors
 *
 * SPDX-License-Identifier: GPL-2.0-or-later */
4
5#include "BLI_assert.h"
6#include "BLI_math_base.hh"
7#include "BLI_math_vector.hh"
9
10#include "GPU_shader.hh"
11
12#include "COM_context.hh"
13#include "COM_result.hh"
14#include "COM_utilities.hh"
15
17
19
20namespace blender::compositor {
21
22template<typename T, bool ExtendBounds>
23static void blur_pass(const Result &input, const Result &weights, Result &output)
24{
25 /* Loads the input color of the pixel at the given texel. If bounds are extended, then the input
26 * is treated as padded by a blur size amount of pixels of zero color, and the given texel is
27 * assumed to be in the space of the image after padding. So we offset the texel by the blur
28 * radius amount and fallback to a zero color if it is out of bounds. For instance, if the input
29 * is padded by 5 pixels to the left of the image, the first 5 pixels should be out of bounds and
30 * thus zero, hence the introduced offset. */
31 auto load_input = [&](const int2 texel) {
32 T color;
33 if constexpr (ExtendBounds) {
34 /* Notice that we subtract 1 because the weights result have an extra center weight, see the
35 * SymmetricBlurWeights class for more information. */
36 int2 blur_radius = weights.domain().size - 1;
37 color = input.load_pixel_zero<T>(texel - blur_radius);
38 }
39 else {
40 color = input.load_pixel_extended<T>(texel);
41 }
42
43 return color;
44 };
45
46 /* Notice that the size is transposed, see the note on the horizontal pass method for more
47 * information on the reasoning behind this. */
48 const int2 size = int2(output.domain().size.y, output.domain().size.x);
49 parallel_for(size, [&](const int2 texel) {
50 T accumulated_color = T(0);
51
52 /* First, compute the contribution of the center pixel. */
53 T center_color = load_input(texel);
54 accumulated_color += center_color * weights.load_pixel<float>(int2(0));
55
56 /* Then, compute the contributions of the pixel to the right and left, noting that the
57 * weights texture only stores the weights for the positive half, but since the filter is
58 * symmetric, the same weight is used for the negative half and we add both of their
59 * contributions. */
60 for (int i = 1; i < weights.domain().size.x; i++) {
61 float weight = weights.load_pixel<float>(int2(i, 0));
62 accumulated_color += load_input(texel + int2(i, 0)) * weight;
63 accumulated_color += load_input(texel + int2(-i, 0)) * weight;
64 }
65
66 /* Write the color using the transposed texel. See the horizontal_pass method for more
67 * information on the rational behind this. */
68 output.store_pixel(int2(texel.y, texel.x), accumulated_color);
69 });
70}
71
72static const char *get_blur_shader(const ResultType type)
73{
74 switch (type) {
76 return "compositor_symmetric_separable_blur_float";
78 return "compositor_symmetric_separable_blur_float4";
79 default:
80 break;
81 }
82
84 return nullptr;
85}
86
88 const Result &input,
89 const float radius,
90 const int filter_type,
91 const bool extend_bounds)
92{
93 GPUShader *shader = context.get_shader(get_blur_shader(input.type()));
94 GPU_shader_bind(shader);
95
96 GPU_shader_uniform_1b(shader, "extend_bounds", extend_bounds);
97
98 input.bind_as_texture(shader, "input_tx");
99
100 const Result &weights = context.cache_manager().symmetric_separable_blur_weights.get(
101 context, filter_type, radius);
102 weights.bind_as_texture(shader, "weights_tx");
103
104 Domain domain = input.domain();
105 if (extend_bounds) {
106 domain.size.x += int(math::ceil(radius)) * 2;
107 }
108
109 /* We allocate an output image of a transposed size, that is, with a height equivalent to the
110 * width of the input and vice versa. This is done as a performance optimization. The shader
111 * will blur the image horizontally and write it to the intermediate output transposed. Then
112 * the vertical pass will execute the same horizontal blur shader, but since its input is
113 * transposed, it will effectively do a vertical blur and write to the output transposed,
114 * effectively undoing the transposition in the horizontal pass. This is done to improve
115 * spatial cache locality in the shader and to avoid having two separate shaders for each blur
116 * pass. */
117 const int2 transposed_domain = int2(domain.size.y, domain.size.x);
118
119 Result output = context.create_result(input.type());
120 output.allocate_texture(transposed_domain);
121 output.bind_as_image(shader, "output_img");
122
124
126 input.unbind_as_texture();
127 weights.unbind_as_texture();
128 output.unbind_as_image();
129
130 return output;
131}
132
134 const Result &input,
135 const float radius,
136 const int filter_type,
137 const bool extend_bounds)
138{
139 const Result &weights = context.cache_manager().symmetric_separable_blur_weights.get(
140 context, filter_type, radius);
141
142 Domain domain = input.domain();
143 if (extend_bounds) {
144 domain.size.x += int(math::ceil(radius)) * 2;
145 }
146
147 /* We allocate an output image of a transposed size, that is, with a height equivalent to the
148 * width of the input and vice versa. This is done as a performance optimization. The shader
149 * will blur the image horizontally and write it to the intermediate output transposed. Then
150 * the vertical pass will execute the same horizontal blur shader, but since its input is
151 * transposed, it will effectively do a vertical blur and write to the output transposed,
152 * effectively undoing the transposition in the horizontal pass. This is done to improve
153 * spatial cache locality in the shader and to avoid having two separate shaders for each blur
154 * pass. */
155 const int2 transposed_domain = int2(domain.size.y, domain.size.x);
156
157 Result output = context.create_result(input.type());
158 output.allocate_texture(transposed_domain);
159
160 switch (input.type()) {
162 if (extend_bounds) {
164 }
165 else {
167 }
168 break;
170 if (extend_bounds) {
172 }
173 else {
175 }
176 break;
177 default:
179 break;
180 }
181
182 return output;
183}
184
186 const Result &input,
187 const float radius,
188 const int filter_type,
189 const bool extend_bounds)
190{
191 if (context.use_gpu()) {
192 return horizontal_pass_gpu(context, input, radius, filter_type, extend_bounds);
193 }
194 return horizontal_pass_cpu(context, input, radius, filter_type, extend_bounds);
195}
196
197static void vertical_pass_gpu(Context &context,
198 const Result &original_input,
199 const Result &horizontal_pass_result,
200 Result &output,
201 const float2 &radius,
202 const int filter_type,
203 const bool extend_bounds)
204{
205 GPUShader *shader = context.get_shader(get_blur_shader(original_input.type()));
206 GPU_shader_bind(shader);
207
208 GPU_shader_uniform_1b(shader, "extend_bounds", extend_bounds);
209
210 horizontal_pass_result.bind_as_texture(shader, "input_tx");
211
212 const Result &weights = context.cache_manager().symmetric_separable_blur_weights.get(
213 context, filter_type, radius.y);
214 weights.bind_as_texture(shader, "weights_tx");
215
216 Domain domain = original_input.domain();
217 if (extend_bounds) {
218 /* Add a radius amount of pixels in both sides of the image, hence the multiply by 2. */
219 domain.size += int2(math::ceil(radius)) * 2;
220 }
221
222 output.allocate_texture(domain);
223 output.bind_as_image(shader, "output_img");
224
225 /* Notice that the domain is transposed, see the note on the horizontal pass method for more
226 * information on the reasoning behind this. */
227 compute_dispatch_threads_at_least(shader, int2(domain.size.y, domain.size.x));
228
230 horizontal_pass_result.unbind_as_texture();
231 output.unbind_as_image();
232 weights.unbind_as_texture();
233}
234
235static void vertical_pass_cpu(Context &context,
236 const Result &original_input,
237 const Result &horizontal_pass_result,
238 Result &output,
239 const float2 &radius,
240 const int filter_type,
241 const bool extend_bounds)
242{
243 const Result &weights = context.cache_manager().symmetric_separable_blur_weights.get(
244 context, filter_type, radius.y);
245
246 Domain domain = original_input.domain();
247 if (extend_bounds) {
248 /* Add a radius amount of pixels in both sides of the image, hence the multiply by 2. */
249 domain.size += int2(math::ceil(radius)) * 2;
250 }
251 output.allocate_texture(domain);
252
253 switch (original_input.type()) {
255 if (extend_bounds) {
256 blur_pass<float, true>(horizontal_pass_result, weights, output);
257 }
258 else {
259 blur_pass<float, false>(horizontal_pass_result, weights, output);
260 }
261 break;
263 if (extend_bounds) {
264 blur_pass<float4, true>(horizontal_pass_result, weights, output);
265 }
266 else {
267 blur_pass<float4, false>(horizontal_pass_result, weights, output);
268 }
269 break;
270 default:
272 break;
273 }
274}
275
276static void vertical_pass(Context &context,
277 const Result &original_input,
278 const Result &horizontal_pass_result,
279 Result &output,
280 const float2 &radius,
281 const int filter_type,
282 const bool extend_bounds)
283{
284 if (context.use_gpu()) {
285 vertical_pass_gpu(context,
286 original_input,
287 horizontal_pass_result,
288 output,
289 radius,
290 filter_type,
291 extend_bounds);
292 }
293 else {
294 vertical_pass_cpu(context,
295 original_input,
296 horizontal_pass_result,
297 output,
298 radius,
299 filter_type,
300 extend_bounds);
301 }
302}
303
305 const Result &input,
306 Result &output,
307 const float2 &radius,
308 const int filter_type,
309 const bool extend_bounds)
310{
311 Result horizontal_pass_result = horizontal_pass(
312 context, input, radius.x, filter_type, extend_bounds);
313
315 context, input, horizontal_pass_result, output, radius, filter_type, extend_bounds);
316
317 horizontal_pass_result.release();
318}
319
320} // namespace blender::compositor
#define BLI_assert_unreachable()
Definition BLI_assert.h:93
void GPU_shader_bind(GPUShader *shader, const blender::gpu::shader::SpecializationConstants *constants_state=nullptr)
void GPU_shader_uniform_1b(GPUShader *sh, const char *name, bool value)
void GPU_shader_unbind()
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition btDbvt.cpp:52
void unbind_as_texture() const
Definition result.cc:389
static ResultType type(eGPUTextureFormat format)
Definition result.cc:194
void bind_as_texture(GPUShader *shader, const char *texture_name) const
Definition result.cc:365
const Domain & domain() const
T load_pixel(const int2 &texel) const
#define input
#define output
#define T
static void vertical_pass_cpu(Context &context, const Result &original_input, const Result &horizontal_pass_result, Result &output, const int distance, const int falloff_type)
void compute_dispatch_threads_at_least(GPUShader *shader, int2 threads_range, int2 local_size=int2(16))
Definition utilities.cc:170
void symmetric_separable_blur(Context &context, const Result &input, Result &output, const float2 &radius, const int filter_type=R_FILTER_GAUSS, const bool extend_bounds=false)
static void vertical_pass(Context &context, const Result &original_input, const Result &horizontal_pass_result, Result &output, const int distance, const int falloff_type)
static Result horizontal_pass_cpu(Context &context, const Result &input, const int distance, const int falloff_type)
static const char * get_blur_shader(const ResultType type)
static Result horizontal_pass_gpu(Context &context, const Result &input, const int distance, const int falloff_type)
static Result horizontal_pass(Context &context, const Result &input, const int distance, const int falloff_type)
void parallel_for(const int2 range, const Function &function)
static void vertical_pass_gpu(Context &context, const Result &original_input, const Result &horizontal_pass_result, Result &output, const int distance, const int falloff_type)
static void blur_pass(Context &context, const Result &input, Result &output, const float sigma)
T ceil(const T &a)
VecBase< int32_t, 2 > int2
VecBase< float, 2 > float2
i
Definition text_draw.cc:230