Blender V5.0
symmetric_separable_blur_variable_size.cc
Go to the documentation of this file.
/* SPDX-FileCopyrightText: 2023 Blender Authors
 *
 * SPDX-License-Identifier: GPL-2.0-or-later */
4
5#include "BLI_assert.h"
7
8#include "GPU_shader.hh"
9#include "GPU_texture.hh"
10
11#include "COM_context.hh"
12#include "COM_result.hh"
13#include "COM_utilities.hh"
14
16
18
19namespace blender::compositor {
20
21static void blur_pass(const Result &input,
22 const Result &radius_input,
23 const Result &weights,
25 const bool is_vertical_pass)
26{
27 /* Notice that the size is transposed, see the note on the horizontal pass method for more
28 * information on the reasoning behind this. */
29 const int2 size = int2(output.domain().size.y, output.domain().size.x);
30 parallel_for(size, [&](const int2 texel) {
31 float accumulated_weight = 0.0f;
32 float4 accumulated_color = float4(0.0f);
33
34 /* First, compute the contribution of the center pixel. */
35 float4 center_color = input.load_pixel<float4>(texel);
36 float center_weight = weights.load_pixel<float>(int2(0));
37 accumulated_color += center_color * center_weight;
38 accumulated_weight += center_weight;
39
40 /* The dispatch domain is transposed in the vertical pass, so make sure to reverse transpose
41 * the texel coordinates when loading the radius. See the horizontal_pass function for more
42 * information. */
43 int radius = int(
44 radius_input.load_pixel<float>(is_vertical_pass ? int2(texel.y, texel.x) : texel));
45
46 /* Then, compute the contributions of the pixel to the right and left, noting that the
47 * weights texture only stores the weights for the positive half, but since the filter is
48 * symmetric, the same weight is used for the negative half and we add both of their
49 * contributions. */
50 for (int i = 1; i <= radius; i++) {
51 /* Add 0.5 to evaluate at the center of the pixels. */
52 float weight =
53 weights.sample_bilinear_extended(float2((float(i) + 0.5f) / float(radius + 1), 0.0f)).x;
54 accumulated_color += input.load_pixel_extended<float4>(texel + int2(i, 0)) * weight;
55 accumulated_color += input.load_pixel_extended<float4>(texel + int2(-i, 0)) * weight;
56 accumulated_weight += weight * 2.0f;
57 }
58
59 /* Write the color using the transposed texel. See the horizontal_pass_cpu function for more
60 * information on the rational behind this. */
61 output.store_pixel(int2(texel.y, texel.x), accumulated_color / accumulated_weight);
62 });
63}
64
66 const Result &input,
67 const Result &radius,
68 const int weights_resolution,
69 const int filter_type)
70{
71 gpu::Shader *shader = context.get_shader("compositor_symmetric_separable_blur_variable_size");
72 GPU_shader_bind(shader);
73
74 GPU_shader_uniform_1b(shader, "is_vertical_pass", false);
75
76 input.bind_as_texture(shader, "input_tx");
77
78 const Result &weights = context.cache_manager().symmetric_separable_blur_weights.get(
79 context, filter_type, weights_resolution);
80 GPU_texture_filter_mode(weights, true);
82 weights.bind_as_texture(shader, "weights_tx");
83
84 radius.bind_as_texture(shader, "radius_tx");
85
86 /* We allocate an output image of a transposed size, that is, with a height equivalent to the
87 * width of the input and vice versa. This is done as a performance optimization. The shader
88 * will blur the image horizontally and write it to the intermediate output transposed. Then
89 * the vertical pass will execute the same horizontal blur shader, but since its input is
90 * transposed, it will effectively do a vertical blur and write to the output transposed,
91 * effectively undoing the transposition in the horizontal pass. This is done to improve
92 * spatial cache locality in the shader and to avoid having two separate shaders for each blur
93 * pass. */
94 Domain domain = input.domain();
95 const int2 transposed_domain = int2(domain.size.y, domain.size.x);
96
97 Result output = context.create_result(input.type());
98 output.allocate_texture(transposed_domain);
99 output.bind_as_image(shader, "output_img");
100
102
104 input.unbind_as_texture();
105 weights.unbind_as_texture();
106 radius.unbind_as_texture();
107 output.unbind_as_image();
108
109 return output;
110}
111
113 const Result &input,
114 const Result &radius,
115 const int weights_resolution,
116 const int filter_type)
117{
118 const Result &weights = context.cache_manager().symmetric_separable_blur_weights.get(
119 context, filter_type, weights_resolution);
120
121 /* We allocate an output image of a transposed size, that is, with a height equivalent to the
122 * width of the input and vice versa. This is done as a performance optimization. The shader
123 * will blur the image horizontally and write it to the intermediate output transposed. Then
124 * the vertical pass will execute the same horizontal blur shader, but since its input is
125 * transposed, it will effectively do a vertical blur and write to the output transposed,
126 * effectively undoing the transposition in the horizontal pass. This is done to improve
127 * spatial cache locality in the shader and to avoid having two separate shaders for each blur
128 * pass. */
129 Domain domain = input.domain();
130 const int2 transposed_domain = int2(domain.size.y, domain.size.x);
131
132 Result output = context.create_result(input.type());
133 output.allocate_texture(transposed_domain);
134
135 blur_pass(input, radius, weights, output, false);
136
137 return output;
138}
139
141 const Result &input,
142 const Result &radius,
143 const int weights_resolution,
144 const int filter_type)
145{
146 if (context.use_gpu()) {
147 return horizontal_pass_gpu(context, input, radius, weights_resolution, filter_type);
148 }
149 return horizontal_pass_cpu(context, input, radius, weights_resolution, filter_type);
150}
151
152static void vertical_pass_gpu(Context &context,
153 const Result &original_input,
154 const Result &horizontal_pass_result,
155 const Result &radius,
156 Result &output,
157 const int weights_resolution,
158 const int filter_type)
159{
160 gpu::Shader *shader = context.get_shader("compositor_symmetric_separable_blur_variable_size");
161 GPU_shader_bind(shader);
162
163 GPU_shader_uniform_1b(shader, "is_vertical_pass", true);
164
165 horizontal_pass_result.bind_as_texture(shader, "input_tx");
166
167 const Result &weights = context.cache_manager().symmetric_separable_blur_weights.get(
168 context, filter_type, weights_resolution);
169 GPU_texture_filter_mode(weights, true);
171 weights.bind_as_texture(shader, "weights_tx");
172
173 radius.bind_as_texture(shader, "radius_tx");
174
175 Domain domain = original_input.domain();
176 output.allocate_texture(domain);
177 output.bind_as_image(shader, "output_img");
178
179 /* Notice that the domain is transposed, see the note on the horizontal pass method for more
180 * information on the reasoning behind this. */
181 compute_dispatch_threads_at_least(shader, int2(domain.size.y, domain.size.x));
182
184 horizontal_pass_result.unbind_as_texture();
185 output.unbind_as_image();
186 weights.unbind_as_texture();
187 radius.unbind_as_texture();
188}
189
190static void vertical_pass_cpu(Context &context,
191 const Result &original_input,
192 const Result &horizontal_pass_result,
193 const Result &radius,
194 Result &output,
195 const int weights_resolution,
196 const int filter_type)
197{
198 const Result &weights = context.cache_manager().symmetric_separable_blur_weights.get(
199 context, filter_type, weights_resolution);
200
201 Domain domain = original_input.domain();
202 output.allocate_texture(domain);
203
204 blur_pass(horizontal_pass_result, radius, weights, output, true);
205}
206
207static void vertical_pass(Context &context,
208 const Result &original_input,
209 const Result &horizontal_pass_result,
210 const Result &radius,
211 Result &output,
212 const int weights_resolution,
213 const int filter_type)
214{
215 if (context.use_gpu()) {
216 vertical_pass_gpu(context,
217 original_input,
218 horizontal_pass_result,
219 radius,
220 output,
221 weights_resolution,
222 filter_type);
223 }
224 else {
225 vertical_pass_cpu(context,
226 original_input,
227 horizontal_pass_result,
228 radius,
229 output,
230 weights_resolution,
231 filter_type);
232 }
233}
234
236 const Result &input,
237 const Result &radius,
238 Result &output,
239 const int weights_resolution,
240 const int filter_type)
241{
243
244 Result horizontal_pass_result = horizontal_pass(
245 context, input, radius, weights_resolution, filter_type);
247 context, input, horizontal_pass_result, radius, output, weights_resolution, filter_type);
248 horizontal_pass_result.release();
249}
250
251} // namespace blender::compositor
#define BLI_assert(a)
Definition BLI_assert.h:46
void GPU_shader_uniform_1b(blender::gpu::Shader *sh, const char *name, bool value)
void GPU_shader_bind(blender::gpu::Shader *shader, const blender::gpu::shader::SpecializationConstants *constants_state=nullptr)
void GPU_shader_unbind()
void GPU_texture_extend_mode(blender::gpu::Texture *texture, GPUSamplerExtendMode extend_mode)
@ GPU_SAMPLER_EXTEND_MODE_EXTEND
void GPU_texture_filter_mode(blender::gpu::Texture *texture, bool use_filter)
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition btDbvt.cpp:52
void unbind_as_texture() const
Definition result.cc:511
void bind_as_texture(gpu::Shader *shader, const char *texture_name) const
Definition result.cc:487
const Domain & domain() const
T load_pixel(const int2 &texel) const
float4 sample_bilinear_extended(const float2 &coordinates) const
#define input
#define output
static void vertical_pass_cpu(Context &context, const Result &original_input, const Result &horizontal_pass_result, Result &output, const int distance, const int falloff_type)
void compute_dispatch_threads_at_least(gpu::Shader *shader, int2 threads_range, int2 local_size=int2(16))
Definition utilities.cc:196
static void vertical_pass(Context &context, const Result &original_input, const Result &horizontal_pass_result, Result &output, const int distance, const int falloff_type)
static Result horizontal_pass_cpu(Context &context, const Result &input, const int distance, const int falloff_type)
static Result horizontal_pass_gpu(Context &context, const Result &input, const int distance, const int falloff_type)
static Result horizontal_pass(Context &context, const Result &input, const int distance, const int falloff_type)
void symmetric_separable_blur_variable_size(Context &context, const Result &input, const Result &radius, Result &output, const int weights_resolution=128, const int filter_type=R_FILTER_GAUSS)
void parallel_for(const int2 range, const Function &function)
static void vertical_pass_gpu(Context &context, const Result &original_input, const Result &horizontal_pass_result, Result &output, const int distance, const int falloff_type)
static void blur_pass(Context &context, const Result &input, Result &output, const float sigma)
VecBase< float, 4 > float4
VecBase< int32_t, 2 > int2
VecBase< float, 2 > float2
i
Definition text_draw.cc:230