Blender V5.0
van_vliet_gaussian_blur.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2024 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
5#include "BLI_assert.h"
6#include "BLI_math_vector.hh"
7
8#include "GPU_shader.hh"
9
10#include "COM_context.hh"
11#include "COM_result.hh"
12#include "COM_utilities.hh"
13
16
17namespace blender::compositor {
18
19#define FILTER_ORDER 2
20
22 const Result &first_causal_input,
23 const Result &first_non_causal_input,
24 const Result &second_causal_input,
25 const Result &second_non_causal_input,
27{
28 gpu::Shader *shader = context.get_shader("compositor_van_vliet_gaussian_blur_sum");
29 GPU_shader_bind(shader);
30
31 first_causal_input.bind_as_texture(shader, "first_causal_input_tx");
32 first_non_causal_input.bind_as_texture(shader, "first_non_causal_input_tx");
33 second_causal_input.bind_as_texture(shader, "second_causal_input_tx");
34 second_non_causal_input.bind_as_texture(shader, "second_non_causal_input_tx");
35
36 const Domain domain = first_causal_input.domain();
37 const Domain transposed_domain = domain.transposed();
38 output.allocate_texture(transposed_domain);
39 output.bind_as_image(shader, "output_img");
40
42
44 first_causal_input.unbind_as_texture();
45 first_non_causal_input.unbind_as_texture();
46 second_causal_input.unbind_as_texture();
47 second_non_causal_input.unbind_as_texture();
48 output.unbind_as_image();
49}
50
51/* See sum_causal_and_non_causal_results. */
52static void sum_causal_and_non_causal_results_cpu(const Result &first_causal_input,
53 const Result &first_non_causal_input,
54 const Result &second_causal_input,
55 const Result &second_non_causal_input,
57{
58 const Domain domain = first_causal_input.domain();
59 const Domain transposed_domain = domain.transposed();
60 output.allocate_texture(transposed_domain);
61 parallel_for(domain.size, [&](const int2 texel) {
62 /* The Van Vliet filter is a parallel interconnection filter, meaning its output is the sum of
63 * all of its causal and non causal filters. */
64 float4 filter_output = first_causal_input.load_pixel<float4>(texel) +
65 first_non_causal_input.load_pixel<float4>(texel) +
66 second_causal_input.load_pixel<float4>(texel) +
67 second_non_causal_input.load_pixel<float4>(texel);
68
69 /* Write the color using the transposed texel. See the sum_causal_and_non_causal_results method
70 * for more information on the rational behind this. */
71 output.store_pixel(int2(texel.y, texel.x), filter_output);
72 });
73}
74
75/* Sum all four of the causal and non causal outputs of the first and second filters and write the
76 * sum to the output. This is because the Van Vliet filter is implemented as a bank of 2 parallel
77 * second order filters, meaning its output is the sum of the causal and non causal filters of both
78 * filters. The output is expected not to be allocated as it will be allocated internally.
79 *
80 * The output is allocated and written transposed, that is, with a height equivalent to the width
81 * of the input and vice versa. This is done as a performance optimization. The blur pass will
82 * blur the image horizontally and write it to the intermediate output transposed. Then the
83 * vertical pass will execute the same horizontal blur shader, but since its input is transposed,
84 * it will effectively do a vertical blur and write to the output transposed, effectively undoing
85 * the transposition in the horizontal pass. This is done to improve spatial cache locality in the
86 * shader and to avoid having two separate shaders for each blur pass. */
88 const Result &first_causal_input,
89 const Result &first_non_causal_input,
90 const Result &second_causal_input,
91 const Result &second_non_causal_input,
93{
94 if (context.use_gpu()) {
96 first_causal_input,
97 first_non_causal_input,
98 second_causal_input,
99 second_non_causal_input,
100 output);
101 }
102 else {
103 sum_causal_and_non_causal_results_cpu(first_causal_input,
104 first_non_causal_input,
105 second_causal_input,
106 second_non_causal_input,
107 output);
108 }
109}
110
111static void blur_pass_gpu(Context &context,
112 const Result &input,
113 Result &first_causal_result,
114 Result &first_non_causal_result,
115 Result &second_causal_result,
116 Result &second_non_causal_result,
117 const float sigma)
118{
119 gpu::Shader *shader = context.get_shader("compositor_van_vliet_gaussian_blur");
120 GPU_shader_bind(shader);
121
122 const VanVlietGaussianCoefficients &coefficients =
123 context.cache_manager().van_vliet_gaussian_coefficients.get(context, sigma);
124
126 shader, "first_feedback_coefficients", float2(coefficients.first_feedback_coefficients()));
128 "first_causal_feedforward_coefficients",
131 "first_non_causal_feedforward_coefficients",
134 shader, "second_feedback_coefficients", float2(coefficients.second_feedback_coefficients()));
136 "second_causal_feedforward_coefficients",
139 "second_non_causal_feedforward_coefficients",
142 "first_causal_boundary_coefficient",
143 float(coefficients.first_causal_boundary_coefficient()));
145 "first_non_causal_boundary_coefficient",
146 float(coefficients.first_non_causal_boundary_coefficient()));
148 "second_causal_boundary_coefficient",
149 float(coefficients.second_causal_boundary_coefficient()));
151 "second_non_causal_boundary_coefficient",
152 float(coefficients.second_non_causal_boundary_coefficient()));
153
154 input.bind_as_texture(shader, "input_tx");
155
156 const Domain domain = input.domain();
157
158 first_causal_result.allocate_texture(domain);
159 first_causal_result.bind_as_image(shader, "first_causal_output_img");
160
161 first_non_causal_result.allocate_texture(domain);
162 first_non_causal_result.bind_as_image(shader, "first_non_causal_output_img");
163
164 second_causal_result.allocate_texture(domain);
165 second_causal_result.bind_as_image(shader, "second_causal_output_img");
166
167 second_non_causal_result.allocate_texture(domain);
168 second_non_causal_result.bind_as_image(shader, "second_non_causal_output_img");
169
170 /* The second dispatch dimension is 4 dispatches, one for the first causal filter, one for the
171 * first non causal filter, one for the second causal filter, and one for the second non causal
172 * filter. */
173 compute_dispatch_threads_at_least(shader, int2(domain.size.y, 4), int2(64, 4));
174
176 input.unbind_as_texture();
177 first_causal_result.unbind_as_image();
178 first_non_causal_result.unbind_as_image();
179 second_causal_result.unbind_as_image();
180 second_non_causal_result.unbind_as_image();
181}
182
183static void blur_pass_cpu(Context &context,
184 const Result &input,
185 Result &first_causal_output,
186 Result &first_non_causal_output,
187 Result &second_causal_output,
188 Result &second_non_causal_output,
189 const float sigma)
190{
191 const VanVlietGaussianCoefficients &coefficients =
192 context.cache_manager().van_vliet_gaussian_coefficients.get(context, sigma);
193
194 const float2 first_feedback_coefficients = float2(coefficients.first_feedback_coefficients());
195 const float2 first_causal_feedforward_coefficients = float2(
197 const float2 first_non_causal_feedforward_coefficients = float2(
199 const float2 second_feedback_coefficients = float2(coefficients.second_feedback_coefficients());
200 const float2 second_causal_feedforward_coefficients = float2(
202 const float2 second_non_causal_feedforward_coefficients = float2(
204 const float first_causal_boundary_coefficient = float(
205 coefficients.first_causal_boundary_coefficient());
206 const float first_non_causal_boundary_coefficient = float(
208 const float second_causal_boundary_coefficient = float(
210 const float second_non_causal_boundary_coefficient = float(
212
213 const Domain domain = input.domain();
214 first_causal_output.allocate_texture(domain);
215 first_non_causal_output.allocate_texture(domain);
216 second_causal_output.allocate_texture(domain);
217 second_non_causal_output.allocate_texture(domain);
218
219 /* The first dispatch dimension is 4 dispatches, one for the first causal filter, one for the
220 * first non causal filter, one for the second causal filter, and one for the second non causal
221 * filter. */
222 const int2 parallel_for_size = int2(4, domain.size.y);
223 /* Blur the input horizontally by applying a fourth order IIR filter approximating a Gaussian
224 * filter using Van Vliet's design method. This is based on the following paper:
225 *
226 * Van Vliet, Lucas J., Ian T. Young, and Piet W. Verbeek. "Recursive Gaussian derivative
227 * filters." Proceedings. Fourteenth International Conference on Pattern Recognition (Cat. No.
228 * 98EX170). Vol. 1. IEEE, 1998.
229 *
230 * We decomposed the filter into two second order filters, so we actually run four filters per
231 * row in parallel, one for the first causal filter, one for the first non causal filter, one for
232 * the second causal filter, and one for the second non causal filter, storing the result of each
233 * separately. See the VanVlietGaussianCoefficients class and the implementation for more
234 * information. */
235 parallel_for(parallel_for_size, [&](const int2 invocation) {
236 /* The shader runs parallel across rows but serially across columns. */
237 int y = invocation.y;
238 int width = input.domain().size.x;
239
240 /* The second dispatch dimension is four dispatches:
241 *
242 * 0 -> First causal filter.
243 * 1 -> First non causal filter.
244 * 2 -> Second causal filter.
245 * 3 -> Second non causal filter.
246 *
247 * We detect causality by even numbers. */
248 bool is_causal = invocation.x % 2 == 0;
249 float2 first_feedforward_coefficients = is_causal ? first_causal_feedforward_coefficients :
250 first_non_causal_feedforward_coefficients;
251 float first_boundary_coefficient = is_causal ? first_causal_boundary_coefficient :
252 first_non_causal_boundary_coefficient;
253 float2 second_feedforward_coefficients = is_causal ?
254 second_causal_feedforward_coefficients :
255 second_non_causal_feedforward_coefficients;
256 float second_boundary_coefficient = is_causal ? second_causal_boundary_coefficient :
257 second_non_causal_boundary_coefficient;
258 /* And we detect the filter by order. */
259 bool is_first_filter = invocation.x < 2;
260 float2 feedforward_coefficients = is_first_filter ? first_feedforward_coefficients :
261 second_feedforward_coefficients;
262 float2 feedback_coefficients = is_first_filter ? first_feedback_coefficients :
263 second_feedback_coefficients;
264 float boundary_coefficient = is_first_filter ? first_boundary_coefficient :
265 second_boundary_coefficient;
266
267 /* Create an array that holds the last FILTER_ORDER inputs along with the current input. The
268 * current input is at index 0 and the oldest input is at index FILTER_ORDER. We assume Neumann
269 * boundary condition, so we initialize all inputs by the boundary pixel. */
270 int2 boundary_texel = is_causal ? int2(0, y) : int2(width - 1, y);
271 float4 input_boundary = input.load_pixel<float4>(boundary_texel);
272 float4 inputs[FILTER_ORDER + 1] = {input_boundary, input_boundary, input_boundary};
273
274 /* Create an array that holds the last FILTER_ORDER outputs along with the current output. The
275 * current output is at index 0 and the oldest output is at index FILTER_ORDER. We assume
276 * Neumann boundary condition, so we initialize all outputs by the boundary pixel multiplied by
277 * the boundary coefficient. See the VanVlietGaussianCoefficients class for more information on
278 * the boundary handing. */
279 float4 output_boundary = input_boundary * boundary_coefficient;
280 float4 outputs[FILTER_ORDER + 1] = {output_boundary, output_boundary, output_boundary};
281
282 for (int x = 0; x < width; x++) {
283 /* Run forward across rows for the causal filter and backward for the non causal filter. */
284 int2 texel = is_causal ? int2(x, y) : int2(width - 1 - x, y);
285 inputs[0] = input.load_pixel<float4>(texel);
286
287 /* Compute the filter based on its difference equation, this is not in the Van Vliet paper
288 * because the filter was decomposed, but it is essentially similar to Equation (28) for the
289 * causal filter or Equation (29) for the non causal filter in Deriche's paper, except it is
290 * second order, not fourth order.
291 *
292 * Deriche, Rachid. Recursively implementating the Gaussian and its derivatives. Diss.
293 * INRIA, 1993.
294 *
295 * The only difference is that the non causal filter ignores the current value and starts
296 * from the previous input, as can be seen in the subscript of the first input term in both
297 * equations. So add one while indexing the non causal inputs. */
298 outputs[0] = float4(0.0f);
299 int first_input_index = is_causal ? 0 : 1;
300 for (int i = 0; i < FILTER_ORDER; i++) {
301 outputs[0] += feedforward_coefficients[i] * inputs[first_input_index + i];
302 outputs[0] -= feedback_coefficients[i] * outputs[i + 1];
303 }
304
305 /* Store the causal and non causal outputs of each of the two filters independently, then sum
306 * them in a separate shader dispatch for better parallelism. */
307 if (is_causal) {
308 if (is_first_filter) {
309 first_causal_output.store_pixel(texel, outputs[0]);
310 }
311 else {
312 second_causal_output.store_pixel(texel, outputs[0]);
313 }
314 }
315 else {
316 if (is_first_filter) {
317 first_non_causal_output.store_pixel(texel, outputs[0]);
318 }
319 else {
320 second_non_causal_output.store_pixel(texel, outputs[0]);
321 }
322 }
323
324 /* Shift the inputs temporally by one. The oldest input is discarded, while the current input
325 * will retain its value but will be overwritten with the new current value in the next
326 * iteration. */
327 for (int i = FILTER_ORDER; i >= 1; i--) {
328 inputs[i] = inputs[i - 1];
329 }
330
331 /* Shift the outputs temporally by one. The oldest output is discarded, while the current
332 * output will retain its value but will be overwritten with the new current value in the
333 * next iteration. */
334 for (int i = FILTER_ORDER; i >= 1; i--) {
335 outputs[i] = outputs[i - 1];
336 }
337 }
338 });
339}
340
341static void blur_pass(Context &context, const Result &input, Result &output, const float sigma)
342{
343 Result first_causal_result = context.create_result(ResultType::Color);
344 Result first_non_causal_result = context.create_result(ResultType::Color);
345 Result second_causal_result = context.create_result(ResultType::Color);
346 Result second_non_causal_result = context.create_result(ResultType::Color);
347
348 if (context.use_gpu()) {
349 blur_pass_gpu(context,
350 input,
351 first_causal_result,
352 first_non_causal_result,
353 second_causal_result,
354 second_non_causal_result,
355 sigma);
356 }
357 else {
358 blur_pass_cpu(context,
359 input,
360 first_causal_result,
361 first_non_causal_result,
362 second_causal_result,
363 second_non_causal_result,
364 sigma);
365 }
366
368 first_causal_result,
369 first_non_causal_result,
370 second_causal_result,
371 second_non_causal_result,
372 output);
373 first_causal_result.release();
374 first_non_causal_result.release();
375 second_causal_result.release();
376 second_non_causal_result.release();
377}
378
380 const Result &input,
381 Result &output,
382 const float2 &sigma)
383{
384 BLI_assert_msg(math::reduce_max(sigma) >= 32.0f,
385 "Van Vliet filter is less accurate for sigma values less than 32. Use Deriche "
386 "filter instead or direct convolution instead.");
387
388 Result horizontal_pass_result = context.create_result(ResultType::Color);
389 blur_pass(context, input, horizontal_pass_result, sigma.x);
390 blur_pass(context, horizontal_pass_result, output, sigma.y);
391 horizontal_pass_result.release();
392}
393
394} // namespace blender::compositor
#define BLI_assert_msg(a, msg)
Definition BLI_assert.h:53
void GPU_shader_uniform_1f(blender::gpu::Shader *sh, const char *name, float value)
void GPU_shader_bind(blender::gpu::Shader *shader, const blender::gpu::shader::SpecializationConstants *constants_state=nullptr)
void GPU_shader_uniform_2fv(blender::gpu::Shader *sh, const char *name, const float data[2])
void GPU_shader_unbind()
Domain transposed() const
Definition domain.cc:29
void store_pixel(const int2 &texel, const T &pixel_value)
void allocate_texture(const Domain domain, const bool from_pool=true, const std::optional< ResultStorageType > storage_type=std::nullopt)
Definition result.cc:389
void unbind_as_texture() const
Definition result.cc:511
void bind_as_texture(gpu::Shader *shader, const char *texture_name) const
Definition result.cc:487
const Domain & domain() const
void unbind_as_image() const
Definition result.cc:517
void bind_as_image(gpu::Shader *shader, const char *image_name, bool read=false) const
Definition result.cc:498
nullptr float
#define FILTER_ORDER
#define input
#define output
static void sum_causal_and_non_causal_results_cpu(const Result &causal_input, const Result &non_causal_input, Result &output)
void compute_dispatch_threads_at_least(gpu::Shader *shader, int2 threads_range, int2 local_size=int2(16))
Definition utilities.cc:196
static void sum_causal_and_non_causal_results(Context &context, const Result &causal_input, const Result &non_causal_input, Result &output)
static void blur_pass_cpu(Context &context, const Result &input, Result &causal_output, Result &non_causal_output, const float sigma)
void van_vliet_gaussian_blur(Context &context, const Result &input, Result &output, const float2 &sigma)
void parallel_for(const int2 range, const Function &function)
static void blur_pass_gpu(Context &context, const Result &input, Result &causal_result, Result &non_causal_result, const float sigma)
static void sum_causal_and_non_causal_results_gpu(Context &context, const Result &causal_input, const Result &non_causal_input, Result &output)
static void blur_pass(Context &context, const Result &input, Result &output, const float sigma)
T reduce_max(const VecBase< T, Size > &a)
VecBase< float, 4 > float4
VecBase< int32_t, 2 > int2
VecBase< float, 2 > float2
static blender::bke::bNodeSocketTemplate outputs[]
static blender::bke::bNodeSocketTemplate inputs[]
i
Definition text_draw.cc:230