Blender V4.3
symmetric_separable_blur.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2023 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
5#include "BLI_assert.h"
6#include "BLI_math_base.hh"
7#include "BLI_math_vector.hh"
9
10#include "GPU_shader.hh"
11#include "GPU_texture.hh"
12
13#include "COM_context.hh"
14#include "COM_result.hh"
15#include "COM_utilities.hh"
16
18
20
22
23static const char *get_blur_shader(ResultType type)
24{
25 switch (type) {
27 return "compositor_symmetric_separable_blur_float";
29 return "compositor_symmetric_separable_blur_float2";
32 return "compositor_symmetric_separable_blur_float4";
34 /* GPU module does not support float3 outputs. */
35 break;
37 /* Blur does not support integer types. */
38 break;
39 }
40
42 return nullptr;
43}
44
46 Result &input,
47 float radius,
48 int filter_type,
49 bool extend_bounds,
50 bool gamma_correct)
51{
52 GPUShader *shader = context.get_shader(get_blur_shader(input.type()));
53 GPU_shader_bind(shader);
54
55 GPU_shader_uniform_1b(shader, "extend_bounds", extend_bounds);
56 GPU_shader_uniform_1b(shader, "gamma_correct_input", gamma_correct);
57 GPU_shader_uniform_1b(shader, "gamma_uncorrect_output", false);
58
59 input.bind_as_texture(shader, "input_tx");
60
61 const SymmetricSeparableBlurWeights &weights =
62 context.cache_manager().symmetric_separable_blur_weights.get(context, filter_type, radius);
63 weights.bind_as_texture(shader, "weights_tx");
64
65 Domain domain = input.domain();
66 if (extend_bounds) {
67 domain.size.x += int(math::ceil(radius)) * 2;
68 }
69
70 /* We allocate an output image of a transposed size, that is, with a height equivalent to the
71 * width of the input and vice versa. This is done as a performance optimization. The shader
72 * will blur the image horizontally and write it to the intermediate output transposed. Then
73 * the vertical pass will execute the same horizontal blur shader, but since its input is
74 * transposed, it will effectively do a vertical blur and write to the output transposed,
75 * effectively undoing the transposition in the horizontal pass. This is done to improve
76 * spatial cache locality in the shader and to avoid having two separate shaders for each blur
77 * pass. */
78 const int2 transposed_domain = int2(domain.size.y, domain.size.x);
79
80 Result output = context.create_result(input.type());
81 output.allocate_texture(transposed_domain);
82 output.bind_as_image(shader, "output_img");
83
84 compute_dispatch_threads_at_least(shader, domain.size);
85
87 input.unbind_as_texture();
88 weights.unbind_as_texture();
89 output.unbind_as_image();
90
91 return output;
92}
93
94static void vertical_pass(Context &context,
95 Result &original_input,
96 Result &horizontal_pass_result,
97 Result &output,
98 float2 radius,
99 int filter_type,
100 bool extend_bounds,
101 bool gamma_correct)
102{
103 GPUShader *shader = context.get_shader(get_blur_shader(original_input.type()));
104 GPU_shader_bind(shader);
105
106 GPU_shader_uniform_1b(shader, "extend_bounds", extend_bounds);
107 GPU_shader_uniform_1b(shader, "gamma_correct_input", false);
108 GPU_shader_uniform_1b(shader, "gamma_uncorrect_output", gamma_correct);
109
110 horizontal_pass_result.bind_as_texture(shader, "input_tx");
111
112 const SymmetricSeparableBlurWeights &weights =
113 context.cache_manager().symmetric_separable_blur_weights.get(context, filter_type, radius.y);
114 weights.bind_as_texture(shader, "weights_tx");
115
116 Domain domain = original_input.domain();
117 if (extend_bounds) {
118 /* Add a radius amount of pixels in both sides of the image, hence the multiply by 2. */
119 domain.size += int2(math::ceil(radius)) * 2;
120 }
121
122 output.allocate_texture(domain);
123 output.bind_as_image(shader, "output_img");
124
125 /* Notice that the domain is transposed, see the note on the horizontal pass method for more
126 * information on the reasoning behind this. */
127 compute_dispatch_threads_at_least(shader, int2(domain.size.y, domain.size.x));
128
130 horizontal_pass_result.unbind_as_texture();
131 output.unbind_as_image();
132 weights.unbind_as_texture();
133}
134
136 Result &input,
137 Result &output,
138 float2 radius,
139 int filter_type,
140 bool extend_bounds,
141 bool gamma_correct)
142{
143 Result horizontal_pass_result = horizontal_pass(
144 context, input, radius.x, filter_type, extend_bounds, gamma_correct);
145
146 vertical_pass(context,
147 input,
148 horizontal_pass_result,
149 output,
150 radius,
151 filter_type,
152 extend_bounds,
153 gamma_correct);
154
155 horizontal_pass_result.release();
156}
157
158} // namespace blender::realtime_compositor
#define BLI_assert_unreachable()
Definition BLI_assert.h:97
void GPU_shader_bind(GPUShader *shader)
void GPU_shader_uniform_1b(GPUShader *sh, const char *name, bool value)
void GPU_shader_unbind()
struct GPUShader GPUShader
#define output
const Domain & domain() const
Definition result.cc:712
void allocate_texture(Domain domain, bool from_pool=true)
Definition result.cc:204
static ResultType type(eGPUTextureFormat format)
Definition result.cc:148
void bind_as_texture(GPUShader *shader, const char *texture_name) const
Definition result.cc:253
void bind_as_texture(GPUShader *shader, const char *texture_name) const
draw_view push_constant(Type::INT, "radiance_src") .push_constant(Type capture_info_buf storage_buf(1, Qualifier::READ, "ObjectBounds", "bounds_buf[]") .push_constant(Type draw_view int
T ceil(const T &a)
void symmetric_separable_blur(Context &context, Result &input, Result &output, float2 radius, int filter_type=R_FILTER_GAUSS, bool extend_bounds=false, bool gamma_correct=false)
static const char * get_blur_shader(ResultType type)
static void vertical_pass(Context &context, Result &original_input, Result &horizontal_pass_result, Result &output, int distance, int falloff_type)
static Result horizontal_pass(Context &context, Result &input, int distance, int falloff_type)
void compute_dispatch_threads_at_least(GPUShader *shader, int2 threads_range, int2 local_size=int2(16))
Definition utilities.cc:131
VecBase< int32_t, 2 > int2