Blender V5.0
morphological_distance_feather.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2023 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
5#include "BLI_math_base.hh"
7
8#include "GPU_shader.hh"
9
11#include "COM_context.hh"
13#include "COM_result.hh"
14#include "COM_utilities.hh"
15
16namespace blender::compositor {
17
18/* The Morphological Distance Feather operation is a linear combination between the result of two
19 * operations. The first operation is a Gaussian blur with a radius equivalent to the dilate/erode
20 * distance, which is straightforward and implemented as a separable filter similar to the blur
21 * operation.
22 *
23 * The second operation is an approximation of a morphological inverse distance operation evaluated
24 * at a distance falloff function. The result of a morphological inverse distance operation is a
25 * narrow band distance field that starts at its maximum value at boundaries where a difference in
26 * values took place and linearly deceases until it reaches zero in the span of a number of pixels
27 * equivalent to the erode/dilate distance. Additionally, instead of linearly decreasing, the user
28 * may choose a different falloff which is evaluated at the computed distance. For dilation, the
29 * distance field decreases outwards, and for erosion, the distance field decreased inwards.
30 *
31 * The reason why the result of a Gaussian blur is mixed in with the distance field is because the
32 * distance field is merely approximated and not accurately computed, the defects of which is more
33 * apparent away from boundaries and especially at corners where the distance field should take a
34 * circular shape. That's why the Gaussian blur is mostly mixed only further from boundaries.
35 *
36 * The morphological inverse distance operation is approximated using a separable implementation
37 * and intertwined with the Gaussian blur implementation as follows. A search window of a radius
38 * equivalent to the dilate/erode distance is applied on the image to find either the minimum or
39 * maximum pixel value multiplied by its corresponding falloff value in the window. For dilation,
40 * we try to find the maximum, and for erosion, we try to find the minimum. The implementation uses
41 * an inverse function to find the minimum. Additionally, we also save the falloff value where the
42 * minimum or maximum was found. The found value will be that of the narrow band distance field and
43 * the saved falloff value will be used as the mixing factor with the Gaussian blur.
44 *
45 * To make sense of the aforementioned algorithm, assume we are dilating a binary image by 5 pixels
46 * whose half has a value of 1 and the other half has a value of zero. Consider the following:
47 *
48 * - A pixel of value 1 already has the maximum possible value, so its value will remain unchanged
49 * regardless of its position.
50 * - A pixel of value 0 that is right at the boundary of the 1's region will have a maximum value
51 * of around 0.8 depending on the falloff. That's because the search window intersects the 1's
52 * region, which when multiplied by the falloff gives the first value of the falloff, which is
53 * larger than the initially zero value computed at the center of the search window.
54 * - A pixel of value 0 that is 3 pixels away from the boundary will have a maximum value of around
55 * 0.4 depending on the falloff. That's because the search window intersects the 1's region,
56 * which when multiplied by the falloff gives the third value of the falloff, which is larger
57 * than the initially zero value computed at the center of the search window.
58 * - Finally, a pixel of value 0 that is 6 pixels away from the boundary will have a maximum value
59 * of 0, because the search window doesn't intersects the 1's region and only spans zero values.
60 *
61 * The previous example demonstrates how the distance field naturally arises, and the same goes for
62 * the erode case, except the minimum value is computed instead. */
63template<bool IsErode>
67{
68 /* Notice that the size is transposed, see the note on the horizontal pass method for more
69 * information on the reasoning behind this. */
70 const int2 size = int2(output.domain().size.y, output.domain().size.x);
71 parallel_for(size, [&](const int2 texel) {
72 /* A value for accumulating the blur result. */
73 float accumulated_value = 0.0f;
74
75 /* Compute the contribution of the center pixel to the blur result. */
76 float center_value = input.load_pixel<float>(texel);
77 if constexpr (IsErode) {
78 center_value = 1.0f - center_value;
79 }
80 accumulated_value += center_value * weights.weights_result.load_pixel<float>(int2(0));
81
82 /* Start with the center value as the maximum/minimum distance and reassign to the true maximum
83 * or minimum in the search loop below. Additionally, the center falloff is always 1.0, so
84 * start with that. */
85 float limit_distance = center_value;
86 float limit_distance_falloff = 1.0f;
87
88 /* Compute the contributions of the pixels to the right and left, noting that the weights and
89 * falloffs textures only store the weights and falloffs for the positive half, but since the
90 * they are both symmetric, the same weights and falloffs are used for the negative half and we
91 * compute both of their contributions. */
92 for (int i = 1; i < weights.weights_result.domain().size.x; i++) {
93 float weight = weights.weights_result.load_pixel<float>(int2(i, 0));
94 float falloff = weights.falloffs_result.load_pixel<float>(int2(i, 0));
95
96 /* Loop for two iterations, where s takes the value of -1 and 1, which is used as the sign
97 * needed to evaluated the positive and negative sides as explain above. */
98 for (int s = -1; s < 2; s += 2) {
99 /* Compute the contribution of the pixel to the blur result. */
100 float value = input.load_pixel_extended<float>(texel + int2(s * i, 0));
101 if constexpr (IsErode) {
102 value = 1.0f - value;
103 }
104 accumulated_value += value * weight;
105
106 /* The distance is computed such that its highest value is the pixel value itself, so
107 * multiply the distance falloff by the pixel value. */
108 float falloff_distance = value * falloff;
109
110 /* Find either the maximum or the minimum for the dilate and erode cases respectively. */
111 if (falloff_distance > limit_distance) {
112 limit_distance = falloff_distance;
113 limit_distance_falloff = falloff;
114 }
115 }
116 }
117
118 /* Mix between the limit distance and the blurred accumulated value such that the limit
119 * distance is used for pixels closer to the boundary and the blurred value is used for pixels
120 * away from the boundary. */
121 float value = math::interpolate(accumulated_value, limit_distance, limit_distance_falloff);
122 if constexpr (IsErode) {
123 value = 1.0f - value;
124 }
125
126 /* Write the value using the transposed texel. See the horizontal pass function for more
127 * information on the rational behind this. */
128 output.store_pixel(int2(texel.y, texel.x), value);
129 });
130}
131
/* Returns the name of the GPU shader that implements the feather pass for the given distance. A
 * positive distance selects the dilate variant, while any other distance, including zero, selects
 * the erode variant. */
static const char *get_shader_name(const int distance)
{
  if (distance > 0) {
    return "compositor_morphological_distance_feather_dilate";
  }
  return "compositor_morphological_distance_feather_erode";
}
139
141 const Result &input,
142 const int distance,
143 const int falloff_type)
144{
145 gpu::Shader *shader = context.get_shader(get_shader_name(distance));
146 GPU_shader_bind(shader);
147
148 input.bind_as_texture(shader, "input_tx");
149
151 context.cache_manager().morphological_distance_feather_weights.get(
152 context, falloff_type, math::abs(distance));
153 weights.weights_result.bind_as_texture(shader, "weights_tx");
154 weights.falloffs_result.bind_as_texture(shader, "falloffs_tx");
155
156 /* We allocate an output image of a transposed size, that is, with a height equivalent to the
157 * width of the input and vice versa. This is done as a performance optimization. The shader
158 * will process the image horizontally and write it to the intermediate output transposed. Then
159 * the vertical pass will execute the same horizontal pass shader, but since its input is
160 * transposed, it will effectively do a vertical pass and write to the output transposed,
161 * effectively undoing the transposition in the horizontal pass. This is done to improve
162 * spatial cache locality in the shader and to avoid having two separate shaders for each of
163 * the passes. */
164 const Domain domain = input.domain();
165 const int2 transposed_domain = int2(domain.size.y, domain.size.x);
166
167 Result output = context.create_result(ResultType::Float);
168 output.allocate_texture(transposed_domain);
169 output.bind_as_image(shader, "output_img");
170
172
174 input.unbind_as_texture();
177 output.unbind_as_image();
178
179 return output;
180}
181
183 const Result &input,
184 const int distance,
185 const int falloff_type)
186{
188 context.cache_manager().morphological_distance_feather_weights.get(
189 context, falloff_type, math::abs(distance));
190
191 /* We allocate an output image of a transposed size, that is, with a height equivalent to the
192 * width of the input and vice versa. This is done as a performance optimization. The shader
193 * will process the image horizontally and write it to the intermediate output transposed. Then
194 * the vertical pass will execute the same horizontal pass shader, but since its input is
195 * transposed, it will effectively do a vertical pass and write to the output transposed,
196 * effectively undoing the transposition in the horizontal pass. This is done to improve
197 * spatial cache locality in the shader and to avoid having two separate shaders for each of
198 * the passes. */
199 const Domain domain = input.domain();
200 const int2 transposed_domain = int2(domain.size.y, domain.size.x);
201
202 Result output = context.create_result(ResultType::Float);
203 output.allocate_texture(transposed_domain);
204
205 if (distance > 0) {
207 }
208 else {
210 }
211
212 return output;
213}
214
216 const Result &input,
217 const int distance,
218 const int falloff_type)
219{
220 if (context.use_gpu()) {
221 return horizontal_pass_gpu(context, input, distance, falloff_type);
222 }
223 return horizontal_pass_cpu(context, input, distance, falloff_type);
224}
225
226static void vertical_pass_gpu(Context &context,
227 const Result &original_input,
228 const Result &horizontal_pass_result,
229 Result &output,
230 const int distance,
231 const int falloff_type)
232{
233 gpu::Shader *shader = context.get_shader(get_shader_name(distance));
234 GPU_shader_bind(shader);
235
236 horizontal_pass_result.bind_as_texture(shader, "input_tx");
237
239 context.cache_manager().morphological_distance_feather_weights.get(
240 context, falloff_type, math::abs(distance));
241 weights.weights_result.bind_as_texture(shader, "weights_tx");
242 weights.falloffs_result.bind_as_texture(shader, "falloffs_tx");
243
244 const Domain domain = original_input.domain();
245 output.allocate_texture(domain);
246 output.bind_as_image(shader, "output_img");
247
248 /* Notice that the domain is transposed, see the note on the horizontal pass function for more
249 * information on the reasoning behind this. */
250 compute_dispatch_threads_at_least(shader, int2(domain.size.y, domain.size.x));
251
253 horizontal_pass_result.unbind_as_texture();
256 output.unbind_as_image();
257}
258
259static void vertical_pass_cpu(Context &context,
260 const Result &original_input,
261 const Result &horizontal_pass_result,
262 Result &output,
263 const int distance,
264 const int falloff_type)
265{
267 context.cache_manager().morphological_distance_feather_weights.get(
268 context, falloff_type, math::abs(distance));
269
270 const Domain domain = original_input.domain();
271 output.allocate_texture(domain);
272
273 if (distance > 0) {
274 morphological_distance_feather_pass<false>(horizontal_pass_result, weights, output);
275 }
276 else {
277 morphological_distance_feather_pass<true>(horizontal_pass_result, weights, output);
278 }
279}
280
281static void vertical_pass(Context &context,
282 const Result &original_input,
283 const Result &horizontal_pass_result,
284 Result &output,
285 const int distance,
286 const int falloff_type)
287{
288 if (context.use_gpu()) {
290 context, original_input, horizontal_pass_result, output, distance, falloff_type);
291 }
292 else {
294 context, original_input, horizontal_pass_result, output, distance, falloff_type);
295 }
296}
297
299 const Result &input,
300 Result &output,
301 const int distance,
302 const int falloff_type)
303{
304 Result horizontal_pass_result = horizontal_pass(context, input, distance, falloff_type);
305 vertical_pass(context, input, horizontal_pass_result, output, distance, falloff_type);
306 horizontal_pass_result.release();
307}
308
309} // namespace blender::compositor
void GPU_shader_bind(blender::gpu::Shader *shader, const blender::gpu::shader::SpecializationConstants *constants_state=nullptr)
void GPU_shader_unbind()
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition btDbvt.cpp:52
void unbind_as_texture() const
Definition result.cc:511
void bind_as_texture(gpu::Shader *shader, const char *texture_name) const
Definition result.cc:487
const Domain & domain() const
T load_pixel(const int2 &texel) const
#define input
#define output
float distance(VecOp< float, D >, VecOp< float, D >) RET
static void vertical_pass_cpu(Context &context, const Result &original_input, const Result &horizontal_pass_result, Result &output, const int distance, const int falloff_type)
void compute_dispatch_threads_at_least(gpu::Shader *shader, int2 threads_range, int2 local_size=int2(16))
Definition utilities.cc:196
void morphological_distance_feather(Context &context, const Result &input, Result &output, const int distance, const int falloff_type=PROP_SMOOTH)
static void vertical_pass(Context &context, const Result &original_input, const Result &horizontal_pass_result, Result &output, const int distance, const int falloff_type)
static Result horizontal_pass_cpu(Context &context, const Result &input, const int distance, const int falloff_type)
static Result horizontal_pass_gpu(Context &context, const Result &input, const int distance, const int falloff_type)
static Result horizontal_pass(Context &context, const Result &input, const int distance, const int falloff_type)
static void morphological_distance_feather_pass(const Result &input, const MorphologicalDistanceFeatherWeights &weights, Result &output)
void parallel_for(const int2 range, const Function &function)
static void vertical_pass_gpu(Context &context, const Result &original_input, const Result &horizontal_pass_result, Result &output, const int distance, const int falloff_type)
static const char * get_shader_name(const int distance)
T interpolate(const T &a, const T &b, const FactorT &t)
T abs(const T &a)
VecBase< int32_t, 2 > int2
i
Definition text_draw.cc:230