Blender V4.5
node_composite_kuwahara.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2023 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
8
9#include <limits>
10
11#include "BLI_math_base.hh"
13#include "BLI_math_numbers.hh"
14#include "BLI_math_vector.hh"
16
17#include "RNA_access.hh"
18
19#include "UI_interface.hh"
20#include "UI_resources.hh"
21
22#include "COM_node_operation.hh"
23#include "COM_utilities.hh"
24
27
29
30/* **************** Kuwahara ******************** */
31
33
35
37{
   /* Socket declarations for the Kuwahara node. NOTE(review): the function
    * signature (Doxygen line 36) is stripped from this extract; presumably
    * `static void cmp_node_kuwahara_declare(NodeDeclarationBuilder &b)` given
    * the `ntype.declare = file_ns::cmp_node_kuwahara_declare;` assignment
    * below — confirm against the upstream file. */
38 b.add_input<decl::Color>("Image")
39 .default_value({1.0f, 1.0f, 1.0f, 1.0f})
40 .compositor_domain_priority(0);
41 b.add_input<decl::Float>("Size")
42 .default_value(6.0f)
43 .min(0.0f)
44 .description("The size of the filter in pixels")
45 .compositor_domain_priority(1);
   /* NOTE(review): the `.default_value(...)` line for Uniformity (Doxygen
    * line 47) is stripped from this extract; `get_uniformity()` below falls
    * back to 4, so 4 is the likely default — confirm. */
46 b.add_input<decl::Int>("Uniformity")
48 .min(0)
49 .description(
50 "Controls the uniformity of the direction of the filter. Higher values produces more "
51 "uniform directions")
52 .compositor_expects_single_value();
53 b.add_input<decl::Float>("Sharpness")
54 .default_value(1.0f)
55 .subtype(PROP_FACTOR)
56 .min(0.0f)
57 .max(1.0f)
58 .description(
59 "Controls the sharpness of the filter. 0 means completely smooth while 1 means "
60 "completely sharp")
61 .compositor_expects_single_value();
62 b.add_input<decl::Float>("Eccentricity")
63 .default_value(1.0f)
64 .subtype(PROP_FACTOR)
65 .min(0.0f)
66 .max(2.0f)
67 .description(
68 "Controls how directional the filter is. 0 means the filter is completely "
69 "omnidirectional while 2 means it is maximally directed along the edges of the image")
70 .compositor_expects_single_value();
71 b.add_input<decl::Bool>("High Precision")
72 .default_value(false)
73 .description(
74 "Uses a more precise but slower method. Use if the output contains undesirable noise.")
75 .compositor_expects_single_value();
76
77 b.add_output<decl::Color>("Image");
78}
79
/* Initialize the node's storage on creation. NOTE(review): Doxygen lines 82-83
 * — which declare and allocate `data` (presumably a `NodeKuwaharaData *` via
 * MEM_callocN, matching the "NodeKuwaharaData" storage name registered below)
 * — are stripped from this extract; confirm against the upstream file. */
80static void node_composit_init_kuwahara(bNodeTree * /*ntree*/, bNode *node)
81{
84 node->storage = data;
85}
86
/* Draw the node's buttons: a single dropdown for the Kuwahara variation
 * (Classic or Anisotropic), exposed via the RNA property "variation".
 * NOTE(review): the function signature (Doxygen line 87) is stripped from this
 * extract; it is assigned below as `ntype.draw_buttons`. */
88{
89 layout->prop(ptr, "variation", UI_ITEM_NONE, std::nullopt, ICON_NONE);
90}
91
92static void node_update(bNodeTree *ntree, bNode *node)
93{
94 const bool is_classic = node_storage(*node).variation == CMP_NODE_KUWAHARA_CLASSIC;
95
96 bNodeSocket *high_precision_input = bke::node_find_socket(*node, SOCK_IN, "High Precision");
97 blender::bke::node_set_socket_availability(*ntree, *high_precision_input, is_classic);
98
99 bNodeSocket *uniformity_input = bke::node_find_socket(*node, SOCK_IN, "Uniformity");
100 bNodeSocket *sharpness_input = bke::node_find_socket(*node, SOCK_IN, "Sharpness");
101 bNodeSocket *eccentricity_input = bke::node_find_socket(*node, SOCK_IN, "Eccentricity");
102 blender::bke::node_set_socket_availability(*ntree, *uniformity_input, !is_classic);
103 blender::bke::node_set_socket_availability(*ntree, *sharpness_input, !is_classic);
104 blender::bke::node_set_socket_availability(*ntree, *eccentricity_input, !is_classic);
105}
106
107using namespace blender::compositor;
108
   /* NOTE(review): the class declaration line (Doxygen line 109) is stripped
    * from this extract; presumably
    * `class ConvertKuwaharaOperation : public NodeOperation {` given the
    * `new ConvertKuwaharaOperation(context, node)` below, along with a
    * constructor-inheriting declaration at line 111 — confirm upstream. */
110 public:
112
    /* Entry point: pass single-value inputs through unchanged, otherwise
     * dispatch to the selected Kuwahara variation. */
113 void execute() override
114 {
115 const Result &input = this->get_input("Image");
116 if (input.is_single_value()) {
      /* A constant input image is unaffected by any smoothing filter, so share
       * the input's data instead of computing anything. */
117 Result &output = this->get_result("Image");
118 output.share_data(input);
119 return;
120 }
121
122 if (node_storage(bnode()).variation == CMP_NODE_KUWAHARA_ANISOTROPIC) {
      /* NOTE(review): Doxygen line 123 is stripped; presumably
       * `this->execute_anisotropic();` — confirm upstream. */
124 }
125 else {
      /* NOTE(review): Doxygen line 126 is stripped; presumably
       * `this->execute_classic();` — confirm upstream. */
127 }
128 }
129
    /* Classic variation driver. NOTE(review): its signature (Doxygen line 130,
     * presumably `void execute_classic()`) is stripped from this extract. */
131 {
132 /* For high radii, we accelerate the filter using a summed area table, making the filter
133 * execute in constant time as opposed to having quadratic complexity. Except if high precision
134 * is enabled, since summed area tables are less precise. */
135 Result &size_input = get_input("Size");
136 if (!this->get_high_precision() &&
137 (!size_input.is_single_value() || size_input.get_single_value<float>() > 5.0f))
138 {
      /* NOTE(review): Doxygen line 139 is stripped; presumably the call to
       * execute_classic_summed_area_table() — confirm upstream. */
140 }
141 else {
      /* NOTE(review): Doxygen line 142 is stripped; presumably the call to
       * the direct convolution path — confirm upstream. */
143 }
144 }
145
    /* Direct-convolution dispatcher for the classic variation. NOTE(review):
     * the signature (Doxygen line 146) and the GPU/CPU call lines (149 and
     * 152, presumably execute_classic_convolution_gpu()/_cpu()) are stripped
     * from this extract. */
147 {
148 if (this->context().use_gpu()) {
150 }
151 else {
153 }
154 }
155
    /* GPU path of the classic direct convolution. NOTE(review): the signature
     * (Doxygen line 156) and the shader acquisition line (158, presumably
     * `GPUShader *shader = context().get_shader(get_classic_convolution_shader_name());`)
     * are stripped from this extract. */
157 {
159 GPU_shader_bind(shader);
160
161 const Result &input_image = get_input("Image");
162 input_image.bind_as_texture(shader, "input_tx");
163
    /* A single-value Size is passed as a uniform; otherwise the per-pixel size
     * map is bound as a texture (matching the constant/variable shader
     * variants selected below). */
164 Result &size_input = get_input("Size");
165 if (size_input.is_single_value()) {
166 GPU_shader_uniform_1i(shader, "size", int(size_input.get_single_value<float>()));
167 }
168 else {
169 size_input.bind_as_texture(shader, "size_tx");
170 }
171
172 const Domain domain = compute_domain();
173 Result &output_image = get_result("Image");
174 output_image.allocate_texture(domain);
175 output_image.bind_as_image(shader, "output_img");
176
    /* NOTE(review): the compute dispatch (Doxygen line 177) and
     * GPU_shader_unbind() (line 181) are stripped from this extract. */
178
179 input_image.unbind_as_texture();
180 output_image.unbind_as_image();
182 }
183
    /* Shader variant selector for the classic convolution; the constant-size
     * variant is used when the Size input is a single value. NOTE(review): the
     * signature (Doxygen line 184, presumably returning `const char *`) is
     * stripped from this extract. */
185 {
186 if (is_constant_size()) {
187 return "compositor_kuwahara_classic_convolution_constant_size";
188 }
189 return "compositor_kuwahara_classic_convolution_variable_size";
190 }
191
    /* CPU path of the classic direct convolution. NOTE(review): the signature
     * (Doxygen line 192) and the start of the call on line 198 — per the
     * reference listing, `compute_classic(...)` instantiated with
     * UseSummedAreaTable = false — are stripped from this extract. */
193 {
194 const Domain domain = this->compute_domain();
195 Result &output = this->get_result("Image");
196 output.allocate_texture(domain);
197
199 &this->get_input("Image"), nullptr, nullptr, this->get_input("Size"), output, domain.size);
200 }
201
    /* Summed-area-table accelerated classic path: build a SAT of the input and
     * a SAT of the squared input, then dispatch to the GPU or CPU
     * implementation and release both tables. NOTE(review): the signature
     * (Doxygen line 202), the creation of `table` (line 204), and the creation
     * plus first line of the squared-table computation (lines 207-208 and 211)
     * are stripped from this extract. */
203 {
205 summed_area_table(this->context(), this->get_input("Image"), table);
206
209 this->get_input("Image"),
210 squared_table,
212
213 if (this->context().use_gpu()) {
214 this->execute_classic_summed_area_table_gpu(table, squared_table);
215 }
216 else {
217 this->execute_classic_summed_area_table_cpu(table, squared_table);
218 }
219
220 table.release();
221 squared_table.release();
222 }
223
    /* GPU path of the SAT-accelerated classic filter. NOTE(review): the shader
     * acquisition (Doxygen line 226), the compute dispatch (line 245), and
     * GPU_shader_unbind() (line 250) are stripped from this extract. */
224 void execute_classic_summed_area_table_gpu(const Result &table, const Result &squared_table)
225 {
227 GPU_shader_bind(shader);
228
229 Result &size_input = get_input("Size");
230 if (size_input.is_single_value()) {
231 GPU_shader_uniform_1i(shader, "size", int(size_input.get_single_value<float>()));
232 }
233 else {
234 size_input.bind_as_texture(shader, "size_tx");
235 }
236
237 table.bind_as_texture(shader, "table_tx");
238 squared_table.bind_as_texture(shader, "squared_table_tx");
239
240 const Domain domain = compute_domain();
241 Result &output_image = get_result("Image");
242 output_image.allocate_texture(domain);
243 output_image.bind_as_image(shader, "output_img");
244
246
247 table.unbind_as_texture();
248 squared_table.unbind_as_texture();
249 output_image.unbind_as_image();
251 }
252
    /* Shader variant selector for the SAT path; mirrors the convolution
     * selector above. NOTE(review): the signature (Doxygen line 253) is
     * stripped from this extract. */
254 {
255 if (is_constant_size()) {
256 return "compositor_kuwahara_classic_summed_area_table_constant_size";
257 }
258 return "compositor_kuwahara_classic_summed_area_table_variable_size";
259 }
260
    /* CPU path of the SAT-accelerated classic filter. NOTE(review): the start
     * of the call on Doxygen line 267 — per the reference listing,
     * `compute_classic(...)` instantiated with UseSummedAreaTable = true — is
     * stripped from this extract. */
261 void execute_classic_summed_area_table_cpu(const Result &table, const Result &squared_table)
262 {
263 const Domain domain = this->compute_domain();
264 Result &output = this->get_result("Image");
265 output.allocate_texture(domain);
266
268 nullptr, &table, &squared_table, this->get_input("Size"), output, domain.size);
269 }
270
271 /* If UseSummedAreaTable is true, then `table` and `squared_table` should be provided while
272 * `input` should be nullptr, otherwise, `input` should be provided while `table` and
273 * `squared_table` should be nullptr. */
274 template<bool UseSummedAreaTable>
    /* NOTE(review): Doxygen line 275 — the function name and first parameter —
     * is stripped from this extract; the reference listing gives the full
     * signature as `void compute_classic(const Result *input, ...)`. */
276 const Result *table,
277 const Result *squared_table,
278 const Result &size_input,
279 Result &output,
280 const int2 size)
281 {
    /* For every pixel: compute the mean and the mean of squares of the colors
     * in the four quadrants around it, then output the mean of the quadrant
     * with the smallest variance. */
282 parallel_for(size, [&](const int2 texel) {
283 int radius = math::max(0, int(size_input.load_pixel<float, true>(texel)));
284
285 float4 mean_of_squared_color_of_quadrants[4] = {
286 float4(0.0f), float4(0.0f), float4(0.0f), float4(0.0f)};
287 float4 mean_of_color_of_quadrants[4] = {
288 float4(0.0f), float4(0.0f), float4(0.0f), float4(0.0f)};
289
290 /* Compute the above statistics for each of the quadrants around the current pixel. */
291 for (int q = 0; q < 4; q++) {
292 /* A fancy expression to compute the sign of the quadrant q. */
293 int2 sign = int2((q % 2) * 2 - 1, ((q / 2) * 2 - 1));
294
295 int2 lower_bound = texel - int2(sign.x > 0 ? 0 : radius, sign.y > 0 ? 0 : radius);
296 int2 upper_bound = texel + int2(sign.x < 0 ? 0 : radius, sign.y < 0 ? 0 : radius);
297
298 /* Limit the quadrants to the image bounds. */
299 int2 image_bound = size - int2(1);
300 int2 corrected_lower_bound = math::min(image_bound, math::max(int2(0), lower_bound));
301 int2 corrected_upper_bound = math::min(image_bound, math::max(int2(0), upper_bound));
302 int2 region_size = corrected_upper_bound - corrected_lower_bound + int2(1);
303 int quadrant_pixel_count = region_size.x * region_size.y;
304
305 if constexpr (UseSummedAreaTable) {
306 mean_of_color_of_quadrants[q] = summed_area_table_sum(*table, lower_bound, upper_bound);
307 mean_of_squared_color_of_quadrants[q] = summed_area_table_sum(
308 *squared_table, lower_bound, upper_bound);
309 }
310 else {
     /* Direct accumulation over the quadrant window; `sign` mirrors the
      * offsets into the appropriate quadrant. */
311 for (int j = 0; j <= radius; j++) {
312 for (int i = 0; i <= radius; i++) {
313 float4 color = input->load_pixel_zero<float4>(texel + int2(i, j) * sign);
314 mean_of_color_of_quadrants[q] += color;
315 mean_of_squared_color_of_quadrants[q] += color * color;
316 }
317 }
318 }
319
320 mean_of_color_of_quadrants[q] /= quadrant_pixel_count;
321 mean_of_squared_color_of_quadrants[q] /= quadrant_pixel_count;
322 }
323
324 /* Find the quadrant which has the minimum variance. */
325 float minimum_variance = std::numeric_limits<float>::max();
326 float4 mean_color_of_chosen_quadrant = mean_of_color_of_quadrants[0];
327 for (int q = 0; q < 4; q++) {
328 float4 color_mean = mean_of_color_of_quadrants[q];
329 float4 squared_color_mean = mean_of_squared_color_of_quadrants[q];
330 float4 color_variance = squared_color_mean - color_mean * color_mean;
331
    /* Variance is summed over the RGB channels only; alpha is excluded from
     * the selection criterion. */
332 float variance = math::dot(color_variance.xyz(), float3(1.0f));
333 if (variance < minimum_variance) {
334 minimum_variance = variance;
335 mean_color_of_chosen_quadrant = color_mean;
336 }
337 }
338
339 output.store_pixel(texel, mean_color_of_chosen_quadrant);
340 });
341 }
342
343 /* An implementation of the Anisotropic Kuwahara filter described in the paper:
344 *
345 * Kyprianidis, Jan Eric, Henry Kang, and Jurgen Dollner. "Image and video abstraction by
346 * anisotropic Kuwahara filtering." 2009.
347 */
    /* NOTE(review): the signature (Doxygen line 348, presumably
     * `void execute_anisotropic()`) and the first line of the smoothing call
     * (line 352 — a function smoothing the structure tensor, taking the
     * arguments on the next line) are stripped from this extract. */
349 {
350 Result structure_tensor = compute_structure_tensor();
351 Result smoothed_structure_tensor = context().create_result(ResultType::Color);
353 context(), structure_tensor, smoothed_structure_tensor, float2(this->get_uniformity()));
354 structure_tensor.release();
355
356 if (this->context().use_gpu()) {
357 this->execute_anisotropic_gpu(smoothed_structure_tensor);
358 }
359 else {
360 this->execute_anisotropic_cpu(smoothed_structure_tensor);
361 }
362
363 smoothed_structure_tensor.release();
364 }
365
    /* GPU path of the anisotropic filter. NOTE(review): the compute dispatch
     * (Doxygen line 392) and GPU_shader_unbind() (line 397) are stripped from
     * this extract. */
366 void execute_anisotropic_gpu(const Result &structure_tensor)
367 {
368 GPUShader *shader = context().get_shader(get_anisotropic_shader_name());
369 GPU_shader_bind(shader);
370
371 GPU_shader_uniform_1f(shader, "eccentricity", this->compute_eccentricity());
372 GPU_shader_uniform_1f(shader, "sharpness", this->compute_sharpness());
373
374 Result &input = get_input("Image");
375 input.bind_as_texture(shader, "input_tx");
376
    /* A single-value Size becomes a uniform; a per-pixel size map is bound as
     * a texture, matching the constant/variable shader variants below. */
377 Result &size_input = get_input("Size");
378 if (size_input.is_single_value()) {
379 GPU_shader_uniform_1f(shader, "size", size_input.get_single_value<float>());
380 }
381 else {
382 size_input.bind_as_texture(shader, "size_tx");
383 }
384
385 structure_tensor.bind_as_texture(shader, "structure_tensor_tx");
386
387 const Domain domain = compute_domain();
388 Result &output_image = get_result("Image");
389 output_image.allocate_texture(domain);
390 output_image.bind_as_image(shader, "output_img");
391
393
394 input.unbind_as_texture();
395 structure_tensor.unbind_as_texture();
396 output_image.unbind_as_image();
398 }
399
    /* Shader variant selector for the anisotropic filter. NOTE(review): the
     * signature (Doxygen line 400) is stripped from this extract. */
401 {
402 if (is_constant_size()) {
403 return "compositor_kuwahara_anisotropic_constant_size";
404 }
405 return "compositor_kuwahara_anisotropic_variable_size";
406 }
407
408 void execute_anisotropic_cpu(const Result &structure_tensor)
409 {
    /* CPU path of the anisotropic filter. The `structure_tensor` parameter is
     * the (smoothed) per-pixel structure tensor produced in
     * execute_anisotropic(); see compute_structure_tensor_cpu for its float4
     * encoding. */
410 const float eccentricity = this->compute_eccentricity();
411 const float sharpness = this->compute_sharpness();
412
413 Result &input = this->get_input("Image");
414 Result &size = this->get_input("Size");
415
416 const Domain domain = this->compute_domain();
417 Result &output = this->get_result("Image");
418 output.allocate_texture(domain);
419
420 /* An implementation of the Anisotropic Kuwahara filter described in the paper:
421 *
422 * Kyprianidis, Jan Eric, Henry Kang, and Jurgen Dollner. "Image and video abstraction by
423 * anisotropic Kuwahara filtering." 2009.
424 *
425 * But with the polynomial weighting functions described in the paper:
426 *
427 * Kyprianidis, Jan Eric, et al. "Anisotropic Kuwahara Filtering with Polynomial Weighting
428 * Functions." 2010.
429 *
430 * And the sector weight function described in the paper:
431 *
432 * Kyprianidis, Jan Eric. "Image and video abstraction by multi-scale anisotropic Kuwahara
433 * filtering." 2011.
434 */
435
436 parallel_for(domain.size, [&](const int2 texel) {
437 /* The structure tensor is encoded in a float4 using a column major storage order, as can be
438 * seen in the compute_structure_tensor_cpu method. */
439 float4 encoded_structure_tensor = structure_tensor.load_pixel<float4>(texel);
440 float dxdx = encoded_structure_tensor.x;
441 float dxdy = encoded_structure_tensor.y;
442 float dydy = encoded_structure_tensor.w;
443
444 /* Compute the first and second eigenvalues of the structure tensor using the equations in
445 * section "3.1 Orientation and Anisotropy Estimation" of the paper. */
446 float eigenvalue_first_term = (dxdx + dydy) / 2.0f;
447 float eigenvalue_square_root_term = math::sqrt(math::square(dxdx - dydy) +
448 4.0f * math::square(dxdy)) /
449 2.0f;
450 float first_eigenvalue = eigenvalue_first_term + eigenvalue_square_root_term;
451 float second_eigenvalue = eigenvalue_first_term - eigenvalue_square_root_term;
452
453 /* Compute the normalized eigenvector of the structure tensor oriented in direction of the
454 * minimum rate of change using the equations in section "3.1 Orientation and Anisotropy
455 * Estimation" of the paper. */
456 float2 eigenvector = float2(first_eigenvalue - dxdx, -dxdy);
457 float eigenvector_length = math::length(eigenvector);
458 float2 unit_eigenvector = eigenvector_length != 0.0f ? eigenvector / eigenvector_length :
459 float2(1.0f);
460
461 /* Compute the amount of anisotropy using equations in section "3.1 Orientation and
462 * Anisotropy Estimation" of the paper. The anisotropy ranges from 0 to 1, where 0
463 * corresponds to isotropic and 1 corresponds to entirely anisotropic regions. */
464 float eigenvalue_sum = first_eigenvalue + second_eigenvalue;
465 float eigenvalue_difference = first_eigenvalue - second_eigenvalue;
466 float anisotropy = eigenvalue_sum > 0.0f ? eigenvalue_difference / eigenvalue_sum : 0.0f;
467
    /* A zero radius means no filtering, so the input pixel is passed through
     * unchanged. */
468 float radius = math::max(0.0f, size.load_pixel<float, true>(texel));
469 if (radius == 0.0f) {
470 output.store_pixel(texel, input.load_pixel<float4>(texel));
471 return;
472 }
473
474 /* Compute the width and height of an ellipse that is more width-elongated for high
475 * anisotropy and more circular for low anisotropy, controlled using the eccentricity factor.
476 * Since the anisotropy is in the [0, 1] range, the width factor tends to 1 as the
477 * eccentricity tends to infinity and tends to infinity when the eccentricity tends to zero.
478 * This is based on the equations in section "3.2. Anisotropic Kuwahara Filtering" of the
479 * paper. */
480 float ellipse_width_factor = (eccentricity + anisotropy) / eccentricity;
481 float ellipse_width = ellipse_width_factor * radius;
482 float ellipse_height = radius / ellipse_width_factor;
483
484 /* Compute the cosine and sine of the angle that the eigenvector makes with the x axis. Since
485 * the eigenvector is normalized, its x and y components are the cosine and sine of the angle
486 * it makes with the x axis. */
487 float cosine = unit_eigenvector.x;
488 float sine = unit_eigenvector.y;
489
490 /* Compute an inverse transformation matrix that represents an ellipse of the given width and
491 * height and makes and an angle with the x axis of the given cosine and sine. This is an
492 * inverse matrix, so it transforms the ellipse into a disk of unit radius. */
493 float2x2 inverse_ellipse_matrix = float2x2(
494 float2(cosine / ellipse_width, -sine / ellipse_height),
495 float2(sine / ellipse_width, cosine / ellipse_height));
496
497 /* Compute the bounding box of a zero centered ellipse whose major axis is aligned with the
498 * eigenvector and has the given width and height. This is based on the equations described
499 * in:
500 *
501 * https://iquilezles.org/articles/ellipses/
502 *
503 * Notice that we only compute the upper bound, the lower bound is just negative that since
504 * the ellipse is zero centered. Also notice that we take the ceiling of the bounding box,
505 * just to ensure the filter window is at least 1x1. */
506 float2 ellipse_major_axis = ellipse_width * unit_eigenvector;
507 float2 ellipse_minor_axis = ellipse_height * float2(unit_eigenvector.y, unit_eigenvector.x) *
508 float2(-1, 1);
509 int2 ellipse_bounds = int2(math::ceil(
510 math::sqrt(math::square(ellipse_major_axis) + math::square(ellipse_minor_axis))));
511
512 /* Compute the overlap polynomial parameters for 8-sector ellipse based on the equations in
513 * section "3 Alternative Weighting Functions" of the polynomial weights paper. More on this
514 * later in the code. */
515 const int number_of_sectors = 8;
516 float sector_center_overlap_parameter = 2.0f / radius;
517 float sector_envelope_angle = ((3.0f / 2.0f) * math::numbers::pi_v<float>) /
518 number_of_sectors;
519 float cross_sector_overlap_parameter = (sector_center_overlap_parameter +
520 math::cos(sector_envelope_angle)) /
521 math::square(math::sin(sector_envelope_angle));
522
523 /* We need to compute the weighted mean of color and squared color of each of the 8 sectors
524 * of the ellipse, so we declare arrays for accumulating those and initialize them in the
525 * next code section. */
526 float4 weighted_mean_of_squared_color_of_sectors[8];
527 float4 weighted_mean_of_color_of_sectors[8];
528 float sum_of_weights_of_sectors[8];
529
530 /* The center pixel (0, 0) is exempt from the main loop below for reasons that are explained
531 * in the first if statement in the loop, so we need to accumulate its color, squared color,
532 * and weight separately first. Luckily, the zero coordinates of the center pixel zeros out
533 * most of the complex computations below, and it can easily be shown that the weight for the
534 * center pixel in all sectors is simply (1 / number_of_sectors). */
535 float4 center_color = input.load_pixel<float4>(texel);
536 float4 center_color_squared = center_color * center_color;
537 float center_weight = 1.0f / number_of_sectors;
538 float4 weighted_center_color = center_color * center_weight;
539 float4 weighted_center_color_squared = center_color_squared * center_weight;
540 for (int i = 0; i < number_of_sectors; i++) {
541 weighted_mean_of_squared_color_of_sectors[i] = weighted_center_color_squared;
542 weighted_mean_of_color_of_sectors[i] = weighted_center_color;
543 sum_of_weights_of_sectors[i] = center_weight;
544 }
545
546 /* Loop over the window of pixels inside the bounding box of the ellipse. However, we utilize
547 * the fact that ellipses are mirror symmetric along the horizontal axis, so we reduce the
548 * window to only the upper two quadrants, and compute each two mirrored pixels at the same
549 * time using the same weight as an optimization. */
550 for (int j = 0; j <= ellipse_bounds.y; j++) {
551 for (int i = -ellipse_bounds.x; i <= ellipse_bounds.x; i++) {
552 /* Since we compute each two mirrored pixels at the same time, we need to also exempt the
553 * pixels whose x coordinates are negative and their y coordinates are zero, that's
554 * because those are mirrored versions of the pixels whose x coordinates are positive and
555 * their y coordinates are zero, and we don't want to compute and accumulate them twice.
556 * Moreover, we also need to exempt the center pixel with zero coordinates for the same
557 * reason, however, since the mirror of the center pixel is itself, it need to be
558 * accumulated separately, hence why we did that in the code section just before this
559 * loop. */
560 if (j == 0 && i <= 0) {
561 continue;
562 }
563
564 /* Map the pixels of the ellipse into a unit disk, exempting any points that are not part
565 * of the ellipse or disk. */
566 float2 disk_point = inverse_ellipse_matrix * float2(i, j);
567 float disk_point_length_squared = math::dot(disk_point, disk_point);
568 if (disk_point_length_squared > 1.0f) {
569 continue;
570 }
571
572 /* While each pixel belongs to a single sector in the ellipse, we expand the definition
573 * of a sector a bit to also overlap with other sectors as illustrated in Figure 8 of the
574 * polynomial weights paper. So each pixel may contribute to multiple sectors, and thus
575 * we compute its weight in each of the 8 sectors. */
576 float sector_weights[8];
577
578 /* We evaluate the weighting polynomial at each of the 8 sectors by rotating the disk
579 * point by 45 degrees and evaluating the weighting polynomial at each incremental
580 * rotation. To avoid potentially expensive rotations, we utilize the fact that rotations
581 * by 90 degrees are simply swapping of the coordinates and negating the x component. We
582 * also note that since the y term of the weighting polynomial is squared, it is not
583 * affected by the sign and can be computed once for the x and once for the y
584 * coordinates. So we compute every other even-indexed 4 weights by successive 90 degree
585 * rotations as discussed. */
586 float2 polynomial = sector_center_overlap_parameter -
587 cross_sector_overlap_parameter * math::square(disk_point);
588 sector_weights[0] = math::square(math::max(0.0f, disk_point.y + polynomial.x));
589 sector_weights[2] = math::square(math::max(0.0f, -disk_point.x + polynomial.y));
590 sector_weights[4] = math::square(math::max(0.0f, -disk_point.y + polynomial.x));
591 sector_weights[6] = math::square(math::max(0.0f, disk_point.x + polynomial.y));
592
593 /* Then we rotate the disk point by 45 degrees, which is a simple expression involving a
594 * constant as can be demonstrated by applying a 45 degree rotation matrix. */
595 float2 rotated_disk_point = (1.0f / math::numbers::sqrt2) *
596 float2(disk_point.x - disk_point.y,
597 disk_point.x + disk_point.y);
598
599 /* Finally, we compute every other odd-index 4 weights starting from the 45 degrees
600 * rotated disk point. */
601 float2 rotated_polynomial = sector_center_overlap_parameter -
602 cross_sector_overlap_parameter *
603 math::square(rotated_disk_point);
604 sector_weights[1] = math::square(
605 math::max(0.0f, rotated_disk_point.y + rotated_polynomial.x));
606 sector_weights[3] = math::square(
607 math::max(0.0f, -rotated_disk_point.x + rotated_polynomial.y));
608 sector_weights[5] = math::square(
609 math::max(0.0f, -rotated_disk_point.y + rotated_polynomial.x));
610 sector_weights[7] = math::square(
611 math::max(0.0f, rotated_disk_point.x + rotated_polynomial.y));
612
613 /* We compute a radial Gaussian weighting component such that pixels further away from
614 * the sector center gets attenuated, and we also divide by the sum of sector weights to
615 * normalize them, since the radial weight will eventually be multiplied to the sector
616 * weight below. */
617 float sector_weights_sum = sector_weights[0] + sector_weights[1] + sector_weights[2] +
618 sector_weights[3] + sector_weights[4] + sector_weights[5] +
619 sector_weights[6] + sector_weights[7];
620 float radial_gaussian_weight = math::exp(-math::numbers::pi *
621 disk_point_length_squared) /
622 sector_weights_sum;
623
624 /* Load the color of the pixel and its mirrored pixel and compute their square. */
625 float4 upper_color = input.load_pixel_extended<float4>(texel + int2(i, j));
626 float4 lower_color = input.load_pixel_extended<float4>(texel - int2(i, j));
627 float4 upper_color_squared = upper_color * upper_color;
628 float4 lower_color_squared = lower_color * lower_color;
629
630 for (int k = 0; k < number_of_sectors; k++) {
631 float weight = sector_weights[k] * radial_gaussian_weight;
632
633 /* Accumulate the pixel to each of the sectors multiplied by the sector weight. */
634 int upper_index = k;
635 sum_of_weights_of_sectors[upper_index] += weight;
636 weighted_mean_of_color_of_sectors[upper_index] += upper_color * weight;
637 weighted_mean_of_squared_color_of_sectors[upper_index] += upper_color_squared * weight;
638
639 /* Accumulate the mirrored pixel to each of the sectors multiplied by the sector
640 * weight. */
641 int lower_index = (k + number_of_sectors / 2) % number_of_sectors;
642 sum_of_weights_of_sectors[lower_index] += weight;
643 weighted_mean_of_color_of_sectors[lower_index] += lower_color * weight;
644 weighted_mean_of_squared_color_of_sectors[lower_index] += lower_color_squared * weight;
645 }
646 }
647 }
648
649 /* Compute the weighted sum of mean of sectors, such that sectors with lower standard
650 * deviation gets more significant weight than sectors with higher standard deviation. */
651 float sum_of_weights = 0.0f;
652 float4 weighted_sum = float4(0.0f);
653 for (int i = 0; i < number_of_sectors; i++) {
654 weighted_mean_of_color_of_sectors[i] /= sum_of_weights_of_sectors[i];
655 weighted_mean_of_squared_color_of_sectors[i] /= sum_of_weights_of_sectors[i];
656
657 float4 color_mean = weighted_mean_of_color_of_sectors[i];
658 float4 squared_color_mean = weighted_mean_of_squared_color_of_sectors[i];
659 float4 color_variance = math::abs(squared_color_mean - color_mean * color_mean);
660
661 float standard_deviation = math::dot(math::sqrt(color_variance.xyz()), float3(1.0f));
662
663 /* Compute the sector weight based on the weight function introduced in section "3.3.1
664 * Single-scale Filtering" of the multi-scale paper. Use a threshold of 0.02 to avoid zero
665 * division and avoid artifacts in homogeneous regions as demonstrated in the paper. */
666 float weight = 1.0f / math::pow(math::max(0.02f, standard_deviation), sharpness);
667
668 sum_of_weights += weight;
669 weighted_sum += color_mean * weight;
670 }
671
672 /* Fall back to the original color if all sector weights are zero due to very high standard
673 * deviation and sharpness. */
674 if (sum_of_weights == 0.0f) {
675 weighted_sum = center_color;
676 }
677 else {
678 weighted_sum /= sum_of_weights;
679 }
680
681 output.store_pixel(texel, weighted_sum);
682 });
683 }
684
    /* Dispatch structure-tensor computation to the GPU or CPU implementation.
     * NOTE(review): the signature (Doxygen line 685, presumably
     * `Result compute_structure_tensor()` given both callees return Result) is
     * stripped from this extract. */
686 {
687 if (this->context().use_gpu()) {
688 return this->compute_structure_tensor_gpu();
689 }
690 return this->compute_structure_tensor_cpu();
691 }
692
    /* GPU structure-tensor computation. NOTE(review): the signature (Doxygen
     * line 693), the compute dispatch (line 707), and GPU_shader_unbind()
     * (line 711) are stripped from this extract. */
694 {
695 GPUShader *shader = context().get_shader(
696 "compositor_kuwahara_anisotropic_compute_structure_tensor");
697 GPU_shader_bind(shader);
698
699 Result &input = get_input("Image");
700 input.bind_as_texture(shader, "input_tx");
701
702 const Domain domain = compute_domain();
703 Result structure_tensor = context().create_result(ResultType::Color);
704 structure_tensor.allocate_texture(domain);
705 structure_tensor.bind_as_image(shader, "structure_tensor_img");
706
708
709 input.unbind_as_texture();
710 structure_tensor.unbind_as_image();
712
713 return structure_tensor;
714 }
715
717 {
718 const Result &input = this->get_input("Image");
719
720 const Domain domain = this->compute_domain();
721 Result structure_tensor_image = context().create_result(ResultType::Color);
722 structure_tensor_image.allocate_texture(domain);
723
724 /* Computes the structure tensor of the image using a Dirac delta window function as described
725 * in section "3.2 Local Structure Estimation" of the paper:
726 *
727 * Kyprianidis, Jan Eric. "Image and video abstraction by multi-scale anisotropic Kuwahara
728 * filtering." 2011.
729 *
730 * The structure tensor should then be smoothed using a Gaussian function to eliminate high
731 * frequency details. */
732 parallel_for(domain.size, [&](const int2 texel) {
733 /* The weight kernels of the filter optimized for rotational symmetry described in section
734 * "3.2.1 Gradient Calculation". */
735 const float corner_weight = 0.182f;
736 const float center_weight = 1.0f - 2.0f * corner_weight;
737
738 float3 x_partial_derivative =
739 input.load_pixel_extended<float4>(texel + int2(-1, 1)).xyz() * -corner_weight +
740 input.load_pixel_extended<float4>(texel + int2(-1, 0)).xyz() * -center_weight +
741 input.load_pixel_extended<float4>(texel + int2(-1, -1)).xyz() * -corner_weight +
742 input.load_pixel_extended<float4>(texel + int2(1, 1)).xyz() * corner_weight +
743 input.load_pixel_extended<float4>(texel + int2(1, 0)).xyz() * center_weight +
744 input.load_pixel_extended<float4>(texel + int2(1, -1)).xyz() * corner_weight;
745
746 float3 y_partial_derivative =
747 input.load_pixel_extended<float4>(texel + int2(-1, 1)).xyz() * corner_weight +
748 input.load_pixel_extended<float4>(texel + int2(0, 1)).xyz() * center_weight +
749 input.load_pixel_extended<float4>(texel + int2(1, 1)).xyz() * corner_weight +
750 input.load_pixel_extended<float4>(texel + int2(-1, -1)).xyz() * -corner_weight +
751 input.load_pixel_extended<float4>(texel + int2(0, -1)).xyz() * -center_weight +
752 input.load_pixel_extended<float4>(texel + int2(1, -1)).xyz() * -corner_weight;
753
754 float dxdx = math::dot(x_partial_derivative, x_partial_derivative);
755 float dxdy = math::dot(x_partial_derivative, y_partial_derivative);
756 float dydy = math::dot(y_partial_derivative, y_partial_derivative);
757
758 /* We encode the structure tensor in a float4 using a column major storage order. */
759 float4 structure_tensor = float4(dxdx, dxdy, dxdy, dydy);
760
761 structure_tensor_image.store_pixel(texel, structure_tensor);
762 });
763
764 return structure_tensor_image;
765 }
766
    /* True when the Size input is a single value, so the constant-size shader
     * variants can be used. NOTE(review): the signature (Doxygen line 767,
     * presumably `bool is_constant_size()`) is stripped from this extract. */
768 {
769 return get_input("Size").is_single_value();
770 }
771
772 /* The sharpness controls the sharpness of the transitions between the kuwahara sectors, which
773 * is controlled by the weighting function pow(standard_deviation, -sharpness) as can be seen
774 * in the shader. The transition is completely smooth when the sharpness is zero and completely
775 * sharp when it is infinity. But realistically, the sharpness doesn't change much beyond the
776 * value of 16 due to its exponential nature, so we just assume a maximum sharpness of 16.
777 *
778 * The stored sharpness is in the range [0, 1], so we multiply by 16 to get it in the range
779 * [0, 16], however, we also square it before multiplication to slow down the rate of change
780 * near zero to counter its exponential nature for more intuitive user control. */
    /* NOTE(review): the signature (Doxygen line 781, presumably
     * `float compute_sharpness()` given its GPU_shader_uniform_1f usage) is
     * stripped from this extract. */
782 {
783 const float sharpness_factor = this->get_sharpness();
784 return sharpness_factor * sharpness_factor * 16.0f;
785 }
786
787 /* The eccentricity controls how much the image anisotropy affects the eccentricity of the
788 * kuwahara sectors, which is controlled by the following factor that gets multiplied to the
789 * radius to get the ellipse width and divides the radius to get the ellipse height:
790 *
791 * (eccentricity + anisotropy) / eccentricity
792 *
793 * Since the anisotropy is in the [0, 1] range, the factor tends to 1 as the eccentricity tends
794 * to infinity and tends to infinity when the eccentricity tends to zero. The stored
795 * eccentricity is in the range [0, 2], we map that to the range [infinity, 0.5] by taking the
796 * reciprocal, satisfying the aforementioned limits. The upper limit doubles the computed
797 * default eccentricity, which users can use to enhance the directionality of the filter.
798 * Instead of actual infinity, we just use an eccentricity of 1 / 0.01 since the result is very
799 * similar to that of infinity. */
    /* NOTE(review): the signature (Doxygen line 800, presumably
     * `float compute_eccentricity()`) is stripped from this extract. */
801 {
802 return 1.0f / math::max(0.01f, this->get_eccentricity());
803 }
804
    /* Reads the High Precision input, defaulting to false. NOTE(review): the
     * signature (Doxygen line 805, presumably `bool get_high_precision()`) is
     * stripped from this extract. */
806 {
807 return this->get_input("High Precision").get_single_value_default(false);
808 }
809
    /* Reads the Uniformity input clamped to be non-negative, defaulting to 4.
     * NOTE(review): the signature (Doxygen line 810, presumably
     * `int get_uniformity()`) is stripped from this extract. */
811 {
812 return math::max(0, this->get_input("Uniformity").get_single_value_default(4));
813 }
814
    /* Reads the Sharpness input clamped to [0, 1], defaulting to 1.
     * NOTE(review): the signature (Doxygen line 815, presumably
     * `float get_sharpness()`) is stripped from this extract. */
816 {
817 return math::clamp(this->get_input("Sharpness").get_single_value_default(1.0f), 0.0f, 1.0f);
818 }
819
    /* Reads the Eccentricity input clamped to [0, 2], defaulting to 1.
     * NOTE(review): the signature (Doxygen line 820, presumably
     * `float get_eccentricity()`) is stripped from this extract. */
821 {
822 return math::clamp(this->get_input("Eccentricity").get_single_value_default(1.0f), 0.0f, 2.0f);
823 }
824};
825
/* Factory for the compositor operation, assigned to
 * `ntype.get_compositor_operation` below. NOTE(review): the signature (Doxygen
 * line 826, presumably
 * `static NodeOperation *get_compositor_operation(Context &context, DNode node)`)
 * is stripped from this extract. */
827{
828 return new ConvertKuwaharaOperation(context, node);
829}
830
831} // namespace blender::nodes::node_composite_kuwahara_cc
832
/* Register the Kuwahara compositor node type. NOTE(review): the registration
 * function's signature (Doxygen line 833), the file-namespace alias (line 835,
 * presumably `namespace file_ns = ...` given the `file_ns::` references
 * below), the nclass assignment (line 844), the start of the storage
 * registration call (line 849, presumably `blender::bke::node_type_storage(`),
 * and the final registration call (line 853) are stripped from this
 * extract. */
834{
836
837 static blender::bke::bNodeType ntype;
838
839 cmp_node_type_base(&ntype, "CompositorNodeKuwahara", CMP_NODE_KUWAHARA);
840 ntype.ui_name = "Kuwahara";
841 ntype.ui_description =
842 "Apply smoothing filter that preserves edges, for stylized and painterly effects";
843 ntype.enum_name_legacy = "KUWAHARA";
845 ntype.declare = file_ns::cmp_node_kuwahara_declare;
846 ntype.updatefunc = file_ns::node_update;
847 ntype.draw_buttons = file_ns::node_composit_buts_kuwahara;
848 ntype.initfunc = file_ns::node_composit_init_kuwahara;
850 ntype, "NodeKuwaharaData", node_free_standard_storage, node_copy_standard_storage);
851 ntype.get_compositor_operation = file_ns::get_compositor_operation;
852
854}
#define NODE_STORAGE_FUNCS(StorageT)
Definition BKE_node.hh:1215
#define NODE_CLASS_OP_FILTER
Definition BKE_node.hh:437
#define CMP_NODE_KUWAHARA
@ CMP_NODE_KUWAHARA_CLASSIC
@ CMP_NODE_KUWAHARA_ANISOTROPIC
@ SOCK_IN
void GPU_shader_uniform_1i(GPUShader *sh, const char *name, int value)
void GPU_shader_uniform_1f(GPUShader *sh, const char *name, float value)
void GPU_shader_bind(GPUShader *shader, const blender::gpu::shader::SpecializationConstants *constants_state=nullptr)
void GPU_shader_unbind()
#define NOD_REGISTER_NODE(REGISTER_FUNC)
@ PROP_FACTOR
Definition RNA_types.hh:239
#define UI_ITEM_NONE
BMesh const char void * data
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition btDbvt.cpp:52
Result create_result(ResultType type, ResultPrecision precision)
GPUShader * get_shader(const char *info_name, ResultPrecision precision)
NodeOperation(Context &context, DNode node)
Result & get_result(StringRef identifier)
Definition operation.cc:39
Result & get_input(StringRef identifier) const
Definition operation.cc:138
virtual Domain compute_domain()
Definition operation.cc:56
void share_data(const Result &source)
Definition result.cc:401
void allocate_texture(Domain domain, bool from_pool=true)
Definition result.cc:309
void unbind_as_texture() const
Definition result.cc:389
void bind_as_texture(GPUShader *shader, const char *texture_name) const
Definition result.cc:365
T load_pixel(const int2 &texel) const
void bind_as_image(GPUShader *shader, const char *image_name, bool read=false) const
Definition result.cc:376
void unbind_as_image() const
Definition result.cc:395
bool is_single_value() const
Definition result.cc:625
const T & get_single_value() const
void execute_classic_summed_area_table_cpu(const Result &table, const Result &squared_table)
void compute_classic(const Result *input, const Result *table, const Result *squared_table, const Result &size_input, Result &output, const int2 size)
void execute_classic_summed_area_table_gpu(const Result &table, const Result &squared_table)
#define input
constexpr T sign(T) RET
#define output
void * MEM_callocN(size_t len, const char *str)
Definition mallocn.cc:118
bNodeSocket * node_find_socket(bNode &node, eNodeSocketInOut in_out, StringRef identifier)
Definition node.cc:2864
void node_register_type(bNodeType &ntype)
Definition node.cc:2748
void node_set_socket_availability(bNodeTree &ntree, bNodeSocket &sock, bool is_available)
Definition node.cc:5011
void node_type_storage(bNodeType &ntype, std::optional< StringRefNull > storagename, void(*freefunc)(bNode *node), void(*copyfunc)(bNodeTree *dest_ntree, bNode *dest_node, const bNode *src_node))
Definition node.cc:5603
void compute_dispatch_threads_at_least(GPUShader *shader, int2 threads_range, int2 local_size=int2(16))
Definition utilities.cc:170
void summed_area_table(Context &context, Result &input, Result &output, SummedAreaTableOperation operation=SummedAreaTableOperation::Identity)
void symmetric_separable_blur(Context &context, const Result &input, Result &output, const float2 &radius, const int filter_type=R_FILTER_GAUSS, const bool extend_bounds=false)
float4 summed_area_table_sum(const Result &table, const int2 &lower_bound, const int2 &upper_bound)
void parallel_for(const int2 range, const Function &function)
T cos(const AngleRadianBase< T > &a)
T pow(const T &x, const T &power)
T clamp(const T &a, const T &min, const T &max)
T sqrt(const T &a)
T exp(const T &x)
T dot(const QuaternionBase< T > &a, const QuaternionBase< T > &b)
T min(const T &a, const T &b)
T square(const T &a)
T ceil(const T &a)
T sin(const AngleRadianBase< T > &a)
T max(const T &a, const T &b)
T abs(const T &a)
static void node_update(bNodeTree *ntree, bNode *node)
static void node_composit_buts_kuwahara(uiLayout *layout, bContext *, PointerRNA *ptr)
static void cmp_node_kuwahara_declare(NodeDeclarationBuilder &b)
static NodeOperation * get_compositor_operation(Context &context, DNode node)
static void node_composit_init_kuwahara(bNodeTree *, bNode *node)
MatBase< float, 2, 2 > float2x2
VecBase< float, 4 > float4
VecBase< int32_t, 2 > int2
VecBase< float, 2 > float2
VecBase< float, 3 > float3
static void register_node_type_cmp_kuwahara()
void cmp_node_type_base(blender::bke::bNodeType *ntype, std::string idname, const std::optional< int16_t > legacy_type)
void node_free_standard_storage(bNode *node)
Definition node_util.cc:42
void node_copy_standard_storage(bNodeTree *, bNode *dest_node, const bNode *src_node)
Definition node_util.cc:54
#define min(a, b)
Definition sort.cc:36
void * storage
VecBase< T, 3 > xyz() const
Defines a node type.
Definition BKE_node.hh:226
std::string ui_description
Definition BKE_node.hh:232
NodeGetCompositorOperationFunction get_compositor_operation
Definition BKE_node.hh:336
void(* initfunc)(bNodeTree *ntree, bNode *node)
Definition BKE_node.hh:277
const char * enum_name_legacy
Definition BKE_node.hh:235
void(* draw_buttons)(uiLayout *, bContext *C, PointerRNA *ptr)
Definition BKE_node.hh:247
NodeDeclareFunction declare
Definition BKE_node.hh:355
void(* updatefunc)(bNodeTree *ntree, bNode *node)
Definition BKE_node.hh:269
void prop(PointerRNA *ptr, PropertyRNA *prop, int index, int value, eUI_Item_Flag flag, std::optional< blender::StringRef > name_opt, int icon, std::optional< blender::StringRef > placeholder=std::nullopt)
i
Definition text_draw.cc:230
max
Definition text_draw.cc:251
static pxr::UsdShadeInput get_input(const pxr::UsdShadeShader &usd_shader, const pxr::TfToken &input_name)
PointerRNA * ptr
Definition wm_files.cc:4227