Blender V5.0
node_composite_kuwahara.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2023 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
8
9#include <limits>
10
11#include "BLI_math_base.hh"
13#include "BLI_math_numbers.hh"
14#include "BLI_math_vector.hh"
16
17#include "RNA_types.hh"
18
19#include "COM_node_operation.hh"
20#include "COM_utilities.hh"
21
24
26
28
/* RNA menu items for the Kuwahara variation type ("Classic" vs "Anisotropic").
 * NOTE(review): the enum identifier fields of each item (presumably
 * CMP_NODE_KUWAHARA_CLASSIC and CMP_NODE_KUWAHARA_ANISOTROPIC, matching the
 * "CLASSIC"/"ANISOTROPIC" string identifiers below) are elided in this view of
 * the file -- confirm against the original source. */
29static const EnumPropertyItem type_items[] = {
31 "CLASSIC",
32 0,
33 N_("Classic"),
34 N_("Fast but less accurate variation")},
36 "ANISOTROPIC",
37 0,
38 N_("Anisotropic"),
39 N_("Accurate but slower variation")},
40 {0, nullptr, 0, nullptr, nullptr},
41};
42
/* Declares the node's sockets: a dynamic Color image in/out pair, a Size
 * input, a Type menu (defaulting to Anisotropic), and the Uniformity /
 * Sharpness / Eccentricity / High Precision inputs whose visibility depends
 * on the selected Type. NOTE(review): the signature line is elided in this
 * view -- per the `ntype.declare = file_ns::cmp_node_kuwahara_declare`
 * assignment at the bottom of the file, this is presumably
 * `static void cmp_node_kuwahara_declare(NodeDeclarationBuilder &b)`; gaps in
 * the embedded numbering below mark other elided lines (e.g. the trailing
 * call on the "Type" declaration at embedded line 61). */
44{
45 b.use_custom_socket_order();
46 b.allow_any_socket_order();
47 b.add_input<decl::Color>("Image")
48 .default_value({1.0f, 1.0f, 1.0f, 1.0f})
49 .hide_value()
50 .structure_type(StructureType::Dynamic);
51 b.add_output<decl::Color>("Image").structure_type(StructureType::Dynamic).align_with_previous();
52
53 b.add_input<decl::Float>("Size")
54 .default_value(6.0f)
55 .min(0.0f)
56 .description("The size of the filter in pixels")
57 .structure_type(StructureType::Dynamic);
58 b.add_input<decl::Menu>("Type")
59 .default_value(CMP_NODE_KUWAHARA_ANISOTROPIC)
60 .static_items(type_items)
62
63 b.add_input<decl::Int>("Uniformity")
64 .default_value(4)
65 .min(0)
66 .usage_by_single_menu(CMP_NODE_KUWAHARA_ANISOTROPIC)
68 "Controls the uniformity of the direction of the filter. Higher values produces more "
69 "uniform directions");
70 b.add_input<decl::Float>("Sharpness")
71 .default_value(1.0f)
73 .min(0.0f)
74 .max(1.0f)
75 .usage_by_single_menu(CMP_NODE_KUWAHARA_ANISOTROPIC)
76 .description(
77 "Controls the sharpness of the filter. 0 means completely smooth while 1 means "
78 "completely sharp");
79 b.add_input<decl::Float>("Eccentricity")
80 .default_value(1.0f)
82 .min(0.0f)
83 .max(2.0f)
84 .usage_by_single_menu(CMP_NODE_KUWAHARA_ANISOTROPIC)
85 .description(
86 "Controls how directional the filter is. 0 means the filter is completely "
87 "omnidirectional while 2 means it is maximally directed along the edges of the image")
88 b.add_input<decl::Bool>("High Precision")
89 .default_value(false)
90 .usage_by_single_menu(CMP_NODE_KUWAHARA_CLASSIC)
92 "Uses a more precise but slower method. Use if the output contains undesirable noise.");
93}
94
/* Node init callback: assigns the node storage. NOTE(review): the line that
 * allocates `data` (embedded line 98) is elided in this view; the storage is
 * unused and kept only for forward compatibility, per the comment inside. */
95static void node_composit_init_kuwahara(bNodeTree * /*ntree*/, bNode *node)
96{
97 /* Unused, kept for forward compatibility. */
99 node->storage = data;
100}
101
102using namespace blender::compositor;
103
105 public:
107
 /* Entry point of the operation. Single-value inputs pass through unchanged
  * by sharing data with the output; otherwise the elided tail (embedded
  * lines 117-122) presumably dispatches on the Type menu to the classic or
  * anisotropic implementation -- confirm against the original file. */
108 void execute() override
109 {
110 const Result &input = this->get_input("Image");
111 if (input.is_single_value()) {
112 Result &output = this->get_result("Image");
113 output.share_data(input);
114 return;
115 }
116
119 }
120 else {
122 }
123 }
124
 /* Classic variant: chooses between the summed-area-table acceleration and
  * direct convolution. (Signature line, embedded 125, elided in this view;
  * so are the two branch calls at embedded 134 and 137.) */
126 {
127 /* For high radii, we accelerate the filter using a summed area table, making the filter
128 * execute in constant time as opposed to having quadratic complexity. Except if high precision
129 * is enabled, since summed area tables are less precise. */
130 Result &size_input = get_input("Size");
131 if (!this->get_high_precision() &&
132 (!size_input.is_single_value() || size_input.get_single_value<float>() > 5.0f))
133 {
135 }
136 else {
138 }
139 }
140
 /* Direct-convolution classic path: dispatches to the GPU or CPU
  * implementation depending on the execution context. (Signature and the
  * two call lines are elided in this view.) */
142 {
143 if (this->context().use_gpu()) {
145 }
146 else {
148 }
149 }
150
 /* GPU direct-convolution path: binds the input image, the size (as a
  * uniform when constant, as a texture otherwise) and the output, then
  * dispatches the shader. (The shader-acquisition line at embedded 153, the
  * dispatch at 172, and the final unbind at 176 are elided in this view.) */
152 {
154 GPU_shader_bind(shader);
155
156 const Result &input_image = get_input("Image");
157 input_image.bind_as_texture(shader, "input_tx");
158
159 Result &size_input = get_input("Size");
160 if (size_input.is_single_value()) {
161 GPU_shader_uniform_1i(shader, "size", int(size_input.get_single_value<float>()));
162 }
163 else {
164 size_input.bind_as_texture(shader, "size_tx");
165 }
166
167 const Domain domain = compute_domain();
168 Result &output_image = get_result("Image");
169 output_image.allocate_texture(domain);
170 output_image.bind_as_image(shader, "output_img");
171
173
174 input_image.unbind_as_texture();
175 output_image.unbind_as_image();
177 }
178
 /* Returns the convolution shader variant name based on whether the size
  * input is a single (constant) value. (Signature line elided.) */
180 {
181 if (is_constant_size()) {
182 return "compositor_kuwahara_classic_convolution_constant_size";
183 }
184 return "compositor_kuwahara_classic_convolution_variable_size";
185 }
186
 /* CPU direct-convolution path: allocates the output then runs the shared
  * kernel -- presumably compute_classic<false>, since the elided call line
  * (embedded 193) is followed by arguments matching its (input, table,
  * squared_table, size, output, size) signature with null tables. */
188 {
189 const Domain domain = this->compute_domain();
190 Result &output = this->get_result("Image");
191 output.allocate_texture(domain);
192
194 &this->get_input("Image"), nullptr, nullptr, this->get_input("Size"), output, domain.size);
195 }
196
 /* Summed-area-table classic path: builds a SAT of the image and a SAT of
  * the squared image, dispatches to the GPU or CPU implementation, then
  * releases both tables. (The signature and the table-declaration lines,
  * embedded 197/199/202-206, are elided in this view; the visible argument
  * lines show the squared table being built from the "Image" input.) */
198 {
200 summed_area_table(this->context(), this->get_input("Image"), table);
201
204 this->get_input("Image"),
205 squared_table,
207
208 if (this->context().use_gpu()) {
209 this->execute_classic_summed_area_table_gpu(table, squared_table);
210 }
211 else {
212 this->execute_classic_summed_area_table_cpu(table, squared_table);
213 }
214
215 table.release();
216 squared_table.release();
217 }
218
 /* GPU SAT path: binds size, both tables and the output, then dispatches.
  * (The shader-acquisition line at embedded 221, the dispatch at 240, and
  * the final unbind at 245 are elided in this view.) */
219 void execute_classic_summed_area_table_gpu(const Result &table, const Result &squared_table)
220 {
222 GPU_shader_bind(shader);
223
224 Result &size_input = get_input("Size");
225 if (size_input.is_single_value()) {
226 GPU_shader_uniform_1i(shader, "size", int(size_input.get_single_value<float>()));
227 }
228 else {
229 size_input.bind_as_texture(shader, "size_tx");
230 }
231
232 table.bind_as_texture(shader, "table_tx");
233 squared_table.bind_as_texture(shader, "squared_table_tx");
234
235 const Domain domain = compute_domain();
236 Result &output_image = get_result("Image");
237 output_image.allocate_texture(domain);
238 output_image.bind_as_image(shader, "output_img");
239
241
242 table.unbind_as_texture();
243 squared_table.unbind_as_texture();
244 output_image.unbind_as_image();
246 }
247
 /* Returns the SAT shader variant name based on whether the size input is a
  * single (constant) value. (Signature line elided.) */
249 {
250 if (is_constant_size()) {
251 return "compositor_kuwahara_classic_summed_area_table_constant_size";
252 }
253 return "compositor_kuwahara_classic_summed_area_table_variable_size";
254 }
255
 /* CPU SAT path: allocates the output then runs the shared kernel --
  * presumably compute_classic<true>, given the null input and non-null table
  * arguments on the line after the elided call (embedded 262). */
256 void execute_classic_summed_area_table_cpu(const Result &table, const Result &squared_table)
257 {
258 const Domain domain = this->compute_domain();
259 Result &output = this->get_result("Image");
260 output.allocate_texture(domain);
261
263 nullptr, &table, &squared_table, this->get_input("Size"), output, domain.size);
264 }
265
266 /* If UseSummedAreaTable is true, then `table` and `squared_table` should be provided while
267 * `input` should be nullptr, otherwise, `input` should be provided while `table` and
268 * `squared_table` should be nullptr. */
 /* Shared classic-Kuwahara kernel. For each pixel, computes the mean and
  * mean-of-squares of the four axis-aligned quadrants around it (either by
  * brute-force accumulation or via the SATs), then writes the mean color of
  * the quadrant with minimum variance. NOTE(review): the name/first-param
  * line (embedded 270) is elided here; the cross-reference residue at the
  * end of this scrape shows the full signature as
  * `void compute_classic(const Result *input, const Result *table,
  *  const Result *squared_table, const Result &size_input, Result &output,
  *  const int2 size)`. */
269 template<bool UseSummedAreaTable>
271 const Result *table,
272 const Result *squared_table,
273 const Result &size_input,
274 Result &output,
275 const int2 size)
276 {
277 parallel_for(size, [&](const int2 texel) {
278 int radius = math::max(0, int(size_input.load_pixel<float, true>(texel)));
279
280 float4 mean_of_squared_color_of_quadrants[4] = {
281 float4(0.0f), float4(0.0f), float4(0.0f), float4(0.0f)};
282 float4 mean_of_color_of_quadrants[4] = {
283 float4(0.0f), float4(0.0f), float4(0.0f), float4(0.0f)};
284
285 /* Compute the above statistics for each of the quadrants around the current pixel. */
286 for (int q = 0; q < 4; q++) {
287 /* A fancy expression to compute the sign of the quadrant q. */
288 int2 sign = int2((q % 2) * 2 - 1, ((q / 2) * 2 - 1));
289
290 int2 lower_bound = texel - int2(sign.x > 0 ? 0 : radius, sign.y > 0 ? 0 : radius);
291 int2 upper_bound = texel + int2(sign.x < 0 ? 0 : radius, sign.y < 0 ? 0 : radius);
292
293 /* Limit the quadrants to the image bounds. */
294 int2 image_bound = size - int2(1);
295 int2 corrected_lower_bound = math::min(image_bound, math::max(int2(0), lower_bound));
296 int2 corrected_upper_bound = math::min(image_bound, math::max(int2(0), upper_bound));
297 int2 region_size = corrected_upper_bound - corrected_lower_bound + int2(1);
298 int quadrant_pixel_count = region_size.x * region_size.y;
299
300 if constexpr (UseSummedAreaTable) {
301 mean_of_color_of_quadrants[q] = summed_area_table_sum(*table, lower_bound, upper_bound);
302 mean_of_squared_color_of_quadrants[q] = summed_area_table_sum(
303 *squared_table, lower_bound, upper_bound);
304 }
305 else {
306 for (int j = 0; j <= radius; j++) {
307 for (int i = 0; i <= radius; i++) {
308 float4 color = input->load_pixel_zero<float4>(texel + int2(i, j) * sign);
309 mean_of_color_of_quadrants[q] += color;
310 mean_of_squared_color_of_quadrants[q] += color * color;
311 }
312 }
313 }
314
315 mean_of_color_of_quadrants[q] /= quadrant_pixel_count;
316 mean_of_squared_color_of_quadrants[q] /= quadrant_pixel_count;
317 }
318
319 /* Find the quadrant which has the minimum variance. */
320 float minimum_variance = std::numeric_limits<float>::max();
321 float4 mean_color_of_chosen_quadrant = mean_of_color_of_quadrants[0];
322 for (int q = 0; q < 4; q++) {
323 float4 color_mean = mean_of_color_of_quadrants[q];
324 float4 squared_color_mean = mean_of_squared_color_of_quadrants[q];
325 float4 color_variance = squared_color_mean - color_mean * color_mean;
326
 /* Variance is reduced to a scalar over the RGB channels only (alpha is
  * excluded from the selection criterion). */
327 float variance = math::dot(color_variance.xyz(), float3(1.0f));
328 if (variance < minimum_variance) {
329 minimum_variance = variance;
330 mean_color_of_chosen_quadrant = color_mean;
331 }
332 }
333
334 output.store_pixel(texel, mean_color_of_chosen_quadrant);
335 });
336 }
337
338 /* An implementation of the Anisotropic Kuwahara filter described in the paper:
339 *
340 * Kyprianidis, Jan Eric, Henry Kang, and Jurgen Dollner. "Image and video abstraction by
341 * anisotropic Kuwahara filtering." 2009.
342 */
 /* Anisotropic entry point: computes the structure tensor, smooths it (the
  * elided call at embedded 347 takes the tensor, the smoothed result and
  * float2(uniformity) -- presumably a Gaussian blur, per the note in
  * compute_structure_tensor_cpu), then dispatches to the GPU or CPU filter.
  * (Signature line, embedded 343, elided in this view.) */
344 {
345 Result structure_tensor = compute_structure_tensor();
346 Result smoothed_structure_tensor = context().create_result(ResultType::Color);
348 context(), structure_tensor, smoothed_structure_tensor, float2(this->get_uniformity()));
349 structure_tensor.release();
350
351 if (this->context().use_gpu()) {
352 this->execute_anisotropic_gpu(smoothed_structure_tensor);
353 }
354 else {
355 this->execute_anisotropic_cpu(smoothed_structure_tensor);
356 }
357
358 smoothed_structure_tensor.release();
359 }
360
 /* GPU anisotropic path: uploads the derived eccentricity/sharpness
  * uniforms, binds the image, size and smoothed structure tensor, then
  * dispatches. (The shader-acquisition line at embedded 363, the dispatch
  * at 387, and the final unbind at 392 are elided in this view.) */
361 void execute_anisotropic_gpu(const Result &structure_tensor)
362 {
364 GPU_shader_bind(shader);
365
366 GPU_shader_uniform_1f(shader, "eccentricity", this->compute_eccentricity());
367 GPU_shader_uniform_1f(shader, "sharpness", this->compute_sharpness());
368
369 Result &input = get_input("Image");
370 input.bind_as_texture(shader, "input_tx");
371
372 Result &size_input = get_input("Size");
373 if (size_input.is_single_value()) {
374 GPU_shader_uniform_1f(shader, "size", size_input.get_single_value<float>());
375 }
376 else {
377 size_input.bind_as_texture(shader, "size_tx");
378 }
379
380 structure_tensor.bind_as_texture(shader, "structure_tensor_tx");
381
382 const Domain domain = compute_domain();
383 Result &output_image = get_result("Image");
384 output_image.allocate_texture(domain);
385 output_image.bind_as_image(shader, "output_img");
386
388
389 input.unbind_as_texture();
390 structure_tensor.unbind_as_texture();
391 output_image.unbind_as_image();
393 }
394
 /* Returns the anisotropic shader variant name based on whether the size
  * input is a single (constant) value. (Signature line elided.) */
396 {
397 if (is_constant_size()) {
398 return "compositor_kuwahara_anisotropic_constant_size";
399 }
400 return "compositor_kuwahara_anisotropic_variable_size";
401 }
402
 /* CPU implementation of the anisotropic Kuwahara filter. Per pixel: derives
  * orientation and anisotropy from the smoothed structure tensor's
  * eigen-decomposition, builds an oriented ellipse of 8 overlapping sectors,
  * accumulates weighted color statistics per sector, and blends sector means
  * weighted by inverse standard deviation. See the paper references in the
  * body for the exact equations. */
403 void execute_anisotropic_cpu(const Result &structure_tensor)
404 {
405 const float eccentricity = this->compute_eccentricity();
406 const float sharpness = this->compute_sharpness();
407
408 Result &input = this->get_input("Image");
409 Result &size = this->get_input("Size");
410
411 const Domain domain = this->compute_domain();
412 Result &output = this->get_result("Image");
413 output.allocate_texture(domain);
414
415 /* An implementation of the Anisotropic Kuwahara filter described in the paper:
416 *
417 * Kyprianidis, Jan Eric, Henry Kang, and Jurgen Dollner. "Image and video abstraction by
418 * anisotropic Kuwahara filtering." 2009.
419 *
420 * But with the polynomial weighting functions described in the paper:
421 *
422 * Kyprianidis, Jan Eric, et al. "Anisotropic Kuwahara Filtering with Polynomial Weighting
423 * Functions." 2010.
424 *
425 * And the sector weight function described in the paper:
426 *
427 * Kyprianidis, Jan Eric. "Image and video abstraction by multi-scale anisotropic Kuwahara
428 * filtering." 2011.
429 */
430
431 parallel_for(domain.size, [&](const int2 texel) {
432 /* The structure tensor is encoded in a float4 using a column major storage order, as can be
433 * seen in the compute_structure_tensor_cpu method. */
434 float4 encoded_structure_tensor = structure_tensor.load_pixel<float4>(texel);
435 float dxdx = encoded_structure_tensor.x;
436 float dxdy = encoded_structure_tensor.y;
437 float dydy = encoded_structure_tensor.w;
438
439 /* Compute the first and second eigenvalues of the structure tensor using the equations in
440 * section "3.1 Orientation and Anisotropy Estimation" of the paper. */
441 float eigenvalue_first_term = (dxdx + dydy) / 2.0f;
442 float eigenvalue_square_root_term = math::sqrt(math::square(dxdx - dydy) +
443 4.0f * math::square(dxdy)) /
444 2.0f;
445 float first_eigenvalue = eigenvalue_first_term + eigenvalue_square_root_term;
446 float second_eigenvalue = eigenvalue_first_term - eigenvalue_square_root_term;
447
448 /* Compute the normalized eigenvector of the structure tensor oriented in direction of the
449 * minimum rate of change using the equations in section "3.1 Orientation and Anisotropy
450 * Estimation" of the paper. */
451 float2 eigenvector = float2(first_eigenvalue - dxdx, -dxdy);
452 float eigenvector_length = math::length(eigenvector);
453 float2 unit_eigenvector = eigenvector_length != 0.0f ? eigenvector / eigenvector_length :
454 float2(1.0f);
455
456 /* Compute the amount of anisotropy using equations in section "3.1 Orientation and
457 * Anisotropy Estimation" of the paper. The anisotropy ranges from 0 to 1, where 0
458 * corresponds to isotropic and 1 corresponds to entirely anisotropic regions. */
459 float eigenvalue_sum = first_eigenvalue + second_eigenvalue;
460 float eigenvalue_difference = first_eigenvalue - second_eigenvalue;
461 float anisotropy = eigenvalue_sum > 0.0f ? eigenvalue_difference / eigenvalue_sum : 0.0f;
462
 /* A zero radius means no filtering: pass the pixel through unchanged. */
463 float radius = math::max(0.0f, size.load_pixel<float, true>(texel));
464 if (radius == 0.0f) {
465 output.store_pixel(texel, input.load_pixel<float4>(texel));
466 return;
467 }
468
469 /* Compute the width and height of an ellipse that is more width-elongated for high
470 * anisotropy and more circular for low anisotropy, controlled using the eccentricity factor.
471 * Since the anisotropy is in the [0, 1] range, the width factor tends to 1 as the
472 * eccentricity tends to infinity and tends to infinity when the eccentricity tends to zero.
473 * This is based on the equations in section "3.2. Anisotropic Kuwahara Filtering" of the
474 * paper. */
475 float ellipse_width_factor = (eccentricity + anisotropy) / eccentricity;
476 float ellipse_width = ellipse_width_factor * radius;
477 float ellipse_height = radius / ellipse_width_factor;
478
479 /* Compute the cosine and sine of the angle that the eigenvector makes with the x axis. Since
480 * the eigenvector is normalized, its x and y components are the cosine and sine of the angle
481 * it makes with the x axis. */
482 float cosine = unit_eigenvector.x;
483 float sine = unit_eigenvector.y;
484
485 /* Compute an inverse transformation matrix that represents an ellipse of the given width and
486 * height and makes and an angle with the x axis of the given cosine and sine. This is an
487 * inverse matrix, so it transforms the ellipse into a disk of unit radius. */
488 float2x2 inverse_ellipse_matrix = float2x2(
489 float2(cosine / ellipse_width, -sine / ellipse_height),
490 float2(sine / ellipse_width, cosine / ellipse_height));
491
492 /* Compute the bounding box of a zero centered ellipse whose major axis is aligned with the
493 * eigenvector and has the given width and height. This is based on the equations described
494 * in:
495 *
496 * https://iquilezles.org/articles/ellipses/
497 *
498 * Notice that we only compute the upper bound, the lower bound is just negative that since
499 * the ellipse is zero centered. Also notice that we take the ceiling of the bounding box,
500 * just to ensure the filter window is at least 1x1. */
501 float2 ellipse_major_axis = ellipse_width * unit_eigenvector;
502 float2 ellipse_minor_axis = ellipse_height * float2(unit_eigenvector.y, unit_eigenvector.x) *
503 float2(-1, 1);
504 int2 ellipse_bounds = int2(math::ceil(
505 math::sqrt(math::square(ellipse_major_axis) + math::square(ellipse_minor_axis))));
506
507 /* Compute the overlap polynomial parameters for 8-sector ellipse based on the equations in
508 * section "3 Alternative Weighting Functions" of the polynomial weights paper. More on this
509 * later in the code. */
510 const int number_of_sectors = 8;
511 float sector_center_overlap_parameter = 2.0f / radius;
512 float sector_envelope_angle = ((3.0f / 2.0f) * math::numbers::pi_v<float>) /
513 number_of_sectors;
514 float cross_sector_overlap_parameter = (sector_center_overlap_parameter +
515 math::cos(sector_envelope_angle)) /
516 math::square(math::sin(sector_envelope_angle));
517
518 /* We need to compute the weighted mean of color and squared color of each of the 8 sectors
519 * of the ellipse, so we declare arrays for accumulating those and initialize them in the
520 * next code section. */
521 float4 weighted_mean_of_squared_color_of_sectors[8];
522 float4 weighted_mean_of_color_of_sectors[8];
523 float sum_of_weights_of_sectors[8];
524
525 /* The center pixel (0, 0) is exempt from the main loop below for reasons that are explained
526 * in the first if statement in the loop, so we need to accumulate its color, squared color,
527 * and weight separately first. Luckily, the zero coordinates of the center pixel zeros out
528 * most of the complex computations below, and it can easily be shown that the weight for the
529 * center pixel in all sectors is simply (1 / number_of_sectors). */
530 float4 center_color = input.load_pixel<float4>(texel);
531 float4 center_color_squared = center_color * center_color;
532 float center_weight = 1.0f / number_of_sectors;
533 float4 weighted_center_color = center_color * center_weight;
534 float4 weighted_center_color_squared = center_color_squared * center_weight;
535 for (int i = 0; i < number_of_sectors; i++) {
536 weighted_mean_of_squared_color_of_sectors[i] = weighted_center_color_squared;
537 weighted_mean_of_color_of_sectors[i] = weighted_center_color;
538 sum_of_weights_of_sectors[i] = center_weight;
539 }
540
541 /* Loop over the window of pixels inside the bounding box of the ellipse. However, we utilize
542 * the fact that ellipses are mirror symmetric along the horizontal axis, so we reduce the
543 * window to only the upper two quadrants, and compute each two mirrored pixels at the same
544 * time using the same weight as an optimization. */
545 for (int j = 0; j <= ellipse_bounds.y; j++) {
546 for (int i = -ellipse_bounds.x; i <= ellipse_bounds.x; i++) {
547 /* Since we compute each two mirrored pixels at the same time, we need to also exempt the
548 * pixels whose x coordinates are negative and their y coordinates are zero, that's
549 * because those are mirrored versions of the pixels whose x coordinates are positive and
550 * their y coordinates are zero, and we don't want to compute and accumulate them twice.
551 * Moreover, we also need to exempt the center pixel with zero coordinates for the same
552 * reason, however, since the mirror of the center pixel is itself, it need to be
553 * accumulated separately, hence why we did that in the code section just before this
554 * loop. */
555 if (j == 0 && i <= 0) {
556 continue;
557 }
558
559 /* Map the pixels of the ellipse into a unit disk, exempting any points that are not part
560 * of the ellipse or disk. */
561 float2 disk_point = inverse_ellipse_matrix * float2(i, j);
562 float disk_point_length_squared = math::dot(disk_point, disk_point);
563 if (disk_point_length_squared > 1.0f) {
564 continue;
565 }
566
567 /* While each pixel belongs to a single sector in the ellipse, we expand the definition
568 * of a sector a bit to also overlap with other sectors as illustrated in Figure 8 of the
569 * polynomial weights paper. So each pixel may contribute to multiple sectors, and thus
570 * we compute its weight in each of the 8 sectors. */
571 float sector_weights[8];
572
573 /* We evaluate the weighting polynomial at each of the 8 sectors by rotating the disk
574 * point by 45 degrees and evaluating the weighting polynomial at each incremental
575 * rotation. To avoid potentially expensive rotations, we utilize the fact that rotations
576 * by 90 degrees are simply swapping of the coordinates and negating the x component. We
577 * also note that since the y term of the weighting polynomial is squared, it is not
578 * affected by the sign and can be computed once for the x and once for the y
579 * coordinates. So we compute every other even-indexed 4 weights by successive 90 degree
580 * rotations as discussed. */
581 float2 polynomial = sector_center_overlap_parameter -
582 cross_sector_overlap_parameter * math::square(disk_point);
583 sector_weights[0] = math::square(math::max(0.0f, disk_point.y + polynomial.x));
584 sector_weights[2] = math::square(math::max(0.0f, -disk_point.x + polynomial.y));
585 sector_weights[4] = math::square(math::max(0.0f, -disk_point.y + polynomial.x));
586 sector_weights[6] = math::square(math::max(0.0f, disk_point.x + polynomial.y));
587
588 /* Then we rotate the disk point by 45 degrees, which is a simple expression involving a
589 * constant as can be demonstrated by applying a 45 degree rotation matrix. */
590 float2 rotated_disk_point = (1.0f / math::numbers::sqrt2) *
591 float2(disk_point.x - disk_point.y,
592 disk_point.x + disk_point.y);
593
594 /* Finally, we compute every other odd-index 4 weights starting from the 45 degrees
595 * rotated disk point. */
596 float2 rotated_polynomial = sector_center_overlap_parameter -
597 cross_sector_overlap_parameter *
598 math::square(rotated_disk_point);
599 sector_weights[1] = math::square(
600 math::max(0.0f, rotated_disk_point.y + rotated_polynomial.x));
601 sector_weights[3] = math::square(
602 math::max(0.0f, -rotated_disk_point.x + rotated_polynomial.y));
603 sector_weights[5] = math::square(
604 math::max(0.0f, -rotated_disk_point.y + rotated_polynomial.x));
605 sector_weights[7] = math::square(
606 math::max(0.0f, rotated_disk_point.x + rotated_polynomial.y));
607
608 /* We compute a radial Gaussian weighting component such that pixels further away from
609 * the sector center gets attenuated, and we also divide by the sum of sector weights to
610 * normalize them, since the radial weight will eventually be multiplied to the sector
611 * weight below. */
612 float sector_weights_sum = sector_weights[0] + sector_weights[1] + sector_weights[2] +
613 sector_weights[3] + sector_weights[4] + sector_weights[5] +
614 sector_weights[6] + sector_weights[7];
615 float radial_gaussian_weight = math::exp(-math::numbers::pi *
616 disk_point_length_squared) /
617 sector_weights_sum;
618
619 /* Load the color of the pixel and its mirrored pixel and compute their square. */
620 float4 upper_color = input.load_pixel_extended<float4>(texel + int2(i, j));
621 float4 lower_color = input.load_pixel_extended<float4>(texel - int2(i, j));
622 float4 upper_color_squared = upper_color * upper_color;
623 float4 lower_color_squared = lower_color * lower_color;
624
625 for (int k = 0; k < number_of_sectors; k++) {
626 float weight = sector_weights[k] * radial_gaussian_weight;
627
628 /* Accumulate the pixel to each of the sectors multiplied by the sector weight. */
629 int upper_index = k;
630 sum_of_weights_of_sectors[upper_index] += weight;
631 weighted_mean_of_color_of_sectors[upper_index] += upper_color * weight;
632 weighted_mean_of_squared_color_of_sectors[upper_index] += upper_color_squared * weight;
633
634 /* Accumulate the mirrored pixel to each of the sectors multiplied by the sector
635 * weight. */
636 int lower_index = (k + number_of_sectors / 2) % number_of_sectors;
637 sum_of_weights_of_sectors[lower_index] += weight;
638 weighted_mean_of_color_of_sectors[lower_index] += lower_color * weight;
639 weighted_mean_of_squared_color_of_sectors[lower_index] += lower_color_squared * weight;
640 }
641 }
642 }
643
644 /* Compute the weighted sum of mean of sectors, such that sectors with lower standard
645 * deviation gets more significant weight than sectors with higher standard deviation. */
646 float sum_of_weights = 0.0f;
647 float4 weighted_sum = float4(0.0f);
648 for (int i = 0; i < number_of_sectors; i++) {
649 weighted_mean_of_color_of_sectors[i] /= sum_of_weights_of_sectors[i];
650 weighted_mean_of_squared_color_of_sectors[i] /= sum_of_weights_of_sectors[i];
651
652 float4 color_mean = weighted_mean_of_color_of_sectors[i];
653 float4 squared_color_mean = weighted_mean_of_squared_color_of_sectors[i];
 /* abs() guards against tiny negative variances from floating-point
  * cancellation before the sqrt below. */
654 float4 color_variance = math::abs(squared_color_mean - color_mean * color_mean);
655
656 float standard_deviation = math::dot(math::sqrt(color_variance.xyz()), float3(1.0f));
657
658 /* Compute the sector weight based on the weight function introduced in section "3.3.1
659 * Single-scale Filtering" of the multi-scale paper. Use a threshold of 0.02 to avoid zero
660 * division and avoid artifacts in homogeneous regions as demonstrated in the paper. */
661 float weight = 1.0f / math::pow(math::max(0.02f, standard_deviation), sharpness);
662
663 sum_of_weights += weight;
664 weighted_sum += color_mean * weight;
665 }
666
667 /* Fall back to the original color if all sector weights are zero due to very high standard
668 * deviation and sharpness. */
669 if (sum_of_weights == 0.0f) {
670 weighted_sum = center_color;
671 }
672 else {
673 weighted_sum /= sum_of_weights;
674 }
675
676 output.store_pixel(texel, weighted_sum);
677 });
678 }
679
 /* Computes the image's structure tensor on GPU or CPU depending on the
  * execution context. (Signature line, embedded 680, elided in this view;
  * both branches return a Result.) */
681 {
682 if (this->context().use_gpu()) {
683 return this->compute_structure_tensor_gpu();
684 }
685 return this->compute_structure_tensor_cpu();
686 }
687
 /* GPU structure-tensor computation: binds the input and a newly allocated
  * Color result, dispatches the shader, and returns the result. (The
  * dispatch at embedded 702 and the unbind at 706 are elided in this view.) */
689 {
690 gpu::Shader *shader = context().get_shader(
691 "compositor_kuwahara_anisotropic_compute_structure_tensor");
692 GPU_shader_bind(shader);
693
694 Result &input = get_input("Image");
695 input.bind_as_texture(shader, "input_tx");
696
697 const Domain domain = compute_domain();
698 Result structure_tensor = context().create_result(ResultType::Color);
699 structure_tensor.allocate_texture(domain);
700 structure_tensor.bind_as_image(shader, "structure_tensor_img");
701
703
704 input.unbind_as_texture();
705 structure_tensor.unbind_as_image();
707
708 return structure_tensor;
709 }
710
 /* CPU structure-tensor computation; returns a Color result holding the
  * per-pixel tensor. (Signature line, embedded 711, elided in this view.) */
712 {
713 const Result &input = this->get_input("Image");
714
715 const Domain domain = this->compute_domain();
716 Result structure_tensor_image = context().create_result(ResultType::Color);
717 structure_tensor_image.allocate_texture(domain);
718
719 /* Computes the structure tensor of the image using a Dirac delta window function as described
720 * in section "3.2 Local Structure Estimation" of the paper:
721 *
722 * Kyprianidis, Jan Eric. "Image and video abstraction by multi-scale anisotropic Kuwahara
723 * filtering." 2011.
724 *
725 * The structure tensor should then be smoothed using a Gaussian function to eliminate high
726 * frequency details. */
727 parallel_for(domain.size, [&](const int2 texel) {
728 /* The weight kernels of the filter optimized for rotational symmetry described in section
729 * "3.2.1 Gradient Calculation". */
730 const float corner_weight = 0.182f;
731 const float center_weight = 1.0f - 2.0f * corner_weight;
732
 /* Sobel-like horizontal derivative over the RGB channels of the 3x3
  * neighborhood (alpha excluded by the .xyz() swizzle). */
733 float3 x_partial_derivative =
734 input.load_pixel_extended<float4>(texel + int2(-1, 1)).xyz() * -corner_weight +
735 input.load_pixel_extended<float4>(texel + int2(-1, 0)).xyz() * -center_weight +
736 input.load_pixel_extended<float4>(texel + int2(-1, -1)).xyz() * -corner_weight +
737 input.load_pixel_extended<float4>(texel + int2(1, 1)).xyz() * corner_weight +
738 input.load_pixel_extended<float4>(texel + int2(1, 0)).xyz() * center_weight +
739 input.load_pixel_extended<float4>(texel + int2(1, -1)).xyz() * corner_weight;
740
741 float3 y_partial_derivative =
742 input.load_pixel_extended<float4>(texel + int2(-1, 1)).xyz() * corner_weight +
743 input.load_pixel_extended<float4>(texel + int2(0, 1)).xyz() * center_weight +
744 input.load_pixel_extended<float4>(texel + int2(1, 1)).xyz() * corner_weight +
745 input.load_pixel_extended<float4>(texel + int2(-1, -1)).xyz() * -corner_weight +
746 input.load_pixel_extended<float4>(texel + int2(0, -1)).xyz() * -center_weight +
747 input.load_pixel_extended<float4>(texel + int2(1, -1)).xyz() * -corner_weight;
748
749 float dxdx = math::dot(x_partial_derivative, x_partial_derivative);
750 float dxdy = math::dot(x_partial_derivative, y_partial_derivative);
751 float dydy = math::dot(y_partial_derivative, y_partial_derivative);
752
753 /* We encode the structure tensor in a float4 using a column major storage order. */
754 float4 structure_tensor = float4(dxdx, dxdy, dxdy, dydy);
755
756 structure_tensor_image.store_pixel(texel, structure_tensor);
757 });
758
759 return structure_tensor_image;
760 }
761
 /* True when the Size input is a single constant value; used to pick the
  * constant-size shader variants. (Signature line elided; callers name this
  * is_constant_size().) */
763 {
764 return get_input("Size").is_single_value();
765 }
766
767 /* The sharpness controls the sharpness of the transitions between the kuwahara sectors, which
768 * is controlled by the weighting function pow(standard_deviation, -sharpness) as can be seen
769 * in the shader. The transition is completely smooth when the sharpness is zero and completely
770 * sharp when it is infinity. But realistically, the sharpness doesn't change much beyond the
771 * value of 16 due to its exponential nature, so we just assume a maximum sharpness of 16.
772 *
773 * The stored sharpness is in the range [0, 1], so we multiply by 16 to get it in the range
774 * [0, 16], however, we also square it before multiplication to slow down the rate of change
775 * near zero to counter its exponential nature for more intuitive user control. */
 /* (compute_sharpness -- signature line, embedded 776, elided.) */
777 {
778 const float sharpness_factor = this->get_sharpness();
779 return sharpness_factor * sharpness_factor * 16.0f;
780 }
781
782 /* The eccentricity controls how much the image anisotropy affects the eccentricity of the
783 * kuwahara sectors, which is controlled by the following factor that gets multiplied to the
784 * radius to get the ellipse width and divides the radius to get the ellipse height:
785 *
786 * (eccentricity + anisotropy) / eccentricity
787 *
788 * Since the anisotropy is in the [0, 1] range, the factor tends to 1 as the eccentricity tends
789 * to infinity and tends to infinity when the eccentricity tends to zero. The stored
790 * eccentricity is in the range [0, 2], we map that to the range [infinity, 0.5] by taking the
791 * reciprocal, satisfying the aforementioned limits. The upper limit doubles the computed
792 * default eccentricity, which users can use to enhance the directionality of the filter.
793 * Instead of actual infinity, we just use an eccentricity of 1 / 0.01 since the result is very
794 * similar to that of infinity. */
 /* (compute_eccentricity -- signature line, embedded 795, elided.) */
796 {
797 return 1.0f / math::max(0.01f, this->get_eccentricity());
798 }
799
 /* Reads the "High Precision" input, defaulting to false when unconnected.
  * (Signature elided; callers name this get_high_precision().) */
801 {
802 return this->get_input("High Precision").get_single_value_default(false);
803 }
804
 /* Reads the "Uniformity" input clamped to be non-negative, defaulting to 4.
  * (Signature elided; callers name this get_uniformity().) */
806 {
807 return math::max(0, this->get_input("Uniformity").get_single_value_default(4));
808 }
809
 /* Reads the "Sharpness" input clamped to [0, 1], defaulting to 1.
  * (Signature elided; callers name this get_sharpness().) */
811 {
812 return math::clamp(this->get_input("Sharpness").get_single_value_default(1.0f), 0.0f, 1.0f);
813 }
814
 /* Reads the "Eccentricity" input clamped to [0, 2], defaulting to 1.
  * (Signature elided; callers name this get_eccentricity().) */
816 {
817 return math::clamp(this->get_input("Eccentricity").get_single_value_default(1.0f), 0.0f, 2.0f);
818 }
819
 /* Reads the "Type" menu input as a CMPNodeKuwahara, defaulting to the
  * anisotropic variation. (Signature line, embedded 820, elided.) */
821 {
822 const Result &input = this->get_input("Type");
823 const MenuValue default_menu_value = MenuValue(CMP_NODE_KUWAHARA_ANISOTROPIC);
824 const MenuValue menu_value = input.get_single_value_default(default_menu_value);
825 return static_cast<CMPNodeKuwahara>(menu_value.value);
826 }
827};
828
/* Factory for the compositor operation of this node. (Signature line,
 * embedded 829, elided; per the `ntype.get_compositor_operation` assignment
 * below, this is presumably
 * `static NodeOperation *get_compositor_operation(Context &context, DNode node)`.) */
830{
831 return new ConvertKuwaharaOperation(context, node);
832}
833
834} // namespace blender::nodes::node_composite_kuwahara_cc
835
/* Registers the Kuwahara compositor node type. NOTE(review): the function
 * signature (embedded 836) and several registration lines (embedded 838, 847,
 * 850, 853, 855 -- presumably the file_ns alias, node class, storage
 * registration call and the register macro) are elided in this view --
 * confirm against the original file. */
837{
839
840 static blender::bke::bNodeType ntype;
841
842 cmp_node_type_base(&ntype, "CompositorNodeKuwahara", CMP_NODE_KUWAHARA);
843 ntype.ui_name = "Kuwahara";
844 ntype.ui_description =
845 "Apply smoothing filter that preserves edges, for stylized and painterly effects";
846 ntype.enum_name_legacy = "KUWAHARA";
848 ntype.declare = file_ns::cmp_node_kuwahara_declare;
849 ntype.initfunc = file_ns::node_composit_init_kuwahara;
851 ntype, "NodeKuwaharaData", node_free_standard_storage, node_copy_standard_storage);
852 ntype.get_compositor_operation = file_ns::get_compositor_operation;
854
856}
constexpr int NODE_DEFAULT_MAX_WIDTH
Definition BKE_node.hh:1250
#define NODE_CLASS_OP_FILTER
Definition BKE_node.hh:451
#define CMP_NODE_KUWAHARA
CMPNodeKuwahara
@ CMP_NODE_KUWAHARA_CLASSIC
@ CMP_NODE_KUWAHARA_ANISOTROPIC
void GPU_shader_uniform_1f(blender::gpu::Shader *sh, const char *name, float value)
void GPU_shader_bind(blender::gpu::Shader *shader, const blender::gpu::shader::SpecializationConstants *constants_state=nullptr)
void GPU_shader_uniform_1i(blender::gpu::Shader *sh, const char *name, int value)
void GPU_shader_unbind()
#define NOD_REGISTER_NODE(REGISTER_FUNC)
@ PROP_FACTOR
Definition RNA_types.hh:251
BMesh const char void * data
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition btDbvt.cpp:52
Result create_result(ResultType type, ResultPrecision precision)
gpu::Shader * get_shader(const char *info_name, ResultPrecision precision)
NodeOperation(Context &context, DNode node)
Result & get_result(StringRef identifier)
Definition operation.cc:39
Result & get_input(StringRef identifier) const
Definition operation.cc:138
virtual Domain compute_domain()
Definition operation.cc:56
void share_data(const Result &source)
Definition result.cc:523
void allocate_texture(const Domain domain, const bool from_pool=true, const std::optional< ResultStorageType > storage_type=std::nullopt)
Definition result.cc:389
void unbind_as_texture() const
Definition result.cc:511
void bind_as_texture(gpu::Shader *shader, const char *texture_name) const
Definition result.cc:487
T load_pixel(const int2 &texel) const
void unbind_as_image() const
Definition result.cc:517
void bind_as_image(gpu::Shader *shader, const char *image_name, bool read=false) const
Definition result.cc:498
bool is_single_value() const
Definition result.cc:758
const T & get_single_value() const
void execute_classic_summed_area_table_cpu(const Result &table, const Result &squared_table)
void compute_classic(const Result *input, const Result *table, const Result *squared_table, const Result &size_input, Result &output, const int2 size)
void execute_classic_summed_area_table_gpu(const Result &table, const Result &squared_table)
#define input
#define output
constexpr T sign(T) RET
void * MEM_callocN(size_t len, const char *str)
Definition mallocn.cc:118
void node_type_size(bNodeType &ntype, int width, int minwidth, int maxwidth)
Definition node.cc:5384
void node_register_type(bNodeType &ntype)
Definition node.cc:2416
void node_type_storage(bNodeType &ntype, std::optional< StringRefNull > storagename, void(*freefunc)(bNode *node), void(*copyfunc)(bNodeTree *dest_ntree, bNode *dest_node, const bNode *src_node))
Definition node.cc:5414
void symmetric_separable_blur(Context &context, const Result &input, Result &output, const float2 &radius, const int filter_type=R_FILTER_GAUSS)
void compute_dispatch_threads_at_least(gpu::Shader *shader, int2 threads_range, int2 local_size=int2(16))
Definition utilities.cc:196
void summed_area_table(Context &context, Result &input, Result &output, SummedAreaTableOperation operation=SummedAreaTableOperation::Identity)
float4 summed_area_table_sum(const Result &table, const int2 &lower_bound, const int2 &upper_bound)
void parallel_for(const int2 range, const Function &function)
T cos(const AngleRadianBase< T > &a)
T pow(const T &x, const T &power)
T clamp(const T &a, const T &min, const T &max)
T sqrt(const T &a)
T exp(const T &x)
T dot(const QuaternionBase< T > &a, const QuaternionBase< T > &b)
T min(const T &a, const T &b)
T square(const T &a)
T ceil(const T &a)
T sin(const AngleRadianBase< T > &a)
T max(const T &a, const T &b)
T abs(const T &a)
static void cmp_node_kuwahara_declare(NodeDeclarationBuilder &b)
static NodeOperation * get_compositor_operation(Context &context, DNode node)
static void node_composit_init_kuwahara(bNodeTree *, bNode *node)
MatBase< float, 2, 2 > float2x2
VecBase< float, 4 > float4
VecBase< int32_t, 2 > int2
VecBase< float, 2 > float2
VecBase< float, 3 > float3
static void register_node_type_cmp_kuwahara()
void cmp_node_type_base(blender::bke::bNodeType *ntype, std::string idname, const std::optional< int16_t > legacy_type)
void node_free_standard_storage(bNode *node)
Definition node_util.cc:42
void node_copy_standard_storage(bNodeTree *, bNode *dest_node, const bNode *src_node)
Definition node_util.cc:54
void * storage
VecBase< T, 3 > xyz() const
Defines a node type.
Definition BKE_node.hh:238
std::string ui_description
Definition BKE_node.hh:244
NodeGetCompositorOperationFunction get_compositor_operation
Definition BKE_node.hh:348
void(* initfunc)(bNodeTree *ntree, bNode *node)
Definition BKE_node.hh:289
const char * enum_name_legacy
Definition BKE_node.hh:247
NodeDeclareFunction declare
Definition BKE_node.hh:362
i
Definition text_draw.cc:230
static pxr::UsdShadeInput get_input(const pxr::UsdShadeShader &usd_shader, const pxr::TfToken &input_name)
#define N_(msgid)