Blender V4.3
node_composite_glare.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2006 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
9#include <array>
10#include <complex>
11#include <memory>
12
13#if defined(WITH_FFTW3)
14# include <fftw3.h>
15#endif
16
17#include "BLI_array.hh"
18#include "BLI_assert.h"
19#include "BLI_fftw.hh"
20#include "BLI_index_range.hh"
21#include "BLI_math_base.h"
22#include "BLI_math_base.hh"
24#include "BLI_task.hh"
25
26#include "DNA_scene_types.h"
27
28#include "RNA_access.hh"
29
30#include "UI_interface.hh"
31#include "UI_resources.hh"
32
33#include "GPU_shader.hh"
34#include "GPU_state.hh"
35#include "GPU_texture.hh"
36
38#include "COM_node_operation.hh"
39#include "COM_utilities.hh"
40
42
/* Upper bounds used by the glare operation. MAX_GLARE_SIZE is the reference point for the bloom
 * size halving count (see the Bloom Glare section below). MAX_GLARE_ITERATIONS is presumably the
 * upper bound of the node's iterations option -- TODO confirm against the RNA definition. */
43#define MAX_GLARE_ITERATIONS 5
44#define MAX_GLARE_SIZE 9
45
47
49
51{
52 b.add_input<decl::Color>("Image")
53 .default_value({1.0f, 1.0f, 1.0f, 1.0f})
54 .compositor_domain_priority(0);
55 b.add_output<decl::Color>("Image");
56}
57
58static void node_composit_init_glare(bNodeTree * /*ntree*/, bNode *node)
59{
60 NodeGlare *ndg = MEM_cnew<NodeGlare>(__func__);
61 ndg->quality = 1;
63 ndg->iter = 3;
64 ndg->colmod = 0.25;
65 ndg->mix = 0;
66 ndg->threshold = 1;
67 ndg->star_45 = true;
68 ndg->streaks = 4;
69 ndg->angle_ofs = 0.0f;
70 ndg->fade = 0.9;
71 ndg->size = 8;
72 node->storage = ndg;
73}
74
76{
77 const int glare_type = RNA_enum_get(ptr, "glare_type");
78#ifndef WITH_FFTW3
79 if (glare_type == CMP_NODE_GLARE_FOG_GLOW) {
80 uiItemL(layout, RPT_("Disabled, built without FFTW"), ICON_ERROR);
81 }
82#endif
83
84 uiItemR(layout, ptr, "glare_type", UI_ITEM_R_SPLIT_EMPTY_NAME, "", ICON_NONE);
85 uiItemR(layout, ptr, "quality", UI_ITEM_R_SPLIT_EMPTY_NAME, "", ICON_NONE);
86
88 uiItemR(layout, ptr, "iterations", UI_ITEM_R_SPLIT_EMPTY_NAME, nullptr, ICON_NONE);
89 }
90
92 uiItemR(layout,
93 ptr,
94 "color_modulation",
96 nullptr,
97 ICON_NONE);
98 }
99
100 uiItemR(layout, ptr, "mix", UI_ITEM_R_SPLIT_EMPTY_NAME, nullptr, ICON_NONE);
101 uiItemR(layout, ptr, "threshold", UI_ITEM_R_SPLIT_EMPTY_NAME, nullptr, ICON_NONE);
102
103 if (glare_type == CMP_NODE_GLARE_STREAKS) {
104 uiItemR(layout, ptr, "streaks", UI_ITEM_R_SPLIT_EMPTY_NAME, nullptr, ICON_NONE);
105 uiItemR(layout, ptr, "angle_offset", UI_ITEM_R_SPLIT_EMPTY_NAME, nullptr, ICON_NONE);
106 }
107
109 uiItemR(
110 layout, ptr, "fade", UI_ITEM_R_SPLIT_EMPTY_NAME | UI_ITEM_R_SLIDER, nullptr, ICON_NONE);
111 }
112
113 if (glare_type == CMP_NODE_GLARE_SIMPLE_STAR) {
114 uiItemR(layout, ptr, "use_rotate_45", UI_ITEM_R_SPLIT_EMPTY_NAME, nullptr, ICON_NONE);
115 }
116
118 uiItemR(layout, ptr, "size", UI_ITEM_R_SPLIT_EMPTY_NAME, nullptr, ICON_NONE);
119 }
120}
121
122using namespace blender::realtime_compositor;
123
125 public:
127
128 void execute() override
129 {
130 if (is_identity()) {
131 get_input("Image").pass_through(get_result("Image"));
132 return;
133 }
134
135 Result highlights_result = execute_highlights();
136 Result glare_result = execute_glare(highlights_result);
137 execute_mix(glare_result);
138 }
139
141 {
142 if (get_input("Image").is_single_value()) {
143 return true;
144 }
145
146 /* A mix factor of -1 indicates that the original image is returned as is. See the execute_mix
147 * method for more information. */
148 if (node_storage(bnode()).mix == -1.0f) {
149 return true;
150 }
151
152 return false;
153 }
154
155 Result execute_glare(Result &highlights_result)
156 {
157 switch (node_storage(bnode()).type) {
159 return execute_simple_star(highlights_result);
161 return execute_fog_glow(highlights_result);
163 return execute_streaks(highlights_result);
165 return execute_ghost(highlights_result);
167 return execute_bloom(highlights_result);
168 default:
170 return context().create_result(ResultType::Color);
171 }
172 }
173
174 /* -----------------
175 * Glare Highlights.
176 * ----------------- */
177
179 {
180 GPUShader *shader = context().get_shader("compositor_glare_highlights");
181 GPU_shader_bind(shader);
182
183 GPU_shader_uniform_1f(shader, "threshold", node_storage(bnode()).threshold);
184
185 const Result &input_image = get_input("Image");
186 GPU_texture_filter_mode(input_image, true);
187 input_image.bind_as_texture(shader, "input_tx");
188
189 const int2 glare_size = get_glare_size();
190 Result highlights_result = context().create_result(ResultType::Color);
191 highlights_result.allocate_texture(glare_size);
192 highlights_result.bind_as_image(shader, "output_img");
193
194 compute_dispatch_threads_at_least(shader, glare_size);
195
197 input_image.unbind_as_texture();
198 highlights_result.unbind_as_image();
199
200 return highlights_result;
201 }
202
203 /* ------------------
204 * Simple Star Glare.
205 * ------------------ */
206
208 {
209 if (node_storage(bnode()).star_45) {
210 return execute_simple_star_diagonal(highlights_result);
211 }
212 else {
213 return execute_simple_star_axis_aligned(highlights_result);
214 }
215 }
216
218 {
219 Result horizontal_pass_result = execute_simple_star_horizontal_pass(highlights_result);
220
221 /* The vertical pass is applied in-plane, but the highlights result is no longer needed,
222 * so just use it as the pass result. */
223 Result &vertical_pass_result = highlights_result;
224
225 GPUShader *shader = context().get_shader("compositor_glare_simple_star_vertical_pass");
226 GPU_shader_bind(shader);
227
228 GPU_shader_uniform_1i(shader, "iterations", get_number_of_iterations());
229 GPU_shader_uniform_1f(shader, "fade_factor", node_storage(bnode()).fade);
230
231 horizontal_pass_result.bind_as_texture(shader, "horizontal_tx");
232
233 vertical_pass_result.bind_as_image(shader, "vertical_img");
234
235 /* Dispatch a thread for each column in the image. */
236 const int width = get_glare_size().x;
237 compute_dispatch_threads_at_least(shader, int2(width, 1));
238
239 horizontal_pass_result.unbind_as_texture();
240 vertical_pass_result.unbind_as_image();
242
243 horizontal_pass_result.release();
244
245 return vertical_pass_result;
246 }
247
249 {
250 /* The horizontal pass is applied in-plane, so copy the highlights to a new image since the
251 * highlights result is still needed by the vertical pass. */
252 const int2 glare_size = get_glare_size();
253 Result horizontal_pass_result = context().create_result(ResultType::Color);
254 horizontal_pass_result.allocate_texture(glare_size);
256 GPU_texture_copy(horizontal_pass_result, highlights_result);
257
258 GPUShader *shader = context().get_shader("compositor_glare_simple_star_horizontal_pass");
259 GPU_shader_bind(shader);
260
261 GPU_shader_uniform_1i(shader, "iterations", get_number_of_iterations());
262 GPU_shader_uniform_1f(shader, "fade_factor", node_storage(bnode()).fade);
263
264 horizontal_pass_result.bind_as_image(shader, "horizontal_img");
265
266 /* Dispatch a thread for each row in the image. */
267 compute_dispatch_threads_at_least(shader, int2(glare_size.y, 1));
268
269 horizontal_pass_result.unbind_as_image();
271
272 return horizontal_pass_result;
273 }
274
276 {
277 Result diagonal_pass_result = execute_simple_star_diagonal_pass(highlights_result);
278
279 /* The anti-diagonal pass is applied in-plane, but the highlights result is no longer needed,
280 * so just use it as the pass result. */
281 Result &anti_diagonal_pass_result = highlights_result;
282
283 GPUShader *shader = context().get_shader("compositor_glare_simple_star_anti_diagonal_pass");
284 GPU_shader_bind(shader);
285
286 GPU_shader_uniform_1i(shader, "iterations", get_number_of_iterations());
287 GPU_shader_uniform_1f(shader, "fade_factor", node_storage(bnode()).fade);
288
289 diagonal_pass_result.bind_as_texture(shader, "diagonal_tx");
290
291 anti_diagonal_pass_result.bind_as_image(shader, "anti_diagonal_img");
292
293 /* Dispatch a thread for each diagonal in the image. */
295
296 diagonal_pass_result.unbind_as_texture();
297 anti_diagonal_pass_result.unbind_as_image();
299
300 diagonal_pass_result.release();
301
302 return anti_diagonal_pass_result;
303 }
304
306 {
307 /* The diagonal pass is applied in-plane, so copy the highlights to a new image since the
308 * highlights result is still needed by the anti-diagonal pass. */
309 const int2 glare_size = get_glare_size();
310 Result diagonal_pass_result = context().create_result(ResultType::Color);
311 diagonal_pass_result.allocate_texture(glare_size);
313 GPU_texture_copy(diagonal_pass_result, highlights_result);
314
315 GPUShader *shader = context().get_shader("compositor_glare_simple_star_diagonal_pass");
316 GPU_shader_bind(shader);
317
318 GPU_shader_uniform_1i(shader, "iterations", get_number_of_iterations());
319 GPU_shader_uniform_1f(shader, "fade_factor", node_storage(bnode()).fade);
320
321 diagonal_pass_result.bind_as_image(shader, "diagonal_img");
322
323 /* Dispatch a thread for each diagonal in the image. */
325
326 diagonal_pass_result.unbind_as_image();
328
329 return diagonal_pass_result;
330 }
331
332 /* The Star 45 option of the Simple Star mode of glare is applied on the diagonals of the image.
333 * This method computes the number of diagonals in the glare image. For more information on the
334 * used equation, see the compute_number_of_diagonals function in the following shader library
335 * file: gpu_shader_compositor_image_diagonals.glsl */
337 {
338 const int2 size = get_glare_size();
339 return size.x + size.y - 1;
340 }
341
342 /* --------------
343 * Streaks Glare.
344 * -------------- */
345
346 Result execute_streaks(Result &highlights_result)
347 {
348 /* Create an initially zero image where streaks will be accumulated. */
349 const float4 zero_color = float4(0.0f);
350 const int2 glare_size = get_glare_size();
351 Result accumulated_streaks_result = context().create_result(ResultType::Color);
352 accumulated_streaks_result.allocate_texture(glare_size);
353 GPU_texture_clear(accumulated_streaks_result, GPU_DATA_FLOAT, zero_color);
354
355 /* For each streak, compute its direction and apply a streak filter in that direction, then
356 * accumulate the result into the accumulated streaks result. */
357 for (const int streak_index : IndexRange(get_number_of_streaks())) {
358 const float2 streak_direction = compute_streak_direction(streak_index);
359 Result streak_result = apply_streak_filter(highlights_result, streak_direction);
360
361 GPUShader *shader = context().get_shader("compositor_glare_streaks_accumulate");
362 GPU_shader_bind(shader);
363
364 const float attenuation_factor = compute_streak_attenuation_factor();
365 GPU_shader_uniform_1f(shader, "attenuation_factor", attenuation_factor);
366
367 streak_result.bind_as_texture(shader, "streak_tx");
368 accumulated_streaks_result.bind_as_image(shader, "accumulated_streaks_img", true);
369
370 compute_dispatch_threads_at_least(shader, glare_size);
371
372 streak_result.unbind_as_texture();
373 accumulated_streaks_result.unbind_as_image();
374
375 streak_result.release();
377 }
378
379 return accumulated_streaks_result;
380 }
381
382 Result apply_streak_filter(Result &highlights_result, const float2 &streak_direction)
383 {
384 GPUShader *shader = context().get_shader("compositor_glare_streaks_filter");
385 GPU_shader_bind(shader);
386
387 /* Copy the highlights result into a new image because the output will be copied to the input
388 * after each iteration and the highlights result is still needed to compute other streaks. */
389 const int2 glare_size = get_glare_size();
390 Result input_streak_result = context().create_result(ResultType::Color);
391 input_streak_result.allocate_texture(glare_size);
393 GPU_texture_copy(input_streak_result, highlights_result);
394
395 Result output_streak_result = context().create_result(ResultType::Color);
396 output_streak_result.allocate_texture(glare_size);
397
398 /* For the given number of iterations, apply the streak filter in the given direction. The
399 * result of the previous iteration is used as the input of the current iteration. */
400 const IndexRange iterations_range = IndexRange(get_number_of_iterations());
401 for (const int iteration : iterations_range) {
402 const float color_modulator = compute_streak_color_modulator(iteration);
403 const float iteration_magnitude = compute_streak_iteration_magnitude(iteration);
404 const float3 fade_factors = compute_streak_fade_factors(iteration_magnitude);
405 const float2 streak_vector = streak_direction * iteration_magnitude;
406
407 GPU_shader_uniform_1f(shader, "color_modulator", color_modulator);
408 GPU_shader_uniform_3fv(shader, "fade_factors", fade_factors);
409 GPU_shader_uniform_2fv(shader, "streak_vector", streak_vector);
410
411 GPU_texture_filter_mode(input_streak_result, true);
413 input_streak_result.bind_as_texture(shader, "input_streak_tx");
414
415 output_streak_result.bind_as_image(shader, "output_streak_img");
416
417 compute_dispatch_threads_at_least(shader, glare_size);
418
419 input_streak_result.unbind_as_texture();
420 output_streak_result.unbind_as_image();
421
422 /* The accumulated result serves as the input for the next iteration, so copy the result to
423 * the input result since it can't be used for reading and writing simultaneously. Skip
424 * copying for the last iteration since it is not needed. */
425 if (iteration != iterations_range.last()) {
427 GPU_texture_copy(input_streak_result, output_streak_result);
428 }
429 }
430
431 input_streak_result.release();
433
434 return output_streak_result;
435 }
436
437 /* As the number of iterations increase, the streaks spread farther and their intensity decrease.
438 * To maintain similar intensities regardless of the number of iterations, streaks with lower
439 * number of iteration are linearly attenuated. When the number of iterations is maximum, we need
440 * not attenuate, so the denominator should be one, and when the number of iterations is one, we
441 * need the attenuation to be maximum. This can be modeled as a simple decreasing linear equation
442 * by substituting the two aforementioned cases. */
447
448 /* Given the index of the streak in the [0, Number Of Streaks - 1] range, compute the unit
449 * direction vector defining the streak. The streak directions should make angles with the
450 * x-axis that are equally spaced and covers the whole two pi range, starting with the user
451 * supplied angle. */
453 {
454 const int number_of_streaks = get_number_of_streaks();
455 const float start_angle = get_streaks_start_angle();
456 const float angle = start_angle + (float(streak_index) / number_of_streaks) * (M_PI * 2.0f);
457 return float2(math::cos(angle), math::sin(angle));
458 }
459
460 /* Different color channels of the streaks can be modulated by being multiplied by the color
461 * modulator computed by this method. The color modulation is expected to be maximum when the
462 * modulation factor is 1 and non existent when it is zero. But since the color modulator is
463 * multiplied to the channel and the multiplicative identity is 1, we invert the modulation
464 * factor. Moreover, color modulation should be less visible on higher iterations because they
465 * produce the farther more faded away parts of the streaks. To achieve that, the modulation
466 * factor is raised to the power of the iteration, noting that the modulation value is in the
467 * [0, 1] range so the higher the iteration the lower the resulting modulation factor. The plus
468 * one makes sure the power starts at one. */
470 {
471 return 1.0f - std::pow(get_color_modulation_factor(), iteration + 1);
472 }
473
474 /* Streaks are computed by iteratively applying a filter that samples 3 neighboring pixels in
475 * the direction of the streak. Those neighboring pixels are then combined using a weighted sum.
476 * The weights of the neighbors are the fade factors computed by this method. Farther neighbors
477 * are expected to have lower weights because they contribute less to the combined result. Since
478 * the iteration magnitude represents how far the neighbors are, as noted in the description of
479 * the compute_streak_iteration_magnitude method, the fade factor for the closest neighbor is
480 * computed as the user supplied fade parameter raised to the power of the magnitude, noting that
481 * the fade value is in the [0, 1] range while the magnitude is larger than or equal one, so the
482 * higher the power the lower the resulting fade factor. Furthermore, the other two neighbors
483 * are just squared and cubed versions of the fade factor for the closest neighbor to get even
484 * lower fade factors for those farther neighbors. */
485 float3 compute_streak_fade_factors(float iteration_magnitude)
486 {
487 const float fade_factor = std::pow(node_storage(bnode()).fade, iteration_magnitude);
488 return float3(fade_factor, std::pow(fade_factor, 2.0f), std::pow(fade_factor, 3.0f));
489 }
490
491 /* Streaks are computed by iteratively applying a filter that samples the neighboring pixels in
492 * the direction of the streak. Each higher iteration samples pixels that are farther away, the
493 * magnitude computed by this method describes how farther away the neighbors are sampled. The
494 * magnitude exponentially increase with the iteration. A base of 4, was chosen as compromise
495 * between better quality and performance, since a lower base corresponds to more tightly spaced
496 * neighbors but would require more iterations to produce a streak of the same length. */
498 {
499 return std::pow(4.0f, iteration);
500 }
501
503 {
504 return node_storage(bnode()).angle_ofs;
505 }
506
508 {
509 return node_storage(bnode()).streaks;
510 }
511
512 /* ------------
513 * Ghost Glare.
514 * ------------ */
515
516 Result execute_ghost(Result &highlights_result)
517 {
518 Result base_ghost_result = compute_base_ghost(highlights_result);
519
520 GPUShader *shader = context().get_shader("compositor_glare_ghost_accumulate");
521 GPU_shader_bind(shader);
522
523 /* Color modulators are constant across iterations. */
524 std::array<float4, 4> color_modulators = compute_ghost_color_modulators();
526 "color_modulators",
527 color_modulators.size(),
528 (const float(*)[4])color_modulators.data());
529
530 /* Create an initially zero image where ghosts will be accumulated. */
531 const float4 zero_color = float4(0.0f);
532 const int2 glare_size = get_glare_size();
533 Result accumulated_ghosts_result = context().create_result(ResultType::Color);
534 accumulated_ghosts_result.allocate_texture(glare_size);
535 GPU_texture_clear(accumulated_ghosts_result, GPU_DATA_FLOAT, zero_color);
536
537 /* For the given number of iterations, accumulate four ghosts with different scales and color
538 * modulators. The result of the previous iteration is used as the input of the current
539 * iteration. We start from index 1 because we are not interested in the scales produced for
540 * the first iteration according to visual judgment, see the compute_ghost_scales method. */
541 Result &input_ghost_result = base_ghost_result;
542 const IndexRange iterations_range = IndexRange(get_number_of_iterations()).drop_front(1);
543 for (const int i : iterations_range) {
544 std::array<float, 4> scales = compute_ghost_scales(i);
545 GPU_shader_uniform_4fv(shader, "scales", scales.data());
546
547 input_ghost_result.bind_as_texture(shader, "input_ghost_tx");
548 accumulated_ghosts_result.bind_as_image(shader, "accumulated_ghost_img", true);
549
550 compute_dispatch_threads_at_least(shader, glare_size);
551
552 input_ghost_result.unbind_as_texture();
553 accumulated_ghosts_result.unbind_as_image();
554
555 /* The accumulated result serves as the input for the next iteration, so copy the result to
556 * the input result since it can't be used for reading and writing simultaneously. Skip
557 * copying for the last iteration since it is not needed. */
558 if (i != iterations_range.last()) {
560 GPU_texture_copy(input_ghost_result, accumulated_ghosts_result);
561 }
562 }
563
565 input_ghost_result.release();
566
567 return accumulated_ghosts_result;
568 }
569
570 /* Computes two ghosts by blurring the highlights with two different radii, then adds them into a
571 * single base ghost image after scaling them by some factor and flipping the bigger ghost along
572 * the center of the image. */
573 Result compute_base_ghost(Result &highlights_result)
574 {
575 Result small_ghost_result = context().create_result(ResultType::Color);
577 highlights_result,
578 small_ghost_result,
581 false,
582 false);
583
584 Result big_ghost_result = context().create_result(ResultType::Color);
586 highlights_result,
587 big_ghost_result,
590 false,
591 false);
592
593 highlights_result.release();
594
595 GPUShader *shader = context().get_shader("compositor_glare_ghost_base");
596 GPU_shader_bind(shader);
597
598 GPU_texture_filter_mode(small_ghost_result, true);
600 small_ghost_result.bind_as_texture(shader, "small_ghost_tx");
601
602 GPU_texture_filter_mode(big_ghost_result, true);
604 big_ghost_result.bind_as_texture(shader, "big_ghost_tx");
605
606 const int2 glare_size = get_glare_size();
607 Result base_ghost_result = context().create_result(ResultType::Color);
608 base_ghost_result.allocate_texture(glare_size);
609 base_ghost_result.bind_as_image(shader, "combined_ghost_img");
610
611 compute_dispatch_threads_at_least(shader, glare_size);
612
614 small_ghost_result.unbind_as_texture();
615 big_ghost_result.unbind_as_texture();
616 base_ghost_result.unbind_as_image();
617
618 small_ghost_result.release();
619 big_ghost_result.release();
620
621 return base_ghost_result;
622 }
623
624 /* In each iteration of ghost accumulation, four ghosts are accumulated, each of which might be
625 * modulated by multiplying by some color modulator, this function generates a color modulator
626 * for each of the four ghosts. The first ghost is always unmodulated, so is the multiplicative
627 * identity of 1. The second ghost gets only its green and blue channels modulated, the third
628 * ghost gets only its red and green channels modulated, and the fourth ghost gets only its red
629 * and blue channels modulated. */
630 std::array<float4, 4> compute_ghost_color_modulators()
631 {
632 const float color_modulation_factor = get_ghost_color_modulation_factor();
633
634 std::array<float4, 4> color_modulators;
635 color_modulators[0] = float4(1.0f);
636 color_modulators[1] = float4(1.0f, color_modulation_factor, color_modulation_factor, 1.0f);
637 color_modulators[2] = float4(color_modulation_factor, color_modulation_factor, 1.0f, 1.0f);
638 color_modulators[3] = float4(color_modulation_factor, 1.0f, color_modulation_factor, 1.0f);
639
640 return color_modulators;
641 }
642
643 /* In each iteration of ghost accumulation, four ghosts with different scales are accumulated.
644 * Given the index of a certain iteration, this method computes the 4 scales for it. Assuming we
645 * have n number of iterations, that means the total number of accumulations is 4 * n. To get a
646 * variety of scales, we generate an arithmetic progression that starts from 2.1 and ends at
647 * zero exclusive, containing 4 * n elements. The start scale of 2.1 is chosen arbitrarily using
648 * visual judgment. To get more scale variations, every other scale is inverted with a slight
649 * change in scale such that it alternates between scaling down and up, additionally every other
650 * ghost is flipped across the image center by negating its scale. Finally, to get variations
651 * across the number of iterations, a shift of 0.5 is introduced when the number of iterations is
652 * odd, that way, the user will get variations when changing the number of iterations as opposed
653 * to just getting less or more ghosts. */
654 std::array<float, 4> compute_ghost_scales(int iteration)
655 {
656 /* Shift scales by 0.5 for odd number of iterations as discussed in the method description. */
657 const float offset = (get_number_of_iterations() % 2 == 1) ? 0.5f : 0.0f;
658
659 std::array<float, 4> scales;
660 for (const int i : IndexRange(scales.size())) {
661 /* Global index in all accumulations. */
662 const int global_i = iteration * 4 + i;
663 /* Arithmetic progression in the range [0, 1) + offset. */
664 const float progression = (global_i + offset) / (get_number_of_iterations() * 4);
665 /* Remap range [0, 1) to [1, 0) and multiply to remap to [2.1, 0). */
666 scales[i] = 2.1f * (1.0f - progression);
667
668 /* Invert the scale with a slight variation and flip it across the image center through
669 * negation for odd scales as discussed in the method description. */
670 if (i % 2 == 1) {
671 scales[i] = -0.99f / scales[i];
672 }
673 }
674
675 return scales;
676 }
677
678 /* The operation computes two base ghosts by blurring the highlights with two different radii,
679 * this method computes the blur radius for the smaller one. The value is chosen using visual
680 * judgment. Make sure to take the quality factor into account, see the get_quality_factor
681 * method for more information. */
683 {
684 return 16.0f / get_quality_factor();
685 }
686
687 /* Computes the blur radius of the bigger ghost, which is double the blur radius if the smaller
688 * one, see the get_small_ghost_radius for more information. */
690 {
691 return get_small_ghost_radius() * 2.0f;
692 }
693
694 /* The color channels of the glare can be modulated by being multiplied by this factor. In the
695 * user interface, 0 means no modulation and 1 means full modulation. But since the factor is
696 * multiplied, 1 corresponds to no modulation and 0 corresponds to full modulation, so we
697 * subtract from one. */
699 {
700 return 1.0f - get_color_modulation_factor();
701 }
702
703 /* ------------
704 * Bloom Glare.
705 * ------------ */
706
707 /* Bloom is computed by first progressively half-down-sampling the highlights down to a certain
708 * size, then progressively double-up-sampling the last down-sampled result up to the original
709 * size of the highlights, adding the down-sampled result of the same size in each up-sampling
710 * step. This can be illustrated as follows:
711 *
712 * Highlights ---+---> Bloom
713 * | |
714 * Down-sampled ---+---> Up-sampled
715 * | |
716 * Down-sampled ---+---> Up-sampled
717 * | |
718 * Down-sampled ---+---> Up-sampled
719 * | ^
720 * ... |
721 * Down-sampled ------------'
722 *
723 * The smooth down-sampling followed by smooth up-sampling can be thought of as a cheap way to
724 * approximate a large radius blur, and adding the corresponding down-sampled result while
725 * up-sampling is done to counter the attenuation that happens during down-sampling.
726 *
727 * Smaller down-sampled results contribute to larger glare size, so controlling the size can be
728 * done by stopping down-sampling down to a certain size, where the maximum possible size is
729 * achieved when down-sampling happens down to the smallest size of 2. */
730 Result execute_bloom(Result &highlights_result)
731 {
732 /* The maximum possible glare size is achieved when we down-sampled down to the smallest size
733 * of 2, which would result in a down-sampling chain length of the binary logarithm of the
734 * smaller dimension of the size of the highlights.
735 *
736 * However, as users might want a smaller glare size, we reduce the chain length by the halving
737 * count supplied by the user. */
738 const int2 glare_size = get_glare_size();
739 const int smaller_glare_dimension = math::min(glare_size.x, glare_size.y);
740 const int chain_length = int(std::log2(smaller_glare_dimension)) -
742
743 /* If the chain length is less than 2, that means no down-sampling will happen, so we just
744 * return a copy of the highlights. This is a sanitization of a corner case, so no need to
745 * worry about optimizing the copy away. */
746 if (chain_length < 2) {
747 Result bloom_result = context().create_result(ResultType::Color);
748 bloom_result.allocate_texture(highlights_result.domain());
749 GPU_texture_copy(bloom_result, highlights_result);
750 return bloom_result;
751 }
752
753 Array<Result> downsample_chain = compute_bloom_downsample_chain(highlights_result,
754 chain_length);
755
756 /* Notice that for a chain length of n, we need (n - 1) up-sampling passes. */
757 const IndexRange upsample_passes_range(chain_length - 1);
758 GPUShader *shader = context().get_shader("compositor_glare_bloom_upsample");
759 GPU_shader_bind(shader);
760
761 for (const int i : upsample_passes_range) {
762 Result &input = downsample_chain[upsample_passes_range.last() - i + 1];
763 GPU_texture_filter_mode(input, true);
764 input.bind_as_texture(shader, "input_tx");
765
766 const Result &output = downsample_chain[upsample_passes_range.last() - i];
767 output.bind_as_image(shader, "output_img", true);
768
769 compute_dispatch_threads_at_least(shader, output.domain().size);
770
771 input.unbind_as_texture();
772 output.unbind_as_image();
773 input.release();
774 }
775
777
778 return downsample_chain[0];
779 }
780
781 /* Progressively down-sample the given result into a result with half the size for the given
782 * chain length, returning an array containing the chain of down-sampled results. The first
783 * result of the chain is the given result itself for easier handling. The chain length is
784 * expected not to exceed the binary logarithm of the smaller dimension of the given result,
785 * because that would result in down-sampling passes that produce useless textures with just
786 * one pixel. */
787 Array<Result> compute_bloom_downsample_chain(Result &highlights_result, int chain_length)
788 {
789 const Result downsampled_result = context().create_result(ResultType::Color);
790 Array<Result> downsample_chain(chain_length, downsampled_result);
791
792 /* We assign the original highlights result to the first result of the chain to make the code
793 * easier. In turn, the number of passes is one less than the chain length, because the first
794 * result needn't be computed. */
795 downsample_chain[0] = highlights_result;
796 const IndexRange downsample_passes_range(chain_length - 1);
797
798 GPUShader *shader;
799 for (const int i : downsample_passes_range) {
800 /* For the first down-sample pass, we use a special "Karis" down-sample pass that applies a
801 * form of local tone mapping to reduce the contributions of fireflies, see the shader for
802 * more information. Later passes use a simple average down-sampling filter because fireflies
803 * doesn't service the first pass. */
804 if (i == downsample_passes_range.first()) {
805 shader = context().get_shader("compositor_glare_bloom_downsample_karis_average");
806 GPU_shader_bind(shader);
807 }
808 else {
809 shader = context().get_shader("compositor_glare_bloom_downsample_simple_average");
810 GPU_shader_bind(shader);
811 }
812
813 const Result &input = downsample_chain[i];
814 GPU_texture_filter_mode(input, true);
815 input.bind_as_texture(shader, "input_tx");
816
817 Result &output = downsample_chain[i + 1];
818 output.allocate_texture(input.domain().size / 2);
819 output.bind_as_image(shader, "output_img");
820
821 compute_dispatch_threads_at_least(shader, output.domain().size);
822
823 input.unbind_as_texture();
824 output.unbind_as_image();
826 }
827
828 return downsample_chain;
829 }
830
831 /* The bloom has a maximum possible size when the bloom size is equal to MAX_GLARE_SIZE and
832 * halves for every unit decrement of the bloom size. This method computes the number of halving
833 * that should take place, which is simply the difference to MAX_GLARE_SIZE. */
838
839 /* The size of the bloom relative to its maximum possible size, see the
840 * compute_bloom_size_halving_count() method for more information. */
842 {
843 return node_storage(bnode()).size;
844 }
845
846 /* ---------------
847 * Fog Glow Glare.
848 * --------------- */
849
850 Result execute_fog_glow(Result &highlights_result)
851 {
852 Result fog_glow_result = context().create_result(ResultType::Color);
853 fog_glow_result.allocate_texture(highlights_result.domain());
854
855#if defined(WITH_FFTW3)
857
858 const int kernel_size = compute_fog_glow_kernel_size();
859
860 /* Since we will be doing a circular convolution, we need to zero pad our input image by half
861 * the kernel size to avoid the kernel affecting the pixels at the other side of image.
862 * Therefore, zero boundary is assumed. */
863 const int needed_padding_amount = kernel_size / 2;
864 const int2 image_size = highlights_result.domain().size;
865 const int2 needed_spatial_size = image_size + needed_padding_amount;
866 const int2 spatial_size = fftw::optimal_size_for_real_transform(needed_spatial_size);
867
868 /* The FFTW real to complex transforms utilizes the hermitian symmetry of real transforms and
869 * stores only half the output since the other half is redundant, so we only allocate half of
870 * the first dimension. See Section 4.3.4 Real-data DFT Array Format in the FFTW manual for
871 * more information. */
872 const int2 frequency_size = int2(spatial_size.x / 2 + 1, spatial_size.y);
873
874 /* We only process the color channels, the alpha channel is written to the output as is. */
875 const int channels_count = 3;
876 const int image_channels_count = 4;
877 const int64_t spatial_pixels_per_channel = int64_t(spatial_size.x) * spatial_size.y;
878 const int64_t frequency_pixels_per_channel = int64_t(frequency_size.x) * frequency_size.y;
879 const int64_t spatial_pixels_count = spatial_pixels_per_channel * channels_count;
880 const int64_t frequency_pixels_count = frequency_pixels_per_channel * channels_count;
881
882 float *image_spatial_domain = fftwf_alloc_real(spatial_pixels_count);
883 std::complex<float> *image_frequency_domain = reinterpret_cast<std::complex<float> *>(
884 fftwf_alloc_complex(frequency_pixels_count));
885
886 /* Create a real to complex plan to transform the image to the frequency domain. */
887 fftwf_plan forward_plan = fftwf_plan_dft_r2c_2d(
888 spatial_size.y,
889 spatial_size.x,
890 image_spatial_domain,
891 reinterpret_cast<fftwf_complex *>(image_frequency_domain),
892 FFTW_ESTIMATE);
893
895 float *highlights_buffer = static_cast<float *>(
896 GPU_texture_read(highlights_result, GPU_DATA_FLOAT, 0));
897
898 /* Zero pad the image to the required spatial domain size, storing each channel in planar
899 * format for better cache locality, that is, RRRR...GGGG...BBBB. */
900 threading::parallel_for(IndexRange(spatial_size.y), 1, [&](const IndexRange sub_y_range) {
901 for (const int64_t y : sub_y_range) {
902 for (const int64_t x : IndexRange(spatial_size.x)) {
903 const bool is_inside_image = x < image_size.x && y < image_size.y;
904 for (const int64_t channel : IndexRange(channels_count)) {
905 const int64_t base_index = y * spatial_size.x + x;
906 const int64_t output_index = base_index + spatial_pixels_per_channel * channel;
907 if (is_inside_image) {
908 const int64_t image_index = (y * image_size.x + x) * image_channels_count + channel;
909 image_spatial_domain[output_index] = highlights_buffer[image_index];
910 }
911 else {
912 image_spatial_domain[output_index] = 0.0f;
913 }
914 }
915 }
916 }
917 });
918
919 threading::parallel_for(IndexRange(channels_count), 1, [&](const IndexRange sub_range) {
920 for (const int64_t channel : sub_range) {
921 fftwf_execute_dft_r2c(forward_plan,
922 image_spatial_domain + spatial_pixels_per_channel * channel,
923 reinterpret_cast<fftwf_complex *>(image_frequency_domain) +
924 frequency_pixels_per_channel * channel);
925 }
926 });
927
928 const FogGlowKernel &fog_glow_kernel = context().cache_manager().fog_glow_kernels.get(
929 kernel_size, spatial_size);
930
931 /* Multiply the kernel and the image in the frequency domain to perform the convolution. The
932 * FFT is not normalized, meaning the result of the FFT followed by an inverse FFT will result
933 * in an image that is scaled by a factor of the product of the width and height, so we take
934 * that into account by dividing by that scale. See Section 4.8.6 Multi-dimensional Transforms
935 * of the FFTW manual for more information. */
936 const float normalization_scale = float(spatial_size.x) * spatial_size.y *
937 fog_glow_kernel.normalization_factor();
938 threading::parallel_for(IndexRange(frequency_size.y), 1, [&](const IndexRange sub_y_range) {
939 for (const int64_t channel : IndexRange(channels_count)) {
940 for (const int64_t y : sub_y_range) {
941 for (const int64_t x : IndexRange(frequency_size.x)) {
942 const int64_t base_index = x + y * frequency_size.x;
943 const int64_t output_index = base_index + frequency_pixels_per_channel * channel;
944 const std::complex<float> kernel_value = fog_glow_kernel.frequencies()[base_index];
945 image_frequency_domain[output_index] *= kernel_value / normalization_scale;
946 }
947 }
948 }
949 });
950
951 /* Create a complex to real plan to transform the image to the real domain. */
952 fftwf_plan backward_plan = fftwf_plan_dft_c2r_2d(
953 spatial_size.y,
954 spatial_size.x,
955 reinterpret_cast<fftwf_complex *>(image_frequency_domain),
956 image_spatial_domain,
957 FFTW_ESTIMATE);
958
959 threading::parallel_for(IndexRange(channels_count), 1, [&](const IndexRange sub_range) {
960 for (const int64_t channel : sub_range) {
961 fftwf_execute_dft_c2r(backward_plan,
962 reinterpret_cast<fftwf_complex *>(image_frequency_domain) +
963 frequency_pixels_per_channel * channel,
964 image_spatial_domain + spatial_pixels_per_channel * channel);
965 }
966 });
967
968 Array<float> output(int64_t(image_size.x) * int64_t(image_size.y) * image_channels_count);
969
970 /* Copy the result to the output. */
971 threading::parallel_for(IndexRange(image_size.y), 1, [&](const IndexRange sub_y_range) {
972 for (const int64_t y : sub_y_range) {
973 for (const int64_t x : IndexRange(image_size.x)) {
974 for (const int64_t channel : IndexRange(channels_count)) {
975 const int64_t output_index = (x + y * image_size.x) * image_channels_count;
976 const int64_t base_index = x + y * spatial_size.x;
977 const int64_t input_index = base_index + spatial_pixels_per_channel * channel;
978 output[output_index + channel] = image_spatial_domain[input_index];
979 output[output_index + 3] = highlights_buffer[output_index + 3];
980 }
981 }
982 }
983 });
984
985 MEM_freeN(highlights_buffer);
986 fftwf_destroy_plan(forward_plan);
987 fftwf_destroy_plan(backward_plan);
988 fftwf_free(image_spatial_domain);
989 fftwf_free(image_frequency_domain);
990
991 GPU_texture_update(fog_glow_result, GPU_DATA_FLOAT, output.data());
992#else
993 GPU_texture_copy(fog_glow_result, highlights_result);
994#endif
995
996 return fog_glow_result;
997 }
998
999 /* Computes the size of the fog glow kernel that will be convolved with the image, which is
1000 * essentially the extent of the glare in pixels. */
1002 {
1003 /* We use an odd sized kernel since an even one will typically introduce a tiny offset as it
1004 * has no exact center value. */
1005 return (1 << node_storage(bnode()).size) + 1;
1006 }
1007
1008 /* ----------
1009 * Glare Mix.
1010 * ---------- */
1011
1012 void execute_mix(Result &glare_result)
1013 {
1014 GPUShader *shader = context().get_shader("compositor_glare_mix");
1015 GPU_shader_bind(shader);
1016
1017 GPU_shader_uniform_1f(shader, "mix_factor", node_storage(bnode()).mix);
1018
1019 const Result &input_image = get_input("Image");
1020 input_image.bind_as_texture(shader, "input_tx");
1021
1022 GPU_texture_filter_mode(glare_result, true);
1023 glare_result.bind_as_texture(shader, "glare_tx");
1024
1025 const Domain domain = compute_domain();
1026 Result &output_image = get_result("Image");
1027 output_image.allocate_texture(domain);
1028 output_image.bind_as_image(shader, "output_img");
1029
1030 compute_dispatch_threads_at_least(shader, domain.size);
1031
1033 output_image.unbind_as_image();
1034 input_image.unbind_as_texture();
1035 glare_result.unbind_as_texture();
1036
1037 glare_result.release();
1038 }
1039
1040 /* -------
1041 * Common.
1042 * ------- */
1043
1044 /* As a performance optimization, the operation can compute the glare on a fraction of the input
1045 * image size, which is what this method returns. */
/* NOTE(review): the declarator line (source line 1046) is missing from this generated listing —
 * presumably `int2 get_glare_size()` given that it divides an int2 domain size by the integer
 * quality factor; confirm against the upstream node_composite_glare.cc. */
1047 {
1048 return compute_domain().size / get_quality_factor();
1049 }
1050
/* Returns the iteration count stored on the node (initialized to 3 in node_composit_init_glare).
 * NOTE(review): the declarator line (source line 1051) is missing from this generated listing —
 * presumably `int get_number_of_iterations()`; confirm against upstream. */
1052 {
1053 return node_storage(bnode()).iter;
1054 }
1055
/* Returns the color modulation value stored on the node (initialized to 0.25 in
 * node_composit_init_glare). NOTE(review): the declarator line (source line 1056) is missing
 * from this generated listing — presumably `float get_color_modulation_factor()`; confirm
 * against upstream. */
1057 {
1058 return node_storage(bnode()).colmod;
1059 }
1060
1061 /* The glare node can compute the glare on a fraction of the input image size to improve
1062 * performance. The quality values and their corresponding quality factors are as follows:
1063 *
1064 * - High Quality => Quality Value: 0 => Quality Factor: 1.
1065 * - Medium Quality => Quality Value: 1 => Quality Factor: 2.
1066 * - Low Quality => Quality Value: 2 => Quality Factor: 4.
1067 *
1068 * Dividing the image size by the quality factor gives the size where the glare should be
1069 * computed. The glare algorithm should also take the quality factor into account to compensate
1070 * for the reduced sized, perhaps by dividing blur radii and similar values by the quality
1071 * factor. */
1073 {
1074 return 1 << node_storage(bnode()).quality;
1075 }
1076};
1077
1079{
1080 return new GlareOperation(context, node);
1081}
1082
1083} // namespace blender::nodes::node_composite_glare_cc
1084
1086{
1087 namespace file_ns = blender::nodes::node_composite_glare_cc;
1088
1089 static blender::bke::bNodeType ntype;
1090
1091 cmp_node_type_base(&ntype, CMP_NODE_GLARE, "Glare", NODE_CLASS_OP_FILTER);
1092 ntype.declare = file_ns::cmp_node_glare_declare;
1093 ntype.draw_buttons = file_ns::node_composit_buts_glare;
1094 ntype.initfunc = file_ns::node_composit_init_glare;
1097 ntype.get_compositor_operation = file_ns::get_compositor_operation;
1098
1100}
#define NODE_STORAGE_FUNCS(StorageT)
Definition BKE_node.hh:1799
#define NODE_CLASS_OP_FILTER
Definition BKE_node.hh:408
#define BLI_assert_unreachable()
Definition BLI_assert.h:97
#define M_PI
#define ELEM(...)
#define RPT_(msgid)
#define MAX_GLARE_SIZE
@ CMP_NODE_GLARE_STREAKS
@ CMP_NODE_GLARE_BLOOM
@ CMP_NODE_GLARE_GHOST
@ CMP_NODE_GLARE_SIMPLE_STAR
@ CMP_NODE_GLARE_FOG_GLOW
@ R_FILTER_GAUSS
void GPU_shader_uniform_2fv(GPUShader *sh, const char *name, const float data[2])
void GPU_shader_uniform_1i(GPUShader *sh, const char *name, int value)
void GPU_shader_uniform_1f(GPUShader *sh, const char *name, float value)
void GPU_shader_uniform_3fv(GPUShader *sh, const char *name, const float data[3])
void GPU_shader_uniform_4fv_array(GPUShader *sh, const char *name, int len, const float(*val)[4])
void GPU_shader_bind(GPUShader *shader)
void GPU_shader_uniform_4fv(GPUShader *sh, const char *name, const float data[4])
void GPU_shader_unbind()
void GPU_memory_barrier(eGPUBarrier barrier)
Definition gpu_state.cc:374
@ GPU_BARRIER_TEXTURE_UPDATE
Definition GPU_state.hh:39
void GPU_texture_clear(GPUTexture *texture, eGPUDataFormat data_format, const void *data)
void GPU_texture_copy(GPUTexture *dst, GPUTexture *src)
void * GPU_texture_read(GPUTexture *texture, eGPUDataFormat data_format, int mip_level)
@ GPU_DATA_FLOAT
void GPU_texture_extend_mode(GPUTexture *texture, GPUSamplerExtendMode extend_mode)
@ GPU_SAMPLER_EXTEND_MODE_CLAMP_TO_BORDER
void GPU_texture_filter_mode(GPUTexture *texture, bool use_filter)
void GPU_texture_update(GPUTexture *texture, eGPUDataFormat data_format, const void *data)
void uiItemL(uiLayout *layout, const char *name, int icon)
void uiItemR(uiLayout *layout, PointerRNA *ptr, const char *propname, eUI_Item_Flag flag, const char *name, int icon)
@ UI_ITEM_R_SPLIT_EMPTY_NAME
@ UI_ITEM_R_SLIDER
struct GPUShader GPUShader
#define output
constexpr int64_t first() const
constexpr int64_t last(const int64_t n=0) const
constexpr IndexRange drop_front(int64_t n) const
Result execute_simple_star_horizontal_pass(Result &highlights_result)
Array< Result > compute_bloom_downsample_chain(Result &highlights_result, int chain_length)
Result execute_simple_star_diagonal_pass(Result &highlights_result)
Result apply_streak_filter(Result &highlights_result, const float2 &streak_direction)
GPUShader * get_shader(const char *info_name, ResultPrecision precision)
Result create_result(ResultType type, ResultPrecision precision)
FogGlowKernel & get(int kernel_size, int2 spatial_size)
NodeOperation(Context &context, DNode node)
Result & get_input(StringRef identifier) const
Definition operation.cc:144
Result & get_result(StringRef identifier)
Definition operation.cc:46
void bind_as_image(GPUShader *shader, const char *image_name, bool read=false) const
Definition result.cc:264
void pass_through(Result &target)
Definition result.cc:289
const Domain & domain() const
Definition result.cc:712
void allocate_texture(Domain domain, bool from_pool=true)
Definition result.cc:204
void bind_as_texture(GPUShader *shader, const char *texture_name) const
Definition result.cc:253
local_group_size(16, 16) .push_constant(Type b
draw_view in_light_buf[] float
draw_view push_constant(Type::INT, "radiance_src") .push_constant(Type capture_info_buf storage_buf(1, Qualifier::READ, "ObjectBounds", "bounds_buf[]") .push_constant(Type draw_view int
#define mix(a, b, c)
Definition hash.h:36
void MEM_freeN(void *vmemh)
Definition mallocn.cc:105
void node_type_storage(bNodeType *ntype, const char *storagename, void(*freefunc)(bNode *node), void(*copyfunc)(bNodeTree *dest_ntree, bNode *dest_node, const bNode *src_node))
Definition node.cc:4632
void node_register_type(bNodeType *ntype)
Definition node.cc:1708
void initialize_float()
Definition fftw.cc:84
int optimal_size_for_real_transform(int size)
Definition fftw.cc:55
T cos(const AngleRadianBase< T > &a)
T min(const T &a, const T &b)
T sin(const AngleRadianBase< T > &a)
static void cmp_node_glare_declare(NodeDeclarationBuilder &b)
static void node_composit_buts_glare(uiLayout *layout, bContext *, PointerRNA *ptr)
static NodeOperation * get_compositor_operation(Context &context, DNode node)
static void node_composit_init_glare(bNodeTree *, bNode *node)
void symmetric_separable_blur(Context &context, Result &input, Result &output, float2 radius, int filter_type=R_FILTER_GAUSS, bool extend_bounds=false, bool gamma_correct=false)
void compute_dispatch_threads_at_least(GPUShader *shader, int2 threads_range, int2 local_size=int2(16))
Definition utilities.cc:131
void parallel_for(const IndexRange range, const int64_t grain_size, const Function &function, const TaskSizeHints &size_hints=detail::TaskSizeHints_Static(1))
Definition BLI_task.hh:95
VecBase< float, 4 > float4
VecBase< int32_t, 2 > int2
VecBase< float, 2 > float2
VecBase< float, 3 > float3
void register_node_type_cmp_glare()
#define MAX_GLARE_ITERATIONS
void cmp_node_type_base(blender::bke::bNodeType *ntype, int type, const char *name, short nclass)
void node_free_standard_storage(bNode *node)
Definition node_util.cc:46
void node_copy_standard_storage(bNodeTree *, bNode *dest_node, const bNode *src_node)
Definition node_util.cc:58
CCL_NAMESPACE_BEGIN ccl_device float fade(float t)
Definition noise.h:14
int RNA_enum_get(PointerRNA *ptr, const char *name)
__int64 int64_t
Definition stdint.h:89
char angle size
char angle streaks
char angle star_45
Defines a node type.
Definition BKE_node.hh:218
NodeGetCompositorOperationFunction get_compositor_operation
Definition BKE_node.hh:324
void(* initfunc)(bNodeTree *ntree, bNode *node)
Definition BKE_node.hh:267
void(* draw_buttons)(uiLayout *, bContext *C, PointerRNA *ptr)
Definition BKE_node.hh:238
NodeDeclareFunction declare
Definition BKE_node.hh:347
static pxr::UsdShadeInput get_input(const pxr::UsdShadeShader &usd_shader, const pxr::TfToken &input_name)
PointerRNA * ptr
Definition wm_files.cc:4126