Blender V4.5
node_composite_glare.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2006 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
8
9#include <array>
10#include <cmath>
11#include <complex>
12#include <limits>
13
14#include "MEM_guardedalloc.h"
15
16#if defined(WITH_FFTW3)
17# include <fftw3.h>
18#endif
19
20#include "BLI_array.hh"
21#include "BLI_assert.h"
22#include "BLI_fftw.hh"
23#include "BLI_index_range.hh"
24#include "BLI_math_base.hh"
25#include "BLI_math_color.h"
26#include "BLI_math_vector.hh"
28#include "BLI_task.hh"
29
30#include "DNA_scene_types.h"
31
32#include "RNA_access.hh"
33
34#include "UI_interface.hh"
35#include "UI_resources.hh"
36
37#include "GPU_shader.hh"
38#include "GPU_state.hh"
39#include "GPU_texture.hh"
40
42#include "COM_node_operation.hh"
43#include "COM_utilities.hh"
45
47
48#define MAX_GLARE_ITERATIONS 5
49
51
53
55{
56 b.use_custom_socket_order();
57
58 b.add_output<decl::Color>("Image").description("The image with the generated glare added");
59 b.add_output<decl::Color>("Glare").description("The generated glare");
60 b.add_output<decl::Color>("Highlights")
61 .description("The extracted highlights from which the glare was generated");
62
63 b.add_layout([](uiLayout *layout, bContext * /*C*/, PointerRNA *ptr) {
64#ifndef WITH_FFTW3
65 const int glare_type = RNA_enum_get(ptr, "glare_type");
66 if (glare_type == CMP_NODE_GLARE_FOG_GLOW) {
67 layout->label(RPT_("Disabled, built without FFTW"), ICON_ERROR);
68 }
69#endif
70
71 layout->prop(ptr, "glare_type", UI_ITEM_R_SPLIT_EMPTY_NAME, "", ICON_NONE);
72 layout->prop(ptr, "quality", UI_ITEM_R_SPLIT_EMPTY_NAME, "", ICON_NONE);
73 });
74
75 b.add_input<decl::Color>("Image")
76 .default_value({1.0f, 1.0f, 1.0f, 1.0f})
77 .compositor_domain_priority(0);
78
79 PanelDeclarationBuilder &highlights_panel = b.add_panel("Highlights").default_closed(true);
80 highlights_panel.add_input<decl::Float>("Threshold", "Highlights Threshold")
81 .default_value(1.0f)
82 .min(0.0f)
83 .description(
84 "The brightness level at which pixels are considered part of the highlights that "
85 "produce a glare")
86 .compositor_expects_single_value();
87 highlights_panel.add_input<decl::Float>("Smoothness", "Highlights Smoothness")
88 .default_value(0.1f)
89 .min(0.0f)
90 .max(1.0f)
91 .subtype(PROP_FACTOR)
92 .description("The smoothness of the extracted highlights")
93 .compositor_expects_single_value();
94
95 PanelDeclarationBuilder &supress_highlights_panel =
96 highlights_panel.add_panel("Clamp").default_closed(true);
97 supress_highlights_panel.add_input<decl::Bool>("Clamp", "Clamp Highlights")
98 .default_value(false)
99 .panel_toggle()
100 .description("Clamp bright highlights")
101 .compositor_expects_single_value();
102 supress_highlights_panel.add_input<decl::Float>("Maximum", "Maximum Highlights")
103 .default_value(10.0f)
104 .min(0.0f)
105 .description(
106 "Clamp bright highlights such that their brightness are not larger than this value")
107 .compositor_expects_single_value();
108
109 PanelDeclarationBuilder &mix_panel = b.add_panel("Adjust");
110 mix_panel.add_input<decl::Float>("Strength")
111 .default_value(1.0f)
112 .min(0.0f)
113 .max(1.0f)
114 .subtype(PROP_FACTOR)
115 .description("Adjusts the brightness of the glare")
116 .compositor_expects_single_value();
117 mix_panel.add_input<decl::Float>("Saturation")
118 .default_value(1.0f)
119 .min(0.0f)
120 .max(1.0f)
121 .subtype(PROP_FACTOR)
122 .description("Adjusts the saturation of the glare")
123 .compositor_expects_single_value();
124 mix_panel.add_input<decl::Color>("Tint")
125 .default_value({1.0f, 1.0f, 1.0f, 1.0f})
126 .description("Tints the glare. Consider desaturating the glare to more accurate tinting")
127 .compositor_expects_single_value();
128
129 PanelDeclarationBuilder &glare_panel = b.add_panel("Glare");
130 glare_panel.add_input<decl::Float>("Size")
131 .default_value(0.5f)
132 .min(0.0f)
133 .max(1.0f)
134 .subtype(PROP_FACTOR)
135 .description(
136 "The size of the glare relative to the image. 1 means the glare covers the entire "
137 "image, 0.5 means the glare covers half the image, and so on")
138 .compositor_expects_single_value();
139 glare_panel.add_input<decl::Int>("Streaks")
140 .default_value(4)
141 .min(1)
142 .max(16)
143 .description("The number of streaks")
144 .compositor_expects_single_value();
145 glare_panel.add_input<decl::Float>("Streaks Angle")
146 .default_value(0.0f)
147 .subtype(PROP_ANGLE)
148 .description("The angle that the first streak makes with the horizontal axis")
149 .compositor_expects_single_value();
150 glare_panel.add_input<decl::Int>("Iterations")
151 .default_value(3)
152 .min(2)
153 .max(5)
154 .description(
155 "The number of ghosts for Ghost glare or the quality and spread of Glare for Streaks "
156 "and Simple Star")
157 .compositor_expects_single_value();
158 glare_panel.add_input<decl::Float>("Fade")
159 .default_value(0.9f)
160 .min(0.75f)
161 .max(1.0f)
162 .subtype(PROP_FACTOR)
163 .description("Streak fade-out factor")
164 .compositor_expects_single_value();
165 glare_panel.add_input<decl::Float>("Color Modulation")
166 .default_value(0.25)
167 .min(0.0f)
168 .max(1.0f)
169 .subtype(PROP_FACTOR)
170 .description("Modulates colors of streaks and ghosts for a spectral dispersion effect")
171 .compositor_expects_single_value();
172 glare_panel.add_input<decl::Bool>("Diagonal", "Diagonal Star")
173 .default_value(true)
174 .description("Align the star diagonally")
175 .compositor_expects_single_value();
176}
177
static void node_composit_init_glare(bNodeTree * /*ntree*/, bNode *node)
{
  /* Allocate zero-initialized persistent settings for the node. */
  NodeGlare *ndg = MEM_callocN<NodeGlare>(__func__);
  /* Default to medium quality. NOTE(review): the extracted listing is missing one line here,
   * which presumably initialized another default (likely the glare type) — verify upstream. */
  ndg->quality = 1;
  node->storage = ndg;
}
185
186static void node_update(bNodeTree *ntree, bNode *node)
187{
188 const CMPNodeGlareType glare_type = static_cast<CMPNodeGlareType>(node_storage(*node).type);
189
190 bNodeSocket *size_input = bke::node_find_socket(*node, SOCK_IN, "Size");
192 *ntree, *size_input, ELEM(glare_type, CMP_NODE_GLARE_FOG_GLOW, CMP_NODE_GLARE_BLOOM));
193
194 bNodeSocket *iterations_input = bke::node_find_socket(*node, SOCK_IN, "Iterations");
196 *ntree,
197 *iterations_input,
199
200 bNodeSocket *fade_input = bke::node_find_socket(*node, SOCK_IN, "Fade");
202 *ntree, *fade_input, ELEM(glare_type, CMP_NODE_GLARE_SIMPLE_STAR, CMP_NODE_GLARE_STREAKS));
203
204 bNodeSocket *color_modulation_input = bke::node_find_socket(*node, SOCK_IN, "Color Modulation");
206 *ntree,
207 *color_modulation_input,
209
210 bNodeSocket *streaks_input = bke::node_find_socket(*node, SOCK_IN, "Streaks");
212 *ntree, *streaks_input, glare_type == CMP_NODE_GLARE_STREAKS);
213
214 bNodeSocket *streaks_angle_input = bke::node_find_socket(*node, SOCK_IN, "Streaks Angle");
216 *ntree, *streaks_angle_input, glare_type == CMP_NODE_GLARE_STREAKS);
217
218 bNodeSocket *diagonal_star_input = bke::node_find_socket(*node, SOCK_IN, "Diagonal Star");
220 *ntree, *diagonal_star_input, glare_type == CMP_NODE_GLARE_SIMPLE_STAR);
221}
222
224 public:
227 {
228 bNode &node = params.add_node("CompositorNodeGlare");
229 node_storage(node).type = this->type;
230 params.update_and_connect_available_socket(node, "Image");
231 }
232};
233
235{
236 const eNodeSocketDatatype from_socket_type = eNodeSocketDatatype(params.other_socket().type);
237 if (!params.node_tree().typeinfo->validate_link(from_socket_type, SOCK_RGBA)) {
238 return;
239 }
240
241 params.add_item(IFACE_("Simple Star"), SocketSearchOp{CMP_NODE_GLARE_SIMPLE_STAR});
242 params.add_item(IFACE_("Fog Glow"), SocketSearchOp{CMP_NODE_GLARE_FOG_GLOW});
243 params.add_item(IFACE_("Streaks"), SocketSearchOp{CMP_NODE_GLARE_STREAKS});
246}
247
248using namespace blender::compositor;
249
251 public:
253
254 void execute() override
255 {
256 const Result &image_input = this->get_input("Image");
257 Result &glare_output = this->get_result("Glare");
258 Result &highlights_output = this->get_result("Highlights");
259
260 if (image_input.is_single_value()) {
261 Result &image_output = this->get_result("Image");
262 if (image_output.should_compute()) {
263 image_output.share_data(image_input);
264 }
265 if (glare_output.should_compute()) {
266 glare_output.allocate_invalid();
267 }
268 if (highlights_output.should_compute()) {
269 highlights_output.allocate_invalid();
270 }
271 return;
272 }
273
274 Result highlights = this->compute_highlights();
275 Result glare = this->compute_glare(highlights);
276
277 if (highlights_output.should_compute()) {
278 if (highlights.domain().size != image_input.domain().size) {
279 /* The highlights were computed on a fraction of the image size, see the get_quality_factor
280 * method. So we need to upsample them while writing as opposed to just stealing the
281 * existing data. */
282 this->write_highlights_output(highlights);
283 }
284 else {
285 highlights_output.steal_data(highlights);
286 }
287 }
288 highlights.release();
289
290 /* Combine the original input and the generated glare. */
291 execute_mix(glare);
292
293 if (glare_output.should_compute()) {
294 this->write_glare_output(glare);
295 }
296 glare.release();
297 }
298
299 /* -----------------
300 * Glare Highlights.
301 * ----------------- */
302
304 {
305 if (this->context().use_gpu()) {
306 return this->execute_highlights_gpu();
307 }
308 return this->execute_highlights_cpu();
309 }
310
312 {
313 GPUShader *shader = context().get_shader("compositor_glare_highlights");
314 GPU_shader_bind(shader);
315
316 GPU_shader_uniform_1f(shader, "threshold", this->get_threshold());
317 GPU_shader_uniform_1f(shader, "highlights_smoothness", this->get_highlights_smoothness());
318 GPU_shader_uniform_1f(shader, "max_brightness", this->get_maximum_brightness());
319
320 const Result &input_image = get_input("Image");
321 GPU_texture_filter_mode(input_image, true);
322 input_image.bind_as_texture(shader, "input_tx");
323
324 const int2 highlights_size = this->get_glare_image_size();
325 Result highlights_result = context().create_result(ResultType::Color);
326 highlights_result.allocate_texture(highlights_size);
327 highlights_result.bind_as_image(shader, "output_img");
328
329 compute_dispatch_threads_at_least(shader, highlights_size);
330
332 input_image.unbind_as_texture();
333 highlights_result.unbind_as_image();
334
335 return highlights_result;
336 }
337
339 {
340 const float threshold = this->get_threshold();
341 const float highlights_smoothness = this->get_highlights_smoothness();
342 const float max_brightness = this->get_maximum_brightness();
343
344 const Result &input = get_input("Image");
345
346 const int2 highlights_size = this->get_glare_image_size();
348 output.allocate_texture(highlights_size);
349
350 parallel_for(highlights_size, [&](const int2 texel) {
351 float2 normalized_coordinates = (float2(texel) + float2(0.5f)) / float2(highlights_size);
352
353 float4 hsva;
354 rgb_to_hsv_v(input.sample_bilinear_extended(normalized_coordinates), hsva);
355
356 /* Clamp the brightness of the highlights such that pixels whose brightness are less than the
357 * threshold will be equal to the threshold and will become zero once threshold is subtracted
358 * later. We also clamp by the specified max brightness to suppress very bright highlights.
359 *
360 * We use a smooth clamping function such that highlights do not become very sharp but use
361 * the adaptive variant such that we guarantee that zero highlights remain zero even after
362 * smoothing. Notice that when we mention zero, we mean zero after subtracting the threshold,
363 * so we actually mean the minimum bound, the threshold. See the adaptive_smooth_clamp
364 * function for more information. */
365 const float clamped_brightness = this->adaptive_smooth_clamp(
366 hsva.z, threshold, max_brightness, highlights_smoothness);
367
368 /* The final brightness is relative to the threshold. */
369 hsva.z = clamped_brightness - threshold;
370
371 float4 rgba;
372 hsv_to_rgb_v(hsva, rgba);
373
374 output.store_pixel(texel, float4(rgba.xyz(), 1.0f));
375 });
376
377 return output;
378 }
379
381 {
382 /* Clamp disabled, return the maximum possible brightness. */
383 if (!this->get_clamp_highlights()) {
384 return std::numeric_limits<float>::max();
385 }
386
387 /* Brightness of the highlights are relative to the threshold, see execute_highlights_cpu, so
388 * we add the threshold such that the maximum brightness corresponds to the actual brightness
389 * of the computed highlights. */
390 return this->get_threshold() + this->get_max_highlights();
391 }
392
393 /* A Quadratic Polynomial smooth minimum function *without* normalization, based on:
394 *
395 * https://iquilezles.org/articles/smin/
396 *
397 * This should not be converted into a common utility function in BLI because the glare code is
398 * specifically designed for it as can be seen in the adaptive_smooth_clamp method, and it is
399 * intentionally not normalized. */
400 float smooth_min(const float a, const float b, const float smoothness)
401 {
402 if (smoothness == 0.0f) {
403 return math::min(a, b);
404 }
405 const float h = math::max(smoothness - math::abs(a - b), 0.0f) / smoothness;
406 return math::min(a, b) - h * h * smoothness * (1.0f / 4.0f);
407 }
408
409 float smooth_max(const float a, const float b, const float smoothness)
410 {
411 return -this->smooth_min(-a, -b, smoothness);
412 }
413
414 /* Clamps the input x within min_value and max_value using a quadratic polynomial smooth minimum
415 * and maximum functions, with individual control over their smoothness. */
416 float smooth_clamp(const float x,
417 const float min_value,
418 const float max_value,
419 const float min_smoothness,
420 const float max_smoothness)
421 {
422 return this->smooth_min(
423 max_value, this->smooth_max(min_value, x, min_smoothness), max_smoothness);
424 }
425
426 /* A variant of smooth_clamp that limits the smoothness such that the function evaluates to the
427 * given min for 0 <= min <= max and x >= 0. The aforementioned guarantee holds for the standard
428 * clamp function by definition, but since the smooth clamp function gradually increases before
429 * the specified min/max, if min/max are sufficiently close together or to zero, they will not
430 * evaluate to min at zero or at min, since zero or min will be at the region of the gradual
431 * increase.
432 *
433 * It can be shown that the width of the gradual increase region is equivalent to the smoothness
434 * parameter, so smoothness can't be larger than the difference between the min/max and zero, or
435 * larger than the difference between min and max themselves. Otherwise, zero or min will lie
436 * inside the gradual increase region of min/max. So we limit the smoothness of min/max by taking
437 * the minimum with the distances to zero and to the distance to the other bound. */
438 float adaptive_smooth_clamp(const float x,
439 const float min_value,
440 const float max_value,
441 const float smoothness)
442 {
443 const float range_distance = math::distance(min_value, max_value);
444 const float distance_from_min_to_zero = math::distance(min_value, 0.0f);
445 const float distance_from_max_to_zero = math::distance(max_value, 0.0f);
446
447 const float max_safe_smoothness_for_min = math::min(distance_from_min_to_zero, range_distance);
448 const float max_safe_smoothness_for_max = math::min(distance_from_max_to_zero, range_distance);
449
450 const float min_smoothness = math::min(smoothness, max_safe_smoothness_for_min);
451 const float max_smoothness = math::min(smoothness, max_safe_smoothness_for_max);
452
453 return this->smooth_clamp(x, min_value, max_value, min_smoothness, max_smoothness);
454 }
455
457 {
458 return math::max(0.0f, this->get_input("Highlights Threshold").get_single_value_default(1.0f));
459 }
460
462 {
463 return math::max(0.0f,
464 this->get_input("Highlights Smoothness").get_single_value_default(0.1f));
465 }
466
468 {
469 return this->get_input("Clamp Highlights").get_single_value_default(false);
470 }
471
473 {
474 return math::max(0.0f, this->get_input("Maximum Highlights").get_single_value_default(0.0f));
475 }
476
477 /* Writes the given input highlights by upsampling it using bilinear interpolation to match the
478 * size of the original input, allocating the highlights output and writing the result to it. */
479 void write_highlights_output(const Result &highlights)
480 {
481 if (this->context().use_gpu()) {
482 this->write_highlights_output_gpu(highlights);
483 }
484 else {
485 this->write_highlights_output_cpu(highlights);
486 }
487 }
488
489 void write_highlights_output_gpu(const Result &highlights)
490 {
491 GPUShader *shader = this->context().get_shader("compositor_glare_write_highlights_output");
492 GPU_shader_bind(shader);
493
494 GPU_texture_filter_mode(highlights, true);
496 highlights.bind_as_texture(shader, "input_tx");
497
498 const Result &image_input = this->get_input("Image");
499 Result &output = this->get_result("Highlights");
500 output.allocate_texture(image_input.domain());
501 output.bind_as_image(shader, "output_img");
502
503 compute_dispatch_threads_at_least(shader, output.domain().size);
504
506 output.unbind_as_image();
507 highlights.unbind_as_texture();
508 }
509
510 void write_highlights_output_cpu(const Result &highlights)
511 {
512 const Result &image_input = this->get_input("Image");
513 Result &output = this->get_result("Highlights");
514 output.allocate_texture(image_input.domain());
515
516 const int2 size = output.domain().size;
517 parallel_for(size, [&](const int2 texel) {
518 float2 normalized_coordinates = (float2(texel) + float2(0.5f)) / float2(size);
519 output.store_pixel(texel, highlights.sample_bilinear_extended(normalized_coordinates));
520 });
521 }
522
523 /* ------
524 * Glare.
525 * ------ */
526
527 Result compute_glare(Result &highlights_result)
528 {
529 if (!this->should_compute_glare()) {
531 }
532
533 switch (node_storage(bnode()).type) {
535 return this->execute_simple_star(highlights_result);
537 return this->execute_fog_glow(highlights_result);
539 return this->execute_streaks(highlights_result);
541 return this->execute_ghost(highlights_result);
543 return this->execute_bloom(highlights_result);
544 default:
547 }
548 }
549
550 /* Glare should be computed either because the glare output is needed directly or the image
551 * output is needed. */
553 {
554 return this->get_result("Glare").should_compute() ||
555 this->get_result("Image").should_compute();
556 }
557
558 /* ------------------
559 * Simple Star Glare.
560 * ------------------ */
561
563 {
564 if (this->get_diagonal_star()) {
565 return execute_simple_star_diagonal(highlights);
566 }
567 return execute_simple_star_axis_aligned(highlights);
568 }
569
571 {
572 Result horizontal_pass_result = execute_simple_star_horizontal_pass(highlights);
573 Result vertical_pass_result = this->execute_simple_star_vertical_pass(highlights,
574 horizontal_pass_result);
575 horizontal_pass_result.release();
576 return vertical_pass_result;
577 }
578
580 const Result &horizontal_pass_result)
581 {
582 if (this->context().use_gpu()) {
583 return this->execute_simple_star_vertical_pass_gpu(highlights, horizontal_pass_result);
584 }
585 return this->execute_simple_star_vertical_pass_cpu(highlights, horizontal_pass_result);
586 }
587
589 const Result &horizontal_pass_result)
590 {
591 /* First, copy the highlights result to the output since we will be doing the computation
592 * in-place. */
593 const int2 size = highlights.domain().size;
594 Result vertical_pass_result = context().create_result(ResultType::Color);
595 vertical_pass_result.allocate_texture(size);
597 GPU_texture_copy(vertical_pass_result, highlights);
598
599 GPUShader *shader = context().get_shader("compositor_glare_simple_star_vertical_pass");
600 GPU_shader_bind(shader);
601
602 GPU_shader_uniform_1i(shader, "iterations", get_number_of_iterations());
603 GPU_shader_uniform_1f(shader, "fade_factor", this->get_fade());
604
605 horizontal_pass_result.bind_as_texture(shader, "horizontal_tx");
606
607 vertical_pass_result.bind_as_image(shader, "vertical_img");
608
609 /* Dispatch a thread for each column in the image. */
610 const int width = size.x;
611 compute_dispatch_threads_at_least(shader, int2(width, 1));
612
613 horizontal_pass_result.unbind_as_texture();
614 vertical_pass_result.unbind_as_image();
616
617 return vertical_pass_result;
618 }
619
621 const Result &horizontal_pass_result)
622 {
623 /* First, copy the highlights result to the output since we will be doing the computation
624 * in-place. */
625 const int2 size = highlights.domain().size;
627 output.allocate_texture(size);
628 parallel_for(size, [&](const int2 texel) {
629 output.store_pixel(texel, highlights.load_pixel<float4>(texel));
630 });
631
632 const int iterations = this->get_number_of_iterations();
633 const float fade_factor = this->get_fade();
634
635 /* Dispatch a thread for each column in the image. */
636 const int width = size.x;
637 threading::parallel_for(IndexRange(width), 1, [&](const IndexRange sub_range) {
638 for (const int64_t x : sub_range) {
639 int height = size.y;
640
641 /* For each iteration, apply a causal filter followed by a non causal filters along the
642 * column mapped to the current thread invocation. */
643 for (int i = 0; i < iterations; i++) {
644 /* Causal Pass:
645 * Sequentially apply a causal filter running from bottom to top by mixing the value of
646 * the pixel in the column with the average value of the previous output and next input
647 * in the same column. */
648 for (int y = 0; y < height; y++) {
649 int2 texel = int2(x, y);
650 float4 previous_output = output.load_pixel_zero<float4>(texel - int2(0, i));
651 float4 current_input = output.load_pixel<float4>(texel);
652 float4 next_input = output.load_pixel_zero<float4>(texel + int2(0, i));
653
654 float4 neighbor_average = (previous_output + next_input) / 2.0f;
655 float4 causal_output = math::interpolate(current_input, neighbor_average, fade_factor);
656 output.store_pixel(texel, causal_output);
657 }
658
659 /* Non Causal Pass:
660 * Sequentially apply a non causal filter running from top to bottom by mixing the value
661 * of the pixel in the column with the average value of the previous output and next
662 * input in the same column. */
663 for (int y = height - 1; y >= 0; y--) {
664 int2 texel = int2(x, y);
665 float4 previous_output = output.load_pixel_zero<float4>(texel + int2(0, i));
666 float4 current_input = output.load_pixel<float4>(texel);
667 float4 next_input = output.load_pixel_zero<float4>(texel - int2(0, i));
668
669 float4 neighbor_average = (previous_output + next_input) / 2.0f;
670 float4 non_causal_output = math::interpolate(
671 current_input, neighbor_average, fade_factor);
672 output.store_pixel(texel, non_causal_output);
673 }
674 }
675
676 /* For each pixel in the column mapped to the current invocation thread, add the result of
677 * the horizontal pass to the vertical pass. */
678 for (int y = 0; y < height; y++) {
679 int2 texel = int2(x, y);
680 float4 horizontal = horizontal_pass_result.load_pixel<float4>(texel);
681 float4 vertical = output.load_pixel<float4>(texel);
682 float4 combined = horizontal + vertical;
683 output.store_pixel(texel, float4(combined.xyz(), 1.0f));
684 }
685 }
686 });
687
688 return output;
689 }
690
692 {
693 if (this->context().use_gpu()) {
694 return this->execute_simple_star_horizontal_pass_gpu(highlights);
695 }
696 return this->execute_simple_star_horizontal_pass_cpu(highlights);
697 }
698
700 {
701 /* First, copy the highlights result to the output since we will be doing the computation
702 * in-place. */
703 const int2 size = highlights.domain().size;
704 Result horizontal_pass_result = context().create_result(ResultType::Color);
705 horizontal_pass_result.allocate_texture(size);
707 GPU_texture_copy(horizontal_pass_result, highlights);
708
709 GPUShader *shader = context().get_shader("compositor_glare_simple_star_horizontal_pass");
710 GPU_shader_bind(shader);
711
712 GPU_shader_uniform_1i(shader, "iterations", get_number_of_iterations());
713 GPU_shader_uniform_1f(shader, "fade_factor", this->get_fade());
714
715 horizontal_pass_result.bind_as_image(shader, "horizontal_img");
716
717 /* Dispatch a thread for each row in the image. */
719
720 horizontal_pass_result.unbind_as_image();
722
723 return horizontal_pass_result;
724 }
725
727 {
728 /* First, copy the highlights result to the output since we will be doing the computation
729 * in-place. */
730 const int2 size = highlights.domain().size;
731 Result horizontal_pass_result = context().create_result(ResultType::Color);
732 horizontal_pass_result.allocate_texture(size);
733 parallel_for(size, [&](const int2 texel) {
734 horizontal_pass_result.store_pixel(texel, highlights.load_pixel<float4>(texel));
735 });
736
737 const int iterations = this->get_number_of_iterations();
738 const float fade_factor = this->get_fade();
739
740 /* Dispatch a thread for each row in the image. */
741 const int width = size.x;
742 threading::parallel_for(IndexRange(size.y), 1, [&](const IndexRange sub_range) {
743 for (const int64_t y : sub_range) {
744 /* For each iteration, apply a causal filter followed by a non causal filters along the
745 * row mapped to the current thread invocation. */
746 for (int i = 0; i < iterations; i++) {
747 /* Causal Pass:
748 * Sequentially apply a causal filter running from left to right by mixing the value of
749 * the pixel in the row with the average value of the previous output and next input in
750 * the same row. */
751 for (int x = 0; x < width; x++) {
752 int2 texel = int2(x, y);
753 float4 previous_output = horizontal_pass_result.load_pixel_zero<float4>(texel -
754 int2(i, 0));
755 float4 current_input = horizontal_pass_result.load_pixel<float4>(texel);
756 float4 next_input = horizontal_pass_result.load_pixel_zero<float4>(texel + int2(i, 0));
757
758 float4 neighbor_average = (previous_output + next_input) / 2.0f;
759 float4 causal_output = math::interpolate(current_input, neighbor_average, fade_factor);
760 horizontal_pass_result.store_pixel(texel, causal_output);
761 }
762
763 /* Non Causal Pass:
764 * Sequentially apply a non causal filter running from right to left by mixing the
765 * value of the pixel in the row with the average value of the previous output and next
766 * input in the same row. */
767 for (int x = width - 1; x >= 0; x--) {
768 int2 texel = int2(x, y);
769 float4 previous_output = horizontal_pass_result.load_pixel_zero<float4>(texel +
770 int2(i, 0));
771 float4 current_input = horizontal_pass_result.load_pixel<float4>(texel);
772 float4 next_input = horizontal_pass_result.load_pixel_zero<float4>(texel - int2(i, 0));
773
774 float4 neighbor_average = (previous_output + next_input) / 2.0f;
775 float4 non_causal_output = math::interpolate(
776 current_input, neighbor_average, fade_factor);
777 horizontal_pass_result.store_pixel(texel, non_causal_output);
778 }
779 }
780 }
781 });
782
783 return horizontal_pass_result;
784 }
785
787 {
788 Result diagonal_pass_result = execute_simple_star_diagonal_pass(highlights);
789 Result anti_diagonal_pass_result = this->execute_simple_star_anti_diagonal_pass(
790 highlights, diagonal_pass_result);
791 diagonal_pass_result.release();
792 return anti_diagonal_pass_result;
793 }
794
796 const Result &diagonal_pass_result)
797 {
798 if (this->context().use_gpu()) {
799 return this->execute_simple_star_anti_diagonal_pass_gpu(highlights, diagonal_pass_result);
800 }
801 return this->execute_simple_star_anti_diagonal_pass_cpu(highlights, diagonal_pass_result);
802 }
803
805 const Result &diagonal_pass_result)
806 {
807 /* First, copy the highlights result to the output since we will be doing the computation
808 * in-place. */
809 const int2 size = highlights.domain().size;
810 Result anti_diagonal_pass_result = context().create_result(ResultType::Color);
811 anti_diagonal_pass_result.allocate_texture(size);
813 GPU_texture_copy(anti_diagonal_pass_result, highlights);
814
815 GPUShader *shader = context().get_shader("compositor_glare_simple_star_anti_diagonal_pass");
816 GPU_shader_bind(shader);
817
818 GPU_shader_uniform_1i(shader, "iterations", get_number_of_iterations());
819 GPU_shader_uniform_1f(shader, "fade_factor", this->get_fade());
820
821 diagonal_pass_result.bind_as_texture(shader, "diagonal_tx");
822
823 anti_diagonal_pass_result.bind_as_image(shader, "anti_diagonal_img");
824
825 /* Dispatch a thread for each diagonal in the image. */
827
828 diagonal_pass_result.unbind_as_texture();
829 anti_diagonal_pass_result.unbind_as_image();
831
832 return anti_diagonal_pass_result;
833 }
834
836 const Result &diagonal_pass_result)
837 {
838 /* First, copy the highlights result to the output since we will be doing the computation
839 * in-place. */
840 const int2 size = highlights.domain().size;
841 Result output = this->context().create_result(ResultType::Color);
842 output.allocate_texture(size);
843 parallel_for(size, [&](const int2 texel) {
844 output.store_pixel(texel, highlights.load_pixel<float4>(texel));
845 });
846
847 const int iterations = this->get_number_of_iterations();
848 const float fade_factor = this->get_fade();
849
850 /* Dispatch a thread for each diagonal in the image. */
851 const int diagonals_count = compute_number_of_diagonals(size);
852 threading::parallel_for(IndexRange(diagonals_count), 1, [&](const IndexRange sub_range) {
853 for (const int64_t index : sub_range) {
854 int anti_diagonal_length = compute_anti_diagonal_length(size, index);
855 int2 start = compute_anti_diagonal_start(size, index);
856 int2 direction = get_anti_diagonal_direction();
857 int2 end = start + (anti_diagonal_length - 1) * direction;
858
859 /* For each iteration, apply a causal filter followed by a non causal filters along the
860 * anti diagonal mapped to the current thread invocation. */
861 for (int i = 0; i < iterations; i++) {
862 /* Causal Pass:
863 * Sequentially apply a causal filter running from the start of the anti diagonal to
864 * its end by mixing the value of the pixel in the anti diagonal with the average value
865 * of the previous output and next input in the same anti diagonal. */
866 for (int j = 0; j < anti_diagonal_length; j++) {
867 int2 texel = start + j * direction;
868 float4 previous_output = output.load_pixel_zero<float4>(texel - i * direction);
869 float4 current_input = output.load_pixel<float4>(texel);
870 float4 next_input = output.load_pixel_zero<float4>(texel + i * direction);
871
872 float4 neighbor_average = (previous_output + next_input) / 2.0f;
873 float4 causal_output = math::interpolate(current_input, neighbor_average, fade_factor);
874 output.store_pixel(texel, causal_output);
875 }
876
877 /* Non Causal Pass:
878 * Sequentially apply a non causal filter running from the end of the diagonal to its
879 * start by mixing the value of the pixel in the diagonal with the average value of the
880 * previous output and next input in the same diagonal. */
881 for (int j = 0; j < anti_diagonal_length; j++) {
882 int2 texel = end - j * direction;
883 float4 previous_output = output.load_pixel_zero<float4>(texel + i * direction);
884 float4 current_input = output.load_pixel<float4>(texel);
885 float4 next_input = output.load_pixel_zero<float4>(texel - i * direction);
886
887 float4 neighbor_average = (previous_output + next_input) / 2.0f;
888 float4 non_causal_output = math::interpolate(
889 current_input, neighbor_average, fade_factor);
890 output.store_pixel(texel, non_causal_output);
891 }
892 }
893
894 /* For each pixel in the anti diagonal mapped to the current invocation thread, add the
895 * result of the diagonal pass to the vertical pass. */
896 for (int j = 0; j < anti_diagonal_length; j++) {
897 int2 texel = start + j * direction;
898 float4 horizontal = diagonal_pass_result.load_pixel<float4>(texel);
899 float4 vertical = output.load_pixel<float4>(texel);
900 float4 combined = horizontal + vertical;
901 output.store_pixel(texel, float4(combined.xyz(), 1.0f));
902 }
903 }
904 });
905
906 return output;
907 }
908
  /* Applies the diagonal smearing pass of the Simple Star glare, dispatching to the GPU or CPU
   * implementation depending on the execution context. */
  {
    if (this->context().use_gpu()) {
      return this->execute_simple_star_diagonal_pass_gpu(highlights);
    }
    return this->execute_simple_star_diagonal_pass_cpu(highlights);
  }
916
  /* GPU variant of the diagonal pass: smears the highlights along the image diagonals in-place
   * using the simple star diagonal pass shader, with one thread per diagonal. */
  {
    /* First, copy the highlights result to the output since we will be doing the computation
     * in-place. */
    const int2 size = highlights.domain().size;
    Result diagonal_pass_result = context().create_result(ResultType::Color);
    diagonal_pass_result.allocate_texture(size);
    GPU_texture_copy(diagonal_pass_result, highlights);

    GPUShader *shader = context().get_shader("compositor_glare_simple_star_diagonal_pass");
    GPU_shader_bind(shader);

    GPU_shader_uniform_1i(shader, "iterations", get_number_of_iterations());
    GPU_shader_uniform_1f(shader, "fade_factor", this->get_fade());

    /* Bound as an image since the shader both reads and writes the result. */
    diagonal_pass_result.bind_as_image(shader, "diagonal_img");

    /* Dispatch a thread for each diagonal in the image. */

    diagonal_pass_result.unbind_as_image();

    return diagonal_pass_result;
  }
943
  /* CPU variant of the diagonal pass: for every diagonal in the image, iteratively applies a
   * causal then a non causal recursive filter along the diagonal, smearing the highlights in
   * both directions along it. */
  {
    /* First, copy the highlights result to the output since we will be doing the computation
     * in-place. */
    const int2 size = highlights.domain().size;
    Result diagonal_pass_result = this->context().create_result(ResultType::Color);
    diagonal_pass_result.allocate_texture(size);
    parallel_for(size, [&](const int2 texel) {
      diagonal_pass_result.store_pixel(texel, highlights.load_pixel<float4>(texel));
    });

    const int iterations = this->get_number_of_iterations();
    const float fade_factor = this->get_fade();

    /* Dispatch a thread for each diagonal in the image. */
    const int diagonals_count = compute_number_of_diagonals(size);
    threading::parallel_for(IndexRange(diagonals_count), 1, [&](const IndexRange sub_range) {
      for (const int64_t index : sub_range) {
        int diagonal_length = compute_diagonal_length(size, index);
        int2 start = compute_diagonal_start(size, index);
        int2 direction = get_diagonal_direction();
        int2 end = start + (diagonal_length - 1) * direction;

        /* For each iteration, apply a causal filter followed by a non causal filters along the
         * diagonal mapped to the current thread invocation. */
        for (int i = 0; i < iterations; i++) {
          /* Causal Pass:
           * Sequentially apply a causal filter running from the start of the diagonal to its end
           * by mixing the value of the pixel in the diagonal with the average value of the
           * previous output and next input in the same diagonal. */
          for (int j = 0; j < diagonal_length; j++) {
            int2 texel = start + j * direction;
            /* Out-of-diagonal neighbors read as zero via load_pixel_zero. */
            float4 previous_output = diagonal_pass_result.load_pixel_zero<float4>(texel -
                                                                                  i * direction);
            float4 current_input = diagonal_pass_result.load_pixel<float4>(texel);
            float4 next_input = diagonal_pass_result.load_pixel_zero<float4>(texel +
                                                                             i * direction);

            float4 neighbor_average = (previous_output + next_input) / 2.0f;
            float4 causal_output = math::interpolate(current_input, neighbor_average, fade_factor);
            diagonal_pass_result.store_pixel(texel, causal_output);
          }

          /* Non Causal Pass:
           * Sequentially apply a non causal filter running from the end of the diagonal to its
           * start by mixing the value of the pixel in the diagonal with the average value of the
           * previous output and next input in the same diagonal. */
          for (int j = 0; j < diagonal_length; j++) {
            int2 texel = end - j * direction;
            float4 previous_output = diagonal_pass_result.load_pixel_zero<float4>(texel +
                                                                                  i * direction);
            float4 current_input = diagonal_pass_result.load_pixel<float4>(texel);
            float4 next_input = diagonal_pass_result.load_pixel_zero<float4>(texel -
                                                                             i * direction);

            float4 neighbor_average = (previous_output + next_input) / 2.0f;
            float4 non_causal_output = math::interpolate(
                current_input, neighbor_average, fade_factor);
            diagonal_pass_result.store_pixel(texel, non_causal_output);
          }
        }
      }
    });

    return diagonal_pass_result;
  }
1010
  /* Whether the simple star glare should also run the diagonal passes, as driven by the
   * "Diagonal Star" input. Defaults to true when the input is a single value. */
  {
    return this->get_input("Diagonal Star").get_single_value_default(true);
  }
1015
1016 /* --------------
1017 * Streaks Glare.
1018 * -------------- */
1019
1020 Result execute_streaks(const Result &highlights)
1021 {
1022 /* Create an initially zero image where streaks will be accumulated. */
1023 const int2 size = highlights.domain().size;
1024 Result accumulated_streaks_result = context().create_result(ResultType::Color);
1025 accumulated_streaks_result.allocate_texture(size);
1026 if (this->context().use_gpu()) {
1027 const float4 zero_color = float4(0.0f);
1028 GPU_texture_clear(accumulated_streaks_result, GPU_DATA_FLOAT, zero_color);
1029 }
1030 else {
1031 parallel_for(size, [&](const int2 texel) {
1032 accumulated_streaks_result.store_pixel(texel, float4(0.0f));
1033 });
1034 }
1035
1036 /* For each streak, compute its direction and apply a streak filter in that direction, then
1037 * accumulate the result into the accumulated streaks result. */
1038 for (const int streak_index : IndexRange(get_number_of_streaks())) {
1039 const float2 streak_direction = compute_streak_direction(streak_index);
1040 Result streak_result = apply_streak_filter(highlights, streak_direction);
1041 this->accumulate_streak(streak_result, accumulated_streaks_result);
1042 streak_result.release();
1043 }
1044
1045 return accumulated_streaks_result;
1046 }
1047
1048 Result apply_streak_filter(const Result &highlights, const float2 &streak_direction)
1049 {
1050 if (this->context().use_gpu()) {
1051 return this->apply_streak_filter_gpu(highlights, streak_direction);
1052 }
1053 return this->apply_streak_filter_cpu(highlights, streak_direction);
1054 }
1055
  /* GPU implementation of the streak filter. Iteratively runs the streaks filter shader along
   * the given direction, feeding each iteration's output back as the next iteration's input. */
  Result apply_streak_filter_gpu(const Result &highlights, const float2 &streak_direction)
  {
    GPUShader *shader = context().get_shader("compositor_glare_streaks_filter");
    GPU_shader_bind(shader);

    /* Copy the highlights result into a new result because the output will be copied to the input
     * after each iteration. */
    const int2 size = highlights.domain().size;
    Result input_streak_result = context().create_result(ResultType::Color);
    input_streak_result.allocate_texture(size);
    GPU_texture_copy(input_streak_result, highlights);

    Result output_streak_result = context().create_result(ResultType::Color);
    output_streak_result.allocate_texture(size);

    /* For the given number of iterations, apply the streak filter in the given direction. The
     * result of the previous iteration is used as the input of the current iteration. */
    const IndexRange iterations_range = IndexRange(get_number_of_iterations());
    for (const int iteration : iterations_range) {
      const float color_modulator = compute_streak_color_modulator(iteration);
      const float iteration_magnitude = compute_streak_iteration_magnitude(iteration);
      const float3 fade_factors = compute_streak_fade_factors(iteration_magnitude);
      const float2 streak_vector = streak_direction * iteration_magnitude;

      GPU_shader_uniform_1f(shader, "color_modulator", color_modulator);
      GPU_shader_uniform_3fv(shader, "fade_factors", fade_factors);
      GPU_shader_uniform_2fv(shader, "streak_vector", streak_vector);

      /* Enable bilinear filtering since the shader samples between pixel centers. */
      GPU_texture_filter_mode(input_streak_result, true);
      input_streak_result.bind_as_texture(shader, "input_streak_tx");

      output_streak_result.bind_as_image(shader, "output_streak_img");

      input_streak_result.unbind_as_texture();
      output_streak_result.unbind_as_image();

      /* The accumulated result serves as the input for the next iteration, so copy the result to
       * the input result since it can't be used for reading and writing simultaneously. Skip
       * copying for the last iteration since it is not needed. */
      if (iteration != iterations_range.last()) {
        GPU_texture_copy(input_streak_result, output_streak_result);
      }
    }

    input_streak_result.release();

    return output_streak_result;
  }
1110
1111 Result apply_streak_filter_cpu(const Result &highlights, const float2 &streak_direction)
1112 {
1113 /* Copy the highlights result into a new result because the output will be copied to the input
1114 * after each iteration. */
1115 const int2 size = highlights.domain().size;
1116 Result input = this->context().create_result(ResultType::Color);
1117 input.allocate_texture(size);
1118 parallel_for(size, [&](const int2 texel) {
1119 input.store_pixel(texel, highlights.load_pixel<float4>(texel));
1120 });
1121
1122 Result output = this->context().create_result(ResultType::Color);
1123 output.allocate_texture(size);
1124
1125 /* For the given number of iterations, apply the streak filter in the given direction. The
1126 * result of the previous iteration is used as the input of the current iteration. */
1127 const IndexRange iterations_range = IndexRange(this->get_number_of_iterations());
1128 for (const int iteration : iterations_range) {
1129 const float color_modulator = this->compute_streak_color_modulator(iteration);
1130 const float iteration_magnitude = this->compute_streak_iteration_magnitude(iteration);
1131 const float3 fade_factors = this->compute_streak_fade_factors(iteration_magnitude);
1132 const float2 streak_vector = streak_direction * iteration_magnitude;
1133
1134 parallel_for(size, [&](const int2 texel) {
1135 /* Add 0.5 to evaluate the input sampler at the center of the pixel and divide by the image
1136 * size to get the coordinates into the sampler's expected [0, 1] range. Similarly,
1137 * transform the vector into the sampler's space by dividing by the input size. */
1138 float2 coordinates = (float2(texel) + float2(0.5f)) / float2(size);
1139 float2 vector = streak_vector / float2(size);
1140
1141 /* Load three equally spaced neighbors to the current pixel in the direction of the streak
1142 * vector. */
1143 float4 neighbors[3];
1144 neighbors[0] = input.sample_bilinear_zero(coordinates + vector);
1145 neighbors[1] = input.sample_bilinear_zero(coordinates + vector * 2.0f);
1146 neighbors[2] = input.sample_bilinear_zero(coordinates + vector * 3.0f);
1147
1148 /* Attenuate the value of two of the channels for each of the neighbors by multiplying by
1149 * the color modulator. The particular channels for each neighbor were chosen to be
1150 * visually similar to the modulation pattern of chromatic aberration. */
1151 neighbors[0] *= float4(1.0f, color_modulator, color_modulator, 1.0f);
1152 neighbors[1] *= float4(color_modulator, color_modulator, 1.0f, 1.0f);
1153 neighbors[2] *= float4(color_modulator, 1.0f, color_modulator, 1.0f);
1154
1155 /* Compute the weighted sum of all neighbors using the given fade factors as weights. The
1156 * weights are expected to be lower for neighbors that are further away. */
1157 float4 weighted_neighbors_sum = float4(0.0f);
1158 for (int i = 0; i < 3; i++) {
1159 weighted_neighbors_sum += fade_factors[i] * neighbors[i];
1160 }
1161
1162 /* The output is the average between the center color and the weighted sum of the
1163 * neighbors. Which intuitively mean that highlights will spread in the direction of the
1164 * streak, which is the desired result. */
1165 float4 center_color = input.sample_bilinear_zero(coordinates);
1166 float4 output_color = (center_color + weighted_neighbors_sum) / 2.0f;
1167 output.store_pixel(texel, output_color);
1168 });
1169
1170 /* The accumulated result serves as the input for the next iteration, so copy the result to
1171 * the input result since it can't be used for reading and writing simultaneously. Skip
1172 * copying for the last iteration since it is not needed. */
1173 if (iteration != iterations_range.last()) {
1174 parallel_for(size, [&](const int2 texel) {
1175 input.store_pixel(texel, output.load_pixel<float4>(texel));
1176 });
1177 }
1178 }
1179
1180 input.release();
1181 return output;
1182 }
1183
1184 void accumulate_streak(const Result &streak_result, Result &accumulated_streaks_result)
1185 {
1186 if (this->context().use_gpu()) {
1187 this->accumulate_streak_gpu(streak_result, accumulated_streaks_result);
1188 }
1189 else {
1190 this->accumulate_streak_cpu(streak_result, accumulated_streaks_result);
1191 }
1192 }
1193
  /* GPU implementation of streak accumulation: attenuates the streak to keep intensity
   * consistent across iteration counts, then additively blends it into the accumulation image
   * using the streaks accumulation shader. */
  void accumulate_streak_gpu(const Result &streak_result, Result &accumulated_streaks_result)
  {
    GPUShader *shader = this->context().get_shader("compositor_glare_streaks_accumulate");
    GPU_shader_bind(shader);

    const float attenuation_factor = this->compute_streak_attenuation_factor();
    GPU_shader_uniform_1f(shader, "attenuation_factor", attenuation_factor);

    streak_result.bind_as_texture(shader, "streak_tx");
    /* NOTE(review): the trailing `true` presumably marks the image as readable so existing
     * accumulation can be read then updated -- confirm against Result::bind_as_image. */
    accumulated_streaks_result.bind_as_image(shader, "accumulated_streaks_img", true);

    compute_dispatch_threads_at_least(shader, streak_result.domain().size);

    streak_result.unbind_as_texture();
    accumulated_streaks_result.unbind_as_image();
  }
1211
1212 void accumulate_streak_cpu(const Result &streak, Result &accumulated_streaks)
1213 {
1214 const float attenuation_factor = this->compute_streak_attenuation_factor();
1215
1216 const int2 size = streak.domain().size;
1217 parallel_for(size, [&](const int2 texel) {
1218 float4 attenuated_streak = streak.load_pixel<float4>(texel) * attenuation_factor;
1219 float4 current_accumulated_streaks = accumulated_streaks.load_pixel<float4>(texel);
1220 float4 combined_streaks = current_accumulated_streaks + attenuated_streak;
1221 accumulated_streaks.store_pixel(texel, float4(combined_streaks.xyz(), 1.0f));
1222 });
1223 }
1224
1225 /* As the number of iterations increase, the streaks spread farther and their intensity decrease.
1226 * To maintain similar intensities regardless of the number of iterations, streaks with lower
1227 * number of iteration are linearly attenuated. When the number of iterations is maximum, we need
1228 * not attenuate, so the denominator should be one, and when the number of iterations is one, we
1229 * need the attenuation to be maximum. This can be modeled as a simple decreasing linear equation
1230 * by substituting the two aforementioned cases. */
  {
    /* Evaluates to 1 (no attenuation) at MAX_GLARE_ITERATIONS and decreases linearly for
     * fewer iterations, as described in the comment above. */
    return 1.0f / (MAX_GLARE_ITERATIONS + 1 - get_number_of_iterations());
  }
1235
1236 /* Given the index of the streak in the [0, Number Of Streaks - 1] range, compute the unit
1237 * direction vector defining the streak. The streak directions should make angles with the x-axis
1238 * that are equally spaced and covers the whole two pi range, starting with the user supplied
1239 * angle. */
  {
    const int number_of_streaks = get_number_of_streaks();
    const float start_angle = this->get_streaks_angle();
    /* Equally spaced angles over the full 2*pi circle, offset by the user supplied angle. */
    const float angle = start_angle + (float(streak_index) / number_of_streaks) * (M_PI * 2.0f);
  }
1247
1248 /* Different color channels of the streaks can be modulated by being multiplied by the color
1249 * modulator computed by this method. The color modulation is expected to be maximum when the
1250 * modulation factor is 1 and non existent when it is zero. But since the color modulator is
1251 * multiplied to the channel and the multiplicative identity is 1, we invert the modulation
1252 * factor. Moreover, color modulation should be less visible on higher iterations because they
1253 * produce the farther more faded away parts of the streaks. To achieve that, the modulation
1254 * factor is raised to the power of the iteration, noting that the modulation value is in the
1255 * [0, 1] range so the higher the iteration the lower the resulting modulation factor. The plus
1256 * one makes sure the power starts at one. */
  {
    /* Inverted so that full modulation maps to 0 multiplicatively; the power of
     * (iteration + 1) fades the modulation on higher iterations, see the comment above. */
    return 1.0f - std::pow(this->get_color_modulation(), iteration + 1);
  }
1261
1262 /* Streaks are computed by iteratively applying a filter that samples 3 neighboring pixels in the
1263 * direction of the streak. Those neighboring pixels are then combined using a weighted sum. The
1264 * weights of the neighbors are the fade factors computed by this method. Farther neighbors are
1265 * expected to have lower weights because they contribute less to the combined result. Since the
1266 * iteration magnitude represents how far the neighbors are, as noted in the description of the
1267 * compute_streak_iteration_magnitude method, the fade factor for the closest neighbor is
1268 * computed as the user supplied fade parameter raised to the power of the magnitude, noting that
1269 * the fade value is in the [0, 1] range while the magnitude is larger than or equal one, so the
1270 * higher the power the lower the resulting fade factor. Furthermore, the other two neighbors are
1271 * just squared and cubed versions of the fade factor for the closest neighbor to get even lower
1272 * fade factors for those farther neighbors. */
1273 float3 compute_streak_fade_factors(float iteration_magnitude)
1274 {
1275 const float fade_factor = std::pow(this->get_fade(), iteration_magnitude);
1276 return float3(fade_factor, std::pow(fade_factor, 2.0f), std::pow(fade_factor, 3.0f));
1277 }
1278
1279 /* Streaks are computed by iteratively applying a filter that samples the neighboring pixels in
1280 * the direction of the streak. Each higher iteration samples pixels that are farther away, the
1281 * magnitude computed by this method describes how farther away the neighbors are sampled. The
1282 * magnitude exponentially increase with the iteration. A base of 4, was chosen as compromise
1283 * between better quality and performance, since a lower base corresponds to more tightly spaced
1284 * neighbors but would require more iterations to produce a streak of the same length. */
  {
    /* Exponential growth with a base of 4, chosen as a quality/performance compromise as
     * described in the comment above. */
    return std::pow(4.0f, iteration);
  }
1289
  {
    /* Number of streak directions, clamped to the supported [1, 16] range, defaulting to 4. */
    return math::clamp(this->get_input("Streaks").get_single_value_default(4), 1, 16);
  }
1294
  {
    /* Angle of the first streak, zero by default. Presumably in radians, matching the
     * M_PI based math in compute_streak_direction. */
    return this->get_input("Streaks Angle").get_single_value_default(0.0f);
  }
1299
1300 /* ------------
1301 * Ghost Glare.
1302 * ------------ */
1303
1304 Result execute_ghost(const Result &highlights)
1305 {
1306 Result base_ghost_result = compute_base_ghost(highlights);
1307 Result accumulated_ghosts_result = context().create_result(ResultType::Color);
1308 if (this->context().use_gpu()) {
1309 this->accumulate_ghosts_gpu(base_ghost_result, accumulated_ghosts_result);
1310 }
1311 else {
1312 this->accumulate_ghosts_cpu(base_ghost_result, accumulated_ghosts_result);
1313 }
1314
1315 base_ghost_result.release();
1316 return accumulated_ghosts_result;
1317 }
1318
  /* GPU implementation of ghost accumulation. Accumulates four scaled and color-modulated
   * copies of the base ghost per iteration using the ghost accumulation shader, feeding each
   * iteration's accumulation back as the next iteration's input. */
  void accumulate_ghosts_gpu(const Result &base_ghost_result, Result &accumulated_ghosts_result)
  {
    GPUShader *shader = context().get_shader("compositor_glare_ghost_accumulate");
    GPU_shader_bind(shader);

    /* Color modulators are constant across iterations. */
    std::array<float4, 4> color_modulators = compute_ghost_color_modulators();
        "color_modulators",
        color_modulators.size(),
        (const float(*)[4])color_modulators.data());

    /* Zero initialize output image where ghosts will be accumulated. */
    const float4 zero_color = float4(0.0f);
    const int2 size = base_ghost_result.domain().size;
    accumulated_ghosts_result.allocate_texture(size);
    GPU_texture_clear(accumulated_ghosts_result, GPU_DATA_FLOAT, zero_color);

    /* Copy the highlights result into a new result because the output will be copied to the input
     * after each iteration. */
    Result input_ghost_result = context().create_result(ResultType::Color);
    input_ghost_result.allocate_texture(size);
    GPU_texture_copy(input_ghost_result, base_ghost_result);

    /* For the given number of iterations, accumulate four ghosts with different scales and color
     * modulators. The result of the previous iteration is used as the input of the current
     * iteration. We start from index 1 because we are not interested in the scales produced for
     * the first iteration according to visual judgment, see the compute_ghost_scales method. */
    const IndexRange iterations_range = IndexRange(get_number_of_iterations()).drop_front(1);
    for (const int i : iterations_range) {
      std::array<float, 4> scales = compute_ghost_scales(i);
      GPU_shader_uniform_4fv(shader, "scales", scales.data());

      input_ghost_result.bind_as_texture(shader, "input_ghost_tx");
      accumulated_ghosts_result.bind_as_image(shader, "accumulated_ghost_img", true);

      input_ghost_result.unbind_as_texture();
      accumulated_ghosts_result.unbind_as_image();

      /* The accumulated result serves as the input for the next iteration, so copy the result to
       * the input result since it can't be used for reading and writing simultaneously. Skip
       * copying for the last iteration since it is not needed. */
      if (i != iterations_range.last()) {
        GPU_texture_copy(input_ghost_result, accumulated_ghosts_result);
      }
    }

    input_ghost_result.release();
  }
1372
1373 void accumulate_ghosts_cpu(const Result &base_ghost, Result &accumulated_ghosts_result)
1374 {
1375 /* Color modulators are constant across iterations. */
1376 std::array<float4, 4> color_modulators = this->compute_ghost_color_modulators();
1377
1378 /* Zero initialize output image where ghosts will be accumulated. */
1379 const int2 size = base_ghost.domain().size;
1380 accumulated_ghosts_result.allocate_texture(size);
1381 parallel_for(size, [&](const int2 texel) {
1382 accumulated_ghosts_result.store_pixel(texel, float4(0.0f));
1383 });
1384
1385 /* Copy the highlights result into a new result because the output will be copied to the input
1386 * after each iteration. */
1387 Result input = context().create_result(ResultType::Color);
1388 input.allocate_texture(size);
1389 parallel_for(size, [&](const int2 texel) {
1390 input.store_pixel(texel, base_ghost.load_pixel<float4>(texel));
1391 });
1392
1393 /* For the given number of iterations, accumulate four ghosts with different scales and color
1394 * modulators. The result of the previous iteration is used as the input of the current
1395 * iteration. We start from index 1 because we are not interested in the scales produced for
1396 * the first iteration according to visual judgment, see the compute_ghost_scales method. */
1397 const IndexRange iterations_range = IndexRange(this->get_number_of_iterations()).drop_front(1);
1398 for (const int i : iterations_range) {
1399 std::array<float, 4> scales = compute_ghost_scales(i);
1400
1401 parallel_for(size, [&](const int2 texel) {
1402 /* Add 0.5 to evaluate the input sampler at the center of the pixel and divide by the image
1403 * size to get the coordinates into the sampler's expected [0, 1] range. */
1404 float2 coordinates = (float2(texel) + float2(0.5f)) / float2(size);
1405
1406 /* We accumulate four variants of the input ghost texture, each is scaled by some amount
1407 * and possibly multiplied by some color as a form of color modulation. */
1408 float4 accumulated_ghost = float4(0.0f);
1409 for (int i = 0; i < 4; i++) {
1410 float scale = scales[i];
1411 float4 color_modulator = color_modulators[i];
1412
1413 /* Scale the coordinates for the ghost, pre subtract 0.5 and post add 0.5 to use 0.5 as
1414 * the origin of the scaling. */
1415 float2 scaled_coordinates = (coordinates - 0.5f) * scale + 0.5f;
1416
1417 /* The value of the ghost is attenuated by a scalar multiple of the inverse distance to
1418 * the center, such that it is maximum at the center and become zero further from the
1419 * center, making sure to take the scale into account. The scalar multiple of 1 / 4 is
1420 * chosen using visual judgment. */
1421 float distance_to_center = math::distance(coordinates, float2(0.5f)) * 2.0f;
1422 float attenuator = math::max(0.0f, 1.0f - distance_to_center * math::abs(scale)) / 4.0f;
1423
1424 /* Accumulate the scaled ghost after attenuating and color modulating its value. */
1425 float4 multiplier = attenuator * color_modulator;
1426 accumulated_ghost += input.sample_bilinear_zero(scaled_coordinates) * multiplier;
1427 }
1428
1429 float4 current_accumulated_ghost = accumulated_ghosts_result.load_pixel<float4>(texel);
1430 float4 combined_ghost = current_accumulated_ghost + accumulated_ghost;
1431 accumulated_ghosts_result.store_pixel(texel, float4(combined_ghost.xyz(), 1.0f));
1432 });
1433
1434 /* The accumulated result serves as the input for the next iteration, so copy the result to
1435 * the input result since it can't be used for reading and writing simultaneously. Skip
1436 * copying for the last iteration since it is not needed. */
1437 if (i != iterations_range.last()) {
1438 parallel_for(size, [&](const int2 texel) {
1439 input.store_pixel(texel, accumulated_ghosts_result.load_pixel<float4>(texel));
1440 });
1441 }
1442 }
1443
1444 input.release();
1445 }
1446
1447 /* Computes two ghosts by blurring the highlights with two different radii, then adds them into a
1448 * single base ghost image after scaling them by some factor and flipping the bigger ghost along
1449 * the center of the image. */
  /* Computes the base ghost by blurring the highlights with a small and a big radius, then
   * combining the two blurred results using the GPU or CPU implementation. */
  {
    Result small_ghost_result = context().create_result(ResultType::Color);
        highlights,
        small_ghost_result,
        false);

    Result big_ghost_result = context().create_result(ResultType::Color);
        highlights,
        big_ghost_result,
        false);

    Result base_ghost_result = context().create_result(ResultType::Color);
    if (this->context().use_gpu()) {
      this->compute_base_ghost_gpu(small_ghost_result, big_ghost_result, base_ghost_result);
    }
    else {
      this->compute_base_ghost_cpu(small_ghost_result, big_ghost_result, base_ghost_result);
    }

    small_ghost_result.release();
    big_ghost_result.release();

    return base_ghost_result;
  }
1481
  /* GPU implementation of the base ghost computation: combines the small and big blurred
   * ghosts into base_ghost_result using the ghost base shader. */
  void compute_base_ghost_gpu(const Result &small_ghost_result,
                              const Result &big_ghost_result,
                              Result &base_ghost_result)
  {
    GPUShader *shader = context().get_shader("compositor_glare_ghost_base");
    GPU_shader_bind(shader);

    /* Enable bilinear filtering since the shader samples between pixel centers. */
    GPU_texture_filter_mode(small_ghost_result, true);
    small_ghost_result.bind_as_texture(shader, "small_ghost_tx");

    GPU_texture_filter_mode(big_ghost_result, true);
    big_ghost_result.bind_as_texture(shader, "big_ghost_tx");

    base_ghost_result.allocate_texture(small_ghost_result.domain());
    base_ghost_result.bind_as_image(shader, "combined_ghost_img");

    compute_dispatch_threads_at_least(shader, base_ghost_result.domain().size);

    small_ghost_result.unbind_as_texture();
    big_ghost_result.unbind_as_texture();
    base_ghost_result.unbind_as_image();
  }
1507
1508 void compute_base_ghost_cpu(const Result &small_ghost_result,
1509 const Result &big_ghost_result,
1510 Result &combined_ghost)
1511 {
1512 const int2 size = small_ghost_result.domain().size;
1513 combined_ghost.allocate_texture(size);
1514
1515 parallel_for(size, [&](const int2 texel) {
1516 /* Add 0.5 to evaluate the input sampler at the center of the pixel and divide by the image
1517 * size to get the coordinates into the sampler's expected [0, 1] range. */
1518 float2 coordinates = (float2(texel) + float2(0.5f)) / float2(size);
1519
1520 /* The small ghost is scaled down with the origin as the center of the image by a factor
1521 * of 2.13, while the big ghost is flipped and scaled up with the origin as the center of the
1522 * image by a factor of 0.97. Note that 1) The negative scale implements the flipping. 2)
1523 * Factors larger than 1 actually scales down the image since the factor multiplies the
1524 * coordinates and not the images itself. 3) The values are arbitrarily chosen using visual
1525 * judgment. */
1526 float small_ghost_scale = 2.13f;
1527 float big_ghost_scale = -0.97f;
1528
1529 /* Scale the coordinates for the small and big ghosts, pre subtract 0.5 and post add 0.5 to
1530 * use 0.5 as the origin of the scaling. Notice that the big ghost is flipped due to the
1531 * negative scale. */
1532 float2 small_ghost_coordinates = (coordinates - 0.5f) * small_ghost_scale + 0.5f;
1533 float2 big_ghost_coordinates = (coordinates - 0.5f) * big_ghost_scale + 0.5f;
1534
1535 /* The values of the ghosts are attenuated by the inverse distance to the center, such that
1536 * they are maximum at the center and become zero further from the center, making sure to
1537 * take the aforementioned scale into account. */
1538 float distance_to_center = math::distance(coordinates, float2(0.5f)) * 2.0f;
1539 float small_ghost_attenuator = math::max(0.0f,
1540 1.0f - distance_to_center * small_ghost_scale);
1541 float big_ghost_attenuator = math::max(
1542 0.0f, 1.0f - distance_to_center * math::abs(big_ghost_scale));
1543
1544 float4 small_ghost = small_ghost_result.sample_bilinear_zero(small_ghost_coordinates) *
1545 small_ghost_attenuator;
1546 float4 big_ghost = big_ghost_result.sample_bilinear_zero(big_ghost_coordinates) *
1547 big_ghost_attenuator;
1548
1549 combined_ghost.store_pixel(texel, small_ghost + big_ghost);
1550 });
1551 }
1552
1553 /* In each iteration of ghost accumulation, four ghosts are accumulated, each of which might be
1554 * modulated by multiplying by some color modulator, this function generates a color modulator
1555 * for each of the four ghosts. The first ghost is always unmodulated, so is the multiplicative
1556 * identity of 1. The second ghost gets only its green and blue channels modulated, the third
1557 * ghost gets only its red and green channels modulated, and the fourth ghost gets only its red
1558 * and blue channels modulated. */
1559 std::array<float4, 4> compute_ghost_color_modulators()
1560 {
1561 const float color_modulation_factor = get_ghost_color_modulation_factor();
1562
1563 std::array<float4, 4> color_modulators;
1564 color_modulators[0] = float4(1.0f);
1565 color_modulators[1] = float4(1.0f, color_modulation_factor, color_modulation_factor, 1.0f);
1566 color_modulators[2] = float4(color_modulation_factor, color_modulation_factor, 1.0f, 1.0f);
1567 color_modulators[3] = float4(color_modulation_factor, 1.0f, color_modulation_factor, 1.0f);
1568
1569 return color_modulators;
1570 }
1571
1572 /* In each iteration of ghost accumulation, four ghosts with different scales are accumulated.
1573 * Given the index of a certain iteration, this method computes the 4 scales for it. Assuming we
1574 * have n number of iterations, that means the total number of accumulations is 4 * n. To get a
1575 * variety of scales, we generate an arithmetic progression that starts from 2.1 and ends at zero
1576 * exclusive, containing 4 * n elements. The start scale of 2.1 is chosen arbitrarily using
1577 * visual judgment. To get more scale variations, every other scale is inverted with a slight
1578 * change in scale such that it alternates between scaling down and up, additionally every other
1579 * ghost is flipped across the image center by negating its scale. Finally, to get variations
1580 * across the number of iterations, a shift of 0.5 is introduced when the number of iterations is
1581 * odd, that way, the user will get variations when changing the number of iterations as opposed
1582 * to just getting less or more ghosts. */
1583 std::array<float, 4> compute_ghost_scales(int iteration)
1584 {
1585 /* Shift scales by 0.5 for odd number of iterations as discussed in the method description.
1586 */
1587 const float offset = (get_number_of_iterations() % 2 == 1) ? 0.5f : 0.0f;
1588
1589 std::array<float, 4> scales;
1590 for (const int i : IndexRange(scales.size())) {
1591 /* Global index in all accumulations. */
1592 const int global_i = iteration * 4 + i;
1593 /* Arithmetic progression in the range [0, 1) + offset. */
1594 const float progression = (global_i + offset) / (get_number_of_iterations() * 4);
1595 /* Remap range [0, 1) to [1, 0) and multiply to remap to [2.1, 0). */
1596 scales[i] = 2.1f * (1.0f - progression);
1597
1598 /* Invert the scale with a slight variation and flip it across the image center through
1599 * negation for odd scales as discussed in the method description. */
1600 if (i % 2 == 1) {
1601 scales[i] = -0.99f / scales[i];
1602 }
1603 }
1604
1605 return scales;
1606 }
1607
1608 /* The operation computes two base ghosts by blurring the highlights with two different radii,
1609 * this method computes the blur radius for the smaller one. The value is chosen using visual
1610 * judgment. Make sure to take the quality factor into account, see the get_quality_factor method
1611 * for more information. */
/* NOTE(review): the declaration line (1612, presumably `float get_small_ghost_radius()`) was
 * dropped by the documentation extractor — confirm against the Blender sources. */
1613 {
1614 return 16.0f / get_quality_factor();
1615 }
1616
1617 /* Computes the blur radius of the bigger ghost, which is double the blur radius of the smaller
1618 * one, see the get_small_ghost_radius for more information. */
/* NOTE(review): the declaration line (1619, presumably `float get_big_ghost_radius()`) was
 * dropped by the documentation extractor — confirm against the Blender sources. */
1620 {
1621 return get_small_ghost_radius() * 2.0f;
1622 }
1623
1624 /* The color channels of the glare can be modulated by being multiplied by this factor. In the
1625 * user interface, 0 means no modulation and 1 means full modulation. But since the factor is
1626 * multiplied, 1 corresponds to no modulation and 0 corresponds to full modulation, so we
1627 * subtract from one. */
/* NOTE(review): the declaration line (1628) was dropped by the documentation extractor —
 * presumably `float get_color_modulation_factor()`; confirm against the Blender sources. */
1629 {
1630 return 1.0f - this->get_color_modulation();
1631 }
1632
1633 /* ------------
1634 * Bloom Glare.
1635 * ------------ */
1636
1637 /* Bloom is computed by first progressively half-down-sampling the highlights down to a certain
1638 * size, then progressively double-up-sampling the last down-sampled result up to the original
1639 * size of the highlights, adding the down-sampled result of the same size in each up-sampling
1640 * step. This can be illustrated as follows:
1641 *
1642 * Highlights ---+---> Bloom
1643 * | |
1644 * Down-sampled ---+---> Up-sampled
1645 * | |
1646 * Down-sampled ---+---> Up-sampled
1647 * | |
1648 * Down-sampled ---+---> Up-sampled
1649 * | ^
1650 * ... |
1651 * Down-sampled ------------'
1652 *
1653 * The smooth down-sampling followed by smooth up-sampling can be thought of as a cheap way to
1654 * approximate a large radius blur, and adding the corresponding down-sampled result while
1655 * up-sampling is done to counter the attenuation that happens during down-sampling.
1656 *
1657 * Smaller down-sampled results contribute to larger glare size, so controlling the size can be
1658 * done by stopping down-sampling down to a certain size, where the maximum possible size is
1659 * achieved when down-sampling happens down to the smallest size of 2. */
/* NOTE(review): the declaration line (1660) was dropped by the documentation extractor; from the
 * body it evidently takes the highlights Result and returns the bloom Result — confirm against
 * the Blender sources. */
1661 {
1662 const int chain_length = this->compute_bloom_chain_length();
1663
1664 /* If the chain length is less than 2, that means no down-sampling will happen, so we just
1665 * return a copy of the highlights. This is a sanitization of a corner case, so no need to
1666 * worry about optimizing the copy away. */
1667 if (chain_length < 2) {
1668 Result bloom_result = context().create_result(ResultType::Color);
1669 bloom_result.allocate_texture(highlights.domain());
1670 if (this->context().use_gpu()) {
1671 GPU_texture_copy(bloom_result, highlights);
1672 }
1673 else {
1674 parallel_for(bloom_result.domain().size, [&](const int2 texel) {
1675 bloom_result.store_pixel(texel, highlights.load_pixel<float4>(texel));
1676 });
1677 }
1678 return bloom_result;
1679 }
1680
1681 Array<Result> downsample_chain = compute_bloom_downsample_chain(highlights, chain_length);
1682
1683 /* Notice that for a chain length of n, we need (n - 1) up-sampling passes. */
1684 const IndexRange upsample_passes_range(chain_length - 1);
1685
/* Walk from the smallest down-sampled result back up the chain, accumulating each up-sampled
 * result into the next larger one; element 0 ends up holding the final bloom. */
1686 for (const int i : upsample_passes_range) {
1687 Result &input = downsample_chain[upsample_passes_range.last() - i + 1];
1688 Result &output = downsample_chain[upsample_passes_range.last() - i];
1689 if (this->context().use_gpu()) {
/* NOTE(review): the GPU up-sample call (line 1690) was dropped by the documentation extractor. */
1691 }
1692 else {
/* NOTE(review): the CPU up-sample call (line 1693) was dropped by the documentation extractor. */
1694 }
1695 input.release();
1696 }
1697
1698 return downsample_chain[0];
1699 }
1700
/* Runs one GPU up-sampling pass of the bloom chain using the bloom up-sample shader.
 * NOTE(review): the declaration line (1701) was dropped by the documentation extractor —
 * presumably `void compute_bloom_upsample_gpu(Result &input, Result &output)`; confirm. */
1702 {
1703 GPUShader *shader = context().get_shader("compositor_glare_bloom_upsample");
1704 GPU_shader_bind(shader);
1705
/* NOTE(review): line 1706 was dropped by the extractor — presumably configures the input
 * sampler (filter mode); confirm against the Blender sources. */
1707 input.bind_as_texture(shader, "input_tx");
1708
1709 output.bind_as_image(shader, "output_img", true);
1710
1711 compute_dispatch_threads_at_least(shader, output.domain().size);
1712
1713 input.unbind_as_texture();
1714 output.unbind_as_image();
/* NOTE(review): line 1715 was dropped by the extractor — presumably `GPU_shader_unbind();`. */
1716 }
1717
/* Runs one CPU up-sampling pass of the bloom chain: evaluates a 3x3 tent filter over the
 * bilinearly-sampled input and accumulates (adds) the result onto the existing output pixels,
 * forcing alpha to 1.
 * NOTE(review): the declaration line (1718) was dropped by the documentation extractor —
 * presumably `void compute_bloom_upsample_cpu(const Result &input, Result &output)`; confirm. */
1719 {
1720 /* Each invocation corresponds to one output pixel, where the output has twice the size of the
1721 * input. */
1722 const int2 size = output.domain().size;
1723 parallel_for(size, [&](const int2 texel) {
1724 /* Add 0.5 to evaluate the sampler at the center of the pixel and divide by the image size to
1725 * get the coordinates into the sampler's expected [0, 1] range. */
1726 float2 coordinates = (float2(texel) + float2(0.5f)) / float2(size);
1727
1728 /* All the offsets in the following code section are in the normalized pixel space of the
1729 * output image, so compute its normalized pixel size. */
1730 float2 pixel_size = 1.0f / float2(size);
1731
1732 /* Upsample by applying a 3x3 tent filter on the bi-linearly interpolated values evaluated at
1733 * the center of neighboring output pixels. As more tent filter upsampling passes are
1734 * applied, the result approximates a large sized Gaussian filter. This upsampling strategy
1735 * is described in the talk:
1736 *
1737 * Next Generation Post Processing in Call of Duty: Advanced Warfare
1738 * https://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare
1739 *
1740 * In particular, the upsampling strategy is described and illustrated in slide 162 titled
1741 * "Upsampling - Our Solution". */
1742 float4 upsampled = float4(0.0f);
1743 upsampled += (4.0f / 16.0f) * input.sample_bilinear_extended(coordinates);
1744 upsampled += (2.0f / 16.0f) *
1745 input.sample_bilinear_extended(coordinates + pixel_size * float2(-1.0f, 0.0f));
1746 upsampled += (2.0f / 16.0f) *
1747 input.sample_bilinear_extended(coordinates + pixel_size * float2(0.0f, 1.0f));
1748 upsampled += (2.0f / 16.0f) *
1749 input.sample_bilinear_extended(coordinates + pixel_size * float2(1.0f, 0.0f));
1750 upsampled += (2.0f / 16.0f) *
1751 input.sample_bilinear_extended(coordinates + pixel_size * float2(0.0f, -1.0f));
1752 upsampled += (1.0f / 16.0f) *
1753 input.sample_bilinear_extended(coordinates + pixel_size * float2(-1.0f, -1.0f));
1754 upsampled += (1.0f / 16.0f) *
1755 input.sample_bilinear_extended(coordinates + pixel_size * float2(-1.0f, 1.0f));
1756 upsampled += (1.0f / 16.0f) *
1757 input.sample_bilinear_extended(coordinates + pixel_size * float2(1.0f, -1.0f));
1758 upsampled += (1.0f / 16.0f) *
1759 input.sample_bilinear_extended(coordinates + pixel_size * float2(1.0f, 1.0f));
1760
/* Accumulate onto the down-sampled result already stored in the output, see execute_bloom. */
1761 float4 combined = output.load_pixel<float4>(texel) + upsampled;
1762 output.store_pixel(texel, float4(combined.xyz(), 1.0f));
1763 });
1764 }
1765
1766 /* Progressively down-sample the given result into a result with half the size for the given
1767 * chain length, returning an array containing the chain of down-sampled results. The first
1768 * result of the chain is the given result itself for easier handling. The chain length is
1769 * expected not to exceed the binary logarithm of the smaller dimension of the given result,
1770 * because that would result in down-sampling passes that produce useless textures with just
1771 * one pixel. */
1772 Array<Result> compute_bloom_downsample_chain(const Result &highlights, int chain_length)
1773 {
1774 const Result downsampled_result = context().create_result(ResultType::Color);
1775 Array<Result> downsample_chain(chain_length, downsampled_result);
1776
1777 /* We copy the original highlights result to the first result of the chain to make the code
1778 * easier. */
1779 Result &base_layer = downsample_chain[0];
1780 base_layer.allocate_texture(highlights.domain());
1781 if (this->context().use_gpu()) {
1782 GPU_texture_copy(base_layer, highlights);
1783 }
1784 else {
1785 parallel_for(base_layer.domain().size, [&](const int2 texel) {
1786 base_layer.store_pixel(texel, highlights.load_pixel<float4>(texel));
1787 });
1788 }
1789
1790 /* In turn, the number of passes is one less than the chain length, because the first result
1791 * needn't be computed. */
1792 const IndexRange downsample_passes_range(chain_length - 1);
1793
1794 for (const int i : downsample_passes_range) {
1795 /* For the first down-sample pass, we use a special "Karis" down-sample pass that applies a
1796 * form of local tone mapping to reduce the contributions of fireflies, see the shader for
1797 * more information. Later passes use a simple average down-sampling filter because
1798 * fireflies don't survive the first pass. */
1799 const bool use_karis_average = i == downsample_passes_range.first();
1800 if (this->context().use_gpu()) {
/* NOTE(review): the GPU down-sample call (line 1801, presumably
 * `this->compute_bloom_downsample_gpu(`) was dropped by the documentation extractor. */
1802 downsample_chain[i], downsample_chain[i + 1], use_karis_average);
1803 }
1804 else {
1805 if (use_karis_average) {
1806 this->compute_bloom_downsample_cpu<true>(downsample_chain[i], downsample_chain[i + 1]);
1807 }
1808 else {
1809 this->compute_bloom_downsample_cpu<false>(downsample_chain[i], downsample_chain[i + 1]);
1810 }
1811 }
1812 }
1813
1814 return downsample_chain;
1815 }
1816
/* Runs one GPU down-sampling pass of the bloom chain, allocating the output at half the input
 * size and choosing the Karis-average or simple-average shader variant.
 * NOTE(review): the first signature line (1817, presumably
 * `void compute_bloom_downsample_gpu(const Result &input,`) was dropped by the extractor. */
1818 Result &output,
1819 const bool use_karis_average)
1820 {
1821 GPUShader *shader = context().get_shader(
1822 use_karis_average ? "compositor_glare_bloom_downsample_karis_average" :
1823 "compositor_glare_bloom_downsample_simple_average");
1824 GPU_shader_bind(shader);
1825
/* NOTE(review): line 1826 was dropped by the extractor — presumably configures the input
 * sampler (filter mode); confirm against the Blender sources. */
1827 input.bind_as_texture(shader, "input_tx");
1828
1829 output.allocate_texture(input.domain().size / 2);
1830 output.bind_as_image(shader, "output_img");
1831
1832 compute_dispatch_threads_at_least(shader, output.domain().size);
1833
1834 input.unbind_as_texture();
1835 output.unbind_as_image();
/* NOTE(review): line 1836 was dropped by the extractor — presumably `GPU_shader_unbind();`. */
1837 }
1838
1839 template<bool UseKarisAverage>
/* Runs one CPU down-sampling pass of the bloom chain, allocating the output at half the input
 * size. When UseKarisAverage is true, pixel groups are combined with a brightness-weighted sum
 * to suppress fireflies (only needed for the first pass).
 * NOTE(review): the declaration line (1840, presumably
 * `void compute_bloom_downsample_cpu(const Result &input, Result &output)`) was dropped by the
 * documentation extractor — confirm against the Blender sources. */
1841 {
1842 const int2 size = input.domain().size / 2;
1843 output.allocate_texture(size);
1844
1845 /* Each invocation corresponds to one output pixel, where the output has half the size of the
1846 * input. */
1847 parallel_for(size, [&](const int2 texel) {
1848 /* Add 0.5 to evaluate the sampler at the center of the pixel and divide by the image size to
1849 * get the coordinates into the sampler's expected [0, 1] range. */
1850 float2 coordinates = (float2(texel) + float2(0.5f)) / float2(size);
1851
1852 /* All the offsets in the following code section are in the normalized pixel space of the
1853 * input texture, so compute its normalized pixel size. */
1854 float2 pixel_size = 1.0f / float2(input.domain().size);
1855
1856 /* Each invocation downsamples a 6x6 area of pixels around the center of the corresponding
1857 * output pixel, but instead of sampling each of the 36 pixels in the area, we only sample 13
1858 * positions using bilinear fetches at the center of a number of overlapping square 4-pixel
1859 * groups. This downsampling strategy is described in the talk:
1860 *
1861 * Next Generation Post Processing in Call of Duty: Advanced Warfare
1862 * https://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare
1863 *
1864 * In particular, the downsampling strategy is described and illustrated in slide 153 titled
1865 * "Downsampling - Our Solution". This is employed as it significantly improves the stability
1866 * of the glare as can be seen in the videos in the talk. */
1867 float4 center = input.sample_bilinear_extended(coordinates);
1868 float4 upper_left_near = input.sample_bilinear_extended(coordinates +
1869 pixel_size * float2(-1.0f, 1.0f));
1870 float4 upper_right_near = input.sample_bilinear_extended(coordinates +
1871 pixel_size * float2(1.0f, 1.0f));
1872 float4 lower_left_near = input.sample_bilinear_extended(coordinates +
1873 pixel_size * float2(-1.0f, -1.0f));
1874 float4 lower_right_near = input.sample_bilinear_extended(coordinates +
1875 pixel_size * float2(1.0f, -1.0f));
1876 float4 left_far = input.sample_bilinear_extended(coordinates +
1877 pixel_size * float2(-2.0f, 0.0f));
1878 float4 right_far = input.sample_bilinear_extended(coordinates +
1879 pixel_size * float2(2.0f, 0.0f));
1880 float4 upper_far = input.sample_bilinear_extended(coordinates +
1881 pixel_size * float2(0.0f, 2.0f));
1882 float4 lower_far = input.sample_bilinear_extended(coordinates +
1883 pixel_size * float2(0.0f, -2.0f));
1884 float4 upper_left_far = input.sample_bilinear_extended(coordinates +
1885 pixel_size * float2(-2.0f, 2.0f));
1886 float4 upper_right_far = input.sample_bilinear_extended(coordinates +
1887 pixel_size * float2(2.0f, 2.0f));
1888 float4 lower_left_far = input.sample_bilinear_extended(coordinates +
1889 pixel_size * float2(-2.0f, -2.0f));
1890 float4 lower_right_far = input.sample_bilinear_extended(coordinates +
1891 pixel_size * float2(2.0f, -2.0f));
1892
1893 float4 result;
1894 if constexpr (!UseKarisAverage) {
1895 /* The original weights equation mentioned in slide 153 is:
1896 * 0.5 + 0.125 + 0.125 + 0.125 + 0.125 = 1
1897 * The 0.5 corresponds to the center group of pixels and the 0.125 corresponds to the other
1898 * groups of pixels. The center is sampled 4 times, the far non corner pixels are sampled 2
1899 * times, the near corner pixels are sampled only once; but their weight is quadruple the
1900 * weights of other groups; so they count as sampled 4 times, finally the far corner pixels
1901 * are sampled only once, essentially totaling 32 samples. So the weights are as used in
1902 * the following code section. */
1903 result = (4.0f / 32.0f) * center +
1904 (4.0f / 32.0f) *
1905 (upper_left_near + upper_right_near + lower_left_near + lower_right_near) +
1906 (2.0f / 32.0f) * (left_far + right_far + upper_far + lower_far) +
1907 (1.0f / 32.0f) *
1908 (upper_left_far + upper_right_far + lower_left_far + lower_right_far);
1909 }
1910 else {
1911 /* Reduce the contributions of fireflies on the result by reducing each group of pixels
1912 * using a Karis brightness weighted sum. This is described in slide 168 titled "Fireflies
1913 * - Partial Karis Average".
1914 *
1915 * This needn't be done on all downsampling passes, but only the first one, since fireflies
1916 * will not survive the first pass, later passes can use the weighted average. */
1917 float4 center_weighted_sum = this->karis_brightness_weighted_sum(
1918 upper_left_near, upper_right_near, lower_right_near, lower_left_near);
1919 float4 upper_left_weighted_sum = this->karis_brightness_weighted_sum(
1920 upper_left_far, upper_far, center, left_far);
1921 float4 upper_right_weighted_sum = this->karis_brightness_weighted_sum(
1922 upper_far, upper_right_far, right_far, center);
1923 float4 lower_right_weighted_sum = this->karis_brightness_weighted_sum(
1924 center, right_far, lower_right_far, lower_far);
1925 float4 lower_left_weighted_sum = this->karis_brightness_weighted_sum(
1926 left_far, center, lower_far, lower_left_far);
1927
1928 /* The original weights equation mentioned in slide 153 is:
1929 * 0.5 + 0.125 + 0.125 + 0.125 + 0.125 = 1
1930 * Multiply both sides by 8 and you get:
1931 * 4 + 1 + 1 + 1 + 1 = 8
1932 * So the weights are as used in the following code section. */
1933 result = (4.0f / 8.0f) * center_weighted_sum +
1934 (1.0f / 8.0f) * (upper_left_weighted_sum + upper_right_weighted_sum +
1935 lower_left_weighted_sum + lower_right_weighted_sum);
1936 }
1937
1938 output.store_pixel(texel, result);
1939 });
1940 }
1941
1942 /* Computes the weighted average of the given four colors, which are assumed to be the colors of
1943 * spatially neighboring pixels. The weights are computed so as to reduce the contributions of
1944 * fireflies on the result by applying a form of local tone mapping as described by Brian Karis
1945 * in the article "Graphic Rants: Tone Mapping".
1946 *
1947 * https://graphicrants.blogspot.com/2013/12/tone-mapping.html */
/* NOTE(review): the first signature line (1948, presumably
 * `float4 karis_brightness_weighted_sum(const float4 &color1,`) was dropped by the extractor. */
1949 const float4 &color2,
1950 const float4 &color3,
1951 const float4 &color4)
1952 {
/* Per-color brightness is the maximum of the RGB channels; brighter colors get smaller weights. */
1953 float4 brightness = float4(math::reduce_max(color1.xyz()),
1954 math::reduce_max(color2.xyz()),
1955 math::reduce_max(color3.xyz()),
1956 math::reduce_max(color4.xyz()));
1957 float4 weights = 1.0f / (brightness + 1.0f);
1958 return (color1 * weights.x + color2 * weights.y + color3 * weights.z + color4 * weights.w) *
/* NOTE(review): the continuation line (1959) was dropped by the extractor — presumably the
 * normalization by the sum of weights; confirm against the Blender sources. */
1960 }
1961
1962 /* The maximum possible glare size is achieved when we down-sampled down to the smallest size of
1963 * 2, which would result in a down-sampling chain length of the binary logarithm of the smaller
1964 * dimension of the size of the highlights.
1965 *
1966 * However, as users might want a smaller glare size, we reduce the chain length by the size
1967 * supplied by the user. Also make sure that log2 does not get zero. */
/* NOTE(review): the declaration line (1968, presumably `int compute_bloom_chain_length()`) was
 * dropped by the documentation extractor — confirm against the Blender sources. */
1969 {
1970 const int2 image_size = this->get_glare_image_size();
1971 const int smaller_dimension = math::reduce_min(image_size);
/* Scale by the user size so smaller sizes shorten the chain; clamp to 1 so log2 never sees 0. */
1972 const float scaled_dimension = smaller_dimension * this->get_size();
1973 return int(std::log2(math::max(1.0f, scaled_dimension)));
1974 }
1975
1976 /* ---------------
1977 * Fog Glow Glare.
1978 * --------------- */
1979
1980 Result execute_fog_glow(const Result &highlights)
1981 {
1982#if defined(WITH_FFTW3)
1983
1984 const int kernel_size = compute_fog_glow_kernel_size(highlights);
1985
1986 /* Since we will be doing a circular convolution, we need to zero pad our input image by half
1987 * the kernel size to avoid the kernel affecting the pixels at the other side of image.
1988 * Therefore, zero boundary is assumed. */
1989 const int needed_padding_amount = kernel_size / 2;
1990 const int2 image_size = highlights.domain().size;
1991 const int2 needed_spatial_size = image_size + needed_padding_amount;
1992 const int2 spatial_size = fftw::optimal_size_for_real_transform(needed_spatial_size);
1993
1994 /* The FFTW real to complex transforms utilizes the hermitian symmetry of real transforms and
1995 * stores only half the output since the other half is redundant, so we only allocate half of
1996 * the first dimension. See Section 4.3.4 Real-data DFT Array Format in the FFTW manual for
1997 * more information. */
1998 const int2 frequency_size = int2(spatial_size.x / 2 + 1, spatial_size.y);
1999
2000 /* We only process the color channels, the alpha channel is written to the output as is. */
2001 const int channels_count = 3;
2002 const int image_channels_count = 4;
2003 const int64_t spatial_pixels_per_channel = int64_t(spatial_size.x) * spatial_size.y;
2004 const int64_t frequency_pixels_per_channel = int64_t(frequency_size.x) * frequency_size.y;
2005 const int64_t spatial_pixels_count = spatial_pixels_per_channel * channels_count;
2006 const int64_t frequency_pixels_count = frequency_pixels_per_channel * channels_count;
2007
2008 float *image_spatial_domain = fftwf_alloc_real(spatial_pixels_count);
2009 std::complex<float> *image_frequency_domain = reinterpret_cast<std::complex<float> *>(
2010 fftwf_alloc_complex(frequency_pixels_count));
2011
2012 /* Create a real to complex plan to transform the image to the frequency domain. */
2013 fftwf_plan forward_plan = fftwf_plan_dft_r2c_2d(
2014 spatial_size.y,
2015 spatial_size.x,
2016 image_spatial_domain,
2017 reinterpret_cast<fftwf_complex *>(image_frequency_domain),
2018 FFTW_ESTIMATE);
2019
2020 const float *highlights_buffer = nullptr;
2021 if (this->context().use_gpu()) {
/* NOTE(review): line 2022 was dropped by the documentation extractor — presumably a GPU
 * synchronization/flush before the read-back; confirm against the Blender sources. */
2023 highlights_buffer = static_cast<const float *>(
2024 GPU_texture_read(highlights, GPU_DATA_FLOAT, 0));
2025 }
2026 else {
2027 highlights_buffer = static_cast<const float *>(highlights.cpu_data().data());
2028 }
2029
2030 /* Zero pad the image to the required spatial domain size, storing each channel in planar
2031 * format for better cache locality, that is, RRRR...GGGG...BBBB. */
2032 threading::parallel_for(IndexRange(spatial_size.y), 1, [&](const IndexRange sub_y_range) {
2033 for (const int64_t y : sub_y_range) {
2034 for (const int64_t x : IndexRange(spatial_size.x)) {
2035 const bool is_inside_image = x < image_size.x && y < image_size.y;
2036 for (const int64_t channel : IndexRange(channels_count)) {
2037 const int64_t base_index = y * spatial_size.x + x;
2038 const int64_t output_index = base_index + spatial_pixels_per_channel * channel;
2039 if (is_inside_image) {
2040 const int64_t image_index = (y * image_size.x + x) * image_channels_count + channel;
2041 image_spatial_domain[output_index] = highlights_buffer[image_index];
2042 }
2043 else {
2044 image_spatial_domain[output_index] = 0.0f;
2045 }
2046 }
2047 }
2048 }
2049 });
2050
/* Forward-transform each color channel independently using the same plan. */
2051 threading::parallel_for(IndexRange(channels_count), 1, [&](const IndexRange sub_range) {
2052 for (const int64_t channel : sub_range) {
2053 fftwf_execute_dft_r2c(forward_plan,
2054 image_spatial_domain + spatial_pixels_per_channel * channel,
2055 reinterpret_cast<fftwf_complex *>(image_frequency_domain) +
2056 frequency_pixels_per_channel * channel);
2057 }
2058 });
2059
2060 const FogGlowKernel &fog_glow_kernel = context().cache_manager().fog_glow_kernels.get(
2061 kernel_size, spatial_size);
2062
2063 /* Multiply the kernel and the image in the frequency domain to perform the convolution. The
2064 * FFT is not normalized, meaning the result of the FFT followed by an inverse FFT will result
2065 * in an image that is scaled by a factor of the product of the width and height, so we take
2066 * that into account by dividing by that scale. See Section 4.8.6 Multi-dimensional Transforms
2067 * of the FFTW manual for more information. */
2068 const float normalization_scale = float(spatial_size.x) * spatial_size.y *
2069 fog_glow_kernel.normalization_factor();
2070 threading::parallel_for(IndexRange(frequency_size.y), 1, [&](const IndexRange sub_y_range) {
2071 for (const int64_t channel : IndexRange(channels_count)) {
2072 for (const int64_t y : sub_y_range) {
2073 for (const int64_t x : IndexRange(frequency_size.x)) {
2074 const int64_t base_index = x + y * frequency_size.x;
2075 const int64_t output_index = base_index + frequency_pixels_per_channel * channel;
2076 const std::complex<float> kernel_value = fog_glow_kernel.frequencies()[base_index];
2077 image_frequency_domain[output_index] *= kernel_value / normalization_scale;
2078 }
2079 }
2080 }
2081 });
2082
2083 /* Create a complex to real plan to transform the image to the real domain. */
2084 fftwf_plan backward_plan = fftwf_plan_dft_c2r_2d(
2085 spatial_size.y,
2086 spatial_size.x,
2087 reinterpret_cast<fftwf_complex *>(image_frequency_domain),
2088 image_spatial_domain,
2089 FFTW_ESTIMATE);
2090
2091 threading::parallel_for(IndexRange(channels_count), 1, [&](const IndexRange sub_range) {
2092 for (const int64_t channel : sub_range) {
2093 fftwf_execute_dft_c2r(backward_plan,
2094 reinterpret_cast<fftwf_complex *>(image_frequency_domain) +
2095 frequency_pixels_per_channel * channel,
2096 image_spatial_domain + spatial_pixels_per_channel * channel);
2097 }
2098 });
2099
2100 Result fog_glow_result = context().create_result(ResultType::Color);
2101 fog_glow_result.allocate_texture(highlights.domain());
2102
2103 /* For GPU, write the output to the exist highlights_buffer then upload to the result after,
2104 * while for CPU, write to the result directly. */
2105 float *output = this->context().use_gpu() ?
2106 const_cast<float *>(highlights_buffer) :
2107 static_cast<float *>(fog_glow_result.cpu_data().data());
2108
2109 /* Copy the result to the output. */
2110 threading::parallel_for(IndexRange(image_size.y), 1, [&](const IndexRange sub_y_range) {
2111 for (const int64_t y : sub_y_range) {
2112 for (const int64_t x : IndexRange(image_size.x)) {
2113 for (const int64_t channel : IndexRange(channels_count)) {
2114 const int64_t output_index = (x + y * image_size.x) * image_channels_count;
2115 const int64_t base_index = x + y * spatial_size.x;
2116 const int64_t input_index = base_index + spatial_pixels_per_channel * channel;
2117 output[output_index + channel] = image_spatial_domain[input_index];
/* NOTE(review): the alpha pass-through below runs once per color channel (three redundant but
 * identical writes per pixel); it could live outside the channel loop. */
2118 output[output_index + 3] = highlights_buffer[output_index + 3];
2119 }
2120 }
2121 }
2122 });
2123
2124 if (this->context().use_gpu()) {
2125 GPU_texture_update(fog_glow_result, GPU_DATA_FLOAT, output);
2126 /* CPU writes to the output directly, so no need to free it. */
/* NOTE(review): line 2127 was dropped by the documentation extractor — presumably frees the
 * GPU read-back buffer returned by GPU_texture_read; confirm against the Blender sources. */
2128 }
2129
2130 fftwf_destroy_plan(forward_plan);
2131 fftwf_destroy_plan(backward_plan);
2132 fftwf_free(image_spatial_domain);
2133 fftwf_free(image_frequency_domain);
2134#else
/* Built without FFTW: fog glow is disabled, so pass the highlights through unchanged. */
2135 Result fog_glow_result = context().create_result(ResultType::Color);
2136 fog_glow_result.allocate_texture(highlights.domain());
2137 if (this->context().use_gpu()) {
2138 GPU_texture_copy(fog_glow_result, highlights);
2139 }
2140 else {
2141 parallel_for(fog_glow_result.domain().size, [&](const int2 texel) {
2142 fog_glow_result.store_pixel(texel, highlights.load_pixel<float4>(texel));
2143 });
2144 }
2145#endif
2146
2147 return fog_glow_result;
2148 }
2149
2150 /* Computes the size of the fog glow kernel that will be convolved with the image, which is
2151 * essentially the extent of the glare in pixels. */
/* NOTE(review): the declaration line (2152, presumably
 * `int compute_fog_glow_kernel_size(const Result &highlights)`) was dropped by the extractor. */
2153 {
2154 /* The input size is relative to the larger dimension of the image. */
2155 const int size = int(math::reduce_max(highlights.domain().size) * this->get_size());
2156
2157 /* Make sure size is at least 3 pixels, since the code deals with half kernel sizes
2158 * which will be zero if less than 3, causing zero division. */
2159 const int safe_size = math::max(3, size);
2160
2161 /* Make sure the kernel size is odd since an even one will typically introduce a tiny offset as
2162 * it has no exact center value. */
2163 const bool is_even = safe_size % 2 == 0;
2164 const int odd_size = safe_size + (is_even ? 1 : 0);
2165
2166 return odd_size;
2167 }
2168
2169 /* ----------
2170 * Glare Mix.
2171 * ---------- */
2172
2173 void execute_mix(const Result &glare_result)
2174 {
2175 Result &image_output = this->get_result("Image");
2176 if (!image_output.should_compute()) {
2177 return;
2178 }
2179
2180 if (this->context().use_gpu()) {
2181 this->execute_mix_gpu(glare_result);
2182 }
2183 else {
2184 this->execute_mix_cpu(glare_result);
2185 }
2186 }
2187
/* GPU implementation of the glare mixing step: binds the input image and the (bilinearly
 * filtered) glare, passing the saturation and corrected tint as uniforms. */
2188 void execute_mix_gpu(const Result &glare_result)
2189 {
2190 GPUShader *shader = context().get_shader("compositor_glare_mix");
2191 GPU_shader_bind(shader);
2192
2193 GPU_shader_uniform_1f(shader, "saturation", this->get_saturation());
2194 GPU_shader_uniform_3fv(shader, "tint", this->get_corrected_tint());
2195
2196 const Result &input_image = get_input("Image");
2197 input_image.bind_as_texture(shader, "input_tx");
2198
2199 GPU_texture_filter_mode(glare_result, true);
2200 glare_result.bind_as_texture(shader, "glare_tx");
2201
2202 const Domain domain = compute_domain();
2203 Result &output_image = get_result("Image");
2204 output_image.allocate_texture(domain);
2205 output_image.bind_as_image(shader, "output_img");
2206
/* NOTE(review): line 2207 was dropped by the documentation extractor — presumably the compute
 * dispatch call; confirm against the Blender sources. */
2208
/* NOTE(review): line 2209 was dropped by the extractor — presumably `GPU_shader_unbind();`. */
2210 output_image.unbind_as_image();
2211 input_image.unbind_as_texture();
2212 glare_result.unbind_as_texture();
2213 }
2214
2215 void execute_mix_cpu(const Result &glare_result)
2216 {
2217 const float saturation = this->get_saturation();
2218 const float3 tint = this->get_corrected_tint();
2219
2220 const Result &input = get_input("Image");
2221
2222 const Domain domain = compute_domain();
2223 Result &output = get_result("Image");
2224 output.allocate_texture(domain);
2225
2226 parallel_for(domain.size, [&](const int2 texel) {
2227 /* Make sure the input is not negative
2228 * to avoid a subtractive effect when adding the glare. */
2229 float4 input_color = math::max(float4(0.0f), input.load_pixel<float4>(texel));
2230
2231 float2 normalized_coordinates = (float2(texel) + float2(0.5f)) / float2(input.domain().size);
2232 float4 glare_color = glare_result.sample_bilinear_extended(normalized_coordinates);
2233
2234 /* Adjust saturation of glare. */
2235 float4 glare_hsva;
2236 rgb_to_hsv_v(glare_color, glare_hsva);
2237 glare_hsva.y = math::clamp(glare_hsva.y * saturation, 0.0f, 1.0f);
2238 float4 glare_rgba;
2239 hsv_to_rgb_v(glare_hsva, glare_rgba);
2240
2241 float3 combined_color = input_color.xyz() + glare_rgba.xyz() * tint;
2242
2243 output.store_pixel(texel, float4(combined_color, input_color.w));
2244 });
2245 }
2246
2247 /* Writes the given input glare by adjusting it as needed and upsampling it using bilinear
2248 * interpolation to match the size of the original input, allocating the glare output and writing
2249 * the result to it. */
2250 void write_glare_output(const Result &glare)
2251 {
2252 if (this->context().use_gpu()) {
2253 this->write_glare_output_gpu(glare);
2254 }
2255 else {
2256 this->write_glare_output_cpu(glare);
2257 }
2258 }
2259
/* GPU implementation of write_glare_output: upsamples the glare to the input image size with
 * bilinear filtering while applying the saturation and corrected tint.
 * NOTE(review): the declaration line (2260, presumably
 * `void write_glare_output_gpu(const Result &glare)`) was dropped by the extractor. */
2261 {
2262 GPUShader *shader = this->context().get_shader("compositor_glare_write_glare_output");
2263 GPU_shader_bind(shader);
2264
2265 GPU_shader_uniform_1f(shader, "saturation", this->get_saturation());
2266 GPU_shader_uniform_3fv(shader, "tint", this->get_corrected_tint());
2267
2268 GPU_texture_filter_mode(glare, true);
/* NOTE(review): line 2269 was dropped by the extractor — presumably sets the sampler extend
 * mode; confirm against the Blender sources. */
2270 glare.bind_as_texture(shader, "input_tx");
2271
2272 const Result &image_input = this->get_input("Image");
2273 Result &output = this->get_result("Glare");
2274 output.allocate_texture(image_input.domain());
2275 output.bind_as_image(shader, "output_img");
2276
2277 compute_dispatch_threads_at_least(shader, output.domain().size);
2278
/* NOTE(review): line 2279 was dropped by the extractor — presumably `GPU_shader_unbind();`. */
2280 output.unbind_as_image();
2281 glare.unbind_as_texture();
2282 }
2283
/* CPU implementation of write_glare_output: bilinearly upsamples the glare to the input image
 * size, adjusting its saturation in HSV space and applying the corrected tint, with alpha 1.
 * NOTE(review): the declaration line (2284, presumably
 * `void write_glare_output_cpu(const Result &glare)`) was dropped by the extractor. */
2285 {
2286 const float saturation = this->get_saturation();
2287 const float3 tint = this->get_corrected_tint();
2288
2289 const Result &image_input = this->get_input("Image");
2290 Result &output = this->get_result("Glare");
2291 output.allocate_texture(image_input.domain());
2292
2293 const int2 size = output.domain().size;
2294 parallel_for(size, [&](const int2 texel) {
2295 float2 normalized_coordinates = (float2(texel) + float2(0.5f)) / float2(size);
2296 float4 glare_color = glare.sample_bilinear_extended(normalized_coordinates);
2297
2298 /* Adjust saturation of glare. */
2299 float4 glare_hsva;
2300 rgb_to_hsv_v(glare_color, glare_hsva);
2301 glare_hsva.y = math::clamp(glare_hsva.y * saturation, 0.0f, 1.0f);
2302 float4 glare_rgba;
2303 hsv_to_rgb_v(glare_hsva, glare_rgba);
2304
2305 float3 adjusted_glare_value = glare_rgba.xyz() * tint;
2306 output.store_pixel(texel, float4(adjusted_glare_value, 1.0f));
2307 });
2308 }
2309
2310 /* Combine the tint, strength, and normalization scale into a single factor that can be
2311 * multiplied to the glare. */
/* NOTE(review): the declaration line (2312, presumably `float3 get_corrected_tint()`) was
 * dropped by the documentation extractor — confirm against the Blender sources. */
2313 {
2314 return this->get_tint() * this->get_strength() / this->get_normalization_scale();
2315 }
2316
2317 /* The computed glare might need to be normalized to be energy conserving or be in a reasonable
2318 * range, instead of doing that in a separate step as part of the glare computation, we delay the
2319 * normalization until the mixing step as an optimization, since we multiply by the tint and
2320 * strength anyways. */
/* NOTE(review): the declaration line (2321, presumably `float get_normalization_scale()`) was
 * dropped by the documentation extractor — confirm against the Blender sources. */
2322 {
2323 switch (static_cast<CMPNodeGlareType>(node_storage(bnode()).type)) {
/* NOTE(review): the case labels (lines 2324 and 2331-2334) were dropped by the extractor; from
 * the comment below, the first return evidently handles the bloom type and the second return
 * the remaining glare types — confirm against the Blender sources. */
2325 /* Bloom adds a number of passes equal to the chain length, if the input is constant, each
2326 * of those passes will hold the same constant, so we need to normalize by the chain
2327 * length, see the bloom code for more information. If the chain length is less than 1,
2328 * then no bloom will be generated, so we can return 1 in this case to avoid zero division
2329 * later on. */
2330 return math::max(1, this->compute_bloom_chain_length());
2335 return 1.0f;
2336 }
2337 return 1.0f;
2338 }
2339
2340 /* -------
2341 * Common.
2342 * ------- */
2343
/* Returns the Strength input clamped to be non-negative; defaults to 1. (The declaration line,
 * 2344, was dropped by the documentation extractor.) */
2345 {
2346 return math::max(0.0f, this->get_input("Strength").get_single_value_default(1.0f));
2347 }
2348
/* Returns the Saturation input clamped to be non-negative; defaults to 1. (The declaration line,
 * 2349, was dropped by the documentation extractor.) */
2350 {
2351 return math::max(0.0f, this->get_input("Saturation").get_single_value_default(1.0f));
2352 }
2353
/* Returns the RGB part of the Tint input; defaults to white. (The declaration line, 2354, was
 * dropped by the documentation extractor.) */
2355 {
2356 return this->get_input("Tint").get_single_value_default(float4(1.0f)).xyz();
2357 }
2358
2359 float get_size()
2360 {
2361 return math::clamp(this->get_input("Size").get_single_value_default(0.5f), 0.0f, 1.0f);
2362 }
2363
/* Returns the Iterations input clamped to [2, 5] — the upper bound matches MAX_GLARE_ITERATIONS;
 * defaults to 3. (The declaration line, 2364, was dropped by the documentation extractor.) */
2365 {
2366 return math::clamp(this->get_input("Iterations").get_single_value_default(3), 2, 5);
2367 }
2368
2369 float get_fade()
2370 {
2371 return math::clamp(this->get_input("Fade").get_single_value_default(0.9f), 0.75f, 1.0f);
2372 }
2373
/* Returns the Color Modulation input clamped to the [0, 1] range; defaults to 0.25. (The
 * declaration line, 2374, was dropped by the documentation extractor.) */
2375 {
2376 return math::clamp(
2377 this->get_input("Color Modulation").get_single_value_default(0.25f), 0.0f, 1.0f);
2378 }
2379
2380 /* As a performance optimization, the operation can compute the glare on a fraction of the input
2381 * image size, so the input is downsampled then upsampled at the end, and this method returns the
2382 * size after downsampling. */
/* NOTE(review): the declaration line (2383, presumably `int2 get_glare_image_size()`) was
 * dropped by the documentation extractor — confirm against the Blender sources. */
2384 {
2385 return this->compute_domain().size / this->get_quality_factor();
2386 }
2387
2388 /* The glare node can compute the glare on a fraction of the input image size to improve
2389 * performance. The quality values and their corresponding quality factors are as follows:
2390 *
2391 * - High Quality => Quality Value: 0 => Quality Factor: 1.
2392 * - Medium Quality => Quality Value: 1 => Quality Factor: 2.
2393 * - Low Quality => Quality Value: 2 => Quality Factor: 4.
2394 *
2395 * Dividing the image size by the quality factor gives the size where the glare should be
2396 * computed. The glare algorithm should also take the quality factor into account to compensate
2397 * for the reduced sized, perhaps by dividing blur radii and similar values by the quality
2398 * factor. */
/* Maps the stored quality value 0/1/2 to the factor 1/2/4 via a left shift. (The declaration
 * line, 2399, was dropped by the documentation extractor.) */
2400 {
2401 return 1 << node_storage(bnode()).quality;
2402 }
2403};
2404
2406{
2407 return new GlareOperation(context, node);
2408}
2409
2410} // namespace blender::nodes::node_composite_glare_cc
2411
2413{
2414 namespace file_ns = blender::nodes::node_composite_glare_cc;
2415
2416 static blender::bke::bNodeType ntype;
2417
2418 cmp_node_type_base(&ntype, "CompositorNodeGlare", CMP_NODE_GLARE);
2419 ntype.ui_name = "Glare";
2420 ntype.ui_description = "Add lens flares, fog and glows around bright parts of the image";
2421 ntype.enum_name_legacy = "GLARE";
2423 ntype.declare = file_ns::cmp_node_glare_declare;
2424 ntype.updatefunc = file_ns::node_update;
2425 ntype.initfunc = file_ns::node_composit_init_glare;
2426 ntype.gather_link_search_ops = file_ns::gather_link_searches;
2429 ntype.get_compositor_operation = file_ns::get_compositor_operation;
2430
2432}
#define NODE_STORAGE_FUNCS(StorageT)
Definition BKE_node.hh:1215
#define NODE_CLASS_OP_FILTER
Definition BKE_node.hh:437
#define CMP_NODE_GLARE
#define BLI_assert_unreachable()
Definition BLI_assert.h:93
void hsv_to_rgb_v(const float hsv[3], float r_rgb[3])
Definition math_color.cc:57
void rgb_to_hsv_v(const float rgb[3], float r_hsv[3])
#define M_PI
#define ELEM(...)
#define RPT_(msgid)
#define IFACE_(msgid)
@ SOCK_IN
eNodeSocketDatatype
@ SOCK_RGBA
CMPNodeGlareType
@ CMP_NODE_GLARE_STREAKS
@ CMP_NODE_GLARE_BLOOM
@ CMP_NODE_GLARE_GHOST
@ CMP_NODE_GLARE_SIMPLE_STAR
@ CMP_NODE_GLARE_FOG_GLOW
@ R_FILTER_GAUSS
void GPU_shader_uniform_2fv(GPUShader *sh, const char *name, const float data[2])
void GPU_shader_uniform_1i(GPUShader *sh, const char *name, int value)
void GPU_shader_uniform_1f(GPUShader *sh, const char *name, float value)
void GPU_shader_bind(GPUShader *shader, const blender::gpu::shader::SpecializationConstants *constants_state=nullptr)
void GPU_shader_uniform_3fv(GPUShader *sh, const char *name, const float data[3])
void GPU_shader_uniform_4fv_array(GPUShader *sh, const char *name, int len, const float(*val)[4])
void GPU_shader_uniform_4fv(GPUShader *sh, const char *name, const float data[4])
void GPU_shader_unbind()
void GPU_memory_barrier(eGPUBarrier barrier)
Definition gpu_state.cc:385
@ GPU_BARRIER_TEXTURE_UPDATE
Definition GPU_state.hh:39
void GPU_texture_clear(GPUTexture *texture, eGPUDataFormat data_format, const void *data)
void GPU_texture_copy(GPUTexture *dst, GPUTexture *src)
void * GPU_texture_read(GPUTexture *texture, eGPUDataFormat data_format, int mip_level)
@ GPU_DATA_FLOAT
void GPU_texture_extend_mode(GPUTexture *texture, GPUSamplerExtendMode extend_mode)
@ GPU_SAMPLER_EXTEND_MODE_EXTEND
@ GPU_SAMPLER_EXTEND_MODE_CLAMP_TO_BORDER
void GPU_texture_filter_mode(GPUTexture *texture, bool use_filter)
void GPU_texture_update(GPUTexture *texture, eGPUDataFormat data_format, const void *data)
static double angle(const Eigen::Vector3d &v1, const Eigen::Vector3d &v2)
Definition IK_Math.h:117
Read Guarded memory(de)allocation.
#define NOD_REGISTER_NODE(REGISTER_FUNC)
@ PROP_ANGLE
Definition RNA_types.hh:240
@ PROP_FACTOR
Definition RNA_types.hh:239
@ UI_ITEM_R_SPLIT_EMPTY_NAME
BMesh const char void * data
long long int int64_t
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition btDbvt.cpp:52
constexpr int64_t first() const
constexpr int64_t last(const int64_t n=0) const
constexpr IndexRange drop_front(int64_t n) const
Result create_result(ResultType type, ResultPrecision precision)
GPUShader * get_shader(const char *info_name, ResultPrecision precision)
NodeOperation(Context &context, DNode node)
Result & get_result(StringRef identifier)
Definition operation.cc:39
Result & get_input(StringRef identifier) const
Definition operation.cc:138
virtual Domain compute_domain()
Definition operation.cc:56
void share_data(const Result &source)
Definition result.cc:401
T get_single_value_default(const T &default_value) const
void allocate_texture(Domain domain, bool from_pool=true)
Definition result.cc:309
void store_pixel(const int2 &texel, const T &pixel_value)
void unbind_as_texture() const
Definition result.cc:389
void bind_as_texture(GPUShader *shader, const char *texture_name) const
Definition result.cc:365
T load_pixel_zero(const int2 &texel) const
float4 sample_bilinear_zero(const float2 &coordinates) const
const Domain & domain() const
T load_pixel(const int2 &texel) const
void bind_as_image(GPUShader *shader, const char *image_name, bool read=false) const
Definition result.cc:376
void unbind_as_image() const
Definition result.cc:395
float4 sample_bilinear_extended(const float2 &coordinates) const
bool is_single_value() const
Definition result.cc:625
void steal_data(Result &source)
Definition result.cc:418
PanelDeclarationBuilder & add_panel(StringRef name, int identifier=-1)
DeclType::Builder & add_input(StringRef name, StringRef identifier="")
void compute_bloom_upsample_gpu(const Result &input, Result &output)
Result apply_streak_filter(const Result &highlights, const float2 &streak_direction)
float adaptive_smooth_clamp(const float x, const float min_value, const float max_value, const float smoothness)
void accumulate_streak_gpu(const Result &streak_result, Result &accumulated_streaks_result)
Result execute_simple_star_anti_diagonal_pass(const Result &highlights, const Result &diagonal_pass_result)
float smooth_min(const float a, const float b, const float smoothness)
void compute_base_ghost_gpu(const Result &small_ghost_result, const Result &big_ghost_result, Result &base_ghost_result)
Result execute_simple_star_horizontal_pass_gpu(const Result &highlights)
Result apply_streak_filter_gpu(const Result &highlights, const float2 &streak_direction)
float smooth_clamp(const float x, const float min_value, const float max_value, const float min_smoothness, const float max_smoothness)
Result execute_simple_star_diagonal_pass_cpu(const Result &highlights)
Result execute_simple_star_anti_diagonal_pass_gpu(const Result &highlights, const Result &diagonal_pass_result)
void compute_bloom_downsample_cpu(const Result &input, Result &output)
void accumulate_ghosts_cpu(const Result &base_ghost, Result &accumulated_ghosts_result)
Result execute_simple_star_vertical_pass_cpu(const Result &highlights, const Result &horizontal_pass_result)
Result execute_simple_star_horizontal_pass_cpu(const Result &highlights)
Result execute_simple_star_vertical_pass(const Result &highlights, const Result &horizontal_pass_result)
void compute_bloom_upsample_cpu(const Result &input, Result &output)
float smooth_max(const float a, const float b, const float smoothness)
Result execute_simple_star_vertical_pass_gpu(const Result &highlights, const Result &horizontal_pass_result)
Result execute_simple_star_diagonal_pass_gpu(const Result &highlights)
void accumulate_streak(const Result &streak_result, Result &accumulated_streaks_result)
void accumulate_streak_cpu(const Result &streak, Result &accumulated_streaks)
Array< Result > compute_bloom_downsample_chain(const Result &highlights, int chain_length)
Result apply_streak_filter_cpu(const Result &highlights, const float2 &streak_direction)
void compute_base_ghost_cpu(const Result &small_ghost_result, const Result &big_ghost_result, Result &combined_ghost)
void compute_bloom_downsample_gpu(const Result &input, Result &output, const bool use_karis_average)
Result execute_simple_star_anti_diagonal_pass_cpu(const Result &highlights, const Result &diagonal_pass_result)
float4 karis_brightness_weighted_sum(const float4 &color1, const float4 &color2, const float4 &color3, const float4 &color4)
void accumulate_ghosts_gpu(const Result &base_ghost_result, Result &accumulated_ghosts_result)
#define input
VecBase< float, 2 > float2
#define this
VecBase< float, 4 > float4
#define output
uiWidgetBaseParameters params[MAX_WIDGET_BASE_BATCH]
void * MEM_callocN(size_t len, const char *str)
Definition mallocn.cc:118
void MEM_freeN(void *vmemh)
Definition mallocn.cc:113
bNodeSocket * node_find_socket(bNode &node, eNodeSocketInOut in_out, StringRef identifier)
Definition node.cc:2864
void node_register_type(bNodeType &ntype)
Definition node.cc:2748
void node_set_socket_availability(bNodeTree &ntree, bNodeSocket &sock, bool is_available)
Definition node.cc:5011
void node_type_storage(bNodeType &ntype, std::optional< StringRefNull > storagename, void(*freefunc)(bNode *node), void(*copyfunc)(bNodeTree *dest_ntree, bNode *dest_node, const bNode *src_node))
Definition node.cc:5603
int compute_diagonal_length(const int2 &size, const int diagonal_index)
int2 compute_anti_diagonal_start(const int2 &size, const int index)
int compute_anti_diagonal_length(const int2 &size, const int diagonal_index)
int compute_number_of_diagonals(const int2 &size)
void compute_dispatch_threads_at_least(GPUShader *shader, int2 threads_range, int2 local_size=int2(16))
Definition utilities.cc:170
void symmetric_separable_blur(Context &context, const Result &input, Result &output, const float2 &radius, const int filter_type=R_FILTER_GAUSS, const bool extend_bounds=false)
int2 compute_diagonal_start(const int2 &size, const int index)
void parallel_for(const int2 range, const Function &function)
int context(const bContext *C, const char *member, bContextDataResult *result)
int optimal_size_for_real_transform(int size)
Definition fftw.cc:59
T cos(const AngleRadianBase< T > &a)
T clamp(const T &a, const T &min, const T &max)
T safe_rcp(const T &a)
T reduce_max(const VecBase< T, Size > &a)
T distance(const T &a, const T &b)
T reduce_min(const VecBase< T, Size > &a)
T min(const T &a, const T &b)
T interpolate(const T &a, const T &b, const FactorT &t)
T sin(const AngleRadianBase< T > &a)
T max(const T &a, const T &b)
T abs(const T &a)
T reduce_add(const VecBase< T, Size > &a)
static void cmp_node_glare_declare(NodeDeclarationBuilder &b)
static void node_update(bNodeTree *ntree, bNode *node)
static void gather_link_searches(GatherLinkSearchOpParams &params)
static NodeOperation * get_compositor_operation(Context &context, DNode node)
static void node_composit_init_glare(bNodeTree *, bNode *node)
void parallel_for(const IndexRange range, const int64_t grain_size, const Function &function, const TaskSizeHints &size_hints=detail::TaskSizeHints_Static(1))
Definition BLI_task.hh:93
VecBase< float, 4 > float4
VecBase< int32_t, 2 > int2
VecBase< float, 2 > float2
VecBase< float, 3 > float3
static void register_node_type_cmp_glare()
#define MAX_GLARE_ITERATIONS
void cmp_node_type_base(blender::bke::bNodeType *ntype, std::string idname, const std::optional< int16_t > legacy_type)
void node_free_standard_storage(bNode *node)
Definition node_util.cc:42
void node_copy_standard_storage(bNodeTree *, bNode *dest_node, const bNode *src_node)
Definition node_util.cc:54
int RNA_enum_get(PointerRNA *ptr, const char *name)
#define min(a, b)
Definition sort.cc:36
void * storage
VecBase< T, 3 > xyz() const
Defines a node type.
Definition BKE_node.hh:226
std::string ui_description
Definition BKE_node.hh:232
NodeGetCompositorOperationFunction get_compositor_operation
Definition BKE_node.hh:336
void(* initfunc)(bNodeTree *ntree, bNode *node)
Definition BKE_node.hh:277
const char * enum_name_legacy
Definition BKE_node.hh:235
NodeGatherSocketLinkOperationsFunction gather_link_search_ops
Definition BKE_node.hh:371
NodeDeclareFunction declare
Definition BKE_node.hh:355
void(* updatefunc)(bNodeTree *ntree, bNode *node)
Definition BKE_node.hh:269
void label(blender::StringRef name, int icon)
void prop(PointerRNA *ptr, PropertyRNA *prop, int index, int value, eUI_Item_Flag flag, std::optional< blender::StringRef > name_opt, int icon, std::optional< blender::StringRef > placeholder=std::nullopt)
i
Definition text_draw.cc:230
max
Definition text_draw.cc:251
static pxr::UsdShadeInput get_input(const pxr::UsdShadeShader &usd_shader, const pxr::TfToken &input_name)
PointerRNA * ptr
Definition wm_files.cc:4227