Blender V4.3
COM_GlareFogGlowOperation.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
5#include <complex>
6#include <cstring>
7#include <memory>
8#include <numeric>
9
10#if defined(WITH_FFTW3)
11# include <fftw3.h>
12#endif
13
15#include "BLI_fftw.hh"
16#include "BLI_index_range.hh"
17#include "BLI_math_base.hh"
19#include "BLI_task.hh"
20
22
23namespace blender::compositor {
24
25/* Given the x and y location in the range from 0 to kernel_size - 1, where kernel_size is odd,
26 * compute the fog glow kernel value. The equations are arbitrary and were chosen using visual
27 * judgment. The kernel is not normalized and need normalization. */
28[[maybe_unused]] static float compute_fog_glow_kernel_value(int x, int y, int kernel_size)
29{
30 const int half_kernel_size = kernel_size / 2;
31 const float scale = 0.25f * math::sqrt(math::square(kernel_size));
32 const float v = ((y - half_kernel_size) / float(half_kernel_size));
33 const float u = ((x - half_kernel_size) / float(half_kernel_size));
34 const float r = (math::square(u) + math::square(v)) * scale;
35 const float d = -math::sqrt(math::sqrt(math::sqrt(r))) * 9.0f;
36 const float kernel_value = math::exp(d);
37
38 const float window = (0.5f + 0.5f * math::cos(u * math::numbers::pi)) *
39 (0.5f + 0.5f * math::cos(v * math::numbers::pi));
40 const float windowed_kernel_value = window * kernel_value;
41
42 return windowed_kernel_value;
43}
44
46 MemoryBuffer *image,
47 const NodeGlare *settings)
48{
49#if defined(WITH_FFTW3)
51
52 /* We use an odd sized kernel since an even one will typically introduce a tiny offset as it has
53 * no exact center value. */
54 const int kernel_size = (1 << settings->size) + 1;
55
56 /* Since we will be doing a circular convolution, we need to zero pad our input image by half the
57 * kernel size to avoid the kernel affecting the pixels at the other side of image. Therefore,
58 * zero boundary is assumed. */
59 const int needed_padding_amount = kernel_size / 2;
60 const int2 image_size = int2(image->get_width(), image->get_height());
61 const int2 needed_spatial_size = image_size + needed_padding_amount;
62 const int2 spatial_size = fftw::optimal_size_for_real_transform(needed_spatial_size);
63
64 /* The FFTW real to complex transforms utilizes the hermitian symmetry of real transforms and
65 * stores only half the output since the other half is redundant, so we only allocate half of the
66 * first dimension. See Section 4.3.4 Real-data DFT Array Format in the FFTW manual for more
67 * information. */
68 const int2 frequency_size = int2(spatial_size.x / 2 + 1, spatial_size.y);
69
70 float *kernel_spatial_domain = fftwf_alloc_real(spatial_size.x * spatial_size.y);
71 std::complex<float> *kernel_frequency_domain = reinterpret_cast<std::complex<float> *>(
72 fftwf_alloc_complex(frequency_size.x * frequency_size.y));
73
74 /* Create a real to complex plan to transform the kernel to the frequency domain, but the same
75 * plan will be used for the image since they both have the same dimensions. */
76 fftwf_plan forward_plan = fftwf_plan_dft_r2c_2d(
77 spatial_size.y,
78 spatial_size.x,
79 kernel_spatial_domain,
80 reinterpret_cast<fftwf_complex *>(kernel_frequency_domain),
81 FFTW_ESTIMATE);
82
83 /* Use a double to sum the kernel since floats are not stable with threaded summation. */
84 threading::EnumerableThreadSpecific<double> sum_by_thread([]() { return 0.0; });
85
86 /* Compute the kernel while zero padding to match the padded image size. */
87 threading::parallel_for(IndexRange(spatial_size.y), 1, [&](const IndexRange sub_y_range) {
88 double &sum = sum_by_thread.local();
89 for (const int64_t y : sub_y_range) {
90 for (const int64_t x : IndexRange(spatial_size.x)) {
91 /* We offset the computed kernel with wrap around such that it is centered at the zero
92 * point, which is the expected format for doing circular convolutions in the frequency
93 * domain. */
94 const int half_kernel_size = kernel_size / 2;
95 int64_t output_x = mod_i(x - half_kernel_size, spatial_size.x);
96 int64_t output_y = mod_i(y - half_kernel_size, spatial_size.y);
97
98 const bool is_inside_kernel = x < kernel_size && y < kernel_size;
99 if (is_inside_kernel) {
100 const float kernel_value = compute_fog_glow_kernel_value(x, y, kernel_size);
101 kernel_spatial_domain[output_x + output_y * spatial_size.x] = kernel_value;
102 sum += kernel_value;
103 }
104 else {
105 kernel_spatial_domain[output_x + output_y * spatial_size.x] = 0.0f;
106 }
107 }
108 }
109 });
110
111 /* Instead of normalizing the kernel now, we normalize it in the frequency domain since we will
112 * be doing sample normalization anyways. This is okay since the Fourier transform is linear. */
113 const float kernel_normalization_factor = float(
114 std::accumulate(sum_by_thread.begin(), sum_by_thread.end(), 0.0));
115
116 fftwf_execute_dft_r2c(forward_plan,
117 kernel_spatial_domain,
118 reinterpret_cast<fftwf_complex *>(kernel_frequency_domain));
119 fftwf_free(kernel_spatial_domain);
120
121 /* We only process the color channels, the alpha channel is written to the output as is. */
122 const int channels_count = 3;
123 const int64_t spatial_pixels_per_channel = int64_t(spatial_size.x) * spatial_size.y;
124 const int64_t frequency_pixels_per_channel = int64_t(frequency_size.x) * frequency_size.y;
125 const int64_t spatial_pixels_count = spatial_pixels_per_channel * channels_count;
126 const int64_t frequency_pixels_count = frequency_pixels_per_channel * channels_count;
127
128 float *image_spatial_domain = fftwf_alloc_real(spatial_pixels_count);
129 std::complex<float> *image_frequency_domain = reinterpret_cast<std::complex<float> *>(
130 fftwf_alloc_complex(frequency_pixels_count));
131
132 /* Zero pad the image to the required spatial domain size, storing each channel in planar
133 * format for better cache locality, that is, RRRR...GGGG...BBBB. */
134 threading::parallel_for(IndexRange(spatial_size.y), 1, [&](const IndexRange sub_y_range) {
135 for (const int64_t y : sub_y_range) {
136 for (const int64_t x : IndexRange(spatial_size.x)) {
137 const bool is_inside_image = x < image_size.x && y < image_size.y;
138 for (const int64_t channel : IndexRange(channels_count)) {
139 const int64_t base_index = x + y * spatial_size.x;
140 const int64_t output_index = base_index + spatial_pixels_per_channel * channel;
141 if (is_inside_image) {
142 image_spatial_domain[output_index] = image->get_elem(x, y)[channel];
143 }
144 else {
145 image_spatial_domain[output_index] = 0.0f;
146 }
147 }
148 }
149 }
150 });
151
152 threading::parallel_for(IndexRange(channels_count), 1, [&](const IndexRange sub_range) {
153 for (const int64_t channel : sub_range) {
154 fftwf_execute_dft_r2c(forward_plan,
155 image_spatial_domain + spatial_pixels_per_channel * channel,
156 reinterpret_cast<fftwf_complex *>(image_frequency_domain) +
157 frequency_pixels_per_channel * channel);
158 }
159 });
160
161 /* Multiply the kernel and the image in the frequency domain to perform the convolution. The
162 * FFT is not normalized, meaning the result of the FFT followed by an inverse FFT will result
163 * in an image that is scaled by a factor of the product of the width and height, so we take
164 * that into account by dividing by that scale. See Section 4.8.6 Multi-dimensional Transforms of
165 * the FFTW manual for more information. */
166 const float normalization_scale = float(spatial_size.x) * spatial_size.y *
167 kernel_normalization_factor;
168 threading::parallel_for(IndexRange(frequency_size.y), 1, [&](const IndexRange sub_y_range) {
169 for (const int64_t channel : IndexRange(channels_count)) {
170 for (const int64_t y : sub_y_range) {
171 for (const int64_t x : IndexRange(frequency_size.x)) {
172 const int64_t base_index = x + y * frequency_size.x;
173 const int64_t output_index = base_index + frequency_pixels_per_channel * channel;
174 const std::complex<float> kernel_value = kernel_frequency_domain[base_index];
175 image_frequency_domain[output_index] *= kernel_value / normalization_scale;
176 }
177 }
178 }
179 });
180
181 /* Create a complex to real plan to transform the image to the real domain. */
182 fftwf_plan backward_plan = fftwf_plan_dft_c2r_2d(
183 spatial_size.y,
184 spatial_size.x,
185 reinterpret_cast<fftwf_complex *>(image_frequency_domain),
186 image_spatial_domain,
187 FFTW_ESTIMATE);
188
189 threading::parallel_for(IndexRange(channels_count), 1, [&](const IndexRange sub_range) {
190 for (const int64_t channel : sub_range) {
191 fftwf_execute_dft_c2r(backward_plan,
192 reinterpret_cast<fftwf_complex *>(image_frequency_domain) +
193 frequency_pixels_per_channel * channel,
194 image_spatial_domain + spatial_pixels_per_channel * channel);
195 }
196 });
197
198 /* Copy the result to the output. */
199 threading::parallel_for(IndexRange(image_size.y), 1, [&](const IndexRange sub_y_range) {
200 for (const int64_t y : sub_y_range) {
201 for (const int64_t x : IndexRange(image_size.x)) {
202 for (const int64_t channel : IndexRange(channels_count)) {
203 const int64_t output_index = (x + y * image_size.x) * image->get_num_channels();
204 const int64_t base_index = x + y * spatial_size.x;
205 const int64_t input_index = base_index + spatial_pixels_per_channel * channel;
206 output[output_index + channel] = image_spatial_domain[input_index];
207 output[output_index + 3] = image->get_buffer()[output_index + 3];
208 }
209 }
210 }
211 });
212
213 fftwf_destroy_plan(forward_plan);
214 fftwf_destroy_plan(backward_plan);
215 fftwf_free(image_spatial_domain);
216 fftwf_free(image_frequency_domain);
217 fftwf_free(kernel_frequency_domain);
218#else
219 UNUSED_VARS(output, image, settings);
220#endif
221}
222
223} // namespace blender::compositor
#define UNUSED_VARS(...)
ATTR_WARN_UNUSED_RESULT const BMVert * v
void generate_glare(float *data, MemoryBuffer *input_tile, const NodeGlare *settings) override
a MemoryBuffer contains access to the data
draw_view in_light_buf[] float
static float compute_fog_glow_kernel_value(int x, int y, int kernel_size)
void initialize_float()
Definition fftw.cc:84
int optimal_size_for_real_transform(int size)
Definition fftw.cc:55
T cos(const AngleRadianBase< T > &a)
T sqrt(const T &a)
T exp(const T &x)
T square(const T &a)
void parallel_for(const IndexRange range, const int64_t grain_size, const Function &function, const TaskSizeHints &size_hints=detail::TaskSizeHints_Static(1))
Definition BLI_task.hh:95
VecBase< int32_t, 2 > int2
__int64 int64_t
Definition stdint.h:89