Blender V5.0
math_interp.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2024 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
8
9#include <cmath>
10#include <cstring>
11
12#include "BLI_math_base.h"
13#include "BLI_math_base.hh"
14#include "BLI_math_interp.hh"
15#include "BLI_math_vector.h"
17#include "BLI_simd.hh"
18
19#include "BLI_strict_flags.h" /* IWYU pragma: keep. Keep last. */
20
21namespace blender::math {
22
24{
25 int x = 0;
26 switch (wrap) {
28 x = math::clamp(int(u), 0, size - 1);
29 break;
31 x = int(floored_fmod(u, float(size)));
32 break;
34 x = int(u);
35 if (u < 0.0f || x >= size) {
36 x = -1;
37 }
38 break;
39 }
40 return x;
41}
42
43void interpolate_nearest_wrapmode_fl(const float *buffer,
44 float *output,
45 int width,
46 int height,
47 int components,
48 float u,
49 float v,
50 InterpWrapMode wrap_u,
51 InterpWrapMode wrap_v)
52{
53 BLI_assert(buffer);
54 int x = wrap_coord(u, width, wrap_u);
55 int y = wrap_coord(v, height, wrap_v);
56 if (x < 0 || y < 0) {
57 for (int i = 0; i < components; i++) {
58 output[i] = 0.0f;
59 }
60 return;
61 }
62
63 const float *data = buffer + (int64_t(width) * y + x) * components;
64 for (int i = 0; i < components; i++) {
65 output[i] = data[i];
66 }
67}
68
73
/* Calculate cubic filter coefficients, for samples at -1,0,+1,+2.
 * f is 0..1 offset from texel center in pixel space.
 * Returns the four tap weights (w0..w3); each branch's weights sum to 1. */
template<enum eCubicFilter filter> static float4 cubic_filter_coefficients(float f)
{
  float f2 = f * f;
  float f3 = f2 * f;

  if constexpr (filter == eCubicFilter::BSpline) {
    /* Cubic B-Spline (Mitchell-Netravali filter with B=1, C=0 parameters). */
    float w3 = f3 * (1.0f / 6.0f);
    float w0 = -w3 + f2 * 0.5f - f * 0.5f + 1.0f / 6.0f;
    float w1 = f3 * 0.5f - f2 * 1.0f + 2.0f / 3.0f;
    /* Derive the last weight from the others so the sum is exactly 1. */
    float w2 = 1.0f - w0 - w1 - w3;
    return float4(w0, w1, w2, w3);
  }
  else if constexpr (filter == eCubicFilter::Mitchell) {
    /* Cubic Mitchell-Netravali filter with B=1/3, C=1/3 parameters. */
    float w0 = -7.0f / 18.0f * f3 + 5.0f / 6.0f * f2 - 0.5f * f + 1.0f / 18.0f;
    float w1 = 7.0f / 6.0f * f3 - 2.0f * f2 + 8.0f / 9.0f;
    float w2 = -7.0f / 6.0f * f3 + 3.0f / 2.0f * f2 + 0.5f * f + 1.0f / 18.0f;
    float w3 = 7.0f / 18.0f * f3 - 1.0f / 3.0f * f2;
    return float4(w0, w1, w2, w3);
  }
}
98
#if BLI_HAVE_SSE4
/* SSE4 fast path: bicubic sampling of a 4-component uchar image with
 * Extend (clamp-to-edge) wrapping on both axes. */
template<eCubicFilter filter>
BLI_INLINE void bicubic_interpolation_uchar_simd(
    const uchar *src_buffer, uchar *output, int width, int height, float u, float v)
{
  __m128 uv = _mm_set_ps(0, 0, v, u);
  __m128 uv_floor = _mm_floor_ps(uv);
  __m128i i_uv = _mm_cvttps_epi32(uv_floor);

  /* Fractional 0..1 offset from the texel. */
  __m128 frac_uv = _mm_sub_ps(uv, uv_floor);

  /* Calculate pixel weights. */
  float4 wx = cubic_filter_coefficients<filter>(_mm_cvtss_f32(frac_uv));
  float4 wy = cubic_filter_coefficients<filter>(
      _mm_cvtss_f32(_mm_shuffle_ps(frac_uv, frac_uv, 1)));

  /* Read 4x4 source pixels and blend them. */
  __m128 out = _mm_setzero_ps();
  int iu = _mm_cvtsi128_si32(i_uv);
  int iv = _mm_cvtsi128_si32(_mm_shuffle_epi32(i_uv, 1));

  for (int n = 0; n < 4; n++) {
    int y1 = iv + n - 1;
    CLAMP(y1, 0, height - 1);
    for (int m = 0; m < 4; m++) {

      int x1 = iu + m - 1;
      CLAMP(x1, 0, width - 1);
      float w = wx[m] * wy[n];

      /* 64-bit addressing so very large images do not overflow `int`. */
      const uchar *data = src_buffer + (int64_t(width) * y1 + x1) * 4;
      /* Load 4 bytes and expand into 4-lane SIMD. */
      __m128i sample_i = _mm_castps_si128(_mm_load_ss((const float *)data));
      sample_i = _mm_unpacklo_epi8(sample_i, _mm_setzero_si128());
      sample_i = _mm_unpacklo_epi16(sample_i, _mm_setzero_si128());

      /* Accumulate into out with weight. */
      out = _mm_add_ps(out, _mm_mul_ps(_mm_cvtepi32_ps(sample_i), _mm_set1_ps(w)));
    }
  }

  /* Pack and write to destination: pack to 16 bit signed, then to 8 bit
   * unsigned, then write resulting 32-bit value. This will clamp
   * out of range values too. */
  out = _mm_add_ps(out, _mm_set1_ps(0.5f));
  __m128i rgba32 = _mm_cvttps_epi32(out);
  __m128i rgba16 = _mm_packs_epi32(rgba32, _mm_setzero_si128());
  __m128i rgba8 = _mm_packus_epi16(rgba16, _mm_setzero_si128());
  _mm_store_ss((float *)output, _mm_castsi128_ps(rgba8));
}
#endif /* BLI_HAVE_SSE4 */
150
151template<typename T, eCubicFilter filter>
152BLI_INLINE void bicubic_interpolation(const T *src_buffer,
153 T *output,
154 int width,
155 int height,
156 int components,
157 float u,
158 float v,
159 InterpWrapMode wrap_u,
160 InterpWrapMode wrap_v)
161{
162 BLI_assert(src_buffer && output);
163 BLI_assert(components > 0 && components <= 4);
164
165 /* GCC 15.x can't reliably detect that `components` is never over 4. */
166#if (defined(__GNUC__) && (__GNUC__ >= 15) && !defined(__clang__))
167 [[assume(components <= 4)]];
168#endif
169
170#if BLI_HAVE_SSE4
171 if constexpr (std::is_same_v<T, uchar>) {
172 if (components == 4 && wrap_u == InterpWrapMode::Extend && wrap_v == InterpWrapMode::Extend) {
173 bicubic_interpolation_uchar_simd<filter>(src_buffer, output, width, height, u, v);
174 return;
175 }
176 }
177#endif
178
179 int iu = int(floor(u));
180 int iv = int(floor(v));
181
182 /* Sample area entirely outside image in border mode? */
183 if (wrap_u == InterpWrapMode::Border && (iu + 2 < 0 || iu > width)) {
184 memset(output, 0, size_t(components) * sizeof(T));
185 return;
186 }
187 if (wrap_v == InterpWrapMode::Border && (iv + 2 < 0 || iv > height)) {
188 memset(output, 0, size_t(components) * sizeof(T));
189 return;
190 }
191
192 float frac_u = u - float(iu);
193 float frac_v = v - float(iv);
194
195 float4 out{0.0f};
196
197 /* Calculate pixel weights. */
200
201 /* Read 4x4 source pixels and blend them. */
202 for (int n = 0; n < 4; n++) {
203 int y1 = iv + n - 1;
204 y1 = wrap_coord(float(y1), height, wrap_v);
205 if (wrap_v == InterpWrapMode::Border && y1 < 0) {
206 continue;
207 }
208
209 for (int m = 0; m < 4; m++) {
210 int x1 = iu + m - 1;
211 x1 = wrap_coord(float(x1), width, wrap_u);
212 if (wrap_u == InterpWrapMode::Border && x1 < 0) {
213 continue;
214 }
215 float w = wx[m] * wy[n];
216
217 const T *data = src_buffer + (width * y1 + x1) * components;
218
219 if (components == 1) {
220 out[0] += data[0] * w;
221 }
222 else if (components == 2) {
223 out[0] += data[0] * w;
224 out[1] += data[1] * w;
225 }
226 else if (components == 3) {
227 out[0] += data[0] * w;
228 out[1] += data[1] * w;
229 out[2] += data[2] * w;
230 }
231 else {
232 out[0] += data[0] * w;
233 out[1] += data[1] * w;
234 out[2] += data[2] * w;
235 out[3] += data[3] * w;
236 }
237 }
238 }
239
240 /* Mitchell filter has negative lobes; prevent output from going out of range. */
241 if constexpr (filter == eCubicFilter::Mitchell) {
242 for (int i = 0; i < components; i++) {
243 out[i] = math::max(out[i], 0.0f);
244 if constexpr (std::is_same_v<T, uchar>) {
245 out[i] = math::min(out[i], 255.0f);
246 }
247 }
248 }
249
250 /* Write result. */
251 if constexpr (std::is_same_v<T, float>) {
252 if (components == 1) {
253 output[0] = out[0];
254 }
255 else if (components == 2) {
257 }
258 else if (components == 3) {
260 }
261 else {
263 }
264 }
265 else {
266 if (components == 1) {
267 output[0] = uchar(out[0] + 0.5f);
268 }
269 else if (components == 2) {
270 output[0] = uchar(out[0] + 0.5f);
271 output[1] = uchar(out[1] + 0.5f);
272 }
273 else if (components == 3) {
274 output[0] = uchar(out[0] + 0.5f);
275 output[1] = uchar(out[1] + 0.5f);
276 output[2] = uchar(out[2] + 0.5f);
277 }
278 else {
279 output[0] = uchar(out[0] + 0.5f);
280 output[1] = uchar(out[1] + 0.5f);
281 output[2] = uchar(out[2] + 0.5f);
282 output[3] = uchar(out[3] + 0.5f);
283 }
284 }
285}
286
/* Bilinear sampling of a float image with per-axis wrap modes.
 * Blends the 2x2 neighborhood around (u, v) and writes `components`
 * floats (1..4) into `output`. Border mode blends against black at the
 * image edges and outputs pure black fully outside the image. */
BLI_INLINE void bilinear_fl_impl(const float *buffer,
                                 float *output,
                                 int width,
                                 int height,
                                 int components,
                                 float u,
                                 float v,
                                 InterpWrapMode wrap_x,
                                 InterpWrapMode wrap_y)
{
  BLI_assert(buffer && output);
  BLI_assert(components > 0 && components <= 4);

  float a, b;
  float a_b, ma_b, a_mb, ma_mb;
  int y1, y2, x1, x2;

  /* Bring repeat-mode coordinates into [0, size) before flooring. */
  if (wrap_x == InterpWrapMode::Repeat) {
    u = floored_fmod(u, float(width));
  }
  if (wrap_y == InterpWrapMode::Repeat) {
    v = floored_fmod(v, float(height));
  }

  float uf = floorf(u);
  float vf = floorf(v);

  x1 = int(uf);
  x2 = x1 + 1;
  y1 = int(vf);
  y2 = y1 + 1;

  const float *row1, *row2, *row3, *row4;
  const float empty[4] = {0.0f, 0.0f, 0.0f, 0.0f};

  /* Check if +1 samples need wrapping, or we don't do wrapping then if
   * we are sampling completely outside the image. */
  if (wrap_x == InterpWrapMode::Repeat) {
    if (x2 >= width) {
      x2 = 0;
    }
  }
  else if (wrap_x == InterpWrapMode::Border && (x2 < 0 || x1 >= width)) {
    copy_vn_fl(output, components, 0.0f);
    return;
  }
  if (wrap_y == InterpWrapMode::Repeat) {
    if (y2 >= height) {
      y2 = 0;
    }
  }
  else if (wrap_y == InterpWrapMode::Border && (y2 < 0 || y1 >= height)) {
    copy_vn_fl(output, components, 0.0f);
    return;
  }

  /* Sample locations. Clamped indices are only used where the un-clamped
   * ones are in range, or in Extend mode where clamping is the intent. */
  int x1c = blender::math::clamp(x1, 0, width - 1);
  int x2c = blender::math::clamp(x2, 0, width - 1);
  int y1c = blender::math::clamp(y1, 0, height - 1);
  int y2c = blender::math::clamp(y2, 0, height - 1);
  row1 = buffer + (int64_t(width) * y1c + x1c) * components;
  row2 = buffer + (int64_t(width) * y2c + x1c) * components;
  row3 = buffer + (int64_t(width) * y1c + x2c) * components;
  row4 = buffer + (int64_t(width) * y2c + x2c) * components;

  /* Border mode: samples that fall off the image blend against black. */
  if (wrap_x == InterpWrapMode::Border) {
    if (x1 < 0) {
      row1 = empty;
      row2 = empty;
    }
    if (x2 > width - 1) {
      row3 = empty;
      row4 = empty;
    }
  }
  if (wrap_y == InterpWrapMode::Border) {
    if (y1 < 0) {
      row1 = empty;
      row3 = empty;
    }
    if (y2 > height - 1) {
      row2 = empty;
      row4 = empty;
    }
  }

  /* Finally, do interpolation. */
  a = u - uf;
  b = v - vf;
  a_b = a * b;
  ma_b = (1.0f - a) * b;
  a_mb = a * (1.0f - b);
  ma_mb = (1.0f - a) * (1.0f - b);

  if (components == 1) {
    output[0] = ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0];
  }
  else if (components == 2) {
    output[0] = ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0];
    output[1] = ma_mb * row1[1] + a_mb * row3[1] + ma_b * row2[1] + a_b * row4[1];
  }
  else if (components == 3) {
    output[0] = ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0];
    output[1] = ma_mb * row1[1] + a_mb * row3[1] + ma_b * row2[1] + a_b * row4[1];
    output[2] = ma_mb * row1[2] + a_mb * row3[2] + ma_b * row2[2] + a_b * row4[2];
  }
  else {
#if BLI_HAVE_SSE2
    /* 4-component case: blend all channels at once. */
    __m128 rgba1 = _mm_loadu_ps(row1);
    __m128 rgba2 = _mm_loadu_ps(row2);
    __m128 rgba3 = _mm_loadu_ps(row3);
    __m128 rgba4 = _mm_loadu_ps(row4);
    rgba1 = _mm_mul_ps(_mm_set1_ps(ma_mb), rgba1);
    rgba2 = _mm_mul_ps(_mm_set1_ps(ma_b), rgba2);
    rgba3 = _mm_mul_ps(_mm_set1_ps(a_mb), rgba3);
    rgba4 = _mm_mul_ps(_mm_set1_ps(a_b), rgba4);
    __m128 rgba13 = _mm_add_ps(rgba1, rgba3);
    __m128 rgba24 = _mm_add_ps(rgba2, rgba4);
    __m128 rgba = _mm_add_ps(rgba13, rgba24);
    _mm_storeu_ps(output, rgba);
#else
    output[0] = ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0];
    output[1] = ma_mb * row1[1] + a_mb * row3[1] + ma_b * row2[1] + a_b * row4[1];
    output[2] = ma_mb * row1[2] + a_mb * row3[2] + ma_b * row2[2] + a_b * row4[2];
    output[3] = ma_mb * row1[3] + a_mb * row3[3] + ma_b * row2[3] + a_b * row4[3];
#endif
  }
}
416
/* Bilinear sampling of a 4-component (RGBA) byte image.
 * With `border == true` samples outside the image blend against black;
 * otherwise coordinates are clamped to the image edge. */
template<bool border>
BLI_INLINE uchar4 bilinear_byte_impl(const uchar *buffer, int width, int height, float u, float v)
{
  BLI_assert(buffer);
  uchar4 res;

#if BLI_HAVE_SSE4
  __m128 uvuv = _mm_set_ps(v, u, v, u);
  __m128 uvuv_floor = _mm_floor_ps(uvuv);

  /* x1, y1, x2, y2 */
  __m128i xy12 = _mm_add_epi32(_mm_cvttps_epi32(uvuv_floor), _mm_set_epi32(1, 1, 0, 0));
  /* Check whether any of the coordinates are outside of the image. */
  __m128i size_minus_1 = _mm_sub_epi32(_mm_set_epi32(height, width, height, width),
                                       _mm_set1_epi32(1));

  /* Samples 1,2,3,4 will be in this order: x1y1, x1y2, x2y1, x2y2. */
  __m128i x1234, y1234, invalid_1234;

  if constexpr (border) {
    /* Blend black colors for samples right outside the image: figure out
     * which of the 4 samples were outside, set their coordinates to zero
     * and later on put black color into their place. */
    __m128i too_lo_xy12 = _mm_cmplt_epi32(xy12, _mm_setzero_si128());
    __m128i too_hi_xy12 = _mm_cmplt_epi32(size_minus_1, xy12);
    __m128i invalid_xy12 = _mm_or_si128(too_lo_xy12, too_hi_xy12);

    /* Samples 1,2,3,4 are in this order: x1y1, x1y2, x2y1, x2y2 */
    x1234 = _mm_shuffle_epi32(xy12, _MM_SHUFFLE(2, 2, 0, 0));
    y1234 = _mm_shuffle_epi32(xy12, _MM_SHUFFLE(3, 1, 3, 1));
    invalid_1234 = _mm_or_si128(_mm_shuffle_epi32(invalid_xy12, _MM_SHUFFLE(2, 2, 0, 0)),
                                _mm_shuffle_epi32(invalid_xy12, _MM_SHUFFLE(3, 1, 3, 1)));
    /* Set x & y to zero for invalid samples. */
    x1234 = _mm_andnot_si128(invalid_1234, x1234);
    y1234 = _mm_andnot_si128(invalid_1234, y1234);
  }
  else {
    /* Clamp samples to image edges. */
    __m128i xy12_clamped = _mm_max_epi32(xy12, _mm_setzero_si128());
    xy12_clamped = _mm_min_epi32(xy12_clamped, size_minus_1);
    x1234 = _mm_shuffle_epi32(xy12_clamped, _MM_SHUFFLE(2, 2, 0, 0));
    y1234 = _mm_shuffle_epi32(xy12_clamped, _MM_SHUFFLE(3, 1, 3, 1));
  }

  /* Read the four sample values. Do address calculations in C, since SSE
   * before 4.1 makes it very cumbersome to do full integer multiplies. */
  int xcoord[4];
  int ycoord[4];
  _mm_storeu_ps((float *)xcoord, _mm_castsi128_ps(x1234));
  _mm_storeu_ps((float *)ycoord, _mm_castsi128_ps(y1234));
  int sample1 = ((const int *)buffer)[ycoord[0] * int64_t(width) + xcoord[0]];
  int sample2 = ((const int *)buffer)[ycoord[1] * int64_t(width) + xcoord[1]];
  int sample3 = ((const int *)buffer)[ycoord[2] * int64_t(width) + xcoord[2]];
  int sample4 = ((const int *)buffer)[ycoord[3] * int64_t(width) + xcoord[3]];
  __m128i samples1234 = _mm_set_epi32(sample4, sample3, sample2, sample1);
  if constexpr (border) {
    /* Set samples to black for the ones that were actually invalid. */
    samples1234 = _mm_andnot_si128(invalid_1234, samples1234);
  }

  /* Expand samples from packed 8-bit RGBA to full floats:
   * spread to 16 bit values. */
  __m128i rgba16_12 = _mm_unpacklo_epi8(samples1234, _mm_setzero_si128());
  __m128i rgba16_34 = _mm_unpackhi_epi8(samples1234, _mm_setzero_si128());
  /* Spread to 32 bit values and convert to float. */
  __m128 rgba1 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(rgba16_12, _mm_setzero_si128()));
  __m128 rgba2 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(rgba16_12, _mm_setzero_si128()));
  __m128 rgba3 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(rgba16_34, _mm_setzero_si128()));
  __m128 rgba4 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(rgba16_34, _mm_setzero_si128()));

  /* Calculate interpolation factors: (1-a)*(1-b), (1-a)*b, a*(1-b), a*b */
  __m128 abab = _mm_sub_ps(uvuv, uvuv_floor);
  __m128 m_abab = _mm_sub_ps(_mm_set1_ps(1.0f), abab);
  __m128 ab_mab = _mm_shuffle_ps(abab, m_abab, _MM_SHUFFLE(3, 2, 1, 0));
  __m128 factors = _mm_mul_ps(_mm_shuffle_ps(ab_mab, ab_mab, _MM_SHUFFLE(0, 0, 2, 2)),
                              _mm_shuffle_ps(ab_mab, ab_mab, _MM_SHUFFLE(1, 3, 1, 3)));

  /* Blend the samples. */
  rgba1 = _mm_mul_ps(_mm_shuffle_ps(factors, factors, _MM_SHUFFLE(0, 0, 0, 0)), rgba1);
  rgba2 = _mm_mul_ps(_mm_shuffle_ps(factors, factors, _MM_SHUFFLE(1, 1, 1, 1)), rgba2);
  rgba3 = _mm_mul_ps(_mm_shuffle_ps(factors, factors, _MM_SHUFFLE(2, 2, 2, 2)), rgba3);
  rgba4 = _mm_mul_ps(_mm_shuffle_ps(factors, factors, _MM_SHUFFLE(3, 3, 3, 3)), rgba4);
  __m128 rgba13 = _mm_add_ps(rgba1, rgba3);
  __m128 rgba24 = _mm_add_ps(rgba2, rgba4);
  __m128 rgba = _mm_add_ps(rgba13, rgba24);
  /* +0.5 rounds to nearest when truncating below. */
  rgba = _mm_add_ps(rgba, _mm_set1_ps(0.5f));
  /* Pack and write to destination: pack to 16 bit signed, then to 8 bit
   * unsigned, then write resulting 32-bit value. */
  __m128i rgba32 = _mm_cvttps_epi32(rgba);
  __m128i rgba16 = _mm_packs_epi32(rgba32, _mm_setzero_si128());
  __m128i rgba8 = _mm_packus_epi16(rgba16, _mm_setzero_si128());
  _mm_store_ss((float *)&res, _mm_castsi128_ps(rgba8));

#else

  /* Scalar fallback path. */
  float uf = floorf(u);
  float vf = floorf(v);

  int x1 = int(uf);
  int x2 = x1 + 1;
  int y1 = int(vf);
  int y2 = y1 + 1;

  /* Completely outside of the image in bordered mode? */
  if (border && (x2 < 0 || x1 >= width || y2 < 0 || y1 >= height)) {
    return uchar4(0);
  }

  /* Sample locations. */
  const uchar *row1, *row2, *row3, *row4;
  uchar empty[4] = {0, 0, 0, 0};
  if constexpr (border) {
    /* Out-of-image samples blend against black. */
    row1 = (x1 < 0 || y1 < 0) ? empty : buffer + (int64_t(width) * y1 + x1) * 4;
    row2 = (x1 < 0 || y2 > height - 1) ? empty : buffer + (int64_t(width) * y2 + x1) * 4;
    row3 = (x2 > width - 1 || y1 < 0) ? empty : buffer + (int64_t(width) * y1 + x2) * 4;
    row4 = (x2 > width - 1 || y2 > height - 1) ? empty : buffer + (int64_t(width) * y2 + x2) * 4;
  }
  else {
    x1 = blender::math::clamp(x1, 0, width - 1);
    x2 = blender::math::clamp(x2, 0, width - 1);
    y1 = blender::math::clamp(y1, 0, height - 1);
    y2 = blender::math::clamp(y2, 0, height - 1);
    row1 = buffer + (int64_t(width) * y1 + x1) * 4;
    row2 = buffer + (int64_t(width) * y2 + x1) * 4;
    row3 = buffer + (int64_t(width) * y1 + x2) * 4;
    row4 = buffer + (int64_t(width) * y2 + x2) * 4;
  }

  float a = u - uf;
  float b = v - vf;
  float a_b = a * b;
  float ma_b = (1.0f - a) * b;
  float a_mb = a * (1.0f - b);
  float ma_mb = (1.0f - a) * (1.0f - b);

  /* Blend and round to nearest byte. */
  res.x = uchar(ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0] + 0.5f);
  res.y = uchar(ma_mb * row1[1] + a_mb * row3[1] + ma_b * row2[1] + a_b * row4[1] + 0.5f);
  res.z = uchar(ma_mb * row1[2] + a_mb * row3[2] + ma_b * row2[2] + a_b * row4[2] + 0.5f);
  res.w = uchar(ma_mb * row1[3] + a_mb * row3[3] + ma_b * row2[3] + a_b * row4[3] + 0.5f);
#endif

  return res;
}
560
562 const uchar *buffer, int width, int height, float u, float v)
563{
564 return bilinear_byte_impl<true>(buffer, width, height, u, v);
565}
566
/* Bilinear sample of a byte RGBA image; coordinates are clamped to the edge. */
uchar4 interpolate_bilinear_byte(const uchar *buffer, int width, int height, float u, float v)
{
  return bilinear_byte_impl<false>(buffer, width, height, u, v);
}
571
572float4 interpolate_bilinear_border_fl(const float *buffer, int width, int height, float u, float v)
573{
574 float4 res;
576 buffer, res, width, height, 4, u, v, InterpWrapMode::Border, InterpWrapMode::Border);
577 return res;
578}
579
581 const float *buffer, float *output, int width, int height, int components, float u, float v)
582{
583 bilinear_fl_impl(buffer,
584 output,
585 width,
586 height,
587 components,
588 u,
589 v,
592}
593
594float4 interpolate_bilinear_fl(const float *buffer, int width, int height, float u, float v)
595{
596 float4 res;
598 buffer, res, width, height, 4, u, v, InterpWrapMode::Extend, InterpWrapMode::Extend);
599 return res;
600}
601
603 const float *buffer, float *output, int width, int height, int components, float u, float v)
604{
605 bilinear_fl_impl(buffer,
606 output,
607 width,
608 height,
609 components,
610 u,
611 v,
614}
615
/* Bilinear sample of a float image with caller-chosen per-axis wrap modes. */
void interpolate_bilinear_wrapmode_fl(const float *buffer,
                                      float *output,
                                      int width,
                                      int height,
                                      int components,
                                      float u,
                                      float v,
                                      InterpWrapMode wrap_u,
                                      InterpWrapMode wrap_v)
{
  bilinear_fl_impl(buffer, output, width, height, components, u, v, wrap_u, wrap_v);
}
628
629uchar4 interpolate_bilinear_wrap_byte(const uchar *buffer, int width, int height, float u, float v)
630{
631 u = floored_fmod(u, float(width));
632 v = floored_fmod(v, float(height));
633 float uf = floorf(u);
634 float vf = floorf(v);
635
636 int x1 = int(uf);
637 int x2 = x1 + 1;
638 int y1 = int(vf);
639 int y2 = y1 + 1;
640
641 /* Wrap interpolation pixels if needed. */
642 BLI_assert(x1 >= 0 && x1 < width && y1 >= 0 && y1 < height);
643 if (x2 >= width) {
644 x2 = 0;
645 }
646 if (y2 >= height) {
647 y2 = 0;
648 }
649
650 float a = u - uf;
651 float b = v - vf;
652 float a_b = a * b;
653 float ma_b = (1.0f - a) * b;
654 float a_mb = a * (1.0f - b);
655 float ma_mb = (1.0f - a) * (1.0f - b);
656
657 /* Blend samples. */
658 const uchar *row1 = buffer + (int64_t(width) * y1 + x1) * 4;
659 const uchar *row2 = buffer + (int64_t(width) * y2 + x1) * 4;
660 const uchar *row3 = buffer + (int64_t(width) * y1 + x2) * 4;
661 const uchar *row4 = buffer + (int64_t(width) * y2 + x2) * 4;
662
663 uchar4 res;
664 res.x = uchar(ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0] + 0.5f);
665 res.y = uchar(ma_mb * row1[1] + a_mb * row3[1] + ma_b * row2[1] + a_b * row4[1] + 0.5f);
666 res.z = uchar(ma_mb * row1[2] + a_mb * row3[2] + ma_b * row2[2] + a_b * row4[2] + 0.5f);
667 res.w = uchar(ma_mb * row1[3] + a_mb * row3[3] + ma_b * row2[3] + a_b * row4[3] + 0.5f);
668 return res;
669}
670
671float4 interpolate_bilinear_wrap_fl(const float *buffer, int width, int height, float u, float v)
672{
673 float4 res;
675 buffer, res, width, height, 4, u, v, InterpWrapMode::Repeat, InterpWrapMode::Repeat);
676 return res;
677}
678
679uchar4 interpolate_cubic_bspline_byte(const uchar *buffer, int width, int height, float u, float v)
680{
681 uchar4 res;
683 buffer, res, width, height, 4, u, v, InterpWrapMode::Extend, InterpWrapMode::Extend);
684 return res;
685}
686
687float4 interpolate_cubic_bspline_fl(const float *buffer, int width, int height, float u, float v)
688{
689 float4 res;
691 buffer, res, width, height, 4, u, v, InterpWrapMode::Extend, InterpWrapMode::Extend);
692 return res;
693}
694
696 const float *buffer, float *output, int width, int height, int components, float u, float v)
697{
699 output,
700 width,
701 height,
702 components,
703 u,
704 v,
707}
708
710 float *output,
711 int width,
712 int height,
713 int components,
714 float u,
715 float v,
718{
720 buffer, output, width, height, components, u, v, wrap_u, wrap_v);
721}
722
724 const uchar *buffer, int width, int height, float u, float v)
725{
726 uchar4 res;
728 buffer, res, width, height, 4, u, v, InterpWrapMode::Extend, InterpWrapMode::Extend);
729 return res;
730}
731
732float4 interpolate_cubic_mitchell_fl(const float *buffer, int width, int height, float u, float v)
733{
734 float4 res;
736 buffer, res, width, height, 4, u, v, InterpWrapMode::Extend, InterpWrapMode::Extend);
737 return res;
738}
739
741 const float *buffer, float *output, int width, int height, int components, float u, float v)
742{
744 output,
745 width,
746 height,
747 components,
748 u,
749 v,
752}
753
754} // namespace blender::math
755
756/**************************************************************************
757 * Filtering method based on
758 * "Creating raster omnimax images from multiple perspective views
759 * using the elliptical weighted average filter"
760 * by Ned Greene and Paul S. Heckbert (1986)
761 ***************************************************************************/
762
/* Table of `(exp(ar) - exp(a)) / (1 - exp(a))` for `r` in range [0, 1] and `a = -2`.
 * used instead of actual gaussian,
 * otherwise at high texture magnifications circular artifacts are visible.
 * 256 entries, indexed by the (scaled) squared ellipse distance Q. */
#define EWA_MAXIDX 255
const float EWA_WTS[EWA_MAXIDX + 1] = {
    1.0f,       0.990965f,  0.982f,     0.973105f,  0.96428f,   0.955524f,  0.946836f,
    0.938216f,  0.929664f,  0.921178f,  0.912759f,  0.904405f,  0.896117f,  0.887893f,
    0.879734f,  0.871638f,  0.863605f,  0.855636f,  0.847728f,  0.839883f,  0.832098f,
    0.824375f,  0.816712f,  0.809108f,  0.801564f,  0.794079f,  0.786653f,  0.779284f,
    0.771974f,  0.76472f,   0.757523f,  0.750382f,  0.743297f,  0.736267f,  0.729292f,
    0.722372f,  0.715505f,  0.708693f,  0.701933f,  0.695227f,  0.688572f,  0.68197f,
    0.67542f,   0.66892f,   0.662471f,  0.656073f,  0.649725f,  0.643426f,  0.637176f,
    0.630976f,  0.624824f,  0.618719f,  0.612663f,  0.606654f,  0.600691f,  0.594776f,
    0.588906f,  0.583083f,  0.577305f,  0.571572f,  0.565883f,  0.56024f,   0.55464f,
    0.549084f,  0.543572f,  0.538102f,  0.532676f,  0.527291f,  0.521949f,  0.516649f,
    0.511389f,  0.506171f,  0.500994f,  0.495857f,  0.490761f,  0.485704f,  0.480687f,
    0.475709f,  0.470769f,  0.465869f,  0.461006f,  0.456182f,  0.451395f,  0.446646f,
    0.441934f,  0.437258f,  0.432619f,  0.428017f,  0.42345f,   0.418919f,  0.414424f,
    0.409963f,  0.405538f,  0.401147f,  0.39679f,   0.392467f,  0.388178f,  0.383923f,
    0.379701f,  0.375511f,  0.371355f,  0.367231f,  0.363139f,  0.359079f,  0.355051f,
    0.351055f,  0.347089f,  0.343155f,  0.339251f,  0.335378f,  0.331535f,  0.327722f,
    0.323939f,  0.320186f,  0.316461f,  0.312766f,  0.3091f,    0.305462f,  0.301853f,
    0.298272f,  0.294719f,  0.291194f,  0.287696f,  0.284226f,  0.280782f,  0.277366f,
    0.273976f,  0.270613f,  0.267276f,  0.263965f,  0.26068f,   0.257421f,  0.254187f,
    0.250979f,  0.247795f,  0.244636f,  0.241502f,  0.238393f,  0.235308f,  0.232246f,
    0.229209f,  0.226196f,  0.223206f,  0.220239f,  0.217296f,  0.214375f,  0.211478f,
    0.208603f,  0.20575f,   0.20292f,   0.200112f,  0.197326f,  0.194562f,  0.191819f,
    0.189097f,  0.186397f,  0.183718f,  0.18106f,   0.178423f,  0.175806f,  0.17321f,
    0.170634f,  0.168078f,  0.165542f,  0.163026f,  0.16053f,   0.158053f,  0.155595f,
    0.153157f,  0.150738f,  0.148337f,  0.145955f,  0.143592f,  0.141248f,  0.138921f,
    0.136613f,  0.134323f,  0.132051f,  0.129797f,  0.12756f,   0.125341f,  0.123139f,
    0.120954f,  0.118786f,  0.116635f,  0.114501f,  0.112384f,  0.110283f,  0.108199f,
    0.106131f,  0.104079f,  0.102043f,  0.100023f,  0.0980186f, 0.09603f,   0.094057f,
    0.0920994f, 0.0901571f, 0.08823f,   0.0863179f, 0.0844208f, 0.0825384f, 0.0806708f,
    0.0788178f, 0.0769792f, 0.0751551f, 0.0733451f, 0.0715493f, 0.0697676f, 0.0679997f,
    0.0662457f, 0.0645054f, 0.0627786f, 0.0610654f, 0.0593655f, 0.0576789f, 0.0560055f,
    0.0543452f, 0.0526979f, 0.0510634f, 0.0494416f, 0.0478326f, 0.0462361f, 0.0446521f,
    0.0430805f, 0.0415211f, 0.039974f,  0.0384389f, 0.0369158f, 0.0354046f, 0.0339052f,
    0.0324175f, 0.0309415f, 0.029477f,  0.0280239f, 0.0265822f, 0.0251517f, 0.0237324f,
    0.0223242f, 0.020927f,  0.0195408f, 0.0181653f, 0.0168006f, 0.0154466f, 0.0141031f,
    0.0127701f, 0.0114476f, 0.0101354f, 0.00883339f, 0.00754159f, 0.00625989f, 0.00498819f,
    0.00372644f, 0.00247454f, 0.00123242f, 0.0f,
};
806
/* Convert squared ellipse radii (a2, b2) plus rotation angle `th` into the
 * implicit conic coefficients of `A*x^2 + B*x*y + C*y^2 = F`. */
static void radangle2imp(float a2, float b2, float th, float *A, float *B, float *C, float *F)
{
  const float c = cosf(th);
  const float cos2 = c * c;       /* cos(th)^2 */
  const float sin2 = 1.0f - cos2; /* sin(th)^2 */
  *A = a2 * sin2 + b2 * cos2;
  *B = (b2 - a2) * sinf(2.0f * th);
  *C = a2 * cos2 + b2 * sin2;
  *F = a2 * b2;
}
817
/* Inverse of radangle2imp: recover major/minor radii `a`/`b`, the major-axis
 * angle `th` and the eccentricity ratio `a / b` (1e10 when degenerate) from
 * the implicit conic coefficients `A*x^2 + B*x*y + C*y^2 = F`. */
void BLI_ewa_imp2radangle(
    float A, float B, float C, float F, float *a, float *b, float *th, float *ecc)
{
  /* NOTE: all tests here are done to make sure possible overflows are hopefully minimized. */

  if (F <= 1e-5f) { /* use arbitrary major radius, zero minor, infinite eccentricity */
    *a = sqrtf(A > C ? A : C);
    *b = 0.0f;
    *ecc = 1e10f;
    *th = 0.5f * (atan2f(B, A - C) + float(M_PI));
  }
  else {
    const float AmC = A - C, ApC = A + C, F2 = F * 2.0f;
    const float r = sqrtf(AmC * AmC + B * B);
    float d = ApC - r;
    /* Guard against a non-positive denominator from rounding. */
    *a = (d <= 0.0f) ? sqrtf(A > C ? A : C) : sqrtf(F2 / d);
    d = ApC + r;
    if (d <= 0.0f) {
      *b = 0.0f;
      *ecc = 1e10f;
    }
    else {
      *b = sqrtf(F2 / d);
      *ecc = *a / *b;
    }
    /* Increase theta by `0.5 * pi` (angle of major axis). */
    *th = 0.5f * (atan2f(B, AmC) + float(M_PI));
  }
}
847
848void BLI_ewa_filter(const int width,
849 const int height,
850 const bool intpol,
851 const bool use_alpha,
852 const float uv[2],
853 const float du[2],
854 const float dv[2],
855 ewa_filter_read_pixel_cb read_pixel_cb,
856 void *userdata,
857 float result[4])
858{
859 /* Scaling `dxt` / `dyt` by full resolution can cause overflow because of huge A/B/C and esp.
860 * F values, scaling by aspect ratio alone does the opposite, so try something in between
861 * instead. */
862 const float ff2 = float(width), ff = sqrtf(ff2), q = float(height) / ff;
863 const float Ux = du[0] * ff, Vx = du[1] * q, Uy = dv[0] * ff, Vy = dv[1] * q;
864 float A = Vx * Vx + Vy * Vy;
865 float B = -2.0f * (Ux * Vx + Uy * Vy);
866 float C = Ux * Ux + Uy * Uy;
867 float F = A * C - B * B * 0.25f;
868 float a, b, th, ecc, a2, b2, ue, ve, U0, V0, DDQ, U, ac1, ac2, BU, d;
869 int u, v, u1, u2, v1, v2;
870
871 /* The so-called 'high' quality EWA method simply adds a constant of 1 to both A & C,
872 * so the ellipse always covers at least some texels. But since the filter is now always larger,
873 * it also means that everywhere else it's also more blurry then ideally should be the case.
874 * So instead here the ellipse radii are modified instead whenever either is too low.
875 * Use a different radius based on interpolation switch,
876 * just enough to anti-alias when interpolation is off,
877 * and slightly larger to make result a bit smoother than bilinear interpolation when
878 * interpolation is on (minimum values: `const float rmin = intpol ? 1.0f : 0.5f;`) */
879 const float rmin = (intpol ? 1.5625f : 0.765625f) / ff2;
880 BLI_ewa_imp2radangle(A, B, C, F, &a, &b, &th, &ecc);
881 if ((b2 = b * b) < rmin) {
882 if ((a2 = a * a) < rmin) {
883 UNUSED_VARS(a2, b2);
884 B = 0.0f;
885 A = C = rmin;
886 F = A * C;
887 }
888 else {
889 b2 = rmin;
890 radangle2imp(a2, b2, th, &A, &B, &C, &F);
891 }
892 }
893
894 ue = ff * sqrtf(C);
895 ve = ff * sqrtf(A);
896 d = float(EWA_MAXIDX + 1) / (F * ff2);
897 A *= d;
898 B *= d;
899 C *= d;
900
901 U0 = uv[0] * float(width);
902 V0 = uv[1] * float(height);
903 u1 = int(floorf(U0 - ue));
904 u2 = int(ceilf(U0 + ue));
905 v1 = int(floorf(V0 - ve));
906 v2 = int(ceilf(V0 + ve));
907
908 /* sane clamping to avoid unnecessarily huge loops */
909 /* NOTE: if eccentricity gets clamped (see above),
910 * the ue/ve limits can also be lowered accordingly
911 */
912 if (U0 - float(u1) > EWA_MAXIDX) {
913 u1 = int(U0) - EWA_MAXIDX;
914 }
915 if (float(u2) - U0 > EWA_MAXIDX) {
916 u2 = int(U0) + EWA_MAXIDX;
917 }
918 if (V0 - float(v1) > EWA_MAXIDX) {
919 v1 = int(V0) - EWA_MAXIDX;
920 }
921 if (float(v2) - V0 > EWA_MAXIDX) {
922 v2 = int(V0) + EWA_MAXIDX;
923 }
924
925 /* Early output check for cases the whole region is outside of the buffer. */
926 if ((u2 < 0 || u1 >= width) || (v2 < 0 || v1 >= height)) {
928 return;
929 }
930
931 U0 -= 0.5f;
932 V0 -= 0.5f;
933 DDQ = 2.0f * A;
934 U = float(u1) - U0;
935 ac1 = A * (2.0f * U + 1.0f);
936 ac2 = A * U * U;
937 BU = B * U;
938
939 d = 0.0f;
941 for (v = v1; v <= v2; v++) {
942 const float V = float(v) - V0;
943 float DQ = ac1 + B * V;
944 float Q = (C * V + BU) * V + ac2;
945 for (u = u1; u <= u2; u++) {
946 if (Q < float(EWA_MAXIDX + 1)) {
947 float tc[4];
948 const float wt = EWA_WTS[(Q < 0.0f) ? 0 : uint(Q)];
949 read_pixel_cb(userdata, u, v, tc);
950 madd_v3_v3fl(result, tc, wt);
951 result[3] += use_alpha ? tc[3] * wt : 0.0f;
952 d += wt;
953 }
954 Q += DQ;
955 DQ += DDQ;
956 }
957 }
958
959 /* `d` should hopefully never be zero anymore. */
960 d = 1.0f / d;
961 mul_v3_fl(result, d);
962 /* Clipping can be ignored if alpha used, `texr->trgba[3]` already includes filtered edge. */
963 result[3] = use_alpha ? result[3] * d : 1.0f;
964}
#define BLI_assert(a)
Definition BLI_assert.h:46
#define BLI_INLINE
MINLINE float floored_fmod(float f, float n)
#define M_PI
void(*)(void *userdata, int x, int y, float result[4]) ewa_filter_read_pixel_cb
#define EWA_MAXIDX
const float EWA_WTS[EWA_MAXIDX+1]
MINLINE void copy_v4_v4(float r[4], const float a[4])
MINLINE void madd_v3_v3fl(float r[3], const float a[3], float f)
MINLINE void copy_v2_v2(float r[2], const float a[2])
MINLINE void mul_v3_fl(float r[3], float f)
MINLINE void copy_v3_v3(float r[3], const float a[3])
void copy_vn_fl(float *array_tar, int size, float val)
MINLINE void zero_v4(float r[4])
unsigned char uchar
unsigned int uint
#define CLAMP(a, b, c)
#define UNUSED_VARS(...)
#define C
Definition RandGen.cpp:29
#define U
BMesh const char void * data
ATTR_WARN_UNUSED_RESULT const BMVert * v2
ATTR_WARN_UNUSED_RESULT const BMVert * v
#define A
long long int int64_t
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition btDbvt.cpp:52
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
Definition btQuadWord.h:119
nullptr float
#define out
#define filter
#define output
#define floor
static void radangle2imp(float a2, float b2, float th, float *A, float *B, float *C, float *F)
void BLI_ewa_filter(const int width, const int height, const bool intpol, const bool use_alpha, const float uv[2], const float du[2], const float dv[2], ewa_filter_read_pixel_cb read_pixel_cb, void *userdata, float result[4])
void BLI_ewa_imp2radangle(float A, float B, float C, float F, float *a, float *b, float *th, float *ecc)
#define T
#define B
#define F
uchar4 interpolate_cubic_mitchell_byte(const uchar *buffer, int width, int height, float u, float v)
uchar4 interpolate_bilinear_wrap_byte(const uchar *buffer, int width, int height, float u, float v)
BLI_INLINE void bicubic_interpolation(const T *src_buffer, T *output, int width, int height, int components, float u, float v, InterpWrapMode wrap_u, InterpWrapMode wrap_v)
static float4 cubic_filter_coefficients(float f)
uchar4 interpolate_bilinear_byte(const uchar *buffer, int width, int height, float u, float v)
BLI_INLINE uchar4 bilinear_byte_impl(const uchar *buffer, int width, int height, float u, float v)
T clamp(const T &a, const T &min, const T &max)
BLI_INLINE void bilinear_fl_impl(const float *buffer, float *output, int width, int height, int components, float u, float v, InterpWrapMode wrap_x, InterpWrapMode wrap_y)
float4 interpolate_bilinear_border_fl(const float *buffer, int width, int height, float u, float v)
float4 interpolate_bilinear_wrap_fl(const float *buffer, int width, int height, float u, float v)
float4 interpolate_cubic_bspline_fl(const float *buffer, int width, int height, float u, float v)
BLI_INLINE int wrap_coord(float u, int size, InterpWrapMode wrap)
T min(const T &a, const T &b)
void interpolate_nearest_wrapmode_fl(const float *buffer, float *output, int width, int height, int components, float u, float v, InterpWrapMode wrap_u, InterpWrapMode wrap_v)
float4 interpolate_bilinear_fl(const float *buffer, int width, int height, float u, float v)
float4 interpolate_cubic_mitchell_fl(const float *buffer, int width, int height, float u, float v)
T max(const T &a, const T &b)
void interpolate_bilinear_wrapmode_fl(const float *buffer, float *output, int width, int height, int components, float u, float v, InterpWrapMode wrap_u, InterpWrapMode wrap_v)
uchar4 interpolate_cubic_bspline_byte(const uchar *buffer, int width, int height, float u, float v)
uchar4 interpolate_bilinear_border_byte(const uchar *buffer, int width, int height, float u, float v)
void interpolate_cubic_bspline_wrapmode_fl(const float *buffer, float *output, int width, int height, int components, float u, float v, InterpWrapMode wrap_u, InterpWrapMode wrap_v)
blender::VecBase< uint8_t, 4 > uchar4
VecBase< float, 4 > float4
float wrap(float value, float max, float min)
Definition node_math.h:103
#define floorf
#define sqrtf
#define sinf
#define cosf
#define ceilf
#define atan2f
i
Definition text_draw.cc:230
CCL_NAMESPACE_BEGIN struct Window V