Blender V4.5
math_interp.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2024 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
8
9#include <cmath>
10#include <cstring>
11
12#include "BLI_math_base.h"
13#include "BLI_math_base.hh"
14#include "BLI_math_interp.hh"
15#include "BLI_math_vector.h"
17#include "BLI_simd.hh"
18
19#include "BLI_strict_flags.h" /* IWYU pragma: keep. Keep last. */
20
21namespace blender::math {
22
24{
25 int x = 0;
26 switch (wrap) {
28 x = math::clamp(int(u), 0, size - 1);
29 break;
31 x = int(floored_fmod(u, float(size)));
32 break;
34 x = int(u);
35 if (u < 0.0f || x >= size) {
36 x = -1;
37 }
38 break;
39 }
40 return x;
41}
42
43void interpolate_nearest_wrapmode_fl(const float *buffer,
44 float *output,
45 int width,
46 int height,
47 int components,
48 float u,
49 float v,
50 InterpWrapMode wrap_u,
51 InterpWrapMode wrap_v)
52{
53 BLI_assert(buffer);
54 int x = wrap_coord(u, width, wrap_u);
55 int y = wrap_coord(v, height, wrap_v);
56 if (x < 0 || y < 0) {
57 for (int i = 0; i < components; i++) {
58 output[i] = 0.0f;
59 }
60 return;
61 }
62
63 const float *data = buffer + (int64_t(width) * y + x) * components;
64 for (int i = 0; i < components; i++) {
65 output[i] = data[i];
66 }
67}
68
73
74/* Calculate cubic filter coefficients, for samples at -1,0,+1,+2.
75 * f is 0..1 offset from texel center in pixel space. */
76template<enum eCubicFilter filter> static float4 cubic_filter_coefficients(float f)
77{
78 float f2 = f * f;
79 float f3 = f2 * f;
80
81 if constexpr (filter == eCubicFilter::BSpline) {
82 /* Cubic B-Spline (Mitchell-Netravali filter with B=1, C=0 parameters). */
83 float w3 = f3 * (1.0f / 6.0f);
84 float w0 = -w3 + f2 * 0.5f - f * 0.5f + 1.0f / 6.0f;
85 float w1 = f3 * 0.5f - f2 * 1.0f + 2.0f / 3.0f;
86 float w2 = 1.0f - w0 - w1 - w3;
87 return float4(w0, w1, w2, w3);
88 }
89 else if constexpr (filter == eCubicFilter::Mitchell) {
90 /* Cubic Mitchell-Netravali filter with B=1/3, C=1/3 parameters. */
91 float w0 = -7.0f / 18.0f * f3 + 5.0f / 6.0f * f2 - 0.5f * f + 1.0f / 18.0f;
92 float w1 = 7.0f / 6.0f * f3 - 2.0f * f2 + 8.0f / 9.0f;
93 float w2 = -7.0f / 6.0f * f3 + 3.0f / 2.0f * f2 + 0.5f * f + 1.0f / 18.0f;
94 float w3 = 7.0f / 18.0f * f3 - 1.0f / 3.0f * f2;
95 return float4(w0, w1, w2, w3);
96 }
97}
98
99#if BLI_HAVE_SSE4
100template<eCubicFilter filter>
101BLI_INLINE void bicubic_interpolation_uchar_simd(
102 const uchar *src_buffer, uchar *output, int width, int height, float u, float v)
103{
104 __m128 uv = _mm_set_ps(0, 0, v, u);
105 __m128 uv_floor = _mm_floor_ps(uv);
106 __m128i i_uv = _mm_cvttps_epi32(uv_floor);
107
108 __m128 frac_uv = _mm_sub_ps(uv, uv_floor);
109
110 /* Calculate pixel weights. */
111 float4 wx = cubic_filter_coefficients<filter>(_mm_cvtss_f32(frac_uv));
113 _mm_cvtss_f32(_mm_shuffle_ps(frac_uv, frac_uv, 1)));
114
115 /* Read 4x4 source pixels and blend them. */
116 __m128 out = _mm_setzero_ps();
117 int iu = _mm_cvtsi128_si32(i_uv);
118 int iv = _mm_cvtsi128_si32(_mm_shuffle_epi32(i_uv, 1));
119
120 for (int n = 0; n < 4; n++) {
121 int y1 = iv + n - 1;
122 CLAMP(y1, 0, height - 1);
123 for (int m = 0; m < 4; m++) {
124
125 int x1 = iu + m - 1;
126 CLAMP(x1, 0, width - 1);
127 float w = wx[m] * wy[n];
128
129 const uchar *data = src_buffer + (width * y1 + x1) * 4;
130 /* Load 4 bytes and expand into 4-lane SIMD. */
131 __m128i sample_i = _mm_castps_si128(_mm_load_ss((const float *)data));
132 sample_i = _mm_unpacklo_epi8(sample_i, _mm_setzero_si128());
133 sample_i = _mm_unpacklo_epi16(sample_i, _mm_setzero_si128());
134
135 /* Accumulate into out with weight. */
136 out = _mm_add_ps(out, _mm_mul_ps(_mm_cvtepi32_ps(sample_i), _mm_set1_ps(w)));
137 }
138 }
139
140 /* Pack and write to destination: pack to 16 bit signed, then to 8 bit
141 * unsigned, then write resulting 32-bit value. This will clamp
142 * out of range values too. */
143 out = _mm_add_ps(out, _mm_set1_ps(0.5f));
144 __m128i rgba32 = _mm_cvttps_epi32(out);
145 __m128i rgba16 = _mm_packs_epi32(rgba32, _mm_setzero_si128());
146 __m128i rgba8 = _mm_packus_epi16(rgba16, _mm_setzero_si128());
147 _mm_store_ss((float *)output, _mm_castsi128_ps(rgba8));
148}
149#endif /* BLI_HAVE_SSE4 */
150
151template<typename T, eCubicFilter filter>
152BLI_INLINE void bicubic_interpolation(const T *src_buffer,
153 T *output,
154 int width,
155 int height,
156 int components,
157 float u,
158 float v,
159 InterpWrapMode wrap_u,
160 InterpWrapMode wrap_v)
161{
162 BLI_assert(src_buffer && output);
163 BLI_assert(components > 0 && components <= 4);
164
165#if BLI_HAVE_SSE4
166 if constexpr (std::is_same_v<T, uchar>) {
167 if (components == 4 && wrap_u == InterpWrapMode::Extend && wrap_v == InterpWrapMode::Extend) {
168 bicubic_interpolation_uchar_simd<filter>(src_buffer, output, width, height, u, v);
169 return;
170 }
171 }
172#endif
173
174 int iu = int(floor(u));
175 int iv = int(floor(v));
176
177 /* Sample area entirely outside image in border mode? */
178 if (wrap_u == InterpWrapMode::Border && (iu + 2 < 0 || iu > width)) {
179 memset(output, 0, size_t(components) * sizeof(T));
180 return;
181 }
182 if (wrap_v == InterpWrapMode::Border && (iv + 2 < 0 || iv > height)) {
183 memset(output, 0, size_t(components) * sizeof(T));
184 return;
185 }
186
187 float frac_u = u - float(iu);
188 float frac_v = v - float(iv);
189
190 float4 out{0.0f};
191
192 /* Calculate pixel weights. */
195
196 /* Read 4x4 source pixels and blend them. */
197 for (int n = 0; n < 4; n++) {
198 int y1 = iv + n - 1;
199 y1 = wrap_coord(float(y1), height, wrap_v);
200 if (wrap_v == InterpWrapMode::Border && y1 < 0) {
201 continue;
202 }
203
204 for (int m = 0; m < 4; m++) {
205 int x1 = iu + m - 1;
206 x1 = wrap_coord(float(x1), width, wrap_u);
207 if (wrap_u == InterpWrapMode::Border && x1 < 0) {
208 continue;
209 }
210 float w = wx[m] * wy[n];
211
212 const T *data = src_buffer + (width * y1 + x1) * components;
213
214 if (components == 1) {
215 out[0] += data[0] * w;
216 }
217 else if (components == 2) {
218 out[0] += data[0] * w;
219 out[1] += data[1] * w;
220 }
221 else if (components == 3) {
222 out[0] += data[0] * w;
223 out[1] += data[1] * w;
224 out[2] += data[2] * w;
225 }
226 else {
227 out[0] += data[0] * w;
228 out[1] += data[1] * w;
229 out[2] += data[2] * w;
230 out[3] += data[3] * w;
231 }
232 }
233 }
234
235 /* Mitchell filter has negative lobes; prevent output from going out of range. */
236 if constexpr (filter == eCubicFilter::Mitchell) {
237 for (int i = 0; i < components; i++) {
238 out[i] = math::max(out[i], 0.0f);
239 if constexpr (std::is_same_v<T, uchar>) {
240 out[i] = math::min(out[i], 255.0f);
241 }
242 }
243 }
244
245 /* Write result. */
246 if constexpr (std::is_same_v<T, float>) {
247 if (components == 1) {
248 output[0] = out[0];
249 }
250 else if (components == 2) {
252 }
253 else if (components == 3) {
255 }
256 else {
258 }
259 }
260 else {
261 if (components == 1) {
262 output[0] = uchar(out[0] + 0.5f);
263 }
264 else if (components == 2) {
265 output[0] = uchar(out[0] + 0.5f);
266 output[1] = uchar(out[1] + 0.5f);
267 }
268 else if (components == 3) {
269 output[0] = uchar(out[0] + 0.5f);
270 output[1] = uchar(out[1] + 0.5f);
271 output[2] = uchar(out[2] + 0.5f);
272 }
273 else {
274 output[0] = uchar(out[0] + 0.5f);
275 output[1] = uchar(out[1] + 0.5f);
276 output[2] = uchar(out[2] + 0.5f);
277 output[3] = uchar(out[3] + 0.5f);
278 }
279 }
280}
281
282BLI_INLINE void bilinear_fl_impl(const float *buffer,
283 float *output,
284 int width,
285 int height,
286 int components,
287 float u,
288 float v,
289 InterpWrapMode wrap_x,
290 InterpWrapMode wrap_y)
291{
292 BLI_assert(buffer && output);
293 BLI_assert(components > 0 && components <= 4);
294
295 float a, b;
296 float a_b, ma_b, a_mb, ma_mb;
297 int y1, y2, x1, x2;
298
299 if (wrap_x == InterpWrapMode::Repeat) {
300 u = floored_fmod(u, float(width));
301 }
302 if (wrap_y == InterpWrapMode::Repeat) {
303 v = floored_fmod(v, float(height));
304 }
305
306 float uf = floorf(u);
307 float vf = floorf(v);
308
309 x1 = int(uf);
310 x2 = x1 + 1;
311 y1 = int(vf);
312 y2 = y1 + 1;
313
314 const float *row1, *row2, *row3, *row4;
315 const float empty[4] = {0.0f, 0.0f, 0.0f, 0.0f};
316
317 /* Check if +1 samples need wrapping, or we don't do wrapping then if
318 * we are sampling completely outside the image. */
319 if (wrap_x == InterpWrapMode::Repeat) {
320 if (x2 >= width) {
321 x2 = 0;
322 }
323 }
324 else if (wrap_x == InterpWrapMode::Border && (x2 < 0 || x1 >= width)) {
325 copy_vn_fl(output, components, 0.0f);
326 return;
327 }
328 if (wrap_y == InterpWrapMode::Repeat) {
329 if (y2 >= height) {
330 y2 = 0;
331 }
332 }
333 else if (wrap_y == InterpWrapMode::Border && (y2 < 0 || y1 >= height)) {
334 copy_vn_fl(output, components, 0.0f);
335 return;
336 }
337
338 /* Sample locations. */
339 int x1c = blender::math::clamp(x1, 0, width - 1);
340 int x2c = blender::math::clamp(x2, 0, width - 1);
341 int y1c = blender::math::clamp(y1, 0, height - 1);
342 int y2c = blender::math::clamp(y2, 0, height - 1);
343 row1 = buffer + (int64_t(width) * y1c + x1c) * components;
344 row2 = buffer + (int64_t(width) * y2c + x1c) * components;
345 row3 = buffer + (int64_t(width) * y1c + x2c) * components;
346 row4 = buffer + (int64_t(width) * y2c + x2c) * components;
347
348 if (wrap_x == InterpWrapMode::Border) {
349 if (x1 < 0) {
350 row1 = empty;
351 row2 = empty;
352 }
353 if (x2 > width - 1) {
354 row3 = empty;
355 row4 = empty;
356 }
357 }
358 if (wrap_y == InterpWrapMode::Border) {
359 if (y1 < 0) {
360 row1 = empty;
361 row3 = empty;
362 }
363 if (y2 > height - 1) {
364 row2 = empty;
365 row4 = empty;
366 }
367 }
368
369 /* Finally, do interpolation. */
370 a = u - uf;
371 b = v - vf;
372 a_b = a * b;
373 ma_b = (1.0f - a) * b;
374 a_mb = a * (1.0f - b);
375 ma_mb = (1.0f - a) * (1.0f - b);
376
377 if (components == 1) {
378 output[0] = ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0];
379 }
380 else if (components == 2) {
381 output[0] = ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0];
382 output[1] = ma_mb * row1[1] + a_mb * row3[1] + ma_b * row2[1] + a_b * row4[1];
383 }
384 else if (components == 3) {
385 output[0] = ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0];
386 output[1] = ma_mb * row1[1] + a_mb * row3[1] + ma_b * row2[1] + a_b * row4[1];
387 output[2] = ma_mb * row1[2] + a_mb * row3[2] + ma_b * row2[2] + a_b * row4[2];
388 }
389 else {
390#if BLI_HAVE_SSE2
391 __m128 rgba1 = _mm_loadu_ps(row1);
392 __m128 rgba2 = _mm_loadu_ps(row2);
393 __m128 rgba3 = _mm_loadu_ps(row3);
394 __m128 rgba4 = _mm_loadu_ps(row4);
395 rgba1 = _mm_mul_ps(_mm_set1_ps(ma_mb), rgba1);
396 rgba2 = _mm_mul_ps(_mm_set1_ps(ma_b), rgba2);
397 rgba3 = _mm_mul_ps(_mm_set1_ps(a_mb), rgba3);
398 rgba4 = _mm_mul_ps(_mm_set1_ps(a_b), rgba4);
399 __m128 rgba13 = _mm_add_ps(rgba1, rgba3);
400 __m128 rgba24 = _mm_add_ps(rgba2, rgba4);
401 __m128 rgba = _mm_add_ps(rgba13, rgba24);
402 _mm_storeu_ps(output, rgba);
403#else
404 output[0] = ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0];
405 output[1] = ma_mb * row1[1] + a_mb * row3[1] + ma_b * row2[1] + a_b * row4[1];
406 output[2] = ma_mb * row1[2] + a_mb * row3[2] + ma_b * row2[2] + a_b * row4[2];
407 output[3] = ma_mb * row1[3] + a_mb * row3[3] + ma_b * row2[3] + a_b * row4[3];
408#endif
409 }
410}
411
412template<bool border>
413BLI_INLINE uchar4 bilinear_byte_impl(const uchar *buffer, int width, int height, float u, float v)
414{
415 BLI_assert(buffer);
416 uchar4 res;
417
418#if BLI_HAVE_SSE4
419 __m128 uvuv = _mm_set_ps(v, u, v, u);
420 __m128 uvuv_floor = _mm_floor_ps(uvuv);
421
422 /* x1, y1, x2, y2 */
423 __m128i xy12 = _mm_add_epi32(_mm_cvttps_epi32(uvuv_floor), _mm_set_epi32(1, 1, 0, 0));
424 /* Check whether any of the coordinates are outside of the image. */
425 __m128i size_minus_1 = _mm_sub_epi32(_mm_set_epi32(height, width, height, width),
426 _mm_set1_epi32(1));
427
428 /* Samples 1,2,3,4 will be in this order: x1y1, x1y2, x2y1, x2y2. */
429 __m128i x1234, y1234, invalid_1234;
430
431 if constexpr (border) {
432 /* Blend black colors for samples right outside the image: figure out
433 * which of the 4 samples were outside, set their coordinates to zero
434 * and later on put black color into their place. */
435 __m128i too_lo_xy12 = _mm_cmplt_epi32(xy12, _mm_setzero_si128());
436 __m128i too_hi_xy12 = _mm_cmplt_epi32(size_minus_1, xy12);
437 __m128i invalid_xy12 = _mm_or_si128(too_lo_xy12, too_hi_xy12);
438
439 /* Samples 1,2,3,4 are in this order: x1y1, x1y2, x2y1, x2y2 */
440 x1234 = _mm_shuffle_epi32(xy12, _MM_SHUFFLE(2, 2, 0, 0));
441 y1234 = _mm_shuffle_epi32(xy12, _MM_SHUFFLE(3, 1, 3, 1));
442 invalid_1234 = _mm_or_si128(_mm_shuffle_epi32(invalid_xy12, _MM_SHUFFLE(2, 2, 0, 0)),
443 _mm_shuffle_epi32(invalid_xy12, _MM_SHUFFLE(3, 1, 3, 1)));
444 /* Set x & y to zero for invalid samples. */
445 x1234 = _mm_andnot_si128(invalid_1234, x1234);
446 y1234 = _mm_andnot_si128(invalid_1234, y1234);
447 }
448 else {
449 /* Clamp samples to image edges. */
450 __m128i xy12_clamped = _mm_max_epi32(xy12, _mm_setzero_si128());
451 xy12_clamped = _mm_min_epi32(xy12_clamped, size_minus_1);
452 x1234 = _mm_shuffle_epi32(xy12_clamped, _MM_SHUFFLE(2, 2, 0, 0));
453 y1234 = _mm_shuffle_epi32(xy12_clamped, _MM_SHUFFLE(3, 1, 3, 1));
454 }
455
456 /* Read the four sample values. Do address calculations in C, since SSE
457 * before 4.1 makes it very cumbersome to do full integer multiplies. */
458 int xcoord[4];
459 int ycoord[4];
460 _mm_storeu_ps((float *)xcoord, _mm_castsi128_ps(x1234));
461 _mm_storeu_ps((float *)ycoord, _mm_castsi128_ps(y1234));
462 int sample1 = ((const int *)buffer)[ycoord[0] * int64_t(width) + xcoord[0]];
463 int sample2 = ((const int *)buffer)[ycoord[1] * int64_t(width) + xcoord[1]];
464 int sample3 = ((const int *)buffer)[ycoord[2] * int64_t(width) + xcoord[2]];
465 int sample4 = ((const int *)buffer)[ycoord[3] * int64_t(width) + xcoord[3]];
466 __m128i samples1234 = _mm_set_epi32(sample4, sample3, sample2, sample1);
467 if constexpr (border) {
468 /* Set samples to black for the ones that were actually invalid. */
469 samples1234 = _mm_andnot_si128(invalid_1234, samples1234);
470 }
471
472 /* Expand samples from packed 8-bit RGBA to full floats:
473 * spread to 16 bit values. */
474 __m128i rgba16_12 = _mm_unpacklo_epi8(samples1234, _mm_setzero_si128());
475 __m128i rgba16_34 = _mm_unpackhi_epi8(samples1234, _mm_setzero_si128());
476 /* Spread to 32 bit values and convert to float. */
477 __m128 rgba1 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(rgba16_12, _mm_setzero_si128()));
478 __m128 rgba2 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(rgba16_12, _mm_setzero_si128()));
479 __m128 rgba3 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(rgba16_34, _mm_setzero_si128()));
480 __m128 rgba4 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(rgba16_34, _mm_setzero_si128()));
481
482 /* Calculate interpolation factors: (1-a)*(1-b), (1-a)*b, a*(1-b), a*b */
483 __m128 abab = _mm_sub_ps(uvuv, uvuv_floor);
484 __m128 m_abab = _mm_sub_ps(_mm_set1_ps(1.0f), abab);
485 __m128 ab_mab = _mm_shuffle_ps(abab, m_abab, _MM_SHUFFLE(3, 2, 1, 0));
486 __m128 factors = _mm_mul_ps(_mm_shuffle_ps(ab_mab, ab_mab, _MM_SHUFFLE(0, 0, 2, 2)),
487 _mm_shuffle_ps(ab_mab, ab_mab, _MM_SHUFFLE(1, 3, 1, 3)));
488
489 /* Blend the samples. */
490 rgba1 = _mm_mul_ps(_mm_shuffle_ps(factors, factors, _MM_SHUFFLE(0, 0, 0, 0)), rgba1);
491 rgba2 = _mm_mul_ps(_mm_shuffle_ps(factors, factors, _MM_SHUFFLE(1, 1, 1, 1)), rgba2);
492 rgba3 = _mm_mul_ps(_mm_shuffle_ps(factors, factors, _MM_SHUFFLE(2, 2, 2, 2)), rgba3);
493 rgba4 = _mm_mul_ps(_mm_shuffle_ps(factors, factors, _MM_SHUFFLE(3, 3, 3, 3)), rgba4);
494 __m128 rgba13 = _mm_add_ps(rgba1, rgba3);
495 __m128 rgba24 = _mm_add_ps(rgba2, rgba4);
496 __m128 rgba = _mm_add_ps(rgba13, rgba24);
497 rgba = _mm_add_ps(rgba, _mm_set1_ps(0.5f));
498 /* Pack and write to destination: pack to 16 bit signed, then to 8 bit
499 * unsigned, then write resulting 32-bit value. */
500 __m128i rgba32 = _mm_cvttps_epi32(rgba);
501 __m128i rgba16 = _mm_packs_epi32(rgba32, _mm_setzero_si128());
502 __m128i rgba8 = _mm_packus_epi16(rgba16, _mm_setzero_si128());
503 _mm_store_ss((float *)&res, _mm_castsi128_ps(rgba8));
504
505#else
506
507 float uf = floorf(u);
508 float vf = floorf(v);
509
510 int x1 = int(uf);
511 int x2 = x1 + 1;
512 int y1 = int(vf);
513 int y2 = y1 + 1;
514
515 /* Completely outside of the image in bordered mode? */
516 if (border && (x2 < 0 || x1 >= width || y2 < 0 || y1 >= height)) {
517 return uchar4(0);
518 }
519
520 /* Sample locations. */
521 const uchar *row1, *row2, *row3, *row4;
522 uchar empty[4] = {0, 0, 0, 0};
523 if constexpr (border) {
524 row1 = (x1 < 0 || y1 < 0) ? empty : buffer + (int64_t(width) * y1 + x1) * 4;
525 row2 = (x1 < 0 || y2 > height - 1) ? empty : buffer + (int64_t(width) * y2 + x1) * 4;
526 row3 = (x2 > width - 1 || y1 < 0) ? empty : buffer + (int64_t(width) * y1 + x2) * 4;
527 row4 = (x2 > width - 1 || y2 > height - 1) ? empty : buffer + (int64_t(width) * y2 + x2) * 4;
528 }
529 else {
530 x1 = blender::math::clamp(x1, 0, width - 1);
531 x2 = blender::math::clamp(x2, 0, width - 1);
532 y1 = blender::math::clamp(y1, 0, height - 1);
533 y2 = blender::math::clamp(y2, 0, height - 1);
534 row1 = buffer + (int64_t(width) * y1 + x1) * 4;
535 row2 = buffer + (int64_t(width) * y2 + x1) * 4;
536 row3 = buffer + (int64_t(width) * y1 + x2) * 4;
537 row4 = buffer + (int64_t(width) * y2 + x2) * 4;
538 }
539
540 float a = u - uf;
541 float b = v - vf;
542 float a_b = a * b;
543 float ma_b = (1.0f - a) * b;
544 float a_mb = a * (1.0f - b);
545 float ma_mb = (1.0f - a) * (1.0f - b);
546
547 res.x = uchar(ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0] + 0.5f);
548 res.y = uchar(ma_mb * row1[1] + a_mb * row3[1] + ma_b * row2[1] + a_b * row4[1] + 0.5f);
549 res.z = uchar(ma_mb * row1[2] + a_mb * row3[2] + ma_b * row2[2] + a_b * row4[2] + 0.5f);
550 res.w = uchar(ma_mb * row1[3] + a_mb * row3[3] + ma_b * row2[3] + a_b * row4[3] + 0.5f);
551#endif
552
553 return res;
554}
555
557 const uchar *buffer, int width, int height, float u, float v)
558{
559 return bilinear_byte_impl<true>(buffer, width, height, u, v);
560}
561
562uchar4 interpolate_bilinear_byte(const uchar *buffer, int width, int height, float u, float v)
563{
564 return bilinear_byte_impl<false>(buffer, width, height, u, v);
565}
566
567float4 interpolate_bilinear_border_fl(const float *buffer, int width, int height, float u, float v)
568{
569 float4 res;
571 buffer, res, width, height, 4, u, v, InterpWrapMode::Border, InterpWrapMode::Border);
572 return res;
573}
574
576 const float *buffer, float *output, int width, int height, int components, float u, float v)
577{
578 bilinear_fl_impl(buffer,
579 output,
580 width,
581 height,
582 components,
583 u,
584 v,
587}
588
589float4 interpolate_bilinear_fl(const float *buffer, int width, int height, float u, float v)
590{
591 float4 res;
593 buffer, res, width, height, 4, u, v, InterpWrapMode::Extend, InterpWrapMode::Extend);
594 return res;
595}
596
598 const float *buffer, float *output, int width, int height, int components, float u, float v)
599{
600 bilinear_fl_impl(buffer,
601 output,
602 width,
603 height,
604 components,
605 u,
606 v,
609}
610
611void interpolate_bilinear_wrapmode_fl(const float *buffer,
612 float *output,
613 int width,
614 int height,
615 int components,
616 float u,
617 float v,
618 InterpWrapMode wrap_u,
619 InterpWrapMode wrap_v)
620{
621 bilinear_fl_impl(buffer, output, width, height, components, u, v, wrap_u, wrap_v);
622}
623
624uchar4 interpolate_bilinear_wrap_byte(const uchar *buffer, int width, int height, float u, float v)
625{
626 u = floored_fmod(u, float(width));
627 v = floored_fmod(v, float(height));
628 float uf = floorf(u);
629 float vf = floorf(v);
630
631 int x1 = int(uf);
632 int x2 = x1 + 1;
633 int y1 = int(vf);
634 int y2 = y1 + 1;
635
636 /* Wrap interpolation pixels if needed. */
637 BLI_assert(x1 >= 0 && x1 < width && y1 >= 0 && y1 < height);
638 if (x2 >= width) {
639 x2 = 0;
640 }
641 if (y2 >= height) {
642 y2 = 0;
643 }
644
645 float a = u - uf;
646 float b = v - vf;
647 float a_b = a * b;
648 float ma_b = (1.0f - a) * b;
649 float a_mb = a * (1.0f - b);
650 float ma_mb = (1.0f - a) * (1.0f - b);
651
652 /* Blend samples. */
653 const uchar *row1 = buffer + (int64_t(width) * y1 + x1) * 4;
654 const uchar *row2 = buffer + (int64_t(width) * y2 + x1) * 4;
655 const uchar *row3 = buffer + (int64_t(width) * y1 + x2) * 4;
656 const uchar *row4 = buffer + (int64_t(width) * y2 + x2) * 4;
657
658 uchar4 res;
659 res.x = uchar(ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0] + 0.5f);
660 res.y = uchar(ma_mb * row1[1] + a_mb * row3[1] + ma_b * row2[1] + a_b * row4[1] + 0.5f);
661 res.z = uchar(ma_mb * row1[2] + a_mb * row3[2] + ma_b * row2[2] + a_b * row4[2] + 0.5f);
662 res.w = uchar(ma_mb * row1[3] + a_mb * row3[3] + ma_b * row2[3] + a_b * row4[3] + 0.5f);
663 return res;
664}
665
666float4 interpolate_bilinear_wrap_fl(const float *buffer, int width, int height, float u, float v)
667{
668 float4 res;
670 buffer, res, width, height, 4, u, v, InterpWrapMode::Repeat, InterpWrapMode::Repeat);
671 return res;
672}
673
674uchar4 interpolate_cubic_bspline_byte(const uchar *buffer, int width, int height, float u, float v)
675{
676 uchar4 res;
678 buffer, res, width, height, 4, u, v, InterpWrapMode::Extend, InterpWrapMode::Extend);
679 return res;
680}
681
682float4 interpolate_cubic_bspline_fl(const float *buffer, int width, int height, float u, float v)
683{
684 float4 res;
686 buffer, res, width, height, 4, u, v, InterpWrapMode::Extend, InterpWrapMode::Extend);
687 return res;
688}
689
691 const float *buffer, float *output, int width, int height, int components, float u, float v)
692{
694 output,
695 width,
696 height,
697 components,
698 u,
699 v,
702}
703
705 float *output,
706 int width,
707 int height,
708 int components,
709 float u,
710 float v,
713{
715 buffer, output, width, height, components, u, v, wrap_u, wrap_v);
716}
717
719 const uchar *buffer, int width, int height, float u, float v)
720{
721 uchar4 res;
723 buffer, res, width, height, 4, u, v, InterpWrapMode::Extend, InterpWrapMode::Extend);
724 return res;
725}
726
727float4 interpolate_cubic_mitchell_fl(const float *buffer, int width, int height, float u, float v)
728{
729 float4 res;
731 buffer, res, width, height, 4, u, v, InterpWrapMode::Extend, InterpWrapMode::Extend);
732 return res;
733}
734
736 const float *buffer, float *output, int width, int height, int components, float u, float v)
737{
739 output,
740 width,
741 height,
742 components,
743 u,
744 v,
747}
748
749} // namespace blender::math
750
751/**************************************************************************
752 * Filtering method based on
753 * "Creating raster omnimax images from multiple perspective views
754 * using the elliptical weighted average filter"
755 * by Ned Greene and Paul S. Heckbert (1986)
756 ***************************************************************************/
757
758/* Table of `(exp(ar) - exp(a)) / (1 - exp(a))` for `r` in range [0, 1] and `a = -2`.
759 * used instead of actual gaussian,
760 * otherwise at high texture magnifications circular artifacts are visible. */
761#define EWA_MAXIDX 255
762const float EWA_WTS[EWA_MAXIDX + 1] = {
763 1.0f, 0.990965f, 0.982f, 0.973105f, 0.96428f, 0.955524f, 0.946836f,
764 0.938216f, 0.929664f, 0.921178f, 0.912759f, 0.904405f, 0.896117f, 0.887893f,
765 0.879734f, 0.871638f, 0.863605f, 0.855636f, 0.847728f, 0.839883f, 0.832098f,
766 0.824375f, 0.816712f, 0.809108f, 0.801564f, 0.794079f, 0.786653f, 0.779284f,
767 0.771974f, 0.76472f, 0.757523f, 0.750382f, 0.743297f, 0.736267f, 0.729292f,
768 0.722372f, 0.715505f, 0.708693f, 0.701933f, 0.695227f, 0.688572f, 0.68197f,
769 0.67542f, 0.66892f, 0.662471f, 0.656073f, 0.649725f, 0.643426f, 0.637176f,
770 0.630976f, 0.624824f, 0.618719f, 0.612663f, 0.606654f, 0.600691f, 0.594776f,
771 0.588906f, 0.583083f, 0.577305f, 0.571572f, 0.565883f, 0.56024f, 0.55464f,
772 0.549084f, 0.543572f, 0.538102f, 0.532676f, 0.527291f, 0.521949f, 0.516649f,
773 0.511389f, 0.506171f, 0.500994f, 0.495857f, 0.490761f, 0.485704f, 0.480687f,
774 0.475709f, 0.470769f, 0.465869f, 0.461006f, 0.456182f, 0.451395f, 0.446646f,
775 0.441934f, 0.437258f, 0.432619f, 0.428017f, 0.42345f, 0.418919f, 0.414424f,
776 0.409963f, 0.405538f, 0.401147f, 0.39679f, 0.392467f, 0.388178f, 0.383923f,
777 0.379701f, 0.375511f, 0.371355f, 0.367231f, 0.363139f, 0.359079f, 0.355051f,
778 0.351055f, 0.347089f, 0.343155f, 0.339251f, 0.335378f, 0.331535f, 0.327722f,
779 0.323939f, 0.320186f, 0.316461f, 0.312766f, 0.3091f, 0.305462f, 0.301853f,
780 0.298272f, 0.294719f, 0.291194f, 0.287696f, 0.284226f, 0.280782f, 0.277366f,
781 0.273976f, 0.270613f, 0.267276f, 0.263965f, 0.26068f, 0.257421f, 0.254187f,
782 0.250979f, 0.247795f, 0.244636f, 0.241502f, 0.238393f, 0.235308f, 0.232246f,
783 0.229209f, 0.226196f, 0.223206f, 0.220239f, 0.217296f, 0.214375f, 0.211478f,
784 0.208603f, 0.20575f, 0.20292f, 0.200112f, 0.197326f, 0.194562f, 0.191819f,
785 0.189097f, 0.186397f, 0.183718f, 0.18106f, 0.178423f, 0.175806f, 0.17321f,
786 0.170634f, 0.168078f, 0.165542f, 0.163026f, 0.16053f, 0.158053f, 0.155595f,
787 0.153157f, 0.150738f, 0.148337f, 0.145955f, 0.143592f, 0.141248f, 0.138921f,
788 0.136613f, 0.134323f, 0.132051f, 0.129797f, 0.12756f, 0.125341f, 0.123139f,
789 0.120954f, 0.118786f, 0.116635f, 0.114501f, 0.112384f, 0.110283f, 0.108199f,
790 0.106131f, 0.104079f, 0.102043f, 0.100023f, 0.0980186f, 0.09603f, 0.094057f,
791 0.0920994f, 0.0901571f, 0.08823f, 0.0863179f, 0.0844208f, 0.0825384f, 0.0806708f,
792 0.0788178f, 0.0769792f, 0.0751551f, 0.0733451f, 0.0715493f, 0.0697676f, 0.0679997f,
793 0.0662457f, 0.0645054f, 0.0627786f, 0.0610654f, 0.0593655f, 0.0576789f, 0.0560055f,
794 0.0543452f, 0.0526979f, 0.0510634f, 0.0494416f, 0.0478326f, 0.0462361f, 0.0446521f,
795 0.0430805f, 0.0415211f, 0.039974f, 0.0384389f, 0.0369158f, 0.0354046f, 0.0339052f,
796 0.0324175f, 0.0309415f, 0.029477f, 0.0280239f, 0.0265822f, 0.0251517f, 0.0237324f,
797 0.0223242f, 0.020927f, 0.0195408f, 0.0181653f, 0.0168006f, 0.0154466f, 0.0141031f,
798 0.0127701f, 0.0114476f, 0.0101354f, 0.00883339f, 0.00754159f, 0.00625989f, 0.00498819f,
799 0.00372644f, 0.00247454f, 0.00123242f, 0.0f,
800};
801
802static void radangle2imp(float a2, float b2, float th, float *A, float *B, float *C, float *F)
803{
804 float ct2 = cosf(th);
805 const float st2 = 1.0f - ct2 * ct2; /* <- sin(th)^2 */
806 ct2 *= ct2;
807 *A = a2 * st2 + b2 * ct2;
808 *B = (b2 - a2) * sinf(2.0f * th);
809 *C = a2 * ct2 + b2 * st2;
810 *F = a2 * b2;
811}
812
814 float A, float B, float C, float F, float *a, float *b, float *th, float *ecc)
815{
816 /* NOTE: all tests here are done to make sure possible overflows are hopefully minimized. */
817
818 if (F <= 1e-5f) { /* use arbitrary major radius, zero minor, infinite eccentricity */
819 *a = sqrtf(A > C ? A : C);
820 *b = 0.0f;
821 *ecc = 1e10f;
822 *th = 0.5f * (atan2f(B, A - C) + float(M_PI));
823 }
824 else {
825 const float AmC = A - C, ApC = A + C, F2 = F * 2.0f;
826 const float r = sqrtf(AmC * AmC + B * B);
827 float d = ApC - r;
828 *a = (d <= 0.0f) ? sqrtf(A > C ? A : C) : sqrtf(F2 / d);
829 d = ApC + r;
830 if (d <= 0.0f) {
831 *b = 0.0f;
832 *ecc = 1e10f;
833 }
834 else {
835 *b = sqrtf(F2 / d);
836 *ecc = *a / *b;
837 }
838 /* Increase theta by `0.5 * pi` (angle of major axis). */
839 *th = 0.5f * (atan2f(B, AmC) + float(M_PI));
840 }
841}
842
843void BLI_ewa_filter(const int width,
844 const int height,
845 const bool intpol,
846 const bool use_alpha,
847 const float uv[2],
848 const float du[2],
849 const float dv[2],
850 ewa_filter_read_pixel_cb read_pixel_cb,
851 void *userdata,
852 float result[4])
853{
854 /* Scaling `dxt` / `dyt` by full resolution can cause overflow because of huge A/B/C and esp.
855 * F values, scaling by aspect ratio alone does the opposite, so try something in between
856 * instead. */
857 const float ff2 = float(width), ff = sqrtf(ff2), q = float(height) / ff;
858 const float Ux = du[0] * ff, Vx = du[1] * q, Uy = dv[0] * ff, Vy = dv[1] * q;
859 float A = Vx * Vx + Vy * Vy;
860 float B = -2.0f * (Ux * Vx + Uy * Vy);
861 float C = Ux * Ux + Uy * Uy;
862 float F = A * C - B * B * 0.25f;
863 float a, b, th, ecc, a2, b2, ue, ve, U0, V0, DDQ, U, ac1, ac2, BU, d;
864 int u, v, u1, u2, v1, v2;
865
866 /* The so-called 'high' quality EWA method simply adds a constant of 1 to both A & C,
867 * so the ellipse always covers at least some texels. But since the filter is now always larger,
868 * it also means that everywhere else it's also more blurry then ideally should be the case.
869 * So instead here the ellipse radii are modified instead whenever either is too low.
870 * Use a different radius based on interpolation switch,
871 * just enough to anti-alias when interpolation is off,
872 * and slightly larger to make result a bit smoother than bilinear interpolation when
873 * interpolation is on (minimum values: `const float rmin = intpol ? 1.0f : 0.5f;`) */
874 const float rmin = (intpol ? 1.5625f : 0.765625f) / ff2;
875 BLI_ewa_imp2radangle(A, B, C, F, &a, &b, &th, &ecc);
876 if ((b2 = b * b) < rmin) {
877 if ((a2 = a * a) < rmin) {
878 UNUSED_VARS(a2, b2);
879 B = 0.0f;
880 A = C = rmin;
881 F = A * C;
882 }
883 else {
884 b2 = rmin;
885 radangle2imp(a2, b2, th, &A, &B, &C, &F);
886 }
887 }
888
889 ue = ff * sqrtf(C);
890 ve = ff * sqrtf(A);
891 d = float(EWA_MAXIDX + 1) / (F * ff2);
892 A *= d;
893 B *= d;
894 C *= d;
895
896 U0 = uv[0] * float(width);
897 V0 = uv[1] * float(height);
898 u1 = int(floorf(U0 - ue));
899 u2 = int(ceilf(U0 + ue));
900 v1 = int(floorf(V0 - ve));
901 v2 = int(ceilf(V0 + ve));
902
903 /* sane clamping to avoid unnecessarily huge loops */
904 /* NOTE: if eccentricity gets clamped (see above),
905 * the ue/ve limits can also be lowered accordingly
906 */
907 if (U0 - float(u1) > EWA_MAXIDX) {
908 u1 = int(U0) - EWA_MAXIDX;
909 }
910 if (float(u2) - U0 > EWA_MAXIDX) {
911 u2 = int(U0) + EWA_MAXIDX;
912 }
913 if (V0 - float(v1) > EWA_MAXIDX) {
914 v1 = int(V0) - EWA_MAXIDX;
915 }
916 if (float(v2) - V0 > EWA_MAXIDX) {
917 v2 = int(V0) + EWA_MAXIDX;
918 }
919
920 /* Early output check for cases the whole region is outside of the buffer. */
921 if ((u2 < 0 || u1 >= width) || (v2 < 0 || v1 >= height)) {
923 return;
924 }
925
926 U0 -= 0.5f;
927 V0 -= 0.5f;
928 DDQ = 2.0f * A;
929 U = float(u1) - U0;
930 ac1 = A * (2.0f * U + 1.0f);
931 ac2 = A * U * U;
932 BU = B * U;
933
934 d = 0.0f;
936 for (v = v1; v <= v2; v++) {
937 const float V = float(v) - V0;
938 float DQ = ac1 + B * V;
939 float Q = (C * V + BU) * V + ac2;
940 for (u = u1; u <= u2; u++) {
941 if (Q < float(EWA_MAXIDX + 1)) {
942 float tc[4];
943 const float wt = EWA_WTS[(Q < 0.0f) ? 0 : uint(Q)];
944 read_pixel_cb(userdata, u, v, tc);
945 madd_v3_v3fl(result, tc, wt);
946 result[3] += use_alpha ? tc[3] * wt : 0.0f;
947 d += wt;
948 }
949 Q += DQ;
950 DQ += DDQ;
951 }
952 }
953
954 /* `d` should hopefully never be zero anymore. */
955 d = 1.0f / d;
956 mul_v3_fl(result, d);
957 /* Clipping can be ignored if alpha used, `texr->trgba[3]` already includes filtered edge. */
958 result[3] = use_alpha ? result[3] * d : 1.0f;
959}
#define BLI_assert(a)
Definition BLI_assert.h:46
#define BLI_INLINE
MINLINE float floored_fmod(float f, float n)
#define M_PI
void(*)(void *userdata, int x, int y, float result[4]) ewa_filter_read_pixel_cb
#define EWA_MAXIDX
const float EWA_WTS[EWA_MAXIDX+1]
MINLINE void copy_v4_v4(float r[4], const float a[4])
MINLINE void madd_v3_v3fl(float r[3], const float a[3], float f)
MINLINE void copy_v2_v2(float r[2], const float a[2])
MINLINE void mul_v3_fl(float r[3], float f)
MINLINE void copy_v3_v3(float r[3], const float a[3])
void copy_vn_fl(float *array_tar, int size, float val)
MINLINE void zero_v4(float r[4])
unsigned char uchar
unsigned int uint
#define CLAMP(a, b, c)
#define UNUSED_VARS(...)
#define C
Definition RandGen.cpp:29
#define U
BMesh const char void * data
ATTR_WARN_UNUSED_RESULT const BMVert * v2
ATTR_WARN_UNUSED_RESULT const BMVert * v
#define A
long long int int64_t
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition btDbvt.cpp:52
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
Definition btQuadWord.h:119
#define sinf(x)
#define cosf(x)
#define atan2f(x, y)
#define ceilf(x)
#define floorf(x)
#define sqrtf(x)
#define floor
#define out
#define filter
#define output
static void radangle2imp(float a2, float b2, float th, float *A, float *B, float *C, float *F)
void BLI_ewa_filter(const int width, const int height, const bool intpol, const bool use_alpha, const float uv[2], const float du[2], const float dv[2], ewa_filter_read_pixel_cb read_pixel_cb, void *userdata, float result[4])
void BLI_ewa_imp2radangle(float A, float B, float C, float F, float *a, float *b, float *th, float *ecc)
#define T
#define B
#define F
uchar4 interpolate_cubic_mitchell_byte(const uchar *buffer, int width, int height, float u, float v)
uchar4 interpolate_bilinear_wrap_byte(const uchar *buffer, int width, int height, float u, float v)
BLI_INLINE void bicubic_interpolation(const T *src_buffer, T *output, int width, int height, int components, float u, float v, InterpWrapMode wrap_u, InterpWrapMode wrap_v)
static float4 cubic_filter_coefficients(float f)
uchar4 interpolate_bilinear_byte(const uchar *buffer, int width, int height, float u, float v)
BLI_INLINE uchar4 bilinear_byte_impl(const uchar *buffer, int width, int height, float u, float v)
T clamp(const T &a, const T &min, const T &max)
BLI_INLINE void bilinear_fl_impl(const float *buffer, float *output, int width, int height, int components, float u, float v, InterpWrapMode wrap_x, InterpWrapMode wrap_y)
float4 interpolate_bilinear_border_fl(const float *buffer, int width, int height, float u, float v)
float4 interpolate_bilinear_wrap_fl(const float *buffer, int width, int height, float u, float v)
float4 interpolate_cubic_bspline_fl(const float *buffer, int width, int height, float u, float v)
BLI_INLINE int wrap_coord(float u, int size, InterpWrapMode wrap)
T min(const T &a, const T &b)
void interpolate_nearest_wrapmode_fl(const float *buffer, float *output, int width, int height, int components, float u, float v, InterpWrapMode wrap_u, InterpWrapMode wrap_v)
float4 interpolate_bilinear_fl(const float *buffer, int width, int height, float u, float v)
float4 interpolate_cubic_mitchell_fl(const float *buffer, int width, int height, float u, float v)
T max(const T &a, const T &b)
void interpolate_bilinear_wrapmode_fl(const float *buffer, float *output, int width, int height, int components, float u, float v, InterpWrapMode wrap_u, InterpWrapMode wrap_v)
uchar4 interpolate_cubic_bspline_byte(const uchar *buffer, int width, int height, float u, float v)
uchar4 interpolate_bilinear_border_byte(const uchar *buffer, int width, int height, float u, float v)
void interpolate_cubic_bspline_wrapmode_fl(const float *buffer, float *output, int width, int height, int components, float u, float v, InterpWrapMode wrap_u, InterpWrapMode wrap_v)
blender::VecBase< uint8_t, 4 > uchar4
VecBase< float, 4 > float4
float wrap(float value, float max, float min)
Definition node_math.h:71
i
Definition text_draw.cc:230
CCL_NAMESPACE_BEGIN struct Window V