Blender V4.3
util/math.h
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#ifndef __UTIL_MATH_H__
6#define __UTIL_MATH_H__
7
8/* Math
9 *
10 * Basic math functions on scalar and vector types. This header is used by
11 * both the kernel code when compiled as C++, and other C++ non-kernel code. */
12
13#ifndef __KERNEL_GPU__
14# include <cmath>
15#endif
16
17#ifdef __HIP__
18# include <hip/hip_vector_types.h>
19#endif
20
21#if !defined(__KERNEL_METAL__)
22# include <float.h>
23# include <math.h>
24# include <stdio.h>
25#endif /* !defined(__KERNEL_METAL__) */
26
27#include "util/types.h"
28
30
31/* Float Pi variations */
32
33/* Division */
34#ifndef M_PI_F
35# define M_PI_F (3.1415926535897932f) /* pi */
36#endif
37#ifndef M_PI_2_F
38# define M_PI_2_F (1.5707963267948966f) /* pi/2 */
39#endif
40#ifndef M_PI_4_F
41# define M_PI_4_F (0.7853981633974830f) /* pi/4 */
42#endif
43#ifndef M_1_PI_F
44# define M_1_PI_F (0.3183098861837067f) /* 1/pi */
45#endif
46#ifndef M_2_PI_F
47# define M_2_PI_F (0.6366197723675813f) /* 2/pi */
48#endif
49#ifndef M_1_2PI_F
50# define M_1_2PI_F (0.1591549430918953f) /* 1/(2*pi) */
51#endif
52#ifndef M_1_4PI_F
53# define M_1_4PI_F (0.0795774715459476f) /* 1/(4*pi) */
54#endif
55#ifndef M_SQRT_PI_8_F
56# define M_SQRT_PI_8_F (0.6266570686577501f) /* sqrt(pi/8) */
57#endif
58#ifndef M_LN_2PI_F
59# define M_LN_2PI_F (1.8378770664093454f) /* ln(2*pi) */
60#endif
61
62/* Multiplication */
63#ifndef M_2PI_F
64# define M_2PI_F (6.2831853071795864f) /* 2*pi */
65#endif
66#ifndef M_4PI_F
67# define M_4PI_F (12.566370614359172f) /* 4*pi */
68#endif
69#ifndef M_PI_4F
70# define M_PI_4F 0.78539816339744830962f /* pi/4 */
71#endif
72
73/* Float sqrt variations */
74#ifndef M_SQRT2_F
75# define M_SQRT2_F (1.4142135623730950f) /* sqrt(2) */
76#endif
77#ifndef M_CBRT2_F
78# define M_CBRT2_F 1.2599210498948732f /* cbrt(2) */
79#endif
80#ifndef M_SQRT1_2F
81# define M_SQRT1_2F 0.70710678118654752440f /* sqrt(1/2) */
82#endif
83#ifndef M_SQRT3_F
84# define M_SQRT3_F (1.7320508075688772f) /* sqrt(3) */
85#endif
86#ifndef M_LN2_F
87# define M_LN2_F (0.6931471805599453f) /* ln(2) */
88#endif
89#ifndef M_LN10_F
90# define M_LN10_F (2.3025850929940457f) /* ln(10) */
91#endif
92
93/* Scalar */
94
95#if !defined(__HIP__) && !defined(__KERNEL_ONEAPI__)
96# ifdef _WIN32
97ccl_device_inline float fmaxf(float a, float b)
98{
99 return (a > b) ? a : b;
100}
101
102ccl_device_inline float fminf(float a, float b)
103{
104 return (a < b) ? a : b;
105}
106
107# endif /* _WIN32 */
108#endif /* __HIP__, __KERNEL_ONEAPI__ */
109
110#if !defined(__KERNEL_GPU__) || defined(__KERNEL_ONEAPI__)
111# ifndef __KERNEL_ONEAPI__
112using std::isfinite;
113using std::isnan;
114using std::sqrt;
115# else
116# define isfinite(x) sycl::isfinite((x))
117# define isnan(x) sycl::isnan((x))
118# endif
119
121{
122 return (x > 0) ? x : -x;
123}
124
125ccl_device_inline int max(int a, int b)
126{
127 return (a > b) ? a : b;
128}
129
130ccl_device_inline int min(int a, int b)
131{
132 return (a < b) ? a : b;
133}
134
136{
137 return (a > b) ? a : b;
138}
139
141{
142 return (a < b) ? a : b;
143}
144
146{
147 return (a > b) ? a : b;
148}
149
151{
152 return (a < b) ? a : b;
153}
154
155/* NOTE: On 64bit Darwin the `size_t` is defined as `unsigned long int` and `uint64_t` is defined
156 * as `unsigned long long`. Both of the definitions are 64 bit unsigned integer, but the automatic
157 * substitution does not allow to automatically pick function defined for `uint64_t` as it is not
158 * exactly the same type definition.
159 * Work this around by adding a templated function enabled for `size_t` type which will be used
160 * when there is no explicit specialization of `min()`/`max()` above. */
161
162template<class T>
163ccl_device_inline typename std::enable_if_t<std::is_same_v<T, size_t>, T> max(T a, T b)
164{
165 return (a > b) ? a : b;
166}
167
168template<class T>
169ccl_device_inline typename std::enable_if_t<std::is_same_v<T, size_t>, T> min(T a, T b)
170{
171 return (a < b) ? a : b;
172}
173
174ccl_device_inline float max(float a, float b)
175{
176 return (a > b) ? a : b;
177}
178
179ccl_device_inline float min(float a, float b)
180{
181 return (a < b) ? a : b;
182}
183
184ccl_device_inline double max(double a, double b)
185{
186 return (a > b) ? a : b;
187}
188
189ccl_device_inline double min(double a, double b)
190{
191 return (a < b) ? a : b;
192}
193
194/* These 2 guys are templated for usage with registers data.
195 *
196 * NOTE: Since this is CPU-only functions it is ok to use references here.
197 * But for other devices we'll need to be careful about this.
198 */
199
200template<typename T> ccl_device_inline T min4(const T &a, const T &b, const T &c, const T &d)
201{
202 return min(min(a, b), min(c, d));
203}
204
205template<typename T> ccl_device_inline T max4(const T &a, const T &b, const T &c, const T &d)
206{
207 return max(max(a, b), max(c, d));
208}
209#endif /* __KERNEL_GPU__ */
210
211ccl_device_inline float min4(float a, float b, float c, float d)
212{
213 return min(min(a, b), min(c, d));
214}
215
216ccl_device_inline float max4(float a, float b, float c, float d)
217{
218 return max(max(a, b), max(c, d));
219}
220
221#if !defined(__KERNEL_METAL__) && !defined(__KERNEL_ONEAPI__)
222/* Int/Float conversion */
223
225{
226 union {
227 uint ui;
228 int i;
229 } u;
230 u.ui = i;
231 return u.i;
232}
233
235{
236 union {
237 uint ui;
238 int i;
239 } u;
240 u.i = i;
241 return u.ui;
242}
243
245{
246 union {
247 uint i;
248 float f;
249 } u;
250 u.f = f;
251 return u.i;
252}
253
254# ifndef __HIP__
256{
257 union {
258 int i;
259 float f;
260 } u;
261 u.f = f;
262 return u.i;
263}
264
266{
267 union {
268 int i;
269 float f;
270 } u;
271 u.i = i;
272 return u.f;
273}
274
276{
277 union {
278 uint i;
279 float f;
280 } u;
281 u.f = f;
282 return u.i;
283}
284
286{
287 union {
288 uint i;
289 float f;
290 } u;
291 u.i = i;
292 return u.f;
293}
294# endif
295
297{
298# ifdef __KERNEL_SSE__
299 return int4(_mm_castps_si128(f.m128));
300# else
301 return make_int4(
303# endif
304}
305
307{
308# ifdef __KERNEL_SSE__
309 return float4(_mm_castsi128_ps(i.m128));
310# else
311 return make_float4(
313# endif
314}
315#endif /* !defined(__KERNEL_METAL__) */
316
317#if defined(__KERNEL_METAL__)
319{
320 return isnan(f);
321}
322
324{
325 return isfinite(f);
326}
327#else
329{
330 return ((uint64_t)ptr) & 0xFFFFFFFF;
331}
332
334{
335 return (((uint64_t)ptr) >> 32) & 0xFFFFFFFF;
336}
337
338template<typename T> ccl_device_inline T *pointer_unpack_from_uint(const uint a, const uint b)
339{
340 return (T *)(((uint64_t)b << 32) | a);
341}
342
344{
345 return (a << 16) | b;
346}
347
349{
350 return i >> 16;
351}
352
354{
355 return i & 0xFFFF;
356}
357
358/* Versions of functions which are safe for fast math. */
360{
361 unsigned int x = __float_as_uint(f);
362 return (x << 1) > 0xff000000u;
363}
364
366{
367 /* By IEEE 754 rule, 2*Inf equals Inf */
368 unsigned int x = __float_as_uint(f);
369 return (f == f) && (x == 0 || x == (1u << 31) || (f != 2.0f * f)) && !((x << 1) > 0xff000000u);
370}
371#endif
372
374{
375 return isfinite_safe(v) ? v : 0.0f;
376}
377
378#if !defined(__KERNEL_METAL__)
379ccl_device_inline int clamp(int a, int mn, int mx)
380{
381 return min(max(a, mn), mx);
382}
383
384ccl_device_inline float clamp(float a, float mn, float mx)
385{
386 return min(max(a, mn), mx);
387}
388
389ccl_device_inline float mix(float a, float b, float t)
390{
391 return a + t * (b - a);
392}
393
394ccl_device_inline float smoothstep(float edge0, float edge1, float x)
395{
396 float result;
397 if (x < edge0) {
398 result = 0.0f;
399 }
400 else if (x >= edge1) {
401 result = 1.0f;
402 }
403 else {
404 float t = (x - edge0) / (edge1 - edge0);
405 result = (3.0f - 2.0f * t) * (t * t);
406 }
407 return result;
408}
409
410#endif /* !defined(__KERNEL_METAL__) */
411
412#if defined(__KERNEL_CUDA__)
413ccl_device_inline float saturatef(float a)
414{
415 return __saturatef(a);
416}
417#elif !defined(__KERNEL_METAL__)
419{
420 return clamp(a, 0.0f, 1.0f);
421}
422#endif /* __KERNEL_CUDA__ */
423
425{
426 return (int)f;
427}
428
430{
431 return float_to_int(floorf(f));
432}
433
435{
436 float f = floorf(x);
437 *i = float_to_int(f);
438 return x - f;
439}
440
442{
443 return float_to_int(ceilf(f));
444}
445
447{
448 return x - floorf(x);
449}
450
451/* Adapted from `godot-engine` math_funcs.h. */
452ccl_device_inline float wrapf(float value, float max, float min)
453{
454 float range = max - min;
455 return (range != 0.0f) ? value - (range * floorf((value - min) / range)) : min;
456}
457
458ccl_device_inline float pingpongf(float a, float b)
459{
460 return (b != 0.0f) ? fabsf(fractf((a - b) / (b * 2.0f)) * b * 2.0f - b) : 0.0f;
461}
462
463ccl_device_inline float smoothminf(float a, float b, float k)
464{
465 if (k != 0.0f) {
466 float h = fmaxf(k - fabsf(a - b), 0.0f) / k;
467 return fminf(a, b) - h * h * h * k * (1.0f / 6.0f);
468 }
469 else {
470 return fminf(a, b);
471 }
472}
473
475{
476 return (f < 0.0f) ? -1.0f : 1.0f;
477}
478
479ccl_device_inline float nonzerof(float f, float eps)
480{
481 if (fabsf(f) < eps) {
482 return signf(f) * eps;
483 }
484 else {
485 return f;
486 }
487}
488
489/* The behavior of `atan2(0, 0)` is undefined on many platforms, to ensure consistent behavior, we
490 * return 0 in this case. See !126951.
491 * Computes the angle between the positive x axis and the vector pointing from origin to (x, y). */
492ccl_device_inline float compatible_atan2(const float y, const float x)
493{
494 return (x == 0.0f && y == 0.0f) ? 0.0f : atan2f(y, x);
495}
496
497/* `signum` function testing for zero. Matches GLSL and OSL functions. */
499{
500 if (f == 0.0f) {
501 return 0.0f;
502 }
503 else {
504 return signf(f);
505 }
506}
507
509{
510 if (f <= 0.0f) {
511 return 0.0f;
512 }
513 if (f >= 1.0f) {
514 return 1.0f;
515 }
516 float ff = f * f;
517 return (3.0f * ff - 2.0f * ff * f);
518}
519
520ccl_device_inline int mod(int x, int m)
521{
522 return (x % m + m) % m;
523}
524
526{
527 return make_float3(a.x, a.y, 0.0f);
528}
529
531{
532 return make_float2(a.x, a.y);
533}
534
536{
537 return make_float3(a.x, a.y, a.z);
538}
539
541{
542 return make_float4(a.x, a.y, a.z, 1.0f);
543}
544
545ccl_device_inline float4 float3_to_float4(const float3 a, const float w)
546{
547 return make_float4(a.x, a.y, a.z, w);
548}
549
550ccl_device_inline float inverse_lerp(float a, float b, float x)
551{
552 return (x - a) / (b - a);
553}
554
555/* Cubic interpolation between b and c, a and d are the previous and next point. */
556ccl_device_inline float cubic_interp(float a, float b, float c, float d, float x)
557{
558 return 0.5f *
559 (((d + 3.0f * (b - c) - a) * x + (2.0f * a - 5.0f * b + 4.0f * c - d)) * x +
560 (c - a)) *
561 x +
562 b;
563}
564
566
567#include "util/math_int2.h"
568#include "util/math_int3.h"
569#include "util/math_int4.h"
570#include "util/math_int8.h"
571
572#include "util/math_float2.h"
573#include "util/math_float4.h"
574#include "util/math_float8.h"
575
576#include "util/math_float3.h"
577
578#include "util/rect.h"
579
581
582/* Triangle */
583
585 ccl_private const float3 &v2,
586 ccl_private const float3 &v3)
587{
588 return len(cross(v3 - v2, v1 - v2)) * 0.5f;
589}
590
591/* Orthonormal vectors */
592
596{
597#if 0
598 if (fabsf(N.y) >= 0.999f) {
599 *a = make_float3(1, 0, 0);
600 *b = make_float3(0, 0, 1);
601 return;
602 }
603 if (fabsf(N.z) >= 0.999f) {
604 *a = make_float3(1, 0, 0);
605 *b = make_float3(0, 1, 0);
606 return;
607 }
608#endif
609
610 if (N.x != N.y || N.x != N.z)
611 *a = make_float3(N.z - N.y, N.x - N.z, N.y - N.x); //(1,1,1)x N
612 else
613 *a = make_float3(N.z - N.y, N.x + N.z, -N.y - N.x); //(-1,1,1)x N
614
615 *a = normalize(*a);
616 *b = cross(N, *a);
617}
618
619/* Color division */
620
622{
624 GET_SPECTRUM_CHANNEL(a, i) = (GET_SPECTRUM_CHANNEL(a, i) != 0.0f) ?
625 1.0f / GET_SPECTRUM_CHANNEL(a, i) :
626 0.0f;
627 }
628
629 return a;
630}
631
633{
635 GET_SPECTRUM_CHANNEL(a, i) = (GET_SPECTRUM_CHANNEL(b, i) != 0.0f) ?
637 0.0f;
638 }
639
640 return a;
641}
642
644{
645 float x, y, z;
646
647 x = (b.x != 0.0f) ? a.x / b.x : 0.0f;
648 y = (b.y != 0.0f) ? a.y / b.y : 0.0f;
649 z = (b.z != 0.0f) ? a.z / b.z : 0.0f;
650
651 /* try to get gray even if b is zero */
652 if (b.x == 0.0f) {
653 if (b.y == 0.0f) {
654 x = z;
655 y = z;
656 }
657 else if (b.z == 0.0f) {
658 x = y;
659 z = y;
660 }
661 else {
662 x = 0.5f * (y + z);
663 }
664 }
665 else if (b.y == 0.0f) {
666 if (b.z == 0.0f) {
667 y = x;
668 z = x;
669 }
670 else {
671 y = 0.5f * (x + z);
672 }
673 }
674 else if (b.z == 0.0f) {
675 z = 0.5f * (x + y);
676 }
677
678 return make_float3(x, y, z);
679}
680
681/* Rotation of point around axis and angle */
682
684{
685 float costheta = cosf(angle);
686 float sintheta = sinf(angle);
687 float3 r;
688
689 r.x = ((costheta + (1 - costheta) * axis.x * axis.x) * p.x) +
690 (((1 - costheta) * axis.x * axis.y - axis.z * sintheta) * p.y) +
691 (((1 - costheta) * axis.x * axis.z + axis.y * sintheta) * p.z);
692
693 r.y = (((1 - costheta) * axis.x * axis.y + axis.z * sintheta) * p.x) +
694 ((costheta + (1 - costheta) * axis.y * axis.y) * p.y) +
695 (((1 - costheta) * axis.y * axis.z - axis.x * sintheta) * p.z);
696
697 r.z = (((1 - costheta) * axis.x * axis.z - axis.y * sintheta) * p.x) +
698 (((1 - costheta) * axis.y * axis.z + axis.x * sintheta) * p.y) +
699 ((costheta + (1 - costheta) * axis.z * axis.z) * p.z);
700
701 return r;
702}
703
704/* NaN-safe math ops */
705
707{
708 return sqrtf(max(f, 0.0f));
709}
710
712{
713#if defined(__KERNEL_METAL__)
714 return (f > 0.0f) ? rsqrt(f) : 0.0f;
715#else
716 return (f > 0.0f) ? 1.0f / sqrtf(f) : 0.0f;
717#endif
718}
719
720ccl_device float safe_asinf(float a)
721{
722 return asinf(clamp(a, -1.0f, 1.0f));
723}
724
725ccl_device float safe_acosf(float a)
726{
727 return acosf(clamp(a, -1.0f, 1.0f));
728}
729
730ccl_device float compatible_powf(float x, float y)
731{
732#ifdef __KERNEL_GPU__
733 if (y == 0.0f) /* x^0 -> 1, including 0^0 */
734 return 1.0f;
735
736 /* GPU pow doesn't accept negative x, do manual checks here */
737 if (x < 0.0f) {
738 if (fmodf(-y, 2.0f) == 0.0f)
739 return powf(-x, y);
740 else
741 return -powf(-x, y);
742 }
743 else if (x == 0.0f)
744 return 0.0f;
745#endif
746 return powf(x, y);
747}
748
749ccl_device float safe_powf(float a, float b)
750{
751 if (UNLIKELY(a < 0.0f && b != float_to_int(b))) {
752 return 0.0f;
753 }
754
755 return compatible_powf(a, b);
756}
757
758ccl_device float safe_divide(float a, float b)
759{
760 return (b != 0.0f) ? a / b : 0.0f;
761}
762
763ccl_device float safe_logf(float a, float b)
764{
765 if (UNLIKELY(a <= 0.0f || b <= 0.0f)) {
766 return 0.0f;
767 }
768
769 return safe_divide(logf(a), logf(b));
770}
771
772ccl_device float safe_modulo(float a, float b)
773{
774 return (b != 0.0f) ? fmodf(a, b) : 0.0f;
775}
776
777ccl_device float safe_floored_modulo(float a, float b)
778{
779 return (b != 0.0f) ? a - floorf(a / b) * b : 0.0f;
780}
781
782ccl_device_inline float sqr(float a)
783{
784 return a * a;
785}
786
787ccl_device_inline float sin_from_cos(const float c)
788{
789 return safe_sqrtf(1.0f - sqr(c));
790}
791
792ccl_device_inline float cos_from_sin(const float s)
793{
794 return safe_sqrtf(1.0f - sqr(s));
795}
796
798{
799 /* Using second-order Taylor expansion at small angles for better accuracy. */
800 return s_sq > 0.0004f ? 1.0f - safe_sqrtf(1.0f - s_sq) : 0.5f * s_sq;
801}
802
803ccl_device_inline float one_minus_cos(const float angle)
804{
805 /* Using second-order Taylor expansion at small angles for better accuracy. */
806 return angle > 0.02f ? 1.0f - cosf(angle) : 0.5f * sqr(angle);
807}
808
810{
811 return sqr(sqr(sqr(sqr(a)) * a));
812}
813
815{
816 return sqr(a * sqr(sqr(sqr(a)) * a));
817}
818
819#ifdef __KERNEL_METAL__
820ccl_device_inline float lgammaf(float x)
821{
822 /* Nemes, Gergő (2010), "New asymptotic expansion for the Gamma function", Archiv der Mathematik
823 */
824 const float _1_180 = 1.0f / 180.0f;
825 const float log2pi = 1.83787706641f;
826 const float logx = log(x);
827 return (log2pi - logx +
828 x * (logx * 2.0f + log(x * sinh(1.0f / x) + (_1_180 / pow(x, 6.0f))) - 2.0f)) *
829 0.5f;
830}
831#endif
832
833ccl_device_inline float beta(float x, float y)
834{
835 return expf(lgammaf(x) + lgammaf(y) - lgammaf(x + y));
836}
837
838ccl_device_inline float xor_signmask(float x, int y)
839{
840 return __int_as_float(__float_as_int(x) ^ y);
841}
842
844{
845 return bits * (1.0f / (float)0xFFFFFFFF);
846}
847
848#if !defined(__KERNEL_GPU__)
849# if defined(__GNUC__)
851{
852 return __builtin_popcount(x);
853}
854# else
856{
857 /* TODO(Stefan): pop-count intrinsic for Windows with fallback for older CPUs. */
858 uint i = x;
859 i = i - ((i >> 1) & 0x55555555);
860 i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
861 i = (((i + (i >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
862 return i;
863}
864# endif
865#elif defined(__KERNEL_ONEAPI__)
866# define popcount(x) sycl::popcount(x)
867#elif defined(__KERNEL_HIP__)
868/* Use popcll to support 64-bit wave for pre-RDNA AMD GPUs */
869# define popcount(x) __popcll(x)
870#elif !defined(__KERNEL_METAL__)
871# define popcount(x) __popc(x)
872#endif
873
875{
876#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__)
877 return __clz(x);
878#elif defined(__KERNEL_METAL__)
879 return clz(x);
880#elif defined(__KERNEL_ONEAPI__)
881 return sycl::clz(x);
882#else
883 assert(x != 0);
884# ifdef _MSC_VER
885 unsigned long leading_zero = 0;
886 _BitScanReverse(&leading_zero, x);
887 return (31 - leading_zero);
888# else
889 return __builtin_clz(x);
890# endif
891#endif
892}
893
895{
896#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__)
897 return (__ffs(x) - 1);
898#elif defined(__KERNEL_METAL__)
899 return ctz(x);
900#elif defined(__KERNEL_ONEAPI__)
901 return sycl::ctz(x);
902#else
903 assert(x != 0);
904# ifdef _MSC_VER
905 unsigned long ctz = 0;
906 _BitScanForward(&ctz, x);
907 return ctz;
908# else
909 return __builtin_ctz(x);
910# endif
911#endif
912}
913
915{
916#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__)
917 return __ffs(x);
918#elif defined(__KERNEL_METAL__)
919 return (x != 0) ? ctz(x) + 1 : 0;
920#else
921# ifdef _MSC_VER
922 return (x != 0) ? (32 - count_leading_zeros(x & (~x + 1))) : 0;
923# else
924 return __builtin_ffs(x);
925# endif
926#endif
927}
928
929/* projections */
931{
932 float len, u, v;
933 len = sqrtf(co.x * co.x + co.y * co.y);
934 if (len > 0.0f) {
935 u = (1.0f - (atan2f(co.x / len, co.y / len) / M_PI_F)) * 0.5f;
936 v = (co.z + 1.0f) * 0.5f;
937 }
938 else {
939 u = v = 0.0f;
940 }
941 return make_float2(u, v);
942}
943
945{
946 float l = dot(co, co);
947 float u, v;
948 if (l > 0.0f) {
949 if (UNLIKELY(co.x == 0.0f && co.y == 0.0f)) {
950 u = 0.0f; /* Otherwise domain error. */
951 }
952 else {
953 u = (0.5f - atan2f(co.x, co.y) * M_1_2PI_F);
954 }
955 v = 1.0f - safe_acosf(co.z / sqrtf(l)) * M_1_PI_F;
956 }
957 else {
958 u = v = 0.0f;
959 }
960 return make_float2(u, v);
961}
962
963/* Compares two floats.
964 * Returns true if their absolute difference is smaller than abs_diff (for numbers near zero)
965 * or their relative difference is less than ulp_diff ULPs.
966 * Based on
967 * https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/
968 */
969
970ccl_device_inline bool compare_floats(float a, float b, float abs_diff, int ulp_diff)
971{
972 if (fabsf(a - b) < abs_diff) {
973 return true;
974 }
975
976 if ((a < 0.0f) != (b < 0.0f)) {
977 return false;
978 }
979
980 return (abs(__float_as_int(a) - __float_as_int(b)) < ulp_diff);
981}
982
983/* Calculate the angle between the two vectors a and b.
984 * The usual approach `acos(dot(a, b))` has severe precision issues for small angles,
985 * which are avoided by this method.
986 * Based on "Mangled Angles" from https://people.eecs.berkeley.edu/~wkahan/Mindless.pdf
987 */
989{
990 return 2.0f * atan2f(len(a - b), len(a + b));
991}
992
993/* Tangent of the angle between vectors a and b. */
995{
996 return len(cross(a, b)) / dot(a, b);
997}
998
999/* Return value which is greater than the given one and is a power of two. */
1001{
1002 return x == 0 ? 1 : 1 << (32 - count_leading_zeros(x));
1003}
1004
1005/* Return value which is lower than the given one and is a power of two. */
1007{
1008 return x < 2 ? x : 1 << (31 - count_leading_zeros(x - 1));
1009}
1010
1011#ifndef __has_builtin
1012# define __has_builtin(v) 0
1013#endif
1014
1015/* Reverses the bits of a 32 bit integer. */
1017{
1018 /* Use a native instruction if it exists. */
1019#if defined(__KERNEL_CUDA__)
1020 return __brev(x);
1021#elif defined(__KERNEL_METAL__)
1022 return reverse_bits(x);
1023#elif defined(__aarch64__) || (defined(_M_ARM64) && !defined(_MSC_VER))
1024 /* Assume the rbit is always available on 64bit ARM architecture. */
1025 __asm__("rbit %w0, %w1" : "=r"(x) : "r"(x));
1026 return x;
1027#elif defined(__arm__) && ((__ARM_ARCH > 7) || __ARM_ARCH == 6 && __ARM_ARCH_ISA_THUMB >= 2)
1028 /* This ARM instruction is available in ARMv6T2 and above.
1029 * This 32-bit Thumb instruction is available in ARMv6T2 and above. */
1030 __asm__("rbit %0, %1" : "=r"(x) : "r"(x));
1031 return x;
1032#elif __has_builtin(__builtin_bitreverse32)
1033 return __builtin_bitreverse32(x);
1034#else
1035 /* Flip pairwise. */
1036 x = ((x & 0x55555555) << 1) | ((x & 0xAAAAAAAA) >> 1);
1037 /* Flip pairs. */
1038 x = ((x & 0x33333333) << 2) | ((x & 0xCCCCCCCC) >> 2);
1039 /* Flip nibbles. */
1040 x = ((x & 0x0F0F0F0F) << 4) | ((x & 0xF0F0F0F0) >> 4);
1041 /* Flip bytes. CPUs have an instruction for that, pretty fast one. */
1042# ifdef _MSC_VER
1043 return _byteswap_ulong(x);
1044# elif defined(__INTEL_COMPILER)
1045 return (uint32_t)_bswap((int)x);
1046# else
1047 /* Assuming gcc or clang. */
1048 return __builtin_bswap32(x);
1049# endif
1050#endif
1051}
1052
1053/* Check if intervals (first->x, first->y) and (second.x, second.y) intersect, and replace the
1054 * first interval with their intersection. */
1056{
1057 first->x = fmaxf(first->x, second.x);
1058 first->y = fminf(first->y, second.y);
1059
1060 return first->x < first->y;
1061}
1062
1063/* Solve quadratic equation a*x^2 + b*x + c = 0, adapted from Mitsuba 3
1064 * The solution is ordered so that x1 <= x2.
1065 * Returns true if at least one solution is found. */
1067 const float a, const float b, const float c, ccl_private float &x1, ccl_private float &x2)
1068{
1069 /* If the equation is linear, the solution is -c/b, but b has to be non-zero. */
1070 const bool valid_linear = (a == 0.0f) && (b != 0.0f);
1071 x1 = x2 = -c / b;
1072
1073 const float discriminant = sqr(b) - 4.0f * a * c;
1074 /* Allow slightly negative discriminant in case of numerical precision issues. */
1075 const bool valid_quadratic = (a != 0.0f) && (discriminant > -1e-5f);
1076
1077 if (valid_quadratic) {
1078 /* Numerically stable version of (-b ± sqrt(discriminant)) / (2 * a), avoiding catastrophic
1079 * cancellation when `b` is very close to `sqrt(discriminant)`, by finding the solution of
1080 * greater magnitude which does not suffer from loss of precision, then using the identity
1081 * x1 * x2 = c / a. */
1082 const float temp = -0.5f * (b + copysignf(safe_sqrtf(discriminant), b));
1083 const float r1 = temp / a;
1084 const float r2 = c / temp;
1085
1086 x1 = fminf(r1, r2);
1087 x2 = fmaxf(r1, r2);
1088 }
1089
1090 return (valid_linear || valid_quadratic);
1091}
1092
1094
1095#endif /* __UTIL_MATH_H__ */
unsigned int uint
#define UNLIKELY(x)
ATTR_WARN_UNUSED_RESULT const BMVert * v2
ATTR_WARN_UNUSED_RESULT const BMLoop * l
ATTR_WARN_UNUSED_RESULT const BMVert * v
SIMD_FORCE_INLINE const btScalar & z() const
Return the z value.
Definition btQuadWord.h:117
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
Definition btQuadWord.h:119
SIMD_FORCE_INLINE btVector3 & normalize()
Normalize this vector x^2 + y^2 + z^2 = 1.
Definition btVector3.h:303
local_group_size(16, 16) .push_constant(Type b
pow(value.r - subtrahend, 2.0)") .do_static_compilation(true)
additional_info("compositor_sum_squared_difference_float_shared") .push_constant(Type output_img float dot(value.rgb, luminance_coefficients)") .define("LOAD(value)"
#define ccl_device_forceinline
#define logf(x)
#define sinf(x)
#define cosf(x)
#define ccl_device
#define expf(x)
#define ccl_private
#define ccl_device_inline
#define powf(x, y)
#define CCL_NAMESPACE_END
ccl_device_forceinline float4 make_float4(const float x, const float y, const float z, const float w)
#define saturatef(x)
ccl_device_forceinline float3 make_float3(const float x, const float y, const float z)
#define atan2f(x, y)
#define asinf(x)
#define fmaxf(x, y)
#define ceilf(x)
#define fminf(x, y)
#define fmodf(x, y)
#define floorf(x)
#define acosf(x)
ccl_device_forceinline float2 make_float2(const float x, const float y)
#define copysignf(x, y)
#define __int_as_float(x)
#define __float_as_int(x)
#define fabsf(x)
#define __float_as_uint(x)
#define sqrtf(x)
ccl_device_forceinline int4 make_int4(const int x, const int y, const int z, const int w)
#define __uint_as_float(x)
#define lgammaf(x)
int len
draw_view in_light_buf[] float
#define mix(a, b, c)
Definition hash.h:36
ccl_device_inline float cross(const float2 a, const float2 b)
ccl_device_inline float3 log(float3 v)
#define N
#define M_PI_F
Definition mikk_util.hh:15
INLINE Rall1d< T, V, S > sinh(const Rall1d< T, V, S > &arg)
Definition rall1d.h:335
const btScalar eps
Definition poly34.cpp:11
#define min(a, b)
Definition sort.c:32
unsigned int uint32_t
Definition stdint.h:80
unsigned __int64 uint64_t
Definition stdint.h:90
float x
float y
float z
Definition sky_float3.h:27
float y
Definition sky_float3.h:27
float x
Definition sky_float3.h:27
float max
#define FOREACH_SPECTRUM_CHANNEL(counter)
#define GET_SPECTRUM_CHANNEL(v, i)
SPECTRUM_DATA_TYPE Spectrum
ccl_device float safe_divide(float a, float b)
Definition util/math.h:758
ccl_device float safe_floored_modulo(float a, float b)
Definition util/math.h:777
ccl_device_inline float inverse_lerp(float a, float b, float x)
Definition util/math.h:550
ccl_device_inline float ensure_finite(float v)
Definition util/math.h:373
ccl_device_inline int4 __float4_as_int4(float4 f)
Definition util/math.h:296
ccl_device_inline int abs(int x)
Definition util/math.h:120
ccl_device_inline float safe_sqrtf(float f)
Definition util/math.h:706
ccl_device_inline uint uint16_unpack_from_uint_1(const uint i)
Definition util/math.h:353
ccl_device_inline float pow22(float a)
Definition util/math.h:814
ccl_device_inline int float_to_int(float f)
Definition util/math.h:424
ccl_device_inline int ceil_to_int(float f)
Definition util/math.h:441
ccl_device_inline float pow20(float a)
Definition util/math.h:809
ccl_device_inline float compatible_signf(float f)
Definition util/math.h:498
ccl_device_inline T min4(const T &a, const T &b, const T &c, const T &d)
Definition util/math.h:200
ccl_device_inline float smoothstep(float edge0, float edge1, float x)
Definition util/math.h:394
ccl_device_inline uint popcount(uint x)
Definition util/math.h:855
ccl_device_inline uint count_leading_zeros(uint x)
Definition util/math.h:874
ccl_device_inline float3 safe_divide_even_color(float3 a, float3 b)
Definition util/math.h:643
ccl_device_inline bool intervals_intersect(ccl_private float2 *first, const float2 second)
Definition util/math.h:1055
CCL_NAMESPACE_END CCL_NAMESPACE_BEGIN ccl_device_inline float triangle_area(ccl_private const float3 &v1, ccl_private const float3 &v2, ccl_private const float3 &v3)
Definition util/math.h:584
ccl_device_inline float compatible_atan2(const float y, const float x)
Definition util/math.h:492
ccl_device_inline Spectrum safe_invert_color(Spectrum a)
Definition util/math.h:621
ccl_device_inline int mod(int x, int m)
Definition util/math.h:520
ccl_device_inline float tan_angle(float3 a, float3 b)
Definition util/math.h:994
ccl_device float safe_modulo(float a, float b)
Definition util/math.h:772
ccl_device_inline float sqr(float a)
Definition util/math.h:782
ccl_device_inline float pingpongf(float a, float b)
Definition util/math.h:458
ccl_device_inline uint pointer_pack_to_uint_1(T *ptr)
Definition util/math.h:333
ccl_device_inline float sin_from_cos(const float c)
Definition util/math.h:787
ccl_device_inline uint32_t reverse_integer_bits(uint32_t x)
Definition util/math.h:1016
ccl_device_inline uint count_trailing_zeros(uint x)
Definition util/math.h:894
ccl_device_inline float sin_sqr_to_one_minus_cos(const float s_sq)
Definition util/math.h:797
ccl_device_inline float inversesqrtf(float f)
Definition util/math.h:711
ccl_device float safe_acosf(float a)
Definition util/math.h:725
ccl_device float compatible_powf(float x, float y)
Definition util/math.h:730
ccl_device_inline float smoothminf(float a, float b, float k)
Definition util/math.h:463
ccl_device_inline float2 map_to_sphere(const float3 co)
Definition util/math.h:944
ccl_device_inline uint uint16_pack_to_uint(const uint a, const uint b)
Definition util/math.h:343
ccl_device_inline bool isfinite_safe(float f)
Definition util/math.h:365
ccl_device_inline bool solve_quadratic(const float a, const float b, const float c, ccl_private float &x1, ccl_private float &x2)
Definition util/math.h:1066
ccl_device_inline uint pointer_pack_to_uint_0(T *ptr)
Definition util/math.h:328
ccl_device_inline float4 float3_to_float4(const float3 a)
Definition util/math.h:540
ccl_device_inline float precise_angle(float3 a, float3 b)
Definition util/math.h:988
ccl_device_inline T * pointer_unpack_from_uint(const uint a, const uint b)
Definition util/math.h:338
ccl_device_inline float2 map_to_tube(const float3 co)
Definition util/math.h:930
ccl_device_inline uint next_power_of_two(uint x)
Definition util/math.h:1000
ccl_device_inline float floorfrac(float x, ccl_private int *i)
Definition util/math.h:434
ccl_device_inline T max4(const T &a, const T &b, const T &c, const T &d)
Definition util/math.h:205
ccl_device_inline uint as_uint(int i)
Definition util/math.h:234
ccl_device_inline float nonzerof(float f, float eps)
Definition util/math.h:479
ccl_device float safe_powf(float a, float b)
Definition util/math.h:749
ccl_device_inline uint prev_power_of_two(uint x)
Definition util/math.h:1006
ccl_device float safe_logf(float a, float b)
Definition util/math.h:763
ccl_device_inline float fractf(float x)
Definition util/math.h:446
ccl_device_inline bool compare_floats(float a, float b, float abs_diff, int ulp_diff)
Definition util/math.h:970
ccl_device_inline int floor_to_int(float f)
Definition util/math.h:429
ccl_device_inline bool isnan_safe(float f)
Definition util/math.h:359
ccl_device_inline float beta(float x, float y)
Definition util/math.h:833
ccl_device_inline uint uint16_unpack_from_uint_0(const uint i)
Definition util/math.h:348
ccl_device_inline float3 float2_to_float3(const float2 a)
Definition util/math.h:525
ccl_device_inline float signf(float f)
Definition util/math.h:474
ccl_device_inline void make_orthonormals(const float3 N, ccl_private float3 *a, ccl_private float3 *b)
Definition util/math.h:593
ccl_device_inline float xor_signmask(float x, int y)
Definition util/math.h:838
ccl_device_inline float cubic_interp(float a, float b, float c, float d, float x)
Definition util/math.h:556
ccl_device_inline uint find_first_set(uint x)
Definition util/math.h:914
ccl_device_inline float3 float4_to_float3(const float4 a)
Definition util/math.h:535
ccl_device_inline int as_int(uint i)
Definition util/math.h:224
ccl_device float bits_to_01(uint bits)
Definition util/math.h:843
ccl_device_inline float3 rotate_around_axis(float3 p, float3 axis, float angle)
Definition util/math.h:683
ccl_device_inline float4 __int4_as_float4(int4 i)
Definition util/math.h:306
ccl_device_inline float wrapf(float value, float max, float min)
Definition util/math.h:452
ccl_device float safe_asinf(float a)
Definition util/math.h:720
ccl_device_inline float2 float3_to_float2(const float3 a)
Definition util/math.h:530
ccl_device_inline float smoothstepf(float f)
Definition util/math.h:508
ccl_device_inline Spectrum safe_divide_color(Spectrum a, Spectrum b)
Definition util/math.h:632
ccl_device_inline float one_minus_cos(const float angle)
Definition util/math.h:803
ccl_device_inline float cos_from_sin(const float s)
Definition util/math.h:792
ccl_device_inline int clamp(int a, int mn, int mx)
Definition util/math.h:379
PointerRNA * ptr
Definition wm_files.cc:4126