24#ifndef __UTIL_FAST_MATH__
25#define __UTIL_FAST_MATH__
65#ifdef __KERNEL_SSE42__
68 __m128 vec = _mm_set_ss(x);
69 vec = _mm_round_ss(vec, vec, (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC));
70 return _mm_cvtss_si32(vec);
89 x =
madd(qf, -0.78515625f * 4, x);
90 x =
madd(qf, -0.00024187564849853515625f * 4, x);
91 x =
madd(qf, -3.7747668102383613586e-08f * 4, x);
92 x =
madd(qf, -1.2816720341285448015e-12f * 4, x);
99 float u = 2.6083159809786593541503e-06f;
100 u =
madd(u, s, -0.0001981069071916863322258f);
101 u =
madd(u, s, +0.00833307858556509017944336f);
102 u =
madd(u, s, -0.166666597127914428710938f);
103 u =
madd(s, u * x, x);
107 if (
fabsf(u) > 1.0f) {
118 x =
madd(qf, -0.78515625f * 4, x);
119 x =
madd(qf, -0.00024187564849853515625f * 4, x);
120 x =
madd(qf, -3.7747668102383613586e-08f * 4, x);
121 x =
madd(qf, -1.2816720341285448015e-12f * 4, x);
126 float u = -2.71811842367242206819355e-07f;
127 u =
madd(u, s, +2.47990446951007470488548e-05f);
128 u =
madd(u, s, -0.00138888787478208541870117f);
129 u =
madd(u, s, +0.0416666641831398010253906f);
130 u =
madd(u, s, -0.5f);
131 u =
madd(u, s, +1.0f);
135 if (
fabsf(u) > 1.0f) {
146 x =
madd(qf, -0.78515625f * 4, x);
147 x =
madd(qf, -0.00024187564849853515625f * 4, x);
148 x =
madd(qf, -3.7747668102383613586e-08f * 4, x);
149 x =
madd(qf, -1.2816720341285448015e-12f * 4, x);
156 float su = 2.6083159809786593541503e-06f;
157 su =
madd(su, s, -0.0001981069071916863322258f);
158 su =
madd(su, s, +0.00833307858556509017944336f);
159 su =
madd(su, s, -0.166666597127914428710938f);
160 su =
madd(s, su * x, x);
161 float cu = -2.71811842367242206819355e-07f;
162 cu =
madd(cu, s, +2.47990446951007470488548e-05f);
163 cu =
madd(cu, s, -0.00138888787478208541870117f);
164 cu =
madd(cu, s, +0.0416666641831398010253906f);
165 cu =
madd(cu, s, -0.5f);
166 cu =
madd(cu, s, +1.0f);
170 if (
fabsf(su) > 1.0f) {
173 if (
fabsf(cu) > 1.0f) {
193 x =
madd(qf, -0.78515625f * 2, x);
194 x =
madd(qf, -0.00024187564849853515625f * 2, x);
195 x =
madd(qf, -3.7747668102383613586e-08f * 2, x);
196 x =
madd(qf, -1.2816720341285448015e-12f * 2, x);
199 x = M_PI_4_F - (M_PI_4_F -
x);
202 float u = 0.00927245803177356719970703f;
203 u =
madd(u, s, 0.00331984995864331722259521f);
204 u =
madd(u, s, 0.0242998078465461730957031f);
205 u =
madd(u, s, 0.0534495301544666290283203f);
206 u =
madd(u, s, 0.133383005857467651367188f);
207 u =
madd(u, s, 0.333331853151321411132812f);
208 u =
madd(s, u * x, x);
222 const float z = x - ((x + 25165824.0f) - 25165824.0f);
224 const float Q = 3.10396624f;
225 const float P = 3.584135056f;
226 return y * (Q +
P *
fabsf(y));
262 const float f =
fabsf(x);
264 const float m = (f < 1.0f) ? 1.0f - (1.0f - f) : 1.0f;
272 const float a =
sqrtf(1.0f - m) *
273 (1.5707963267f + m * (-0.213300989f + m * (0.077980478f + m * -0.02164095f)));
274 return x < 0 ?
M_PI_F - a : a;
282 const float f =
fabsf(x);
284 const float m = (f < 1.0f) ? 1.0f - (1.0f - f) : 1.0f;
286 sqrtf(1.0f - m) * (1.5707963267f +
287 m * (-0.213300989f + m * (0.077980478f + m * -0.02164095f)));
293 const float a =
fabsf(x);
294 const float k = a > 1.0f ? 1 / a : a;
295 const float s = 1.0f - (1.0f - k);
296 const float t = s * s;
303 float r = s *
madd(0.43157974f, t, 1.0f) /
madd(
madd(0.05831938f, t, 0.76443945f), t, 1.0f);
319 const float a =
fabsf(x);
322 const float k = (
b == 0) ? 0.0f : ((a ==
b) ? 1.0f : (
b > a ? a /
b :
b / a));
323 const float s = 1.0f - (1.0f - k);
324 const float t = s * s;
326 float r = s *
madd(0.43157974f, t, 1.0f) /
madd(
madd(0.05831938f, t, 0.76443945f), t, 1.0f);
355 int exponent = (
int)(bits >> 23) - 127;
366 float hi =
madd(f, -0.00931049621349f, 0.05206469089414f);
367 float lo =
madd(f, 0.47868480909345f, -0.72116591947498f);
368 hi =
madd(f, hi, -0.13753123777116f);
369 hi =
madd(f, hi, 0.24187369696082f);
370 hi =
madd(f, hi, -0.34730547155299f);
371 lo =
madd(f, lo, 1.442689881667200f);
372 return ((f4 * hi) + (f * lo)) + exponent;
397 return (
float)((
int)(bits >> 23) - 127);
403 x =
clamp(x, -126.0f, 126.0f);
407 x = 1.0f - (1.0f -
x);
416 float r = 1.33336498402e-3f;
417 r =
madd(x, r, 9.810352697968e-3f);
418 r =
madd(x, r, 5.551834031939e-2f);
419 r =
madd(x, r, 0.2401793301105f);
420 r =
madd(x, r, 0.693144857883f);
421 r =
madd(x, r, 1.0f);
435#if !defined(__KERNEL_GPU__) && !defined(_MSC_VER)
443 x =
clamp(x, -limit, limit);
476 if (
fabsf(x) < 1e-5f) {
477 x = 1.0f - (1.0f -
x);
478 return madd(0.5f, x * x, x);
495 a = 1.0f - (1.0f - a);
499 float r = 2.03945513931e-4f;
500 r =
madd(r, a2, 8.32990277558e-3f);
501 r =
madd(r, a2, 0.1666673421859f);
502 r =
madd(r * a, a2, a);
513 return 0.5f *
e + 0.5f /
e;
523 return copysignf(1.0f - 2.0f / (1.0f +
e), x);
538 if (ybits >= 0x4b800000) {
541 else if (ybits >= 0x3f800000) {
543 int k = (ybits >> 23) - 127;
544 int j = ybits >> (23 - k);
545 if ((j << (23 - k)) == ybits) {
570 const float a1 = 0.0705230784f;
571 const float a2 = 0.0422820123f;
572 const float a3 = 0.0092705272f;
573 const float a4 = 0.0001520143f;
574 const float a5 = 0.0002765672f;
575 const float a6 = 0.0000430638f;
576 const float a =
fabsf(x);
580 const float b = 1.0f - (1.0f - a);
581 const float r =
madd(
582 madd(
madd(
madd(
madd(
madd(a6,
b, a5),
b, a4),
b, a3),
b, a2),
b, a1),
b, 1.0f);
583 const float s = r * r;
584 const float t = s * s;
585 const float u = t * t;
586 const float v = u * u;
606 if (a > 0.99999994f) {
609 float w = -
fast_logf((1.0f - a) * (1.0f + a)), p;
613 p =
madd(p,
w, 3.43273939e-07f);
614 p =
madd(p,
w, -3.5233877e-06f);
615 p =
madd(p,
w, -4.39150654e-06f);
616 p =
madd(p,
w, 0.00021858087f);
617 p =
madd(p,
w, -0.00125372503f);
618 p =
madd(p,
w, -0.00417768164f);
619 p =
madd(p,
w, 0.246640727f);
620 p =
madd(p,
w, 1.50140941f);
624 p = -0.000200214257f;
625 p =
madd(p,
w, 0.000100950558f);
626 p =
madd(p,
w, 0.00134934322f);
627 p =
madd(p,
w, -0.00367342844f);
628 p =
madd(p,
w, 0.00573950773f);
629 p =
madd(p,
w, -0.0076224613f);
630 p =
madd(p,
w, 0.00943887047f);
631 p =
madd(p,
w, 1.00167406f);
632 p =
madd(p,
w, 2.83297682f);
645 y = (2.0f / 3) * y + 1 / (3 * y * y * x);
646 y = (2.0f / 3) * y + 1 / (3 * y * y * x);
ATTR_WARN_UNUSED_RESULT const BMVert const BMEdge * e
ATTR_WARN_UNUSED_RESULT const BMVert * v
SIMD_FORCE_INLINE const btScalar & z() const
Return the z value.
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
local_group_size(16, 16) .push_constant(Type b
#define util_assert(statement)
#define ccl_device_inline
#define CCL_NAMESPACE_END
draw_view in_light_buf[] float
draw_view push_constant(Type::INT, "radiance_src") .push_constant(Type capture_info_buf storage_buf(1, Qualifier::READ, "ObjectBounds", "bounds_buf[]") .push_constant(Type draw_view int
ccl_device_inline float fast_inv_cbrtf(float x)
ccl_device float fast_exp2f(float x)
ccl_device_inline float fast_cospif(float x)
ccl_device_inline float fast_log10(float x)
CCL_NAMESPACE_BEGIN ccl_device_inline float madd(const float a, const float b, const float c)
ccl_device_inline int fast_rint(float x)
ccl_device_inline float vector_angle(float3 a, float3 b)
ccl_device_inline float fast_logf(float x)
ccl_device_inline float4 madd4(const float4 a, const float4 b, const float4 c)
ccl_device void fast_sincosf(float x, ccl_private float *sine, ccl_private float *cosine)
ccl_device float4 fast_exp2f4(float4 x)
ccl_device_inline float fast_erfcf(float x)
ccl_device float fast_acosf(float x)
ccl_device float fast_sinpif(float x)
ccl_device_inline float fast_erff(float x)
ccl_device float fast_asinf(float x)
ccl_device float fast_logb(float x)
ccl_device float fast_tanf(float x)
ccl_device_inline float fast_coshf(float x)
ccl_device float fast_atan2f(float y, float x)
ccl_device float fast_atanf(float x)
ccl_device_inline float fast_tanhf(float x)
ccl_device_inline float fast_expm1f(float x)
ccl_device_inline float fast_ierff(float x)
ccl_device_inline float fast_exp10(float x)
ccl_device float fast_sinf(float x)
ccl_device float fast_cosf(float x)
ccl_device_inline float fast_expf(float x)
ccl_device float fast_safe_powf(float x, float y)
ccl_device float fast_log2f(float x)
ccl_device_inline float4 fast_expf4(float4 x)
ccl_device float fast_sinhf(float x)
VecBase< float, 4 > float4
ccl_device_inline int4 __float4_as_int4(float4 f)
ccl_device_inline int float_to_int(float f)
ccl_device_inline float4 __int4_as_float4(int4 i)
ccl_device_inline int clamp(int a, int mn, int mx)