20 return float3(_mm_setzero_ps());
40 return float3(_mm_div_ps(_mm_set_ps1(1.0f), a.m128));
46#ifndef __KERNEL_METAL__
51 return float3(_mm_xor_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000))));
60 return float3(_mm_mul_ps(a.m128,
b.m128));
69 return float3(_mm_mul_ps(a.m128, _mm_set1_ps(f)));
77# if defined(__KERNEL_SSE__)
78 return float3(_mm_mul_ps(_mm_set1_ps(f), a.m128));
86# if defined(__KERNEL_SSE__)
87 return float3(_mm_div_ps(_mm_set1_ps(f), a.m128));
95# if defined(__KERNEL_SSE__)
96 return float3(_mm_div_ps(a.m128, _mm_set1_ps(f)));
98 float invf = 1.0f / f;
105# if defined(__KERNEL_SSE__)
106 return float3(_mm_div_ps(a.m128,
b.m128));
114# ifdef __KERNEL_SSE__
115 return float3(_mm_add_ps(a.m128,
b.m128));
133# ifdef __KERNEL_SSE__
134 return float3(_mm_sub_ps(a.m128,
b.m128));
177 const float invf = 1.0f / f;
181# if !(defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__) || defined(__KERNEL_ONEAPI__))
215# ifdef __KERNEL_SSE__
216 return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128,
b.m128)) & 7) == 7;
218 return (a.
x ==
b.x && a.
y ==
b.y && a.
z ==
b.z);
224# ifdef __KERNEL_SSE__
238# ifdef __KERNEL_SSE__
239 return int3(_mm_castps_si128(_mm_cmpge_ps(a.m128,
b.m128)));
247# ifdef __KERNEL_SSE__
248 return int3(_mm_castps_si128(_mm_cmplt_ps(a.m128,
b.m128)));
256# if defined(__KERNEL_SSE42__) && defined(__KERNEL_SSE__)
257 return _mm_cvtss_f32(_mm_dp_ps(a,
b, 0x7F));
259 return a.
x *
b.x + a.
y *
b.y + a.
z *
b.z;
265# ifdef __KERNEL_SSE__
266 return int3(_mm_castps_si128(_mm_cmpgt_ps(a.m128,
b.m128)));
281#if defined(__KERNEL_SSE42__) && defined(__KERNEL_SSE__)
282 return _mm_cvtss_f32(_mm_hadd_ps(_mm_mul_ps(a,
b),
b));
284 return a.
x *
b.x + a.
y *
b.y;
290#if defined(__KERNEL_SSE42__) && defined(__KERNEL_SSE__)
291 return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(a.m128, a.m128, 0x7F)));
312#ifndef __KERNEL_METAL__
321# ifdef __KERNEL_SSE__
334# if defined(__KERNEL_SSE42__) && defined(__KERNEL_SSE__)
335 const __m128
norm = _mm_sqrt_ps(_mm_dp_ps(a.m128, a.m128, 0x7F));
344# ifdef __KERNEL_SSE__
345 return float3(_mm_min_ps(a.m128,
b.m128));
353# ifdef __KERNEL_SSE__
354 return float3(_mm_max_ps(a.m128,
b.m128));
362 return min(
max(a, mn), mx);
367# ifdef __KERNEL_SSE__
368# ifdef __KERNEL_NEON__
369 return float3(vabsq_f32(a.m128));
371 __m128
mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
385# if defined(__KERNEL_NEON__)
389 const float32x4_t iquot = vrndq_f32(a /
b);
390 return float3(vsubq_f32(a, vmulq_f32(iquot, vdupq_n_f32(
b))));
391# elif defined(__KERNEL_SSE42__) && defined(__KERNEL_SSE__)
392 const __m128 iquot = _mm_round_ps(a /
b, _MM_FROUND_TRUNC);
393 return float3(_mm_sub_ps(a, _mm_mul_ps(iquot, _mm_set1_ps(
b))));
401# if defined(__KERNEL_NEON__)
402 const float32x4_t iquot = vrndq_f32(vdivq_f32(a.m128,
b.m128));
403 return float3(vsubq_f32(a, vmulq_f32(iquot,
b.m128)));
404# elif defined(__KERNEL_SSE42__) && defined(__KERNEL_SSE__)
405 const __m128 div = _mm_div_ps(a.m128,
b.m128);
406 const __m128 iquot = _mm_round_ps(div, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
407 return float3(_mm_sub_ps(a.m128, _mm_mul_ps(iquot,
b.m128)));
415# ifdef __KERNEL_SSE__
416 return float3(_mm_sqrt_ps(a));
424# if defined(__KERNEL_NEON__)
425 return float3(vrndnq_f32(a.m128));
426# elif defined(__KERNEL_SSE__)
427 return float3(_mm_round_ps(a.m128, _MM_FROUND_NINT));
435# ifdef __KERNEL_SSE__
436 return float3(_mm_floor_ps(a));
444# ifdef __KERNEL_SSE__
445 return float3(_mm_ceil_ps(a));
453 return a + t * (
b - a);
458 return a + t * (
b - a);
498 return incident - 2.0f * unit_normal *
dot(incident, unit_normal);
503 const float k = 1.0f - eta * eta * (1.0f -
dot(normal, incident) *
dot(normal, incident));
507 return eta * incident - (eta *
dot(normal, incident) +
sqrt(k)) * normal;
532 const float x = 1.0f / *t;
538 const float t =
len(a);
539 return (t != 0.0f) ? a * (1.0f / t) : a;
544 const float t =
len(a);
545 return (t != 0.0f) ? a * (1.0f / t) : fallback;
551 return (*t != 0.0f) ? a / (*t) : a;
557 (
b.y != 0.0f) ? a.
y /
b.y : 0.0f,
558 (
b.z != 0.0f) ? a.
z /
b.z : 0.0f);
568 return a + t * (
b - a);
581 return (a.
x == 0.0f && a.
y == 0.0f && a.
z == 0.0f);
587 return (a.
x == 0.0f || a.
y == 0.0f || a.
z == 0.0f);
592#if defined(__KERNEL_SSE__) && defined(__KERNEL_NEON__)
594 t = vsetq_lane_f32(0.0f, t, 3);
595 return vaddvq_f32(t);
597 return (a.
x + a.
y + a.
z);
608#if defined(__KERNEL_METAL__)
615template<
class MaskType>
618#if defined(__KERNEL_METAL__)
620#elif defined(__KERNEL_SSE__)
621# ifdef __KERNEL_SSE42__
622 return float3(_mm_blendv_ps(
b.m128, a.m128, _mm_castsi128_ps(
mask.m128)));
625 _mm_or_ps(_mm_and_ps(_mm_castsi128_ps(
mask), a), _mm_andnot_ps(_mm_castsi128_ps(
mask),
b)));
651#if defined(__KERNEL_METAL__)
653#elif defined __KERNEL_NEON__
654 return int3(vreinterpretq_m128i_s32(vceqq_f32(a.m128,
b.m128)));
655#elif defined(__KERNEL_SSE__)
656 return int3(_mm_castps_si128(_mm_cmpeq_ps(a.m128,
b.m128)));
723 if (
fabsf(
N.y) >= 0.999f) {
728 if (
fabsf(
N.z) >= 0.999f) {
735 if (
N.x !=
N.y ||
N.x !=
N.z) {
754 r.
x = ((costheta + (1 - costheta) * axis.
x * axis.
x) * p.
x) +
755 (((1 - costheta) * axis.
x * axis.
y - axis.
z * sintheta) * p.
y) +
756 (((1 - costheta) * axis.
x * axis.
z + axis.
y * sintheta) * p.
z);
758 r.
y = (((1 - costheta) * axis.
x * axis.
y + axis.
z * sintheta) * p.
x) +
759 ((costheta + (1 - costheta) * axis.
y * axis.
y) * p.
y) +
760 (((1 - costheta) * axis.
y * axis.
z - axis.
x * sintheta) * p.
z);
762 r.
z = (((1 - costheta) * axis.
x * axis.
z - axis.
y * sintheta) * p.
x) +
763 (((1 - costheta) * axis.
y * axis.
z + axis.
x * sintheta) * p.
y) +
764 ((costheta + (1 - costheta) * axis.
z * axis.
z) * p.
z);
794 v = (co.
z + 1.0f) * 0.5f;
804 const float l =
dot(co, co);
831#ifdef __KERNEL_METAL__
832 return as_type<uint3>(f);
840#ifdef __KERNEL_METAL__
841 return as_type<float3>(f);
MINLINE float safe_acosf(float a)
MINLINE float safe_powf(float base, float exponent)
static double angle(const Eigen::Vector3d &v1, const Eigen::Vector3d &v2)
ATTR_WARN_UNUSED_RESULT const BMVert * v2
ATTR_WARN_UNUSED_RESULT const BMLoop * l
ATTR_WARN_UNUSED_RESULT const BMVert const BMEdge * e
ATTR_WARN_UNUSED_RESULT const BMVert * v
SIMD_FORCE_INLINE const btScalar & z() const
Return the z value.
SIMD_FORCE_INLINE btScalar norm() const
Return the norm (length) of the vector.
#define ccl_device_inline
#define ccl_device_template_spec
#define CCL_NAMESPACE_END
VecBase< T, D > reflect(VecOp< T, D >, VecOp< T, D >) RET
VecBase< float, D > normalize(VecOp< float, D >) RET
VecBase< T, D > faceforward(VecOp< T, D >, VecOp< T, D >, VecOp< T, D >) RET
bool all(VecOp< bool, D >) RET
VecBase< float, 3 > cross(VecOp< float, 3 >, VecOp< float, 3 >) RET
VecBase< float, 4 > float4
VecBase< float, 3 > float3
MINLINE float compatible_signf(float f)
ccl_device_inline float len_squared(const float2 a)
ccl_device_inline float2 mask(const MaskType mask, const float2 a)
ccl_device_inline float3 safe_normalize(const float3 a)
ccl_device_inline float3 reciprocal(const float3 a)
ccl_device_inline bool is_zero(const float3 a)
ccl_device_inline void copy_v3_v3(ccl_private float *r, const float3 val)
ccl_device_inline float precise_angle(const float3 a, const float3 b)
ccl_device_inline float3 operator*(const float3 a, const float3 b)
ccl_device_inline bool isequal(const float3 a, const float3 b)
ccl_device_inline float3 power(const float3 v, const float e)
ccl_device_inline float3 safe_normalize_fallback(const float3 a, const float3 fallback)
ccl_device_inline float3 safe_fmod(const float3 a, const float3 b)
ccl_device_inline float3 compatible_sign(const float3 v)
ccl_device_inline float3 refract(const float3 incident, const float3 normal, const float eta)
ccl_device_inline int3 operator>=(const float3 a, const float3 b)
ccl_device_inline float3 one_float3()
ccl_device_inline float3 ensure_finite(const float3 v)
ccl_device_inline float3 normalize_len(const float3 a, ccl_private float *t)
ccl_device_inline float3 clamp(const float3 a, const float3 mn, const float3 mx)
ccl_device_inline float3 project(const float3 v, const float3 v_proj)
ccl_device_inline int3 operator<(const float3 a, const float3 b)
CCL_NAMESPACE_BEGIN ccl_device_inline float3 zero_float3()
ccl_device_template_spec float3 make_zero()
ccl_device_inline float3 safe_pow(const float3 a, const float3 b)
ccl_device_inline float3 operator/=(float3 &a, const float3 b)
ccl_device_inline float3 atan2(const float3 y, const float3 x)
ccl_device_inline float3 interp(const float3 a, const float3 b, const float t)
ccl_device_inline float dot_xy(const float3 a, const float3 b)
ccl_device_inline float3 fmod(const float3 a, const float b)
ccl_device_inline float3 fabs(const float3 a)
ccl_device_inline float reduce_add(const float3 a)
ccl_device_inline float3 rotate_around_axis(const float3 p, const float3 axis, const float angle)
ccl_device_inline auto isequal_mask(const float3 a, const float3 b)
ccl_device_inline float2 map_to_sphere(const float3 co)
ccl_device_inline bool isfinite_safe(const float3 v)
ccl_device_inline float3 mask(const MaskType mask, const float3 a)
ccl_device_inline float reduce_min(const float3 a)
ccl_device_inline float3 operator+(const float3 a, const float3 b)
ccl_device_inline float3 wrap(const float3 value, const float3 max, const float3 min)
ccl_device_inline auto is_zero_mask(const float3 a)
ccl_device_inline float average(const float3 a)
ccl_device_inline float3 safe_floored_fmod(const float3 a, const float3 b)
ccl_device_inline float2 map_to_tube(const float3 co)
ccl_device_inline bool any_zero(const float3 a)
ccl_device_inline float3 operator*=(float3 &a, const float3 b)
ccl_device_inline float3 operator/(const float f, const float3 a)
ccl_device_inline bool operator==(const float3 a, const float3 b)
ccl_device_inline float distance(const float3 a, const float3 b)
ccl_device_inline float3 uint3_as_float3(const uint3 f)
ccl_device_inline float3 safe_divide(const float3 a, const float3 b)
ccl_device_inline float triangle_area(const ccl_private float3 &v1, const ccl_private float3 &v2, const ccl_private float3 &v3)
ccl_device_inline float3 operator-(const float3 &a)
ccl_device_inline float reduce_max(const float3 a)
ccl_device_inline uint3 float3_as_uint3(const float3 f)
ccl_device_inline float3 safe_normalize_len(const float3 a, ccl_private float *t)
ccl_device_inline int3 operator>(const float3 a, const float3 b)
ccl_device_inline void make_orthonormals(const float3 N, ccl_private float3 *a, ccl_private float3 *b)
ccl_device_inline float3 operator-=(float3 &a, const float3 b)
ccl_device_inline bool operator!=(const float3 a, const float3 b)
ccl_device_inline float3 operator+=(float3 &a, const float3 b)
ccl_device_inline float tan_angle(const float3 a, const float3 b)
ccl_device_inline float dot(const float3 a, const float3 b)
ccl_device_inline float len_squared(const float3 a)
ccl_device_inline float3 safe_sqrt(const float3 a)
ccl_device_inline float4 msub(const float4 a, const float4 b, const float4 c)