18 return float3(_mm_setzero_ps());
38 return float3(_mm_div_ps(_mm_set_ps1(1.0f), a.m128));
44#ifndef __KERNEL_METAL__
49 return float3(_mm_xor_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000))));
58 return float3(_mm_mul_ps(a.m128,
b.m128));
67 return float3(_mm_mul_ps(a.m128, _mm_set1_ps(f)));
75# if defined(__KERNEL_SSE__)
76 return float3(_mm_mul_ps(_mm_set1_ps(f), a.m128));
84# if defined(__KERNEL_SSE__)
85 return float3(_mm_div_ps(_mm_set1_ps(f), a.m128));
93# if defined(__KERNEL_SSE__)
94 return float3(_mm_div_ps(a.m128, _mm_set1_ps(f)));
96 float invf = 1.0f / f;
103# if defined(__KERNEL_SSE__)
104 return float3(_mm_div_ps(a.m128,
b.m128));
112# ifdef __KERNEL_SSE__
113 return float3(_mm_add_ps(a.m128,
b.m128));
126# ifdef __KERNEL_SSE__
127 return float3(_mm_sub_ps(a.m128,
b.m128));
165 const float invf = 1.0f / f;
169# if !(defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__) || defined(__KERNEL_ONEAPI__))
197# ifdef __KERNEL_SSE__
198 return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128,
b.m128)) & 7) == 7;
200 return (a.
x ==
b.x && a.
y ==
b.y && a.
z ==
b.z);
211# ifdef __KERNEL_SSE__
212 return int3(_mm_castps_si128(_mm_cmpge_ps(a.m128,
b.m128)));
220# if defined(__KERNEL_SSE42__) && defined(__KERNEL_SSE__)
221 return _mm_cvtss_f32(_mm_dp_ps(a,
b, 0x7F));
223 return a.
x *
b.x + a.
y *
b.y + a.
z *
b.z;
231#if defined(__KERNEL_SSE42__) && defined(__KERNEL_SSE__)
232 return _mm_cvtss_f32(_mm_hadd_ps(_mm_mul_ps(a,
b),
b));
234 return a.
x *
b.x + a.
y *
b.y;
240#if defined(__KERNEL_SSE42__) && defined(__KERNEL_SSE__)
241 return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(a.m128, a.m128, 0x7F)));
262#ifndef __KERNEL_METAL__
271# ifdef __KERNEL_SSE__
284# if defined(__KERNEL_SSE42__) && defined(__KERNEL_SSE__)
285 const __m128
norm = _mm_sqrt_ps(_mm_dp_ps(a.m128, a.m128, 0x7F));
294# ifdef __KERNEL_SSE__
295 return float3(_mm_min_ps(a.m128,
b.m128));
303# ifdef __KERNEL_SSE__
304 return float3(_mm_max_ps(a.m128,
b.m128));
312 return min(
max(a, mn), mx);
317# ifdef __KERNEL_SSE__
318# ifdef __KERNEL_NEON__
319 return float3(vabsq_f32(a.m128));
321 __m128
mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
335# if defined(__KERNEL_NEON__)
339 const float32x4_t iquot = vrndq_f32(a /
b);
340 return float3(vsubq_f32(a, vmulq_f32(iquot, vdupq_n_f32(
b))));
341# elif defined(__KERNEL_SSE42__) && defined(__KERNEL_SSE__)
342 const __m128 iquot = _mm_round_ps(a /
b, _MM_FROUND_TRUNC);
343 return float3(_mm_sub_ps(a, _mm_mul_ps(iquot, _mm_set1_ps(
b))));
351# ifdef __KERNEL_SSE__
352 return float3(_mm_sqrt_ps(a));
360# ifdef __KERNEL_SSE__
361 return float3(_mm_floor_ps(a));
369# ifdef __KERNEL_SSE__
370 return float3(_mm_ceil_ps(a));
378 return a + t * (
b - a);
403 return incident - 2.0f * unit_normal *
dot(incident, unit_normal);
408 const float k = 1.0f - eta * eta * (1.0f -
dot(normal, incident) *
dot(normal, incident));
412 return eta * incident - (eta *
dot(normal, incident) +
sqrt(k)) * normal;
432 const float x = 1.0f / *t;
438 const float t =
len(a);
439 return (t != 0.0f) ? a * (1.0f / t) : a;
444 const float t =
len(a);
445 return (t != 0.0f) ? a * (1.0f / t) : fallback;
451 return (*t != 0.0f) ? a / (*t) : a;
457 (
b.y != 0.0f) ? a.
y /
b.y : 0.0f,
458 (
b.z != 0.0f) ? a.
z /
b.z : 0.0f);
468 return a + t * (
b - a);
481 return (a.
x == 0.0f && a.
y == 0.0f && a.
z == 0.0f);
487#if defined(__KERNEL_SSE__) && defined(__KERNEL_NEON__)
489 t = vsetq_lane_f32(0.0f, t, 3);
490 return vaddvq_f32(t);
492 return (a.
x + a.
y + a.
z);
503#if defined(__KERNEL_METAL__)
510template<
class MaskType>
513#if defined(__KERNEL_METAL__)
515#elif defined(__KERNEL_SSE__)
516# ifdef __KERNEL_SSE42__
517 return float3(_mm_blendv_ps(
b.m128, a.m128, _mm_castsi128_ps(
mask.m128)));
520 _mm_or_ps(_mm_and_ps(_mm_castsi128_ps(
mask), a), _mm_andnot_ps(_mm_castsi128_ps(
mask),
b)));
575 if (
fabsf(
N.y) >= 0.999f) {
580 if (
fabsf(
N.z) >= 0.999f) {
587 if (
N.x !=
N.y ||
N.x !=
N.z) {
606 r.
x = ((costheta + (1 - costheta) * axis.
x * axis.
x) * p.
x) +
607 (((1 - costheta) * axis.
x * axis.
y - axis.
z * sintheta) * p.
y) +
608 (((1 - costheta) * axis.
x * axis.
z + axis.
y * sintheta) * p.
z);
610 r.
y = (((1 - costheta) * axis.
x * axis.
y + axis.
z * sintheta) * p.
x) +
611 ((costheta + (1 - costheta) * axis.
y * axis.
y) * p.
y) +
612 (((1 - costheta) * axis.
y * axis.
z - axis.
x * sintheta) * p.
z);
614 r.
z = (((1 - costheta) * axis.
x * axis.
z - axis.
y * sintheta) * p.
x) +
615 (((1 - costheta) * axis.
y * axis.
z + axis.
x * sintheta) * p.
y) +
616 ((costheta + (1 - costheta) * axis.
z * axis.
z) * p.
z);
646 v = (co.
z + 1.0f) * 0.5f;
656 const float l =
dot(co, co);
MINLINE float safe_acosf(float a)
static double angle(const Eigen::Vector3d &v1, const Eigen::Vector3d &v2)
ATTR_WARN_UNUSED_RESULT const BMVert * v2
ATTR_WARN_UNUSED_RESULT const BMLoop * l
ATTR_WARN_UNUSED_RESULT const BMVert const BMEdge * e
ATTR_WARN_UNUSED_RESULT const BMVert * v
SIMD_FORCE_INLINE const btScalar & z() const
Return the z value.
SIMD_FORCE_INLINE btScalar norm() const
Return the norm (length) of the vector.
#define ccl_device_inline
#define ccl_device_template_spec
#define CCL_NAMESPACE_END
VecBase< float, 4 > float4
VecBase< T, D > reflect(VecOp< T, D >, VecOp< T, D >) RET
VecBase< float, D > normalize(VecOp< float, D >) RET
VecBase< T, D > faceforward(VecOp< T, D >, VecOp< T, D >, VecOp< T, D >) RET
VecBase< float, 3 > float3
bool all(VecOp< bool, D >) RET
VecBase< float, 3 > cross(VecOp< float, 3 >, VecOp< float, 3 >) RET
ccl_device_inline float len_squared(const float2 a)
ccl_device_inline float2 mask(const MaskType mask, const float2 a)
ccl_device_inline float3 safe_normalize(const float3 a)
ccl_device_inline float3 reciprocal(const float3 a)
ccl_device_inline bool is_zero(const float3 a)
ccl_device_inline float precise_angle(const float3 a, const float3 b)
ccl_device_inline float3 operator*(const float3 a, const float3 b)
ccl_device_inline bool isequal(const float3 a, const float3 b)
ccl_device_inline float3 power(const float3 v, const float e)
ccl_device_inline float3 safe_normalize_fallback(const float3 a, const float3 fallback)
ccl_device_inline float3 refract(const float3 incident, const float3 normal, const float eta)
ccl_device_inline int3 operator>=(const float3 a, const float3 b)
ccl_device_inline float3 one_float3()
ccl_device_inline float3 ensure_finite(const float3 v)
ccl_device_inline float3 normalize_len(const float3 a, ccl_private float *t)
ccl_device_inline float3 clamp(const float3 a, const float3 mn, const float3 mx)
ccl_device_inline float3 project(const float3 v, const float3 v_proj)
CCL_NAMESPACE_BEGIN ccl_device_inline float3 zero_float3()
ccl_device_template_spec float3 make_zero()
ccl_device_inline float3 operator/=(float3 &a, const float3 b)
ccl_device_inline float3 interp(const float3 a, const float3 b, const float t)
ccl_device_inline float dot_xy(const float3 a, const float3 b)
ccl_device_inline float3 fmod(const float3 a, const float b)
ccl_device_inline float3 fabs(const float3 a)
ccl_device_inline float reduce_add(const float3 a)
ccl_device_inline float3 rotate_around_axis(const float3 p, const float3 axis, const float angle)
ccl_device_inline float2 map_to_sphere(const float3 co)
ccl_device_inline bool isfinite_safe(const float3 v)
ccl_device_inline float3 mask(const MaskType mask, const float3 a)
ccl_device_inline float reduce_min(const float3 a)
ccl_device_inline float3 operator+(const float3 a, const float3 b)
ccl_device_inline float average(const float3 a)
ccl_device_inline float2 map_to_tube(const float3 co)
ccl_device_inline float3 operator*=(float3 &a, const float3 b)
ccl_device_inline float3 operator/(const float f, const float3 a)
ccl_device_inline bool operator==(const float3 a, const float3 b)
ccl_device_inline float distance(const float3 a, const float3 b)
ccl_device_inline float3 safe_divide(const float3 a, const float3 b)
ccl_device_inline float triangle_area(const ccl_private float3 &v1, const ccl_private float3 &v2, const ccl_private float3 &v3)
ccl_device_inline float3 operator-(const float3 &a)
ccl_device_inline float reduce_max(const float3 a)
ccl_device_inline float3 safe_normalize_len(const float3 a, ccl_private float *t)
ccl_device_inline void make_orthonormals(const float3 N, ccl_private float3 *a, ccl_private float3 *b)
ccl_device_inline float3 operator-=(float3 &a, const float3 b)
ccl_device_inline bool operator!=(const float3 a, const float3 b)
ccl_device_inline float3 operator+=(float3 &a, const float3 b)
ccl_device_inline float tan_angle(const float3 a, const float3 b)
ccl_device_inline float dot(const float3 a, const float3 b)
ccl_device_inline float len_squared(const float3 a)
ccl_device_inline float3 sqr(const float3 a)
ccl_device_inline float4 msub(const float4 a, const float4 b, const float4 c)