47#ifdef __KERNEL_METAL__
52 return (a *
b).xyz +
make_float3(t->x.w, t->y.w, t->z.w);
59#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
62 float4 x(_mm_loadu_ps(&t->x.x));
63 float4 y(_mm_loadu_ps(&t->y.x));
64 float4 z(_mm_loadu_ps(&t->z.x));
65 float4 w(_mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f));
67 _MM_TRANSPOSE4_PS(
x.m128,
y.m128,
z.m128,
w.m128);
70 tmp =
madd(shuffle<2>(aa),
z, tmp);
71 tmp =
madd(shuffle<1>(aa),
y, tmp);
72 tmp =
madd(shuffle<0>(aa),
x, tmp);
75#elif defined(__KERNEL_METAL__)
77 return (a *
b).xyz +
make_float3(t->x.w, t->y.w, t->z.w);
80 a.
x * t->y.x + a.
y * t->y.y + a.
z * t->y.z + t->y.w,
81 a.
x * t->z.x + a.
y * t->z.y + a.
z * t->z.z + t->z.w);
89#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
92 float4 x(_mm_loadu_ps(&t->x.x));
93 float4 y(_mm_loadu_ps(&t->y.x));
94 float4 z(_mm_loadu_ps(&t->z.x));
97 _MM_TRANSPOSE4_PS(
x.m128,
y.m128,
z.m128,
w.m128);
99 float4 tmp = shuffle<2>(aa) *
z;
100 tmp =
madd(shuffle<1>(aa),
y, tmp);
101 tmp =
madd(shuffle<0>(aa),
x, tmp);
104#elif defined(__KERNEL_METAL__)
109 a.
x * t->y.x + a.
y * t->y.y + a.
z * t->y.z,
110 a.
x * t->z.x + a.
y * t->z.y + a.
z * t->z.z);
170 const float cx =
cosf(euler.
x);
171 const float cy =
cosf(euler.
y);
172 const float cz =
cosf(euler.
z);
173 const float sx =
sinf(euler.
x);
174 const float sy =
sinf(euler.
y);
175 const float sz =
sinf(euler.
z);
182 t.
x.y = sy * sx * cz - cx * sz;
183 t.
y.y = sy * sx * sz + cx * cz;
186 t.
x.z = sy * cx * cz + sx * sz;
187 t.
y.z = sy * cx * sz - sx * cz;
190 t.
x.w = t.
y.w = t.
z.w = 0.0f;
201 return make_transform(dx.
x, dx.
y, dx.
z, 0.0f, dy.
x, dy.
y, dy.
z, 0.0f,
N.x,
N.y,
N.z, 0.0f);
204#if !defined(__KERNEL_METAL__)
227#ifndef __KERNEL_GPU__
239 return make_transform(1, 0, 0, t.
x, 0, 1, 0, t.
y, 0, 0, 1, t.
z);
249 return make_transform(s.
x, 0, 0, 0, 0, s.
y, 0, 0, 0, 0, s.
z, 0);
261 const float t = 1.0f - c;
266 axis.
x * axis.
y * t - s * axis.
z,
267 axis.
x * axis.
z * t + s * axis.
y,
270 axis.
y * axis.
x * t + s * axis.
z,
271 axis.
y * axis.
y * t + c,
272 axis.
y * axis.
z * t - s * axis.
x,
275 axis.
z * axis.
x * t - s * axis.
y,
276 axis.
z * axis.
y * t + s * axis.
x,
277 axis.
z * axis.
z * t + c,
308 const float threshold)
310 for (
int x = 0;
x < 3;
x++) {
311 for (
int y = 0;
y < 4;
y++) {
328 t->
x[column] = value.
x;
329 t->
y[column] = value.
y;
330 t->
z[column] = value.
z;
339 const float eps = 1e-6f;
364 return (
dot(
cross(c0, c1), c2) < 0.0f);
380 return make_transform(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
390#if defined(__KERNEL_GPU_RAYTRACING__)
391 return normalize((1.0f - t) * q1 + t * q2);
395 const float costheta =
dot(q1, q2);
399 if (costheta > 0.9995f) {
401 return normalize((1.0f - t) * q1 + t * q2);
404 const float theta =
acosf(
clamp(costheta, -1.0f, 1.0f));
406 const float thetap = theta * t;
407 return q1 *
cosf(thetap) + qperp *
sinf(thetap);
412#ifndef __KERNEL_GPU__
422#if defined(__AVX2__) && defined(__KERNEL_SSE2__)
423 const __m128 a = (
const __m128 &)a_;
424 const __m128
b = (
const __m128 &)b_;
425 const __m128 a_shuffle = _mm_castsi128_ps(
426 _mm_shuffle_epi32(_mm_castps_si128(a), _MM_SHUFFLE(3, 0, 2, 1)));
427 const __m128 b_shuffle = _mm_castsi128_ps(
428 _mm_shuffle_epi32(_mm_castps_si128(
b), _MM_SHUFFLE(3, 0, 2, 1)));
429 const __m128 r = _mm_castsi128_ps(
430 _mm_shuffle_epi32(_mm_castps_si128(_mm_fmsub_ps(a, b_shuffle, _mm_mul_ps(a_shuffle,
b))),
431 _MM_SHUFFLE(3, 0, 2, 1)));
435 return cross(a_, b_);
440#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE42__)
441 const __m128 a = (
const __m128 &)a_;
442 const __m128
b = (
const __m128 &)b_;
443 return _mm_cvtss_f32(_mm_dp_ps(a,
b, 0x7F));
495#ifndef __KERNEL_GPU__
550 dot(rotation_x, scale_x),
dot(rotation_x, scale_y),
dot(rotation_x, scale_z), decomp->y.x);
552 dot(rotation_y, scale_x),
dot(rotation_y, scale_y),
dot(rotation_y, scale_z), decomp->y.y);
554 dot(rotation_z, scale_x),
dot(rotation_z, scale_y),
dot(rotation_z, scale_z), decomp->y.z);
564 const int maxstep = numsteps - 1;
565 const int step =
min((
int)(time * maxstep), maxstep - 1);
566 const float t = time * maxstep -
step;
574 decomp.
y = (1.0f - t) * a->y + t *
b->y;
575 decomp.
z = (1.0f - t) * a->z + t *
b->z;
576 decomp.
w = (1.0f - t) * a->w + t *
b->w;
593#ifndef __KERNEL_GPU__
613#define transform_point_auto transform_point
614#define transform_direction_auto transform_direction
615#define transform_direction_transposed_auto transform_direction_transposed
static double angle(const Eigen::Vector3d &v1, const Eigen::Vector3d &v2)
ATTR_WARN_UNUSED_RESULT const BMLoop * l
ATTR_WARN_UNUSED_RESULT const BMVert const BMEdge * e
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
SIMD_FORCE_INLINE const btScalar & z() const
Return the z value.
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
dot(value.rgb, luminance_coefficients)") DEFINE_VALUE("REDUCE(lhs
#define ccl_device_forceinline
#define ccl_device_inline
#define CCL_NAMESPACE_END
VecBase< float, 4 > float4
VecBase< float, D > normalize(VecOp< float, D >) RET
VecBase< float, 3 > float3
VecBase< float, D > step(VecOp< float, D >, VecOp< float, D >) RET
VecBase< float, 3 > cross(VecOp< float, 3 >, VecOp< float, 3 >) RET
constexpr T clamp(T, U, U) RET
ccl_device_inline bool isfinite_safe(const float f)
CCL_NAMESPACE_BEGIN ccl_device_inline float madd(const float a, const float b, const float c)
ccl_device_inline float len_squared(const float2 a)
CCL_NAMESPACE_BEGIN ccl_device_inline float4 zero_float4()
bool system_cpu_support_avx2()
ccl_device_inline void print_float4(const ccl_private char *label, const float4 a)