5#ifndef __UTIL_TRANSFORM_H__
6#define __UTIL_TRANSFORM_H__
54#ifdef __KERNEL_METAL__
59 return (a *
b).xyz +
make_float3(t->x.w, t->y.w, t->z.w);
66#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
67 const float4 aa(a.m128);
69 float4
x(_mm_loadu_ps(&t->x.x));
70 float4
y(_mm_loadu_ps(&t->y.x));
71 float4
z(_mm_loadu_ps(&t->z.x));
72 float4
w(_mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f));
74 _MM_TRANSPOSE4_PS(x.m128, y.m128,
z.m128,
w.m128);
77 tmp =
madd(shuffle<2>(aa),
z, tmp);
78 tmp =
madd(shuffle<1>(aa), y, tmp);
79 tmp =
madd(shuffle<0>(aa), x, tmp);
82#elif defined(__KERNEL_METAL__)
84 return (a *
b).xyz +
make_float3(t->x.w, t->y.w, t->z.w);
87 a.x * t->y.x + a.y * t->y.y + a.z * t->y.z + t->y.w,
88 a.x * t->z.x + a.y * t->z.y + a.z * t->z.z + t->z.w);
96#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
97 const float4 aa(a.m128);
99 float4
x(_mm_loadu_ps(&t->x.x));
100 float4
y(_mm_loadu_ps(&t->y.x));
101 float4
z(_mm_loadu_ps(&t->z.x));
102 float4
w(_mm_setzero_ps());
104 _MM_TRANSPOSE4_PS(x.m128, y.m128,
z.m128,
w.m128);
106 float4 tmp = shuffle<2>(aa) *
z;
107 tmp =
madd(shuffle<1>(aa), y, tmp);
108 tmp =
madd(shuffle<0>(aa), x, tmp);
111#elif defined(__KERNEL_METAL__)
116 a.x * t->y.x + a.y * t->y.y + a.z * t->y.z,
117 a.x * t->z.x + a.y * t->z.y + a.z * t->z.z);
177 float cx =
cosf(euler.
x);
178 float cy =
cosf(euler.
y);
179 float cz =
cosf(euler.
z);
180 float sx =
sinf(euler.
x);
181 float sy =
sinf(euler.
y);
182 float sz =
sinf(euler.
z);
189 t.
x.y = sy * sx * cz - cx * sz;
190 t.
y.y = sy * sx * sz + cx * cz;
193 t.
x.z = sy * cx * cz + sx * sz;
194 t.
y.z = sy * cx * sz - sx * cz;
197 t.
x.w = t.
y.w = t.
z.w = 0.0f;
208 return make_transform(dx.
x, dx.
y, dx.
z, 0.0f, dy.x, dy.y, dy.z, 0.0f,
N.x,
N.y,
N.z, 0.0f);
211#if !defined(__KERNEL_METAL__)
228#ifndef __KERNEL_GPU__
246 return make_transform(1, 0, 0, t.
x, 0, 1, 0, t.
y, 0, 0, 1, t.
z);
256 return make_transform(s.x, 0, 0, 0, 0, s.y, 0, 0, 0, 0, s.z, 0);
266 float s =
sinf(angle);
267 float c =
cosf(angle);
273 axis.x * axis.y * t - s * axis.z,
274 axis.x * axis.z * t + s * axis.y,
277 axis.y * axis.x * t + s * axis.z,
278 axis.y * axis.y * t + c,
279 axis.y * axis.z * t - s * axis.x,
282 axis.z * axis.x * t - s * axis.y,
283 axis.z * axis.y * t + s * axis.x,
284 axis.z * axis.z * t + c,
303 return memcmp(&A, &
B,
sizeof(
Transform)) == 0;
313 const float threshold)
315 for (
int x = 0; x < 3; x++) {
316 for (
int y = 0; y < 4; y++) {
317 if (
fabsf(A[x][y] -
B[x][y]) > threshold) {
333 t->
x[column] = value.x;
334 t->
y[column] = value.y;
335 t->
z[column] = value.z;
369 return (
dot(
cross(c0, c1), c2) < 0.0f);
385 return make_transform(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
395#if defined(__KERNEL_GPU_RAYTRACING__)
400 float costheta =
dot(
q1, q2);
404 if (costheta > 0.9995f) {
410 float theta =
acosf(
clamp(costheta, -1.0f, 1.0f));
412 float thetap = theta * t;
413 return q1 *
cosf(thetap) + qperp *
sinf(thetap);
418#ifndef __KERNEL_GPU__
426#ifndef __KERNEL_GPU__
446 float q0,
q1, q2, q3, qda, qdb, qdc, qaa, qab, qac, qbb, qbc, qcc;
448 q0 = M_SQRT2_F * decomp->x.w;
449 q1 = M_SQRT2_F * decomp->x.x;
450 q2 = M_SQRT2_F * decomp->x.y;
451 q3 = M_SQRT2_F * decomp->x.z;
474 dot(rotation_x, scale_x),
dot(rotation_x, scale_y),
dot(rotation_x, scale_z), decomp->y.x);
476 dot(rotation_y, scale_x),
dot(rotation_y, scale_y),
dot(rotation_y, scale_z), decomp->y.y);
478 dot(rotation_z, scale_x),
dot(rotation_z, scale_y),
dot(rotation_z, scale_z), decomp->y.z);
488 int maxstep = numsteps - 1;
489 int step =
min((
int)(time * maxstep), maxstep - 1);
490 float t = time * maxstep - step;
498 decomp.
y = (1.0f - t) * a->y + t *
b->y;
499 decomp.
z = (1.0f - t) * a->z + t *
b->z;
500 decomp.
w = (1.0f - t) * a->w + t *
b->w;
517#ifndef __KERNEL_GPU__
535#define transform_point_auto transform_point
536#define transform_direction_auto transform_direction
537#define transform_direction_transposed_auto transform_direction_transposed
ATTR_WARN_UNUSED_RESULT const BMLoop * l
ATTR_WARN_UNUSED_RESULT const BMVert const BMEdge * e
SIMD_FORCE_INLINE const btScalar & z() const
Return the z value.
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
SIMD_FORCE_INLINE btVector3 & normalize()
Normalize this vector x^2 + y^2 + z^2 = 1.
local_group_size(16, 16) .push_constant(Type b
additional_info("compositor_sum_squared_difference_float_shared") .push_constant(Type output_img float dot(value.rgb, luminance_coefficients)") .define("LOAD(value)"
#define ccl_device_inline
#define CCL_NAMESPACE_END
CCL_NAMESPACE_BEGIN ccl_device_inline float madd(const float a, const float b, const float c)
ccl_device_inline float len_squared(const float2 a)
ccl_device_inline float cross(const float2 a, const float2 b)
CCL_NAMESPACE_BEGIN ccl_device_inline float4 zero_float4()
VecBase< float, 4 > float4
MatBase< float, 3, 3 > float3x3
bool system_cpu_support_avx2()
bool system_cpu_support_sse42()
ccl_device_inline void print_float4(ccl_private const char *label, const float4 a)
ccl_device_inline bool isfinite_safe(float f)
ccl_device_inline float4 float3_to_float4(const float3 a)
ccl_device_inline float3 float4_to_float3(const float4 a)
ccl_device_inline int clamp(int a, int mn, int mx)