5#ifndef __UTIL_MATH_MATRIX_H__
6#define __UTIL_MATH_MATRIX_H__
10#define MAT(A, size, row, col) A[(row) * (size) + (col)]
14# define MATS(A, n, r, c, s) A[((r) * (n) + (c)) * (s)]
16# define MATHS(A, r, c, s) A[((r) * ((r) + 1) / 2 + (c)) * (s)]
17# define VECS(V, i, s) V[(i) * (s)]
19# define MATS(A, n, r, c, s) MAT(A, n, r, c)
20# define MATHS(A, r, c, s) A[(r) * ((r) + 1) / 2 + (c)]
21# define VECS(V, i, s) V[i]
28 for (
int i = 0; i < n; i++) {
35 for (
int row = 0; row < n; row++) {
37 MAT(A, n, row,
col) = 0.0f;
48 for (
int i = 0; i < n; i++) {
57 for (
int i = 0; i < n; i++) {
67 for (
int i = 0; i < n; i++) {
68 a[i * astride] *=
b[i];
74 for (
int i = 0; i < n; i++) {
83 for (
int i = 0; i < n; i++) {
84 a[i] =
max(a[i],
b[i]);
90 for (
int i = 0; i < n; i++) {
98 for (
int i = 0; i < n; i++) {
115 for (
int row = 0; row < n; row++) {
116 MATHS(A, row, row, stride) += val;
127 for (
int row = 0; row < n; row++) {
139 for (
int row = 0; row < n; row++) {
151 for (
int row = 0; row < n; row++) {
161 for (
int i = 0; i < n; i++) {
162 for (
int j = 0; j < i; j++) {
163 float temp =
MATS(A, n, i, j, stride);
164 MATS(A, n, i, j, stride) =
MATS(A, n, j, i, stride);
165 MATS(A, n, j, i, stride) = temp;
177 for (
int row = 0; row < n; row++) {
179 float sum_col =
MATHS(A, row,
col, stride);
180 for (
int k = 0; k <
col; k++) {
181 sum_col -=
MATHS(A, row, k, stride) *
MATHS(A,
col, k, stride);
184 sum_col =
sqrtf(
max(sum_col, 0.0f));
189 MATHS(A, row,
col, stride) = sum_col;
217 for (
int row = 0; row < n; row++) {
221 VECS(y, row, stride) =
sum /
MATHS(A, row, row, stride);
225 for (
int row = n - 1; row >= 0; row--) {
229 VECS(y, row, stride) =
sum /
MATHS(A, row, row, stride);
246 const float singular_epsilon = 1e-9f;
248 for (
int row = 0; row < n; row++) {
250 MATS(
V, n, row,
col, v_stride) = (
col == row) ? 1.0f : 0.0f;
254 for (
int sweep = 0; sweep < 8; sweep++) {
255 float off_diagonal = 0.0f;
256 for (
int row = 1; row < n; row++) {
261 if (off_diagonal < 1e-7f) {
270 float threshold = 0.2f * off_diagonal / (n * n);
272 for (
int row = 1; row < n; row++) {
275 float element =
MAT(A, n, row,
col);
276 float abs_element =
fabsf(element);
280 if (sweep > 3 && abs_element <= singular_epsilon *
fabsf(
MAT(A, n, row, row)) &&
283 MAT(A, n, row,
col) = 0.0f;
287 if (element == 0.0f) {
293 if (sweep < 3 && (abs_element < threshold)) {
302 float singular_diff =
MAT(A, n, row, row) -
MAT(A, n,
col,
col);
304 if (abs_element > singular_epsilon *
fabsf(singular_diff)) {
305 float cot_2phi = 0.5f * singular_diff /
element;
306 ratio = 1.0f / (
fabsf(cot_2phi) +
sqrtf(1.0f + cot_2phi * cot_2phi));
307 if (cot_2phi < 0.0f) {
312 ratio = element / singular_diff;
315 float c = 1.0f /
sqrtf(1.0f + ratio * ratio);
319 float tan_phi_2 = s / (1.0f + c);
322 float singular_delta = ratio *
element;
323 MAT(A, n, row, row) += singular_delta;
327 MAT(A, n, row,
col) = 0.0f;
330#define ROT(M, r1, c1, r2, c2, stride) \
332 float M1 = MATS(M, n, r1, c1, stride); \
333 float M2 = MATS(M, n, r2, c2, stride); \
334 MATS(M, n, r1, c1, stride) -= s * (M2 + tan_phi_2 * M1); \
335 MATS(M, n, r2, c2, stride) += s * (M1 - tan_phi_2 * M2); \
340 for (
int i = 0; i <
col; i++)
341 ROT(A,
col, i, row, i, 1);
342 for (
int i =
col + 1; i < row; i++)
343 ROT(A, i,
col, row, i, 1);
344 for (
int i = row + 1; i < n; i++)
345 ROT(A, i,
col, i, row, 1);
347 for (
int i = 0; i < n; i++)
348 ROT(
V,
col, i, row, i, v_stride);
355 for (
int i = 0; i < n - 1; i++) {
356 float v =
MAT(A, n, i, i);
358 for (
int j = i; j < n; j++) {
359 if (
MAT(A, n, j, j) >=
v) {
366 MAT(A, n, k, k) =
MAT(A, n, i, i);
369 for (
int j = 0; j < n; j++) {
370 float v =
MATS(
V, n, i, j, v_stride);
371 MATS(
V, n, i, j, v_stride) =
MATS(
V, n, k, j, v_stride);
372 MATS(
V, n, k, j, v_stride) =
v;
378#ifdef __KERNEL_SSE3__
381 for (
int i = 0; i < n; i++) {
388 for (
int row = 0; row < n; row++) {
402 for (
int row = 0; row < n; row++) {
411 for (
int i = 0; i < n; i++) {
418 for (
int i = 0; i < n; i++) {
425 for (
int i = 0; i < n; i++) {
426 a[i] =
max(a[i],
b[i]);
432 for (
int row = 0; row < n; row++) {
#define atomic_add_and_fetch_float(p, x)
ATTR_WARN_UNUSED_RESULT const void * element
ATTR_WARN_UNUSED_RESULT const BMVert * v
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
static T sum(const btAlignedObjectArray< T > &items)
local_group_size(16, 16) .push_constant(Type b
#define ccl_device_inline
#define CCL_NAMESPACE_END
ccl_device_inline float reduce_add(const float2 a)
CCL_NAMESPACE_BEGIN ccl_device_inline float4 zero_float4()
#define MAT(A, size, row, col)
ccl_device_inline void math_vec3_add(ccl_private float3 *v, int n, ccl_private float *x, float3 w)
ccl_device void math_trimatrix_cholesky(ccl_global float *A, int n, int stride)
ccl_device_inline void math_matrix_transpose(ccl_global float *A, int n, int stride)
ccl_device_inline void math_vector_add(ccl_private float *a, ccl_private const float *ccl_restrict b, int n)
ccl_device_inline void math_vec3_add_strided(ccl_global float3 *v, int n, ccl_private float *x, float3 w, int stride)
ccl_device_inline void math_trimatrix_add_diagonal(ccl_global float *A, int n, float val, int stride)
ccl_device_inline void math_matrix_zero(ccl_private float *A, int n)
ccl_device_inline void math_vector_mul(ccl_private float *a, ccl_private const float *ccl_restrict b, int n)
#define MATHS(A, r, c, s)
ccl_device_inline void math_trimatrix_add_gramian_strided(ccl_global float *A, int n, ccl_private const float *ccl_restrict v, float weight, int stride)
#define ROT(M, r1, c1, r2, c2, stride)
ccl_device_inline void math_vector_max(ccl_private float *a, ccl_private const float *ccl_restrict b, int n)
ccl_device_inline void math_vector_scale(ccl_private float *a, float b, int n)
ccl_device_inline void math_trimatrix_vec3_solve(ccl_global float *A, ccl_global float3 *y, int n, int stride)
#define MATS(A, n, r, c, s)
ccl_device void math_matrix_jacobi_eigendecomposition(ccl_private float *A, ccl_global float *V, int n, int v_stride)
ccl_device_inline void math_vector_mul_strided(ccl_global float *a, ccl_private const float *ccl_restrict b, int astride, int n)
ccl_device_inline void math_matrix_add_gramian(ccl_private float *A, int n, ccl_private const float *ccl_restrict v, float weight)
ccl_device_inline void math_trimatrix_add_gramian(ccl_global float *A, int n, ccl_private const float *ccl_restrict v, float weight)
ccl_device_inline void math_vector_zero(ccl_private float *v, int n)
CCL_NAMESPACE_BEGIN struct Window V