Blender V4.3
transform_inverse.h
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#pragma once
6
8
9/* Custom cross and dot implementations that match Embree bit for bit.
10 * Normally we don't use SSE41/AVX outside the kernel, but for this it's
11 * important to match exactly for ray tracing precision. */
12
/* Body of transform_inverse_cross(a_, b_): 3D cross product of a_ and b_, returned as a float3.
 *
 * NOTE(review): the signature line (listing line 13) is missing from this extract; the symbol
 * index at the end of the page gives it as
 * `ccl_device_forceinline float3 transform_inverse_cross(const float3 a_, const float3 b_)`.
 *
 * When the SIMD path below is compiled in, the product is evaluated with an explicit sequence of
 * shuffles, one multiply and one fused multiply-subtract; otherwise the generic cross() is used. */
14{
15#if defined(__AVX2__) && defined(__KERNEL_SSE2__)
/* Reinterpret the float3 arguments as SSE registers through a reference cast.
 * NOTE(review): assumes float3 storage is compatible with __m128 in this configuration - confirm. */
16 const __m128 a = (const __m128 &)a_;
17 const __m128 b = (const __m128 &)b_;
/* _MM_SHUFFLE(3, 0, 2, 1) reorders the lanes (x, y, z, w) into (y, z, x, w). */
18 const __m128 a_shuffle = _mm_castsi128_ps(
19 _mm_shuffle_epi32(_mm_castps_si128(a), _MM_SHUFFLE(3, 0, 2, 1)));
20 const __m128 b_shuffle = _mm_castsi128_ps(
21 _mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(3, 0, 2, 1)));
/* Computes a * b_shuffle - (a_shuffle * b): the second product comes from _mm_mul_ps and is fed
 * into the fused multiply-subtract. The same lane shuffle is then applied to the difference. */
22 const __m128 r = _mm_castsi128_ps(
23 _mm_shuffle_epi32(_mm_castps_si128(_mm_fmsub_ps(a, b_shuffle, _mm_mul_ps(a_shuffle, b))),
24 _MM_SHUFFLE(3, 0, 2, 1)));
25 return (const float3 &)r;
26#endif
27
/* Generic fallback. When the SIMD path above is compiled in, it has already returned and this
 * statement is not reached. */
28 return cross(a_, b_);
29}
30
/* Body of transform_inverse_dot(a_, b_): dot product of the x, y, z components of a_ and b_,
 * returned as a float.
 *
 * NOTE(review): the signature line (listing line 31) is missing from this extract; the symbol
 * index at the end of the page gives it as
 * `ccl_device_forceinline float transform_inverse_dot(const float3 a_, const float3 b_)`. */
32{
33#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE42__)
/* Reinterpret the float3 arguments as SSE registers through a reference cast. */
34 const __m128 a = (const __m128 &)a_;
35 const __m128 b = (const __m128 &)b_;
/* Mask 0x7F: the high nibble (0x7) selects lanes 0..2 for the multiply-and-sum, the low nibble
 * (0xF) writes the sum to every output lane; _mm_cvtss_f32 then reads lane 0. */
36 return _mm_cvtss_f32(_mm_dp_ps(a, b, 0x7F));
37#endif
38
/* Generic fallback. When the SIMD path above is compiled in, it has already returned and this
 * statement is not reached. */
39 return dot(a_, b_);
40}
41
/* Body of transform_inverse_impl(tfm): builds and returns the inverse of `tfm` as a new Transform.
 *
 * NOTE(review): the signature line (listing line 42) is missing from this extract; the symbol
 * index at the end of the page gives it as
 * `ccl_device_forceinline Transform transform_inverse_impl(const Transform tfm)`.
 *
 * The x/y/z components of the three rows are treated as a 3x3 matrix that is inverted via its
 * adjoint and determinant; the w components are used to compute the translation of the result. */
43{
44 /* This implementation matches the one in Embree exactly, to ensure consistent
45 * results with the ray intersection of instances. */
/* Gather the columns of the stored rows: x, y, z hold the first, second and third component of
 * each row; w holds the fourth component of each row. */
46 float3 x = make_float3(tfm.x.x, tfm.y.x, tfm.z.x);
47 float3 y = make_float3(tfm.x.y, tfm.y.y, tfm.z.y);
48 float3 z = make_float3(tfm.x.z, tfm.y.z, tfm.z.z);
49 float3 w = make_float3(tfm.x.w, tfm.y.w, tfm.z.w);
50
51 /* Compute determinant. */
53
/* NOTE(review): listing line 52, which has to declare and initialise `det`, is absent from this
 * extract. Presumably it is the same expression as the recomputation inside the branch below,
 * i.e. transform_inverse_dot(x, cross(y, z)) - confirm against the original header. */
54 if (det == 0.0f) {
55 /* Matrix is degenerate (e.g. 0 scale on some axis), ideally we should
56 * never be in this situation, but try to invert it anyway with tweak.
57 *
58 * This logic does not match Embree which would just give an invalid
59 * matrix. A better solution would be to remove this and ensure any object
60 * matrix is valid. */
/* Nudge the diagonal entries by 1e-8 before retrying. */
61 x.x += 1e-8f;
62 y.y += 1e-8f;
63 z.z += 1e-8f;
64
/* Note that the determinant uses the generic cross(), while the adjoint rows further down use
 * transform_inverse_cross(). */
65 det = transform_inverse_dot(x, cross(y, z));
66 if (det == 0.0f) {
/* Still zero after the tweak: substitute FLT_MAX so the divisions below do not divide by zero. */
67 det = FLT_MAX;
68 }
69 }
70
71 /* Divide adjoint matrix by the determinant to compute inverse of 3x3 matrix. */
72 const float3 inverse_x = transform_inverse_cross(y, z) / det;
73 const float3 inverse_y = transform_inverse_cross(z, x) / det;
74 const float3 inverse_z = transform_inverse_cross(x, y) / det;
75
76 /* Compute translation and fill transform. */
/* Each output row is an inverse row with its fourth component set to the negated dot product of
 * that row with w. */
77 Transform itfm;
78 itfm.x = float3_to_float4(inverse_x, -transform_inverse_dot(inverse_x, w));
79 itfm.y = float3_to_float4(inverse_y, -transform_inverse_dot(inverse_y, w));
80 itfm.z = float3_to_float4(inverse_z, -transform_inverse_dot(inverse_z, w));
81
82 return itfm;
83}
SIMD_FORCE_INLINE const btScalar & z() const
Return the z value.
Definition btQuadWord.h:117
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
Definition btQuadWord.h:119
local_group_size(16, 16) .push_constant(Type b
additional_info("compositor_sum_squared_difference_float_shared") .push_constant(Type output_img float dot(value.rgb, luminance_coefficients)") .define("LOAD(value)"
#define ccl_device_forceinline
#define CCL_NAMESPACE_END
ccl_device_forceinline float3 make_float3(const float x, const float y, const float z)
ccl_device_inline float cross(const float2 a, const float2 b)
#define FLT_MAX
Definition stdcycles.h:14
float4 y
Definition transform.h:24
float4 x
Definition transform.h:24
float4 z
Definition transform.h:24
ccl_device_forceinline Transform transform_inverse_impl(const Transform tfm)
CCL_NAMESPACE_BEGIN ccl_device_forceinline float3 transform_inverse_cross(const float3 a_, const float3 b_)
ccl_device_forceinline float transform_inverse_dot(const float3 a_, const float3 b_)
ccl_device_inline float4 float3_to_float4(const float3 a)
Definition util/math.h:540