5#ifndef __UTIL_MATH_INTERSECT_H__
6#define __UTIL_MATH_INTERSECT_H__
21 const float3 d_vec = sphere_P - ray_P;
22 const float r_sq = sphere_radius * sphere_radius;
23 const float d_sq =
dot(d_vec, d_vec);
24 const float d_cos_theta =
dot(d_vec, ray_D);
26 if (d_sq > r_sq && d_cos_theta < 0.0f) {
31 const float d_sin_theta_sq =
len_squared(d_vec - d_cos_theta * ray_D);
33 if (d_sin_theta_sq > r_sq) {
39 const float t = d_cos_theta -
copysignf(
sqrtf(r_sq - d_sin_theta_sq), d_sq - r_sq);
41 if (t > ray_tmin && t < ray_tmax) {
43 *isect_P = ray_P + ray_D * t;
62 const float div =
dot(ray_D, disk_N);
67 const float t = -disk_t / div;
68 if (!(t > ray_tmin && t < ray_tmax)) {
73 if (
len_squared(
P - disk_P) > disk_radius * disk_radius) {
91 const float3 vp = ray_P - disk_P;
92 const float dp =
dot(vp, disk_N);
93 const float cos_angle =
dot(disk_N, -ray_D);
94 if (dp * cos_angle > 0.f)
96 float t = dp / cos_angle;
103 if (
dot(T, T) <
sqr(disk_radius) && (t > ray_tmin && t < ray_tmax)) {
104 *isect_P = ray_P + t * ray_D;
115#ifdef __KERNEL_NEON__
118 a = vsetq_lane_f32(x, a, 0);
119 float32x4_t reciprocal = vrecpeq_f32(a);
120 reciprocal = vmulq_f32(vrecpsq_f32(a, reciprocal), reciprocal);
121 reciprocal = vmulq_f32(vrecpsq_f32(a, reciprocal), reciprocal);
122 return vgetq_lane_f32(reciprocal, 0);
123#elif defined(__KERNEL_SSE__)
124 const __m128 a = _mm_set_ss(x);
125 const __m128 r = _mm_rcp_ss(a);
127# ifdef __KERNEL_AVX2_
128 return _mm_cvtss_f32(_mm_mul_ss(r, _mm_fnmadd_ss(r, a, _mm_set_ss(2.0f))));
130 return _mm_cvtss_f32(_mm_mul_ss(r, _mm_sub_ss(_mm_set_ss(2.0f), _mm_mul_ss(r, a))));
139#if defined(__KERNEL_SSE42__) && defined(__KERNEL_SSE__)
144 return a.x *
b.x + a.y *
b.y + a.z *
b.z;
150#if defined(__KERNEL_SSE42__) && defined(__KERNEL_SSE__)
156 return make_float3(a.y *
b.z - a.z *
b.y, a.z *
b.x - a.x *
b.z, a.x *
b.y - a.y *
b.x);
162 const float ray_tmin,
163 const float ray_tmax,
175 const float3 v0 = tri_a - ray_P;
176 const float3 v1 = tri_b - ray_P;
181 const float3 e1 = v0 - v1;
189 const float UVW =
U +
V +
W;
190 const float eps = FLT_EPSILON *
fabsf(UVW);
194 if (!(minUVW >= -
eps || maxUVW <=
eps)) {
200 const float3 Ng = Ng1 + Ng1;
201 const float den =
dot(Ng, ray_D);
208 const float T =
dot(v0, Ng);
209 const float t = T / den;
210 if (!(t >= ray_tmin && t <= ray_tmax)) {
215 *isect_u =
min(
U * rcp_uvw, 1.0f);
216 *isect_v =
min(
V * rcp_uvw, 1.0f);
235 const float3 e1 = v0 - v1;
249 return (minUVW >=
eps || maxUVW <= -
eps);
272 float t = -(
dot(ray_P, quad_n) -
dot(quad_P, quad_n)) /
dot(ray_D, quad_n);
273 if (!(t > ray_tmin && t < ray_tmax)) {
276 const float3 hit = ray_P + t * ray_D;
277 const float3 inplane = hit - quad_P;
278 const float u =
dot(inplane, inv_quad_u);
279 if (u < -0.5f || u > 0.5f) {
282 const float v =
dot(inplane, inv_quad_v);
286 if (ellipse && (u * u +
v *
v > 0.25f)) {
312 const float DN =
dot(ray_D,
N);
315 const float t = -
dot(
P,
N) / DN;
319 t_range->x =
fmaxf(t_range->x, t);
322 t_range->y =
fminf(t_range->y, t);
325 return t_range->x < t_range->y;
338 const float3 t_lower = (bbox_min - ray_P) * inv_ray_D;
339 const float3 t_upper = (bbox_max - ray_P) * inv_ray_D;
368 const float a =
dot(D_proj, D_proj);
369 float b =
dot(P_proj, D_proj);
372 const float t_mid = -
b / a;
373 P_proj += D_proj * t_mid;
376 b =
dot(P_proj, D_proj);
377 const float c =
dot(P_proj, P_proj) - 1.0f;
400 const float cos_angle_sq,
403 if (cos_angle_sq < 1e-4f) {
411 const float AD =
dot(axis, D);
412 const float AP =
dot(axis,
P);
414 const float a =
sqr(AD) - cos_angle_sq;
415 const float b = 2.0f * (AD * AP - cos_angle_sq *
dot(D,
P));
416 const float c =
sqr(AP) - cos_angle_sq *
dot(
P,
P);
418 float tmin = 0.0f, tmax =
FLT_MAX;
422 const bool tmin_valid = AP + tmin * AD > 0.0f;
423 const bool tmax_valid = AP + tmax * AD > 0.0f;
425 valid &= (tmin_valid || tmax_valid);
431 else if (!tmin_valid) {
ATTR_WARN_UNUSED_RESULT const BMVert * v2
ATTR_WARN_UNUSED_RESULT const BMVert * v
local_group_size(16, 16) .push_constant(Type b
additional_info("compositor_sum_squared_difference_float_shared") .push_constant(Type output_img float dot(value.rgb, luminance_coefficients)") .define("LOAD(value)"
#define ccl_device_forceinline
#define ccl_device_inline
#define CCL_NAMESPACE_END
CCL_NAMESPACE_BEGIN ccl_device_inline float madd(const float a, const float b, const float c)
ccl_device_inline float len_squared(const float2 a)
ccl_device_inline float reduce_max(const float2 a)
ccl_device_inline float2 normalize_len(const float2 a, ccl_private float *t)
ccl_device_inline float reduce_min(const float2 a)
ccl_device_inline float3 rcp(const float3 a)
ccl_device_inline float4 msub(const float4 a, const float4 b, const float4 c)
ccl_device bool ray_disk_intersect(float3 ray_P, float3 ray_D, float ray_tmin, float ray_tmax, float3 disk_P, float3 disk_N, float disk_radius, ccl_private float3 *isect_P, ccl_private float *isect_t)
ccl_device_inline bool ray_cone_intersect(const float3 axis, const float3 P, float3 D, const float cos_angle_sq, ccl_private float2 *t_range)
ccl_device_forceinline float ray_triangle_rcp(const float x)
ccl_device bool ray_aabb_intersect(const float3 bbox_min, const float3 bbox_max, const float3 ray_P, const float3 ray_D, ccl_private float2 *t_range)
CCL_NAMESPACE_BEGIN ccl_device bool ray_sphere_intersect(float3 ray_P, float3 ray_D, float ray_tmin, float ray_tmax, float3 sphere_P, float sphere_radius, ccl_private float3 *isect_P, ccl_private float *isect_t)
ccl_device bool ray_aligned_disk_intersect(float3 ray_P, float3 ray_D, float ray_tmin, float ray_tmax, float3 disk_P, float disk_radius, ccl_private float3 *isect_P, ccl_private float *isect_t)
ccl_device_inline bool ray_infinite_cylinder_intersect(const float3 P, const float3 D, const float len_u, const float len_v, ccl_private float2 *t_range)
ccl_device_forceinline bool ray_triangle_intersect(const float3 ray_P, const float3 ray_D, const float ray_tmin, const float ray_tmax, const float3 tri_a, const float3 tri_b, const float3 tri_c, ccl_private float *isect_u, ccl_private float *isect_v, ccl_private float *isect_t)
ccl_device bool ray_quad_intersect(float3 ray_P, float3 ray_D, float ray_tmin, float ray_tmax, float3 quad_P, float3 inv_quad_u, float3 inv_quad_v, float3 quad_n, ccl_private float3 *isect_P, ccl_private float *isect_t, ccl_private float *isect_u, ccl_private float *isect_v, bool ellipse)
ccl_device_forceinline bool ray_triangle_intersect_self(const float3 ray_P, const float3 ray_D, const float3 verts[3])
ccl_device_inline float3 ray_triangle_cross(const float3 a, const float3 b)
ccl_device_inline float ray_triangle_dot(const float3 a, const float3 b)
ccl_device bool ray_plane_intersect(const float3 N, const float3 P, const float3 ray_D, ccl_private float2 *t_range)
ccl_device_inline bool intervals_intersect(ccl_private float2 *first, const float2 second)
ccl_device_inline float sqr(float a)
ccl_device_inline float inversesqrtf(float f)
ccl_device_inline bool solve_quadratic(const float a, const float b, const float c, ccl_private float &x1, ccl_private float &x2)
ccl_device_inline float4 float3_to_float4(const float3 a)
ccl_device_inline float2 float3_to_float2(const float3 a)
CCL_NAMESPACE_BEGIN struct Window V