6#ifndef __UTIL_MATH_INT4_H__
7#define __UTIL_MATH_INT4_H__
10# error "Do not include this file directly, include util/types.h instead."
19 return int4(_mm_add_epi32(a.m128,
b.m128));
33 return int4(_mm_sub_epi32(a.m128,
b.m128));
47 return int4(_mm_srai_epi32(a.m128, i));
49 return make_int4(a.x >> i, a.y >> i, a.z >> i, a.w >> i);
56 return int4(_mm_slli_epi32(a.m128, i));
58 return make_int4(a.x << i, a.y << i, a.z << i, a.w << i);
65 return int4(_mm_cmplt_epi32(a.m128,
b.m128));
79 return int4(_mm_cmpeq_epi32(a.m128,
b.m128));
81 return make_int4(a.x ==
b.x, a.y ==
b.y, a.z ==
b.z, a.w ==
b.w);
93 return int4(_mm_xor_si128(_mm_set1_epi32(0xffffffff), _mm_cmplt_epi32(a.m128,
b.m128)));
95 return make_int4(a.x >=
b.x, a.y >=
b.y, a.z >=
b.z, a.w >=
b.w);
106# ifdef __KERNEL_SSE__
107 return int4(_mm_and_si128(a.m128,
b.m128));
109 return make_int4(a.x &
b.x, a.y &
b.y, a.z &
b.z, a.w &
b.w);
115# ifdef __KERNEL_SSE__
116 return int4(_mm_or_si128(a.m128,
b.m128));
118 return make_int4(a.x |
b.x, a.y |
b.y, a.z |
b.z, a.w |
b.w);
124# ifdef __KERNEL_SSE__
125 return int4(_mm_xor_si128(a.m128,
b.m128));
127 return make_int4(a.x ^
b.x, a.y ^
b.y, a.z ^
b.z, a.w ^
b.w);
197# ifdef __KERNEL_SSE__
200 return int4(_mm_srli_epi32(a.m128,
b));
206# if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE42__)
207 return int4(_mm_min_epi32(a.m128,
b.m128));
215# if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE42__)
216 return int4(_mm_max_epi32(a.m128,
b.m128));
218 return make_int4(
max(a.x,
b.x), max(a.y,
b.y), max(a.z,
b.z), max(a.w,
b.w));
224 return min(
max(a, mn), mx);
229# ifdef __KERNEL_SSE__
230 return int4(_mm_or_si128(_mm_and_si128(mask, a), _mm_andnot_si128(mask,
b)));
233 (mask.x) ? a.x :
b.x, (mask.y) ? a.y :
b.y, (mask.z) ? a.z :
b.z, (mask.w) ? a.w :
b.w);
239# ifdef __KERNEL_SSE__
240 return int4(_mm_loadu_si128((__m128i *)
v));
250 return float4(_mm_castsi128_ps(a));
260 return int4(_mm_andnot_si128(a.m128,
b.m128));
263template<
size_t i0,
size_t i1,
size_t i2,
size_t i3>
266# ifdef __KERNEL_NEON__
267 int32x4_t result = shuffle_neon<int32x4_t, i0, i1, i2, i3>(vreinterpretq_s32_m128i(a));
268 return int4(vreinterpretq_m128i_s32(result));
270 return int4(_mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0)));
274template<
size_t i0,
size_t i1,
size_t i2,
size_t i3>
277# ifdef __KERNEL_NEON__
278 int32x4_t result = shuffle_neon<int32x4_t, i0, i1, i2, i3>(vreinterpretq_s32_m128i(a),
279 vreinterpretq_s32_m128i(
b));
280 return int4(vreinterpretq_m128i_s32(result));
282 return int4(_mm_castps_si128(
283 _mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(
b), _MM_SHUFFLE(i3, i2, i1, i0))));
289 return shuffle<i0, i0, i0, i0>(
b);
ATTR_WARN_UNUSED_RESULT const BMVert * v
local_group_size(16, 16) .push_constant(Type b
#define ccl_device_forceinline
#define ccl_device_inline
#define CCL_NAMESPACE_END
ccl_device_inline int4 load_int4(const int *v)
ccl_device_inline int4 operator>>(const int4 a, int i)
ccl_device_inline int4 operator^(const int4 a, const int4 b)
ccl_device_inline int4 operator<(const int4 a, const int4 b)
ccl_device_inline int4 & operator|=(int4 &a, const int4 b)
ccl_device_inline int4 operator-(const int4 a, const int4 b)
ccl_device_inline int4 operator<<(const int4 a, int i)
ccl_device_inline int4 operator==(const int4 a, const int4 b)
ccl_device_inline int4 & operator&=(int4 &a, const int4 b)
ccl_device_inline int4 operator|(const int4 a, const int4 b)
ccl_device_inline int4 operator+=(int4 &a, const int4 b)
ccl_device_inline int4 & operator>>=(int4 &a, const int32_t b)
ccl_device_inline int4 select(const int4 mask, const int4 a, const int4 b)
ccl_device_inline int4 & operator<<=(int4 &a, const int32_t b)
CCL_NAMESPACE_BEGIN ccl_device_inline int4 operator+(const int4 a, const int4 b)
ccl_device_inline int4 operator>=(const int4 a, const int4 b)
ccl_device_inline int4 & operator^=(int4 &a, const int4 b)
ccl_device_inline int4 operator&(const int4 a, const int4 b)
ccl_device_inline int4 clamp(const int4 a, const int4 mn, const int4 mx)
ccl_device_inline float4 cast(const int4 a)
ccl_device_inline int4 operator-=(int4 &a, const int4 b)