13#if !defined(__KERNEL_METAL__)
17 return int4(_mm_add_epi32(a.m128,
b.m128));
31 return int4(_mm_sub_epi32(a.m128,
b.m128));
50 return int4(_mm_srai_epi32(a.m128,
i));
59 return int4(_mm_slli_epi32(a.m128,
i));
68 return int4(_mm_cmplt_epi32(a.m128,
b.m128));
82 return int4(_mm_cmpeq_epi32(a.m128,
b.m128));
96 return int4(_mm_xor_si128(_mm_set1_epi32(0xffffffff), _mm_cmplt_epi32(a.m128,
b.m128)));
109# ifdef __KERNEL_SSE__
110 return int4(_mm_and_si128(a.m128,
b.m128));
118# ifdef __KERNEL_SSE__
119 return int4(_mm_or_si128(a.m128,
b.m128));
127# ifdef __KERNEL_SSE__
128 return int4(_mm_xor_si128(a.m128,
b.m128));
200# ifdef __KERNEL_SSE__
203 return int4(_mm_srli_epi32(a.m128,
b));
209# if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE42__)
210 return int4(_mm_min_epi32(a.m128,
b.m128));
218# if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE42__)
219 return int4(_mm_max_epi32(a.m128,
b.m128));
227 return min(
max(a, mn), mx);
232# ifdef __KERNEL_SSE__
233 return int4(_mm_or_si128(_mm_and_si128(
mask, a), _mm_andnot_si128(
mask,
b)));
242# ifdef __KERNEL_SSE__
243 return int4(_mm_loadu_si128((__m128i *)
v));
253 return float4(_mm_castsi128_ps(a));
263 return int4(_mm_andnot_si128(a.m128,
b.m128));
266template<
size_t i0, const
size_t i1, const
size_t i2, const
size_t i3>
269# ifdef __KERNEL_NEON__
270 int32x4_t
result = shuffle_neon<int32x4_t, i0, i1, i2, i3>(vreinterpretq_s32_m128i(a));
273 return int4(_mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0)));
277template<
size_t i0, const
size_t i1, const
size_t i2, const
size_t i3>
280# ifdef __KERNEL_NEON__
281 int32x4_t
result = shuffle_neon<int32x4_t, i0, i1, i2, i3>(vreinterpretq_s32_m128i(a),
282 vreinterpretq_s32_m128i(
b));
285 return int4(_mm_castps_si128(
286 _mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(
b), _MM_SHUFFLE(i3, i2, i1, i0))));
292 return shuffle<i0, i0, i0, i0>(
b);
ATTR_WARN_UNUSED_RESULT const BMVert * v
#define ccl_device_forceinline
#define ccl_device_inline
#define CCL_NAMESPACE_END
VecBase< float, 4 > float4
ccl_device_inline float2 mask(const MaskType mask, const float2 a)
ccl_device_inline int4 operator>>(const int4 a, const int i)
ccl_device_inline int4 load_int4(const int *v)
ccl_device_inline int4 operator^(const int4 a, const int4 b)
ccl_device_inline int4 operator<(const int4 a, const int4 b)
ccl_device_inline int4 & operator|=(int4 &a, const int4 b)
ccl_device_inline int4 operator-(const int4 a, const int4 b)
ccl_device_inline int4 operator==(const int4 a, const int4 b)
ccl_device_inline int4 & operator&=(int4 &a, const int4 b)
ccl_device_inline int4 operator|(const int4 a, const int4 b)
ccl_device_inline int4 operator+=(int4 &a, const int4 b)
ccl_device_inline int4 & operator>>=(int4 &a, const int32_t b)
ccl_device_inline int4 & operator<<=(int4 &a, const int32_t b)
ccl_device_inline int4 operator*(const int4 a, const int4 b)
CCL_NAMESPACE_BEGIN ccl_device_inline int4 operator+(const int4 a, const int4 b)
ccl_device_inline int4 operator>=(const int4 a, const int4 b)
ccl_device_inline int4 operator<<(const int4 a, const int i)
ccl_device_inline int4 & operator^=(int4 &a, const int4 b)
ccl_device_inline int4 operator&(const int4 a, const int4 b)
ccl_device_inline int4 clamp(const int4 a, const int4 mn, const int4 mx)
ccl_device_inline int4 operator-=(int4 &a, const int4 b)