Blender V4.3
math_int4.h
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2013 Intel Corporation
2 * SPDX-FileCopyrightText: 2011-2022 Blender Foundation
3 *
4 * SPDX-License-Identifier: Apache-2.0 */
5
6#ifndef __UTIL_MATH_INT4_H__
7#define __UTIL_MATH_INT4_H__
8
9#ifndef __UTIL_MATH_H__
10# error "Do not include this file directly, include util/types.h instead."
11#endif
12
14
15#ifndef __KERNEL_GPU__
17{
18# ifdef __KERNEL_SSE__
19 return int4(_mm_add_epi32(a.m128, b.m128));
20# else
21 return make_int4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
22# endif
23}
24
26{
27 return a = a + b;
28}
29
31{
32# ifdef __KERNEL_SSE__
33 return int4(_mm_sub_epi32(a.m128, b.m128));
34# else
35 return make_int4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
36# endif
37}
38
40{
41 return a = a - b;
42}
43
45{
46# ifdef __KERNEL_SSE__
47 return int4(_mm_srai_epi32(a.m128, i));
48# else
49 return make_int4(a.x >> i, a.y >> i, a.z >> i, a.w >> i);
50# endif
51}
52
54{
55# ifdef __KERNEL_SSE__
56 return int4(_mm_slli_epi32(a.m128, i));
57# else
58 return make_int4(a.x << i, a.y << i, a.z << i, a.w << i);
59# endif
60}
61
63{
64# ifdef __KERNEL_SSE__
65 return int4(_mm_cmplt_epi32(a.m128, b.m128));
66# else
67 return make_int4(a.x < b.x, a.y < b.y, a.z < b.z, a.w < b.w);
68# endif
69}
70
/* NOTE(review): the declaration line (listing line 71) is missing from this
 * extract. The body broadcasts `b` with make_int4() and defers to the
 * int4/int4 `<`, so this is presumably the scalar right-hand overload of
 * operator< -- confirm the exact signature against the original header. */
72{
73 return a < make_int4(b);
74}
75
77{
78# ifdef __KERNEL_SSE__
79 return int4(_mm_cmpeq_epi32(a.m128, b.m128));
80# else
81 return make_int4(a.x == b.x, a.y == b.y, a.z == b.z, a.w == b.w);
82# endif
83}
84
/* NOTE(review): the declaration line (listing line 85) is missing from this
 * extract. The body broadcasts `b` with make_int4() and defers to the
 * int4/int4 `==`, so this is presumably the scalar right-hand overload of
 * operator== -- confirm the exact signature against the original header. */
86{
87 return a == make_int4(b);
88}
89
91{
92# ifdef __KERNEL_SSE__
93 return int4(_mm_xor_si128(_mm_set1_epi32(0xffffffff), _mm_cmplt_epi32(a.m128, b.m128)));
94# else
95 return make_int4(a.x >= b.x, a.y >= b.y, a.z >= b.z, a.w >= b.w);
96# endif
97}
98
/* NOTE(review): the declaration line (listing line 99) is missing from this
 * extract. The body broadcasts `b` with make_int4() and defers to the
 * int4/int4 `>=`, so this is presumably the scalar right-hand overload of
 * operator>= -- confirm the exact signature against the original header. */
100{
101 return a >= make_int4(b);
102}
103
105{
106# ifdef __KERNEL_SSE__
107 return int4(_mm_and_si128(a.m128, b.m128));
108# else
109 return make_int4(a.x & b.x, a.y & b.y, a.z & b.z, a.w & b.w);
110# endif
111}
112
114{
115# ifdef __KERNEL_SSE__
116 return int4(_mm_or_si128(a.m128, b.m128));
117# else
118 return make_int4(a.x | b.x, a.y | b.y, a.z | b.z, a.w | b.w);
119# endif
120}
121
123{
124# ifdef __KERNEL_SSE__
125 return int4(_mm_xor_si128(a.m128, b.m128));
126# else
127 return make_int4(a.x ^ b.x, a.y ^ b.y, a.z ^ b.z, a.w ^ b.w);
128# endif
129}
130
/* NOTE(review): declaration line (listing line 131) is missing from this
 * extract. The body broadcasts `a` with make_int4() and ANDs it with `b`;
 * presumably the scalar-on-the-left overload of operator& -- confirm. */
132{
133 return make_int4(a) & b;
134}
135
/* NOTE(review): declaration line (listing line 136) is missing from this
 * extract. The body ANDs `a` with a make_int4() broadcast of `b`;
 * presumably the scalar-on-the-right overload of operator& -- confirm. */
137{
138 return a & make_int4(b);
139}
140
/* NOTE(review): declaration line (listing line 141) is missing from this
 * extract. The body broadcasts `a` with make_int4() and ORs it with `b`;
 * presumably the scalar-on-the-left overload of operator| -- confirm. */
142{
143 return make_int4(a) | b;
144}
145
/* NOTE(review): declaration line (listing line 146) is missing from this
 * extract. The body ORs `a` with a make_int4() broadcast of `b`;
 * presumably the scalar-on-the-right overload of operator| -- confirm. */
147{
148 return a | make_int4(b);
149}
150
/* NOTE(review): declaration line (listing line 151) is missing from this
 * extract. The body broadcasts `a` with make_int4() and XORs it with `b`;
 * presumably the scalar-on-the-left overload of operator^ -- confirm. */
152{
153 return make_int4(a) ^ b;
154}
155
/* NOTE(review): declaration line (listing line 156) is missing from this
 * extract. The body XORs `a` with a make_int4() broadcast of `b`;
 * presumably the scalar-on-the-right overload of operator^ -- confirm. */
157{
158 return a ^ make_int4(b);
159}
160
162{
163 return a = a & b;
164}
/* NOTE(review): declaration line (listing line 165) is missing from this
 * extract. The body assigns `a & b` back to `a`; since it repeats the
 * int4/int4 body directly above, this is presumably a second operator&=
 * overload (e.g. with a scalar `b`) -- confirm against the original header. */
166{
167 return a = a & b;
168}
169
171{
172 return a = a | b;
173}
/* NOTE(review): declaration line (listing line 174) is missing from this
 * extract. The body assigns `a | b` back to `a`; since it repeats the
 * int4/int4 body directly above, this is presumably a second operator|=
 * overload (e.g. with a scalar `b`) -- confirm against the original header. */
175{
176 return a = a | b;
177}
178
180{
181 return a = a ^ b;
182}
/* NOTE(review): declaration line (listing line 183) is missing from this
 * extract. The body assigns `a ^ b` back to `a`; since it repeats the
 * int4/int4 body directly above, this is presumably a second operator^=
 * overload (e.g. with a scalar `b`) -- confirm against the original header. */
184{
185 return a = a ^ b;
186}
187
189{
190 return a = a << b;
191}
193{
194 return a = a >> b;
195}
196
197# ifdef __KERNEL_SSE__
198ccl_device_forceinline const int4 srl(const int4 a, const int32_t b)
199{
200 return int4(_mm_srli_epi32(a.m128, b));
201}
202# endif
203
/* NOTE(review): declaration line (listing line 204) is missing from this
 * extract. The body computes the lane-wise minimum of `a` and `b`, and
 * clamp() below calls min() on int4 values, so this is presumably the
 * int4 overload of min() -- confirm the signature against the original header.
 * The packed-minimum intrinsic is only used when __KERNEL_SSE42__ is also
 * defined; otherwise each lane goes through the scalar min(). */
205{
206# if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE42__)
207 return int4(_mm_min_epi32(a.m128, b.m128));
208# else
209 return make_int4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w));
210# endif
211}
212
/* NOTE(review): declaration line (listing line 213) is missing from this
 * extract. The body computes the lane-wise maximum of `a` and `b`, and
 * clamp() below calls max() on int4 values, so this is presumably the
 * int4 overload of max() -- confirm the signature against the original header.
 * The packed-maximum intrinsic is only used when __KERNEL_SSE42__ is also
 * defined; otherwise each lane goes through the scalar max(). */
214{
215# if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE42__)
216 return int4(_mm_max_epi32(a.m128, b.m128));
217# else
218 return make_int4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w));
219# endif
220}
221
222ccl_device_inline int4 clamp(const int4 a, const int4 mn, const int4 mx)
223{
224 return min(max(a, mn), mx);
225}
226
227ccl_device_inline int4 select(const int4 mask, const int4 a, const int4 b)
228{
229# ifdef __KERNEL_SSE__
230 return int4(_mm_or_si128(_mm_and_si128(mask, a), _mm_andnot_si128(mask, b)));
231# else
232 return make_int4(
233 (mask.x) ? a.x : b.x, (mask.y) ? a.y : b.y, (mask.z) ? a.z : b.z, (mask.w) ? a.w : b.w);
234# endif
235}
236
238{
239# ifdef __KERNEL_SSE__
240 return int4(_mm_loadu_si128((__m128i *)v));
241# else
242 return make_int4(v[0], v[1], v[2], v[3]);
243# endif
244}
245#endif /* __KERNEL_GPU__ */
246
248{
249#ifdef __KERNEL_SSE__
250 return float4(_mm_castsi128_ps(a));
251#else
252 return make_float4(
254#endif
255}
256
257#ifdef __KERNEL_SSE__
258ccl_device_forceinline int4 andnot(const int4 a, const int4 b)
259{
260 return int4(_mm_andnot_si128(a.m128, b.m128));
261}
262
263template<size_t i0, size_t i1, size_t i2, size_t i3>
264ccl_device_forceinline int4 shuffle(const int4 a)
265{
266# ifdef __KERNEL_NEON__
267 int32x4_t result = shuffle_neon<int32x4_t, i0, i1, i2, i3>(vreinterpretq_s32_m128i(a));
268 return int4(vreinterpretq_m128i_s32(result));
269# else
270 return int4(_mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0)));
271# endif
272}
273
274template<size_t i0, size_t i1, size_t i2, size_t i3>
275ccl_device_forceinline int4 shuffle(const int4 a, const int4 b)
276{
277# ifdef __KERNEL_NEON__
278 int32x4_t result = shuffle_neon<int32x4_t, i0, i1, i2, i3>(vreinterpretq_s32_m128i(a),
279 vreinterpretq_s32_m128i(b));
280 return int4(vreinterpretq_m128i_s32(result));
281# else
282 return int4(_mm_castps_si128(
283 _mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _MM_SHUFFLE(i3, i2, i1, i0))));
284# endif
285}
286
287template<size_t i0> ccl_device_forceinline int4 shuffle(const int4 b)
288{
289 return shuffle<i0, i0, i0, i0>(b);
290}
291#endif
292
294
295#endif /* __UTIL_MATH_INT4_H__ */
ATTR_WARN_UNUSED_RESULT const BMVert * v
local_group_size(16, 16) .push_constant(Type b
#define ccl_device_forceinline
#define ccl_device_inline
#define CCL_NAMESPACE_END
ccl_device_forceinline float4 make_float4(const float x, const float y, const float z, const float w)
#define __int_as_float(x)
ccl_device_forceinline int4 make_int4(const int x, const int y, const int z, const int w)
ccl_device_inline int4 load_int4(const int *v)
Definition math_int4.h:237
ccl_device_inline int4 operator>>(const int4 a, int i)
Definition math_int4.h:44
ccl_device_inline int4 operator^(const int4 a, const int4 b)
Definition math_int4.h:122
ccl_device_inline int4 operator<(const int4 a, const int4 b)
Definition math_int4.h:62
ccl_device_inline int4 & operator|=(int4 &a, const int4 b)
Definition math_int4.h:170
ccl_device_inline int4 operator-(const int4 a, const int4 b)
Definition math_int4.h:30
ccl_device_inline int4 operator<<(const int4 a, int i)
Definition math_int4.h:53
ccl_device_inline int4 operator==(const int4 a, const int4 b)
Definition math_int4.h:76
ccl_device_inline int4 & operator&=(int4 &a, const int4 b)
Definition math_int4.h:161
ccl_device_inline int4 operator|(const int4 a, const int4 b)
Definition math_int4.h:113
ccl_device_inline int4 operator+=(int4 &a, const int4 b)
Definition math_int4.h:25
ccl_device_inline int4 & operator>>=(int4 &a, const int32_t b)
Definition math_int4.h:192
ccl_device_inline int4 select(const int4 mask, const int4 a, const int4 b)
Definition math_int4.h:227
ccl_device_inline int4 & operator<<=(int4 &a, const int32_t b)
Definition math_int4.h:188
CCL_NAMESPACE_BEGIN ccl_device_inline int4 operator+(const int4 a, const int4 b)
Definition math_int4.h:16
ccl_device_inline int4 operator>=(const int4 a, const int4 b)
Definition math_int4.h:90
ccl_device_inline int4 & operator^=(int4 &a, const int4 b)
Definition math_int4.h:179
ccl_device_inline int4 operator&(const int4 a, const int4 b)
Definition math_int4.h:104
ccl_device_inline int4 clamp(const int4 a, const int4 mn, const int4 mx)
Definition math_int4.h:222
ccl_device_inline float4 cast(const int4 a)
Definition math_int4.h:247
ccl_device_inline int4 operator-=(int4 &a, const int4 b)
Definition math_int4.h:39
#define min(a, b)
Definition sort.c:32
signed int int32_t
Definition stdint.h:77
float max