Blender V5.0
math_int4.h
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2013 Intel Corporation
2 * SPDX-FileCopyrightText: 2011-2022 Blender Foundation
3 *
4 * SPDX-License-Identifier: Apache-2.0 */
5
6#pragma once
7
8#include "util/types_float4.h"
9#include "util/types_int4.h"
10
12
13#if !defined(__KERNEL_METAL__)
15{
16# ifdef __KERNEL_SSE__
17 return int4(_mm_add_epi32(a.m128, b.m128));
18# else
19 return make_int4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
20# endif
21}
22
24{
25 return a = a + b;
26}
27
29{
30# ifdef __KERNEL_SSE__
31 return int4(_mm_sub_epi32(a.m128, b.m128));
32# else
33 return make_int4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
34# endif
35}
36
38{
39 return a = a - b;
40}
41
43{
44 return make_int4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
45}
46
48{
49# ifdef __KERNEL_SSE__
50 return int4(_mm_srai_epi32(a.m128, i));
51# else
52 return make_int4(a.x >> i, a.y >> i, a.z >> i, a.w >> i);
53# endif
54}
55
57{
58# ifdef __KERNEL_SSE__
59 return int4(_mm_slli_epi32(a.m128, i));
60# else
61 return make_int4(a.x << i, a.y << i, a.z << i, a.w << i);
62# endif
63}
64
66{
67# ifdef __KERNEL_SSE__
68 return int4(_mm_cmplt_epi32(a.m128, b.m128));
69# else
70 return make_int4(a.x < b.x, a.y < b.y, a.z < b.z, a.w < b.w);
71# endif
72}
73
75{
76 return a < make_int4(b);
77}
78
80{
81# ifdef __KERNEL_SSE__
82 return int4(_mm_cmpeq_epi32(a.m128, b.m128));
83# else
84 return make_int4(a.x == b.x, a.y == b.y, a.z == b.z, a.w == b.w);
85# endif
86}
87
89{
90 return a == make_int4(b);
91}
92
94{
95# ifdef __KERNEL_SSE__
96 return int4(_mm_xor_si128(_mm_set1_epi32(0xffffffff), _mm_cmplt_epi32(a.m128, b.m128)));
97# else
98 return make_int4(a.x >= b.x, a.y >= b.y, a.z >= b.z, a.w >= b.w);
99# endif
100}
101
103{
104 return a >= make_int4(b);
105}
106
108{
109# ifdef __KERNEL_SSE__
110 return int4(_mm_and_si128(a.m128, b.m128));
111# else
112 return make_int4(a.x & b.x, a.y & b.y, a.z & b.z, a.w & b.w);
113# endif
114}
115
117{
118# ifdef __KERNEL_SSE__
119 return int4(_mm_or_si128(a.m128, b.m128));
120# else
121 return make_int4(a.x | b.x, a.y | b.y, a.z | b.z, a.w | b.w);
122# endif
123}
124
126{
127# ifdef __KERNEL_SSE__
128 return int4(_mm_xor_si128(a.m128, b.m128));
129# else
130 return make_int4(a.x ^ b.x, a.y ^ b.y, a.z ^ b.z, a.w ^ b.w);
131# endif
132}
133
135{
136 return make_int4(a) & b;
137}
138
140{
141 return a & make_int4(b);
142}
143
145{
146 return make_int4(a) | b;
147}
148
150{
151 return a | make_int4(b);
152}
153
155{
156 return make_int4(a) ^ b;
157}
158
160{
161 return a ^ make_int4(b);
162}
163
165{
166 return a = a & b;
167}
169{
170 return a = a & b;
171}
172
174{
175 return a = a | b;
176}
178{
179 return a = a | b;
180}
181
183{
184 return a = a ^ b;
185}
187{
188 return a = a ^ b;
189}
190
192{
193 return a = a << b;
194}
196{
197 return a = a >> b;
198}
199
200# ifdef __KERNEL_SSE__
201ccl_device_forceinline int4 srl(const int4 a, const int32_t b)
202{
203 return int4(_mm_srli_epi32(a.m128, b));
204}
205# endif
206
208{
209# if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE42__)
210 return int4(_mm_min_epi32(a.m128, b.m128));
211# else
212 return make_int4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w));
213# endif
214}
215
217{
218# if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE42__)
219 return int4(_mm_max_epi32(a.m128, b.m128));
220# else
221 return make_int4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w));
222# endif
223}
224
225ccl_device_inline int4 clamp(const int4 a, const int4 mn, const int4 mx)
226{
227 return min(max(a, mn), mx);
228}
229
231{
232# ifdef __KERNEL_SSE__
233 return int4(_mm_or_si128(_mm_and_si128(mask, a), _mm_andnot_si128(mask, b)));
234# else
235 return make_int4(
236 (mask.x) ? a.x : b.x, (mask.y) ? a.y : b.y, (mask.z) ? a.z : b.z, (mask.w) ? a.w : b.w);
237# endif
238}
239
241{
242# ifdef __KERNEL_SSE__
243 return int4(_mm_loadu_si128((__m128i *)v));
244# else
245 return make_int4(v[0], v[1], v[2], v[3]);
246# endif
247}
248#endif /* __KERNEL_METAL__ */
249
251{
252#ifdef __KERNEL_SSE__
253 return float4(_mm_castsi128_ps(a));
254#else
255 return make_float4(
257#endif
258}
259
260#ifdef __KERNEL_SSE__
261ccl_device_forceinline int4 andnot(const int4 a, const int4 b)
262{
263 return int4(_mm_andnot_si128(a.m128, b.m128));
264}
265
266template<size_t i0, const size_t i1, const size_t i2, const size_t i3>
267ccl_device_forceinline int4 shuffle(const int4 a)
268{
269# ifdef __KERNEL_NEON__
270 int32x4_t result = shuffle_neon<int32x4_t, i0, i1, i2, i3>(vreinterpretq_s32_m128i(a));
271 return int4(vreinterpretq_m128i_s32(result));
272# else
273 return int4(_mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0)));
274# endif
275}
276
277template<size_t i0, const size_t i1, const size_t i2, const size_t i3>
278ccl_device_forceinline int4 shuffle(const int4 a, const int4 b)
279{
280# ifdef __KERNEL_NEON__
281 int32x4_t result = shuffle_neon<int32x4_t, i0, i1, i2, i3>(vreinterpretq_s32_m128i(a),
282 vreinterpretq_s32_m128i(b));
283 return int4(vreinterpretq_m128i_s32(result));
284# else
285 return int4(_mm_castps_si128(
286 _mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _MM_SHUFFLE(i3, i2, i1, i0))));
287# endif
288}
289
290template<size_t i0> ccl_device_forceinline int4 shuffle(const int4 b)
291{
292 return shuffle<i0, i0, i0, i0>(b);
293}
294#endif
295
ATTR_WARN_UNUSED_RESULT const BMVert * v
#define ccl_device_forceinline
#define ccl_device_inline
#define CCL_NAMESPACE_END
#define __int_as_float(x)
ccl_device_forceinline int4 make_int4(const int x, const int y, const int z, const int w)
#define cast
#define select(A, B, C)
VecBase< float, 4 > float4
VecBase< int, 4 > int4
ccl_device_inline float2 mask(const MaskType mask, const float2 a)
ccl_device_inline int4 operator>>(const int4 a, const int i)
Definition math_int4.h:47
ccl_device_inline int4 load_int4(const int *v)
Definition math_int4.h:240
ccl_device_inline int4 operator^(const int4 a, const int4 b)
Definition math_int4.h:125
ccl_device_inline int4 operator<(const int4 a, const int4 b)
Definition math_int4.h:65
ccl_device_inline int4 & operator|=(int4 &a, const int4 b)
Definition math_int4.h:173
ccl_device_inline int4 operator-(const int4 a, const int4 b)
Definition math_int4.h:28
ccl_device_inline int4 operator==(const int4 a, const int4 b)
Definition math_int4.h:79
ccl_device_inline int4 & operator&=(int4 &a, const int4 b)
Definition math_int4.h:164
ccl_device_inline int4 operator|(const int4 a, const int4 b)
Definition math_int4.h:116
ccl_device_inline int4 operator+=(int4 &a, const int4 b)
Definition math_int4.h:23
ccl_device_inline int4 & operator>>=(int4 &a, const int32_t b)
Definition math_int4.h:195
ccl_device_inline int4 & operator<<=(int4 &a, const int32_t b)
Definition math_int4.h:191
ccl_device_inline int4 operator*(const int4 a, const int4 b)
Definition math_int4.h:42
CCL_NAMESPACE_BEGIN ccl_device_inline int4 operator+(const int4 a, const int4 b)
Definition math_int4.h:14
ccl_device_inline int4 operator>=(const int4 a, const int4 b)
Definition math_int4.h:93
ccl_device_inline int4 operator<<(const int4 a, const int i)
Definition math_int4.h:56
ccl_device_inline int4 & operator^=(int4 &a, const int4 b)
Definition math_int4.h:182
ccl_device_inline int4 operator&(const int4 a, const int4 b)
Definition math_int4.h:107
ccl_device_inline int4 clamp(const int4 a, const int4 mn, const int4 mx)
Definition math_int4.h:225
ccl_device_inline int4 operator-=(int4 &a, const int4 b)
Definition math_int4.h:37
#define make_float4
#define min(a, b)
Definition sort.cc:36
i
Definition text_draw.cc:230
max
Definition text_draw.cc:251