Blender V4.5
math_int4.h
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2013 Intel Corporation
2 * SPDX-FileCopyrightText: 2011-2022 Blender Foundation
3 *
4 * SPDX-License-Identifier: Apache-2.0 */
5
6#pragma once
7
8#include "util/types_float4.h"
9#include "util/types_int4.h"
10
12
13#ifndef __KERNEL_GPU__
15{
16# ifdef __KERNEL_SSE__
17 return int4(_mm_add_epi32(a.m128, b.m128));
18# else
19 return make_int4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
20# endif
21}
22
24{
25 return a = a + b;
26}
27
29{
30# ifdef __KERNEL_SSE__
31 return int4(_mm_sub_epi32(a.m128, b.m128));
32# else
33 return make_int4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
34# endif
35}
36
38{
39 return a = a - b;
40}
41
43{
44# ifdef __KERNEL_SSE__
45 return int4(_mm_srai_epi32(a.m128, i));
46# else
47 return make_int4(a.x >> i, a.y >> i, a.z >> i, a.w >> i);
48# endif
49}
50
52{
53# ifdef __KERNEL_SSE__
54 return int4(_mm_slli_epi32(a.m128, i));
55# else
56 return make_int4(a.x << i, a.y << i, a.z << i, a.w << i);
57# endif
58}
59
61{
62# ifdef __KERNEL_SSE__
63 return int4(_mm_cmplt_epi32(a.m128, b.m128));
64# else
65 return make_int4(a.x < b.x, a.y < b.y, a.z < b.z, a.w < b.w);
66# endif
67}
68
70{
71 return a < make_int4(b);
72}
73
75{
76# ifdef __KERNEL_SSE__
77 return int4(_mm_cmpeq_epi32(a.m128, b.m128));
78# else
79 return make_int4(a.x == b.x, a.y == b.y, a.z == b.z, a.w == b.w);
80# endif
81}
82
84{
85 return a == make_int4(b);
86}
87
89{
90# ifdef __KERNEL_SSE__
91 return int4(_mm_xor_si128(_mm_set1_epi32(0xffffffff), _mm_cmplt_epi32(a.m128, b.m128)));
92# else
93 return make_int4(a.x >= b.x, a.y >= b.y, a.z >= b.z, a.w >= b.w);
94# endif
95}
96
98{
99 return a >= make_int4(b);
100}
101
103{
104# ifdef __KERNEL_SSE__
105 return int4(_mm_and_si128(a.m128, b.m128));
106# else
107 return make_int4(a.x & b.x, a.y & b.y, a.z & b.z, a.w & b.w);
108# endif
109}
110
112{
113# ifdef __KERNEL_SSE__
114 return int4(_mm_or_si128(a.m128, b.m128));
115# else
116 return make_int4(a.x | b.x, a.y | b.y, a.z | b.z, a.w | b.w);
117# endif
118}
119
121{
122# ifdef __KERNEL_SSE__
123 return int4(_mm_xor_si128(a.m128, b.m128));
124# else
125 return make_int4(a.x ^ b.x, a.y ^ b.y, a.z ^ b.z, a.w ^ b.w);
126# endif
127}
128
130{
131 return make_int4(a) & b;
132}
133
135{
136 return a & make_int4(b);
137}
138
140{
141 return make_int4(a) | b;
142}
143
145{
146 return a | make_int4(b);
147}
148
150{
151 return make_int4(a) ^ b;
152}
153
155{
156 return a ^ make_int4(b);
157}
158
160{
161 return a = a & b;
162}
164{
165 return a = a & b;
166}
167
169{
170 return a = a | b;
171}
173{
174 return a = a | b;
175}
176
178{
179 return a = a ^ b;
180}
182{
183 return a = a ^ b;
184}
185
187{
188 return a = a << b;
189}
191{
192 return a = a >> b;
193}
194
195# ifdef __KERNEL_SSE__
196ccl_device_forceinline int4 srl(const int4 a, const int32_t b)
197{
198 return int4(_mm_srli_epi32(a.m128, b));
199}
200# endif
201
203{
204# if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE42__)
205 return int4(_mm_min_epi32(a.m128, b.m128));
206# else
207 return make_int4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w));
208# endif
209}
210
212{
213# if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE42__)
214 return int4(_mm_max_epi32(a.m128, b.m128));
215# else
216 return make_int4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w));
217# endif
218}
219
220ccl_device_inline int4 clamp(const int4 a, const int4 mn, const int4 mx)
221{
222 return min(max(a, mn), mx);
223}
224
226{
227# ifdef __KERNEL_SSE__
228 return int4(_mm_or_si128(_mm_and_si128(mask, a), _mm_andnot_si128(mask, b)));
229# else
230 return make_int4(
231 (mask.x) ? a.x : b.x, (mask.y) ? a.y : b.y, (mask.z) ? a.z : b.z, (mask.w) ? a.w : b.w);
232# endif
233}
234
236{
237# ifdef __KERNEL_SSE__
238 return int4(_mm_loadu_si128((__m128i *)v));
239# else
240 return make_int4(v[0], v[1], v[2], v[3]);
241# endif
242}
243#endif /* __KERNEL_GPU__ */
244
246{
247#ifdef __KERNEL_SSE__
248 return float4(_mm_castsi128_ps(a));
249#else
250 return make_float4(
252#endif
253}
254
255#ifdef __KERNEL_SSE__
256ccl_device_forceinline int4 andnot(const int4 a, const int4 b)
257{
258 return int4(_mm_andnot_si128(a.m128, b.m128));
259}
260
261template<size_t i0, const size_t i1, const size_t i2, const size_t i3>
262ccl_device_forceinline int4 shuffle(const int4 a)
263{
264# ifdef __KERNEL_NEON__
265 int32x4_t result = shuffle_neon<int32x4_t, i0, i1, i2, i3>(vreinterpretq_s32_m128i(a));
266 return int4(vreinterpretq_m128i_s32(result));
267# else
268 return int4(_mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0)));
269# endif
270}
271
272template<size_t i0, const size_t i1, const size_t i2, const size_t i3>
273ccl_device_forceinline int4 shuffle(const int4 a, const int4 b)
274{
275# ifdef __KERNEL_NEON__
276 int32x4_t result = shuffle_neon<int32x4_t, i0, i1, i2, i3>(vreinterpretq_s32_m128i(a),
277 vreinterpretq_s32_m128i(b));
278 return int4(vreinterpretq_m128i_s32(result));
279# else
280 return int4(_mm_castps_si128(
281 _mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _MM_SHUFFLE(i3, i2, i1, i0))));
282# endif
283}
284
285template<size_t i0> ccl_device_forceinline int4 shuffle(const int4 b)
286{
287 return shuffle<i0, i0, i0, i0>(b);
288}
289#endif
290
ATTR_WARN_UNUSED_RESULT const BMVert * v
#define ccl_device_forceinline
#define ccl_device_inline
#define CCL_NAMESPACE_END
ccl_device_forceinline float4 make_float4(const float x, const float y, const float z, const float w)
#define __int_as_float(x)
ccl_device_forceinline int4 make_int4(const int x, const int y, const int z, const int w)
#define cast
VecBase< float, 4 > float4
#define select(A, B, C)
VecBase< int, 4 > int4
ccl_device_inline float2 mask(const MaskType mask, const float2 a)
ccl_device_inline int4 operator>>(const int4 a, const int i)
Definition math_int4.h:42
ccl_device_inline int4 load_int4(const int *v)
Definition math_int4.h:235
ccl_device_inline int4 operator^(const int4 a, const int4 b)
Definition math_int4.h:120
ccl_device_inline int4 operator<(const int4 a, const int4 b)
Definition math_int4.h:60
ccl_device_inline int4 & operator|=(int4 &a, const int4 b)
Definition math_int4.h:168
ccl_device_inline int4 operator-(const int4 a, const int4 b)
Definition math_int4.h:28
ccl_device_inline int4 operator==(const int4 a, const int4 b)
Definition math_int4.h:74
ccl_device_inline int4 & operator&=(int4 &a, const int4 b)
Definition math_int4.h:159
ccl_device_inline int4 operator|(const int4 a, const int4 b)
Definition math_int4.h:111
ccl_device_inline int4 operator+=(int4 &a, const int4 b)
Definition math_int4.h:23
ccl_device_inline int4 & operator>>=(int4 &a, const int32_t b)
Definition math_int4.h:190
ccl_device_inline int4 & operator<<=(int4 &a, const int32_t b)
Definition math_int4.h:186
CCL_NAMESPACE_BEGIN ccl_device_inline int4 operator+(const int4 a, const int4 b)
Definition math_int4.h:14
ccl_device_inline int4 operator>=(const int4 a, const int4 b)
Definition math_int4.h:88
ccl_device_inline int4 operator<<(const int4 a, const int i)
Definition math_int4.h:51
ccl_device_inline int4 & operator^=(int4 &a, const int4 b)
Definition math_int4.h:177
ccl_device_inline int4 operator&(const int4 a, const int4 b)
Definition math_int4.h:102
ccl_device_inline int4 clamp(const int4 a, const int4 mn, const int4 mx)
Definition math_int4.h:220
ccl_device_inline int4 operator-=(int4 &a, const int4 b)
Definition math_int4.h:37
#define min(a, b)
Definition sort.cc:36
i
Definition text_draw.cc:230
max
Definition text_draw.cc:251