Blender V5.0
math_float8.h
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2013 Intel Corporation
2 * SPDX-FileCopyrightText: 2022 Blender Foundation
3 *
4 * SPDX-License-Identifier: Apache-2.0 */
5
6#pragma once
7
8#include "util/math_base.h"
9#include "util/types_float8.h"
10#include "util/types_int8.h"
11
13
15{
16#ifdef __KERNEL_AVX__
17 return vfloat8(_mm256_setzero_ps());
18#else
19 return make_vfloat8(0.0f);
20#endif
21}
22
24{
25 return make_vfloat8(1.0f);
26}
27
28ccl_device_inline vfloat8 operator+(const vfloat8 a, const vfloat8 b)
29{
30#ifdef __KERNEL_AVX__
31 return vfloat8(_mm256_add_ps(a.m256, b.m256));
32#else
33 return make_vfloat8(
34 a.a + b.a, a.b + b.b, a.c + b.c, a.d + b.d, a.e + b.e, a.f + b.f, a.g + b.g, a.h + b.h);
35#endif
36}
37
38ccl_device_inline vfloat8 operator+(const vfloat8 a, const float f)
39{
40 return a + make_vfloat8(f);
41}
42
43ccl_device_inline vfloat8 operator+(const float f, const vfloat8 a)
44{
45 return make_vfloat8(f) + a;
46}
47
48ccl_device_inline vfloat8 operator-(const vfloat8 a)
49{
50#ifdef __KERNEL_AVX__
51 __m256 mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x80000000));
52 return vfloat8(_mm256_xor_ps(a.m256, mask));
53#else
54 return make_vfloat8(-a.a, -a.b, -a.c, -a.d, -a.e, -a.f, -a.g, -a.h);
55#endif
56}
57
58ccl_device_inline vfloat8 operator-(const vfloat8 a, const vfloat8 b)
59{
60#ifdef __KERNEL_AVX__
61 return vfloat8(_mm256_sub_ps(a.m256, b.m256));
62#else
63 return make_vfloat8(
64 a.a - b.a, a.b - b.b, a.c - b.c, a.d - b.d, a.e - b.e, a.f - b.f, a.g - b.g, a.h - b.h);
65#endif
66}
67
68ccl_device_inline vfloat8 operator-(const vfloat8 a, const float f)
69{
70 return a - make_vfloat8(f);
71}
72
73ccl_device_inline vfloat8 operator-(const float f, const vfloat8 a)
74{
75 return make_vfloat8(f) - a;
76}
77
78ccl_device_inline vfloat8 operator*(const vfloat8 a, const vfloat8 b)
79{
80#ifdef __KERNEL_AVX__
81 return vfloat8(_mm256_mul_ps(a.m256, b.m256));
82#else
83 return make_vfloat8(
84 a.a * b.a, a.b * b.b, a.c * b.c, a.d * b.d, a.e * b.e, a.f * b.f, a.g * b.g, a.h * b.h);
85#endif
86}
87
88ccl_device_inline vfloat8 operator*(const vfloat8 a, const float f)
89{
90 return a * make_vfloat8(f);
91}
92
93ccl_device_inline vfloat8 operator*(const float f, const vfloat8 a)
94{
95 return make_vfloat8(f) * a;
96}
97
98ccl_device_inline vfloat8 operator/(const vfloat8 a, const vfloat8 b)
99{
100#ifdef __KERNEL_AVX__
101 return vfloat8(_mm256_div_ps(a.m256, b.m256));
102#else
103 return make_vfloat8(
104 a.a / b.a, a.b / b.b, a.c / b.c, a.d / b.d, a.e / b.e, a.f / b.f, a.g / b.g, a.h / b.h);
105#endif
106}
107
108ccl_device_inline vfloat8 operator/(const vfloat8 a, const float f)
109{
110 return a / make_vfloat8(f);
111}
112
113ccl_device_inline vfloat8 operator/(const float f, const vfloat8 a)
114{
115 return make_vfloat8(f) / a;
116}
117
118ccl_device_inline vfloat8 operator+=(vfloat8 a, const vfloat8 b)
119{
120 return a = a + b;
121}
122
123ccl_device_inline vfloat8 operator-=(vfloat8 a, const vfloat8 b)
124{
125 return a = a - b;
126}
127
128ccl_device_inline vfloat8 operator*=(vfloat8 a, const vfloat8 b)
129{
130 return a = a * b;
131}
132
133ccl_device_inline vfloat8 operator*=(vfloat8 a, const float f)
134{
135 return a = a * f;
136}
137
138ccl_device_inline vfloat8 operator/=(vfloat8 a, const float f)
139{
140 return a = a / f;
141}
142
143ccl_device_inline bool operator==(const vfloat8 a, const vfloat8 b)
144{
145#ifdef __KERNEL_AVX__
146 return (_mm256_movemask_ps(_mm256_castsi256_ps(
147 _mm256_cmpeq_epi32(_mm256_castps_si256(a.m256), _mm256_castps_si256(b.m256)))) &
148 0b11111111) == 0b11111111;
149#else
150 return (a.a == b.a && a.b == b.b && a.c == b.c && a.d == b.d && a.e == b.e && a.f == b.f &&
151 a.g == b.g && a.h == b.h);
152#endif
153}
154
155ccl_device_inline vfloat8 operator^(const vfloat8 a, const vfloat8 b)
156{
157#ifdef __KERNEL_AVX__
158 return vfloat8(_mm256_xor_ps(a.m256, b.m256));
159#else
168#endif
169}
170
171ccl_device_inline vfloat8 sqrt(const vfloat8 a)
172{
173#ifdef __KERNEL_AVX__
174 return vfloat8(_mm256_sqrt_ps(a.m256));
175#else
176 return make_vfloat8(sqrtf(a.a),
177 sqrtf(a.b),
178 sqrtf(a.c),
179 sqrtf(a.d),
180 sqrtf(a.e),
181 sqrtf(a.f),
182 sqrtf(a.g),
183 sqrtf(a.h));
184#endif
185}
186
187ccl_device_inline vfloat8 sqr(const vfloat8 a)
188{
189 return a * a;
190}
191
192ccl_device_inline bool is_zero(const vfloat8 a)
193{
194 return a == make_vfloat8(0.0f);
195}
196
197ccl_device_inline float reduce_add(const vfloat8 a)
198{
199#ifdef __KERNEL_AVX__
200 vfloat8 b(_mm256_hadd_ps(a.m256, a.m256));
201 vfloat8 h(_mm256_hadd_ps(b.m256, b.m256));
202 return h[0] + h[4];
203#else
204 return a.a + a.b + a.c + a.d + a.e + a.f + a.g + a.h;
205#endif
206}
207
208ccl_device_inline float average(const vfloat8 a)
209{
210 return reduce_add(a) / 8.0f;
211}
212
213ccl_device_inline vfloat8 min(const vfloat8 a, const vfloat8 b)
214{
215#ifdef __KERNEL_AVX__
216 return vfloat8(_mm256_min_ps(a.m256, b.m256));
217#else
218 return make_vfloat8(min(a.a, b.a),
219 min(a.b, b.b),
220 min(a.c, b.c),
221 min(a.d, b.d),
222 min(a.e, b.e),
223 min(a.f, b.f),
224 min(a.g, b.g),
225 min(a.h, b.h));
226#endif
227}
228
229ccl_device_inline vfloat8 max(const vfloat8 a, const vfloat8 b)
230{
231#ifdef __KERNEL_AVX__
232 return vfloat8(_mm256_max_ps(a.m256, b.m256));
233#else
234 return make_vfloat8(max(a.a, b.a),
235 max(a.b, b.b),
236 max(a.c, b.c),
237 max(a.d, b.d),
238 max(a.e, b.e),
239 max(a.f, b.f),
240 max(a.g, b.g),
241 max(a.h, b.h));
242#endif
243}
244
245ccl_device_inline vfloat8 clamp(const vfloat8 a, const vfloat8 mn, const vfloat8 mx)
246{
247 return min(max(a, mn), mx);
248}
249
250ccl_device_inline vfloat8 select(const vint8 mask, const vfloat8 a, const vfloat8 b)
251{
252#ifdef __KERNEL_AVX__
253 return vfloat8(_mm256_blendv_ps(b, a, _mm256_castsi256_ps(mask)));
254#else
255 return make_vfloat8((mask.a) ? a.a : b.a,
256 (mask.b) ? a.b : b.b,
257 (mask.c) ? a.c : b.c,
258 (mask.d) ? a.d : b.d,
259 (mask.e) ? a.e : b.e,
260 (mask.f) ? a.f : b.f,
261 (mask.g) ? a.g : b.g,
262 (mask.h) ? a.h : b.h);
263#endif
264}
265
266ccl_device_inline vfloat8 fabs(const vfloat8 a)
267{
268#ifdef __KERNEL_AVX__
269 return vfloat8(_mm256_and_ps(a.m256, _mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff))));
270#else
271 return make_vfloat8(fabsf(a.a),
272 fabsf(a.b),
273 fabsf(a.c),
274 fabsf(a.d),
275 fabsf(a.e),
276 fabsf(a.f),
277 fabsf(a.g),
278 fabsf(a.h));
279#endif
280}
281
282ccl_device_inline vfloat8 mix(const vfloat8 a, const vfloat8 b, const float t)
283{
284 return a + t * (b - a);
285}
286
287ccl_device_inline vfloat8 mix(const vfloat8 a, const vfloat8 b, vfloat8 t)
288{
289 return a + t * (b - a);
290}
291
292ccl_device_inline vfloat8 saturate(const vfloat8 a)
293{
294 return clamp(a, make_vfloat8(0.0f), make_vfloat8(1.0f));
295}
296
297ccl_device_inline vfloat8 exp(vfloat8 v)
298{
299 return make_vfloat8(
300 expf(v.a), expf(v.b), expf(v.c), expf(v.d), expf(v.e), expf(v.f), expf(v.g), expf(v.h));
301}
302
303ccl_device_inline vfloat8 log(vfloat8 v)
304{
305 return make_vfloat8(
306 logf(v.a), logf(v.b), logf(v.c), logf(v.d), logf(v.e), logf(v.f), logf(v.g), logf(v.h));
307}
308
309ccl_device_inline float dot(const vfloat8 a, const vfloat8 b)
310{
311#ifdef __KERNEL_AVX__
312 vfloat8 t(_mm256_dp_ps(a.m256, b.m256, 0xFF));
313 return t[0] + t[4];
314#else
315 return (a.a * b.a) + (a.b * b.b) + (a.c * b.c) + (a.d * b.d) + (a.e * b.e) + (a.f * b.f) +
316 (a.g * b.g) + (a.h * b.h);
317#endif
318}
319
320ccl_device_inline vfloat8 pow(vfloat8 v, const float e)
321{
322 return make_vfloat8(powf(v.a, e),
323 powf(v.b, e),
324 powf(v.c, e),
325 powf(v.d, e),
326 powf(v.e, e),
327 powf(v.f, e),
328 powf(v.g, e),
329 powf(v.h, e));
330}
331
332ccl_device_inline float reduce_min(const vfloat8 a)
333{
334 return min(min(min(a.a, a.b), min(a.c, a.d)), min(min(a.e, a.f), min(a.g, a.h)));
335}
336
337ccl_device_inline float reduce_max(const vfloat8 a)
338{
339 return max(max(max(a.a, a.b), max(a.c, a.d)), max(max(a.e, a.f), max(a.g, a.h)));
340}
341
342ccl_device_inline bool isequal(const vfloat8 a, const vfloat8 b)
343{
344 return a == b;
345}
346
347ccl_device_inline vfloat8 safe_divide(const vfloat8 a, const float b)
348{
349 return (b != 0.0f) ? a / b : make_vfloat8(0.0f);
350}
351
352ccl_device_inline vfloat8 safe_divide(const vfloat8 a, const vfloat8 b)
353{
354 return make_vfloat8((b.a != 0.0f) ? a.a / b.a : 0.0f,
355 (b.b != 0.0f) ? a.b / b.b : 0.0f,
356 (b.c != 0.0f) ? a.c / b.c : 0.0f,
357 (b.d != 0.0f) ? a.d / b.d : 0.0f,
358 (b.e != 0.0f) ? a.e / b.e : 0.0f,
359 (b.f != 0.0f) ? a.f / b.f : 0.0f,
360 (b.g != 0.0f) ? a.g / b.g : 0.0f,
361 (b.h != 0.0f) ? a.h / b.h : 0.0f);
362}
363
365{
366 v.a = ensure_finite(v.a);
367 v.b = ensure_finite(v.b);
368 v.c = ensure_finite(v.c);
369 v.d = ensure_finite(v.d);
370 v.e = ensure_finite(v.e);
371 v.f = ensure_finite(v.f);
372 v.g = ensure_finite(v.g);
373 v.h = ensure_finite(v.h);
374
375 return v;
376}
377
379{
380 return isfinite_safe(v.a) && isfinite_safe(v.b) && isfinite_safe(v.c) && isfinite_safe(v.d) &&
382}
383
384ccl_device_inline vint8 cast(const vfloat8 a)
385{
386#ifdef __KERNEL_AVX__
387 return vint8(_mm256_castps_si256(a));
388#else
389 return make_vint8(__float_as_int(a.a),
390 __float_as_int(a.b),
391 __float_as_int(a.c),
392 __float_as_int(a.d),
393 __float_as_int(a.e),
394 __float_as_int(a.f),
395 __float_as_int(a.g),
396 __float_as_int(a.h));
397#endif
398}
399
400#ifdef __KERNEL_SSE__
401ccl_device_forceinline float4 low(const vfloat8 a)
402{
403# ifdef __KERNEL_AVX__
404 return float4(_mm256_extractf128_ps(a.m256, 0));
405# else
406 return make_float4(a.e, a.f, a.g, a.h);
407# endif
408}
409ccl_device_forceinline float4 high(const vfloat8 a)
410{
411# ifdef __KERNEL_AVX__
412 return float4(_mm256_extractf128_ps(a.m256, 1));
413# else
414 return make_float4(a.a, a.b, a.c, a.d);
415# endif
416}
417
418template<int i0,
419 const int i1,
420 const int i2,
421 const int i3,
422 const int i4,
423 const int i5,
424 const int i6,
425 const int i7>
426ccl_device_forceinline vfloat8 shuffle(const vfloat8 a)
427{
428# ifdef __KERNEL_AVX__
429 return vfloat8(_mm256_permutevar_ps(a, _mm256_set_epi32(i7, i6, i5, i4, i3, i2, i1, i0)));
430# else
431 return make_vfloat8(a[i0], a[i1], a[i2], a[i3], a[i4 + 4], a[i5 + 4], a[i6 + 4], a[i7 + 4]);
432# endif
433}
434
435template<size_t i0, const size_t i1, const size_t i2, const size_t i3>
436ccl_device_forceinline vfloat8 shuffle(const vfloat8 a, const vfloat8 b)
437{
438# ifdef __KERNEL_AVX__
439 return vfloat8(_mm256_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0)));
440# else
441 return make_vfloat8(shuffle<i0, i1, i2, i3>(high(a), high(b)),
442 shuffle<i0, i1, i2, i3>(low(a), low(b)));
443# endif
444}
445
446template<size_t i0, const size_t i1, const size_t i2, const size_t i3>
447ccl_device_forceinline vfloat8 shuffle(const vfloat8 a)
448{
449 return shuffle<i0, i1, i2, i3>(a, a);
450}
451template<size_t i0> ccl_device_forceinline vfloat8 shuffle(const vfloat8 a, const vfloat8 b)
452{
453 return shuffle<i0, i0, i0, i0>(a, b);
454}
455template<size_t i0> ccl_device_forceinline vfloat8 shuffle(const vfloat8 a)
456{
457 return shuffle<i0>(a, a);
458}
459
460template<size_t i> ccl_device_forceinline float extract(const vfloat8 a)
461{
462# ifdef __KERNEL_AVX__
463 __m256 b = shuffle<i, i, i, i>(a).m256;
464 return _mm256_cvtss_f32(b);
465# else
466 return a[i];
467# endif
468}
469#endif
470
__forceinline float extract(const int4 &b)
Definition binning.cpp:27
ATTR_WARN_UNUSED_RESULT const BMVert const BMEdge * e
ATTR_WARN_UNUSED_RESULT const BMVert * v
#define ccl_device_forceinline
#define ccl_device_inline
#define logf(x)
#define expf(x)
#define powf(x, y)
#define CCL_NAMESPACE_END
#define __float_as_int(x)
#define __float_as_uint(x)
#define __uint_as_float(x)
#define cast
#define log
#define pow
#define exp
#define select(A, B, C)
#define sqrt
VecBase< float, 4 > float4
ccl_device_inline float2 mask(const MaskType mask, const float2 a)
ccl_device_inline vfloat8 safe_divide(const vfloat8 a, const float b)
ccl_device_inline vfloat8 one_vfloat8()
Definition math_float8.h:23
ccl_device_inline vfloat8 operator*(const vfloat8 a, const vfloat8 b)
Definition math_float8.h:78
ccl_device_inline bool operator==(const vfloat8 a, const vfloat8 b)
ccl_device_inline vfloat8 operator/=(vfloat8 a, const float f)
ccl_device_inline vfloat8 fabs(const vfloat8 a)
ccl_device_inline vfloat8 ensure_finite(vfloat8 v)
ccl_device_inline float average(const vfloat8 a)
ccl_device_inline vfloat8 operator+=(vfloat8 a, const vfloat8 b)
ccl_device_inline vfloat8 operator^(const vfloat8 a, const vfloat8 b)
ccl_device_inline vfloat8 operator-(const vfloat8 a)
Definition math_float8.h:48
ccl_device_inline vfloat8 operator/(const vfloat8 a, const vfloat8 b)
Definition math_float8.h:98
ccl_device_inline vfloat8 operator-=(vfloat8 a, const vfloat8 b)
ccl_device_inline bool isequal(const vfloat8 a, const vfloat8 b)
ccl_device_inline float reduce_add(const vfloat8 a)
ccl_device_inline bool is_zero(const vfloat8 a)
ccl_device_inline float dot(const vfloat8 a, const vfloat8 b)
CCL_NAMESPACE_BEGIN ccl_device_inline vfloat8 zero_vfloat8()
Definition math_float8.h:14
ccl_device_inline bool isfinite_safe(vfloat8 v)
ccl_device_inline vfloat8 operator*=(vfloat8 a, const vfloat8 b)
ccl_device_inline float reduce_min(const vfloat8 a)
ccl_device_inline vfloat8 clamp(const vfloat8 a, const vfloat8 mn, const vfloat8 mx)
ccl_device_inline vfloat8 operator+(const vfloat8 a, const vfloat8 b)
Definition math_float8.h:28
ccl_device_inline float reduce_max(const vfloat8 a)
#define mix
#define sqr
#define fabsf
#define sqrtf
#define make_float4
#define saturate(a)
Definition smaa.cc:315
#define min(a, b)
Definition sort.cc:36
i
Definition text_draw.cc:230
max
Definition text_draw.cc:251
ccl_device_inline vfloat8 make_vfloat8(const float f)
ccl_device_inline vint8 make_vint8(const vfloat8 f)