Blender V5.0
math_half.cc
/* SPDX-FileCopyrightText: 2024 Blender Authors
 *
 * SPDX-License-Identifier: GPL-2.0-or-later */

/** \file
 * \ingroup bli
 */

#include "BLI_math_half.hh"

#if defined(__ARM_NEON)
/* Use ARM FP16 conversion instructions */
#  define USE_HARDWARE_FP16_NEON
#  include <arm_neon.h>
#endif
#if (defined(__x86_64__) || defined(_M_X64))
/* All AVX2 CPUs have F16C instructions, so use those if we're compiling for AVX2.
 * Otherwise use "manual" SSE2 4x-wide conversion. */
#  if defined(__AVX2__)
#    define USE_HARDWARE_FP16_F16C
#  else
#    define USE_SSE2_FP16
#  endif
#  include <immintrin.h>
#endif

uint16_t blender::math::float_to_half(float v)
{
#if defined(USE_HARDWARE_FP16_NEON)
  float16x4_t h4 = vcvt_f16_f32(vdupq_n_f32(v));
  float16_t h = vget_lane_f16(h4, 0);
  return *(uint16_t *)&h;
#else
  /* Based on float_to_half_fast3_rtne from public domain https://gist.github.com/rygorous/2156668
   * see corresponding blog post https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
   */
  union FP32 {
    uint32_t u;
    float f;
  };
  FP32 f;
  f.f = v;
  FP32 f32infty = {255 << 23};
  FP32 f16max = {(127 + 16) << 23};
  FP32 denorm_magic = {((127 - 15) + (23 - 10) + 1) << 23};
  uint32_t sign_mask = 0x80000000u;
  uint16_t o = {0};

  uint32_t sign = f.u & sign_mask;
  f.u ^= sign;

  /*
   * NOTE: all the integer compares in this function can be safely
   * compiled into signed compares since all operands are below
   * 0x80000000. Important if you want fast straight SSE2 code
   * (since there's no unsigned PCMPGTD).
   */
  if (f.u >= f16max.u) {
    /* Result is Inf or NaN (all exponent bits set). */
    o = (f.u > f32infty.u) ? 0x7e00 : 0x7c00; /* NaN->qNaN and Inf->Inf */
  }
  else {
    /* (De)normalized number or zero. */
    if (f.u < (113 << 23)) {
      /* Resulting FP16 is subnormal or zero.
       * Use a magic value to align our 10 mantissa bits at the bottom of
       * the float. As long as FP addition is round-to-nearest-even this
       * just works. */
      f.f += denorm_magic.f;

      /* And one integer subtract of the bias later, we have our final float! */
      o = f.u - denorm_magic.u;
    }
    else {
      uint32_t mant_odd = (f.u >> 13) & 1; /* resulting mantissa is odd */

      /* Update exponent, rounding bias part 1. */
      f.u += (uint32_t(15 - 127) << 23) + 0xfff;
      /* Rounding bias part 2. */
      f.u += mant_odd;
      /* Take the bits! */
      o = f.u >> 13;
    }
  }

  o |= sign >> 16;
  return o;
#endif
}
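
The fallback above is rygorous' float_to_half_fast3_rtne: strip the sign, route Inf/NaN through the f16max clamp, use the denorm-magic addition for subnormals, and integer-add a rounding bias for normal values. As a quick illustration, a hedged sanity-check sketch; the helper name and the BLI_assert usage here are ours, and the expected bit patterns follow directly from the IEEE 754 binary16 encoding:

#include "BLI_assert.h"

/* Illustrative only, not part of math_half.cc. */
static void float_to_half_examples()
{
  BLI_assert(blender::math::float_to_half(0.0f) == 0x0000);     /* +0 */
  BLI_assert(blender::math::float_to_half(1.0f) == 0x3c00);     /* exponent 15, mantissa 0 */
  BLI_assert(blender::math::float_to_half(-2.0f) == 0xc000);    /* sign bit set */
  BLI_assert(blender::math::float_to_half(65504.0f) == 0x7bff); /* largest finite half */
  BLI_assert(blender::math::float_to_half(1e6f) == 0x7c00);     /* rounds to +inf */
}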

uint16_t blender::math::float_to_half_make_finite(float v)
{
  uint16_t h = float_to_half(v);
  /* Infinity or NaN? */
  if ((h & 0x7c00) == 0x7c00) {
    if ((h & 0x03ff) == 0) {
      /* +/- infinity: +/- max value. */
      h ^= 0x07ff;
    }
    else {
      /* +/- NaN: +/- zero. */
      h &= 0x8000;
    }
  }
  return h;
}
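
float_to_half_make_finite rewrites the two special classes after conversion: an all-ones exponent with a zero mantissa is +/-Inf and is XOR'd down to the largest finite half (+/-65504), while a non-zero mantissa marks a NaN, which collapses to signed zero. Continuing the illustrative sketch (assuming quiet_NaN() returns a positively-signed NaN, which the C++ standard does not guarantee):

#include <limits>

static void make_finite_examples()
{
  using namespace blender::math;
  BLI_assert(float_to_half_make_finite(1e9f) == 0x7bff);  /* +inf -> +65504 */
  BLI_assert(float_to_half_make_finite(-1e9f) == 0xfbff); /* -inf -> -65504 */
  /* A negatively-signed NaN would give 0x8000 instead. */
  BLI_assert(float_to_half_make_finite(std::numeric_limits<float>::quiet_NaN()) == 0x0000);
  BLI_assert(float_to_half_make_finite(2.0f) == float_to_half(2.0f)); /* finite values pass through */
}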

float blender::math::half_to_float(uint16_t v)
{
#if defined(USE_HARDWARE_FP16_NEON)
  uint16x4_t v4 = vdup_n_u16(v);
  float16x4_t h4 = vreinterpret_f16_u16(v4);
  float32x4_t f4 = vcvt_f32_f16(h4);
  return vgetq_lane_f32(f4, 0);
#else
  /* Based on half_to_float_fast4 from public domain https://gist.github.com/rygorous/2144712
   * see corresponding blog post https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
   */
  union FP32 {
    uint32_t u;
    float f;
  };
  constexpr FP32 magic = {113 << 23};
  constexpr uint32_t shifted_exp = 0x7c00 << 13; /* exponent mask after shift */
  FP32 o;

  o.u = (v & 0x7fff) << 13;         /* exponent/mantissa bits */
  uint32_t exp = shifted_exp & o.u; /* just the exponent */
  o.u += (127 - 15) << 23;          /* exponent adjust */

  /* Handle exponent special cases. */
  if (exp == shifted_exp) {   /* Inf/NaN? */
    o.u += (128 - 16) << 23;  /* extra exp adjust */
  }
  else if (exp == 0) {  /* Zero/Denormal? */
    o.u += 1 << 23;     /* extra exp adjust */
    o.f -= magic.f;     /* renormalize */
  }

  o.u |= (v & 0x8000) << 16; /* sign bit */
  return o.f;
#endif
}
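
The scalar decoder shifts the half's exponent/mantissa bits into FP32 position and rebiases the exponent by (127 - 15); subnormal inputs get one extra exponent bump and a floating-point subtract against the magic constant to renormalize. Worked through for 1.0: (0x3c00 & 0x7fff) << 13 = 0x07800000, plus (127 - 15) << 23 = 0x38000000, gives 0x3f800000, which is exactly 1.0f. Continuing the sketch from above:

static void half_to_float_examples()
{
  BLI_assert(blender::math::half_to_float(0x3c00) == 1.0f);
  BLI_assert(blender::math::half_to_float(0xbc00) == -1.0f); /* sign bit OR'd back in */
  BLI_assert(blender::math::half_to_float(0x0001) == 5.9604644775390625e-8f); /* 2^-24, smallest subnormal */
  BLI_assert(blender::math::half_to_float(0x7c00) == std::numeric_limits<float>::infinity());
}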

#ifdef USE_SSE2_FP16
/* 4x wide float<->half conversion using SSE2 code, based on
 * https://gist.github.com/rygorous/4d9e9e88cab13c703773dc767a23575f */

/* Float->half conversion with round-to-nearest-even, SSE2+.
 * Leaves half-floats in 32-bit lanes (sign extended). */
static inline __m128i F32_to_F16_4x(const __m128 &f)
{
  const __m128 mask_sign = _mm_set1_ps(-0.0f);
  /* all FP32 values >= this round to +inf */
  const __m128i c_f16max = _mm_set1_epi32((127 + 16) << 23);
  const __m128i c_nanbit = _mm_set1_epi32(0x200);
  const __m128i c_nanlobits = _mm_set1_epi32(0x1ff);
  const __m128i c_infty_as_fp16 = _mm_set1_epi32(0x7c00);
  /* smallest FP32 that yields a normalized FP16 */
  const __m128i c_min_normal = _mm_set1_epi32((127 - 14) << 23);
  const __m128i c_subnorm_magic = _mm_set1_epi32(((127 - 15) + (23 - 10) + 1) << 23);
  /* adjust exponent and add mantissa rounding */
  const __m128i c_normal_bias = _mm_set1_epi32(0xfff - ((127 - 15) << 23));

  __m128 justsign = _mm_and_ps(f, mask_sign);
  __m128 absf = _mm_andnot_ps(mask_sign, f); /* f & ~mask_sign */
  /* the cast is "free" (extra bypass latency, but no throughput hit) */
  __m128i absf_int = _mm_castps_si128(absf);
  __m128 b_isnan = _mm_cmpunord_ps(absf, absf);              /* is this a NaN? */
  __m128i b_isregular = _mm_cmpgt_epi32(c_f16max, absf_int); /* (sub)normalized or special? */
  __m128i nan_payload = _mm_and_si128(_mm_srli_epi32(absf_int, 13),
                                      c_nanlobits);        /* payload bits for NaNs */
  __m128i nan_quiet = _mm_or_si128(nan_payload, c_nanbit); /* and set quiet bit */
  __m128i nanfinal = _mm_and_si128(_mm_castps_si128(b_isnan), nan_quiet);
  __m128i inf_or_nan = _mm_or_si128(nanfinal, c_infty_as_fp16); /* output for specials */

  /* subnormal? */
  __m128i b_issub = _mm_cmpgt_epi32(c_min_normal, absf_int);

  /* "result is subnormal" path */
  __m128 subnorm1 = _mm_add_ps(
      absf, _mm_castsi128_ps(c_subnorm_magic)); /* magic value to round output mantissa */
  __m128i subnorm2 = _mm_sub_epi32(_mm_castps_si128(subnorm1),
                                   c_subnorm_magic); /* subtract out bias */

  /* "result is normal" path */
  __m128i mantoddbit = _mm_slli_epi32(absf_int, 31 - 13); /* shift bit 13 (mantissa LSB) to sign */
  __m128i mantodd = _mm_srai_epi32(mantoddbit, 31);       /* -1 if FP16 mantissa odd, else 0 */

  __m128i round1 = _mm_add_epi32(absf_int, c_normal_bias);
  /* if mantissa LSB odd, bias towards rounding up (RTNE) */
  __m128i round2 = _mm_sub_epi32(round1, mantodd);
  __m128i normal = _mm_srli_epi32(round2, 13); /* rounded result */

  /* combine the two non-specials */
  __m128i nonspecial = _mm_or_si128(_mm_and_si128(subnorm2, b_issub),
                                    _mm_andnot_si128(b_issub, normal));

  /* merge in specials as well */
  __m128i joined = _mm_or_si128(_mm_and_si128(nonspecial, b_isregular),
                                _mm_andnot_si128(b_isregular, inf_or_nan));

  __m128i sign_shift = _mm_srai_epi32(_mm_castps_si128(justsign), 16);
  __m128i result = _mm_or_si128(joined, sign_shift);

  return result;
}

/* Half->float conversion, SSE2+. Input in 32-bit lanes. */
static inline __m128 F16_to_F32_4x(const __m128i &h)
{
  const __m128i mask_nosign = _mm_set1_epi32(0x7fff);
  const __m128 magic_mult = _mm_castsi128_ps(_mm_set1_epi32((254 - 15) << 23));
  const __m128i was_infnan = _mm_set1_epi32(0x7bff);
  const __m128 exp_infnan = _mm_castsi128_ps(_mm_set1_epi32(255 << 23));
  const __m128i was_nan = _mm_set1_epi32(0x7c00);
  const __m128i nan_quiet = _mm_set1_epi32(1 << 22);

  __m128i expmant = _mm_and_si128(mask_nosign, h);
  __m128i justsign = _mm_xor_si128(h, expmant);
  __m128i shifted = _mm_slli_epi32(expmant, 13);
  __m128 scaled = _mm_mul_ps(_mm_castsi128_ps(shifted), magic_mult);
  __m128i b_wasinfnan = _mm_cmpgt_epi32(expmant, was_infnan);
  __m128i sign = _mm_slli_epi32(justsign, 16);
  __m128 infnanexp = _mm_and_ps(_mm_castsi128_ps(b_wasinfnan), exp_infnan);
  __m128i b_wasnan = _mm_cmpgt_epi32(expmant, was_nan);
  __m128i nanquiet = _mm_and_si128(b_wasnan, nan_quiet);
  __m128 infnandone = _mm_or_ps(infnanexp, _mm_castsi128_ps(nanquiet));

  __m128 sign_inf = _mm_or_ps(_mm_castsi128_ps(sign), infnandone);
  __m128 result = _mm_or_ps(scaled, sign_inf);

  return result;
}

#endif // USE_SSE2_FP16
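
Both helpers intentionally leave each 16-bit result sign-extended in a 32-bit lane: a signed saturating pack can then narrow the lanes without ever clipping, because every sign-extended 16-bit value is in range for the pack. A hypothetical narrowing helper (ours; the array loops below inline the same two instructions):

static inline void store_4_halfs(uint16_t *dst, __m128i h4_in_32bit_lanes)
{
  /* Signed saturating pack is lossless for sign-extended 16-bit values. */
  __m128i packed = _mm_packs_epi32(h4_in_32bit_lanes, h4_in_32bit_lanes);
  _mm_storeu_si64(dst, packed);
}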

void blender::math::float_to_half_array(const float *src, uint16_t *dst, size_t length)
{
  size_t i = 0;
#if defined(USE_HARDWARE_FP16_F16C) /* 8-wide loop using AVX2 F16C */
  for (; i + 7 < length; i += 8) {
    __m256 src8 = _mm256_loadu_ps(src);
    __m128i h8 = _mm256_cvtps_ph(src8, _MM_FROUND_TO_NEAREST_INT);
    _mm_storeu_epi32(dst, h8);
    src += 8;
    dst += 8;
  }
#elif defined(USE_SSE2_FP16) /* 4-wide loop using SSE2 */
  for (; i + 3 < length; i += 4) {
    __m128 src4 = _mm_loadu_ps(src);
    __m128i h4 = F32_to_F16_4x(src4);
    __m128i h4_packed = _mm_packs_epi32(h4, h4);
    _mm_storeu_si64(dst, h4_packed);
    src += 4;
    dst += 4;
  }
#elif defined(USE_HARDWARE_FP16_NEON) /* 4-wide loop using NEON */
  for (; i + 3 < length; i += 4) {
    float32x4_t src4 = vld1q_f32(src);
    float16x4_t h4 = vcvt_f16_f32(src4);
    vst1_f16((float16_t *)dst, h4);
    src += 4;
    dst += 4;
  }
#endif
  /* Use scalar path to convert the tail of the array (or the whole array if
   * none of the wider paths above were used). */
  for (; i < length; i++) {
    *dst++ = float_to_half(*src++);
  }
}
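
Callers hand the whole buffer to the array functions and let the preprocessor dispatch above pick the widest available loop; the scalar tail keeps the contract exact for any length. A hypothetical caller (the function name and the std::vector usage are ours):

#include <vector>

static std::vector<uint16_t> pixels_to_half(const std::vector<float> &pixels)
{
  std::vector<uint16_t> out(pixels.size());
  blender::math::float_to_half_array(pixels.data(), out.data(), pixels.size());
  return out;
}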

void blender::math::float_to_half_make_finite_array(const float *src, uint16_t *dst, size_t length)
{
  size_t i = 0;
#if defined(USE_HARDWARE_FP16_F16C) /* 8-wide loop using AVX2 F16C */
  for (; i + 7 < length; i += 8) {
    __m256 src8 = _mm256_loadu_ps(src);
    __m128i h8 = _mm256_cvtps_ph(src8, _MM_FROUND_TO_NEAREST_INT);
    /* Handle inf/nan. */
    {
      const __m128i exp_mask = _mm_set1_epi16(0x7c00u);
      __m128i exp_all_ones = _mm_cmpeq_epi16(_mm_and_si128(h8, exp_mask), exp_mask);
      const __m128i mant_mask = _mm_set1_epi16(0x03ffu);
      const __m128i zero = _mm_setzero_si128();
      __m128i mant_is_zero = _mm_cmpeq_epi16(_mm_and_si128(h8, mant_mask), zero);
      __m128i is_inf = _mm_and_si128(exp_all_ones, mant_is_zero);
      const __m128i all_ones = _mm_cmpeq_epi16(zero, zero);
      __m128i is_nan = _mm_and_si128(exp_all_ones, _mm_andnot_si128(mant_is_zero, all_ones));
      const __m128i sign_mask = _mm_set1_epi16(0x8000u);
      __m128i signbits = _mm_and_si128(h8, sign_mask);
      __m128i inf_res = _mm_or_si128(signbits, _mm_set1_epi16(0x7bffu)); /* +/- 65504 */
      __m128i nan_res = signbits;                                        /* +/- 0 */
      /* Select final result. */
      h8 = _mm_blendv_epi8(h8, inf_res, is_inf);
      h8 = _mm_blendv_epi8(h8, nan_res, is_nan);
    }
    _mm_storeu_si128((__m128i *)dst, h8);
    src += 8;
    dst += 8;
  }
#elif defined(USE_SSE2_FP16) /* 4-wide loop using SSE2 */
  for (; i + 3 < length; i += 4) {
    __m128 src4 = _mm_loadu_ps(src);
    __m128i h4 = F32_to_F16_4x(src4);
    /* Handle inf/nan. */
    {
      __m128i hi_part = _mm_and_si128(h4, _mm_set1_epi32(0xffff0000u));
      const __m128i exp_mask = _mm_set1_epi16(0x7c00u);
      __m128i exp_all_ones = _mm_cmpeq_epi16(_mm_and_si128(h4, exp_mask), exp_mask);
      const __m128i mant_mask = _mm_set1_epi16(0x03ffu);
      const __m128i zero = _mm_setzero_si128();
      __m128i mant_is_zero = _mm_cmpeq_epi16(_mm_and_si128(h4, mant_mask), zero);
      __m128i is_inf = _mm_and_si128(exp_all_ones, mant_is_zero);
      const __m128i all_ones = _mm_cmpeq_epi16(zero, zero);
      __m128i is_nan = _mm_and_si128(exp_all_ones, _mm_andnot_si128(mant_is_zero, all_ones));
      const __m128i sign_mask = _mm_set1_epi16(0x8000u);
      __m128i signbits = _mm_and_si128(h4, sign_mask);
      __m128i inf_res = _mm_or_si128(signbits, _mm_set1_epi16(0x7bffu)); /* +/- 65504 */
      __m128i nan_res = signbits;                                        /* +/- 0 */
      /* Select final result. */
      h4 = _mm_blendv_epi8(h4, inf_res, is_inf);
      h4 = _mm_blendv_epi8(h4, nan_res, is_nan);
      h4 = _mm_and_si128(h4, _mm_set1_epi32(0xffff));
      h4 = _mm_or_si128(h4, hi_part);
    }
    __m128i h4_packed = _mm_packs_epi32(h4, h4);
    _mm_storeu_si64(dst, h4_packed);
    src += 4;
    dst += 4;
  }
#elif defined(USE_HARDWARE_FP16_NEON) /* 4-wide loop using NEON */
  for (; i + 3 < length; i += 4) {
    float32x4_t src4 = vld1q_f32(src);
    float16x4_t h4 = vcvt_f16_f32(src4);
    /* Handle inf/nan. */
    {
      uint16x4_t hu4 = vreinterpret_u16_f16(h4);
      const uint16x4_t exp_mask = vdup_n_u16(0x7c00u);
      uint16x4_t exp_all_ones = vceq_u16(vand_u16(hu4, exp_mask), exp_mask);
      const uint16x4_t mant_mask = vdup_n_u16(0x03ffu);
      const uint16x4_t zero = vdup_n_u16(0);
      uint16x4_t mant_is_zero = vceq_u16(vand_u16(hu4, mant_mask), zero);
      uint16x4_t is_inf = vand_u16(exp_all_ones, mant_is_zero);
      uint16x4_t is_nan = vand_u16(exp_all_ones, vmvn_u16(mant_is_zero));
      const uint16x4_t sign_mask = vdup_n_u16(0x8000u);
      uint16x4_t signbits = vand_u16(hu4, sign_mask);
      uint16x4_t inf_res = vorr_u16(signbits, vdup_n_u16(0x7bffu)); /* +/- 65504 */
      uint16x4_t nan_res = signbits;                                /* +/- 0 */
      /* Select final result. */
      hu4 = vbsl_u16(is_inf, inf_res, hu4);
      hu4 = vbsl_u16(is_nan, nan_res, hu4);
      h4 = vreinterpret_f16_u16(hu4);
    }
    vst1_f16((float16_t *)dst, h4);
    src += 4;
    dst += 4;
  }
#endif
  /* Use scalar path to convert the tail of the array (or the whole array if
   * none of the wider paths above were used). */
  for (; i < length; i++) {
    *dst++ = float_to_half_make_finite(*src++);
  }
}

void blender::math::half_to_float_array(const uint16_t *src, float *dst, size_t length)
{
  size_t i = 0;
#if defined(USE_HARDWARE_FP16_F16C) /* 8-wide loop using AVX2 F16C */
  for (; i + 7 < length; i += 8) {
    __m128i src8 = _mm_loadu_epi32(src);
    __m256 f8 = _mm256_cvtph_ps(src8);
    _mm256_storeu_ps(dst, f8);
    src += 8;
    dst += 8;
  }
#elif defined(USE_SSE2_FP16) /* 4-wide loop using SSE2 */
  for (; i + 3 < length; i += 4) {
    __m128i src4 = _mm_loadu_si64(src);
    src4 = _mm_unpacklo_epi16(src4, src4);
    __m128 f4 = F16_to_F32_4x(src4);
    _mm_storeu_ps(dst, f4);
    src += 4;
    dst += 4;
  }
#elif defined(USE_HARDWARE_FP16_NEON) /* 4-wide loop using NEON */
  for (; i + 3 < length; i += 4) {
    float16x4_t src4 = vld1_f16((const float16_t *)src);
    float32x4_t f4 = vcvt_f32_f16(src4);
    vst1q_f32(dst, f4);
    src += 4;
    dst += 4;
  }
#endif
  /* Use scalar path to convert the tail of the array (or the whole array if
   * none of the wider paths above were used). */
  for (; i < length; i++) {
    *dst++ = half_to_float(*src++);
  }
}

#ifdef USE_HARDWARE_FP16_NEON
#  undef USE_HARDWARE_FP16_NEON
#endif
#ifdef USE_HARDWARE_FP16_F16C
#  undef USE_HARDWARE_FP16_F16C
#endif
#ifdef USE_SSE2_FP16
#  undef USE_SSE2_FP16
#endif
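
As a closing illustration (ours, not part of the file): every non-NaN half pattern survives a half->float->half round trip bit-exactly, since FP32 represents all FP16 values and the encoder rounds to nearest even. NaNs are excluded because the scalar encoder canonicalizes them to the quiet pattern 0x7e00 (the hardware paths may treat payloads differently):

static bool half_round_trip_ok()
{
  for (uint32_t h = 0; h < 0x10000; h++) {
    if ((h & 0x7c00) == 0x7c00 && (h & 0x03ff) != 0) {
      continue; /* NaN payloads are not preserved by float_to_half */
    }
    float f = blender::math::half_to_float(uint16_t(h));
    if (blender::math::float_to_half(f) != uint16_t(h)) {
      return false;
    }
  }
  return true;
}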