Blender V5.0
bit_bool_conversion.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2024 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
8
10#include "BLI_simd.hh"
11
12namespace blender::bits {
13
14template<typename ByteToBit>
16 MutableBitSpan r_bits,
17 const int64_t allowed_overshoot,
18 const ByteToBit &byte_to_bit)
19{
20 BLI_assert(r_bits.size() >= bytes.size());
21 if (bytes.is_empty()) {
22 return false;
23 }
24
25 int64_t byte_i = 0;
26 const char *bytes_ = bytes.data();
27
28 bool any_true = false;
29
30/* Conversion from bytes to bits can be way faster with intrinsics. That's because instead of
31 * processing one element at a time, we can process 16 at once. */
32#if BLI_HAVE_SSE2
33 int64_t iteration_end = bytes.size();
34 if (iteration_end % 16 > 0) {
35 if (allowed_overshoot >= 16) {
36 iteration_end = (iteration_end + 16) & ~15;
37 }
38 }
39 /* Iterate over chunks of bytes. */
40 for (; byte_i + 16 <= iteration_end; byte_i += 16) {
41 /* Load 16 bytes at once. */
42 const __m128i group = _mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes_ + byte_i));
43 const uint16_t is_true_mask = byte_to_bit.see2_chunk(group);
44 any_true |= is_true_mask != 0;
45
46 const int start_bit_in_int = (r_bits.bit_range().start() + byte_i) & BitIndexMask;
47 BitInt *start_bit_int = int_containing_bit(r_bits.data(), r_bits.bit_range().start() + byte_i);
48 *start_bit_int |= BitInt(is_true_mask) << start_bit_in_int;
49
50 if (start_bit_in_int > BitsPerInt - 16) {
51 /* It's possible that the bits need inserted in two consecutive integers. */
52 start_bit_int[1] |= BitInt(is_true_mask) >> (64 - start_bit_in_int);
53 }
54 }
55#endif
56
57 /* Process remaining bytes. */
58 for (; byte_i < bytes.size(); byte_i++) {
59 if (byte_to_bit.single(bytes_[byte_i])) {
60 r_bits[byte_i].set();
61 any_true = true;
62 }
63 }
64 return any_true;
65}
66
/**
 * Byte-to-bit conversion used with #or_bytes_into_bits for boolean input: every non-zero byte
 * becomes a set bit.
 */
struct BoolToBit {
  /** Scalar conversion of one byte: true for any non-zero value. */
  static bool single(const char c)
  {
    return bool(c);
  }

#if BLI_HAVE_SSE2
  /**
   * Convert 16 bytes at once.
   * \return A mask in which bit `i` is set when byte `i` of `chunk` is non-zero.
   */
  static uint16_t see2_chunk(const __m128i chunk)
  {
    const __m128i zero_bytes = _mm_set1_epi8(0);
    /* Compare them all against zero. The result is a mask of the form [0x00, 0xff, 0xff, ...]. */
    const __m128i is_false_byte_mask = _mm_cmpeq_epi8(chunk, zero_bytes);
    /* Compress the byte-mask into a bit mask. This takes one bit from each byte. Only the low 16
     * bits of the returned `int` are used, so the narrowing is lossless. */
    const uint16_t is_false_mask = uint16_t(_mm_movemask_epi8(is_false_byte_mask));
    /* Invert to get one bit per non-zero input byte. `~` promotes to `int`, so cast back
     * explicitly to drop the upper bits. */
    return uint16_t(~is_false_mask);
  }
#endif
};
87
89 MutableBitSpan r_bits,
90 const int64_t allowed_overshoot)
91{
92 return or_bytes_into_bits(bools.cast<char>(), r_bits, allowed_overshoot, BoolToBit());
93}
94
95} // namespace blender::bits
#define BLI_assert(a)
Definition BLI_assert.h:46
long long int int64_t
constexpr int64_t start() const
Span< NewT > constexpr cast() const
Definition BLI_span.hh:418
constexpr const T * data() const
Definition BLI_span.hh:215
constexpr int64_t size() const
Definition BLI_span.hh:252
constexpr bool is_empty() const
Definition BLI_span.hh:260
const IndexRange & bit_range() const
uint64_t BitInt
bool or_bools_into_bits(Span< bool > bools, MutableBitSpan r_bits, int64_t allowed_overshoot=0)
bool or_bytes_into_bits(const Span< char > bytes, MutableBitSpan r_bits, const int64_t allowed_overshoot, const ByteToBit &byte_to_bit)
static constexpr BitInt BitIndexMask
static constexpr int64_t BitsPerInt
BitInt * int_containing_bit(BitInt *data, const int64_t bit_index)
static bool single(const char c)