Blender V4.3
bit_bool_conversion.cc
Go to the documentation of this file.
/* SPDX-FileCopyrightText: 2024 Blender Authors
 *
 * SPDX-License-Identifier: GPL-2.0-or-later */
4
6#include "BLI_simd.hh"
7#include "BLI_timeit.hh"
8
9namespace blender::bits {
10
12 MutableBitSpan r_bits,
13 const int64_t allowed_overshoot)
14{
15 BLI_assert(r_bits.size() >= bools.size());
16 if (bools.is_empty()) {
17 return false;
18 }
19
20 int64_t bool_i = 0;
21 const bool *bools_ = bools.data();
22
23 bool any_true = false;
24
25/* Conversion from bools to bits can be way faster with intrinsics. That's because instead of
26 * processing one element at a time, we can process 16 at once. */
27#if BLI_HAVE_SSE2
28 /* Initialize zeros, so that we can compare against it. */
29 const __m128i zero_bytes = _mm_set1_epi8(0);
30 int64_t iteration_end = bools.size();
31 if (iteration_end % 16 > 0) {
32 if (allowed_overshoot >= 16) {
33 iteration_end = (iteration_end + 16) & ~15;
34 }
35 }
36 /* Iterate over chunks of booleans. */
37 for (; bool_i + 16 <= iteration_end; bool_i += 16) {
38 /* Load 16 bools at once. */
39 const __m128i group = _mm_loadu_si128(reinterpret_cast<const __m128i *>(bools_ + bool_i));
40 /* Compare them all against zero. The result is a mask of the form [0x00, 0xff, 0xff, ...]. */
41 const __m128i is_false_byte_mask = _mm_cmpeq_epi8(group, zero_bytes);
42 /* Compress the byte-mask into a bit mask. This takes one bit from each byte. */
43 const uint16_t is_false_mask = _mm_movemask_epi8(is_false_byte_mask);
44 /* Now we have a bit mask where each bit corresponds to an input boolean. */
45 const uint16_t is_true_mask = ~is_false_mask;
46 any_true |= is_true_mask != 0;
47
48 const int start_bit_in_int = (r_bits.bit_range().start() + bool_i) & BitIndexMask;
49 BitInt *start_bit_int = int_containing_bit(r_bits.data(), r_bits.bit_range().start() + bool_i);
50 *start_bit_int |= BitInt(is_true_mask) << start_bit_in_int;
51
52 if (start_bit_in_int > BitsPerInt - 16) {
53 /* It's possible that the bits need inserted in two consecutive integers. */
54 start_bit_int[1] |= BitInt(is_true_mask) >> (64 - start_bit_in_int);
55 }
56 }
57#endif
58
59 /* Process remaining bools. */
60 for (; bool_i < bools.size(); bool_i++) {
61 if (bools_[bool_i]) {
62 r_bits[bool_i].set();
63 any_true = true;
64 }
65 }
66 return any_true;
67}
68
69} // namespace blender::bits
#define BLI_assert(a)
Definition BLI_assert.h:50
constexpr int64_t start() const
constexpr const T * data() const
Definition BLI_span.hh:216
constexpr int64_t size() const
Definition BLI_span.hh:253
constexpr bool is_empty() const
Definition BLI_span.hh:261
const IndexRange & bit_range() const
uint64_t BitInt
bool or_bools_into_bits(Span< bool > bools, MutableBitSpan r_bits, int64_t allowed_overshoot=0)
static constexpr BitInt BitIndexMask
static constexpr int64_t BitsPerInt
BitInt * int_containing_bit(BitInt *data, const int64_t bit_index)
unsigned short uint16_t
Definition stdint.h:79
__int64 int64_t
Definition stdint.h:89