Blender V5.0
BLI_bit_span_to_index_ranges.hh
/* SPDX-FileCopyrightText: 2024 Blender Authors
 *
 * SPDX-License-Identifier: GPL-2.0-or-later */

/** \file
 * \ingroup bli
 */

#pragma once

#include <limits>

#include "BLI_bit_span.hh"
#include "BLI_index_ranges_builder.hh"
#include "BLI_math_bits.h"
#include "BLI_simd.hh"
namespace blender::bits {
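
/**
 * Converts the set bits in the given bit span into index ranges and adds them to the builder.
 * Illustrative example (bit 0 is the right-most bit): the bits 0b00111011 contain the set-bit
 * runs [0, 2) and [3, 6), so those two ranges are added.
 */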
template<typename IntT>
void bits_to_index_ranges(const BitSpan bits, IndexRangesBuilder<IntT> &builder)
{
  if (bits.is_empty()) {
    return;
  }

  /* -1 because we also need to store the end of the last range. */
  constexpr int64_t max_index = std::numeric_limits<IntT>::max() - 1;
  UNUSED_VARS_NDEBUG(max_index);

  auto append_range = [&](const IndexRange range) {
    BLI_assert(range.last() <= max_index);
    builder.add_range(IntT(range.start()), IntT(range.one_after_last()));
  };

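  /* Processes the bits of a single 64-bit integer: `start_bit` and `bits_num` select which bits
   * of `value` to look at, and `start` is the index in the overall bit-span that `start_bit`
   * corresponds to in the output ranges. */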
  auto process_bit_int = [&](const BitInt value,
                             const int64_t start_bit,
                             const int64_t bits_num,
                             const int64_t start) {
    /* The bits in the mask are the ones we should look at. */
    const BitInt mask = mask_range_bits(start_bit, bits_num);
    const BitInt masked_value = mask & value;
    if (masked_value == 0) {
      /* Do nothing. */
      return;
    }
    if (masked_value == mask) {
      /* All bits are set. */
      append_range(IndexRange::from_begin_size(start, bits_num));
      return;
    }
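    /* Bit index `b` within `value` corresponds to output index `b + bit_i_to_output_offset`. */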
    const int64_t bit_i_to_output_offset = start - start_bit;

    /* Iterate over ranges of 1s. For example, if the bits are 0b000111110001111000, the loop
     * below requires two iterations. The worst case for this is when there are very many small
     * ranges of 1s (e.g. 0b10101010101). So far it seems like the overhead of detecting such
     * cases is higher than the potential benefit of using another algorithm. */
    BitInt current_value = masked_value;
    while (current_value != 0) {
      /* Find start of next range of 1s. */
      const int64_t first_set_bit_i = int64_t(bitscan_forward_uint64(current_value));
      /* This mask is used to find the end of the 1s range. */
      const BitInt find_unset_value = ~(current_value | mask_first_n_bits(first_set_bit_i) |
                                        ~mask);
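      /* Illustrative example: for `current_value = 0b0111000` with a full mask, we get
       * `first_set_bit_i = 3`. OR-ing in the bits below bit 3 and the bits outside the mask and
       * then inverting leaves the lowest set bit of `find_unset_value` at index 6, which is one
       * past the end of the run of 1s. */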
      if (find_unset_value == 0) {
        /* In this case, the range of 1s extends to the end of the current integer. */
        const IndexRange range = IndexRange::from_begin_end(first_set_bit_i,
                                                            start_bit + bits_num);
        append_range(range.shift(bit_i_to_output_offset));
        break;
      }
      /* Find the index of the first 0 after the range of 1s. */
      const int64_t next_unset_bit_i = int64_t(bitscan_forward_uint64(find_unset_value));
      /* Store the range of 1s. */
      const IndexRange range = IndexRange::from_begin_end(first_set_bit_i, next_unset_bit_i);
      append_range(range.shift(bit_i_to_output_offset));
      /* Remove the processed range of 1s so that it is ignored in the next iteration. */
      current_value &= ~mask_first_n_bits(next_unset_bit_i);
    }
    return;
  };

  const BitInt *data = bits.data();
  const IndexRange bit_range = bits.bit_range();

  /* As much as possible we want to process full 64-bit integers at once. However, the bit-span
   * may not be aligned, so it's first split up into aligned and unaligned sections. */
  const AlignedIndexRanges ranges = split_index_range_by_alignment(bit_range, BitsPerInt);

  /* Process the first (partial) integer in the bit-span. */
  if (!ranges.prefix.is_empty()) {
    const BitInt first_int = *int_containing_bit(data, bit_range.start());
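    /* `& BitIndexMask` extracts the bit offset of the prefix start within that first integer. */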
    process_bit_int(
        first_int, BitInt(ranges.prefix.start()) & BitIndexMask, ranges.prefix.size(), 0);
  }

  /* Process all the full integers in the bit-span. */
  if (!ranges.aligned.is_empty()) {
    const BitInt *start = int_containing_bit(data, ranges.aligned.start());
    const int64_t ints_to_check = ranges.aligned.size() / BitsPerInt;
    int64_t int_i = 0;

/* Checking for chunks of 0 bits can be sped up using intrinsics quite significantly. */
#if BLI_HAVE_SSE2
    for (; int_i + 1 < ints_to_check; int_i += 2) {
      /* Loads the next 128 bits. */
      const __m128i group = _mm_loadu_si128(reinterpret_cast<const __m128i *>(start + int_i));
      /* Checks if all the 128 bits are zero. */
      const bool group_is_zero = _mm_testz_si128(group, group);
      if (group_is_zero) {
        continue;
      }
      /* If at least one of them is not zero, process the two integers separately. */
      for (int j = 0; j < 2; j++) {
        process_bit_int(
            start[int_i + j], 0, BitsPerInt, ranges.prefix.size() + (int_i + j) * BitsPerInt);
      }
    }
#endif

    /* Process the remaining integers. */
    for (; int_i < ints_to_check; int_i++) {
      process_bit_int(start[int_i], 0, BitsPerInt, ranges.prefix.size() + int_i * BitsPerInt);
    }
  }

  /* Process the final few bits that don't fill up a full integer. */
  if (!ranges.suffix.is_empty()) {
    const BitInt last_int = *int_containing_bit(data, bit_range.last());
    process_bit_int(
        last_int, 0, ranges.suffix.size(), ranges.prefix.size() + ranges.aligned.size());
  }
}

}  // namespace blender::bits
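
The core of the function above is the run-extraction loop in `process_bit_int`. Below is a minimal standalone sketch of that technique on a plain `uint64_t` (illustrative only, not part of this header): the GCC/Clang builtin `__builtin_ctzll` and hand-built masks stand in for the BLI helpers `bitscan_forward_uint64` and `mask_first_n_bits`, and a full-width mask is assumed.

#include <cstdint>
#include <cstdio>

/* Prints every contiguous run of 1-bits in `value` as a half-open index range. */
static void print_one_runs(uint64_t value)
{
  while (value != 0) {
    /* Index of the lowest set bit, i.e. the start of the next run of 1s. */
    const int start = __builtin_ctzll(value);
    /* Set all bits below the run and invert: the lowest set bit of the result is the first 0
     * after the run. */
    const uint64_t find_unset = ~(value | ((1ull << start) - 1));
    const int end = (find_unset == 0) ? 64 : __builtin_ctzll(find_unset);
    printf("[%d, %d)\n", start, end);
    if (end == 64) {
      /* The run of 1s extends to the end of the integer. */
      break;
    }
    /* Clear the processed run so that the next iteration finds the following one. */
    value &= ~((1ull << end) - 1);
  }
}

int main()
{
  /* The example from the comment in the header: two runs. */
  print_one_runs(0b000111110001111000); /* Prints [3, 7) and [10, 15). */
}

The real implementation additionally restricts the runs to the bits selected by `mask` and shifts each range by `bit_i_to_output_offset` before handing it to the `IndexRangesBuilder`.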