Blender V4.3
BLI_bit_span_to_index_ranges.hh
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2024 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
5#pragma once
6
7#include <limits>
8
9#include "BLI_bit_span.hh"
11#include "BLI_math_bits.h"
12#include "BLI_simd.hh"
13
14namespace blender::bits {
15
22template<typename IntT>
23inline void bits_to_index_ranges(const BitSpan bits, IndexRangesBuilder<IntT> &builder)
24{
25 if (bits.is_empty()) {
26 return;
27 }
28
29 /* -1 because we also need to store the end of the last range. */
30 constexpr int64_t max_index = std::numeric_limits<IntT>::max() - 1;
31 UNUSED_VARS_NDEBUG(max_index);
32
33 auto append_range = [&](const IndexRange range) {
34 BLI_assert(range.last() <= max_index);
35 builder.add_range(IntT(range.start()), IntT(range.one_after_last()));
36 };
37
38 auto process_bit_int = [&](const BitInt value,
39 const int64_t start_bit,
40 const int64_t bits_num,
41 const int64_t start) {
42 /* The bits in the mask are the ones we should look at. */
43 const BitInt mask = mask_range_bits(start_bit, bits_num);
44 const BitInt masked_value = mask & value;
45 if (masked_value == 0) {
46 /* Do nothing. */
47 return;
48 }
49 if (masked_value == mask) {
50 /* All bits are set. */
51 append_range(IndexRange::from_begin_size(start, bits_num));
52 return;
53 }
54 const int64_t bit_i_to_output_offset = start - start_bit;
55
56 /* Iterate over ranges of 1s. For example, if the bits are 0b000111110001111000, the loop
57 * below requires two iterations. The worst case for this is when the there are very many small
58 * ranges of 1s (e.g. 0b10101010101). So far it seems like the overhead of detecting such
59 * cases is higher than the potential benefit of using another algorithm. */
60 BitInt current_value = masked_value;
61 while (current_value != 0) {
62 /* Find start of next range of 1s. */
63 const int64_t first_set_bit_i = int64_t(bitscan_forward_uint64(current_value));
64 /* This mask is used to find the end of the 1s range. */
65 const BitInt find_unset_value = ~(current_value | mask_first_n_bits(first_set_bit_i) |
66 ~mask);
67 if (find_unset_value == 0) {
68 /* In this case, the range one 1s extends to the end of the current integer. */
69 const IndexRange range = IndexRange::from_begin_end(first_set_bit_i, start_bit + bits_num);
70 append_range(range.shift(bit_i_to_output_offset));
71 break;
72 }
73 /* Find the index of the first 0 after the range of 1s. */
74 const int64_t next_unset_bit_i = int64_t(bitscan_forward_uint64(find_unset_value));
75 /* Store the range of 1s. */
76 const IndexRange range = IndexRange::from_begin_end(first_set_bit_i, next_unset_bit_i);
77 append_range(range.shift(bit_i_to_output_offset));
78 /* Remove the processed range of 1s so that it is ignored in the next iteration. */
79 current_value &= ~mask_first_n_bits(next_unset_bit_i);
80 }
81 return;
82 };
83
84 const BitInt *data = bits.data();
85 const IndexRange bit_range = bits.bit_range();
86
87 /* As much as possible we want to process full 64-bit integers at once. However, the bit-span may
88 * not be aligned, so it's first split up into aligned and unaligned sections. */
90
91 /* Process the first (partial) integer in the bit-span. */
92 if (!ranges.prefix.is_empty()) {
93 const BitInt first_int = *int_containing_bit(data, bit_range.start());
94 process_bit_int(
95 first_int, BitInt(ranges.prefix.start()) & BitIndexMask, ranges.prefix.size(), 0);
96 }
97
98 /* Process all the full integers in the bit-span. */
99 if (!ranges.aligned.is_empty()) {
100 const BitInt *start = int_containing_bit(data, ranges.aligned.start());
101 const int64_t ints_to_check = ranges.aligned.size() / BitsPerInt;
102 int64_t int_i = 0;
103
104/* Checking for chunks of 0 bits can be speedup using intrinsics quite significantly. */
105#if BLI_HAVE_SSE2
106 for (; int_i + 1 < ints_to_check; int_i += 2) {
107 /* Loads the next 128 bit. */
108 const __m128i group = _mm_loadu_si128(reinterpret_cast<const __m128i *>(start + int_i));
109 /* Checks if all the 128 bits are zero. */
110 const bool group_is_zero = _mm_testz_si128(group, group);
111 if (group_is_zero) {
112 continue;
113 }
114 /* If at least one of them is not zero, process the two integers separately. */
115 for (int j = 0; j < 2; j++) {
116 process_bit_int(
117 start[int_i + j], 0, BitsPerInt, ranges.prefix.size() + (int_i + j) * BitsPerInt);
118 }
119 }
120#endif
121
122 /* Process the remaining integers. */
123 for (; int_i < ints_to_check; int_i++) {
124 process_bit_int(start[int_i], 0, BitsPerInt, ranges.prefix.size() + int_i * BitsPerInt);
125 }
126 }
127
128 /* Process the final few bits that don't fill up a full integer. */
129 if (!ranges.suffix.is_empty()) {
130 const BitInt last_int = *int_containing_bit(data, bit_range.last());
131 process_bit_int(
132 last_int, 0, ranges.suffix.size(), ranges.prefix.size() + ranges.aligned.size());
133 }
134}
135
136} // namespace blender::bits
#define BLI_assert(a)
Definition BLI_assert.h:50
MINLINE unsigned int bitscan_forward_uint64(unsigned long long a)
#define UNUSED_VARS_NDEBUG(...)
constexpr int64_t last(const int64_t n=0) const
static constexpr IndexRange from_begin_end(const int64_t begin, const int64_t end)
static constexpr IndexRange from_begin_size(const int64_t begin, const int64_t size)
constexpr int64_t start() const
bool add_range(const T start, const T end)
const BitInt * data() const
const IndexRange & bit_range() const
IndexRange range
uint64_t BitInt
static constexpr BitInt BitIndexMask
static constexpr int64_t BitsPerInt
BitInt * int_containing_bit(BitInt *data, const int64_t bit_index)
void bits_to_index_ranges(const BitSpan bits, IndexRangesBuilder< IntT > &builder)
BitInt mask_first_n_bits(const int64_t n)
BitInt mask_range_bits(const int64_t start, const int64_t size)
AlignedIndexRanges split_index_range_by_alignment(const IndexRange range, const int64_t alignment)
__int64 int64_t
Definition stdint.h:89