mask.foreach_index_optimized<T>(GrainSize(4096),
                                [&](const T src, const T dst) { r_map[src] = dst; });
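/* Usage sketch for #build_reverse_map (illustrative, not part of this file):
 *
 *   IndexMaskMemory memory;
 *   const IndexMask mask = IndexMask::from_indices<int>({2, 5, 7}, memory);
 *   Array<int> r_map(8, -1);
 *   build_reverse_map<int>(mask, r_map);
 *
 * Afterwards r_map is {-1, -1, 0, -1, -1, 1, -1, 2}: each masked index maps back
 * to its position within the mask. */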
/* In #build_static_indices_array. */
std::array<int16_t, max_segment_size> data;

/* In #get_static_index_mask_for_min_size. */
static constexpr int64_t size_shift = 31;
indices_by_segment[segment_i] = static_offsets;

segment_offsets.last() = max_size;

data.segments_num_ = segments_num;
data.indices_by_segment_ = indices_by_segment.data();
data.segment_offsets_ = segment_offsets.data();
/* Every static segment is a full range starting at a multiple of the segment
 * size, so the offsets array doubles as the cumulative-sizes array. */
data.cumulative_segment_sizes_ = segment_offsets.data();
data.begin_index_in_segment_ = 0;
if (std::holds_alternative<IndexRange>(segment)) {
  const IndexRange range = std::get<IndexRange>(segment);
  parts.append(fmt::format("{}-{}", range.first(), range.last()));
}
else {
  const Span<int64_t> segment_indices = std::get<Span<int64_t>>(segment);
  parts.append(fmt::format("{}", fmt::join(segment_indices, ", ")));
}

stream << fmt::format("(Size: {} | {})", mask.size(), fmt::join(parts, ", "));
/* Slicing copies no segment data; both overloads below just narrow the view by
 * advancing the pointers into the shared per-segment arrays. */
sliced.indices_num_ = size;
sliced.indices_by_segment_ += first_it.segment_i;
sliced.segment_offsets_ += first_it.segment_i;
sliced.cumulative_segment_sizes_ += first_it.segment_i;

sliced.indices_num_ = size;
sliced.indices_by_segment_ += first_it.segment_i;
sliced.segment_offsets_ += first_it.segment_i;
sliced.cumulative_segment_sizes_ += first_it.segment_i;
const std::optional<RawMaskIterator> last_it = this->find_smaller_equal(start + size - 1);
if (!first_it || !last_it) {
  return {};
}

if (last_index < first_index) {
  return {};
}

const int64_t sliced_mask_size = last_index - first_index + 1;
return this->slice(*first_it, *last_it, sliced_mask_size);
if (std::optional<IndexRange> range = this->to_range()) {
  return range->slice(start, size).shift(offset);
}
return this->slice(start, size).shift(offset, memory);
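/* Usage sketch for #slice_and_shift (illustrative): the slice selects by
 * position, the shift then offsets the index values.
 *
 *   IndexMaskMemory memory;
 *   const IndexMask mask = IndexMask::from_indices<int>({5, 6, 7, 50}, memory);
 *   const IndexMask moved = mask.slice_and_shift(IndexRange(0, 3), 10, memory);
 *
 * `moved` now contains {15, 16, 17}. */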
if (std::optional<IndexRange> range = this->to_range()) {
  return range->shift(offset);
}

shifted_mask.segment_offsets_ = new_segment_offsets.data();
int64_t group_start_segment_i = 0;
int64_t group_first = segments[0][0];

auto finish_group = [&](const int64_t last_segment_i) {
  if (group_start_segment_i == last_segment_i) {
    return;
  }

  for (int64_t i = group_start_segment_i + 1; i <= last_segment_i; i++) {

const std::optional<IndexRange> segment_base_range = non_empty_as_range_try(
    segment.base_span());
const bool segment_is_range = segment_base_range.has_value();

if (group_as_range && segment_is_range) {
  if (group_last + 1 == segment[0]) {
    group_last = segment.last();

finish_group(segment_i - 1);

group_start_segment_i = segment_i;
group_first = segment[0];
group_last = segment.last();
group_as_range = segment_is_range;

finish_group(segments.size() - 1);

/* Compact away the segments that were merged into their predecessors. */
const int64_t new_segments_num = std::remove_if(segments.begin(),
                                                segments.end(),
                                                [](const IndexMaskSegment segment) {
                                                  return segment.is_empty();
                                                }) -
                                 segments.begin();
return new_segments_num;
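/* The consolidation above merges runs of range-based segments whose values are
 * contiguous across the segment boundary (e.g. hypothetical segments covering
 * 0-99 and 100-199 become one segment covering 0-199), then compacts the
 * emptied slots away. */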
BLI_assert(std::is_sorted(segment.base_span().begin(), segment.base_span().end()));

last_index = segment.last();

cumulative_segment_sizes[0] = 0;

indices_by_segment[segment_i] = segment.base_span().data();
segment_offsets[segment_i] = segment.offset();
cumulative_segment_sizes[segment_i + 1] = cumulative_segment_sizes[segment_i] + segment.size();

data.indices_by_segment_ = indices_by_segment.data();
data.segment_offsets_ = segment_offsets.data();
data.cumulative_segment_sizes_ = cumulative_segment_sizes.data();
data.begin_index_in_segment_ = 0;
data.end_index_in_segment_ = segments.last().size();
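/* Illustration (hypothetical sizes): for segments of sizes {100, 50, 25} the loop
 * above builds cumulative_segment_sizes = {0, 100, 150, 175}, so segment i covers
 * mask positions [cumulative_segment_sizes[i], cumulative_segment_sizes[i + 1]). */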
template<typename T, int64_t InlineBufferSize>

for (const auto &segment : segments) {
  if (std::holds_alternative<IndexRange>(segment)) {
    const IndexRange segment_range = std::get<IndexRange>(segment);
    /* ... */
  }
  else {
    Span<T> segment_indices = std::get<Span<T>>(segment);
    MutableSpan<int16_t> offset_indices = allocator.allocate_array<int16_t>(
        segment_indices.size());
    while (!segment_indices.is_empty()) {
      /* Indices within a segment are stored relative to its first index. */
      const int64_t offset = segment_indices[0];
      const int64_t next_segment_size = find_predicate_begin(
          segment_indices.begin(), segment_indices.end(), [&](const T value) {
            return value - offset >= max_segment_size;
          });
      for (const int64_t i : IndexRange(next_segment_size)) {
        const int64_t offset_index = segment_indices[i] - offset;
        offset_indices[i] = int16_t(offset_index);
      }
      r_segments.append_as(offset, offset_indices.take_front(next_segment_size));
      segment_indices = segment_indices.drop_front(next_segment_size);
      offset_indices = offset_indices.drop_front(next_segment_size);
    }
  }
}
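/* Example of the chunking above (hypothetical indices): stored indices are
 * int16_t offsets relative to the segment start, so assuming max_segment_size is
 * 16384, the indices {5, 10, 20000} cannot share one segment. The loop emits a
 * segment with offset 5 holding {0, 5} and a second with offset 20000 holding
 * {0}. */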
main_segments.extend(data.segments);
if (indices.is_empty()) {
  return {};
}

constexpr int64_t min_grain_size = 4096;

if (indices.size() <= min_grain_size) {

const int64_t grain_size = std::clamp(
    indices.size() / (threads_num * 4), min_grain_size, max_grain_size);

ParallelSegmentsCollector::LocalData &local_data = segments_collector.data_by_thread.local();
segments_from_indices(indices.slice(range), local_data.allocator, local_data.segments);

segments_collector.reduce(memory, segments);

segments.resize(consolidated_segments_num);
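/* Usage sketch for #IndexMask::from_indices (illustrative):
 *
 *   IndexMaskMemory memory;
 *   const Vector<int> indices = {0, 1, 2, 3, 100, 101};
 *   const IndexMask mask = IndexMask::from_indices<int>(indices, memory);
 *
 * The resulting mask has size 6; the consecutive run 0-3 is stored as a range
 * segment. */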
const int64_t segment_start = universe_segment[0];

const int64_t segment_end = universe_segment.last() + 1;

const int64_t global_index = universe_segment[i];
const int64_t local_index = global_index - segment_start;

if (bits_slice[local_index]) {
  local_bits[local_index].set();
}

return segment_start;

/* ... */
    universe_segment.last());
const int64_t segment_shift = batch_predicate(universe_segment, builder);

constexpr int64_t threshold = 64;
int64_t next_range_to_process = 0;
int64_t skipped_indices_num = 0;

auto consolidate_skipped_ranges = [&](int64_t end_range_i) {
  if (skipped_indices_num == 0) {
    return;
  }

  counter += range.size();

if (range.size() > threshold || builder.size() == 1) {
  consolidate_skipped_ranges(i);

  next_range_to_process = i + 1;
  skipped_indices_num = 0;
}
else {
  skipped_indices_num += range.size();
}

consolidate_skipped_ranges(builder.size());
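/* The intent of the threshold above: ranges longer than `threshold` become their
 * own range segments, while short neighboring ranges are pooled (tracked via
 * `skipped_indices_num`) and emitted together, which avoids producing many tiny
 * segments. */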
if (universe.size() <= grain_size.value) {

segments_from_batch_predicate(
    universe_segment, data.allocator, batch_predicate, data.segments);

segments_collector.reduce(memory, segments);

/* ... */
    universe_segment.last());
return *static_cast<const bool *>(info.data) ? universe : IndexMask();

return IndexMask::from_predicate(
    universe, GrainSize(512), memory, [&](const int64_t index) { return bools[index]; });
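/* Usage sketch for #IndexMask::from_bools (illustrative):
 *
 *   IndexMaskMemory memory;
 *   const Array<bool> bools = {false, true, true, false, true};
 *   const IndexMask mask = IndexMask::from_bools(bools, memory);
 *
 * `mask` now contains {1, 2, 4}. */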
/* In #IndexMask::from_union. */
const Expr &expr = builder.merge({&mask_a, &mask_b});

/* In #IndexMask::from_difference. */
const Expr &expr = builder.subtract({&mask_a}, {&mask_b});
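/* Usage sketch for the expression-based set operations (illustrative):
 *
 *   IndexMaskMemory memory;
 *   const IndexMask a = IndexMask::from_indices<int>({1, 2, 3}, memory);
 *   const IndexMask b = IndexMask::from_indices<int>({3, 4}, memory);
 *   const IndexMask u = IndexMask::from_union(a, b, memory);      // {1, 2, 3, 4}
 *   const IndexMask d = IndexMask::from_difference(a, b, memory); // {1, 2}
 */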
if (const auto *range = std::get_if<IndexRange>(&item)) {
  /* ... */
}
else if (const auto *span_i64 = std::get_if<Span<int64_t>>(&item)) {
  for (const int64_t i : *span_i64) {
    /* ... */
  }
}
else if (const auto *span_i32 = std::get_if<Span<int>>(&item)) {
  for (const int i : *span_i32) {
    /* ... */
  }
}
else if (const auto *index = std::get_if<int64_t>(&item)) {
  /* ... */
}

values_vec.extend(values.begin(), values.end());
std::sort(values_vec.begin(), values_vec.end());

r_indices[pos] = T(i);
for (const int64_t i : shifted_indices) {
  /* ... */
}

/* ... */
    [&](const int64_t i) { r_bools[i] = true; });
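/* Usage sketch (illustrative; `mask` assumed to exist):
 *
 *   Array<bool> r_bools(mask.min_array_size(), false);
 *   mask.to_bools(r_bools);
 *
 * Afterwards r_bools[i] is true exactly for the indices contained in the mask. */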
std::array<int16_t, max_segment_size> indices_array;
const int64_t true_indices_num = filter_indices(universe_segment, indices_array.data());
if (true_indices_num == 0) {
  return;
}

for (const auto &true_segment : true_segments) {
  if (std::holds_alternative<IndexRange>(true_segment)) {
    const IndexRange segment_range = std::get<IndexRange>(true_segment);
    /* ... */
  }
  else {
    const Span<int16_t> segment_indices = std::get<Span<int16_t>>(true_segment);
    /* ... */
  }
}

if (universe.size() <= grain_size.value) {

segments_from_predicate_filter(
    universe_segment, data.allocator, filter_indices, data.segments);

segments_collector.reduce(memory, segments);

segments.resize(consolidated_segments_num);
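/* Usage sketch for #IndexMask::from_predicate (illustrative): selecting the even
 * indices from a full range.
 *
 *   IndexMaskMemory memory;
 *   const IndexMask evens = IndexMask::from_predicate(
 *       IndexMask(100), GrainSize(1024), memory,
 *       [](const int64_t i) { return i % 2 == 0; });
 *
 * `evens` now contains {0, 2, 4, ..., 98}. */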
if (const std::optional<RawMaskIterator> it = this->find_larger_equal(query_index)) {
  if ((*this)[*it] == query_index) {
    return it;
  }
}
return std::nullopt;

if (query_index < segment[0]) {
  const int64_t index_in_segment = segment_begin_index;

const int64_t local_index = query_index - segment.offset();
/* ... */
    segment.base_span(), [&](const int16_t i) { return i >= local_index; });
const int64_t actual_index_in_segment = index_in_segment + segment_begin_index;

const std::optional<RawMaskIterator> larger_equal_it = this->find_larger_equal(query_index);
if (!larger_equal_it) {

if ((*this)[*larger_equal_it] == query_index) {
  return larger_equal_it;
}
if (larger_equal_it->segment_i > 0) {
  if (larger_equal_it->index_in_segment > 0) {
    return RawMaskIterator{larger_equal_it->segment_i,
                           int16_t(larger_equal_it->index_in_segment - 1)};

return this->find(query_index).has_value();
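/* Usage sketch (illustrative; `mask` assumed to exist):
 *
 *   if (const std::optional<RawMaskIterator> it = mask.find(42)) {
 *     // 42 is contained; *it addresses it by segment and index-in-segment.
 *   }
 *   const bool has_42 = mask.contains(42);  // Convenience wrapper around find().
 */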
for (const int64_t i : data.index_range()) {
  /* ... */
}

/* The identical returns below appear to belong to separate branches, one per
 * supported value of n. */
return data.as_span().take_front(repetitions);

return data.as_span().take_front(repetitions);

return data.as_span().take_front(repetitions);
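/* Usage sketch for #IndexMask::from_every_nth (illustrative):
 *
 *   IndexMaskMemory memory;
 *   const IndexMask mask = IndexMask::from_every_nth(3, 5, 0, memory);
 *
 * `mask` now contains {0, 3, 6, 9, 12}. */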
if (repetitions == 0) {
  return {};
}

if (repetitions == 1 && initial_offset == 0) {
  /* The mask can be reused as-is. */
  return mask_to_repeat;
}

const std::optional<IndexRange> range_to_repeat = mask_to_repeat.to_range();
if (range_to_repeat && range_to_repeat->first() == 0 && range_to_repeat->size() == stride) {
  /* The repetitions tile a contiguous range without holes. */
  return IndexRange(initial_offset, repetitions * stride);
}

if (src_segment.size() == 1) {

/* ... */
    inline_repetitions_num * src_segment.size());

const int64_t index = src_segment[i] - src_segment[0] + repetition * stride;
repeated_indices_mut[repetition * src_segment.size() + i] = int16_t(index);

repeated_indices = repeated_indices_mut;

const int64_t used_repetitions = std::min(inline_repetitions_num,
                                          repetitions - i * inline_repetitions_num);
repeated_segments.append(
    /* ... */ repeated_indices.take_front(used_repetitions * src_segment.size())));

/* ... */
    segment.offset() + repetition * stride + initial_offset, segment.base_span()));
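/* Usage sketch for #IndexMask::from_repeating (illustrative):
 *
 *   IndexMaskMemory memory;
 *   const IndexMask base = IndexMask::from_indices<int>({0, 1}, memory);
 *   const IndexMask tiled = IndexMask::from_repeating(base, 3, 10, 100, memory);
 *
 * `tiled` now contains {100, 101, 110, 111, 120, 121}. */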
return masks[0].size() == mask.size();
for (const int64_t mask_i : masks.index_range()) {
  if (start_iter[mask_i] == 0) {
    segments[mask_i] = masks[mask_i].segment(segment_iter[mask_i]);
  }
}

int16_t next_common_sequence_size = std::numeric_limits<int16_t>::max();
for (const int64_t mask_i : masks.index_range()) {
  next_common_sequence_size = math::min(
      next_common_sequence_size, int16_t(segments[mask_i].size() - start_iter[mask_i]));
}

for (const int64_t mask_i : masks.index_range()) {
  sequences[mask_i] = segments[mask_i].slice(start_iter[mask_i], next_common_sequence_size);
}

if (!fn(sequences)) {
  break;
}

for (const int64_t mask_i : masks.index_range()) {
  if (segments[mask_i].size() - start_iter[mask_i] == next_common_sequence_size) {
    segment_iter[mask_i]++;
    start_iter[mask_i] = 0;
  }
  else {
    start_iter[mask_i] += next_common_sequence_size;
  }
}
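/* The loop above advances all masks in lock-step: each round it takes the longest
 * run that stays inside every mask's current segment, passes the per-mask
 * sub-segments to `fn`, and then either moves on to the next segment (when the
 * current one is fully consumed) or advances within it. */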
if (a.size() != b.size()) {
  return false;
}

if (a_is_range || b_is_range) {
  return a_is_range && b_is_range;
}

BLI_assert(a_indices[0] >= 0 && b_indices[0] >= 0);
BLI_assert(b_indices[0] == a_indices[0] - offset_difference);

return std::equal(a_indices.begin(),
                  a_indices.end(),
                  b.base_span().begin(),
                  [offset_difference](const int16_t a_index, const int16_t b_index) -> bool {
                    return a_index - offset_difference == b_index;
                  });
if (a.size() != b.size()) {
  return false;
}

const std::optional<IndexRange> a_as_range = a.to_range();
const std::optional<IndexRange> b_as_range = b.to_range();
if (a_as_range.has_value() || b_as_range.has_value()) {
  return a_as_range == b_as_range;
}
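/* The fast path above relies on std::optional comparison: two optionals compare
 * equal only when both are empty or both hold the same IndexRange, so a range
 * mask can never equal a non-range mask of the same size. */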
if (const std::optional<int> single_group_id = group_ids.get_if_single()) {

  const int64_t groups_num = r_index_by_group_id.size();
  result_masks.resize(groups_num);
  result_masks[group_index] = universe;
  return result_masks;
}

const int64_t groups_num = r_index_by_group_id.size();
result_masks.resize(groups_num);

const int group_id = group_ids_span[i];
return r_index_by_group_id.index_of(group_id);

return result_masks;

return from_group_ids(
    IndexMask(group_ids.size()), group_ids, memory, r_index_by_group_id);
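/* Usage sketch for #IndexMask::from_group_ids (illustrative; `VArray<int>::ForSpan`
 * assumed from the virtual-array API):
 *
 *   IndexMaskMemory memory;
 *   VectorSet<int> index_by_group_id;
 *   const Array<int> ids = {7, 7, 3, 7, 3};
 *   const Vector<IndexMask, 4> masks = IndexMask::from_group_ids(
 *       VArray<int>::ForSpan(ids), memory, index_by_group_id);
 *
 * masks[index_by_group_id.index_of(7)] contains {0, 1, 3} and
 * masks[index_by_group_id.index_of(3)] contains {2, 4}. */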