Blender V4.3
offset_indices.cc
/* SPDX-FileCopyrightText: 2023 Blender Authors
 *
 * SPDX-License-Identifier: GPL-2.0-or-later */

#include <algorithm>
#include <limits>

#include "BLI_array_utils.hh"
#include "BLI_offset_indices.hh"
#include "BLI_task.hh"

namespace blender::offset_indices {

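/* Convert counts to offsets in-place: each element is replaced by the running total of the
 * preceding counts plus `start_offset`, and the last element receives the grand total. For
 * example, with a start offset of 0, the counts [3, 2, 4, x] become the offsets [0, 3, 5, 9].
 * Overflow is only detected by an assert in debug builds. */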
OffsetIndices<int> accumulate_counts_to_offsets(MutableSpan<int> counts_to_offsets,
                                                const int start_offset)
{
  int offset = start_offset;
  int64_t offset_i64 = start_offset;

  for (const int i : counts_to_offsets.index_range().drop_back(1)) {
    const int count = counts_to_offsets[i];
    BLI_assert(count >= 0);
    counts_to_offsets[i] = offset;
    offset += count;
#ifndef NDEBUG
    offset_i64 += count;
#endif
  }
  counts_to_offsets.last() = offset;

  BLI_assert_msg(offset == offset_i64, "Integer overflow occurred");
  UNUSED_VARS_NDEBUG(offset_i64);

  return OffsetIndices<int>(counts_to_offsets);
}

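/* Same conversion as #accumulate_counts_to_offsets, but the accumulation always happens in
 * 64-bit arithmetic and std::nullopt is returned instead of asserting when the total does not
 * fit into an `int`. */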
std::optional<OffsetIndices<int>> accumulate_counts_to_offsets_with_overflow_check(
    MutableSpan<int> counts_to_offsets, int start_offset)
{
  /* This variant was measured to be about 8% slower than the version without the overflow
   * check. Since this function is often a serial bottleneck, we use a separate code path for
   * when an overflow check is requested. */
  int64_t offset = start_offset;
  for (const int i : counts_to_offsets.index_range().drop_back(1)) {
    const int count = counts_to_offsets[i];
    BLI_assert(count >= 0);
    counts_to_offsets[i] = offset;
    offset += count;
  }
  counts_to_offsets.last() = offset;
  const bool has_overflow = offset >= std::numeric_limits<int>::max();
  if (has_overflow) {
    return std::nullopt;
  }
  return OffsetIndices<int>(counts_to_offsets);
}

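/* Fill offsets describing groups that all have the same size, i.e.
 * offsets[i] = size * i + start_offset. The loop touches every element exactly once, so the
 * task is scheduled as memory-bandwidth-bound. */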
void fill_constant_group_size(const int size, const int start_offset, MutableSpan<int> offsets)
{
  threading::memory_bandwidth_bound_task(offsets.size_in_bytes(), [&]() {
    threading::parallel_for(offsets.index_range(), 1024, [&](const IndexRange range) {
      for (const int64_t i : range) {
        offsets[i] = size * i + start_offset;
      }
    });
  });
}

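/* For every index in the mask, copy the size of the corresponding group into the same
 * position of `sizes`. */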
void copy_group_sizes(const OffsetIndices<int> offsets,
                      const IndexMask &mask,
                      MutableSpan<int> sizes)
{
  mask.foreach_index_optimized<int64_t>(GrainSize(4096),
                                        [&](const int64_t i) { sizes[i] = offsets[i].size(); });
}

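/* Like #copy_group_sizes, but write the sizes compactly: the size of the group at the i-th
 * masked index is written to `sizes[i]`. */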
void gather_group_sizes(const OffsetIndices<int> offsets,
                        const IndexMask &mask,
                        MutableSpan<int> sizes)
{
  mask.foreach_index_optimized<int64_t>(GrainSize(4096), [&](const int64_t i, const int64_t pos) {
    sizes[pos] = offsets[i].size();
  });
}

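/* Write the size of the group at `indices[i]` to `sizes[i]` for every element of `indices`. */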
void gather_group_sizes(const OffsetIndices<int> offsets,
                        const Span<int> indices,
                        MutableSpan<int> sizes)
{
  threading::memory_bandwidth_bound_task(
      sizes.size_in_bytes() + offsets.data().size_in_bytes() + indices.size_in_bytes(), [&]() {
        threading::parallel_for(indices.index_range(), 4096, [&](const IndexRange range) {
          for (const int i : range) {
            sizes[i] = offsets[indices[i]].size();
          }
        });
      });
}

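/* Sum up the sizes of all groups referenced by `indices`. */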
int sum_group_sizes(const OffsetIndices<int> offsets, const Span<int> indices)
{
  int count = 0;
  for (const int i : indices) {
    count += offsets[i].size();
  }
  return count;
}

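/* Sum up the sizes of all groups referenced by the mask. A range segment of the mask covers
 * consecutive groups, so its contribution is computed in constant time from the segment's
 * combined range instead of iterating over every index. */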
int sum_group_sizes(const OffsetIndices<int> offsets, const IndexMask &mask)
{
  int count = 0;
  mask.foreach_segment_optimized([&](const auto segment) {
    if constexpr (std::is_same_v<std::decay_t<decltype(segment)>, IndexRange>) {
      count += offsets[segment].size();
    }
    else {
      for (const int64_t i : segment) {
        count += offsets[i].size();
      }
    }
  });
  return count;
}

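/* Build a packed offsets array for only the selected groups. One offset is written per
 * selected group, followed by the final total, so `dst_offsets` is expected to have
 * `selection.size() + 1` elements. */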
OffsetIndices<int> gather_selected_offsets(const OffsetIndices<int> src_offsets,
                                           const IndexMask &selection,
                                           const int start_offset,
                                           MutableSpan<int> dst_offsets)
{
  if (selection.is_empty()) {
    return {};
  }
  int offset = start_offset;
  selection.foreach_index_optimized<int>([&](const int i, const int pos) {
    dst_offsets[pos] = offset;
    offset += src_offsets[i].size();
  });
  dst_offsets.last() = offset;
  return OffsetIndices<int>(dst_offsets);
}

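/* Fill `r_map` with the inverse mapping from elements to groups: r_map[j] == i for every
 * position j inside the range of group i. */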
void build_reverse_map(OffsetIndices<int> offsets, MutableSpan<int> r_map)
{
  threading::parallel_for(offsets.index_range(), 1024, [&](const IndexRange range) {
    for (const int64_t i : range) {
      r_map.slice(offsets[i]).fill(i);
    }
  });
}

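/* Build offsets that group positions by their value in `indices`: the occurrences of each
 * index are counted and the counts are then converted to offsets. `offsets` is expected to be
 * zero-initialized (this is asserted). */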
void build_reverse_offsets(const Span<int> indices, MutableSpan<int> offsets)
{
  BLI_assert(std::all_of(offsets.begin(), offsets.end(), [](int value) { return value == 0; }));
  array_utils::count_indices(indices, offsets);
  offset_indices::accumulate_counts_to_offsets(offsets);
}

}  // namespace blender::offset_indices