119 std::sort(boundaries.begin(),
126 std::sort(boundaries.begin(),
129 return a.index < b.index;
138 const int64_t prev_boundary_index,
139 const int64_t current_boundary_index,
142 const int64_t size = current_boundary_index - prev_boundary_index;
148 return *prev_segment;
155 return *prev_segment;
159 result.segments.append(
161 return result.segments.last();
166 const int64_t prev_boundary_index,
167 const int64_t current_boundary_index,
175 return *prev_segment;
178 result.segments.append(
181 return result.segments.last();
186 const int64_t prev_boundary_index,
187 const int64_t current_boundary_index,
194 prev_segment->
mask == ©_from_mask)
198 return *prev_segment;
204 return *prev_segment;
210 return result.segments.last();
215 if (boundaries.is_empty()) {
222 int64_t prev_boundary_index = boundaries[0].index;
225 if (prev_boundary_index < boundary.index) {
228 bool has_full =
false;
229 bool has_unknown =
false;
230 bool copy_from_single_mask =
true;
231 const IndexMask *copy_from_mask =
nullptr;
232 for (
const CoarseSegment *active_segment : active_segments) {
233 switch (active_segment->type) {
243 if (!
ELEM(copy_from_mask,
nullptr, active_segment->mask)) {
244 copy_from_single_mask =
false;
246 copy_from_mask = active_segment->mask;
254 prev_segment, prev_boundary_index, boundary.index, result);
256 else if (has_unknown || !copy_from_single_mask) {
258 prev_segment, prev_boundary_index, boundary.index, result);
260 else if (copy_from_mask !=
nullptr && copy_from_single_mask) {
262 prev_segment, prev_boundary_index, boundary.index, *copy_from_mask, result);
265 prev_boundary_index = boundary.index;
269 if (boundary.is_begin) {
270 active_segments.
append(boundary.segment);
282 if (boundaries.is_empty()) {
289 int64_t prev_boundary_index = boundaries[0].index;
292 if (prev_boundary_index < boundary.index) {
295 if (active_segments.
size() == terms_num) {
299 int unknown_count = 0;
301 bool copy_from_single_mask =
true;
302 const IndexMask *copy_from_mask =
nullptr;
303 for (
const CoarseSegment *active_segment : active_segments) {
304 switch (active_segment->type) {
315 if (!
ELEM(copy_from_mask,
nullptr, active_segment->mask)) {
316 copy_from_single_mask =
false;
318 copy_from_mask = active_segment->mask;
324 BLI_assert(full_count + unknown_count + copy_count == terms_num);
325 if (full_count == terms_num) {
327 prev_segment, prev_boundary_index, boundary.index, result);
329 else if (unknown_count > 0 || copy_count < terms_num || !copy_from_single_mask) {
331 prev_segment, prev_boundary_index, boundary.index, result);
333 else if (copy_count == terms_num && copy_from_single_mask) {
335 prev_segment, prev_boundary_index, boundary.index, *copy_from_mask, result);
339 prev_boundary_index = boundary.index;
343 if (boundary.is_begin) {
344 active_segments.
append(boundary.segment);
355 if (boundaries.is_empty()) {
363 int64_t prev_boundary_index = boundaries[0].index;
366 if (prev_boundary_index < boundary.index) {
369 if (active_main_segments.
size() == 1) {
370 const CoarseSegment &active_main_segment = *active_main_segments[0];
373 bool has_subtract_full =
false;
374 bool has_subtract_same_mask =
false;
375 for (
const CoarseSegment *active_subtract_segment : active_subtract_segments) {
376 switch (active_subtract_segment->type) {
381 has_subtract_full =
true;
386 if (active_main_segment.
mask == active_subtract_segment->mask) {
387 has_subtract_same_mask =
true;
395 if (has_subtract_full) {
399 switch (active_main_segment.
type) {
402 prev_segment, prev_boundary_index, boundary.index, result);
406 if (active_subtract_segments.
is_empty()) {
408 prev_segment, prev_boundary_index, boundary.index, result);
412 prev_segment, prev_boundary_index, boundary.index, result);
417 if (active_subtract_segments.
is_empty()) {
421 *active_main_segment.
mask,
424 else if (has_subtract_same_mask) {
429 prev_segment, prev_boundary_index, boundary.index, result);
437 prev_boundary_index = boundary.index;
441 if (boundary.is_main) {
442 if (boundary.is_begin) {
443 active_main_segments.
append(boundary.segment);
450 if (boundary.is_begin) {
451 active_subtract_segments.
append(boundary.segment);
478 const std::optional<IndexRange> eval_bounds = std::nullopt)
485 for (
const Expr *expression : eval_order) {
486 CoarseResult &expr_result = expression_results[expression->index].emplace();
487 switch (expression->type) {
492 if (eval_bounds.has_value()) {
499 if (!mask.is_empty()) {
501 if (
const std::optional<IndexRange> range = mask.to_range()) {
514 const CoarseResult &term_result = *expression_results[term->index];
516 boundaries.append({segment.bounds.first(),
true, &segment});
517 boundaries.append({segment.bounds.one_after_last(),
false, &segment});
528 const CoarseResult &term_result = *expression_results[term->index];
530 boundaries.append({segment.bounds.first(),
true, &segment});
531 boundaries.append({segment.bounds.one_after_last(),
false, &segment});
541 const CoarseResult &main_term_result = *expression_results[expr.
terms[0]->index];
543 boundaries.append({{segment.bounds.first(),
true, &segment},
true});
544 boundaries.append({{segment.bounds.one_after_last(),
false, &segment},
true});
546 for (
const Expr *term : expr.
terms.as_span().drop_front(1)) {
547 const CoarseResult &term_result = *expression_results[term->index];
549 boundaries.append({{segment.bounds.first(),
true, &segment},
false});
550 boundaries.append({{segment.bounds.one_after_last(),
false, &segment},
false});
561 return std::move(final_result);
601 for (
const Expr *expression : eval_order) {
603 switch (expression->type) {
607 mask.
to_bits(expr_result, -bounds_min);
611 for (
const Expr *term : expression->terms) {
612 expr_result |= expression_results[term->index];
618 for (
const Expr *term : expression->terms.as_span().drop_front(1)) {
619 expr_result &= expression_results[term->index];
625 for (
const Expr *term : expression->terms.as_span().drop_front(1)) {
629 expression_results[term->index]);
648 if (segments.
size() == 1) {
651 if (segments.
size() == 2) {
654 const int64_t size = std::set_union(a.begin(), a.end(),
b.begin(),
b.end(), r_values) -
656 return {bounds_min, {r_values, size}};
663 sorted_segments.
begin(),
664 sorted_segments.
end(),
667 std::array<int16_t, max_segment_size> tmp_indices;
671 int16_t *buffer_b = tmp_indices.data();
673 if (sorted_segments.
size() % 2 == 1) {
675 std::swap(buffer_a, buffer_b);
684 count = std::set_union(a.begin(), a.end(),
b.begin(),
b.end(), dst) - dst;
694 count = std::set_union(a, a +
count,
b.begin(),
b.end(), dst) - dst;
695 std::swap(buffer_a, buffer_b);
697 return {bounds_min, {r_values,
count}};
708 if (segments.
size() == 1) {
711 if (segments.
size() == 2) {
714 const int64_t size = std::set_intersection(a.begin(), a.end(),
b.begin(),
b.end(), r_values) -
716 return {bounds_min, {r_values, size}};
723 sorted_segments.
begin(),
724 sorted_segments.
end(),
727 std::array<int16_t, max_segment_size> tmp_indices_1;
728 std::array<int16_t, max_segment_size> tmp_indices_2;
729 int16_t *buffer_a = tmp_indices_1.data();
730 int16_t *buffer_b = tmp_indices_2.data();
738 count = std::set_intersection(a.begin(), a.end(),
b.begin(),
b.end(), dst) - dst;
746 int16_t *dst = (segment_i == sorted_segments.
size() - 1) ? r_values : buffer_b;
747 count = std::set_intersection(a, a +
count,
b.begin(),
b.end(), dst) - dst;
748 std::swap(buffer_a, buffer_b);
750 return {bounds_min, {r_values,
count}};
769 if (subtract_segments.
size() == 1) {
771 const IndexMaskSegment subtract_segment = subtract_segments[0].shift(-bounds_min);
772 const int64_t size = std::set_difference(shifted_main_segment.
begin(),
773 shifted_main_segment.
end(),
774 subtract_segment.
begin(),
775 subtract_segment.
end(),
778 return {bounds_min, {r_values, size}};
783 subtract_count += segment.
size();
785 if (subtract_count < main_segment.
size() / 2) {
788 std::array<int16_t, max_segment_size> union_indices;
793 const int64_t size = std::set_difference(shifted_main_segment.
begin(),
794 shifted_main_segment.
end(),
795 unioned_subtract_segment.
begin(),
796 unioned_subtract_segment.
end(),
799 return {bounds_min, {r_values, size}};
805 sorted_subtract_segments.
begin(),
806 sorted_subtract_segments.
end(),
809 std::array<int16_t, max_segment_size> tmp_indices_1;
810 std::array<int16_t, max_segment_size> tmp_indices_2;
811 int16_t *buffer_a = tmp_indices_1.data();
812 int16_t *buffer_b = tmp_indices_2.data();
818 const IndexMaskSegment subtract_segment = sorted_subtract_segments[0].shift(-bounds_min);
820 count = std::set_difference(shifted_main_segment.
begin(),
821 shifted_main_segment.
end(),
822 subtract_segment.
begin(),
823 subtract_segment.
end(),
829 const IndexMaskSegment &subtract_segment = sorted_subtract_segments[segment_i].shift(
832 int16_t *dst = (segment_i == sorted_subtract_segments.
size() - 1) ? r_values : buffer_b;
833 count = std::set_difference(buffer_a,
835 subtract_segment.
begin(),
836 subtract_segment.
end(),
839 std::swap(buffer_a, buffer_b);
841 return {bounds_min, {r_values,
count}};
858 for (
const Expr *expression : eval_order) {
859 switch (expression->type) {
865 if (mask.segments_num() == 1) {
866 results[expression->index] = mask.segment(0);
873 int64_t result_size_upper_bound = 0;
874 bool used_short_circuit =
false;
878 if (term_segment.
size() == bounds.
size()) {
881 results[expression->index] = term_segment;
882 used_short_circuit =
true;
885 term_segments[term_i] = term_segment;
886 result_size_upper_bound += term_segment.
size();
888 if (used_short_circuit) {
891 result_size_upper_bound = std::min(result_size_upper_bound, bounds.
size());
894 term_segments, bounds_min, dst.
data());
896 result_segment.base_span().end());
897 results[expression->index] = result_segment;
904 bool used_short_circuit =
false;
910 results[expression->index] = {};
911 used_short_circuit =
true;
914 result_size_upper_bound = std::min(result_size_upper_bound, term_segment.
size());
915 term_segments[term_i] = term_segment;
917 if (used_short_circuit) {
922 term_segments, bounds_min, dst.
data());
924 result_segment.base_span().end());
925 results[expression->index] = result_segment;
934 results[expression->index] = {};
937 int64_t result_size_upper_bound = main_segment.
size();
938 bool used_short_circuit =
false;
940 for (
const int64_t term_i : expr.
terms.index_range().drop_front(1)) {
941 const Expr &subtract_term = *expr.
terms[term_i];
943 if (term_segment.
size() == bounds.
size()) {
946 results[expression->index] = {};
947 used_short_circuit =
true;
950 result_size_upper_bound = std::min(result_size_upper_bound,
951 bounds.
size() - term_segment.
size());
952 subtract_segments[term_i - 1] = term_segment;
954 if (used_short_circuit) {
959 main_segment, subtract_segments, bounds_min, dst.
data());
961 result_segment.base_span().end());
962 results[expression->index] = result_segment;
967 return results[root_expression.
index];
981 switch (evaluated_segment.type) {
983 const int64_t full_size = evaluated_segment.bounds.
size();
987 evaluated_segment.bounds.first() + i,
Span(static_indices_array).take_front(size)));
993 evaluated_segment.bounds);
999 result_mask_segments.
append(evaluated_segment.indices);
1004 return result_mask_segments;
1015 eval_order.
append(&root_expression);
1022 expr_stack.
push(&root_expression);
1025 const Expr &expression = *expr_stack.
peek();
1026 bool &is_evaluated = is_evaluated_states[expression.
index];
1031 bool all_terms_evaluated =
true;
1032 for (
const Expr *term : expression.
terms) {
1033 bool &term_evaluated = is_evaluated_states[term->index];
1034 if (!term_evaluated) {
1037 term_evaluated =
true;
1040 expr_stack.
push(term);
1041 all_terms_evaluated =
false;
1045 if (all_terms_evaluated) {
1046 eval_order.
append(&expression);
1047 is_evaluated =
true;
1058 for (
const Expr *term : root_expression.
terms) {
1059 if (!term->terms.is_empty()) {
1068 const Expr &root_expression,
1083 auto handle_coarse_result = [&](
const CoarseResult &coarse_result) {
1084 for (
const CoarseSegment &segment : coarse_result.segments) {
1085 switch (segment.type) {
1087 if (segment.bounds.size() > coarse_segment_size_threshold) {
1088 long_unknown_segments.
push(segment.bounds);
1091 r_short_unknown_segments.
append(segment.bounds);
1097 r_evaluated_segments.
append(
1112 handle_coarse_result(initial_coarse_result);
1115 while (!long_unknown_segments.
is_empty()) {
1116 const IndexRange unknown_bounds = long_unknown_segments.
pop();
1117 const int64_t split_pos = unknown_bounds.
size() / 2;
1122 handle_coarse_result(left_result);
1123 handle_coarse_result(right_result);
1128 const Expr &root_expression,
1136 auto evaluate_unknown_segment = [&](
const IndexRange bounds,
1140 switch (exact_eval_mode) {
1143 root_expression, allocator, bounds, eval_order);
1144 if (!indices.is_empty()) {
1145 r_local_evaluated_segments.append(
1157 const Expr &expr = *eval_order[eval_order_i];
1163 const int64_t segments_num = mask.segments_num();
1164 if (segments_num <= 1) {
1174 split_indices.
append(segment[0]);
1177 std::sort(split_indices.
begin(), split_indices.
end());
1180 split_indices[boundary_i + 1]);
1185 root_expression, allocator, sub_bounds, eval_order);
1186 if (!indices.is_empty()) {
1187 r_local_evaluated_segments.append(
1198 const int64_t unknown_segment_eval_grain_size = 8;
1199 if (short_unknown_segments.
size() < unknown_segment_eval_grain_size) {
1200 for (
const IndexRange &bounds : short_unknown_segments) {
1201 evaluate_unknown_segment(bounds, memory, r_evaluated_segments);
1213 unknown_segment_eval_grain_size,
1215 LocalData &data = data_by_thread.local();
1216 for (const IndexRange &bounds : short_unknown_segments.slice(range))
1218 evaluate_unknown_segment(
1219 bounds, data.allocator, data.evaluated_segments);
1222 for (LocalData &data : data_by_thread) {
1223 if (!data.evaluated_segments.is_empty()) {
1224 r_evaluated_segments.
extend(data.evaluated_segments);
1234 if (evaluated_segments.
is_empty()) {
1237 if (evaluated_segments.
size() == 1) {
1239 switch (evaluated_segment.
type) {
1240 case EvaluatedSegment::Type::Full: {
1243 case EvaluatedSegment::Type::Copy: {
1246 case EvaluatedSegment::Type::Indices: {
1247 return IndexMask::from_segments({evaluated_segment.
indices}, memory);
1252 std::sort(evaluated_segments.
begin(),
1253 evaluated_segments.
end(),
1255 return a.bounds.start() < b.bounds.start();
1259 return IndexMask::from_segments(result_segments, memory);
1277 root_expression, eval_order, evaluated_segments, short_unknown_segments);
1281 short_unknown_segments,
1283 evaluated_segments);
1294 const ExactEvalMode other_exact_eval_mode = (exact_eval_mode == ExactEvalMode::Bits) ?
1295 ExactEvalMode::Indices :
1296 ExactEvalMode::Bits;
1307 for (
const Term &term : terms) {
1308 term_expressions.
append(&this->term_to_expr(term));
1311 expr.
type = Expr::Type::Union;
1312 expr.
index = expr_count_++;
1313 expr.
terms = std::move(term_expressions);
1320 term_expressions.
append(&this->term_to_expr(main_term));
1321 for (
const Term &subtract_term : subtract_terms) {
1322 term_expressions.
append(&this->term_to_expr(subtract_term));
1325 expr.
type = Expr::Type::Difference;
1326 expr.
index = expr_count_++;
1327 expr.
terms = std::move(term_expressions);
1334 for (
const Term &term : terms) {
1335 term_expressions.
append(&this->term_to_expr(term));
1338 expr.
type = Expr::Type::Intersection;
1339 expr.
index += expr_count_++;
1340 expr.
terms = std::move(term_expressions);
1344const Expr &ExprBuilder::term_to_expr(
const Term &term)
1346 if (
const Expr *
const *expr = std::get_if<const Expr *>(&term)) {
1349 AtomicExpr &expr = scope_.construct<AtomicExpr>();
1350 expr.
type = Expr::Type::Atomic;
1351 expr.index = expr_count_++;
1352 if (
const IndexRange *range = std::get_if<IndexRange>(&term)) {
1353 expr.mask = &scope_.construct<
IndexMask>(*range);
1356 expr.mask = std::get<const IndexMask *>(term);
constexpr int64_t first() const
constexpr int64_t one_after_last() const
constexpr IndexRange drop_back(int64_t n) const
constexpr int64_t size() const
constexpr bool is_empty() const
static constexpr IndexRange from_begin_end(const int64_t begin, const int64_t end)
constexpr IndexRange with_new_end(const int64_t new_end) const
static constexpr IndexRange from_begin_size(const int64_t begin, const int64_t size)
constexpr int64_t start() const
constexpr IndexRange take_front(int64_t n) const
constexpr IndexRange drop_front(int64_t n) const
MutableSpan< T > allocate_array(int64_t size)
void free_end_of_previous_allocation(const int64_t original_allocation_size, const void *free_after)
MutableSpan< T > construct_array_copy(Span< T > src)
void transfer_ownership_from(LinearAllocator<> &other)
constexpr int64_t size() const
constexpr bool is_empty() const
constexpr T * data() const
constexpr int64_t size_in_bytes() const
constexpr T * end() const
constexpr T * begin() const
constexpr const T * data() const
constexpr int64_t size() const
constexpr IndexRange index_range() const
constexpr bool is_empty() const
void push(const T &value)
void append(const T &value)
void remove_first_occurrence_and_reorder(const T &value)
IndexRange index_range() const
void append_unchecked(const T &value)
void extend(Span< T > array)
std::variant< const Expr *, const IndexMask *, IndexRange > Term
IndexMaskSegment shift(const int64_t shift) const
IndexMask slice_content(IndexRange range) const
void to_bits(MutableBitSpan r_bits, int64_t offset=0) const
void foreach_segment(Fn &&fn) const
local_group_size(16, 16) .push_constant(Type b
ccl_device_inline float4 mask(const int4 mask, const float4 a)
static constexpr int64_t BitsPerInt
void mix_into_first_expr(ExprFn &&expr, FirstBitSpanT &&first_arg, const BitSpanT &...args)
void copy_from_or(FirstBitSpanT &first_arg, const BitSpanT &...args)
void foreach_1_index(const BitSpanT &data, Fn &&fn)
static CoarseSegment & add_coarse_segment__full(CoarseSegment *prev_segment, const int64_t prev_boundary_index, const int64_t current_boundary_index, CoarseResult &result)
static IndexMaskSegment evaluate_exact_with_indices(const Expr &root_expression, LinearAllocator<> &allocator, const IndexRange bounds, const Span< const Expr * > eval_order)
static void evaluate_coarse_and_split_until_segments_are_short(const Expr &root_expression, const Span< const Expr * > eval_order, Vector< EvaluatedSegment, 16 > &r_evaluated_segments, Vector< IndexRange, 16 > &r_short_unknown_segments)
IndexMask evaluate_expression(const Expr &expression, IndexMaskMemory &memory)
static void evaluate_short_unknown_segments_exactly(const Expr &root_expression, const ExactEvalMode exact_eval_mode, const Span< const Expr * > eval_order, const Span< IndexRange > short_unknown_segments, IndexMaskMemory &memory, Vector< EvaluatedSegment, 16 > &r_evaluated_segments)
static IndexMaskSegment union_index_mask_segments(const Span< IndexMaskSegment > segments, const int64_t bounds_min, int16_t *r_values)
static constexpr int64_t segment_size_threshold
static void sort_course_boundaries(MutableSpan< CourseBoundary > boundaries)
static ExactEvalMode determine_exact_eval_mode(const Expr &root_expression)
static CoarseResult evaluate_coarse(const Expr &root_expression, const Span< const Expr * > eval_order, const std::optional< IndexRange > eval_bounds=std::nullopt)
static IndexMaskSegment evaluate_exact_with_bits(const Expr &root_expression, LinearAllocator<> &allocator, const IndexRange bounds, const Span< const Expr * > eval_order)
static IndexMask evaluated_segments_to_index_mask(MutableSpan< EvaluatedSegment > evaluated_segments, IndexMaskMemory &memory)
static Span< int16_t > bits_to_indices(const BoundedBitSpan bits, LinearAllocator<> &allocator)
static constexpr int64_t max_segment_size
static Vector< const Expr *, inline_expr_array_size > compute_eval_order(const Expr &root_expression)
static IndexMaskSegment intersect_index_mask_segments(const Span< IndexMaskSegment > segments, const int64_t bounds_min, int16_t *r_values)
static CoarseSegment & add_coarse_segment__unknown(CoarseSegment *prev_segment, const int64_t prev_boundary_index, const int64_t current_boundary_index, CoarseResult &result)
static Vector< IndexMaskSegment > build_result_mask_segments(const Span< EvaluatedSegment > evaluated_segments)
static void evaluate_coarse_difference(const Span< DifferenceCourseBoundary > boundaries, CoarseResult &r_result)
static IndexMask evaluate_expression_impl(const Expr &root_expression, IndexMaskMemory &memory, const ExactEvalMode exact_eval_mode)
static CoarseSegment & add_coarse_segment__copy(CoarseSegment *prev_segment, const int64_t prev_boundary_index, const int64_t current_boundary_index, const IndexMask ©_from_mask, CoarseResult &result)
static IndexMaskSegment difference_index_mask_segments(const IndexMaskSegment main_segment, const Span< IndexMaskSegment > subtract_segments, const int64_t bounds_min, int16_t *r_values)
static void evaluate_coarse_intersection(const Span< CourseBoundary > boundaries, const int64_t terms_num, CoarseResult &r_result)
static void evaluate_coarse_union(const Span< CourseBoundary > boundaries, CoarseResult &r_result)
const std::array< int16_t, max_segment_size > & get_static_indices_array()
constexpr int64_t inline_expr_array_size
void parallel_for(const IndexRange range, const int64_t grain_size, const Function &function, const TaskSizeHints &size_hints=detail::TaskSizeHints_Static(1))
constexpr IntT ceil_division(const IntT x, const IntT y)
Vector< CoarseSegment > segments
const CoarseSegment * segment
const IndexMask * copy_mask
const DifferenceExpr & as_difference() const
const IntersectionExpr & as_intersection() const
const AtomicExpr & as_atomic() const
int expression_array_size() const
const UnionExpr & as_union() const
Vector< const Expr * > terms