21namespace exec_presets {
62 template<
typename... ParamTags,
typename... LoadedParams,
size_t...
I>
64 std::index_sequence<I...> ,
65 const std::tuple<LoadedParams...> &loaded_params)
const
67 return std::make_tuple([&]() {
71 const GVArrayImpl &varray_impl = *std::get<I>(loaded_params);
78 T *
ptr = std::get<I>(loaded_params);
94 template<
typename... ParamTags,
typename... LoadedParams,
size_t...
I>
96 std::index_sequence<I...> ,
97 const std::tuple<LoadedParams...> &loaded_params)
const
99 return std::make_tuple([&]() {
104 constexpr bool UseSpan =
ValueSequence<size_t, Indices...>::template contains<I>();
105 const GVArrayImpl &varray_impl = *std::get<I>(loaded_params);
112 T *
ptr = std::get<I>(loaded_params);
128template<
typename MaskT,
typename... Args,
typename... ParamTags,
size_t...
I,
typename ElementFn>
131#if (defined(__GNUC__) && !defined(__clang__))
132[[gnu::optimize(
"-funroll-loops")]] [[gnu::optimize(
"O3")]]
136 std::index_sequence<I...> ,
137 ElementFn element_fn,
141 Args &&__restrict... args)
143 if constexpr (std::is_same_v<std::decay_t<MaskT>,
IndexRange>) {
145 const int64_t start = mask.start();
146 const int64_t end = mask.one_after_last();
147 for (
int64_t i = start; i < end; i++) {
148 element_fn(args[i]...);
153 element_fn(args[i]...);
173template<
typename... ParamTags,
typename ElementFn,
typename... Chunks>
174#if (defined(__GNUC__) && !defined(__clang__))
175[[gnu::optimize(
"-funroll-loops")]] [[gnu::optimize(
"O3")]]
179 const ElementFn element_fn,
181 Chunks &&__restrict... chunks)
184 element_fn(chunks[i]...);
193template<
typename... ParamTags,
size_t...
I,
typename ElementFn,
typename... LoadedParams>
195 std::index_sequence<I...> ,
196 const ElementFn element_fn,
198 const std::tuple<LoadedParams...> &loaded_params)
205 static constexpr int64_t MaxChunkSize = 64;
206 const int64_t mask_size = mask.size();
207 const int64_t tmp_buffer_size = std::min(mask_size, MaxChunkSize);
210 std::tuple<TypedBuffer<typename ParamTags::base_type, MaxChunkSize>...> temporary_buffers;
213 std::tuple<MaterializeArgInfo<ParamTags>...> args_info;
223 const GVArrayImpl &varray_impl = *std::get<I>(loaded_params);
228 const T &in_single = *
static_cast<const T *
>(common_info.
data);
229 T *tmp_buffer = std::get<I>(temporary_buffers).ptr();
245 const int64_t segment_offset = mask.offset();
248 for (
int64_t chunk_start = 0; chunk_start < mask_size; chunk_start += MaxChunkSize) {
249 const int64_t chunk_end = std::min<int64_t>(chunk_start + MaxChunkSize, mask_size);
250 const int64_t chunk_size = chunk_end - chunk_start;
252 const int64_t mask_start = sliced_mask[0];
257 if (!sliced_mask_is_range) {
264 T *tmp_buffer = std::get<I>(temporary_buffers).ptr();
265 T *param_buffer = std::get<I>(loaded_params);
266 for (
int64_t i = 0; i < chunk_size; i++) {
267 new (tmp_buffer + i)
T(std::move(param_buffer[sliced_mask[i]]));
274 const IndexMask *current_segment_mask =
nullptr;
285 T *tmp_buffer = std::get<I>(temporary_buffers);
289 return const_cast<const T *
>(tmp_buffer);
297 const GVArrayImpl &varray_impl = *std::get<I>(loaded_params);
298 if (current_segment_mask ==
nullptr) {
299 current_segment_mask = &index_mask_from_segment.
update(
300 {segment_offset, sliced_mask.
base_span()});
308 return const_cast<const T *
>(tmp_buffer);
315 if (sliced_mask_is_range) {
317 T *param_buffer = std::get<I>(loaded_params);
318 return param_buffer + mask_start;
323 return const_cast<T *
>(tmp_buffer);
329 if (!sliced_mask_is_range) {
339 T *tmp_buffer = std::get<I>(temporary_buffers).ptr();
340 T *param_buffer = std::get<I>(loaded_params);
341 for (
int64_t i = 0; i < chunk_size; i++) {
342 new (param_buffer + sliced_mask[i])
T(std::move(tmp_buffer[i]));
343 std::destroy_at(tmp_buffer + i);
359 T *tmp_buffer = std::get<I>(temporary_buffers).ptr();
376 T *tmp_buffer = std::get<I>(temporary_buffers).ptr();
384template<
typename ElementFn,
typename ExecPreset,
typename... ParamTags,
size_t...
I>
386 const ExecPreset exec_preset,
390 std::index_sequence<I...> )
395 const auto loaded_params = std::make_tuple([&]() {
401 return params.readonly_single_input(
I).get_implementation();
404 return static_cast<T *
>(
params.uninitialized_single_output(
I).data());
407 return static_cast<T *
>(
params.single_mutable(
I).data());
412 bool executed_devirtualized =
false;
413 if constexpr (ExecPreset::use_devirtualization) {
416 mask.to_spans_and_ranges<16>();
418 const auto devirtualizers = exec_preset.create_devirtualizers(
421 devirtualizers, [&](
auto &&...args) {
422 for (
const std::variant<IndexRange, IndexMaskSegment> &segment : mask_segments) {
423 if (std::holds_alternative<IndexRange>(segment)) {
424 const auto segment_range = std::get<IndexRange>(segment);
426 std::index_sequence<I...>(),
429 std::forward<
decltype(args)>(args)...);
432 const auto segment_indices = std::get<IndexMaskSegment>(segment);
434 std::index_sequence<I...>(),
437 std::forward<
decltype(args)>(args)...);
448 if (!executed_devirtualized) {
454 std::index_sequence<I...>(),
468 const GVArrayImpl &varray_impl = *std::get<I>(loaded_params);
475 T *
ptr = std::get<I>(loaded_params);
489template<
typename ElementFn,
typename ExecPreset,
typename... ParamTags>
491 const ExecPreset exec_preset,
500 std::make_index_sequence<
sizeof...(ParamTags)>());
514 : call_fn_(std::move(call_fn))
518 ([&] { builder.add(ParamTags(),
""); }(), ...);
528template<
typename Out,
typename... In,
typename ElementFn,
typename ExecPreset>
530 const ElementFn element_fn,
531 const ExecPreset exec_preset,
537 [element_fn](
const In &...in, Out &out) {
new (&out) Out(element_fn(in...)); },
540 return CustomMF(name, call_fn, param_tags);
546template<
typename In1,
551 const ElementFn element_fn,
559template<
typename In1,
565 const ElementFn element_fn,
573template<
typename In1,
580 const ElementFn element_fn,
588template<
typename In1,
596 const ElementFn element_fn,
604template<
typename In1,
613 const ElementFn element_fn,
621template<
typename In1,
631 const ElementFn element_fn,
639template<
typename Mut1,
typename ElementFn,
typename ExecPreset = exec_presets::AllSpanOrSingle>
640inline auto SM(
const char *name,
641 const ElementFn element_fn,
646 element_fn, exec_preset, param_tags);
702 builder.single_output<T>(
"Value");
709 mask.foreach_index_optimized<
int64_t>([&](
const int64_t i) {
new (&output[i])
T(value_); });
720 if (other1 !=
nullptr) {
721 return value_ == other1->value_;
725 if (other2 !=
nullptr) {
727 if (type == other2->type_) {
728 return type.is_equal_or_false(
static_cast<const void *
>(&value_), other2->value_);
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
static const CPPType & get()
virtual void materialize_compressed_to_uninitialized(const IndexMask &mask, void *dst) const
virtual CommonVArrayInfo common_info() const
VArray< T > typed() const
Span< BaseT > base_span() const
CustomMF_Constant(U &&value)
bool equals(const MultiFunction &other) const override
uint64_t hash() const override
void call(const IndexMask &mask, Params params, Context) const override
CustomMF_DefaultOutput(Span< DataType > input_types, Span< DataType > output_types)
void call(const IndexMask &mask, Params params, Context context) const override
void call(const IndexMask &mask, Params params, Context context) const override
CustomMF_GenericConstantArray(GSpan array)
CustomMF_GenericConstant(const CPPType &type, const void *value, bool make_value_copy)
~CustomMF_GenericConstant()
uint64_t hash() const override
bool equals(const MultiFunction &other) const override
void call(const IndexMask &mask, Params params, Context context) const override
CustomMF_GenericCopy(DataType data_type)
void call(const IndexMask &mask, Params params, Context context) const override
StringRefNull name() const
void set_signature(const Signature *signature)
CustomMF(const char *name, CallFn call_fn, TypeSequence< ParamTags... >)
void call(const IndexMask &mask, Params params, Context) const override
const IndexMask & update(IndexMaskSegment segment)
IndexMaskSegment slice(const IndexRange &range) const
ccl_device_inline float4 mask(const int4 mask, const float4 a)
auto build_multi_function_with_n_inputs_one_output(const char *name, const ElementFn element_fn, const ExecPreset exec_preset, TypeSequence< In... >)
void execute_element_fn_as_multi_function(const ElementFn element_fn, const ExecPreset exec_preset, const IndexMask &mask, Params params, TypeSequence< ParamTags... >, std::index_sequence< I... >)
auto build_multi_function_call_from_element_fn(const ElementFn element_fn, const ExecPreset exec_preset, TypeSequence< ParamTags... >)
void execute_array(TypeSequence< ParamTags... >, std::index_sequence< I... >, ElementFn element_fn, MaskT mask, Args &&__restrict... args)
void execute_materialized_impl(TypeSequence< ParamTags... >, const ElementFn element_fn, const int64_t size, Chunks &&__restrict... chunks)
void execute_materialized(TypeSequence< ParamTags... >, std::index_sequence< I... >, const ElementFn element_fn, const IndexMaskSegment mask, const std::tuple< LoadedParams... > &loaded_params)
auto SI1_SO(const char *name, const ElementFn element_fn, const ExecPreset exec_preset=exec_presets::Materialized())
auto SI5_SO(const char *name, const ElementFn element_fn, const ExecPreset exec_preset=exec_presets::Materialized())
auto SI4_SO(const char *name, const ElementFn element_fn, const ExecPreset exec_preset=exec_presets::Materialized())
auto SM(const char *name, const ElementFn element_fn, const ExecPreset exec_preset=exec_presets::AllSpanOrSingle())
auto SI3_SO(const char *name, const ElementFn element_fn, const ExecPreset exec_preset=exec_presets::Materialized())
auto SI6_SO(const char *name, const ElementFn element_fn, const ExecPreset exec_preset=exec_presets::Materialized())
auto SI2_SO(const char *name, const ElementFn element_fn, const ExecPreset exec_preset=exec_presets::Materialized())
bool non_empty_is_range(const Span< T > indices)
bool call_with_devirtualized_parameters(const std::tuple< Devirtualizers... > &devis, const Fn &fn)
uint64_t get_default_hash(const T &v)
void uninitialized_fill_n(T *dst, int64_t n, const T &value)
void destruct_n(T *ptr, int64_t n)
unsigned __int64 uint64_t
static constexpr ParamCategory category
const ParamTag::base_type * internal_span_data
static constexpr FallbackMode fallback_mode
auto create_devirtualizers(TypeSequence< ParamTags... >, std::index_sequence< I... >, const std::tuple< LoadedParams... > &loaded_params) const
static constexpr bool use_devirtualization
static constexpr FallbackMode fallback_mode
static constexpr bool use_devirtualization
static constexpr bool use_devirtualization
static constexpr FallbackMode fallback_mode
auto create_devirtualizers(TypeSequence< ParamTags... >, std::index_sequence< I... >, const std::tuple< LoadedParams... > &loaded_params) const
static constexpr FallbackMode fallback_mode
static constexpr bool use_devirtualization