13# if defined(WIN32) && !defined(NOMINMAX)
17# define TBB_MIN_MAX_CLEANUP
19# include <tbb/blocked_range.h>
20# include <tbb/parallel_for.h>
21# include <tbb/parallel_for_each.h>
22# include <tbb/parallel_invoke.h>
23# include <tbb/parallel_reduce.h>
24# include <tbb/task_arena.h>
29# ifdef TBB_MIN_MAX_CLEANUP
57template<
typename Range,
typename Function>
61 tbb::parallel_for_each(range, function);
63 for (
auto &&value :
range) {
94template<
typename Function>
97 const Function &function,
100 if (range.is_empty()) {
121 const int64_t alignment_mask = ~(alignment - 1);
123 const int64_t unaligned_begin = unaligned_range.
start();
125 const int64_t aligned_begin = std::max(global_begin, unaligned_begin & alignment_mask);
126 const int64_t aligned_end = unaligned_end == global_end ?
128 std::max(global_begin, unaligned_end & alignment_mask);
130 return aligned_range;
140template<
typename Function>
144 const Function &function)
148 function(aligned_range);
152template<
typename Value,
typename Function,
typename Reduction>
155 const Value &identity,
156 const Function &function,
157 const Reduction &reduction)
160 if (range.size() >= grain_size) {
162 return tbb::parallel_reduce(
163 tbb::blocked_range<int64_t>(range.first(), range.one_after_last(), grain_size),
165 [&](
const tbb::blocked_range<int64_t> &subrange,
const Value &ident) {
166 return function(IndexRange(subrange.begin(), subrange.size()), ident);
173 return function(range, identity);
176template<
typename Value,
typename Function,
typename Reduction>
180 const Value &identity,
181 const Function &function,
182 const Reduction &reduction)
188 [&](
const IndexRange unaligned_range,
const Value &ident) {
190 function(aligned_range, ident);
202 tbb::parallel_invoke(std::forward<Functions>(functions)...);
213template<
typename... Functions>
226template<
typename Function>
inline void isolate_task(
const Function &function)
230 tbb::this_task_arena::isolate(function);
242template<
typename Function>
244 const Function &function)
251 if (approximate_bytes_touched <= 8 * 1024 * 1024) {
constexpr int64_t one_after_last() const
static constexpr IndexRange from_begin_end(const int64_t begin, const int64_t end)
constexpr int64_t start() const
void parallel_for_impl(IndexRange range, int64_t grain_size, FunctionRef< void(IndexRange)> function, const TaskSizeHints &size_hints)
void memory_bandwidth_bound_task_impl(FunctionRef< void()> function)
void isolate_task(const Function &function)
void parallel_invoke(Functions &&...functions)
void parallel_for_each(Range &&range, const Function &function)
void parallel_for(const IndexRange range, const int64_t grain_size, const Function &function, const TaskSizeHints &size_hints=detail::TaskSizeHints_Static(1))
bool use_single_thread(const TaskSizeHints &size_hints, const IndexRange range, const int64_t threshold)
void memory_bandwidth_bound_task(const int64_t approximate_bytes_touched, const Function &function)
void parallel_for_aligned(const IndexRange range, const int64_t grain_size, const int64_t alignment, const Function &function)
Value parallel_reduce(IndexRange range, int64_t grain_size, const Value &identity, const Function &function, const Reduction &reduction)
Value parallel_reduce_aligned(const IndexRange range, const int64_t grain_size, const int64_t alignment, const Value &identity, const Function &function, const Reduction &reduction)
IndexRange align_sub_range(const IndexRange unaligned_range, const int64_t alignment, const IndexRange global_range)
constexpr GrainSize(const int64_t grain_size)