Blender V5.0
BLI_task.hh
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2023 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
5#pragma once
6
10
11#ifdef WITH_TBB
12/* Quiet top level deprecation message, unrelated to API usage here. */
13# if defined(WIN32) && !defined(NOMINMAX)
14/* TBB includes Windows.h which will define min/max macros causing issues
15 * when we try to use std::min and std::max later on. */
16# define NOMINMAX
17# define TBB_MIN_MAX_CLEANUP
18# endif
19# include <tbb/blocked_range.h>
20# include <tbb/parallel_for.h>
21# include <tbb/parallel_for_each.h>
22# include <tbb/parallel_invoke.h>
23# include <tbb/parallel_reduce.h>
24# include <tbb/task_arena.h>
25# ifdef WIN32
26/* We cannot keep this defined, since other parts of the code deal with this on their own, leading
27 * to multiple define warnings unless we un-define this, however we can only undefine this if we
28 * were the ones that made the definition earlier. */
29# ifdef TBB_MIN_MAX_CLEANUP
30# undef NOMINMAX
31# endif
32# endif
33#endif
34
35#include "BLI_function_ref.hh"
36#include "BLI_index_range.hh"
37#include "BLI_lazy_threading.hh"
39
40namespace blender {
41
/**
 * Thin wrapper around a grain size so that it can be passed as a distinct type instead of a bare
 * integer. The constructor is explicit, so an integer never converts to a #GrainSize implicitly.
 */
struct GrainSize {
  /** The wrapped grain size, exactly as passed to the constructor. */
  int64_t value;

  explicit constexpr GrainSize(const int64_t grain_size) : value(grain_size) {}
};
50
51} // namespace blender
52
53namespace blender::threading {
54
/**
 * Call `function` once for every element of `range`.
 *
 * When built with TBB the work is handed to `tbb::parallel_for_each`. Otherwise the elements are
 * visited one after another on the calling thread, in iteration order.
 *
 * \param range: Anything that can be iterated with a range-based for loop.
 * \param function: Callable that accepts a single element of `range`.
 */
template<typename Range, typename Function>
inline void parallel_for_each(Range &&range, const Function &function)
{
#ifdef WITH_TBB
  tbb::parallel_for_each(range, function);
#else
  for (auto &&value : range) {
    function(value);
  }
#endif
}
66
67namespace detail {
69 int64_t grain_size,
70 FunctionRef<void(IndexRange)> function,
71 const TaskSizeHints &size_hints);
73} // namespace detail
74
92template<typename Function>
93inline void parallel_for(const IndexRange range,
94 const int64_t grain_size,
95 const Function &function,
96 const TaskSizeHints &size_hints = detail::TaskSizeHints_Static(1))
97{
98 if (range.is_empty()) {
99 return;
100 }
101 /* Invoking tbb for small workloads has a large overhead. */
102 if (use_single_thread(size_hints, range, grain_size)) {
103 function(range);
104 return;
105 }
106 detail::parallel_for_impl(range, grain_size, function, size_hints);
107}
108
113inline IndexRange align_sub_range(const IndexRange unaligned_range,
114 const int64_t alignment,
115 const IndexRange global_range)
116{
117 const int64_t global_begin = global_range.start();
118 const int64_t global_end = global_range.one_after_last();
119 const int64_t alignment_mask = ~(alignment - 1);
120
121 const int64_t unaligned_begin = unaligned_range.start();
122 const int64_t unaligned_end = unaligned_range.one_after_last();
123 const int64_t aligned_begin = std::max(global_begin, unaligned_begin & alignment_mask);
124 const int64_t aligned_end = unaligned_end == global_end ?
125 unaligned_end :
126 std::max(global_begin, unaligned_end & alignment_mask);
127 const IndexRange aligned_range = IndexRange::from_begin_end(aligned_begin, aligned_end);
128 return aligned_range;
129}
130
138template<typename Function>
139inline void parallel_for_aligned(const IndexRange range,
140 const int64_t grain_size,
141 const int64_t alignment,
142 const Function &function)
143{
144 parallel_for(range, grain_size, [&](const IndexRange unaligned_range) {
145 const IndexRange aligned_range = align_sub_range(unaligned_range, alignment, range);
146 function(aligned_range);
147 });
148}
149
150template<typename Value, typename Function, typename Reduction>
151inline Value parallel_reduce(IndexRange range,
152 int64_t grain_size,
153 const Value &identity,
154 const Function &function,
155 const Reduction &reduction)
156{
157#ifdef WITH_TBB
158 if (range.size() >= grain_size) {
160 return tbb::parallel_reduce(
161 tbb::blocked_range<int64_t>(range.first(), range.one_after_last(), grain_size),
162 identity,
163 [&](const tbb::blocked_range<int64_t> &subrange, const Value &ident) {
164 return function(IndexRange(subrange.begin(), subrange.size()), ident);
165 },
166 reduction);
167 }
168#else
169 UNUSED_VARS(grain_size, reduction);
170#endif
171 return function(range, identity);
172}
173
174template<typename Value, typename Function, typename Reduction>
175inline Value parallel_reduce_aligned(const IndexRange range,
176 const int64_t grain_size,
177 const int64_t alignment,
178 const Value &identity,
179 const Function &function,
180 const Reduction &reduction)
181{
183 range,
184 grain_size,
185 identity,
186 [&](const IndexRange unaligned_range, const Value &ident) {
187 const IndexRange aligned_range = align_sub_range(unaligned_range, alignment, range);
188 function(aligned_range, ident);
189 },
190 reduction);
191}
192
193template<typename Value, typename Function, typename Reduction>
195 int64_t grain_size,
196 const Value &identity,
197 const Function &function,
198 const Reduction &reduction)
199{
200#ifdef WITH_TBB
201 if (range.size() >= grain_size) {
203 return tbb::parallel_deterministic_reduce(
204 tbb::blocked_range<int64_t>(range.first(), range.one_after_last(), grain_size),
205 identity,
206 [&](const tbb::blocked_range<int64_t> &subrange, const Value &ident) {
207 return function(IndexRange(subrange.begin(), subrange.size()), ident);
208 },
209 reduction);
210 }
211#else
212 UNUSED_VARS(grain_size, reduction);
213#endif
214 return function(range, identity);
215}
216
/**
 * Call all given functions. When built with TBB they are passed to `tbb::parallel_invoke`.
 * Otherwise they are called one after another, in argument order, on the calling thread.
 */
template<typename... Functions> inline void parallel_invoke(Functions &&...functions)
{
#ifdef WITH_TBB
  tbb::parallel_invoke(std::forward<Functions>(functions)...);
#else
  (functions(), ...);
#endif
}

/**
 * Same as #parallel_invoke, but the caller decides at run-time whether the threaded overload is
 * used at all.
 *
 * \param use_threading: When false, the functions are called one after another, in argument
 * order, on the calling thread.
 */
template<typename... Functions>
inline void parallel_invoke(const bool use_threading, Functions &&...functions)
{
  if (use_threading) {
    /* NOTE(review): a statement may be missing at this position (listing line 239 is absent);
     * confirm against the upstream file. */
    parallel_invoke(std::forward<Functions>(functions)...);
  }
  else {
    (functions(), ...);
  }
}
246
/**
 * Call `function` exactly once. When built with TBB the call goes through
 * `tbb::this_task_arena::isolate`; otherwise `function` is called directly.
 */
template<typename Function> inline void isolate_task(const Function &function)
{
#ifdef WITH_TBB
  /* NOTE(review): a statement may be missing at this position (listing line 251 is absent);
   * confirm against the upstream file. */
  tbb::this_task_arena::isolate(function);
#else
  function();
#endif
}
257
264template<typename Function>
265inline void memory_bandwidth_bound_task(const int64_t approximate_bytes_touched,
266 const Function &function)
267{
268 /* Don't limit threading when all touched memory can stay in the CPU cache, because there a much
269 * higher memory bandwidth is available compared to accessing RAM. This value is supposed to be
270 * on the order of the L3 cache size. Accessing that value is not quite straight forward and even
271 * if it was, it's not clear if using the exact cache size would be beneficial because there is
272 * often more stuff going on the CPU at the same time. */
273 if (approximate_bytes_touched <= 8 * 1024 * 1024) {
274 function();
275 return;
276 }
278}
279
280} // namespace blender::threading
#define UNUSED_VARS(...)
long long int int64_t
constexpr int64_t first() const
constexpr int64_t one_after_last() const
constexpr int64_t size() const
constexpr bool is_empty() const
static constexpr IndexRange from_begin_end(const int64_t begin, const int64_t end)
constexpr int64_t start() const
void parallel_for_impl(IndexRange range, int64_t grain_size, FunctionRef< void(IndexRange)> function, const TaskSizeHints &size_hints)
void memory_bandwidth_bound_task_impl(FunctionRef< void()> function)
void isolate_task(const Function &function)
Definition BLI_task.hh:248
void parallel_invoke(Functions &&...functions)
Definition BLI_task.hh:221
void parallel_for_each(Range &&range, const Function &function)
Definition BLI_task.hh:56
Value parallel_deterministic_reduce(IndexRange range, int64_t grain_size, const Value &identity, const Function &function, const Reduction &reduction)
Definition BLI_task.hh:194
void parallel_for(const IndexRange range, const int64_t grain_size, const Function &function, const TaskSizeHints &size_hints=detail::TaskSizeHints_Static(1))
Definition BLI_task.hh:93
bool use_single_thread(const TaskSizeHints &size_hints, const IndexRange range, const int64_t threshold)
void memory_bandwidth_bound_task(const int64_t approximate_bytes_touched, const Function &function)
Definition BLI_task.hh:265
void parallel_for_aligned(const IndexRange range, const int64_t grain_size, const int64_t alignment, const Function &function)
Definition BLI_task.hh:139
Value parallel_reduce(IndexRange range, int64_t grain_size, const Value &identity, const Function &function, const Reduction &reduction)
Definition BLI_task.hh:151
Value parallel_reduce_aligned(const IndexRange range, const int64_t grain_size, const int64_t alignment, const Value &identity, const Function &function, const Reduction &reduction)
Definition BLI_task.hh:175
IndexRange align_sub_range(const IndexRange unaligned_range, const int64_t alignment, const IndexRange global_range)
Definition BLI_task.hh:113
constexpr GrainSize(const int64_t grain_size)
Definition BLI_task.hh:48