26# include <tbb/blocked_range.h>
27# include <tbb/enumerable_thread_specific.h>
28# include <tbb/parallel_for.h>
29# include <tbb/parallel_reduce.h>
44 : func(func), userdata(userdata), settings(settings)
50 RangeTask(
const RangeTask &other)
51 : func(other.func), userdata(other.userdata), settings(other.settings)
57 RangeTask(RangeTask &other, tbb::split )
58 : func(other.func), userdata(other.userdata), settings(other.settings)
65 if (settings->
func_free !=
nullptr && userdata_chunk !=
nullptr) {
66 settings->
func_free(userdata, userdata_chunk);
71 void init_chunk(
void *from_chunk)
78 userdata_chunk =
nullptr;
82 void operator()(
const tbb::blocked_range<int> &r)
const
86 for (
int i = r.begin();
i != r.end(); ++
i) {
87 func(userdata,
i, &tls);
91 void join(
const RangeTask &other)
93 settings->
func_reduce(userdata, userdata_chunk, other.userdata_chunk);
108 RangeTask task(func, userdata, settings);
110 const tbb::blocked_range<int> range(start, stop, grainsize);
121 parallel_for(range, task);
131 for (
int i = start;
i < stop;
i++) {
132 func(userdata,
i, &tls);
145 static tbb::enumerable_thread_specific<int> tbb_thread_id(-1);
146 static int tbb_thread_id_counter = 0;
148 int &thread_id = tbb_thread_id.local();
149 if (thread_id == -1) {
152 BLI_assert_msg(0,
"Maximum number of threads exceeded for sculpting");
165static void parallel_for_impl_static_size(
const IndexRange range,
167 const FunctionRef<
void(IndexRange)> function)
169 tbb::parallel_for(tbb::blocked_range<int64_t>(range.
first(), range.
one_after_last(), grain_size),
170 [function](
const tbb::blocked_range<int64_t> &subrange) {
171 function(IndexRange(subrange.begin(), subrange.size()));
177static void parallel_for_impl_individual_size_lookup(
178 const IndexRange range,
180 const FunctionRef<
void(IndexRange)> function,
186 const int64_t outer_grain_size = std::min<int64_t>(grain_size, 512);
189 Array<int64_t, 1024> task_sizes(sub_range.
size());
190 size_hints.lookup_individual_sizes(sub_range, task_sizes);
193 Vector<int64_t, 256> offsets_vec;
197 counter += task_sizes[
i];
198 if (counter >= grain_size) {
203 if (offsets_vec.
last() < sub_range.
size()) {
206 const OffsetIndices<int64_t> offsets = offsets_vec.
as_span();
210 for (const int64_t i : offsets_range) {
211 const IndexRange actual_range = offsets[i].shift(sub_range.start());
212 function(actual_range);
227 if (range.
size() == 1) {
233 if (total_size <= grain_size) {
256 switch (size_hints.
type) {
259 const int64_t final_grain_size = task_size == 1 ?
261 std::max<int64_t>(1, grain_size / task_size);
262 parallel_for_impl_static_size(range, final_grain_size, function);
266 parallel_for_impl_individual_size_lookup(
298 const int num_threads = 8;
304 static tbb::task_arena arena{num_threads};
311 arena.execute(function);
#define BLI_assert_msg(a, msg)
int BLI_task_scheduler_num_threads(void)
struct TaskParallelTLS TaskParallelTLS
void(* TaskParallelRangeFunc)(void *__restrict userdata, int iter, const TaskParallelTLS *__restrict tls)
struct TaskParallelSettings TaskParallelSettings
#define BLENDER_MAX_THREADS
Read Guarded memory(de)allocation.
Provides wrapper around system-specific atomic primitives, and some extensions (faked-atomic operatio...
ATOMIC_INLINE int32_t atomic_fetch_and_add_int32(int32_t *p, int32_t x)
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
constexpr int64_t first() const
constexpr int64_t one_after_last() const
constexpr int64_t size() const
constexpr IndexRange index_range() const
void append(const T &value)
const T & last(const int64_t n=0) const
Span< T > as_span() const
constexpr int64_t size() const
constexpr bool is_empty() const
constexpr IndexRange take_front(int64_t n) const
constexpr IndexRange drop_front(int64_t n) const
virtual int64_t lookup_accumulated_size(IndexRange range) const =0
void * MEM_mallocN(size_t len, const char *str)
void parallel_for_impl(IndexRange range, int64_t grain_size, FunctionRef< void(IndexRange)> function, const TaskSizeHints &size_hints)
void memory_bandwidth_bound_task_impl(FunctionRef< void()> function)
static void parallel_for_impl_accumulated_size_lookup(const IndexRange range, const int64_t grain_size, const FunctionRef< void(IndexRange)> function, const TaskSizeHints_AccumulatedLookup &size_hints)
void parallel_invoke(Functions &&...functions)
void parallel_for(const IndexRange range, const int64_t grain_size, const Function &function, const TaskSizeHints &size_hints=detail::TaskSizeHints_Static(1))
TaskParallelReduceFunc func_reduce
TaskParallelFreeFunc func_free
size_t userdata_chunk_size
void BLI_task_parallel_range(const int start, const int stop, void *userdata, TaskParallelRangeFunc func, const TaskParallelSettings *settings)
int BLI_task_parallel_thread_id(const TaskParallelTLS *)
static Value parallel_reduce(const int range, const Value &identity, const Function &function, const Reduction &reduction)