Blender
V4.3
intern
cycles
kernel
device
gpu
parallel_prefix_sum.h
Go to the documentation of this file.
1
/* SPDX-FileCopyrightText: 2021-2022 Blender Foundation
2
*
3
* SPDX-License-Identifier: Apache-2.0 */
4
5
#pragma once
6
7
CCL_NAMESPACE_BEGIN
8
9
/* Parallel prefix sum.
 *
 * TODO: actually make this work in parallel.
 *
 * This is used for an array the size of the number of shaders in the scene,
 * which is usually not huge, so it is unlikely to be a significant bottleneck. */
15
16
#include "
util/atomic.h
"
17
18
#ifdef __HIP__
19
# define GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE 1024
20
#else
21
# define GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE 512
22
#endif
23
24
/* Exclusive prefix sum over `counter`, performed serially by a single thread.
 *
 * On return from the thread whose `global_id` is 0:
 *   - `prefix_sum[i]` holds the sum of the original `counter[0] .. counter[i - 1]`
 *     (so `prefix_sum[0]` is 0),
 *   - every `counter[i]` for `i` in `[0, num_values)` has been reset to 0.
 *
 * Every other thread returns without reading or writing anything.
 *
 * global_id:  index of the calling thread; only index 0 does any work.
 * counter:    input counts, `num_values` entries; zeroed as a side effect.
 * prefix_sum: output offsets, `num_values` entries.
 * num_values: number of entries to process; nothing is written when it is <= 0. */
__device__ void gpu_parallel_prefix_sum(const int global_id,
                                        ccl_global int *counter,
                                        ccl_global int *prefix_sum,
                                        const int num_values)
{
  /* The scan is not distributed: a single thread walks the whole array. */
  if (global_id == 0) {
    int running_total = 0;
    int index = 0;

    while (index < num_values) {
      /* Load the count before either store below, then write the output offset
       * and only afterwards clear the counter slot. */
      const int count = counter[index];
      prefix_sum[index] = running_total;
      counter[index] = 0;

      running_total += count;
      ++index;
    }
  }
}
41
42
CCL_NAMESPACE_END
atomic.h
ccl_global
#define ccl_global
Definition
device/cuda/compat.h:45
CCL_NAMESPACE_END
#define CCL_NAMESPACE_END
Definition
device/cuda/compat.h:10
__device__
#define __device__
Definition
device/metal/compat.h:317
CCL_NAMESPACE_BEGIN
Definition
python.cpp:44
gpu_parallel_prefix_sum
__device__ void gpu_parallel_prefix_sum(const int global_id, ccl_global int *counter, ccl_global int *prefix_sum, const int num_values)
Definition
parallel_prefix_sum.h:24
Generated on Thu Feb 6 2025 07:36:39 for Blender by
doxygen
1.11.0