Blender V4.3
kernel/device/hip/config.h
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5/* Device data taken from HIP occupancy calculator.
6 *
7 * Terminology
8 * - HIP GPUs have multiple streaming multiprocessors
9 * - Each multiprocessor executes multiple thread blocks
10 * - Each thread block contains a number of threads, also known as the block size
11 * - Multiprocessors have a fixed number of registers, and the amount of registers
12 * used by each threads limits the number of threads per block.
13 */
14
15/* Launch Bound Definitions */
16#define GPU_MULTIPRESSOR_MAX_REGISTERS 65536
17#define GPU_MULTIPROCESSOR_MAX_BLOCKS 64
18#define GPU_BLOCK_MAX_THREADS 1024
19#define GPU_THREAD_MAX_REGISTERS 255
20
21#define GPU_KERNEL_BLOCK_NUM_THREADS 1024
22#define GPU_KERNEL_MAX_REGISTERS 64
23
24/* For performance tuning of HIPRT kernels we might have to change the number
25 * that's why we don't use GPU_KERNEL_BLOCK_NUM_THREADS. */
26#define GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS 1024
27
28/* Compute number of threads per block and minimum blocks per multiprocessor
29 * given the maximum number of registers per thread. */
30#define ccl_gpu_kernel(block_num_threads, thread_num_registers) \
31 extern "C" __global__ void __launch_bounds__(block_num_threads, \
32 GPU_MULTIPRESSOR_MAX_REGISTERS / \
33 (block_num_threads * thread_num_registers))
34
35#define ccl_gpu_kernel_threads(block_num_threads) \
36 extern "C" __global__ void __launch_bounds__(block_num_threads)
37
38#define ccl_gpu_kernel_signature(name, ...) kernel_gpu_##name(__VA_ARGS__)
39#define ccl_gpu_kernel_postfix
40
41#define ccl_gpu_kernel_call(x) x
42#define ccl_gpu_kernel_within_bounds(i, n) ((i) < (n))
43
44/* Define a function object where "func" is the lambda body, and additional parameters are used to
45 * specify captured state */
46#define ccl_gpu_kernel_lambda(func, ...) \
47 struct KernelLambda { \
48 __VA_ARGS__; \
49 __device__ int operator()(const int state) \
50 { \
51 return (func); \
52 } \
53 } ccl_gpu_kernel_lambda_pass
54
55/* sanity checks */
56
57#if GPU_KERNEL_BLOCK_NUM_THREADS > GPU_BLOCK_MAX_THREADS
58# error "Maximum number of threads per block exceeded"
59#endif
60
61#if GPU_MULTIPRESSOR_MAX_REGISTERS / (GPU_KERNEL_BLOCK_NUM_THREADS * GPU_KERNEL_MAX_REGISTERS) > \
62 GPU_MULTIPROCESSOR_MAX_BLOCKS
63# error "Maximum number of blocks per multiprocessor exceeded"
64#endif
65
66#if GPU_KERNEL_MAX_REGISTERS > GPU_THREAD_MAX_REGISTERS
67# error "Maximum number of registers per thread exceeded"
68#endif