/* Blender V4.3 -- source listing taken from the generated documentation page
 * ("Go to the documentation of this file."). */
/* Per-architecture device limits and kernel launch settings, selected by __CUDA_ARCH__.
 *
 * Every branch defines the same six macros:
 * - GPU_MULTIPRESSOR_MAX_REGISTERS: registers available per multiprocessor. The
 *   "MULTIPRESSOR" spelling is intentional here: other macros in this file refer to it
 *   by that exact name.
 * - GPU_MULTIPROCESSOR_MAX_BLOCKS: maximum number of blocks per multiprocessor.
 * - GPU_BLOCK_MAX_THREADS: maximum number of threads per block.
 * - GPU_THREAD_MAX_REGISTERS: maximum number of registers per thread.
 * - GPU_KERNEL_BLOCK_NUM_THREADS and GPU_KERNEL_MAX_REGISTERS: the block size and the
 *   per-thread register budget chosen for kernels; they must not exceed the limits above.
 *
 * NOTE: an undefined __CUDA_ARCH__ evaluates to 0 inside #if, so a compilation pass that
 * does not define it falls into the "<= 699" branch. */

/* 3.0 and 3.5 */
#if __CUDA_ARCH__ == 300 || __CUDA_ARCH__ == 350
#  define GPU_MULTIPRESSOR_MAX_REGISTERS 65536
#  define GPU_MULTIPROCESSOR_MAX_BLOCKS 16
#  define GPU_BLOCK_MAX_THREADS 1024
#  define GPU_THREAD_MAX_REGISTERS 63

/* Kernel settings. */
#  define GPU_KERNEL_BLOCK_NUM_THREADS 256
#  define GPU_KERNEL_MAX_REGISTERS 63

/* 3.2 */
#elif __CUDA_ARCH__ == 320
#  define GPU_MULTIPRESSOR_MAX_REGISTERS 32768
#  define GPU_MULTIPROCESSOR_MAX_BLOCKS 16
#  define GPU_BLOCK_MAX_THREADS 1024
#  define GPU_THREAD_MAX_REGISTERS 63

/* Kernel settings. */
#  define GPU_KERNEL_BLOCK_NUM_THREADS 256
#  define GPU_KERNEL_MAX_REGISTERS 63

/* 3.7 */
#elif __CUDA_ARCH__ == 370
#  define GPU_MULTIPRESSOR_MAX_REGISTERS 65536
#  define GPU_MULTIPROCESSOR_MAX_BLOCKS 16
#  define GPU_BLOCK_MAX_THREADS 1024
#  define GPU_THREAD_MAX_REGISTERS 255

/* Kernel settings. */
#  define GPU_KERNEL_BLOCK_NUM_THREADS 256
#  define GPU_KERNEL_MAX_REGISTERS 63

/* Every remaining value up to 6.x. */
#elif __CUDA_ARCH__ <= 699
#  define GPU_MULTIPRESSOR_MAX_REGISTERS 65536
#  define GPU_MULTIPROCESSOR_MAX_BLOCKS 32
#  define GPU_BLOCK_MAX_THREADS 1024
#  define GPU_THREAD_MAX_REGISTERS 255

/* Kernel settings. */
#  define GPU_KERNEL_BLOCK_NUM_THREADS 256
/* Toolkit major version 9 or newer targeting architecture 600 or newer gets a larger
 * per-thread register budget; exactly one of the two definitions may be active. */
#  if __CUDACC_VER_MAJOR__ >= 9 && __CUDA_ARCH__ >= 600
#    define GPU_KERNEL_MAX_REGISTERS 64
#  else
#    define GPU_KERNEL_MAX_REGISTERS 48
#  endif

/* 7.x and 8.x */
#elif __CUDA_ARCH__ <= 899
#  define GPU_MULTIPRESSOR_MAX_REGISTERS 65536
#  define GPU_MULTIPROCESSOR_MAX_BLOCKS 32
#  define GPU_BLOCK_MAX_THREADS 1024
#  define GPU_THREAD_MAX_REGISTERS 255

/* Kernel settings. */
#  define GPU_KERNEL_BLOCK_NUM_THREADS 512
#  define GPU_KERNEL_MAX_REGISTERS 96

/* Anything above 899 has no entry in this table. */
#else
#  error "Unknown or unsupported CUDA architecture, can't determine launch bounds"
#endif
/* Declaration prefix for a kernel entry point with explicit launch bounds.
 *
 * block_num_threads:    passed as the first __launch_bounds__ argument.
 * thread_num_registers: register budget per thread; the second __launch_bounds__ argument is
 *                       GPU_MULTIPRESSOR_MAX_REGISTERS divided by the registers one block
 *                       would use (block_num_threads * thread_num_registers).
 *
 * Both arguments are parenthesized inside the division so that passing an expression
 * (for example `a + b`) cannot change the precedence of the computation. */
#define ccl_gpu_kernel(block_num_threads, thread_num_registers) \
  extern "C" __global__ void __launch_bounds__(block_num_threads, \
                                               GPU_MULTIPRESSOR_MAX_REGISTERS / \
                                                   ((block_num_threads) * \
                                                    (thread_num_registers)))
/* Declaration prefix for a kernel entry point that only bounds the block size:
 * block_num_threads is the single __launch_bounds__ argument. */
#define ccl_gpu_kernel_threads(block_num_threads) \
  extern "C" __global__ void __launch_bounds__(block_num_threads)
/* Builds the kernel name and parameter list: `name` is pasted onto the `kernel_gpu_`
 * prefix and the remaining arguments become the parenthesized parameter list. */
#define ccl_gpu_kernel_signature(name, ...) kernel_gpu_##name(__VA_ARGS__)
/* Expands to nothing. */
#define ccl_gpu_kernel_postfix

/* Expands to its argument unchanged. */
#define ccl_gpu_kernel_call(x) x
/* True when index `i` is strictly less than `n`. */
#define ccl_gpu_kernel_within_bounds(i, n) ((i) < (n))
/* Defines a local function object named KernelLambda.
 *
 * func: expression returned by the object's call operator; it may refer to the `state`
 *       parameter of that operator and to the members declared through the extra arguments.
 * ...:  member declaration placed inside the struct.
 *
 * The definition ends with the ccl_gpu_kernel_lambda_pass token, which is not defined in
 * this part of the file.
 *
 * NOTE(review): the member-declaration line and the operator body were missing from the
 * listing this file was recovered from and have been reconstructed; confirm against the
 * upstream header. */
#define ccl_gpu_kernel_lambda(func, ...) \
  struct KernelLambda { \
    __VA_ARGS__; \
    __device__ int operator()(const int state) \
    { \
      return (func); \
    } \
  } ccl_gpu_kernel_lambda_pass
/* Sanity check: the chosen block size must fit within the per-block thread limit. */
#if GPU_KERNEL_BLOCK_NUM_THREADS > GPU_BLOCK_MAX_THREADS
#  error "Maximum number of threads per block exceeded"
#endif
/* Sanity check: the number of blocks that fit in the multiprocessor's registers (total
 * registers divided by the registers one block uses) must not exceed the per-multiprocessor
 * block limit. */
#if GPU_MULTIPRESSOR_MAX_REGISTERS / (GPU_KERNEL_BLOCK_NUM_THREADS * GPU_KERNEL_MAX_REGISTERS) > \
    GPU_MULTIPROCESSOR_MAX_BLOCKS
#  error "Maximum number of blocks per multiprocessor exceeded"
#endif
/* Sanity check: the per-thread register budget must fit within the per-thread limit. */
#if GPU_KERNEL_MAX_REGISTERS > GPU_THREAD_MAX_REGISTERS
#  error "Maximum number of registers per thread exceeded"
#endif