 |
Blender V5.0
|
Go to the documentation of this file.
/* Per-architecture limits and kernel launch parameters.
 *
 * __CUDA_ARCH__ encodes the compute capability as major * 100 + minor * 10.
 * When it is not defined (host compilation pass) the preprocessor evaluates it
 * as 0, so the first branch below is the one that gets selected.
 *
 * Every branch defines the same six macros:
 * - GPU_MULTIPRESSOR_MAX_REGISTERS, GPU_MULTIPROCESSOR_MAX_BLOCKS,
 *   GPU_BLOCK_MAX_THREADS, GPU_THREAD_MAX_REGISTERS: upper limits that the
 *   compile-time sanity checks in this file compare against.
 * - GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS: the block size and
 *   per-thread register budget chosen for kernels on that architecture range. */

/* __CUDA_ARCH__ up to 699 (compute capability 6.x and below). */
#if __CUDA_ARCH__ <= 699
#  define GPU_MULTIPRESSOR_MAX_REGISTERS 65536
#  define GPU_MULTIPROCESSOR_MAX_BLOCKS 32
#  define GPU_BLOCK_MAX_THREADS 1024
#  define GPU_THREAD_MAX_REGISTERS 255

#  define GPU_KERNEL_BLOCK_NUM_THREADS 256
/* A toolkit with major version 9 or newer targeting arch 600+ gets the larger
 * register budget; every other combination in this range uses the smaller one. */
#  if __CUDACC_VER_MAJOR__ >= 9 && __CUDA_ARCH__ >= 600
#    define GPU_KERNEL_MAX_REGISTERS 64
#  else
#    define GPU_KERNEL_MAX_REGISTERS 48
#  endif

/* __CUDA_ARCH__ from 700 up to 1299. */
#elif __CUDA_ARCH__ <= 1299
#  define GPU_MULTIPRESSOR_MAX_REGISTERS 65536
#  define GPU_MULTIPROCESSOR_MAX_BLOCKS 32
#  define GPU_BLOCK_MAX_THREADS 1024
#  define GPU_THREAD_MAX_REGISTERS 255

#  define GPU_KERNEL_BLOCK_NUM_THREADS 384
#  define GPU_KERNEL_MAX_REGISTERS 168

/* Anything newer has no known configuration: fail the build rather than guess. */
#else
#  error "Unknown or unsupported CUDA architecture, can't determine launch bounds"
#endif
/* Declaration prefix for an `extern "C"` kernel entry point with explicit
 * launch bounds.
 *
 * block_num_threads:    passed as the first __launch_bounds__ argument
 *                       (maximum threads per block).
 * thread_num_registers: register budget per thread; it is not passed to the
 *                       compiler directly but converted into the second
 *                       __launch_bounds__ argument (minimum blocks per
 *                       multiprocessor) as
 *                       GPU_MULTIPRESSOR_MAX_REGISTERS /
 *                       (block_num_threads * thread_num_registers),
 *                       using integer division.
 *
 * Both parameters are parenthesized in the expansion so that expression
 * arguments (e.g. `a + b`) keep their intended grouping. */
#define ccl_gpu_kernel(block_num_threads, thread_num_registers) \
  extern "C" __global__ void __launch_bounds__( \
      (block_num_threads), \
      GPU_MULTIPRESSOR_MAX_REGISTERS / ((block_num_threads) * (thread_num_registers)))
/* Declaration prefix for an `extern "C"` kernel entry point that only bounds
 * the block size: `block_num_threads` is the sole __launch_bounds__ argument,
 * so no minimum number of blocks per multiprocessor is requested. */
#define ccl_gpu_kernel_threads(block_num_threads) \
  extern "C" __global__ void __launch_bounds__(block_num_threads)
/* Expands to `kernel_gpu_<name>` followed by the remaining arguments wrapped in
 * parentheses, i.e. the function name plus its parameter list. */
#define ccl_gpu_kernel_signature(name, ...) kernel_gpu_##name(__VA_ARGS__)
/* Expands to nothing. */
#define ccl_gpu_kernel_postfix

/* Expands to its argument unchanged. */
#define ccl_gpu_kernel_call(x) x
/* Evaluates to true when `i` is strictly less than `n`. Both arguments are
 * parenthesized, so expressions can be passed safely. */
#define ccl_gpu_kernel_within_bounds(i, n) ((i) < (n))
/* Declares a struct named `KernelLambda` that has a `__device__` call operator
 * taking `const int state` and returning `int`, and ends by naming an instance
 * of it `ccl_gpu_kernel_lambda_pass`.
 *
 * NOTE(review): the listing numbers jump from 67 to 69 and from 69 to 73, so
 * the struct's members and the operator body are not visible here. How `func`
 * and the variadic arguments are used inside the struct cannot be confirmed
 * from this view - check against the original header. */
66#define ccl_gpu_kernel_lambda(func, ...) \
67 struct KernelLambda { \
69 __device__ int operator()(const int state) \
73 } ccl_gpu_kernel_lambda_pass
/* Compile-time sanity checks on the selected kernel parameters. */

/* The chosen block size must not exceed the per-block thread limit. */
#if GPU_KERNEL_BLOCK_NUM_THREADS > GPU_BLOCK_MAX_THREADS
#  error "Maximum number of threads per block exceeded"
#endif

/* The number of blocks implied by the register budget (total registers divided
 * by registers used per block, integer division) must not exceed the
 * per-multiprocessor block limit. */
#if GPU_MULTIPRESSOR_MAX_REGISTERS / (GPU_KERNEL_BLOCK_NUM_THREADS * GPU_KERNEL_MAX_REGISTERS) > \
    GPU_MULTIPROCESSOR_MAX_BLOCKS
#  error "Maximum number of blocks per multiprocessor exceeded"
#endif
/* Compile-time check: the per-kernel register budget must not exceed the
 * per-thread register limit.
 * NOTE(review): the directive that closes this conditional is not visible in
 * this listing - confirm it is present in the original header. */
86#if GPU_KERNEL_MAX_REGISTERS > GPU_THREAD_MAX_REGISTERS
87# error "Maximum number of registers per thread exceeded"