Blender V4.3
device/cuda/compat.h
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#pragma once
6
7#define __KERNEL_GPU__
8#define __KERNEL_CUDA__
9#define CCL_NAMESPACE_BEGIN
10#define CCL_NAMESPACE_END
11
12#ifndef ATTR_FALLTHROUGH
13# define ATTR_FALLTHROUGH
14#endif
15
16/* Manual definitions so we can compile without CUDA toolkit. */
17
18#ifdef __CUDACC_RTC__
19typedef unsigned int uint32_t;
20typedef unsigned long long uint64_t;
21#else
22# include <stdint.h>
23#endif
24
25#ifdef CYCLES_CUBIN_CC
26# define FLT_MIN 1.175494350822287507969e-38f
27# define FLT_MAX 340282346638528859811704183484516925440.0f
28# define FLT_EPSILON 1.192092896e-07F
29#endif
30
31/* Qualifiers */
32
33#define ccl_device __device__ __inline__
34#define ccl_device_extern extern "C" __device__
35#if __CUDA_ARCH__ < 500
36# define ccl_device_inline __device__ __forceinline__
37# define ccl_device_forceinline __device__ __forceinline__
38#else
39# define ccl_device_inline __device__ __inline__
40# define ccl_device_forceinline __device__ __forceinline__
41#endif
42#define ccl_device_noinline __device__ __noinline__
43#define ccl_device_noinline_cpu ccl_device
44#define ccl_device_inline_method ccl_device
45#define ccl_global
46#define ccl_inline_constant __constant__
47#define ccl_device_constant __constant__ __device__
48#define ccl_static_constexpr static constexpr
49#define ccl_constant const
50#define ccl_gpu_shared __shared__
51#define ccl_private
52#define ccl_ray_data ccl_private
53#define ccl_may_alias
54#define ccl_restrict __restrict__
55#define ccl_loop_no_unroll
56#define ccl_align(n) __align__(n)
57#define ccl_optional_struct_init
58
59/* No assert supported for CUDA */
60
61#define kernel_assert(cond)
62
63/* GPU thread, block, grid size and index */
64
65#define ccl_gpu_thread_idx_x (threadIdx.x)
66#define ccl_gpu_block_dim_x (blockDim.x)
67#define ccl_gpu_block_idx_x (blockIdx.x)
68#define ccl_gpu_grid_dim_x (gridDim.x)
69#define ccl_gpu_warp_size (warpSize)
/* Mask with one bit set for every lane index below `thread_warp`, i.e. bits [0, thread_warp).
 *
 * Built as (1 << n) - 1 in 64-bit arithmetic, so that `thread_warp == 0` yields an empty mask
 * without shifting a 32-bit value by its full width (undefined behavior in C++), and
 * `thread_warp == 32` still yields 0xFFFFFFFF. The argument is parenthesized so that expression
 * arguments such as `a + b` expand correctly.
 *
 * NOTE(review): like the 32-bit constant this replaces, the result only covers 32 lanes. */
#define ccl_gpu_thread_mask(thread_warp) uint((1ull << (thread_warp)) - 1ull)
71
72#define ccl_gpu_global_id_x() (ccl_gpu_block_idx_x * ccl_gpu_block_dim_x + ccl_gpu_thread_idx_x)
73#define ccl_gpu_global_size_x() (ccl_gpu_grid_dim_x * ccl_gpu_block_dim_x)
74
75/* GPU warp synchronization. */
76
77#define ccl_gpu_syncthreads() __syncthreads()
78#define ccl_gpu_ballot(predicate) __ballot_sync(0xFFFFFFFF, predicate)
79
80/* GPU texture objects */
81
82typedef unsigned long long CUtexObject;
85
86template<typename T>
88 const float x,
89 const float y)
90{
91 return tex2D<T>(texobj, x, y);
92}
93
94template<typename T>
96 const float x,
97 const float y,
98 const float z)
99{
100 return tex3D<T>(texobj, x, y, z);
101}
102
103/* Use fast math functions */
104
105#define cosf(x) __cosf(((float)(x)))
106#define sinf(x) __sinf(((float)(x)))
107#define powf(x, y) __powf(((float)(x)), ((float)(y)))
108#define tanf(x) __tanf(((float)(x)))
109#define logf(x) __logf(((float)(x)))
110#define expf(x) __expf(((float)(x)))
111
112/* Half */
113
114typedef unsigned short half;
115
117{
118 half val;
119 asm("{ cvt.rn.f16.f32 %0, %1;}\n" : "=h"(val) : "f"(f));
120 return val;
121}
122
124{
125 float val;
126 asm("{ cvt.f32.f16 %0, %1;}\n" : "=f"(val) : "h"(h));
127 return val;
128}
129
130/* Types */
131
132#include "util/half.h"
133#include "util/types.h"
SIMD_FORCE_INLINE const btScalar & z() const
Return the z value.
Definition btQuadWord.h:117
Definition half.h:42
ccl_device_forceinline T ccl_gpu_tex_object_read_2D(const ccl_gpu_tex_object_2D texobj, const float x, const float y)
#define ccl_device_forceinline
CUtexObject ccl_gpu_tex_object_3D
unsigned short half
ccl_device_forceinline T ccl_gpu_tex_object_read_3D(const ccl_gpu_tex_object_3D texobj, const float x, const float y, const float z)
ccl_device_forceinline float __half2float(const half h)
unsigned long long CUtexObject
CUtexObject ccl_gpu_tex_object_2D
#define __float2half(x)
unsigned int uint32_t
Definition stdint.h:80
unsigned __int64 uint64_t
Definition stdint.h:90