Blender V5.0
device/cuda/device.cpp
Go to the documentation of this file.
/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
 *
 * SPDX-License-Identifier: Apache-2.0 */
4
6#include "device/device.h"
7
8#include "util/log.h"
9
10#ifdef WITH_CUDA
12
13# include "integrator/denoiser_oidn_gpu.h" // IWYU pragma: keep
14
15# include "util/string.h"
16# ifdef _WIN32
17# include "util/windows.h"
18# endif
19#endif /* WITH_CUDA */
20
22
/* Report whether CUDA can be used by this build.
 *
 * - Built without WITH_CUDA: always returns false.
 * - Built with WITH_CUDA_DYNLOAD: initializes CUEW on the first call, caches the outcome in
 *   function-local statics and returns the cached value on later calls. The result is true
 *   only when CUEW initializes and either precompiled kernels or a CUDA compiler is found.
 * - Built with WITH_CUDA but without dynamic loading: always returns true. */
bool device_cuda_init()
{
#if !defined(WITH_CUDA)
  return false;
#elif defined(WITH_CUDA_DYNLOAD)
  static bool initialized = false;
  static bool result = false;

  if (initialized) {
    return result;
  }

  /* Mark as initialized before doing the work, so a failed attempt is not retried. */
  initialized = true;
  const int cuew_result = cuewInit(CUEW_INIT_CUDA);
  if (cuew_result == CUEW_SUCCESS) {
    LOG_INFO << "CUEW initialization succeeded";
    if (CUDADevice::have_precompiled_kernels()) {
      LOG_INFO << "Found precompiled kernels";
      result = true;
    }
    else if (cuewCompilerPath() != nullptr) {
      LOG_INFO << "Found CUDA compiler " << cuewCompilerPath();
      result = true;
    }
    else {
      LOG_INFO << "Neither precompiled kernels nor CUDA compiler was found,"
               << " unable to use CUDA";
    }
  }
  else {
    LOG_WARNING << "CUEW initialization failed: "
                << ((cuew_result == CUEW_ERROR_ATEXIT_FAILED) ?
                        "Error setting up atexit() handler" :
                        "Error opening the library");
  }

  return result;
#else  /* WITH_CUDA_DYNLOAD */
  return true;
#endif /* WITH_CUDA_DYNLOAD */
}
64
66 Stats &stats,
67 Profiler &profiler,
68 bool headless)
69{
70#ifdef WITH_CUDA
71 return make_unique<CUDADevice>(info, stats, profiler, headless);
72#else
73 (void)info;
74 (void)stats;
75 (void)profiler;
76 (void)headless;
77
78 LOG_FATAL << "Request to create CUDA device without compiled-in support. Should never happen.";
79
80 return nullptr;
81#endif
82}
83
84#ifdef WITH_CUDA
85static CUresult device_cuda_safe_init()
86{
87# ifdef _WIN32
88 __try
89 {
90 return cuInit(0);
91 }
92 __except (EXCEPTION_EXECUTE_HANDLER)
93 {
94 /* Ignore crashes inside the CUDA driver and hope we can
95 * survive even with corrupted CUDA installs. */
96 fprintf(stderr, "Cycles CUDA: driver crashed, continuing without CUDA.\n");
97 }
98
99 return CUDA_ERROR_NO_DEVICE;
100# else
101 return cuInit(0);
102# endif
103}
104#endif /* WITH_CUDA */
105
107{
108#ifdef WITH_CUDA
109 CUresult result = device_cuda_safe_init();
110 if (result != CUDA_SUCCESS) {
111 if (result != CUDA_ERROR_NO_DEVICE) {
112 LOG_ERROR << "CUDA cuInit: " << cuewErrorString(result);
113 }
114 return;
115 }
116
117 int count = 0;
118 result = cuDeviceGetCount(&count);
119 if (result != CUDA_SUCCESS) {
120 LOG_ERROR << "CUDA cuDeviceGetCount: " << cuewErrorString(result);
121 return;
122 }
123
124 vector<DeviceInfo> display_devices;
125
126 for (int num = 0; num < count; num++) {
127 char name[256];
128
129 result = cuDeviceGetName(name, 256, num);
130 if (result != CUDA_SUCCESS) {
131 LOG_ERROR << "CUDA cuDeviceGetName: " << cuewErrorString(result);
132 continue;
133 }
134
135 if (!cudaSupportsDevice(num)) {
136 LOG_INFO << "Ignoring device \"" << name << "\", this graphics card is no longer supported.";
137 continue;
138 }
139
140 DeviceInfo info;
141
142 info.type = DEVICE_CUDA;
143 info.description = string(name);
144 info.num = num;
145
146 info.has_nanovdb = true;
147 info.denoisers = 0;
148
149 info.has_gpu_queue = true;
150
151 /* Check if the device has P2P access to any other device in the system. */
152 for (int peer_num = 0; peer_num < count && !info.has_peer_memory; peer_num++) {
153 if (num != peer_num) {
154 if (cudaSupportsDevice(peer_num)) {
155 int can_access = 0;
156 cuDeviceCanAccessPeer(&can_access, num, peer_num);
157 info.has_peer_memory = (can_access != 0);
158 }
159 }
160 }
161
162 int pci_location[3] = {0, 0, 0};
163 cuDeviceGetAttribute(&pci_location[0], CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, num);
164 cuDeviceGetAttribute(&pci_location[1], CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, num);
165 cuDeviceGetAttribute(&pci_location[2], CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, num);
166 info.id = string_printf("CUDA_%s_%04x:%02x:%02x",
167 name,
168 (unsigned int)pci_location[0],
169 (unsigned int)pci_location[1],
170 (unsigned int)pci_location[2]);
171
172# if defined(WITH_OPENIMAGEDENOISE)
173# if OIDN_VERSION >= 20300
174 if (oidnIsCUDADeviceSupported(num)) {
175# else
176 if (OIDNDenoiserGPU::is_device_supported(info)) {
177# endif
179 }
180# endif
181
182 /* If device has a kernel timeout and no compute preemption, we assume
183 * it is connected to a display and will freeze the display while doing
184 * computations. */
185 int timeout_attr = 0, preempt_attr = 0;
186 cuDeviceGetAttribute(&timeout_attr, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, num);
187 cuDeviceGetAttribute(&preempt_attr, CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED, num);
188
189# ifdef _WIN32
190 int major;
191 cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, num);
192 /* The CUDA driver reports compute preemption as not being available on
193 * Windows 10 even when it is, due to an issue in application profiles.
194 * Detect case where we expect it to be available and override. */
195 if (preempt_attr == 0 && (major >= 6) && system_windows_version_at_least(10, 17134)) {
196 LOG_INFO << "Assuming device has compute preemption on Windows 10.";
197 preempt_attr = 1;
198 }
199# endif
200
201 if (timeout_attr && !preempt_attr) {
202 LOG_INFO << "Device is recognized as display.";
203 info.description += " (Display)";
204 info.display_device = true;
205 display_devices.push_back(info);
206 }
207 else {
208 LOG_INFO << "Device has compute preemption or is not used for display.";
209 devices.push_back(info);
210 }
211 LOG_INFO << "Added device \"" << info.description << "\" with id \"" << info.id << "\".";
212
214 LOG_INFO << "Device with id \"" << info.id << "\" supports "
216 }
217 }
218
219 if (!display_devices.empty()) {
220 devices.insert(devices.end(), display_devices.begin(), display_devices.end());
221 }
222#else /* WITH_CUDA */
223 (void)devices;
224#endif /* WITH_CUDA */
225}
226
228{
229#ifdef WITH_CUDA
230 CUresult result = device_cuda_safe_init();
231 if (result != CUDA_SUCCESS) {
232 if (result != CUDA_ERROR_NO_DEVICE) {
233 return string("Error initializing CUDA: ") + cuewErrorString(result);
234 }
235 return "No CUDA device found\n";
236 }
237
238 int count;
239 result = cuDeviceGetCount(&count);
240 if (result != CUDA_SUCCESS) {
241 return string("Error getting devices: ") + cuewErrorString(result);
242 }
243
244 string capabilities;
245 for (int num = 0; num < count; num++) {
246 char name[256];
247 if (cuDeviceGetName(name, 256, num) != CUDA_SUCCESS) {
248 continue;
249 }
250 capabilities += string("\t") + name + "\n";
251 int value;
252# define GET_ATTR(attr) \
253 { \
254 if (cuDeviceGetAttribute(&value, CU_DEVICE_ATTRIBUTE_##attr, num) == CUDA_SUCCESS) { \
255 capabilities += string_printf("\t\tCU_DEVICE_ATTRIBUTE_" #attr "\t\t\t%d\n", value); \
256 } \
257 } \
258 (void)0
259 /* TODO(sergey): Strip all attributes which are not useful for us
260 * or does not depend on the driver.
261 */
262 GET_ATTR(MAX_THREADS_PER_BLOCK);
263 GET_ATTR(MAX_BLOCK_DIM_X);
264 GET_ATTR(MAX_BLOCK_DIM_Y);
265 GET_ATTR(MAX_BLOCK_DIM_Z);
266 GET_ATTR(MAX_GRID_DIM_X);
267 GET_ATTR(MAX_GRID_DIM_Y);
268 GET_ATTR(MAX_GRID_DIM_Z);
269 GET_ATTR(MAX_SHARED_MEMORY_PER_BLOCK);
270 GET_ATTR(SHARED_MEMORY_PER_BLOCK);
271 GET_ATTR(TOTAL_CONSTANT_MEMORY);
272 GET_ATTR(WARP_SIZE);
273 GET_ATTR(MAX_PITCH);
274 GET_ATTR(MAX_REGISTERS_PER_BLOCK);
275 GET_ATTR(REGISTERS_PER_BLOCK);
276 GET_ATTR(CLOCK_RATE);
277 GET_ATTR(TEXTURE_ALIGNMENT);
278 GET_ATTR(GPU_OVERLAP);
279 GET_ATTR(MULTIPROCESSOR_COUNT);
280 GET_ATTR(KERNEL_EXEC_TIMEOUT);
281 GET_ATTR(INTEGRATED);
282 GET_ATTR(CAN_MAP_HOST_MEMORY);
283 GET_ATTR(COMPUTE_MODE);
284 GET_ATTR(MAXIMUM_TEXTURE1D_WIDTH);
285 GET_ATTR(MAXIMUM_TEXTURE2D_WIDTH);
286 GET_ATTR(MAXIMUM_TEXTURE2D_HEIGHT);
287 GET_ATTR(MAXIMUM_TEXTURE3D_WIDTH);
288 GET_ATTR(MAXIMUM_TEXTURE3D_HEIGHT);
289 GET_ATTR(MAXIMUM_TEXTURE3D_DEPTH);
290 GET_ATTR(MAXIMUM_TEXTURE2D_LAYERED_WIDTH);
291 GET_ATTR(MAXIMUM_TEXTURE2D_LAYERED_HEIGHT);
292 GET_ATTR(MAXIMUM_TEXTURE2D_LAYERED_LAYERS);
293 GET_ATTR(MAXIMUM_TEXTURE2D_ARRAY_WIDTH);
294 GET_ATTR(MAXIMUM_TEXTURE2D_ARRAY_HEIGHT);
295 GET_ATTR(MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES);
296 GET_ATTR(SURFACE_ALIGNMENT);
297 GET_ATTR(CONCURRENT_KERNELS);
298 GET_ATTR(ECC_ENABLED);
299 GET_ATTR(TCC_DRIVER);
300 GET_ATTR(MEMORY_CLOCK_RATE);
301 GET_ATTR(GLOBAL_MEMORY_BUS_WIDTH);
302 GET_ATTR(L2_CACHE_SIZE);
303 GET_ATTR(MAX_THREADS_PER_MULTIPROCESSOR);
304 GET_ATTR(ASYNC_ENGINE_COUNT);
305 GET_ATTR(UNIFIED_ADDRESSING);
306 GET_ATTR(MAXIMUM_TEXTURE1D_LAYERED_WIDTH);
307 GET_ATTR(MAXIMUM_TEXTURE1D_LAYERED_LAYERS);
308 GET_ATTR(CAN_TEX2D_GATHER);
309 GET_ATTR(MAXIMUM_TEXTURE2D_GATHER_WIDTH);
310 GET_ATTR(MAXIMUM_TEXTURE2D_GATHER_HEIGHT);
311 GET_ATTR(MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE);
312 GET_ATTR(MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE);
313 GET_ATTR(MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE);
314 GET_ATTR(TEXTURE_PITCH_ALIGNMENT);
315 GET_ATTR(MAXIMUM_TEXTURECUBEMAP_WIDTH);
316 GET_ATTR(MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH);
317 GET_ATTR(MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS);
318 GET_ATTR(MAXIMUM_SURFACE1D_WIDTH);
319 GET_ATTR(MAXIMUM_SURFACE2D_WIDTH);
320 GET_ATTR(MAXIMUM_SURFACE2D_HEIGHT);
321 GET_ATTR(MAXIMUM_SURFACE3D_WIDTH);
322 GET_ATTR(MAXIMUM_SURFACE3D_HEIGHT);
323 GET_ATTR(MAXIMUM_SURFACE3D_DEPTH);
324 GET_ATTR(MAXIMUM_SURFACE1D_LAYERED_WIDTH);
325 GET_ATTR(MAXIMUM_SURFACE1D_LAYERED_LAYERS);
326 GET_ATTR(MAXIMUM_SURFACE2D_LAYERED_WIDTH);
327 GET_ATTR(MAXIMUM_SURFACE2D_LAYERED_HEIGHT);
328 GET_ATTR(MAXIMUM_SURFACE2D_LAYERED_LAYERS);
329 GET_ATTR(MAXIMUM_SURFACECUBEMAP_WIDTH);
330 GET_ATTR(MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH);
331 GET_ATTR(MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS);
332 GET_ATTR(MAXIMUM_TEXTURE1D_LINEAR_WIDTH);
333 GET_ATTR(MAXIMUM_TEXTURE2D_LINEAR_WIDTH);
334 GET_ATTR(MAXIMUM_TEXTURE2D_LINEAR_HEIGHT);
335 GET_ATTR(MAXIMUM_TEXTURE2D_LINEAR_PITCH);
336 GET_ATTR(MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH);
337 GET_ATTR(MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT);
338 GET_ATTR(COMPUTE_CAPABILITY_MAJOR);
339 GET_ATTR(COMPUTE_CAPABILITY_MINOR);
340 GET_ATTR(MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH);
341 GET_ATTR(STREAM_PRIORITIES_SUPPORTED);
342 GET_ATTR(GLOBAL_L1_CACHE_SUPPORTED);
343 GET_ATTR(LOCAL_L1_CACHE_SUPPORTED);
344 GET_ATTR(MAX_SHARED_MEMORY_PER_MULTIPROCESSOR);
345 GET_ATTR(MAX_REGISTERS_PER_MULTIPROCESSOR);
346 GET_ATTR(MANAGED_MEMORY);
347 GET_ATTR(MULTI_GPU_BOARD);
348 GET_ATTR(MULTI_GPU_BOARD_GROUP_ID);
349# undef GET_ATTR
350 capabilities += "\n";
351 }
352
353 return capabilities;
354
355#else /* WITH_CUDA */
356 return "";
357#endif /* WITH_CUDA */
358}
359
ATTR_WARN_UNUSED_RESULT const size_t num
DenoiserTypeMask denoisers
bool display_device
bool has_peer_memory
bool has_gpu_queue
DeviceType type
string description
CCL_NAMESPACE_BEGIN const char * denoiserTypeToHumanReadable(DenoiserType type)
Definition denoise.cpp:9
@ DENOISER_OPENIMAGEDENOISE
Definition denoise.h:13
#define CCL_NAMESPACE_END
void device_cuda_info(vector< DeviceInfo > &devices)
string device_cuda_capabilities()
CCL_NAMESPACE_BEGIN bool device_cuda_init()
unique_ptr< Device > device_cuda_create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
@ DEVICE_CUDA
static bool initialized
int count
#define LOG_FATAL
Definition log.h:99
#define LOG_ERROR
Definition log.h:101
#define LOG_WARNING
Definition log.h:103
#define LOG_INFO
Definition log.h:106
const char * name
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
Definition string.cpp:23
CCL_NAMESPACE_BEGIN bool system_windows_version_at_least(const int major, const int build)
Definition windows.cpp:13