Blender V4.3
device/cuda/device.cpp
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
6
7#include "util/log.h"
8
9#ifdef WITH_CUDA
11# include "device/device.h"
12
14
15# include "util/string.h"
16# include "util/windows.h"
17#endif /* WITH_CUDA */
18
20
/* Report whether CUDA can be used by this build on this system.
 *
 * - Without WITH_CUDA: always returns false.
 * - With WITH_CUDA but without WITH_CUDA_DYNLOAD: always returns true.
 * - With WITH_CUDA_DYNLOAD: initializes the CUEW wrapper on the first call and
 *   caches the outcome in function-local statics; later calls return the cached
 *   value. CUDA is reported as usable only when CUEW initializes successfully
 *   and either precompiled kernels or a CUDA compiler are found. */
bool device_cuda_init()
{
#if !defined(WITH_CUDA)
  return false;
#elif defined(WITH_CUDA_DYNLOAD)
  static bool initialized = false;
  static bool result = false;

  if (initialized) {
    return result;
  }

  /* Flag the attempt before doing any work, so a failed initialization is
   * not retried on subsequent calls. */
  initialized = true;

  const int cuew_result = cuewInit(CUEW_INIT_CUDA);
  if (cuew_result != CUEW_SUCCESS) {
    VLOG_WARNING << "CUEW initialization failed: "
                 << ((cuew_result == CUEW_ERROR_ATEXIT_FAILED) ?
                         "Error setting up atexit() handler" :
                         "Error opening the library");
    return result;
  }

  VLOG_INFO << "CUEW initialization succeeded";
  if (CUDADevice::have_precompiled_kernels()) {
    VLOG_INFO << "Found precompiled kernels";
    result = true;
  }
  /* Only look for a compiler when there are no precompiled kernels; query the
   * path once and reuse it for both the check and the log message. */
  else if (const char *compiler_path = cuewCompilerPath()) {
    VLOG_INFO << "Found CUDA compiler " << compiler_path;
    result = true;
  }
  else {
    VLOG_INFO << "Neither precompiled kernels nor CUDA compiler was found,"
              << " unable to use CUDA";
  }

  return result;
#else  /* WITH_CUDA_DYNLOAD */
  return true;
#endif /* WITH_CUDA_DYNLOAD */
}
62
63Device *device_cuda_create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
64{
65#ifdef WITH_CUDA
66 return new CUDADevice(info, stats, profiler, headless);
67#else
68 (void)info;
69 (void)stats;
70 (void)profiler;
71 (void)headless;
72
73 LOG(FATAL) << "Request to create CUDA device without compiled-in support. Should never happen.";
74
75 return nullptr;
76#endif
77}
78
#ifdef WITH_CUDA
/* Initialize the CUDA driver API via cuInit(0) and return its result.
 *
 * On Windows the call is wrapped in a structured exception handler: if an
 * exception is raised inside cuInit(), a message is printed to stderr and
 * CUDA_ERROR_NO_DEVICE is returned instead. On other platforms the result of
 * cuInit(0) is returned directly. */
static CUresult device_cuda_safe_init()
{
#  ifndef _WIN32
  return cuInit(0);
#  else
  __try {
    return cuInit(0);
  }
  __except (EXCEPTION_EXECUTE_HANDLER) {
    /* Ignore crashes inside the CUDA driver and hope we can
     * survive even with corrupted CUDA installs. */
    fprintf(stderr, "Cycles CUDA: driver crashed, continuing without CUDA.\n");
  }

  /* Only reached when the exception handler above ran. */
  return CUDA_ERROR_NO_DEVICE;
#  endif
}
#endif /* WITH_CUDA */
100
102{
103#ifdef WITH_CUDA
104 CUresult result = device_cuda_safe_init();
105 if (result != CUDA_SUCCESS) {
106 if (result != CUDA_ERROR_NO_DEVICE) {
107 fprintf(stderr, "CUDA cuInit: %s\n", cuewErrorString(result));
108 }
109 return;
110 }
111
112 int count = 0;
113 result = cuDeviceGetCount(&count);
114 if (result != CUDA_SUCCESS) {
115 fprintf(stderr, "CUDA cuDeviceGetCount: %s\n", cuewErrorString(result));
116 return;
117 }
118
119 vector<DeviceInfo> display_devices;
120
121 for (int num = 0; num < count; num++) {
122 char name[256];
123
124 result = cuDeviceGetName(name, 256, num);
125 if (result != CUDA_SUCCESS) {
126 fprintf(stderr, "CUDA cuDeviceGetName: %s\n", cuewErrorString(result));
127 continue;
128 }
129
130 int major;
131 cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, num);
132 if (major < 3) {
133 VLOG_INFO << "Ignoring device \"" << name
134 << "\", this graphics card is no longer supported.";
135 continue;
136 }
137
138 DeviceInfo info;
139
140 info.type = DEVICE_CUDA;
141 info.description = string(name);
142 info.num = num;
143
144 info.has_nanovdb = true;
145 info.denoisers = 0;
146
147 info.has_gpu_queue = true;
148
149 /* Check if the device has P2P access to any other device in the system. */
150 for (int peer_num = 0; peer_num < count && !info.has_peer_memory; peer_num++) {
151 if (num != peer_num) {
152 int can_access = 0;
153 cuDeviceCanAccessPeer(&can_access, num, peer_num);
154 info.has_peer_memory = (can_access != 0);
155 }
156 }
157
158 int pci_location[3] = {0, 0, 0};
159 cuDeviceGetAttribute(&pci_location[0], CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, num);
160 cuDeviceGetAttribute(&pci_location[1], CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, num);
161 cuDeviceGetAttribute(&pci_location[2], CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, num);
162 info.id = string_printf("CUDA_%s_%04x:%02x:%02x",
163 name,
164 (unsigned int)pci_location[0],
165 (unsigned int)pci_location[1],
166 (unsigned int)pci_location[2]);
167
168# if defined(WITH_OPENIMAGEDENOISE)
169# if OIDN_VERSION >= 20300
170 if (oidnIsCUDADeviceSupported(num)) {
171# else
172 if (OIDNDenoiserGPU::is_device_supported(info)) {
173# endif
175 }
176# endif
177
178 /* If device has a kernel timeout and no compute preemption, we assume
179 * it is connected to a display and will freeze the display while doing
180 * computations. */
181 int timeout_attr = 0, preempt_attr = 0;
182 cuDeviceGetAttribute(&timeout_attr, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, num);
183 cuDeviceGetAttribute(&preempt_attr, CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED, num);
184
185 /* The CUDA driver reports compute preemption as not being available on
186 * Windows 10 even when it is, due to an issue in application profiles.
187 * Detect case where we expect it to be available and override. */
188 if (preempt_attr == 0 && (major >= 6) && system_windows_version_at_least(10, 17134)) {
189 VLOG_INFO << "Assuming device has compute preemption on Windows 10.";
190 preempt_attr = 1;
191 }
192
193 if (timeout_attr && !preempt_attr) {
194 VLOG_INFO << "Device is recognized as display.";
195 info.description += " (Display)";
196 info.display_device = true;
197 display_devices.push_back(info);
198 }
199 else {
200 VLOG_INFO << "Device has compute preemption or is not used for display.";
201 devices.push_back(info);
202 }
203 VLOG_INFO << "Added device \"" << info.description << "\" with id \"" << info.id << "\".";
204
206 VLOG_INFO << "Device with id \"" << info.id << "\" supports "
208 }
209
210 if (!display_devices.empty()) {
211 devices.insert(devices.end(), display_devices.begin(), display_devices.end());
212 }
213#else /* WITH_CUDA */
214 (void)devices;
215#endif /* WITH_CUDA */
216}
217
219{
220#ifdef WITH_CUDA
221 CUresult result = device_cuda_safe_init();
222 if (result != CUDA_SUCCESS) {
223 if (result != CUDA_ERROR_NO_DEVICE) {
224 return string("Error initializing CUDA: ") + cuewErrorString(result);
225 }
226 return "No CUDA device found\n";
227 }
228
229 int count;
230 result = cuDeviceGetCount(&count);
231 if (result != CUDA_SUCCESS) {
232 return string("Error getting devices: ") + cuewErrorString(result);
233 }
234
235 string capabilities = "";
236 for (int num = 0; num < count; num++) {
237 char name[256];
238 if (cuDeviceGetName(name, 256, num) != CUDA_SUCCESS) {
239 continue;
240 }
241 capabilities += string("\t") + name + "\n";
242 int value;
243# define GET_ATTR(attr) \
244 { \
245 if (cuDeviceGetAttribute(&value, CU_DEVICE_ATTRIBUTE_##attr, num) == CUDA_SUCCESS) { \
246 capabilities += string_printf("\t\tCU_DEVICE_ATTRIBUTE_" #attr "\t\t\t%d\n", value); \
247 } \
248 } \
249 (void)0
250 /* TODO(sergey): Strip all attributes which are not useful for us
251 * or does not depend on the driver.
252 */
253 GET_ATTR(MAX_THREADS_PER_BLOCK);
254 GET_ATTR(MAX_BLOCK_DIM_X);
255 GET_ATTR(MAX_BLOCK_DIM_Y);
256 GET_ATTR(MAX_BLOCK_DIM_Z);
257 GET_ATTR(MAX_GRID_DIM_X);
258 GET_ATTR(MAX_GRID_DIM_Y);
259 GET_ATTR(MAX_GRID_DIM_Z);
260 GET_ATTR(MAX_SHARED_MEMORY_PER_BLOCK);
261 GET_ATTR(SHARED_MEMORY_PER_BLOCK);
262 GET_ATTR(TOTAL_CONSTANT_MEMORY);
263 GET_ATTR(WARP_SIZE);
264 GET_ATTR(MAX_PITCH);
265 GET_ATTR(MAX_REGISTERS_PER_BLOCK);
266 GET_ATTR(REGISTERS_PER_BLOCK);
267 GET_ATTR(CLOCK_RATE);
268 GET_ATTR(TEXTURE_ALIGNMENT);
269 GET_ATTR(GPU_OVERLAP);
270 GET_ATTR(MULTIPROCESSOR_COUNT);
271 GET_ATTR(KERNEL_EXEC_TIMEOUT);
272 GET_ATTR(INTEGRATED);
273 GET_ATTR(CAN_MAP_HOST_MEMORY);
274 GET_ATTR(COMPUTE_MODE);
275 GET_ATTR(MAXIMUM_TEXTURE1D_WIDTH);
276 GET_ATTR(MAXIMUM_TEXTURE2D_WIDTH);
277 GET_ATTR(MAXIMUM_TEXTURE2D_HEIGHT);
278 GET_ATTR(MAXIMUM_TEXTURE3D_WIDTH);
279 GET_ATTR(MAXIMUM_TEXTURE3D_HEIGHT);
280 GET_ATTR(MAXIMUM_TEXTURE3D_DEPTH);
281 GET_ATTR(MAXIMUM_TEXTURE2D_LAYERED_WIDTH);
282 GET_ATTR(MAXIMUM_TEXTURE2D_LAYERED_HEIGHT);
283 GET_ATTR(MAXIMUM_TEXTURE2D_LAYERED_LAYERS);
284 GET_ATTR(MAXIMUM_TEXTURE2D_ARRAY_WIDTH);
285 GET_ATTR(MAXIMUM_TEXTURE2D_ARRAY_HEIGHT);
286 GET_ATTR(MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES);
287 GET_ATTR(SURFACE_ALIGNMENT);
288 GET_ATTR(CONCURRENT_KERNELS);
289 GET_ATTR(ECC_ENABLED);
290 GET_ATTR(TCC_DRIVER);
291 GET_ATTR(MEMORY_CLOCK_RATE);
292 GET_ATTR(GLOBAL_MEMORY_BUS_WIDTH);
293 GET_ATTR(L2_CACHE_SIZE);
294 GET_ATTR(MAX_THREADS_PER_MULTIPROCESSOR);
295 GET_ATTR(ASYNC_ENGINE_COUNT);
296 GET_ATTR(UNIFIED_ADDRESSING);
297 GET_ATTR(MAXIMUM_TEXTURE1D_LAYERED_WIDTH);
298 GET_ATTR(MAXIMUM_TEXTURE1D_LAYERED_LAYERS);
299 GET_ATTR(CAN_TEX2D_GATHER);
300 GET_ATTR(MAXIMUM_TEXTURE2D_GATHER_WIDTH);
301 GET_ATTR(MAXIMUM_TEXTURE2D_GATHER_HEIGHT);
302 GET_ATTR(MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE);
303 GET_ATTR(MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE);
304 GET_ATTR(MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE);
305 GET_ATTR(TEXTURE_PITCH_ALIGNMENT);
306 GET_ATTR(MAXIMUM_TEXTURECUBEMAP_WIDTH);
307 GET_ATTR(MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH);
308 GET_ATTR(MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS);
309 GET_ATTR(MAXIMUM_SURFACE1D_WIDTH);
310 GET_ATTR(MAXIMUM_SURFACE2D_WIDTH);
311 GET_ATTR(MAXIMUM_SURFACE2D_HEIGHT);
312 GET_ATTR(MAXIMUM_SURFACE3D_WIDTH);
313 GET_ATTR(MAXIMUM_SURFACE3D_HEIGHT);
314 GET_ATTR(MAXIMUM_SURFACE3D_DEPTH);
315 GET_ATTR(MAXIMUM_SURFACE1D_LAYERED_WIDTH);
316 GET_ATTR(MAXIMUM_SURFACE1D_LAYERED_LAYERS);
317 GET_ATTR(MAXIMUM_SURFACE2D_LAYERED_WIDTH);
318 GET_ATTR(MAXIMUM_SURFACE2D_LAYERED_HEIGHT);
319 GET_ATTR(MAXIMUM_SURFACE2D_LAYERED_LAYERS);
320 GET_ATTR(MAXIMUM_SURFACECUBEMAP_WIDTH);
321 GET_ATTR(MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH);
322 GET_ATTR(MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS);
323 GET_ATTR(MAXIMUM_TEXTURE1D_LINEAR_WIDTH);
324 GET_ATTR(MAXIMUM_TEXTURE2D_LINEAR_WIDTH);
325 GET_ATTR(MAXIMUM_TEXTURE2D_LINEAR_HEIGHT);
326 GET_ATTR(MAXIMUM_TEXTURE2D_LINEAR_PITCH);
327 GET_ATTR(MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH);
328 GET_ATTR(MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT);
329 GET_ATTR(COMPUTE_CAPABILITY_MAJOR);
330 GET_ATTR(COMPUTE_CAPABILITY_MINOR);
331 GET_ATTR(MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH);
332 GET_ATTR(STREAM_PRIORITIES_SUPPORTED);
333 GET_ATTR(GLOBAL_L1_CACHE_SUPPORTED);
334 GET_ATTR(LOCAL_L1_CACHE_SUPPORTED);
335 GET_ATTR(MAX_SHARED_MEMORY_PER_MULTIPROCESSOR);
336 GET_ATTR(MAX_REGISTERS_PER_MULTIPROCESSOR);
337 GET_ATTR(MANAGED_MEMORY);
338 GET_ATTR(MULTI_GPU_BOARD);
339 GET_ATTR(MULTI_GPU_BOARD_GROUP_ID);
340# undef GET_ATTR
341 capabilities += "\n";
342 }
343
344 return capabilities;
345
346#else /* WITH_CUDA */
347 return "";
348#endif /* WITH_CUDA */
349}
350
DenoiserTypeMask denoisers
bool display_device
bool has_peer_memory
bool has_gpu_queue
DeviceType type
string description
CCL_NAMESPACE_BEGIN const char * denoiserTypeToHumanReadable(DenoiserType type)
Definition denoise.cpp:9
@ DENOISER_OPENIMAGEDENOISE
Definition denoise.h:15
#define CCL_NAMESPACE_END
void device_cuda_info(vector< DeviceInfo > &devices)
string device_cuda_capabilities()
CCL_NAMESPACE_BEGIN bool device_cuda_init()
Device * device_cuda_create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
@ DEVICE_CUDA
#define NULL
static bool initialized
int count
#define VLOG_INFO
Definition log.h:72
#define VLOG_WARNING
Definition log.h:70
#define LOG(severity)
Definition log.h:33
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
Definition string.cpp:23
CCL_NAMESPACE_BEGIN bool system_windows_version_at_least(int major, int build)
Definition windows.cpp:13