Blender V4.5
device/device.h
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#pragma once
6
7#include <cstdlib>
8#include <functional>
9
10#include "bvh/params.h"
11
12#include "device/denoise.h"
13#include "device/memory.h"
14
15#include "util/log.h"
16#include "util/profiling.h"
17#include "util/stats.h"
18#include "util/string.h"
19#include "util/texture.h"
20#include "util/thread.h"
21#include "util/types.h"
22#include "util/unique_ptr.h"
23#include "util/vector.h"
24
26
27class BVH;
28class DeviceQueue;
30class Progress;
31class CPUKernels;
32class Scene;
33
34struct OSLGlobals;
36
37/* Device Types */
38
51
61
62#define DEVICE_MASK(type) (DeviceTypeMask)(1 << type)
63
71
79
81 public:
84 /* used for user preferences, should stay fixed with changing hardware config */
85 string id = "CPU";
86 int num = 0;
87 bool display_device = false; /* GPU is used as a display device. */
88 bool has_nanovdb = false; /* Support NanoVDB volumes. */
89 bool has_mnee = true; /* Support MNEE. */
90 bool has_osl = false; /* Support Open Shading Language. */
91 bool has_guiding = false; /* Support path guiding. */
92 bool has_profiling = false; /* Supports runtime collection of profiling info. */
93 bool has_peer_memory = false; /* GPU has P2P access to memory of another GPU. */
94 bool has_gpu_queue = false; /* Device supports GPU queue. */
95 bool use_hardware_raytracing = false; /* Use hardware instructions to accelerate ray tracing. */
96 bool use_metalrt_by_default = false; /* Use MetalRT by default. */
97 /* Indicate that device execution has been optimized by Blender or vendor developers.
98 * For LTS versions, this helps communicate that newer versions may have better performance. */
100
102 KERNEL_OPTIMIZATION_LEVEL_FULL; /* Optimization level applied to path tracing
103 * kernels (Metal only). */
104 DenoiserTypeMask denoisers = DENOISER_NONE; /* Supported denoiser types. */
105 int cpu_threads = 0;
107 string error_msg;
108
109 DeviceInfo() = default;
110
111 bool operator==(const DeviceInfo &info) const
112 {
113 /* Multiple Devices with the same ID would be very bad. */
114 assert(id != info.id ||
115 (type == info.type && num == info.num && description == info.description));
116 return id == info.id && use_hardware_raytracing == info.use_hardware_raytracing &&
118 }
  /* Inequality, defined in terms of operator==. */
  bool operator!=(const DeviceInfo &info) const
  {
    return !(*this == info);
  }
123
124 bool contains_device_type(const DeviceType type) const;
125};
126
127/* Device */
128
129class Device {
130 friend class device_sub_ptr;
131
132 protected:
  /* Protected: devices are constructed through the static Device::create()
   * factory, never directly. Stores references to externally owned stats and
   * profiler objects. */
  Device(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool headless_)
      : info(info_), stats(stats_), profiler(profiler_), headless(headless_)
  {
  }
137
138 string error_msg;
139
140 virtual device_ptr mem_alloc_sub_ptr(device_memory & /*mem*/, size_t /*offset*/, size_t /*size*/)
141 {
142 /* Only required for devices that implement denoising. */
143 assert(false);
144 return (device_ptr)0;
145 }
146 virtual void mem_free_sub_ptr(device_ptr /*ptr*/){};
147
148 public:
149 /* noexcept needed to silence TBB warning. */
150 virtual ~Device() noexcept(false);
151
152 /* info */
  /* Return the last error recorded via set_error(); empty when no error
   * has occurred. */
  virtual const string &error_message()
  {
    return error_msg;
  }
159 {
160 return !error_message().empty();
161 }
162 virtual void set_error(const string &error)
163 {
164 if (!have_error()) {
166 }
167 fprintf(stderr, "%s\n", error.c_str());
168 fflush(stderr);
169 }
170 virtual BVHLayoutMask get_bvh_layout_mask(const uint kernel_features) const = 0;
171
172 /* statistics */
175 bool headless = true;
176
177 /* constant memory */
178 virtual void const_copy_to(const char *name, void *host, const size_t size) = 0;
179
180 /* load/compile kernels, must be called before adding tasks */
  /* Load/compile kernels for the requested feature set; must succeed before
   * tasks are added. The base implementation has nothing to compile. */
  virtual bool load_kernels(uint /*kernel_features*/)
  {
    return true;
  }
185
  /* Load/compile OSL shading kernels; trivially succeeds on devices without
   * OSL support. */
  virtual bool load_osl_kernels()
  {
    return true;
  }
190
191 /* Request cancellation of any long-running work. */
  virtual void cancel() {} /* Base class has no long-running work to cancel. */
193
194 /* Report status and return true if device is ready for rendering. */
  /* Report status and return true if device is ready for rendering.
   * The base implementation is always ready and leaves `status` untouched. */
  virtual bool is_ready(string & /*status*/) const
  {
    return true;
  }
199
200 /* GPU device only functions.
201 * These may not be used on CPU or multi-devices. */
202
203 /* Create new queue for executing kernels in. */
205
206 /* CPU device only functions.
207 * These may not be used on GPU or multi-devices. */
208
209 /* Get CPU kernel functions for native instruction set. */
210 static const CPUKernels &get_cpu_kernels();
211 /* Get kernel globals to pass to kernels. */
213 vector<ThreadKernelGlobalsCPU> & /*kernel_thread_globals*/);
214 /* Get OpenShadingLanguage memory buffer. */
215 virtual OSLGlobals *get_cpu_osl_memory();
216
217 /* Acceleration structure building. */
218 virtual void build_bvh(BVH *bvh, Progress &progress, bool refit);
219 /* Used by Metal and OptiX. */
  virtual void release_bvh(BVH * /*bvh*/) {} /* Default: nothing to release. */
221
222 /* multi device */
  /* Index of a sub-device within a multi-device; a plain device reports 0 for
   * itself. */
  virtual int device_number(Device * /*sub_device*/)
  {
    return 0;
  }
227
228 /* Called after kernel texture setup, and prior to integrator state setup. */
  virtual void optimize_for_scene(Scene * /*scene*/) {} /* No-op by default. */
230
231 virtual bool is_resident(device_ptr /*key*/, Device *sub_device)
232 {
233 /* Memory is always resident if this is not a multi device, regardless of whether the pointer
234 * is valid or not (since it may not have been allocated yet). */
235 return sub_device == this;
236 }
  /* Whether this device has peer-to-peer access to the given device's memory;
   * no such access in the base implementation. */
  virtual bool check_peer_access(Device * /*peer_device*/)
  {
    return false;
  }
241
  /* Whether the given host pointer / device pointer pair refers to memory
   * shared between host and device. Base devices share nothing; GPUDevice
   * overrides this. */
  virtual bool is_shared(const void * /*shared_pointer*/,
                         const device_ptr /*device_pointer*/,
                         Device * /*sub_device*/)
  {
    return false;
  }
248
249 /* Graphics resources interoperability.
250 *
251 * The interoperability comes here by the meaning that the device is capable of computing result
252 * directly into a OpenGL, Vulkan or Metal buffer. */
253
254 /* Check display is to be updated using graphics interoperability.
255 * The interoperability can not be used is it is not supported by the device. But the device
256 * might also force disable the interoperability if it detects that it will be slower than
257 * copying pixels from the render buffer. */
  virtual bool should_use_graphics_interop(const GraphicsInteropDevice & /*interop_device*/,
                                           const bool /*log*/ = false)
  {
    /* Default: no graphics interop; display pixels are copied through host
     * memory instead. */
    return false;
  }
263
264 /* Returns native buffer handle for device pointer. */
  /* Returns native buffer handle for device pointer; null when the device has
   * no native buffer representation. */
  virtual void *get_native_buffer(device_ptr /*ptr*/)
  {
    return nullptr;
  }
269
270 /* Guiding */
271
272 /* Returns path guiding device handle. */
  /* Returns path guiding device handle; logs an error and yields null on
   * devices without guiding support. */
  virtual void *get_guiding_device() const
  {
    LOG(ERROR) << "Request guiding field from a device which does not support it.";
    return nullptr;
  }
278
279 /* Sub-devices */
280
281 /* Run given callback for every individual device which will be handling rendering.
282 * For the single device the callback is called for the device itself. For the multi-device the
283 * callback is only called for the sub-devices. */
  /* Run given callback for every individual device which will be handling
   * rendering; a single device invokes it with itself (multi-devices override
   * to iterate sub-devices). */
  virtual void foreach_device(const std::function<void(Device *)> &callback)
  {
    callback(this);
  }
288
289 /* static */
291 Stats &stats,
293 bool headless);
294
295 static DeviceType type_from_string(const char *name);
296 static string string_from_type(DeviceType type);
298 static vector<DeviceInfo> available_devices(const uint device_type_mask = DEVICE_MASK_ALL);
299 static DeviceInfo dummy_device(const string &error_msg = "");
300 static string device_capabilities(const uint device_type_mask = DEVICE_MASK_ALL);
301 static DeviceInfo get_multi_device(const vector<DeviceInfo> &subdevices,
302 const int threads,
303 bool background);
304
305 /* Tag devices lists for update. */
306 static void tag_update();
307
308 static void free_memory();
309
310 protected:
311 /* Memory allocation, only accessed through device_memory. */
312 friend class MultiDevice;
313 friend class DeviceServer;
314 friend class device_memory;
315
316 virtual void *host_alloc(const MemoryType type, const size_t size);
317 virtual void host_free(const MemoryType type, void *host_pointer, const size_t size);
318
319 virtual void mem_alloc(device_memory &mem) = 0;
320 virtual void mem_copy_to(device_memory &mem) = 0;
321 virtual void mem_move_to_host(device_memory &mem) = 0;
322 virtual void mem_copy_from(
323 device_memory &mem, const size_t y, size_t w, const size_t h, size_t elem) = 0;
324 virtual void mem_zero(device_memory &mem) = 0;
325 virtual void mem_free(device_memory &mem) = 0;
326
327 private:
 328 /* Indicates whether device types and devices lists were initialized. */
329 static bool need_types_update, need_devices_update;
330 static thread_mutex device_mutex;
331 static vector<DeviceInfo> cuda_devices;
332 static vector<DeviceInfo> optix_devices;
333 static vector<DeviceInfo> cpu_devices;
334 static vector<DeviceInfo> hip_devices;
335 static vector<DeviceInfo> metal_devices;
336 static vector<DeviceInfo> oneapi_devices;
337 static uint devices_initialized_mask;
338};
339
340/* Device, which is GPU, with some common functionality for GPU back-ends. */
341class GPUDevice : public Device {
342 protected:
  /* Protected: forwards to the Device constructor and registers the shared
   * texture-info table as global device memory owned by this device. */
  GPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool headless_)
      : Device(info_, stats_, profiler_, headless_), texture_info(this, "texture_info", MEM_GLOBAL)
  {
  }
347
348 public:
350
351 /* For GPUs that can use bindless textures in some way or another. */
355 /* Returns true if the texture info was copied to the device (meaning, some more
356 * re-initialization might be needed). */
357 virtual bool load_texture_info();
358
359 protected:
360 /* Memory allocation, only accessed through device_memory. */
362
364 size_t map_host_used = 0;
365 size_t map_host_limit = 0;
368 using texMemObject = unsigned long long;
369 using arrayMemObject = unsigned long long;
370 struct Mem {
371 Mem() = default;
372
375 };
376 using MemMap = map<device_memory *, Mem>;
379 /* Simple counter which will try to track amount of used device memory */
381
382 virtual void init_host_memory(const size_t preferred_texture_headroom = 0,
383 const size_t preferred_working_headroom = 0);
384 virtual void move_textures_to_host(const size_t size,
385 const size_t headroom,
386 const bool for_texture);
387
388 /* Allocation, deallocation and copy functions, with corresponding
389 * support of device/host allocations. */
390 virtual GPUDevice::Mem *generic_alloc(device_memory &mem, const size_t pitch_padding = 0);
391 virtual void generic_free(device_memory &mem);
392 virtual void generic_copy_to(device_memory &mem);
393
394 /* total - amount of device memory, free - amount of available device memory */
395 virtual void get_device_memory_info(size_t &total, size_t &free) = 0;
396
397 /* Device side memory. */
398 virtual bool alloc_device(void *&device_pointer, const size_t size) = 0;
399 virtual void free_device(void *device_pointer) = 0;
400
401 /* Shared memory. */
402 virtual bool shared_alloc(void *&shared_pointer, const size_t size) = 0;
403 virtual void shared_free(void *shared_pointer) = 0;
404 bool is_shared(const void *shared_pointer,
405 const device_ptr device_pointer,
406 Device *sub_device) override;
407 /* This function should return device pointer corresponding to shared pointer, which
408 * is host buffer, allocated in `shared_alloc`. */
409 virtual void *shared_to_device_pointer(const void *shared_pointer) = 0;
410
411 /* Memory copy. */
412 virtual void copy_host_to_device(void *device_pointer,
413 void *host_pointer,
414 const size_t size) = 0;
415};
416
void BLI_kdtree_nd_free(KDTree *tree)
unsigned int uint
float progress
Definition WM_types.hh:1019
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition btDbvt.cpp:52
void refit(btStridingMeshInterface *triangles, const btVector3 &aabbMin, const btVector3 &aabbMax)
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
Definition btQuadWord.h:119
Definition bvh/bvh.h:67
bool has_execution_optimization
KernelOptimizationLevel kernel_optimization_level
vector< DeviceInfo > multi_devices
DenoiserTypeMask denoisers
bool operator==(const DeviceInfo &info) const
bool display_device
string error_msg
bool contains_device_type(const DeviceType type) const
bool has_peer_memory
bool has_gpu_queue
bool operator!=(const DeviceInfo &info) const
bool use_metalrt_by_default
DeviceInfo()=default
bool has_profiling
DeviceType type
string description
bool use_hardware_raytracing
virtual void host_free(const MemoryType type, void *host_pointer, const size_t size)
virtual void optimize_for_scene(Scene *)
virtual const string & error_message()
virtual bool is_resident(device_ptr, Device *sub_device)
virtual void release_bvh(BVH *)
static void free_memory()
virtual bool is_shared(const void *, const device_ptr, Device *)
static DeviceInfo dummy_device(const string &error_msg="")
Device(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool headless_)
static DeviceInfo get_multi_device(const vector< DeviceInfo > &subdevices, const int threads, bool background)
static void tag_update()
virtual bool is_ready(string &) const
static const CPUKernels & get_cpu_kernels()
virtual bool load_osl_kernels()
friend class device_sub_ptr
virtual void mem_move_to_host(device_memory &mem)=0
virtual void mem_zero(device_memory &mem)=0
string error_msg
virtual void * get_native_buffer(device_ptr)
virtual int device_number(Device *)
friend class MultiDevice
friend class device_memory
virtual ~Device() noexcept(false)
virtual void mem_free_sub_ptr(device_ptr)
virtual unique_ptr< DeviceQueue > gpu_queue_create()
Profiler & profiler
Stats & stats
virtual void const_copy_to(const char *name, void *host, const size_t size)=0
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit)
virtual bool load_kernels(uint)
static DeviceType type_from_string(const char *name)
virtual BVHLayoutMask get_bvh_layout_mask(const uint kernel_features) const =0
bool headless
virtual bool should_use_graphics_interop(const GraphicsInteropDevice &, const bool=false)
virtual void get_cpu_kernel_thread_globals(vector< ThreadKernelGlobalsCPU > &)
virtual bool check_peer_access(Device *)
virtual void mem_free(device_memory &mem)=0
virtual void mem_copy_from(device_memory &mem, const size_t y, size_t w, const size_t h, size_t elem)=0
virtual void set_error(const string &error)
virtual void mem_copy_to(device_memory &mem)=0
virtual device_ptr mem_alloc_sub_ptr(device_memory &, size_t, size_t)
friend class DeviceServer
DeviceInfo info
static string device_capabilities(const uint device_type_mask=DEVICE_MASK_ALL)
static vector< DeviceType > available_types()
virtual void foreach_device(const std::function< void(Device *)> &callback)
static string string_from_type(DeviceType type)
virtual void mem_alloc(device_memory &mem)=0
virtual OSLGlobals * get_cpu_osl_memory()
static vector< DeviceInfo > available_devices(const uint device_type_mask=DEVICE_MASK_ALL)
static unique_ptr< Device > create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
virtual void cancel()
virtual void * get_guiding_device() const
bool have_error()
virtual void * host_alloc(const MemoryType type, const size_t size)
int DenoiserTypeMask
Definition denoise.h:23
@ DENOISER_NONE
Definition denoise.h:16
#define CCL_NAMESPACE_END
KernelOptimizationLevel
@ KERNEL_OPTIMIZATION_LEVEL_OFF
@ KERNEL_OPTIMIZATION_LEVEL_FULL
@ KERNEL_OPTIMIZATION_LEVEL_INTERSECT
@ KERNEL_OPTIMIZATION_NUM_LEVELS
DeviceTypeMask
@ DEVICE_MASK_OPTIX
@ DEVICE_MASK_CPU
@ DEVICE_MASK_HIP
@ DEVICE_MASK_ALL
@ DEVICE_MASK_CUDA
@ DEVICE_MASK_METAL
@ DEVICE_MASK_ONEAPI
MetalRTSetting
@ METALRT_OFF
@ METALRT_NUM_SETTINGS
@ METALRT_ON
@ METALRT_AUTO
DeviceType
@ DEVICE_DUMMY
@ DEVICE_NONE
@ DEVICE_METAL
@ DEVICE_MULTI
@ DEVICE_CUDA
@ DEVICE_CPU
@ DEVICE_HIPRT
@ DEVICE_OPTIX
@ DEVICE_HIP
@ DEVICE_ONEAPI
#define this
#define class
#define assert(assertion)
#define LOG(severity)
Definition log.h:32
static void error(const char *str)
int BVHLayoutMask
Definition params.h:50
texMemObject texobject
Mem()=default
arrayMemObject array
thread_mutex texture_info_mutex
bool is_shared(const void *shared_pointer, const device_ptr device_pointer, Device *sub_device) override
bool need_texture_info
virtual bool shared_alloc(void *&shared_pointer, const size_t size)=0
size_t map_host_used
virtual void shared_free(void *shared_pointer)=0
GPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool headless_)
map< device_memory *, Mem > MemMap
unsigned long long texMemObject
bool can_map_host
size_t device_texture_headroom
virtual void get_device_memory_info(size_t &total, size_t &free)=0
virtual bool alloc_device(void *&device_pointer, const size_t size)=0
size_t device_working_headroom
friend class device_memory
virtual void * shared_to_device_pointer(const void *shared_pointer)=0
virtual void generic_copy_to(device_memory &mem)
virtual void move_textures_to_host(const size_t size, const size_t headroom, const bool for_texture)
virtual void copy_host_to_device(void *device_pointer, void *host_pointer, const size_t size)=0
virtual bool load_texture_info()
size_t map_host_limit
virtual void free_device(void *device_pointer)=0
unsigned long long arrayMemObject
thread_mutex device_mem_map_mutex
virtual void generic_free(device_memory &mem)
virtual void init_host_memory(const size_t preferred_texture_headroom=0, const size_t preferred_working_headroom=0)
size_t device_mem_in_use
virtual GPUDevice::Mem * generic_alloc(device_memory &mem, const size_t pitch_padding=0)
device_vector< TextureInfo > texture_info
MemMap device_mem_map
~GPUDevice() noexcept(false) override
std::mutex thread_mutex
Definition thread.h:27
uint64_t device_ptr
Definition types_base.h:44
bool override
Definition wm_files.cc:1184