Blender V5.0
device/device.h
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#pragma once
6
7#include <cstdlib>
8#include <functional>
9
10#include "bvh/params.h"
11
12#include "device/denoise.h"
13#include "device/memory.h"
14
15#include "util/profiling.h"
16#include "util/stats.h"
17#include "util/string.h"
18#include "util/texture.h"
19#include "util/thread.h"
20#include "util/types.h"
21#include "util/unique_ptr.h"
22#include "util/vector.h"
23
25
26class BVH;
27class DeviceQueue;
29class Progress;
30class CPUKernels;
31class Scene;
32
33struct OSLGlobals;
35
36/* Device Types */
37
50
60
61#define DEVICE_MASK(type) (DeviceTypeMask)(1 << type)
62
70
78
80 public:
83 /* used for user preferences, should stay fixed with changing hardware config */
84 string id = "CPU";
85 int num = 0;
86 bool display_device = false; /* GPU is used as a display device. */
87 bool has_nanovdb = false; /* Support NanoVDB volumes. */
88 bool has_mnee = true; /* Support MNEE. */
89 bool has_osl = false; /* Support Open Shading Language. */
90 bool has_guiding = false; /* Support path guiding. */
91 bool has_profiling = false; /* Supports runtime collection of profiling info. */
92 bool has_peer_memory = false; /* GPU has P2P access to memory of another GPU. */
93 bool has_gpu_queue = false; /* Device supports GPU queue. */
94 bool use_hardware_raytracing = false; /* Use hardware instructions to accelerate ray tracing. */
95 bool use_metalrt_by_default = false; /* Use MetalRT by default. */
96 /* Indicate that device execution has been optimized by Blender or vendor developers.
97 * For LTS versions, this helps communicate that newer versions may have better performance. */
99
101 KERNEL_OPTIMIZATION_LEVEL_FULL; /* Optimization level applied to path tracing
102 * kernels (Metal only). */
103 DenoiserTypeMask denoisers = DENOISER_NONE; /* Supported denoiser types. */
104 int cpu_threads = 0;
106 string error_msg;
107
108 DeviceInfo() = default;
109
110 bool operator==(const DeviceInfo &info) const
111 {
112 /* Multiple Devices with the same ID would be very bad. */
113 assert(id != info.id ||
114 (type == info.type && num == info.num && description == info.description));
115 return id == info.id && use_hardware_raytracing == info.use_hardware_raytracing &&
117 }
118 bool operator!=(const DeviceInfo &info) const
119 {
120 return !(*this == info);
121 }
122};
123
124/* Device */
125
126class Device {
127 friend class device_sub_ptr;
128
129 protected:
130 Device(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool headless_)
131 : info(info_), stats(stats_), profiler(profiler_), headless(headless_)
132 {
133 }
134
135 string error_msg;
136
137 virtual device_ptr mem_alloc_sub_ptr(device_memory & /*mem*/, size_t /*offset*/, size_t /*size*/)
138 {
139 /* Only required for devices that implement denoising. */
140 assert(false);
141 return (device_ptr)0;
142 }
143 virtual void mem_free_sub_ptr(device_ptr /*ptr*/){};
144
145 public:
146 /* noexcept needed to silence TBB warning. */
147 virtual ~Device() noexcept(false);
148
149 /* info */
151 virtual const string &error_message()
152 {
153 return error_msg;
154 }
156 {
157 return !error_message().empty();
158 }
159 virtual void set_error(const string &error);
160 virtual BVHLayoutMask get_bvh_layout_mask(const uint kernel_features) const = 0;
161
162 /* statistics */
165 bool headless = true;
166
167 /* constant memory */
168 virtual void const_copy_to(const char *name, void *host, const size_t size) = 0;
169
170 /* load/compile kernels, must be called before adding tasks */
171 virtual bool load_kernels(uint /*kernel_features*/)
172 {
173 return true;
174 }
175
176 virtual bool load_osl_kernels()
177 {
178 return true;
179 }
180
181 /* Request cancellation of any long-running work. */
182 virtual void cancel() {}
183
184 /* Report status and return true if device is ready for rendering. */
185 virtual bool is_ready(string & /*status*/) const
186 {
187 return true;
188 }
189
190 /* GPU device only functions.
191 * These may not be used on CPU or multi-devices. */
192
193 /* Create new queue for executing kernels in. */
195
196 /* CPU device only functions.
197 * These may not be used on GPU or multi-devices. */
198
199 /* Get CPU kernel functions for native instruction set. */
200 static const CPUKernels &get_cpu_kernels();
201 /* Get kernel globals to pass to kernels. */
203 vector<ThreadKernelGlobalsCPU> & /*kernel_thread_globals*/);
204 /* Get OpenShadingLanguage memory buffer. */
205 virtual OSLGlobals *get_cpu_osl_memory();
206
207 /* Acceleration structure building. */
208 virtual void build_bvh(BVH *bvh, Progress &progress, bool refit);
209 /* Used by Metal and OptiX. */
210 virtual void release_bvh(BVH * /*bvh*/) {}
211
212 /* Inform of BVH limits, return true to force-rebuild all BVHs and kernels. */
213 virtual bool set_bvh_limits(size_t /*instance_count*/, size_t /*max_prim_count*/)
214 {
215 return false;
216 }
217
218 /* multi device */
219 virtual int device_number(Device * /*sub_device*/)
220 {
221 return 0;
222 }
223
224 /* Called after kernel texture setup, and prior to integrator state setup. */
225 virtual void optimize_for_scene(Scene * /*scene*/) {}
226
227 virtual bool is_resident(device_ptr /*key*/, Device *sub_device)
228 {
229 /* Memory is always resident if this is not a multi device, regardless of whether the pointer
230 * is valid or not (since it may not have been allocated yet). */
231 return sub_device == this;
232 }
233 virtual bool check_peer_access(Device * /*peer_device*/)
234 {
235 return false;
236 }
237
238 virtual bool is_shared(const void * /*shared_pointer*/,
239 const device_ptr /*device_pointer*/,
240 Device * /*sub_device*/)
241 {
242 return false;
243 }
244
245 /* Graphics resources interoperability.
246 *
247 * Interoperability here means that the device is capable of computing the result
248 * directly into an OpenGL, Vulkan or Metal buffer. */
249
250 /* Check whether the display is to be updated using graphics interoperability.
251 * The interoperability cannot be used if it is not supported by the device. But the device
252 * might also force disable the interoperability if it detects that it will be slower than
253 * copying pixels from the render buffer. */
254 virtual bool should_use_graphics_interop(const GraphicsInteropDevice & /*interop_device*/,
255 const bool /*log*/ = false)
256 {
257 return false;
258 }
259
260 /* Returns native buffer handle for device pointer. */
261 virtual void *get_native_buffer(device_ptr /*ptr*/)
262 {
263 return nullptr;
264 }
265
266 /* Guiding */
267
268 /* Returns path guiding device handle. */
269 virtual void *get_guiding_device() const;
270
271 /* Sub-devices */
272
273 /* Run given callback for every individual device which will be handling rendering.
274 * For the single device the callback is called for the device itself. For the multi-device the
275 * callback is only called for the sub-devices. */
276 virtual void foreach_device(const std::function<void(Device *)> &callback)
277 {
278 callback(this);
279 }
280
281 /* static */
283 Stats &stats,
285 bool headless);
286
287 static DeviceType type_from_string(const char *name);
288 static string string_from_type(DeviceType type);
290 static vector<DeviceInfo> available_devices(const uint device_type_mask = DEVICE_MASK_ALL);
291 static DeviceInfo dummy_device(const string &error_msg = "");
292 static string device_capabilities(const uint device_type_mask = DEVICE_MASK_ALL);
293 static DeviceInfo get_multi_device(const vector<DeviceInfo> &subdevices,
294 const int threads,
295 bool background);
296
297 /* Tag devices lists for update. */
298 static void tag_update();
299
300 static void free_memory();
301
302 protected:
303 /* Memory allocation, only accessed through device_memory. */
304 friend class MultiDevice;
305 friend class DeviceServer;
306 friend class device_memory;
307
308 virtual void *host_alloc(const MemoryType type, const size_t size);
309 virtual void host_free(const MemoryType type, void *host_pointer, const size_t size);
310
311 virtual void mem_alloc(device_memory &mem) = 0;
312 virtual void mem_copy_to(device_memory &mem) = 0;
313 virtual void mem_move_to_host(device_memory &mem) = 0;
314 virtual void mem_copy_from(
315 device_memory &mem, const size_t y, size_t w, const size_t h, size_t elem) = 0;
316 virtual void mem_zero(device_memory &mem) = 0;
317 virtual void mem_free(device_memory &mem) = 0;
318
319 private:
320 /* Indicates whether device types and device lists were initialized. */
321 static bool need_types_update, need_devices_update;
322 static thread_mutex device_mutex;
323 static vector<DeviceInfo> cuda_devices;
324 static vector<DeviceInfo> optix_devices;
325 static vector<DeviceInfo> cpu_devices;
326 static vector<DeviceInfo> hip_devices;
327 static vector<DeviceInfo> metal_devices;
328 static vector<DeviceInfo> oneapi_devices;
329 static uint devices_initialized_mask;
330};
331
332/* Device, which is GPU, with some common functionality for GPU back-ends. */
333class GPUDevice : public Device {
334 protected:
335 GPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool headless_)
336 : Device(info_, stats_, profiler_, headless_), texture_info(this, "texture_info", MEM_GLOBAL)
337 {
338 }
339
340 public:
342
343 /* For GPUs that can use bindless textures in some way or another. */
347 /* Returns true if the texture info was copied to the device (meaning, some more
348 * re-initialization might be needed). */
349 virtual bool load_texture_info();
350
351 protected:
352 /* Memory allocation, only accessed through device_memory. */
353 friend class device_memory;
354
356 size_t map_host_used = 0;
357 size_t map_host_limit = 0;
360 using texMemObject = unsigned long long;
361 using arrayMemObject = unsigned long long;
362 struct Mem {
363 Mem() = default;
364
367 };
368 using MemMap = map<device_memory *, Mem>;
371 /* Simple counter which will try to track the amount of used device memory. */
373
374 virtual void init_host_memory(const size_t preferred_texture_headroom = 0,
375 const size_t preferred_working_headroom = 0);
376 virtual void move_textures_to_host(const size_t size,
377 const size_t headroom,
378 const bool for_texture);
379
380 /* Allocation, deallocation and copy functions, with corresponding
381 * support of device/host allocations. */
382 virtual GPUDevice::Mem *generic_alloc(device_memory &mem, const size_t pitch_padding = 0);
383 virtual void generic_free(device_memory &mem);
384 virtual void generic_copy_to(device_memory &mem);
385
386 /* total - amount of device memory, free - amount of available device memory */
387 virtual void get_device_memory_info(size_t &total, size_t &free) = 0;
388
389 /* Device side memory. */
390 virtual bool alloc_device(void *&device_pointer, const size_t size) = 0;
391 virtual void free_device(void *device_pointer) = 0;
392
393 /* Shared memory. */
394 virtual bool shared_alloc(void *&shared_pointer, const size_t size) = 0;
395 virtual void shared_free(void *shared_pointer) = 0;
396 bool is_shared(const void *shared_pointer,
397 const device_ptr device_pointer,
398 Device *sub_device) override;
399 /* This function should return the device pointer corresponding to the shared pointer,
400 * which is a host buffer allocated in `shared_alloc`. */
401 virtual void *shared_to_device_pointer(const void *shared_pointer) = 0;
402
403 /* Memory copy. */
404 virtual void copy_host_to_device(void *device_pointer,
405 void *host_pointer,
406 const size_t size) = 0;
407};
408
void BLI_kdtree_nd_ free(KDTree *tree)
unsigned int uint
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition btDbvt.cpp:52
void refit(btStridingMeshInterface *triangles, const btVector3 &aabbMin, const btVector3 &aabbMax)
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
Definition btQuadWord.h:119
Definition bvh/bvh.h:67
bool has_execution_optimization
KernelOptimizationLevel kernel_optimization_level
vector< DeviceInfo > multi_devices
DenoiserTypeMask denoisers
bool operator==(const DeviceInfo &info) const
bool display_device
string error_msg
bool has_peer_memory
bool has_gpu_queue
bool operator!=(const DeviceInfo &info) const
bool use_metalrt_by_default
DeviceInfo()=default
bool has_profiling
DeviceType type
string description
bool use_hardware_raytracing
virtual void host_free(const MemoryType type, void *host_pointer, const size_t size)
virtual void optimize_for_scene(Scene *)
virtual void * get_guiding_device() const
virtual const string & error_message()
virtual bool is_resident(device_ptr, Device *sub_device)
virtual void release_bvh(BVH *)
static void free_memory()
virtual bool is_shared(const void *, const device_ptr, Device *)
static DeviceInfo dummy_device(const string &error_msg="")
Device(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool headless_)
static DeviceInfo get_multi_device(const vector< DeviceInfo > &subdevices, const int threads, bool background)
static void tag_update()
virtual bool is_ready(string &) const
static const CPUKernels & get_cpu_kernels()
virtual bool load_osl_kernels()
friend class device_sub_ptr
virtual void mem_move_to_host(device_memory &mem)=0
virtual void mem_zero(device_memory &mem)=0
string error_msg
virtual void * get_native_buffer(device_ptr)
virtual int device_number(Device *)
friend class MultiDevice
friend class device_memory
virtual ~Device() noexcept(false)
virtual void mem_free_sub_ptr(device_ptr)
virtual unique_ptr< DeviceQueue > gpu_queue_create()
Profiler & profiler
Stats & stats
virtual void const_copy_to(const char *name, void *host, const size_t size)=0
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit)
virtual bool load_kernels(uint)
static DeviceType type_from_string(const char *name)
virtual void set_error(const string &error)
virtual BVHLayoutMask get_bvh_layout_mask(const uint kernel_features) const =0
bool headless
virtual bool should_use_graphics_interop(const GraphicsInteropDevice &, const bool=false)
virtual void get_cpu_kernel_thread_globals(vector< ThreadKernelGlobalsCPU > &)
virtual bool check_peer_access(Device *)
virtual void mem_free(device_memory &mem)=0
virtual void mem_copy_from(device_memory &mem, const size_t y, size_t w, const size_t h, size_t elem)=0
virtual void mem_copy_to(device_memory &mem)=0
virtual device_ptr mem_alloc_sub_ptr(device_memory &, size_t, size_t)
friend class DeviceServer
DeviceInfo info
static string device_capabilities(const uint device_type_mask=DEVICE_MASK_ALL)
virtual bool set_bvh_limits(size_t, size_t)
static vector< DeviceType > available_types()
virtual void foreach_device(const std::function< void(Device *)> &callback)
static string string_from_type(DeviceType type)
virtual void mem_alloc(device_memory &mem)=0
virtual OSLGlobals * get_cpu_osl_memory()
static vector< DeviceInfo > available_devices(const uint device_type_mask=DEVICE_MASK_ALL)
static unique_ptr< Device > create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
virtual void cancel()
bool have_error()
virtual void * host_alloc(const MemoryType type, const size_t size)
int DenoiserTypeMask
Definition denoise.h:23
@ DENOISER_NONE
Definition denoise.h:16
#define CCL_NAMESPACE_END
KernelOptimizationLevel
@ KERNEL_OPTIMIZATION_LEVEL_OFF
@ KERNEL_OPTIMIZATION_LEVEL_FULL
@ KERNEL_OPTIMIZATION_LEVEL_INTERSECT
@ KERNEL_OPTIMIZATION_NUM_LEVELS
DeviceTypeMask
@ DEVICE_MASK_OPTIX
@ DEVICE_MASK_CPU
@ DEVICE_MASK_HIP
@ DEVICE_MASK_ALL
@ DEVICE_MASK_CUDA
@ DEVICE_MASK_METAL
@ DEVICE_MASK_ONEAPI
MetalRTSetting
@ METALRT_OFF
@ METALRT_NUM_SETTINGS
@ METALRT_ON
@ METALRT_AUTO
DeviceType
@ DEVICE_DUMMY
@ DEVICE_NONE
@ DEVICE_METAL
@ DEVICE_MULTI
@ DEVICE_CUDA
@ DEVICE_CPU
@ DEVICE_HIPRT
@ DEVICE_OPTIX
@ DEVICE_HIP
@ DEVICE_ONEAPI
#define assert(assertion)
static void error(const char *str)
int BVHLayoutMask
Definition params.h:50
const char * name
texMemObject texobject
Mem()=default
arrayMemObject array
thread_mutex texture_info_mutex
bool is_shared(const void *shared_pointer, const device_ptr device_pointer, Device *sub_device) override
bool need_texture_info
virtual bool shared_alloc(void *&shared_pointer, const size_t size)=0
size_t map_host_used
virtual void shared_free(void *shared_pointer)=0
GPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool headless_)
map< device_memory *, Mem > MemMap
unsigned long long texMemObject
bool can_map_host
size_t device_texture_headroom
virtual void get_device_memory_info(size_t &total, size_t &free)=0
virtual bool alloc_device(void *&device_pointer, const size_t size)=0
size_t device_working_headroom
friend class device_memory
virtual void * shared_to_device_pointer(const void *shared_pointer)=0
virtual void generic_copy_to(device_memory &mem)
virtual void move_textures_to_host(const size_t size, const size_t headroom, const bool for_texture)
virtual void copy_host_to_device(void *device_pointer, void *host_pointer, const size_t size)=0
virtual bool load_texture_info()
size_t map_host_limit
virtual void free_device(void *device_pointer)=0
unsigned long long arrayMemObject
thread_mutex device_mem_map_mutex
virtual void generic_free(device_memory &mem)
virtual void init_host_memory(const size_t preferred_texture_headroom=0, const size_t preferred_working_headroom=0)
size_t device_mem_in_use
virtual GPUDevice::Mem * generic_alloc(device_memory &mem, const size_t pitch_padding=0)
device_vector< TextureInfo > texture_info
MemMap device_mem_map
~GPUDevice() noexcept(false) override
std::mutex thread_mutex
Definition thread.h:27
uint64_t device_ptr
Definition types_base.h:44
bool override
Definition wm_files.cc:1192