Blender V5.0
oneapi/device_impl.h
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2021-2025 Intel Corporation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#ifdef WITH_ONEAPI
6# include "device/device.h"
8# include "device/oneapi/queue.h"
10
11# include "util/map.h"
12# include "util/unique_ptr.h"
13
15
class DeviceQueue;

/* Plain function-pointer type of the callback accepted by `OneapiDevice::iterate_devices()`.
 *
 * The callback receives two C strings, an integer, three boolean flags and an opaque pointer, in
 * that order, and returns nothing.
 * NOTE(review): the parameters are unnamed in this declaration; the trailing `void *` is
 * presumably the `user_ptr` handed to `iterate_devices()` - confirm against the implementation
 * before relying on the meaning of any individual argument. */
using OneAPIDeviceIteratorCallback =
    void (*)(const char *, const char *, const int, bool, bool, bool, void *);
20
21class OneapiDevice : public GPUDevice {
22 private:
23 SyclQueue *device_queue_ = nullptr;
24# ifdef WITH_EMBREE_GPU
25 RTCDevice embree_device = nullptr;
26# if RTC_VERSION >= 40400
27 RTCTraversable embree_traversable = nullptr;
28# else
29 RTCScene embree_traversable = nullptr;
30# endif
31# if RTC_VERSION >= 40302
32 thread_mutex scene_data_mutex;
33 vector<RTCScene> all_embree_scenes;
34# endif
35# endif
36 using ConstMemMap = map<string, unique_ptr<device_vector<uchar>>>;
37 ConstMemMap const_mem_map_;
38 void *kg_memory_ = nullptr;
39 void *kg_memory_device_ = nullptr;
40 size_t kg_memory_size_ = 0;
41 size_t max_memory_on_device_ = 0;
42 std::string oneapi_error_string_;
43 bool use_hardware_raytracing = false;
44 unsigned int kernel_features = 0;
45 int scene_max_shaders_ = 0;
46 /* Currently, there are some functional errors in the different software layers of the DPC++/L0
47 * support regarding several Intel's dGPU executions. As a result, to provide proper
48 * functionality to Blender users, we need to detect such configurations and enable some
49 * workarounds for them. These workarounds don't make sense to enable by default due to a
50 * performance impact - which is not as important for the discussed configuration, as without
51 * workarounds, the configuration with several dGPUs would simply not be functional, making the
52 * performance topic irrelevant anyway. For an example of such issues, see Blender issue #138384.
53 */
54 bool is_several_intel_dgpu_devices_detected = false;
55
56 size_t get_free_mem() const;
57
58 public:
59 BVHLayoutMask get_bvh_layout_mask(const uint requested_features) const override;
60
61 OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless);
62
63 ~OneapiDevice() override;
64# ifdef WITH_EMBREE_GPU
65 void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
66# endif
67 bool check_peer_access(Device *peer_device) override;
68
69 bool load_kernels(const uint requested_features) override;
70
71 void reserve_private_memory(const uint kernel_features);
72
73 string oneapi_error_message();
74
75 int scene_max_shaders();
76
77 void *kernel_globals_device_pointer();
78
79 /* All memory types. */
80 void mem_alloc(device_memory &mem) override;
81 void mem_copy_to(device_memory &mem) override;
82 void mem_move_to_host(device_memory &mem) override;
83 void mem_copy_from(
84 device_memory &mem, const size_t y, size_t w, const size_t h, size_t elem) override;
85 void mem_copy_from(device_memory &mem)
86 {
87 mem_copy_from(mem, 0, 0, 0, 0);
88 }
89 void mem_zero(device_memory &mem) override;
90 void mem_free(device_memory &mem) override;
91
92 device_ptr mem_alloc_sub_ptr(device_memory &mem, const size_t offset, size_t /*size*/) override;
93
94 /* Global memory. */
95 void global_alloc(device_memory &mem);
96 void global_copy_to(device_memory &mem);
97 void global_free(device_memory &mem);
98
99 /* Texture memory. */
100 void tex_alloc(device_texture &mem);
101 void tex_copy_to(device_texture &mem);
102 void tex_free(device_texture &mem);
103
104 /* Host side memory, override for more efficient copies. */
105 void *host_alloc(const MemoryType type, const size_t size) override;
106 void host_free(const MemoryType type, void *host_pointer, const size_t size) override;
107
108 /* Device side memory. */
109 void get_device_memory_info(size_t &total, size_t &free) override;
110 bool alloc_device(void *&device_pointer, const size_t size) override;
111 void free_device(void *device_pointer) override;
112
113 /* Shared memory. */
114 bool shared_alloc(void *&shared_pointer, const size_t size) override;
115 void shared_free(void *shared_pointer) override;
116 void *shared_to_device_pointer(const void *shared_pointer) override;
117
118 /* Memory copy. */
119 void copy_host_to_device(void *device_pointer, void *host_pointer, const size_t size) override;
120 void const_copy_to(const char *name, void *host, const size_t size) override;
121
122 /* Graphics resources interoperability. */
123 bool should_use_graphics_interop(const GraphicsInteropDevice &interop_device,
124 const bool log) override;
125
126 unique_ptr<DeviceQueue> gpu_queue_create() override;
127
128 /* NOTE(@nsirgien): Create this methods to avoid some compilation problems on Windows with host
129 * side compilation (MSVC). */
130 void *usm_aligned_alloc_host(const size_t memory_size, const size_t alignment);
131 void usm_free(void *usm_ptr);
132
133 static void architecture_information(const SyclDevice *device, string &name, bool &is_optimized);
134 static char *device_capabilities();
135 static void iterate_devices(OneAPIDeviceIteratorCallback cb, void *user_ptr);
136
137 size_t get_memcapacity();
138 int get_num_multiprocessors();
139 int get_max_num_threads_per_multiprocessor();
140 bool queue_synchronize(SyclQueue *queue);
141 bool kernel_globals_size(size_t &kernel_global_size);
142 void set_global_memory(SyclQueue *queue,
143 void *kernel_globals,
144 const char *memory_name,
145 void *memory_device_pointer);
146 bool enqueue_kernel(KernelContext *kernel_context,
147 const int kernel,
148 const size_t global_size,
149 const size_t local_size,
150 void **args);
151 void get_adjusted_global_and_local_sizes(SyclQueue *queue,
152 const DeviceKernel kernel,
153 size_t &kernel_global_size,
154 size_t &kernel_local_size);
155 SyclQueue *sycl_queue();
156
157 protected:
158 bool can_use_hardware_raytracing_for_features(const uint requested_features) const;
159 void check_usm(SyclQueue *queue, const void *usm_ptr, bool allow_host);
160 bool create_queue(SyclQueue *&external_queue,
161 const int device_index,
162 void *embree_device,
163 bool *is_several_intel_dgpu_devices_detected_pointer);
164 void free_queue(SyclQueue *queue);
165 void *usm_aligned_alloc_host(SyclQueue *queue, const size_t memory_size, const size_t alignment);
166 void *usm_alloc_device(SyclQueue *queue, const size_t memory_size);
167 void usm_free(SyclQueue *queue, void *usm_ptr);
168 bool usm_memcpy(SyclQueue *queue, void *dest, void *src, const size_t num_bytes);
169 bool usm_memset(SyclQueue *queue, void *usm_ptr, unsigned char value, const size_t num_bytes);
170};
171
173
174#endif
void BLI_kdtree_nd_ free(KDTree *tree)
unsigned int uint
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition btDbvt.cpp:52
void refit(btStridingMeshInterface *triangles, const btVector3 &aabbMin, const btVector3 &aabbMax)
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
Definition btQuadWord.h:119
virtual void host_free(const MemoryType type, void *host_pointer, const size_t size)
virtual void mem_move_to_host(device_memory &mem)=0
virtual void mem_zero(device_memory &mem)=0
virtual unique_ptr< DeviceQueue > gpu_queue_create()
virtual void const_copy_to(const char *name, void *host, const size_t size)=0
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit)
virtual bool load_kernels(uint)
virtual BVHLayoutMask get_bvh_layout_mask(const uint kernel_features) const =0
virtual bool should_use_graphics_interop(const GraphicsInteropDevice &, const bool=false)
virtual bool check_peer_access(Device *)
virtual void mem_free(device_memory &mem)=0
virtual void mem_copy_from(device_memory &mem, const size_t y, size_t w, const size_t h, size_t elem)=0
virtual void mem_copy_to(device_memory &mem)=0
virtual device_ptr mem_alloc_sub_ptr(device_memory &, size_t, size_t)
static string device_capabilities(const uint device_type_mask=DEVICE_MASK_ALL)
virtual void mem_alloc(device_memory &mem)=0
virtual void * host_alloc(const MemoryType type, const size_t size)
#define CCL_NAMESPACE_END
#define log
#define RTCTraversable
DeviceKernel
ccl_device bool BVH_FUNCTION_FULL_NAME BVH(KernelGlobals kg, const ccl_private Ray *ray, ccl_private LocalIntersection *local_isect, const int local_object, ccl_private uint *lcg_state, const int max_hits)
Definition local.h:28
int BVHLayoutMask
Definition params.h:50
const char * name
virtual bool shared_alloc(void *&shared_pointer, const size_t size)=0
virtual void shared_free(void *shared_pointer)=0
virtual void get_device_memory_info(size_t &total, size_t &free)=0
virtual bool alloc_device(void *&device_pointer, const size_t size)=0
virtual void * shared_to_device_pointer(const void *shared_pointer)=0
virtual void copy_host_to_device(void *device_pointer, void *host_pointer, const size_t size)=0
virtual void free_device(void *device_pointer)=0
std::mutex thread_mutex
Definition thread.h:27
uint64_t device_ptr
Definition types_base.h:44