Blender V4.3
device/multi/device.cpp
/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
 *
 * SPDX-License-Identifier: Apache-2.0 */

#include "device/multi/device.h"

#include <sstream>
#include <stdlib.h>

#include "bvh/multi.h"

#include "device/device.h"
#include "device/queue.h"

#include "scene/geometry.h"

#include "util/foreach.h"
#include "util/list.h"
#include "util/log.h"
#include "util/map.h"
#include "util/time.h"

CCL_NAMESPACE_BEGIN
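
/* Wrapper device that hides a group of physical render devices behind the regular
 * Device interface. Each SubDevice keeps a ptr_map translating the virtual device_ptr
 * handed out by the multi-device into the real pointer on that sub-device, and devices
 * that can access each other's memory are grouped into "peer islands" so an allocation
 * only needs one resident copy per island. */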
class MultiDevice : public Device {
 public:
  struct SubDevice {
    Stats stats;
    Device *device;
    map<device_ptr, device_ptr> ptr_map;
    int peer_island_index = -1;
  };

  list<SubDevice> devices;
  device_ptr unique_key;
  vector<vector<SubDevice *>> peer_islands;

  MultiDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
      : Device(info, stats, profiler, headless), unique_key(1)
  {
    foreach (const DeviceInfo &subinfo, info.multi_devices) {
      /* Always add CPU devices at the back since GPU devices can change
       * host memory pointers, which CPU uses as device pointer. */
      SubDevice *sub;
      if (subinfo.type == DEVICE_CPU) {
        devices.emplace_back();
        sub = &devices.back();
      }
      else {
        devices.emplace_front();
        sub = &devices.front();
      }

      /* The pointer to 'sub->stats' will stay valid even after new devices
       * are added, since 'devices' is a linked list. */
      sub->device = Device::create(subinfo, sub->stats, profiler, headless);
    }

    /* Build a list of peer islands for the available render devices */
    foreach (SubDevice &sub, devices) {
      /* First ensure that every device is in at least one peer island */
      if (sub.peer_island_index < 0) {
        peer_islands.emplace_back();
        sub.peer_island_index = (int)peer_islands.size() - 1;
        peer_islands[sub.peer_island_index].push_back(&sub);
      }

      if (!info.has_peer_memory) {
        continue;
      }

      /* Second check peer access between devices and fill up the islands accordingly */
      foreach (SubDevice &peer_sub, devices) {
        if (peer_sub.peer_island_index < 0 &&
            peer_sub.device->info.type == sub.device->info.type &&
            peer_sub.device->check_peer_access(sub.device))
        {
          peer_sub.peer_island_index = sub.peer_island_index;
          peer_islands[sub.peer_island_index].push_back(&peer_sub);
        }
      }
    }
  }

  ~MultiDevice()
  {
    foreach (SubDevice &sub, devices)
      delete sub.device;
  }

  const string &error_message() override
  {
    error_msg.clear();

    foreach (SubDevice &sub, devices)
      error_msg += sub.device->error_message();

    return error_msg;
  }
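
  /* The layout shared by all sub-devices is the intersection of their masks; the union
   * is kept to detect mixed setups (e.g. GPU plus Embree on the CPU) that need one
   * acceleration structure per device, signalled by the BVH_LAYOUT_MULTI_* values. */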
  virtual BVHLayoutMask get_bvh_layout_mask(uint kernel_features) const override
  {
    BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_ALL;
    BVHLayoutMask bvh_layout_mask_all = BVH_LAYOUT_NONE;
    foreach (const SubDevice &sub_device, devices) {
      BVHLayoutMask device_bvh_layout_mask = sub_device.device->get_bvh_layout_mask(
          kernel_features);
      bvh_layout_mask &= device_bvh_layout_mask;
      bvh_layout_mask_all |= device_bvh_layout_mask;
    }

    /* With multiple OptiX devices, every device needs its own acceleration structure */
    if (bvh_layout_mask == BVH_LAYOUT_OPTIX) {
      return BVH_LAYOUT_MULTI_OPTIX;
    }

    /* With multiple Metal devices, every device needs its own acceleration structure */
    if (bvh_layout_mask == BVH_LAYOUT_METAL) {
      return BVH_LAYOUT_MULTI_METAL;
    }

    if (bvh_layout_mask == BVH_LAYOUT_HIPRT) {
      return BVH_LAYOUT_MULTI_HIPRT;
    }

    /* With multiple oneAPI devices, every device needs its own acceleration structure */
    if (bvh_layout_mask == BVH_LAYOUT_EMBREEGPU) {
      return BVH_LAYOUT_MULTI_EMBREEGPU;
    }

    /* When devices do not share a common BVH layout, fall back to creating one for each */
    const BVHLayoutMask BVH_LAYOUT_OPTIX_EMBREE = (BVH_LAYOUT_OPTIX | BVH_LAYOUT_EMBREE);
    if ((bvh_layout_mask_all & BVH_LAYOUT_OPTIX_EMBREE) == BVH_LAYOUT_OPTIX_EMBREE) {
      return BVH_LAYOUT_MULTI_OPTIX_EMBREE;
    }
    const BVHLayoutMask BVH_LAYOUT_METAL_EMBREE = (BVH_LAYOUT_METAL | BVH_LAYOUT_EMBREE);
    if ((bvh_layout_mask_all & BVH_LAYOUT_METAL_EMBREE) == BVH_LAYOUT_METAL_EMBREE) {
      return BVH_LAYOUT_MULTI_METAL_EMBREE;
    }
    const BVHLayoutMask BVH_LAYOUT_EMBREEGPU_EMBREE = (BVH_LAYOUT_EMBREEGPU | BVH_LAYOUT_EMBREE);
    if ((bvh_layout_mask_all & BVH_LAYOUT_EMBREEGPU_EMBREE) == BVH_LAYOUT_EMBREEGPU_EMBREE) {
      return BVH_LAYOUT_MULTI_EMBREEGPU_EMBREE;
    }

    const BVHLayoutMask BVH_LAYOUT_HIPRT_EMBREE = (BVH_LAYOUT_HIPRT | BVH_LAYOUT_EMBREE);
    if ((bvh_layout_mask_all & BVH_LAYOUT_HIPRT_EMBREE) == BVH_LAYOUT_HIPRT_EMBREE) {
      return BVH_LAYOUT_MULTI_HIPRT_EMBREE;
    }

    return bvh_layout_mask;
  }

  bool load_kernels(const uint kernel_features) override
  {
    foreach (SubDevice &sub, devices)
      if (!sub.device->load_kernels(kernel_features)) {
        return false;
      }

    return true;
  }

  bool load_osl_kernels() override
  {
    foreach (SubDevice &sub, devices)
      if (!sub.device->load_osl_kernels()) {
        return false;
      }

    return true;
  }
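
  /* For layouts that can be shared (BVH2, Embree) a single BVH is built once on the
   * last device, which is the CPU when one is present. For the BVH_LAYOUT_MULTI_*
   * layouts each sub-device gets its own BVH: the geometry is temporarily pointed at
   * the matching sub-BVH, built per device, and then restored to the multi BVH. */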
  void build_bvh(BVH *bvh, Progress &progress, bool refit) override
  {
    /* Try to build and share a single acceleration structure, if possible */
    if (bvh->params.bvh_layout == BVH_LAYOUT_BVH2 || bvh->params.bvh_layout == BVH_LAYOUT_EMBREE) {
      devices.back().device->build_bvh(bvh, progress, refit);
      return;
    }

    assert(bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX ||
           bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL ||
           bvh->params.bvh_layout == BVH_LAYOUT_MULTI_HIPRT ||
           bvh->params.bvh_layout == BVH_LAYOUT_MULTI_EMBREEGPU ||
           bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE ||
           bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL_EMBREE ||
           bvh->params.bvh_layout == BVH_LAYOUT_MULTI_HIPRT_EMBREE ||
           bvh->params.bvh_layout == BVH_LAYOUT_MULTI_EMBREEGPU_EMBREE);

    BVHMulti *const bvh_multi = static_cast<BVHMulti *>(bvh);
    bvh_multi->sub_bvhs.resize(devices.size());

    vector<BVHMulti *> geom_bvhs;
    geom_bvhs.reserve(bvh->geometry.size());
    foreach (Geometry *geom, bvh->geometry) {
      geom_bvhs.push_back(static_cast<BVHMulti *>(geom->bvh));
    }

    /* Broadcast acceleration structure build to all render devices */
    size_t i = 0;
    foreach (SubDevice &sub, devices) {
      /* Change geometry BVH pointers to the sub BVH */
      for (size_t k = 0; k < bvh->geometry.size(); ++k) {
        bvh->geometry[k]->bvh = geom_bvhs[k]->sub_bvhs[i];
      }

      if (!bvh_multi->sub_bvhs[i]) {
        BVHParams params = bvh->params;
        if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX) {
          params.bvh_layout = BVH_LAYOUT_OPTIX;
        }
        else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL) {
          params.bvh_layout = BVH_LAYOUT_METAL;
        }
        else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_HIPRT) {
          params.bvh_layout = BVH_LAYOUT_HIPRT;
        }
        else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_EMBREEGPU) {
          params.bvh_layout = BVH_LAYOUT_EMBREEGPU;
        }
        else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE) {
          params.bvh_layout = sub.device->info.type == DEVICE_OPTIX ? BVH_LAYOUT_OPTIX :
                                                                      BVH_LAYOUT_EMBREE;
        }
        else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL_EMBREE) {
          params.bvh_layout = sub.device->info.type == DEVICE_METAL ? BVH_LAYOUT_METAL :
                                                                      BVH_LAYOUT_EMBREE;
        }
        else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_HIPRT_EMBREE) {
          params.bvh_layout = sub.device->info.type == DEVICE_HIP ? BVH_LAYOUT_HIPRT :
                                                                    BVH_LAYOUT_EMBREE;
        }
        else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_EMBREEGPU_EMBREE) {
          params.bvh_layout = sub.device->info.type == DEVICE_ONEAPI ? BVH_LAYOUT_EMBREEGPU :
                                                                       BVH_LAYOUT_EMBREE;
        }
        /* Skip building a bottom level acceleration structure for non-instanced geometry on Embree
         * (since they are put into the top level directly, see bvh_embree.cpp) */
        if (!params.top_level && params.bvh_layout == BVH_LAYOUT_EMBREE &&
            !bvh->geometry[0]->is_instanced())
        {
          i++;
          continue;
        }

        bvh_multi->sub_bvhs[i] = BVH::create(params, bvh->geometry, bvh->objects, sub.device);
      }

      sub.device->build_bvh(bvh_multi->sub_bvhs[i], progress, refit);
      i++;
    }

    /* Change geometry BVH pointers back to the multi BVH. */
    for (size_t k = 0; k < bvh->geometry.size(); ++k) {
      bvh->geometry[k]->bvh = geom_bvhs[k];
    }
  }

  virtual void *get_cpu_osl_memory() override
  {
    /* Always return the OSL memory of the CPU device (this works since the constructor above
     * guarantees that CPU devices are always added to the back). */
    if (devices.size() > 1 && devices.back().device->info.type != DEVICE_CPU) {
      return NULL;
    }
    return devices.back().device->get_cpu_osl_memory();
  }

  bool is_resident(device_ptr key, Device *sub_device) override
  {
    foreach (SubDevice &sub, devices) {
      if (sub.device == sub_device) {
        return find_matching_mem_device(key, sub)->device == sub_device;
      }
    }
    return false;
  }
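
  /* Memory lookup helpers: device_pointer values handed out by the multi-device are
   * virtual keys. find_matching_mem_device() locates the sub-device in the same peer
   * island whose ptr_map owns a key, while find_suitable_mem_device() additionally
   * picks the island member with the lowest memory usage when the key is still zero,
   * i.e. the buffer has not been allocated anywhere yet. */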
  SubDevice *find_matching_mem_device(device_ptr key, SubDevice &sub)
  {
    assert(key != 0 && (sub.peer_island_index >= 0 || sub.ptr_map.find(key) != sub.ptr_map.end()));

    /* Get the memory owner of this key (first try current device, then peer devices) */
    SubDevice *owner_sub = &sub;
    if (owner_sub->ptr_map.find(key) == owner_sub->ptr_map.end()) {
      foreach (SubDevice *island_sub, peer_islands[sub.peer_island_index]) {
        if (island_sub != owner_sub && island_sub->ptr_map.find(key) != island_sub->ptr_map.end())
        {
          owner_sub = island_sub;
        }
      }
    }
    return owner_sub;
  }

  SubDevice *find_suitable_mem_device(device_ptr key, const vector<SubDevice *> &island)
  {
    assert(!island.empty());

    /* Get the memory owner of this key or the device with the lowest memory usage when new */
    SubDevice *owner_sub = island.front();
    foreach (SubDevice *island_sub, island) {
      if (key ? (island_sub->ptr_map.find(key) != island_sub->ptr_map.end()) :
                (island_sub->device->stats.mem_used < owner_sub->device->stats.mem_used))
      {
        owner_sub = island_sub;
      }
    }
    return owner_sub;
  }

  device_ptr find_matching_mem(device_ptr key, SubDevice &sub)
  {
    return find_matching_mem_device(key, sub)->ptr_map[key];
  }
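
  /* Every allocation made through the multi-device is identified by a virtual key.
   * Within each peer island only one sub-device receives the actual allocation, and the
   * real pointer it returns is recorded in that sub-device's ptr_map under the key. */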
  void mem_alloc(device_memory &mem) override
  {
    device_ptr key = unique_key++;

    assert(mem.type == MEM_READ_ONLY || mem.type == MEM_READ_WRITE || mem.type == MEM_DEVICE_ONLY);
    /* The remaining memory types can be distributed across devices */
    foreach (const vector<SubDevice *> &island, peer_islands) {
      SubDevice *owner_sub = find_suitable_mem_device(key, island);
      mem.device = owner_sub->device;
      mem.device_pointer = 0;
      mem.device_size = 0;

      owner_sub->device->mem_alloc(mem);
      owner_sub->ptr_map[key] = mem.device_pointer;
    }

    mem.device = this;
    mem.device_pointer = key;
    stats.mem_alloc(mem.device_size);
  }
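
  /* Copies allocate on demand (key 0) and upload to one owner per island; global and
   * texture memory is additionally mirrored to every island member, since each device
   * needs its own texture objects and kernel-global pointers. */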
  void mem_copy_to(device_memory &mem) override
  {
    device_ptr existing_key = mem.device_pointer;
    device_ptr key = (existing_key) ? existing_key : unique_key++;
    size_t existing_size = mem.device_size;

    /* The tile buffers are allocated on each device (see below), so copy to all of them */
    foreach (const vector<SubDevice *> &island, peer_islands) {
      SubDevice *owner_sub = find_suitable_mem_device(existing_key, island);
      mem.device = owner_sub->device;
      mem.device_pointer = (existing_key) ? owner_sub->ptr_map[existing_key] : 0;
      mem.device_size = existing_size;

      owner_sub->device->mem_copy_to(mem);
      owner_sub->ptr_map[key] = mem.device_pointer;

      if (mem.type == MEM_GLOBAL || mem.type == MEM_TEXTURE) {
        /* Need to create texture objects and update pointer in kernel globals on all devices */
        foreach (SubDevice *island_sub, island) {
          if (island_sub != owner_sub) {
            island_sub->device->mem_copy_to(mem);
          }
        }
      }
    }

    mem.device = this;
    mem.device_pointer = key;
    stats.mem_alloc(mem.device_size - existing_size);
  }
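
  /* Reads split the buffer by rows: each sub-device copies back its own horizontal
   * slice of height h / num_devices, with the last device taking the remainder. */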
  void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override
  {
    device_ptr key = mem.device_pointer;
    size_t i = 0, sub_h = h / devices.size();

    foreach (SubDevice &sub, devices) {
      size_t sy = y + i * sub_h;
      size_t sh = (i == (size_t)devices.size() - 1) ? h - sub_h * i : sub_h;

      SubDevice *owner_sub = find_matching_mem_device(key, sub);
      mem.device = owner_sub->device;
      mem.device_pointer = owner_sub->ptr_map[key];

      owner_sub->device->mem_copy_from(mem, sy, w, sh, elem);
      i++;
    }

    mem.device = this;
    mem.device_pointer = key;
  }

  void mem_zero(device_memory &mem) override
  {
    device_ptr existing_key = mem.device_pointer;
    device_ptr key = (existing_key) ? existing_key : unique_key++;
    size_t existing_size = mem.device_size;

    foreach (const vector<SubDevice *> &island, peer_islands) {
      SubDevice *owner_sub = find_suitable_mem_device(existing_key, island);
      mem.device = owner_sub->device;
      mem.device_pointer = (existing_key) ? owner_sub->ptr_map[existing_key] : 0;
      mem.device_size = existing_size;

      owner_sub->device->mem_zero(mem);
      owner_sub->ptr_map[key] = mem.device_pointer;
    }

    mem.device = this;
    mem.device_pointer = key;
    stats.mem_alloc(mem.device_size - existing_size);
  }
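
  /* Freeing mirrors mem_copy_to(): the owning sub-device in each island releases the
   * allocation, and texture memory that was mirrored to the other island members is
   * freed there as well. */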
  void mem_free(device_memory &mem) override
  {
    device_ptr key = mem.device_pointer;
    size_t existing_size = mem.device_size;

    /* Free memory that was allocated for all devices (see above) on each device */
    foreach (const vector<SubDevice *> &island, peer_islands) {
      SubDevice *owner_sub = find_matching_mem_device(key, *island.front());
      mem.device = owner_sub->device;
      mem.device_pointer = owner_sub->ptr_map[key];
      mem.device_size = existing_size;

      owner_sub->device->mem_free(mem);
      owner_sub->ptr_map.erase(owner_sub->ptr_map.find(key));

      if (mem.type == MEM_TEXTURE) {
        /* Free texture objects on all devices */
        foreach (SubDevice *island_sub, island) {
          if (island_sub != owner_sub) {
            island_sub->device->mem_free(mem);
          }
        }
      }
    }

    mem.device = this;
    mem.device_pointer = 0;
    mem.device_size = 0;
    stats.mem_free(existing_size);
  }

  void const_copy_to(const char *name, void *host, size_t size) override
  {
    foreach (SubDevice &sub, devices)
      sub.device->const_copy_to(name, host, size);
  }

  int device_number(Device *sub_device) override
  {
    int i = 0;

    foreach (SubDevice &sub, devices) {
      if (sub.device == sub_device) {
        return i;
      }
      i++;
    }

    return -1;
  }

  virtual void foreach_device(const function<void(Device *)> &callback) override
  {
    foreach (SubDevice &sub, devices) {
      callback(sub.device);
    }
  }
};

Device *device_multi_create(const DeviceInfo &info,
                            Stats &stats,
                            Profiler &profiler,
                            bool headless)
{
  return new MultiDevice(info, stats, profiler, headless);
}

CCL_NAMESPACE_END
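
How this factory is driven lies outside this file; the following is a minimal, illustrative sketch only. It assumes the usual Cycles flow where a DeviceInfo of type DEVICE_MULTI lists its sub-devices in multi_devices (for example entries obtained from Device::available_devices()) and where the Stats and Profiler objects outlive the created device; gpu_info and cpu_info are hypothetical placeholders.

  /* Sketch only: gpu_info/cpu_info are assumed, pre-filled DeviceInfo entries. */
  DeviceInfo multi_info;
  multi_info.type = DEVICE_MULTI;
  multi_info.multi_devices.push_back(gpu_info); /* hypothetical GPU entry */
  multi_info.multi_devices.push_back(cpu_info); /* hypothetical CPU entry */

  Stats stats;
  Profiler profiler;
  Device *device = device_multi_create(multi_info, stats, profiler, /*headless=*/true);
  /* ... load kernels, allocate memory, render ... */
  delete device;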