Blender V5.0
device/multi/device.cpp
/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
 *
 * SPDX-License-Identifier: Apache-2.0 */

#include "device/multi/device.h"
#include "device/device.h"
#include "device/queue.h"

#include <cstdlib>
#include <functional>

#include "bvh/multi.h"

#include "scene/geometry.h"

#include "util/list.h"
#include "util/map.h"

CCL_NAMESPACE_BEGIN

class MultiDevice : public Device {
 public:
  struct SubDevice {
    Stats stats;
    unique_ptr<Device> device;
    map<device_ptr, device_ptr> ptr_map;
    int peer_island_index = -1;
  };

  list<SubDevice> devices;
  device_ptr unique_key = 1;
  vector<vector<SubDevice *>> peer_islands;
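
  /* Each SubDevice owns a private Stats instance and a ptr_map that
   * translates the MultiDevice's virtual device_ptr keys into the real
   * pointers returned by that sub-device's allocator; peer_islands groups
   * sub-devices that can access each other's memory. */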

  MultiDevice(const DeviceInfo &info_, Stats &stats, Profiler &profiler, bool headless)
      : Device(info_, stats, profiler, headless)
  {
    verify_hardware_raytracing();

    for (const DeviceInfo &subinfo : this->info.multi_devices) {
      /* Always add CPU devices at the back since GPU devices can change
       * host memory pointers, which CPU uses as device pointer. */
      SubDevice *sub;
      if (subinfo.type == DEVICE_CPU) {
        devices.emplace_back();
        sub = &devices.back();
      }
      else {
        devices.emplace_front();
        sub = &devices.front();
      }

      /* The pointer to 'sub->stats' will stay valid even after new devices
       * are added, since 'devices' is a linked list. */
      sub->device = Device::create(subinfo, sub->stats, profiler, headless);
    }

    /* Build a list of peer islands for the available render devices */
    for (SubDevice &sub : devices) {
      /* First ensure that every device is in at least one peer island */
      if (sub.peer_island_index < 0) {
        peer_islands.emplace_back();
        sub.peer_island_index = (int)peer_islands.size() - 1;
        peer_islands[sub.peer_island_index].push_back(&sub);
      }

      if (!info.has_peer_memory) {
        continue;
      }

      /* Second, check peer access between devices and fill up the islands accordingly */
      for (SubDevice &peer_sub : devices) {
        if (peer_sub.peer_island_index < 0 &&
            peer_sub.device->info.type == sub.device->info.type &&
            peer_sub.device->check_peer_access(sub.device.get()))
        {
          peer_sub.peer_island_index = sub.peer_island_index;
          peer_islands[sub.peer_island_index].push_back(&peer_sub);
        }
      }
    }
  }
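
  /* Example: assuming two GPUs of the same type that report peer access to
   * each other, plus a CPU, and info.has_peer_memory set, the loops above
   * yield peer_islands = {{gpu_a, gpu_b}, {cpu}} (hypothetical names);
   * allocations then happen once per island rather than once per device. */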

  void verify_hardware_raytracing()
  {
    /* Determine if we can use hardware ray-tracing. It is only supported if all selected
     * GPU devices support it. Neither the backends nor the scene update code support mixed
     * BVH2 and hardware ray-tracing. The CPU device will ignore this setting. */
    bool have_disabled_hardware_rt = false;
    bool have_enabled_hardware_rt = false;

    for (const DeviceInfo &subinfo : info.multi_devices) {
      if (subinfo.type != DEVICE_CPU) {
        if (subinfo.use_hardware_raytracing) {
          have_enabled_hardware_rt = true;
        }
        else {
          have_disabled_hardware_rt = true;
        }
      }
    }

    info.use_hardware_raytracing = have_enabled_hardware_rt && !have_disabled_hardware_rt;

    for (DeviceInfo &subinfo : info.multi_devices) {
      if (subinfo.type != DEVICE_CPU) {
        subinfo.use_hardware_raytracing = info.use_hardware_raytracing;
      }
    }
  }
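
  /* Example: pairing one GPU with hardware ray-tracing enabled and one with
   * it disabled sets both flags above, so use_hardware_raytracing ends up
   * false and is propagated to every non-CPU sub-device. */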

  const string &error_message() override
  {
    error_msg.clear();

    for (SubDevice &sub : devices) {
      error_msg += sub.device->error_message();
    }

    return error_msg;
  }

  BVHLayoutMask get_bvh_layout_mask(const uint kernel_features) const override
  {
    BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_ALL;
    BVHLayoutMask bvh_layout_mask_all = BVH_LAYOUT_NONE;
    for (const SubDevice &sub_device : devices) {
      BVHLayoutMask device_bvh_layout_mask = sub_device.device->get_bvh_layout_mask(
          kernel_features);
      bvh_layout_mask &= device_bvh_layout_mask;
      bvh_layout_mask_all |= device_bvh_layout_mask;
    }

    /* With multiple OptiX devices, every device needs its own acceleration structure */
    if (bvh_layout_mask == BVH_LAYOUT_OPTIX) {
      return BVH_LAYOUT_MULTI_OPTIX;
    }

    /* With multiple Metal devices, every device needs its own acceleration structure */
    if (bvh_layout_mask == BVH_LAYOUT_METAL) {
      return BVH_LAYOUT_MULTI_METAL;
    }

    if (bvh_layout_mask == BVH_LAYOUT_HIPRT) {
      return BVH_LAYOUT_MULTI_HIPRT;
    }

    /* With multiple oneAPI devices, every device needs its own acceleration structure */
    if (bvh_layout_mask == BVH_LAYOUT_EMBREEGPU) {
      return BVH_LAYOUT_MULTI_EMBREEGPU;
    }

    /* When devices do not share a common BVH layout, fall back to creating one for each */
    const BVHLayoutMask BVH_LAYOUT_OPTIX_EMBREE = (BVH_LAYOUT_OPTIX | BVH_LAYOUT_EMBREE);
    if ((bvh_layout_mask_all & BVH_LAYOUT_OPTIX_EMBREE) == BVH_LAYOUT_OPTIX_EMBREE) {
      return BVH_LAYOUT_MULTI_OPTIX_EMBREE;
    }
    const BVHLayoutMask BVH_LAYOUT_METAL_EMBREE = (BVH_LAYOUT_METAL | BVH_LAYOUT_EMBREE);
    if ((bvh_layout_mask_all & BVH_LAYOUT_METAL_EMBREE) == BVH_LAYOUT_METAL_EMBREE) {
      return BVH_LAYOUT_MULTI_METAL_EMBREE;
    }
    const BVHLayoutMask BVH_LAYOUT_EMBREEGPU_EMBREE = (BVH_LAYOUT_EMBREEGPU | BVH_LAYOUT_EMBREE);
    if ((bvh_layout_mask_all & BVH_LAYOUT_EMBREEGPU_EMBREE) == BVH_LAYOUT_EMBREEGPU_EMBREE) {
      return BVH_LAYOUT_MULTI_EMBREEGPU_EMBREE;
    }

    const BVHLayoutMask BVH_LAYOUT_HIPRT_EMBREE = (BVH_LAYOUT_HIPRT | BVH_LAYOUT_EMBREE);
    if ((bvh_layout_mask_all & BVH_LAYOUT_HIPRT_EMBREE) == BVH_LAYOUT_HIPRT_EMBREE) {
      return BVH_LAYOUT_MULTI_HIPRT_EMBREE;
    }

    return bvh_layout_mask;
  }
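
  /* Worked example (assuming the OptiX device reports BVH_LAYOUT_OPTIX and
   * the CPU reports BVH_LAYOUT_BVH2 | BVH_LAYOUT_EMBREE): the intersection
   * is empty, but bvh_layout_mask_all contains OPTIX | EMBREE, so the
   * OPTIX_EMBREE test matches and BVH_LAYOUT_MULTI_OPTIX_EMBREE is returned,
   * giving every device its own acceleration structure. */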

  bool load_kernels(const uint kernel_features) override
  {
    for (SubDevice &sub : devices) {
      if (!sub.device->load_kernels(kernel_features)) {
        return false;
      }
    }

    return true;
  }

  bool load_osl_kernels() override
  {
    for (SubDevice &sub : devices) {
      if (!sub.device->load_osl_kernels()) {
        return false;
      }
    }

    return true;
  }

  void build_bvh(BVH *bvh, Progress &progress, bool refit) override
  {
    /* Try to build and share a single acceleration structure, if possible */
    if (bvh->params.bvh_layout == BVH_LAYOUT_BVH2 || bvh->params.bvh_layout == BVH_LAYOUT_EMBREE) {
      devices.back().device->build_bvh(bvh, progress, refit);
      return;
    }

    assert(bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX ||
           bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL ||
           bvh->params.bvh_layout == BVH_LAYOUT_MULTI_HIPRT ||
           bvh->params.bvh_layout == BVH_LAYOUT_MULTI_EMBREEGPU ||
           bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE ||
           bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL_EMBREE ||
           bvh->params.bvh_layout == BVH_LAYOUT_MULTI_HIPRT_EMBREE ||
           bvh->params.bvh_layout == BVH_LAYOUT_MULTI_EMBREEGPU_EMBREE);

    BVHMulti *const bvh_multi = static_cast<BVHMulti *>(bvh);
    bvh_multi->sub_bvhs.resize(devices.size());

    /* Temporarily move ownership of BVH on geometry to this vector, to swap
     * it for each sub device. Need to find a better way to handle this. */
    vector<unique_ptr<BVH>> geom_bvhs;
    geom_bvhs.reserve(bvh->geometry.size());
    for (Geometry *geom : bvh->geometry) {
      geom_bvhs.push_back(std::move(geom->bvh));
    }

    /* Broadcast acceleration structure build to all render devices */
    size_t i = 0;
    for (SubDevice &sub : devices) {
      /* Change geometry BVH pointers to the sub BVH */
      for (size_t k = 0; k < bvh->geometry.size(); ++k) {
        bvh->geometry[k]->bvh.release();  // NOLINT: was not actually the owner
        bvh->geometry[k]->bvh.reset(
            static_cast<BVHMulti *>(geom_bvhs[k].get())->sub_bvhs[i].get());
      }

      if (!bvh_multi->sub_bvhs[i]) {
        BVHParams params = bvh->params;
        if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX) {
          params.bvh_layout = BVH_LAYOUT_OPTIX;
        }
        else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL) {
          params.bvh_layout = BVH_LAYOUT_METAL;
        }
        else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_HIPRT) {
          params.bvh_layout = BVH_LAYOUT_HIPRT;
        }
        else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_EMBREEGPU) {
          params.bvh_layout = BVH_LAYOUT_EMBREEGPU;
        }
        else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE) {
          params.bvh_layout = sub.device->info.type == DEVICE_OPTIX ? BVH_LAYOUT_OPTIX :
                                                                      BVH_LAYOUT_EMBREE;
        }
        else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL_EMBREE) {
          params.bvh_layout = sub.device->info.type == DEVICE_METAL ? BVH_LAYOUT_METAL :
                                                                      BVH_LAYOUT_EMBREE;
        }
        else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_HIPRT_EMBREE) {
          params.bvh_layout = sub.device->info.type == DEVICE_HIP ? BVH_LAYOUT_HIPRT :
                                                                    BVH_LAYOUT_EMBREE;
        }
        else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_EMBREEGPU_EMBREE) {
          params.bvh_layout = sub.device->info.type == DEVICE_ONEAPI ? BVH_LAYOUT_EMBREEGPU :
                                                                       BVH_LAYOUT_EMBREE;
        }

        /* Skip building a bottom level acceleration structure for non-instanced geometry on
         * Embree (since they are put into the top level directly, see bvh_embree.cpp) */
        if (!params.top_level && params.bvh_layout == BVH_LAYOUT_EMBREE &&
            !bvh->geometry[0]->is_instanced())
        {
          i++;
          continue;
        }

        bvh_multi->sub_bvhs[i] = BVH::create(
            params, bvh->geometry, bvh->objects, sub.device.get());
      }

      sub.device->build_bvh(bvh_multi->sub_bvhs[i].get(), progress, refit);
      i++;
    }

    /* Change BVH ownership back to Geometry. */
    for (size_t k = 0; k < bvh->geometry.size(); ++k) {
      bvh->geometry[k]->bvh.release();  // NOLINT: was not actually the owner
      bvh->geometry[k]->bvh = std::move(geom_bvhs[k]);
    }
  }
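
  /* Note on the release()/reset() pairs above: the geometry unique_ptrs are
   * only borrowed, pointed at each device's sub-BVH for the duration of that
   * device's build, and finally restored from geom_bvhs, so no ownership is
   * actually transferred despite the unique_ptr API. */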

  OSLGlobals *get_cpu_osl_memory() override
  {
    /* Always return the OSL memory of the CPU device (this works since the constructor above
     * guarantees that CPU devices are always added to the back). */
    if (devices.size() > 1 && devices.back().device->info.type != DEVICE_CPU) {
      return nullptr;
    }
    return devices.back().device->get_cpu_osl_memory();
  }

  bool is_resident(device_ptr key, Device *sub_device) override
  {
    for (SubDevice &sub : devices) {
      if (sub.device.get() == sub_device) {
        return find_matching_mem_device(key, sub)->device.get() == sub_device;
      }
    }
    return false;
  }

  SubDevice *find_matching_mem_device(device_ptr key, SubDevice &sub)
  {
    assert(key != 0 && (sub.peer_island_index >= 0 || sub.ptr_map.find(key) != sub.ptr_map.end()));

    /* Get the memory owner of this key (first try current device, then peer devices) */
    SubDevice *owner_sub = &sub;
    if (owner_sub->ptr_map.find(key) == owner_sub->ptr_map.end()) {
      for (SubDevice *island_sub : peer_islands[sub.peer_island_index]) {
        if (island_sub != owner_sub && island_sub->ptr_map.find(key) != island_sub->ptr_map.end())
        {
          owner_sub = island_sub;
        }
      }
    }
    return owner_sub;
  }

  SubDevice *find_suitable_mem_device(device_ptr key, const vector<SubDevice *> &island)
  {
    assert(!island.empty());

    /* Get the memory owner of this key or the device with the lowest memory usage when new */
    SubDevice *owner_sub = island.front();
    for (SubDevice *island_sub : island) {
      if (key ? (island_sub->ptr_map.find(key) != island_sub->ptr_map.end()) :
                (island_sub->device->stats.mem_used < owner_sub->device->stats.mem_used))
      {
        owner_sub = island_sub;
      }
    }
    return owner_sub;
  }
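
  /* Selection rule: a zero key means a fresh allocation, so the island member
   * with the lowest stats.mem_used wins; a non-zero key means the allocation
   * already exists and the member whose ptr_map contains the key is returned. */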

  device_ptr find_matching_mem(device_ptr key, SubDevice &sub)
  {
    return find_matching_mem_device(key, sub)->ptr_map[key];
  }

  void *host_alloc(const MemoryType type, const size_t size) override
  {
    for (SubDevice &sub : devices) {
      if (sub.device->info.type != DEVICE_CPU) {
        return sub.device->host_alloc(type, size);
      }
    }

    return Device::host_alloc(type, size);
  }

  void host_free(const MemoryType type, void *host_pointer, const size_t size) override
  {
    for (SubDevice &sub : devices) {
      if (sub.device->info.type != DEVICE_CPU) {
        sub.device->host_free(type, host_pointer, size);
        return;
      }
    }

    Device::host_free(type, host_pointer, size);
  }

  void mem_alloc(device_memory &mem) override
  {
    device_ptr key = unique_key++;

    assert(mem.type == MEM_READ_ONLY || mem.type == MEM_READ_WRITE || mem.type == MEM_DEVICE_ONLY);
    /* The remaining memory types can be distributed across devices */
    for (const vector<SubDevice *> &island : peer_islands) {
      SubDevice *owner_sub = find_suitable_mem_device(key, island);
      mem.device = owner_sub->device.get();
      mem.device_pointer = 0;
      mem.device_size = 0;

      owner_sub->device->mem_alloc(mem);
      owner_sub->ptr_map[key] = mem.device_pointer;
    }

    mem.device = this;
    mem.device_pointer = key;
    stats.mem_alloc(mem.device_size);
  }
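
  /* Note: after this call mem.device_pointer holds the MultiDevice's virtual
   * key, not a real device address; later operations translate it back per
   * device through ptr_map (see find_matching_mem above). */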

  void mem_copy_to(device_memory &mem) override
  {
    device_ptr existing_key = mem.device_pointer;
    device_ptr key = (existing_key) ? existing_key : unique_key++;
    size_t existing_size = mem.device_size;

    for (const vector<SubDevice *> &island : peer_islands) {
      SubDevice *owner_sub = find_suitable_mem_device(existing_key, island);
      mem.device = owner_sub->device.get();
      mem.device_pointer = (existing_key) ? owner_sub->ptr_map[existing_key] : 0;
      mem.device_size = existing_size;

      owner_sub->device->mem_copy_to(mem);
      owner_sub->ptr_map[key] = mem.device_pointer;

      if (mem.type == MEM_GLOBAL || mem.type == MEM_TEXTURE) {
        /* Need to create texture objects and update pointer in kernel globals on all devices */
        for (SubDevice *island_sub : island) {
          if (island_sub != owner_sub) {
            island_sub->device->mem_copy_to(mem);
          }
        }
      }
    }

    mem.device = this;
    mem.device_pointer = key;
    stats.mem_alloc(mem.device_size - existing_size);
  }
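
  /* As with mem_alloc, a fresh key is minted when the memory was not
   * allocated before; global/texture memory is additionally re-uploaded to
   * the non-owning island members, since each device keeps its own texture
   * objects and kernel-globals pointers. */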

  void mem_move_to_host(device_memory &mem) override
  {
    assert(mem.type == MEM_GLOBAL || mem.type == MEM_TEXTURE);

    device_ptr existing_key = mem.device_pointer;
    device_ptr key = (existing_key) ? existing_key : unique_key++;
    size_t existing_size = mem.device_size;

    for (const vector<SubDevice *> &island : peer_islands) {
      SubDevice *owner_sub = find_suitable_mem_device(existing_key, island);
      mem.device = owner_sub->device.get();
      mem.device_pointer = (existing_key) ? owner_sub->ptr_map[existing_key] : 0;
      mem.device_size = existing_size;

      if (!owner_sub->device->is_shared(
              mem.shared_pointer, mem.device_pointer, owner_sub->device.get()))
      {
        owner_sub->device->mem_move_to_host(mem);
        owner_sub->ptr_map[key] = mem.device_pointer;

        /* Need to create texture objects and update pointer in kernel globals on all devices */
        for (SubDevice *island_sub : island) {
          if (island_sub != owner_sub) {
            island_sub->device->mem_move_to_host(mem);
          }
        }
      }
    }

    mem.device = this;
    mem.device_pointer = key;
    stats.mem_alloc(mem.device_size - existing_size);
  }

  bool is_shared(const void *shared_pointer, const device_ptr key, Device *sub_device) override
  {
    if (key == 0) {
      return false;
    }

    for (const SubDevice &sub : devices) {
      if (sub.device.get() == sub_device) {
        return sub_device->is_shared(shared_pointer, sub.ptr_map.at(key), sub_device);
      }
    }

    assert(!"is_shared failed to find matching device");
    return false;
  }

  void mem_copy_from(
      device_memory &mem, const size_t y, size_t w, const size_t h, size_t elem) override
  {
    device_ptr key = mem.device_pointer;
    const size_t sub_h = h / devices.size();
    size_t i = 0;

    for (SubDevice &sub : devices) {
      size_t sy = y + i * sub_h;
      size_t sh = (i == (size_t)devices.size() - 1) ? h - sub_h * i : sub_h;

      SubDevice *owner_sub = find_matching_mem_device(key, sub);
      mem.device = owner_sub->device.get();
      mem.device_pointer = owner_sub->ptr_map[key];

      owner_sub->device->mem_copy_from(mem, sy, w, sh, elem);
      i++;
    }

    mem.device = this;
    mem.device_pointer = key;
  }
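
  /* Worked example: with 3 devices and h = 100 rows, sub_h = 33, so the
   * first two devices copy rows [y, y + 33) and [y + 33, y + 66), while the
   * last device copies the remaining 34 rows. */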

  void mem_zero(device_memory &mem) override
  {
    device_ptr existing_key = mem.device_pointer;
    device_ptr key = (existing_key) ? existing_key : unique_key++;
    size_t existing_size = mem.device_size;

    for (const vector<SubDevice *> &island : peer_islands) {
      SubDevice *owner_sub = find_suitable_mem_device(existing_key, island);
      mem.device = owner_sub->device.get();
      mem.device_pointer = (existing_key) ? owner_sub->ptr_map[existing_key] : 0;
      mem.device_size = existing_size;

      owner_sub->device->mem_zero(mem);
      owner_sub->ptr_map[key] = mem.device_pointer;
    }

    mem.device = this;
    mem.device_pointer = key;
    stats.mem_alloc(mem.device_size - existing_size);
  }

  void mem_free(device_memory &mem) override
  {
    device_ptr key = mem.device_pointer;
    size_t existing_size = mem.device_size;

    /* Free memory that was allocated for all devices (see above) on each device */
    for (const vector<SubDevice *> &island : peer_islands) {
      SubDevice *owner_sub = find_matching_mem_device(key, *island.front());
      mem.device = owner_sub->device.get();
      mem.device_pointer = owner_sub->ptr_map[key];
      mem.device_size = existing_size;

      owner_sub->device->mem_free(mem);
      owner_sub->ptr_map.erase(owner_sub->ptr_map.find(key));

      if (mem.type == MEM_TEXTURE) {
        /* Free texture objects on all devices */
        for (SubDevice *island_sub : island) {
          if (island_sub != owner_sub) {
            island_sub->device->mem_free(mem);
          }
        }
      }
    }

    mem.device = this;
    mem.device_pointer = 0;
    mem.device_size = 0;
    stats.mem_free(existing_size);
  }

  void const_copy_to(const char *name, void *host, const size_t size) override
  {
    for (SubDevice &sub : devices) {
      sub.device->const_copy_to(name, host, size);
    }
  }

  int device_number(Device *sub_device) override
  {
    int i = 0;

    for (SubDevice &sub : devices) {
      if (sub.device.get() == sub_device) {
        return i;
      }
      i++;
    }

    return -1;
  }

  void foreach_device(const std::function<void(Device *)> &callback) override
  {
    for (SubDevice &sub : devices) {
      sub.device->foreach_device(callback);
    }
  }
};

unique_ptr<Device> device_multi_create(const DeviceInfo &info,
                                       Stats &stats,
                                       Profiler &profiler,
                                       bool headless)
{
  return make_unique<MultiDevice>(info, stats, profiler, headless);
}

CCL_NAMESPACE_END
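
/* Minimal usage sketch (hypothetical caller, not part of this translation
 * unit). In Cycles this factory is normally reached through Device::create
 * when DeviceInfo::type is DEVICE_MULTI rather than being called directly:
 *
 *   DeviceInfo info;        // info.type == DEVICE_MULTI
 *   // info.multi_devices populated with the selected sub-device infos
 *   Stats stats;
 *   Profiler profiler;
 *   unique_ptr<Device> device = device_multi_create(info, stats, profiler, false);
 */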