Blender V4.3
cycles/device/memory.h
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#ifndef __DEVICE_MEMORY_H__
6#define __DEVICE_MEMORY_H__
7
8/* Device Memory
9 *
10 * Data types for allocating, copying and freeing device memory. */
11
12#include "util/array.h"
13#include "util/half.h"
14#include "util/string.h"
15#include "util/texture.h"
16#include "util/types.h"
17#include "util/vector.h"
18
20
21class Device;
22class GPUDevice;
23class CUDADevice;
24class OptiXDevice;
25class HIPDevice;
26class HIPRTDevice;
27class MetalDevice;
28class OneapiDevice;
29
37
38/* Supported Data Types */
39
50
51static constexpr size_t datatype_size(DataType datatype)
52{
53 switch (datatype) {
54 case TYPE_UNKNOWN:
55 return 1;
56 case TYPE_UCHAR:
57 return sizeof(uchar);
58 case TYPE_FLOAT:
59 return sizeof(float);
60 case TYPE_UINT:
61 return sizeof(uint);
62 case TYPE_UINT16:
63 return sizeof(uint16_t);
64 case TYPE_INT:
65 return sizeof(int);
66 case TYPE_HALF:
67 return sizeof(half);
68 case TYPE_UINT64:
69 return sizeof(uint64_t);
70 default:
71 return 0;
72 }
73}
74
75/* Traits for data types */
76
77template<typename T> struct device_type_traits {
79 static const size_t num_elements = sizeof(T);
80};
81
82template<> struct device_type_traits<uchar> {
84 static const size_t num_elements = 1;
85 static_assert(sizeof(uchar) == num_elements * datatype_size(data_type));
86};
87
88template<> struct device_type_traits<uchar2> {
90 static const size_t num_elements = 2;
91 static_assert(sizeof(uchar2) == num_elements * datatype_size(data_type));
92};
93
94template<> struct device_type_traits<uchar3> {
96 static const size_t num_elements = 3;
97 static_assert(sizeof(uchar3) == num_elements * datatype_size(data_type));
98};
99
100template<> struct device_type_traits<uchar4> {
102 static const size_t num_elements = 4;
103 static_assert(sizeof(uchar4) == num_elements * datatype_size(data_type));
104};
105
106template<> struct device_type_traits<uint> {
108 static const size_t num_elements = 1;
109 static_assert(sizeof(uint) == num_elements * datatype_size(data_type));
110};
111
112template<> struct device_type_traits<uint2> {
114 static const size_t num_elements = 2;
115 static_assert(sizeof(uint2) == num_elements * datatype_size(data_type));
116};
117
118template<> struct device_type_traits<uint3> {
119 /* uint3 has different size depending on the device, can't use it for interchanging
120 * memory between CPU and GPU.
121 *
122 * Leave body empty to trigger a compile error if used. */
123};
124
125template<> struct device_type_traits<uint4> {
127 static const size_t num_elements = 4;
128 static_assert(sizeof(uint4) == num_elements * datatype_size(data_type));
129};
130
131template<> struct device_type_traits<int> {
133 static const size_t num_elements = 1;
134 static_assert(sizeof(int) == num_elements * datatype_size(data_type));
135};
136
137template<> struct device_type_traits<int2> {
139 static const size_t num_elements = 2;
140 static_assert(sizeof(int2) == num_elements * datatype_size(data_type));
141};
142
143template<> struct device_type_traits<int3> {
144 /* int3 has different size depending on the device, can't use it for interchanging
145 * memory between CPU and GPU.
146 *
147 * Leave body empty to trigger a compile error if used. */
148};
149
150template<> struct device_type_traits<int4> {
152 static const size_t num_elements = 4;
153 static_assert(sizeof(int4) == num_elements * datatype_size(data_type));
154};
155
156template<> struct device_type_traits<float> {
158 static const size_t num_elements = 1;
159 static_assert(sizeof(float) == num_elements * datatype_size(data_type));
160};
161
162template<> struct device_type_traits<float2> {
164 static const size_t num_elements = 2;
165 static_assert(sizeof(float2) == num_elements * datatype_size(data_type));
166};
167
168template<> struct device_type_traits<float3> {
169 /* float3 has different size depending on the device, can't use it for interchanging
170 * memory between CPU and GPU.
171 *
172 * Leave body empty to trigger a compile error if used. */
173};
174
177 static const size_t num_elements = 3;
178 static_assert(sizeof(packed_float3) == num_elements * datatype_size(data_type));
179};
180
181template<> struct device_type_traits<float4> {
183 static const size_t num_elements = 4;
184 static_assert(sizeof(float4) == num_elements * datatype_size(data_type));
185};
186
187template<> struct device_type_traits<half> {
189 static const size_t num_elements = 1;
190 static_assert(sizeof(half) == num_elements * datatype_size(data_type));
191};
192
193template<> struct device_type_traits<ushort4> {
195 static const size_t num_elements = 4;
196 static_assert(sizeof(ushort4) == num_elements * datatype_size(data_type));
197};
198
199template<> struct device_type_traits<uint16_t> {
201 static const size_t num_elements = 1;
202 static_assert(sizeof(uint16_t) == num_elements * datatype_size(data_type));
203};
204
205template<> struct device_type_traits<half4> {
207 static const size_t num_elements = 4;
208 static_assert(sizeof(half4) == num_elements * datatype_size(data_type));
209};
210
211template<> struct device_type_traits<uint64_t> {
213 static const size_t num_elements = 1;
214 static_assert(sizeof(uint64_t) == num_elements * datatype_size(data_type));
215};
216
217/* Device Memory
218 *
219 * Base class for all device memory. This should not be allocated directly,
220 * instead the appropriate subclass can be used. */
221
223 public:
224 size_t memory_size()
225 {
227 }
228 size_t memory_elements_size(int elements)
229 {
230 return elements * data_elements * datatype_size(data_type);
231 }
232
233 /* Data information. */
236 size_t data_size;
242 const char *name;
243 std::string name_storage;
244
245 /* Pointers. */
250 /* reference counter for shared_pointer */
252
253 virtual ~device_memory();
254
255 void swap_device(Device *new_device, size_t new_device_size, device_ptr new_device_ptr);
256 void restore_device();
257
258 bool is_resident(Device *sub_device) const;
259
260 protected:
261 friend class Device;
262 friend class GPUDevice;
263 friend class CUDADevice;
264 friend class OptiXDevice;
265 friend class HIPDevice;
266 friend class HIPRTDevice;
267 friend class MetalDevice;
268 friend class OneapiDevice;
269
270 /* Only create through subclasses. */
271 device_memory(Device *device, const char *name, MemoryType type);
272
273 /* No copying and moving allowed.
274 *
275 * This is because device implementation might need to register device memory in an allocation
276 * map of some sort and use pointer as a key to identify blocks. Moving data from one place to
277 * another bypassing device allocation routines will make those maps hard to maintain. */
278 device_memory(const device_memory &) = delete;
279 device_memory(device_memory &&other) noexcept = delete;
282
283 /* Host allocation on the device. All host_pointer memory should be
284 * allocated with these functions, for devices that support using
285 * the same pointer for host and device. */
286 void *host_alloc(size_t size);
287 void host_free();
288
289 /* Device memory allocation and copying. */
290 void device_alloc();
291 void device_free();
292 void device_copy_to();
293 void device_copy_from(size_t y, size_t w, size_t h, size_t elem);
294 void device_zero();
295
296 bool device_is_cpu();
297
303};
304
305/* Device Only Memory
306 *
307 * Working memory only needed by the device, with no corresponding allocation
308 * on the host. Only used internally in the device implementations. */
309
310template<typename T> class device_only_memory : public device_memory {
311 public:
312 device_only_memory(Device *device, const char *name, bool allow_host_memory_fallback = false)
313 : device_memory(device, name, allow_host_memory_fallback ? MEM_READ_WRITE : MEM_DEVICE_ONLY)
314 {
317 }
318
319 device_only_memory(device_only_memory &&other) noexcept : device_memory(std::move(other)) {}
320
322 {
323 free();
324 }
325
326 void alloc_to_device(size_t num, bool shrink_to_fit = true)
327 {
328 size_t new_size = num;
329 bool reallocate;
330
331 if (shrink_to_fit) {
332 reallocate = (data_size != new_size);
333 }
334 else {
335 reallocate = (data_size < new_size);
336 }
337
338 if (reallocate) {
339 device_free();
340 data_size = new_size;
341 device_alloc();
342 }
343 }
344
345 void free()
346 {
347 device_free();
348 data_size = 0;
349 }
350
352 {
353 device_zero();
354 }
355};
356
357/* Device Vector
358 *
359 * Data vector to exchange data between host and device. Memory will be
360 * allocated on the host first with alloc() and resize, and then filled
361 * in and copied to the device with copy_to_device(). Or alternatively
362 * allocated and set to zero on the device with zero_to_device().
363 *
364 * When using memory type MEM_GLOBAL, a pointer to this memory will be
365 * automatically attached to kernel globals, using the provided name
366 * matching an entry in kernel/data_arrays.h. */
367
368template<typename T> class device_vector : public device_memory {
369 public:
370 device_vector(Device *device, const char *name, MemoryType type)
371 : device_memory(device, name, type)
372 {
375 modified = true;
376 need_realloc_ = true;
377
378 assert(data_elements > 0);
379 }
380
382 {
383 free();
384 }
385
386 /* Host memory allocation. */
387 T *alloc(size_t width, size_t height = 0, size_t depth = 0)
388 {
389 size_t new_size = size(width, height, depth);
390
391 if (new_size != data_size) {
392 device_free();
393 host_free();
394 host_pointer = host_alloc(sizeof(T) * new_size);
395 modified = true;
396 assert(device_pointer == 0);
397 }
398
399 data_size = new_size;
400 data_width = width;
401 data_height = height;
402 data_depth = depth;
403
404 return data();
405 }
406
407 /* Host memory resize. Only use this if the original data needs to be
408 * preserved, it is faster to call alloc() if it can be discarded. */
409 T *resize(size_t width, size_t height = 0, size_t depth = 0)
410 {
411 size_t new_size = size(width, height, depth);
412
413 if (new_size != data_size) {
414 void *new_ptr = host_alloc(sizeof(T) * new_size);
415
416 if (new_size && data_size) {
417 size_t min_size = ((new_size < data_size) ? new_size : data_size);
418 memcpy((T *)new_ptr, (T *)host_pointer, sizeof(T) * min_size);
419 }
420
421 device_free();
422 host_free();
423 host_pointer = new_ptr;
424 assert(device_pointer == 0);
425 }
426
427 data_size = new_size;
428 data_width = width;
429 data_height = height;
430 data_depth = depth;
431
432 return data();
433 }
434
435 /* Take over data from an existing array. */
437 {
438 device_free();
439 host_free();
440
441 data_size = from.size();
442 data_width = 0;
443 data_height = 0;
444 data_depth = 0;
445 host_pointer = from.steal_pointer();
446 assert(device_pointer == 0);
447 }
448
450 {
451 device_free();
452
453 to.set_data((T *)host_pointer, data_size);
454 data_size = 0;
455 data_width = 0;
456 data_height = 0;
457 data_depth = 0;
458 host_pointer = 0;
459 assert(device_pointer == 0);
460 }
461
462 /* Free device and host memory. */
463 void free()
464 {
465 device_free();
466 host_free();
467
468 data_size = 0;
469 data_width = 0;
470 data_height = 0;
471 data_depth = 0;
472 host_pointer = 0;
473 modified = true;
474 need_realloc_ = true;
475 assert(device_pointer == 0);
476 }
477
478 void free_if_need_realloc(bool force_free)
479 {
480 if (need_realloc_ || force_free) {
481 free();
482 }
483 }
484
485 bool is_modified() const
486 {
487 return modified;
488 }
489
491 {
492 return need_realloc_;
493 }
494
496 {
497 modified = true;
498 }
499
501 {
502 need_realloc_ = true;
503 tag_modified();
504 }
505
506 size_t size() const
507 {
508 return data_size;
509 }
510
511 T *data()
512 {
513 return (T *)host_pointer;
514 }
515
516 const T *data() const
517 {
518 return (T *)host_pointer;
519 }
520
521 T &operator[](size_t i)
522 {
523 assert(i < data_size);
524 return data()[i];
525 }
526
528 {
529 if (data_size != 0) {
531 }
532 }
533
535 {
536 if (!modified) {
537 return;
538 }
539
541 }
542
544 {
545 modified = false;
546 need_realloc_ = false;
547 }
548
550 {
551 device_copy_from(0, data_width, (data_height == 0) ? 1 : data_height, sizeof(T));
552 }
553
554 void copy_from_device(size_t y, size_t w, size_t h)
555 {
556 device_copy_from(y, w, h, sizeof(T));
557 }
558
560 {
561 device_zero();
562 }
563
564 void move_device(Device *new_device)
565 {
567 device_free();
568 device = new_device;
570 }
571
572 protected:
573 size_t size(size_t width, size_t height, size_t depth)
574 {
575 return width * ((height == 0) ? 1 : height) * ((depth == 0) ? 1 : depth);
576 }
577};
578
579/* Device Sub Memory
580 *
581 * Pointer into existing memory. It is not allocated separately, but created
582 * from an already allocated base memory. It is freed automatically when it
583 * goes out of scope, which should happen before base memory is freed.
584 *
585 * NOTE: some devices require offset and size of the sub_ptr to be properly
586 * aligned to device->mem_address_alingment(). */
587
589 public:
590 device_sub_ptr(device_memory &mem, size_t offset, size_t size);
592
594 {
595 return ptr;
596 }
597
598 protected:
599 /* No copying. */
601
604};
605
606/* Device Texture
607 *
608 * 2D or 3D image texture memory. */
609
611 public:
613 const char *name,
614 const uint slot,
615 ImageDataType image_data_type,
616 InterpolationType interpolation,
617 ExtensionType extension);
619
620 void *alloc(const size_t width, const size_t height, const size_t depth = 0);
621 void copy_to_device();
622
625
626 protected:
627 size_t size(const size_t width, const size_t height, const size_t depth)
628 {
629 return width * ((height == 0) ? 1 : height) * ((depth == 0) ? 1 : depth);
630 }
631};
632
634
635#endif /* __DEVICE_MEMORY_H__ */
unsigned char uchar
unsigned int uint
blender::float3 packed_float3
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
Definition btQuadWord.h:119
friend class CUDADevice
bool is_resident(Device *sub_device) const
Definition memory.cpp:127
size_t memory_elements_size(int elements)
device_memory(const device_memory &)=delete
void device_zero()
Definition memory.cpp:95
void device_alloc()
Definition memory.cpp:69
void device_copy_to()
Definition memory.cpp:82
void device_free()
Definition memory.cpp:75
friend class HIPRTDevice
device_memory(Device *device, const char *name, MemoryType type)
Definition memory.cpp:12
device_ptr original_device_ptr
device_memory(device_memory &&other) noexcept=delete
bool device_is_cpu()
Definition memory.cpp:102
void restore_device()
Definition memory.cpp:120
void * host_alloc(size_t size)
Definition memory.cpp:42
device_memory & operator=(const device_memory &)=delete
device_memory & operator=(device_memory &&)=delete
void host_free()
Definition memory.cpp:60
virtual ~device_memory()
Definition memory.cpp:36
friend class OneapiDevice
void device_copy_from(size_t y, size_t w, size_t h, size_t elem)
Definition memory.cpp:89
friend class MetalDevice
void swap_device(Device *new_device, size_t new_device_size, device_ptr new_device_ptr)
Definition memory.cpp:107
friend class OptiXDevice
device_only_memory(device_only_memory &&other) noexcept
void alloc_to_device(size_t num, bool shrink_to_fit=true)
device_only_memory(Device *device, const char *name, bool allow_host_memory_fallback=false)
device_sub_ptr(device_memory &mem, size_t offset, size_t size)
Definition memory.cpp:134
device_sub_ptr & operator=(const device_sub_ptr &)
device_ptr operator*() const
size_t size(const size_t width, const size_t height, const size_t depth)
void copy_to_device()
Definition memory.cpp:232
void * alloc(const size_t width, const size_t height, const size_t depth=0)
Definition memory.cpp:209
device_texture(Device *device, const char *name, const uint slot, ImageDataType image_data_type, InterpolationType interpolation, ExtensionType extension)
Definition memory.cpp:146
size_t size() const
device_vector(Device *device, const char *name, MemoryType type)
size_t size(size_t width, size_t height, size_t depth)
void move_device(Device *new_device)
void give_data(array< T > &to)
T * resize(size_t width, size_t height=0, size_t depth=0)
bool is_modified() const
void steal_data(array< T > &from)
T * alloc(size_t width, size_t height=0, size_t depth=0)
void free_if_need_realloc(bool force_free)
void copy_from_device(size_t y, size_t w, size_t h)
const T * data() const
T & operator[](size_t i)
Definition half.h:42
static constexpr size_t datatype_size(DataType datatype)
@ MEM_TEXTURE
@ MEM_READ_WRITE
@ MEM_DEVICE_ONLY
@ MEM_READ_ONLY
@ TYPE_UNKNOWN
@ TYPE_UINT16
@ TYPE_UINT64
unsigned short half
#define CCL_NAMESPACE_END
draw_view in_light_buf[] float
draw_view push_constant(Type::INT, "radiance_src") .push_constant(Type capture_info_buf storage_buf(1, Qualifier::READ, "ObjectBounds", "bounds_buf[]") .push_constant(Type draw_view int
#define T
unsigned short uint16_t
Definition stdint.h:79
unsigned __int64 uint64_t
Definition stdint.h:90
static const size_t num_elements
static const DataType data_type
Definition half.h:61
float max
ImageDataType
InterpolationType
ExtensionType
uint64_t device_ptr
Definition util/types.h:45