Blender V5.0
cycles/device/memory.h
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#pragma once
6
7/* Device Memory
8 *
9 * Data types for allocating, copying and freeing device memory. */
10
11#include "util/array.h"
12#include "util/half.h"
13#include "util/string.h"
14#include "util/texture.h"
15#include "util/types.h"
16
18
19class Device;
20class GPUDevice;
21class CUDADevice;
22class OptiXDevice;
23class HIPDevice;
24class HIPRTDevice;
25class MetalDevice;
26class OneapiDevice;
27
35
36/* Supported Data Types */
37
48
/* Return the size in bytes of a single value of the given base data type.
 *
 * TYPE_UNKNOWN is counted as one byte; any enumerator value not listed below yields 0. */
49static constexpr size_t datatype_size(DataType datatype)
50{
51 switch (datatype) {
52 case TYPE_UNKNOWN:
53 return 1;
54 case TYPE_UCHAR:
55 return sizeof(uchar);
56 case TYPE_FLOAT:
57 return sizeof(float);
58 case TYPE_UINT:
59 return sizeof(uint);
60 case TYPE_UINT16:
61 return sizeof(uint16_t);
62 case TYPE_INT:
63 return sizeof(int);
64 case TYPE_HALF:
65 return sizeof(half);
66 case TYPE_UINT64:
67 return sizeof(uint64_t);
68 default:
/* Not one of the enumerators handled above. */
69 return 0;
70 }
71}
72
73/* Traits for data types */
74
75template<typename T> struct device_type_traits {
77 static const size_t num_elements = sizeof(T);
78};
79
80template<> struct device_type_traits<uchar> {
82 static const size_t num_elements = 1;
83 static_assert(sizeof(uchar) == num_elements * datatype_size(data_type));
84};
85
86template<> struct device_type_traits<uchar2> {
88 static const size_t num_elements = 2;
89 static_assert(sizeof(uchar2) == num_elements * datatype_size(data_type));
90};
91
92template<> struct device_type_traits<uchar3> {
94 static const size_t num_elements = 3;
95 static_assert(sizeof(uchar3) == num_elements * datatype_size(data_type));
96};
97
98template<> struct device_type_traits<uchar4> {
100 static const size_t num_elements = 4;
101 static_assert(sizeof(uchar4) == num_elements * datatype_size(data_type));
102};
103
104template<> struct device_type_traits<uint> {
106 static const size_t num_elements = 1;
107 static_assert(sizeof(uint) == num_elements * datatype_size(data_type));
108};
109
110template<> struct device_type_traits<uint2> {
112 static const size_t num_elements = 2;
113 static_assert(sizeof(uint2) == num_elements * datatype_size(data_type));
114};
115
116template<> struct device_type_traits<uint3> {
117 /* uint3 has a different size depending on the device, so it cannot be used for
118 * exchanging memory between CPU and GPU.
119 *
120 * The body is intentionally left empty to trigger a compile error if it is used. */
121};
122
123template<> struct device_type_traits<uint4> {
125 static const size_t num_elements = 4;
126 static_assert(sizeof(uint4) == num_elements * datatype_size(data_type));
127};
128
129template<> struct device_type_traits<int> {
131 static const size_t num_elements = 1;
132 static_assert(sizeof(int) == num_elements * datatype_size(data_type));
133};
134
135template<> struct device_type_traits<int2> {
137 static const size_t num_elements = 2;
138 static_assert(sizeof(int2) == num_elements * datatype_size(data_type));
139};
140
141template<> struct device_type_traits<int3> {
142 /* int3 has a different size depending on the device, so it cannot be used for
143 * exchanging memory between CPU and GPU.
144 *
145 * The body is intentionally left empty to trigger a compile error if it is used. */
146};
147
148template<> struct device_type_traits<int4> {
150 static const size_t num_elements = 4;
151 static_assert(sizeof(int4) == num_elements * datatype_size(data_type));
152};
153
154template<> struct device_type_traits<float> {
156 static const size_t num_elements = 1;
157 static_assert(sizeof(float) == num_elements * datatype_size(data_type));
158};
159
160template<> struct device_type_traits<float2> {
162 static const size_t num_elements = 2;
163 static_assert(sizeof(float2) == num_elements * datatype_size(data_type));
164};
165
166template<> struct device_type_traits<float3> {
167 /* float3 has a different size depending on the device, so it cannot be used for
168 * exchanging memory between CPU and GPU.
169 *
170 * The body is intentionally left empty to trigger a compile error if it is used. */
171};
172
175 static const size_t num_elements = 3;
176 static_assert(sizeof(packed_float3) == num_elements * datatype_size(data_type));
177};
178
179template<> struct device_type_traits<float4> {
181 static const size_t num_elements = 4;
182 static_assert(sizeof(float4) == num_elements * datatype_size(data_type));
183};
184
185template<> struct device_type_traits<half> {
187 static const size_t num_elements = 1;
188 static_assert(sizeof(half) == num_elements * datatype_size(data_type));
189};
190
191template<> struct device_type_traits<ushort4> {
193 static const size_t num_elements = 4;
194 static_assert(sizeof(ushort4) == num_elements * datatype_size(data_type));
195};
196
197template<> struct device_type_traits<uint16_t> {
199 static const size_t num_elements = 1;
200 static_assert(sizeof(uint16_t) == num_elements * datatype_size(data_type));
201};
202
203template<> struct device_type_traits<half4> {
205 static const size_t num_elements = 4;
206 static_assert(sizeof(half4) == num_elements * datatype_size(data_type));
207};
208
209template<> struct device_type_traits<uint64_t> {
211 static const size_t num_elements = 1;
212 static_assert(sizeof(uint64_t) == num_elements * datatype_size(data_type));
213};
214
215/* Device Memory
216 *
217 * Base class for all device memory. This should not be allocated directly,
218 * instead the appropriate subclass can be used. */
219
221 public:
222 size_t memory_size()
223 {
225 }
/* Size in bytes of `elements` elements of this memory, computed as
 * elements * data_elements * datatype_size(data_type). */
226 size_t memory_elements_size(const int elements)
227 {
/* Widen first: `elements` is an int, so without the cast the leading product could be
 * evaluated in int and overflow before being converted to size_t. */
228 return static_cast<size_t>(elements) * data_elements * datatype_size(data_type);
229 }
230
231 /* Data information. */
234 size_t data_size;
239 const char *name;
241
242 /* Pointers. */
247 /* reference counter for shared_pointer */
249 bool move_to_host = false;
250
251 virtual ~device_memory();
252
253 void swap_device(Device *new_device, const size_t new_device_size, device_ptr new_device_ptr);
254 void restore_device();
255
256 bool is_resident(Device *sub_device) const;
257 bool is_shared(Device *sub_device) const;
258
259 /* No copying or moving allowed.
260 *
261 * This is because device implementation might need to register device memory in an allocation
262 * map of some sort and use pointer as a key to identify blocks. Moving data from one place to
263 * another bypassing device allocation routines will make those maps hard to maintain. */
264 device_memory(const device_memory &) = delete;
265 device_memory(device_memory &&other) noexcept = delete;
268
269 protected:
270 friend class Device;
271 friend class GPUDevice;
272 friend class CUDADevice;
273 friend class OptiXDevice;
274 friend class HIPDevice;
275 friend class HIPRTDevice;
276 friend class MetalDevice;
277 friend class OneapiDevice;
278
279 /* Only create through subclasses. */
281
282 /* Host allocation on the device. All host_pointer memory should be
283 * allocated with these functions, for devices that support using
284 * the same pointer for host and device. */
285 void *host_alloc(const size_t size);
286
287 /* Device memory allocation and copying. */
288 void device_alloc();
289 void device_copy_to();
290 void device_move_to_host();
291 void device_copy_from(const size_t y, const size_t w, size_t h, const size_t elem);
292 void device_zero();
293
294 /* Memory can only be freed on host and device together. */
296
297 bool device_is_cpu();
298
304};
305
306/* Device Only Memory
307 *
308 * Working memory only needed by the device, with no corresponding allocation
309 * on the host. Only used internally in the device implementations. */
310
311template<typename T> class device_only_memory : public device_memory {
312 public:
313 device_only_memory(Device *device, const char *name, bool allow_host_memory_fallback = false)
314 : device_memory(device, name, allow_host_memory_fallback ? MEM_READ_WRITE : MEM_DEVICE_ONLY)
315 {
318 }
319
/* Moving is not supported. The base class deletes its move constructor because devices may
 * use the address of the memory object as a key, so a constructor forwarding to
 * device_memory(std::move(other)) could never be instantiated. Declaring it deleted makes
 * that explicit and consistent with the base class. */
320 device_only_memory(device_only_memory &&other) noexcept = delete;
321
323 {
324 free();
325 }
326
327 void alloc_to_device(const size_t num, bool shrink_to_fit = true)
328 {
329 size_t new_size = num;
330 bool reallocate;
331
332 if (shrink_to_fit) {
333 reallocate = (data_size != new_size);
334 }
335 else {
336 reallocate = (data_size < new_size);
337 }
338
339 if (reallocate) {
341 data_size = new_size;
342 device_alloc();
343 }
344 }
345
346 void free()
347 {
349 data_size = 0;
350 }
351
353 {
354 device_zero();
355 }
356};
357
358/* Device Vector
359 *
360 * Data vector to exchange data between host and device. Memory will be
361 * allocated on the host first with alloc() and resize(), and then filled
362 * in and copied to the device with copy_to_device(). Or alternatively
363 * allocated and set to zero on the device with zero_to_device().
364 *
365 * When using memory type MEM_GLOBAL, a pointer to this memory will be
366 * automatically attached to kernel globals, using the provided name
367 * matching an entry in kernel/data_arrays.h. */
368
369template<typename T> class device_vector : public device_memory {
370 public:
381
382 ~device_vector() override
383 {
384 free();
385 }
386
387 /* Host memory allocation. */
388 T *alloc(const size_t width, const size_t height = 0)
389 {
390 size_t new_size = size(width, height);
391
392 if (new_size != data_size) {
394 host_pointer = host_alloc(sizeof(T) * new_size);
395 modified = true;
397 }
398
399 data_size = new_size;
400 data_width = width;
401 data_height = height;
402
403 return data();
404 }
405
406 /* Host memory resize. Only use this if the original data needs to be
407 * preserved or memory needs to be initialized; it is faster to call
408 * alloc() if it can be discarded. */
409 T *resize(const size_t width, const size_t height = 0)
410 {
411 size_t new_size = size(width, height);
412
413 if (new_size != data_size) {
414 void *new_ptr = host_alloc(sizeof(T) * new_size);
415
416 if (new_ptr) {
417 size_t min_size = (new_size < data_size) ? new_size : data_size;
418 for (size_t i = 0; i < min_size; i++) {
419 ((T *)new_ptr)[i] = ((T *)host_pointer)[i];
420 }
421 for (size_t i = data_size; i < new_size; i++) {
422 ((T *)new_ptr)[i] = T();
423 }
424 }
425
427 host_pointer = new_ptr;
429 }
430
431 data_size = new_size;
432 data_width = width;
433 data_height = height;
434
435 return data();
436 }
437
438 /* Take over data from an existing array. */
440 {
442
443 data_size = from.size();
444 data_width = 0;
445 data_height = 0;
448 }
449
450 /* Free device and host memory. */
451 void free()
452 {
454
455 data_size = 0;
456 data_width = 0;
457 data_height = 0;
458 host_pointer = 0;
459 modified = true;
460 need_realloc_ = true;
462 }
463
/* Call free() when a reallocation has been flagged (need_realloc_) or when the caller
 * forces it with force_free; otherwise do nothing. */
464 void free_if_need_realloc(bool force_free)
465 {
466 if (need_realloc_ || force_free) {
467 free();
468 }
469 }
470
/* Return the current value of the `modified` flag. */
471 bool is_modified() const
472 {
473 return modified;
474 }
475
477 {
478 return need_realloc_;
479 }
480
482 {
483 modified = true;
484 }
485
487 {
488 need_realloc_ = true;
489 tag_modified();
490 }
491
/* Number of elements of type T currently held (data_size). This is an element count,
 * not a size in bytes. */
492 size_t size() const
493 {
494 return data_size;
495 }
496
498 {
499 return (T *)host_pointer;
500 }
501
/* Read-only typed access to the host allocation: host_pointer cast to T *.
 * free() resets host_pointer to 0, so this returns null after the vector is freed. */
502 const T *data() const
503 {
504 return (T *)host_pointer;
505 }
506
/* Mutable access to element `i` of the host data. The index is validated against
 * data_size only through assert(); no other bounds check is performed. */
507 T &operator[](size_t i)
508 {
509 assert(i < data_size);
510 return data()[i];
511 }
512
514 {
515 if (data_size != 0) {
517 }
518 }
519
521 {
522 if (!modified) {
523 return;
524 }
525
527 }
528
530 {
531 modified = false;
532 need_realloc_ = false;
533 }
534
536 {
537 device_copy_from(0, data_width, (data_height == 0) ? 1 : data_height, sizeof(T));
538 }
539
/* Forward to device_copy_from() using sizeof(T) as the element size.
 *
 * NOTE(review): y/w/h look like a starting row, a row width and a row count (the
 * whole-buffer variant passes 0, data_width and data_height-or-1) -- confirm against
 * device_copy_from(). */
540 void copy_from_device(const size_t y, const size_t w, size_t h)
541 {
542 device_copy_from(y, w, h, sizeof(T));
543 }
544
546 {
547 device_zero();
548 }
549
550 protected:
/* Element count for the given dimensions: width * height, where a height of 0 is
 * counted as a single row. */
551 size_t size(const size_t width, const size_t height)
552 {
553 return width * ((height == 0) ? 1 : height);
554 }
555};
556
557/* Device Sub Memory
558 *
559 * Pointer into existing memory. It is not allocated separately, but created
560 * from an already allocated base memory. It is freed automatically when it
561 * goes out of scope, which should happen before base memory is freed.
562 *
563 * NOTE: some devices require offset and size of the sub_ptr to be properly
564 * aligned to device->mem_address_alignment(). */
565
567 public:
568 device_sub_ptr(device_memory &mem, const size_t offset, const size_t size);
570
572 {
573 return ptr;
574 }
575
576 protected:
577 /* No copying. */
579
582};
583
584/* Device Texture
585 *
586 * 2D or 3D image texture memory. */
587
589 public:
591 const char *name,
592 const uint slot,
593 ImageDataType image_data_type,
594 InterpolationType interpolation,
595 ExtensionType extension);
596 ~device_texture() override;
597
598 void *alloc(const size_t width, const size_t height);
599 void copy_to_device();
600
603
604 protected:
/* Element count for the given dimensions: width * height, where a height of 0 is
 * counted as a single row. */
605 size_t size(const size_t width, const size_t height)
606 {
607 return width * ((height == 0) ? 1 : height);
608 }
609};
610
ATTR_WARN_UNUSED_RESULT const size_t num
unsigned char uchar
unsigned int uint
unsigned long long int uint64_t
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition btDbvt.cpp:52
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
Definition btQuadWord.h:119
T * steal_pointer()
size_t size() const
friend class CUDADevice
bool is_resident(Device *sub_device) const
Definition memory.cpp:132
void * host_alloc(const size_t size)
Definition memory.cpp:41
device_memory(const device_memory &)=delete
void device_zero()
Definition memory.cpp:100
void device_alloc()
Definition memory.cpp:74
void device_copy_to()
Definition memory.cpp:80
void swap_device(Device *new_device, const size_t new_device_size, device_ptr new_device_ptr)
Definition memory.cpp:112
friend class HIPRTDevice
size_t memory_elements_size(const int elements)
device_ptr original_device_ptr
void host_and_device_free()
Definition memory.cpp:56
bool is_shared(Device *sub_device) const
Definition memory.cpp:137
device_memory(device_memory &&other) noexcept=delete
bool device_is_cpu()
Definition memory.cpp:107
void device_move_to_host()
Definition memory.cpp:87
void restore_device()
Definition memory.cpp:125
device_memory & operator=(const device_memory &)=delete
device_memory & operator=(device_memory &&)=delete
virtual ~device_memory()
Definition memory.cpp:35
friend class OneapiDevice
void device_copy_from(const size_t y, const size_t w, size_t h, const size_t elem)
Definition memory.cpp:94
friend class MetalDevice
friend class OptiXDevice
void alloc_to_device(const size_t num, bool shrink_to_fit=true)
device_only_memory(device_only_memory &&other) noexcept
device_only_memory(Device *device, const char *name, bool allow_host_memory_fallback=false)
device_sub_ptr(device_memory &mem, const size_t offset, const size_t size)
Definition memory.cpp:144
device_sub_ptr & operator=(const device_sub_ptr &)
device_ptr operator*() const
void copy_to_device()
Definition memory.cpp:240
~device_texture() override
Definition memory.cpp:214
void * alloc(const size_t width, const size_t height)
Definition memory.cpp:220
size_t size(const size_t width, const size_t height)
device_texture(Device *device, const char *name, const uint slot, ImageDataType image_data_type, InterpolationType interpolation, ExtensionType extension)
Definition memory.cpp:157
size_t size() const
device_vector(Device *device, const char *name, MemoryType type)
T * alloc(const size_t width, const size_t height=0)
size_t size(const size_t width, const size_t height)
T * resize(const size_t width, const size_t height=0)
bool is_modified() const
void steal_data(array< T > &from)
void free_if_need_realloc(bool force_free)
const T * data() const
T & operator[](size_t i)
void copy_from_device(const size_t y, const size_t w, size_t h)
Definition half.h:41
nullptr float
static constexpr size_t datatype_size(DataType datatype)
@ MEM_TEXTURE
@ MEM_READ_WRITE
@ MEM_DEVICE_ONLY
@ MEM_READ_ONLY
@ TYPE_UNKNOWN
@ TYPE_UINT16
@ TYPE_UINT64
#define CCL_NAMESPACE_END
#define assert(assertion)
#define T
static const DataType data_type
static const DataType data_type
static const DataType data_type
static const DataType data_type
static const DataType data_type
static const DataType data_type
static const size_t num_elements
static const DataType data_type
Definition half.h:60
i
Definition text_draw.cc:230
max
Definition text_draw.cc:251
ImageDataType
Definition texture.h:32
InterpolationType
Definition texture.h:22
ExtensionType
Definition texture.h:71
uint64_t device_ptr
Definition types_base.h:44