Blender V5.0
mtl_memory.hh
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2023 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
5#pragma once
6
7#include "BLI_map.hh"
8
9#include "mtl_common.hh"
10
11#include <atomic>
12#include <ctime>
13#include <functional>
14#include <map>
15#include <mutex>
16#include <set>
17#include <unordered_map>
18
19#include <Cocoa/Cocoa.h>
20#include <Metal/Metal.h>
21#include <QuartzCore/QuartzCore.h>
22
23@class CAMetalLayer;
24@class MTLCommandQueue;
25@class MTLRenderPipelineState;
26
27/* Metal Memory Manager Overview. */
28/*
29 * The Metal Backend Memory manager is designed to provide an interface
30 * for all other MTL_* modules where memory allocation is required.
31 *
32 * Different allocation strategies and data-structures are used depending
33 * on how the data is used by the backend. These aim to optimally handle
34 * system memory and abstract away any complexity from the MTL_* modules
35 * themselves.
36 *
37 * There are two primary allocation modes which can be used:
38 *
39 * ** MTLScratchBufferManager **
40 *
41 * Each MTLContext owns a ScratchBufferManager which is implemented
42 * as a pool of circular buffers, designed to handle temporary
43 * memory allocations which occur on a per-frame basis. The scratch
44 * buffers allow flushing of host memory to the GPU to be batched.
45 *
46 * Each frame, the next scratch buffer is reset, then later flushed upon
47 * command buffer submission.
48 *
49 * NOTE: This is allocated per-context due to allocations being tied
50 * to workload submissions and context-specific submissions.
51 *
52 * Examples of scratch buffer usage are:
53 * - Immediate-mode temporary vertex buffers.
54 * - Shader uniform data updates
55 * - Staging of data for resource copies, or, data reads/writes.
56 *
57 * Usage:
58 *
59 * MTLContext::get_scratchbuffer_manager() - to fetch active manager.
60 *
61 * MTLTemporaryBuffer scratch_buffer_allocate_range(size)
62 * MTLTemporaryBuffer scratch_buffer_allocate_range_aligned(size, align)
63 *
64 * ---------------------------------------------------------------------------------
65 * ** MTLBufferPool **
66 *
67 * For static and longer-lasting memory allocations, such as those for UBOs,
68 * Vertex buffers, index buffers, etc; We want an optimal abstraction for
69 * fetching a MTLBuffer of the desired size and resource options.
70 *
71 * Memory allocations can be expensive so the MTLBufferPool provides
72 * functionality to track usage of these buffers and once a buffer
73 * is no longer in use, it is returned to the buffer pool for use
74 * by another backend resource.
75 *
76 * The MTLBufferPool provides functionality for safe tracking of resources,
77 * as buffers freed on the host side must have their usage by the GPU tracked,
78 * to ensure they are not prematurely re-used before they have finished being
79 * used by the GPU.
80 *
81 * NOTE: The MTLBufferPool is a global construct which can be fetched from anywhere.
82 *
83 * Usage:
84 * MTLContext::get_global_memory_manager(); - static routine to fetch global memory manager.
85 *
86 * gpu::MTLBuffer *allocate(size, is_cpu_visible)
87 * gpu::MTLBuffer *allocate_aligned(size, alignment, is_cpu_visible)
88 * gpu::MTLBuffer *allocate_with_data(size, is_cpu_visible, data_ptr)
89 * gpu::MTLBuffer *allocate_aligned_with_data(size, alignment, is_cpu_visible, data_ptr)
90 */
91
92/* Debug memory statistics: Disabled by Macro rather than guarded for
93 * performance considerations. */
94#define MTL_DEBUG_MEMORY_STATISTICS 0
95
96namespace blender::gpu {
97
98/* Forward Declarations. */
99class MTLContext;
100class MTLCommandBufferManager;
101class MTLUniformBuf;
102class MTLStorageBuf;
103
104/* -------------------------------------------------------------------- */
107
108/* MTLBuffer allocation wrapper. */
110
111 public:
112 /* NOTE: ListBase API is not used due to custom destructor operation required to release
113 * Metal objective C buffer resource. */
115
116 private:
117 /* Metal resource. */
118 id<MTLBuffer> metal_buffer_;
119
120 /* Host-visible mapped-memory pointer. Behavior depends on buffer type:
121 * - Shared buffers: pointer represents base address of #MTLBuffer whose data
122 * access has shared access by both the CPU and GPU on
123 * Unified Memory Architectures (UMA).
124 * - Managed buffer: Host-side mapped buffer region for CPU (Host) access. Managed buffers
125 * must be manually flushed to transfer data to GPU-resident buffer.
126 * - Private buffer: Host access is invalid, `data` will be nullptr. */
127 void *data_;
128
129 /* Whether buffer is allocated from an external source. */
130 bool is_external_ = false;
131
132 /* Allocation info. */
133 MTLResourceOptions options_;
134 id<MTLDevice> device_;
135 uint64_t alignment_;
136 uint64_t size_;
137
138 /* Allocated size may be larger than actual size. */
139 uint64_t usage_size_;
140
141 /* Lifetime info - whether the current buffer is actively in use. A buffer
142 * should be in use after it has been allocated. De-allocating the buffer, and
143 * returning it to the free buffer pool will set in_use to false. Using a buffer
144 * while it is not in-use should not be allowed and result in an error. */
145 std::atomic<bool> in_use_;
146
147 public:
148 MTLBuffer(id<MTLDevice> device, uint64_t size, MTLResourceOptions options, uint alignment = 1);
149 MTLBuffer(id<MTLBuffer> external_buffer);
150 ~MTLBuffer();
151
152 /* Fetch information about backing MTLBuffer. */
153 id<MTLBuffer> get_metal_buffer() const;
154 void *get_host_ptr() const;
155 uint64_t get_size_used() const;
156 uint64_t get_size() const;
157
158 /* Flush data to GPU. */
159 void flush();
160 void flush_range(uint64_t offset, uint64_t length);
161 bool requires_flush();
162
163 /* Buffer usage tracking. */
164 void flag_in_use(bool used);
165 bool get_in_use();
166 void set_usage_size(uint64_t size_used);
167
168 /* Debug. */
169 void set_label(NSString *str);
170
171 /* Read properties. */
172 MTLResourceOptions get_resource_options();
174
175 /* Resource-local free: For buffers allocated via memory manager,
176 * this will call the context `free_buffer` method to return the buffer to the context memory
177 * pool.
178 *
179 * Otherwise, free will release the associated metal resource.
180 * As a note, calling the destructor will also destroy the buffer and associated metal
181 * resource. */
182 void free();
183
184 /* Safety check to ensure buffers are not used after free. */
185 void debug_ensure_used();
186
188};
189
190/* View into part of an MTLBuffer. */
192 id<MTLBuffer> metal_buffer;
193 void *data;
196 MTLResourceOptions options;
197
198 void flush();
199 bool requires_flush();
200};
201
202/* Circular scratch buffer allocations should be seen as temporary and only used within the
203 * lifetime of the frame. */
205
206/* Round-Robin Circular-buffer. */
209
210 private:
211 MTLContext &own_context_;
212
213 /* Wrapped MTLBuffer allocation handled. */
214 gpu::MTLBuffer *cbuffer_;
215 /* Allocated SSBO that serves as source for cbuffer. */
216 MTLStorageBuf *ssbo_source_ = nullptr;
217
218 /* Current offset where next allocation will begin. */
219 uint64_t current_offset_;
220
221 /* Whether the Circular Buffer can grow during re-allocation if
222 * the size is exceeded. */
223 bool can_resize_;
224
225 /* Usage information. */
226 uint64_t used_frame_index_;
227 uint64_t last_flush_base_offset_;
228
229 public:
230 MTLCircularBuffer(MTLContext &ctx, uint64_t initial_size, bool allow_grow);
234 void flush();
235
236 /* Reset pointer back to start of circular buffer. */
237 void reset();
238};
239
240/* Wrapper struct used by Memory Manager to sort and compare gpu::MTLBuffer resources inside the
241 * memory pools. */
246
248 {
249 this->buffer = buf;
250 this->buffer_size = this->buffer->get_size();
251 this->insert_time = std::time(nullptr);
252 }
253
255 {
256 this->buffer = nullptr;
257 this->buffer_size = compare_size;
258 this->insert_time = 0;
259 }
260};
261
263 bool operator()(const MTLBufferHandle &lhs, const MTLBufferHandle &rhs) const
264 {
265 return lhs.buffer_size < rhs.buffer_size;
266 }
267};
268
298 friend class MTLBufferPool;
299
300 private:
301 std::atomic<int> reference_count_;
302 std::atomic<bool> in_free_queue_;
303 std::atomic<bool> referenced_by_workload_;
304 std::recursive_mutex lock_;
305 /* Linked list of next MTLSafeFreeList chunk if current chunk is full. */
306 std::atomic<MTLSafeFreeList *> next_;
307
308 /* Lockless list. MAX_NUM_BUFFERS_ within a chunk based on considerations
309 * for performance and memory. Higher chunk counts are preferable for efficiently
310 * performing block operations such as copying several objects simultaneously.
311 *
312 * MIN_BUFFER_FLUSH_COUNT refers to the minimum count of buffers in the MTLSafeFreeList
313 * before buffers are returned to global memory pool. This is set at a point to reduce
314 * overhead of small pool flushes, while ensuring floating memory overhead is not excessive. */
315 static const int MAX_NUM_BUFFERS_ = 8192;
316 static const int MIN_BUFFER_FLUSH_COUNT = 120;
317 std::atomic<int> current_list_index_;
318 gpu::MTLBuffer *safe_free_pool_[MAX_NUM_BUFFERS_];
319
320 public:
322
323 /* Can be used from multiple threads. Performs insertion into Safe Free List with the least
324 * amount of threading synchronization. */
325 void insert_buffer(gpu::MTLBuffer *buffer);
326
327 /* Whether we need to start a new safe free list, or can carry on using the existing one. */
328 bool should_flush();
329
330 /* Increments command buffer reference count. */
331 void increment_reference();
332
333 /* Decrement and return of buffers to pool occur on MTLCommandBuffer completion callback. */
334 void decrement_reference();
335
337 {
338 in_free_queue_ = true;
339 if (current_list_index_ >= MTLSafeFreeList::MAX_NUM_BUFFERS_) {
340 MTLSafeFreeList *next_pool = next_.load();
341 if (next_pool) {
342 next_pool->flag_in_queue();
343 }
344 }
345 }
346
347 MEM_CXX_CLASS_ALLOC_FUNCS("MTLSafeFreeList");
348};
349
350/* MTLBuffer pools. */
351/* Allocating Metal buffers is expensive, so we cache all allocated buffers,
352 * and when requesting a new buffer, find one which fits the required dimensions
353 * from an existing pool of buffers.
354 *
355 * When freeing MTLBuffers, we insert them into the current MTLSafeFreeList, which defers
356 * release of the buffer until the associated command buffers have finished executing.
357 * This prevents a buffer from being re-used while it is still in-use by the GPU.
358 *
359 * * Once command buffers complete, MTLSafeFreeList's associated with the current
360 * command buffer submission are added to the `completed_safelist_queue_`.
361 *
362 * * At a set point in time, all MTLSafeFreeList's in `completed_safelist_queue_` have their
363 * MTLBuffers re-inserted into the Memory Manager's pools. */
365
366 private:
367#if MTL_DEBUG_MEMORY_STATISTICS == 1
368 /* Memory statistics. */
369 std::atomic<int64_t> total_allocation_bytes_;
370
371 /* Debug statistics. */
372 std::atomic<int> per_frame_allocation_count_;
373 std::atomic<int64_t> buffers_in_pool_;
374#endif
375
376 /* Metal resources. */
377 bool initialized_ = false;
378 id<MTLDevice> device_ = nil;
379
380 /* The buffer selection aims to pick a buffer which meets the minimum size requirements.
381 * To do this, we keep an ordered set of all available buffers. If the buffer is larger than the
382 * desired allocation size, we check it against `mtl_buffer_size_threshold_factor_`,
383 * which defines what % larger than the original allocation the buffer can be.
384 * - A higher value results in greater re-use of previously allocated buffers of similar sizes.
385 * - A lower value may result in more dynamic allocations, but minimized memory usage for a given
386 * scenario.
387 * The current value of 1.26 is calibrated for optimal performance and memory utilization. */
388 static constexpr float mtl_buffer_size_threshold_factor_ = 1.26;
389
390 /* Buffer pools using MTLResourceOptions as key for allocation type.
391 * Aliased as 'uint64_t' for map type compatibility.
392 * - A size-ordered list (MultiSet) of allocated buffers is kept per MTLResourceOptions
393 * permutation. This allows efficient lookup for buffers of a given requested size.
394 * - MTLBufferHandle wraps a gpu::MTLBuffer pointer to achieve easy size-based sorting
395 * via CompareMTLBuffer.
396 *
397 * NOTE: buffer_pool_lock_ guards against concurrent access to the memory allocator. This
398 * can occur during light baking or rendering operations. */
399 using MTLBufferPoolOrderedList = std::multiset<MTLBufferHandle, CompareMTLBuffer>;
400 using MTLBufferResourceOptions = uint64_t;
401
402 std::mutex buffer_pool_lock_;
404
405 /* Linked list to track all existing allocations. Prioritizing fast insert/deletion. */
406 gpu::MTLBuffer *allocations_list_base_;
407 uint allocations_list_size_;
408
409 /* Maintain a queue of all MTLSafeFreeList's that have been released
410 * by the GPU and are ready to have their buffers re-inserted into the
411 * MemoryManager pools.
412 * Access to this queue is made thread-safe through safelist_lock_. */
413 std::mutex safelist_lock_;
414 blender::Vector<MTLSafeFreeList *> completed_safelist_queue_;
415
416 /* Current free list, associated with active MTLCommandBuffer submission. */
417 /* MTLBuffer::free() can be called from separate threads, due to usage within animation
418 * system/worker threads. */
419 std::atomic<MTLSafeFreeList *> current_free_list_;
420 std::atomic<int64_t> allocations_in_pool_;
421
422 /* Previous list, to be released after one full frame. */
423 MTLSafeFreeList *prev_free_buffer_list_ = nullptr;
424
425 public:
426 void init(id<MTLDevice> device);
428
429 gpu::MTLBuffer *allocate(uint64_t size, bool cpu_visible);
430 gpu::MTLBuffer *allocate_aligned(uint64_t size, uint alignment, bool cpu_visible);
431 gpu::MTLBuffer *allocate_with_data(uint64_t size, bool cpu_visible, const void *data = nullptr);
433 uint alignment,
434 bool cpu_visible,
435 const void *data = nullptr);
436 bool free_buffer(gpu::MTLBuffer *buffer);
437
438 /* Flush MTLSafeFreeList buffers, for completed lists in `completed_safelist_queue_`,
439 * back to memory pools. */
440 void update_memory_pools();
441
442 /* Access and control over active MTLSafeFreeList. */
444 void begin_new_safe_list();
445
446 /* Add a completed MTLSafeFreeList to completed_safelist_queue_. */
448
449 private:
450 void ensure_buffer_pool(MTLResourceOptions options);
451 void insert_buffer_into_pool(MTLResourceOptions options, gpu::MTLBuffer *buffer);
452 void free();
453
454 /* Allocations list. */
455 void allocations_list_insert(gpu::MTLBuffer *buffer);
456 void allocations_list_delete(gpu::MTLBuffer *buffer);
457 void allocations_list_delete_all();
458};
459
460/* Scratch buffers are circular-buffers used for temporary data within the current frame.
461 * In order to preserve integrity of contents when having multiple-frames-in-flight,
462 * we cycle through a collection of scratch buffers which are reset upon next use.
463 *
464 * Below are a series of properties, declared to manage scratch buffers. If a scratch buffer
465 * overflows, then the original buffer will be flushed and submitted, with retained references
466 * by usage within the command buffer, and a new buffer will be created.
467 * - The new buffer will grow in size to account for increased demand in temporary memory.
468 */
470
471 private:
472 /* Maximum number of scratch buffers to allocate. This should be the maximum number of
473 * simultaneous frames in flight. */
474 static constexpr uint mtl_max_scratch_buffers_ = MTL_NUM_SAFE_FRAMES;
475
476 public:
477 /* Maximum size of single scratch buffer allocation. When re-sizing, this is the maximum size the
478 * newly allocated buffers will grow to. Larger allocations are possible if
479 * `MTL_SCRATCH_BUFFER_ALLOW_TEMPORARY_EXPANSION` is enabled, but these will instead allocate new
480 * buffers from the memory pools on the fly. */
481 static constexpr uint mtl_scratch_buffer_max_size_ = 128 * 1024 * 1024;
482
483 /* Initial size of circular scratch buffers prior to growth. */
484 static constexpr uint mtl_scratch_buffer_initial_size_ = 16 * 1024 * 1024;
485
486 private:
487 /* Parent MTLContext. */
488 MTLContext &context_;
489 bool initialised_ = false;
490
491 /* Scratch buffer currently in-use. */
492 uint current_scratch_buffer_ = 0;
493
494 /* Scratch buffer pool. */
495 MTLCircularBuffer *scratch_buffers_[mtl_max_scratch_buffers_];
496
497 public:
498 MTLScratchBufferManager(MTLContext &context) : context_(context) {};
500
501 /* Explicit initialization and freeing of resources.
502 * Initialization must occur after device creation. */
503 void init();
504 void free();
505
506 /* Allocation functions for creating temporary allocations from active circular buffer. */
509
510 /* Ensure a new scratch buffer is started if we move onto a new frame.
511 * Called when a new command buffer begins. */
513
514 /* Flush memory for active scratch buffer to GPU.
515 * This call will perform a partial flush of the buffer starting from
516 * the last offset the data was flushed from, to the current offset. */
518
519 /* Bind the whole scratch buffer as a SSBO resource. */
520 void bind_as_ssbo(int slot);
521 void unbind_as_ssbo();
522
524};
525
527
528} // namespace blender::gpu
unsigned int uint
BMesh const char void * data
void init()
unsigned long long int uint64_t
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition btDbvt.cpp:52
void push_completed_safe_list(MTLSafeFreeList *list)
gpu::MTLBuffer * allocate_with_data(uint64_t size, bool cpu_visible, const void *data=nullptr)
MTLSafeFreeList * get_current_safe_list()
gpu::MTLBuffer * allocate(uint64_t size, bool cpu_visible)
gpu::MTLBuffer * allocate_aligned(uint64_t size, uint alignment, bool cpu_visible)
gpu::MTLBuffer * allocate_aligned_with_data(uint64_t size, uint alignment, bool cpu_visible, const void *data=nullptr)
bool free_buffer(gpu::MTLBuffer *buffer)
void flag_in_use(bool used)
uint64_t get_size() const
void set_usage_size(uint64_t size_used)
gpu::MTLBuffer * next
uint64_t get_size_used() const
void * get_host_ptr() const
void flush_range(uint64_t offset, uint64_t length)
MTLBuffer(id< MTLDevice > device, uint64_t size, MTLResourceOptions options, uint alignment=1)
gpu::MTLBuffer * prev
void set_label(NSString *str)
MTLResourceOptions get_resource_options()
id< MTLBuffer > get_metal_buffer() const
MEM_CXX_CLASS_ALLOC_FUNCS("MTLBuffer")
MTLCircularBuffer(MTLContext &ctx, uint64_t initial_size, bool allow_grow)
MTLTemporaryBuffer allocate_range_aligned(uint64_t alloc_size, uint alignment)
MTLTemporaryBuffer allocate_range(uint64_t alloc_size)
MEM_CXX_CLASS_ALLOC_FUNCS("MTLSafeFreeList")
void insert_buffer(gpu::MTLBuffer *buffer)
static constexpr uint mtl_scratch_buffer_max_size_
static constexpr uint mtl_scratch_buffer_initial_size_
MTLScratchBufferManager(MTLContext &context)
MTLTemporaryBuffer scratch_buffer_allocate_range_aligned(uint64_t alloc_size, uint alignment)
MTLTemporaryBuffer scratch_buffer_allocate_range(uint64_t alloc_size)
CCL_NAMESPACE_BEGIN struct Options options
#define str(s)
static int compare_size(void *user_data, const void *a1, const void *a2)
float length(VecOp< float, D >) RET
#define MTL_NUM_SAFE_FRAMES
Definition mtl_common.hh:16
MTLBufferRange MTLTemporaryBuffer
bool operator()(const MTLBufferHandle &lhs, const MTLBufferHandle &rhs) const
MTLBufferHandle(gpu::MTLBuffer *buf)
MTLBufferHandle(uint64_t compare_size)
MTLResourceOptions options