Blender V4.3
mtl_memory.hh
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2023 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
5#pragma once
6
7#include <atomic>
8#include <ctime>
9#include <functional>
10#include <map>
11#include <mutex>
12#include <set>
13#include <unordered_map>
14
15#include "mtl_common.hh"
16
17#include <Cocoa/Cocoa.h>
18#include <Metal/Metal.h>
19#include <QuartzCore/QuartzCore.h>
20
21@class CAMetalLayer;
22@class MTLCommandQueue;
23@class MTLRenderPipelineState;
24
25/* Metal Memory Manager Overview. */
26/*
27 * The Metal Backend Memory manager is designed to provide an interface
28 * for all other MTL_* modules where memory allocation is required.
29 *
30 * Different allocation strategies and data-structures are used depending
31 * on how the data is used by the backend. These aim to optimally handle
32 * system memory and abstract away any complexity from the MTL_* modules
33 * themselves.
34 *
35 * There are two primary allocation modes which can be used:
36 *
37 * ** MTLScratchBufferManager **
38 *
39 * Each MTLContext owns a ScratchBufferManager which is implemented
40 * as a pool of circular buffers, designed to handle temporary
41 * memory allocations which occur on a per-frame basis. The scratch
42 * buffers allow flushing of host memory to the GPU to be batched.
43 *
44 * Each frame, the next scratch buffer is reset, then later flushed upon
45 * command buffer submission.
46 *
47 * NOTE: This is allocated per-context due to allocations being tied
48 * to workload submissions and context-specific submissions.
49 *
50 * Examples of scratch buffer usage are:
51 * - Immediate-mode temporary vertex buffers.
52 * - Shader uniform data updates
53 * - Staging of data for resource copies, or, data reads/writes.
54 *
55 * Usage:
56 *
57 * MTLContext::get_scratchbuffer_manager() - to fetch active manager.
58 *
59 * MTLTemporaryBuffer scratch_buffer_allocate_range(size)
60 * MTLTemporaryBuffer scratch_buffer_allocate_range_aligned(size, align)
61 *
62 * ---------------------------------------------------------------------------------
63 * ** MTLBufferPool **
64 *
65 * For static and longer-lasting memory allocations, such as those for UBOs,
66 * Vertex buffers, index buffers, etc; We want an optimal abstraction for
67 * fetching a MTLBuffer of the desired size and resource options.
68 *
69 * Memory allocations can be expensive so the MTLBufferPool provides
70 * functionality to track usage of these buffers and once a buffer
71 * is no longer in use, it is returned to the buffer pool for use
72 * by another backend resource.
73 *
74 * The MTLBufferPool provides functionality for safe tracking of resources,
75 * as buffers freed on the host side must have their usage by the GPU tracked,
76 * to ensure they are not prematurely re-used before they have finished being
77 * used by the GPU.
78 *
79 * NOTE: The MTLBufferPool is a global construct which can be fetched from anywhere.
80 *
81 * Usage:
82 * MTLContext::get_global_memory_manager(); - static routine to fetch global memory manager.
83 *
 84 gpu::MTLBuffer *allocate(size, is_cpu_visible)
 85 gpu::MTLBuffer *allocate_aligned(size, alignment, is_cpu_visible)
 86 gpu::MTLBuffer *allocate_with_data(size, is_cpu_visible, data_ptr)
 87 gpu::MTLBuffer *allocate_aligned_with_data(size, alignment, is_cpu_visible, data_ptr)
88 */
89
90/* Debug memory statistics: Disabled by Macro rather than guarded for
91 * performance considerations. */
92#define MTL_DEBUG_MEMORY_STATISTICS 0
93
94/* Allows a scratch buffer to temporarily grow beyond its maximum, which allows submission
95 * of one-time-use data packets which are too large. */
96#define MTL_SCRATCH_BUFFER_ALLOW_TEMPORARY_EXPANSION 1
97
98namespace blender::gpu {
99
100/* Forward Declarations. */
101class MTLContext;
102class MTLCommandBufferManager;
103class MTLUniformBuf;
104
105/* -------------------------------------------------------------------- */
109/* MTLBuffer allocation wrapper. */
111
112 public:
113 /* NOTE: ListBase API is not used due to custom destructor operation required to release
114 * Metal objective C buffer resource. */
116
117 private:
118 /* Metal resource. */
119 id<MTLBuffer> metal_buffer_;
120
121 /* Host-visible mapped-memory pointer. Behavior depends on buffer type:
122 * - Shared buffers: pointer represents base address of #MTLBuffer whose data
123 * access has shared access by both the CPU and GPU on
124 * Unified Memory Architectures (UMA).
125 * - Managed buffer: Host-side mapped buffer region for CPU (Host) access. Managed buffers
126 * must be manually flushed to transfer data to GPU-resident buffer.
127 * - Private buffer: Host access is invalid, `data` will be nullptr. */
128 void *data_;
129
130 /* Whether buffer is allocated from an external source. */
131 bool is_external_ = false;
132
133 /* Allocation info. */
134 MTLResourceOptions options_;
135 id<MTLDevice> device_;
136 uint64_t alignment_;
137 uint64_t size_;
138
139 /* Allocated size may be larger than actual size. */
140 uint64_t usage_size_;
141
142 /* Lifetime info - whether the current buffer is actively in use. A buffer
143 * should be in use after it has been allocated. De-allocating the buffer, and
144 * returning it to the free buffer pool will set in_use to false. Using a buffer
145 * while it is not in-use should not be allowed and result in an error. */
146 std::atomic<bool> in_use_;
147
148 public:
149 MTLBuffer(id<MTLDevice> device, uint64_t size, MTLResourceOptions options, uint alignment = 1);
150 MTLBuffer(id<MTLBuffer> external_buffer);
151 ~MTLBuffer();
152
153 /* Fetch information about backing MTLBuffer. */
154 id<MTLBuffer> get_metal_buffer() const;
155 void *get_host_ptr() const;
156 uint64_t get_size_used() const;
157 uint64_t get_size() const;
158
159 /* Flush data to GPU. */
160 void flush();
161 void flush_range(uint64_t offset, uint64_t length);
162 bool requires_flush();
163
164 /* Buffer usage tracking. */
165 void flag_in_use(bool used);
166 bool get_in_use();
167 void set_usage_size(uint64_t size_used);
168
169 /* Debug. */
170 void set_label(NSString *str);
171
172 /* Read properties. */
173 MTLResourceOptions get_resource_options();
175
176 /* Resource-local free: For buffers allocated via memory manager,
177 * this will call the context `free_buffer` method to return the buffer to the context memory
178 * pool.
179 *
180 * Otherwise, free will release the associated metal resource.
181 * As a note, calling the destructor will also destroy the buffer and associated metal
182 * resource. */
183 void free();
184
185 /* Safety check to ensure buffers are not used after free. */
186 void debug_ensure_used();
187
189};
190
191/* View into part of an MTLBuffer. */
193 id<MTLBuffer> metal_buffer;
194 void *data;
197 MTLResourceOptions options;
198
199 void flush();
200 bool requires_flush();
201};
202
203/* Circular scratch buffer allocations should be seen as temporary and only used within the
204 * lifetime of the frame. */
206
207/* Round-Robin Circular-buffer. */
210
211 private:
212 MTLContext &own_context_;
213
 214 /* Wrapped MTLBuffer allocation handle. */
215 gpu::MTLBuffer *cbuffer_;
216
217 /* Current offset where next allocation will begin. */
218 uint64_t current_offset_;
219
220 /* Whether the Circular Buffer can grow during re-allocation if
221 * the size is exceeded. */
222 bool can_resize_;
223
224 /* Usage information. */
225 uint64_t used_frame_index_;
226 uint64_t last_flush_base_offset_;
227
228 public:
229 MTLCircularBuffer(MTLContext &ctx, uint64_t initial_size, bool allow_grow);
233 void flush();
234
235 /* Reset pointer back to start of circular buffer. */
236 void reset();
237};
238
239/* Wrapper struct used by Memory Manager to sort and compare gpu::MTLBuffer resources inside the
240 * memory pools. */
245
247 {
248 this->buffer = buf;
249 this->buffer_size = this->buffer->get_size();
250 this->insert_time = std::time(nullptr);
251 }
252
254 {
255 this->buffer = nullptr;
256 this->buffer_size = compare_size;
257 this->insert_time = 0;
258 }
259};
260
262 bool operator()(const MTLBufferHandle &lhs, const MTLBufferHandle &rhs) const
263 {
264 return lhs.buffer_size < rhs.buffer_size;
265 }
266};
267
297 friend class MTLBufferPool;
298
299 private:
300 std::atomic<int> reference_count_;
301 std::atomic<bool> in_free_queue_;
302 std::atomic<bool> referenced_by_workload_;
303 std::recursive_mutex lock_;
304 /* Linked list of next MTLSafeFreeList chunk if current chunk is full. */
305 std::atomic<MTLSafeFreeList *> next_;
306
307 /* Lockless list. MAX_NUM_BUFFERS_ within a chunk based on considerations
308 * for performance and memory. Higher chunk counts are preferable for efficiently
309 * performing block operations such as copying several objects simultaneously.
310 *
311 * MIN_BUFFER_FLUSH_COUNT refers to the minimum count of buffers in the MTLSafeFreeList
312 * before buffers are returned to global memory pool. This is set at a point to reduce
313 * overhead of small pool flushes, while ensuring floating memory overhead is not excessive. */
314 static const int MAX_NUM_BUFFERS_ = 8192;
315 static const int MIN_BUFFER_FLUSH_COUNT = 120;
316 std::atomic<int> current_list_index_;
317 gpu::MTLBuffer *safe_free_pool_[MAX_NUM_BUFFERS_];
318
319 public:
321
322 /* Can be used from multiple threads. Performs insertion into Safe Free List with the least
323 * amount of threading synchronization. */
324 void insert_buffer(gpu::MTLBuffer *buffer);
325
326 /* Whether we need to start a new safe free list, or can carry on using the existing one. */
327 bool should_flush();
328
329 /* Increments command buffer reference count. */
330 void increment_reference();
331
332 /* Decrement and return of buffers to pool occur on MTLCommandBuffer completion callback. */
333 void decrement_reference();
334
336 {
337 in_free_queue_ = true;
338 if (current_list_index_ >= MTLSafeFreeList::MAX_NUM_BUFFERS_) {
339 MTLSafeFreeList *next_pool = next_.load();
340 if (next_pool) {
341 next_pool->flag_in_queue();
342 }
343 }
344 }
345
346 MEM_CXX_CLASS_ALLOC_FUNCS("MTLSafeFreeList");
347};
348
349/* MTLBuffer pools. */
350/* Allocating Metal buffers is expensive, so we cache all allocated buffers,
351 * and when requesting a new buffer, find one which fits the required dimensions
352 * from an existing pool of buffers.
353 *
354 * When freeing MTLBuffers, we insert them into the current MTLSafeFreeList, which defers
355 * release of the buffer until the associated command buffers have finished executing.
356 * This prevents a buffer from being re-used while it is still in-use by the GPU.
357 *
358 * * Once command buffers complete, MTLSafeFreeList's associated with the current
359 * command buffer submission are added to the `completed_safelist_queue_`.
360 *
361 * * At a set point in time, all MTLSafeFreeList's in `completed_safelist_queue_` have their
362 * MTLBuffers re-inserted into the Memory Manager's pools. */
364
365 private:
366#if MTL_DEBUG_MEMORY_STATISTICS == 1
367 /* Memory statistics. */
368 std::atomic<int64_t> total_allocation_bytes_;
369
370 /* Debug statistics. */
371 std::atomic<int> per_frame_allocation_count_;
372 std::atomic<int64_t> buffers_in_pool_;
373#endif
374
375 /* Metal resources. */
376 bool initialized_ = false;
377 id<MTLDevice> device_ = nil;
378
379 /* The buffer selection aims to pick a buffer which meets the minimum size requirements.
380 * To do this, we keep an ordered set of all available buffers. If the buffer is larger than the
381 * desired allocation size, we check it against `mtl_buffer_size_threshold_factor_`,
382 * which defines what % larger than the original allocation the buffer can be.
383 * - A higher value results in greater re-use of previously allocated buffers of similar sizes.
384 * - A lower value may result in more dynamic allocations, but minimized memory usage for a given
385 * scenario.
386 * The current value of 1.26 is calibrated for optimal performance and memory utilization. */
387 static constexpr float mtl_buffer_size_threshold_factor_ = 1.26;
388
389 /* Buffer pools using MTLResourceOptions as key for allocation type.
390 * Aliased as 'uint64_t' for map type compatibility.
391 * - A size-ordered list (MultiSet) of allocated buffers is kept per MTLResourceOptions
392 * permutation. This allows efficient lookup for buffers of a given requested size.
393 * - MTLBufferHandle wraps a gpu::MTLBuffer pointer to achieve easy size-based sorting
394 * via CompareMTLBuffer.
395 *
396 * NOTE: buffer_pool_lock_ guards against concurrent access to the memory allocator. This
397 * can occur during light baking or rendering operations. */
398 using MTLBufferPoolOrderedList = std::multiset<MTLBufferHandle, CompareMTLBuffer>;
399 using MTLBufferResourceOptions = uint64_t;
400
401 std::mutex buffer_pool_lock_;
403
404 /* Linked list to track all existing allocations. Prioritizing fast insert/deletion. */
405 gpu::MTLBuffer *allocations_list_base_;
406 uint allocations_list_size_;
407
408 /* Maintain a queue of all MTLSafeFreeList's that have been released
409 * by the GPU and are ready to have their buffers re-inserted into the
410 * MemoryManager pools.
411 * Access to this queue is made thread-safe through safelist_lock_. */
412 std::mutex safelist_lock_;
413 blender::Vector<MTLSafeFreeList *> completed_safelist_queue_;
414
415 /* Current free list, associated with active MTLCommandBuffer submission. */
416 /* MTLBuffer::free() can be called from separate threads, due to usage within animation
417 * system/worker threads. */
418 std::atomic<MTLSafeFreeList *> current_free_list_;
419 std::atomic<int64_t> allocations_in_pool_;
420
421 /* Previous list, to be released after one full frame. */
422 MTLSafeFreeList *prev_free_buffer_list_ = nullptr;
423
424 public:
425 void init(id<MTLDevice> device);
427
428 gpu::MTLBuffer *allocate(uint64_t size, bool cpu_visible);
429 gpu::MTLBuffer *allocate_aligned(uint64_t size, uint alignment, bool cpu_visible);
430 gpu::MTLBuffer *allocate_with_data(uint64_t size, bool cpu_visible, const void *data = nullptr);
432 uint alignment,
433 bool cpu_visible,
434 const void *data = nullptr);
435 bool free_buffer(gpu::MTLBuffer *buffer);
436
437 /* Flush MTLSafeFreeList buffers, for completed lists in `completed_safelist_queue_`,
438 * back to memory pools. */
439 void update_memory_pools();
440
441 /* Access and control over active MTLSafeFreeList. */
443 void begin_new_safe_list();
444
445 /* Add a completed MTLSafeFreeList to completed_safelist_queue_. */
447
448 private:
449 void ensure_buffer_pool(MTLResourceOptions options);
450 void insert_buffer_into_pool(MTLResourceOptions options, gpu::MTLBuffer *buffer);
451 void free();
452
453 /* Allocations list. */
454 void allocations_list_insert(gpu::MTLBuffer *buffer);
455 void allocations_list_delete(gpu::MTLBuffer *buffer);
456 void allocations_list_delete_all();
457};
458
459/* Scratch buffers are circular-buffers used for temporary data within the current frame.
460 * In order to preserve integrity of contents when having multiple-frames-in-flight,
461 * we cycle through a collection of scratch buffers which are reset upon next use.
462 *
463 * Below are a series of properties, declared to manage scratch buffers. If a scratch buffer
464 * overflows, then the original buffer will be flushed and submitted, with retained references
465 * by usage within the command buffer, and a new buffer will be created.
466 * - The new buffer will grow in size to account for increased demand in temporary memory.
467 */
469
470 private:
471 /* Maximum number of scratch buffers to allocate. This should be the maximum number of
472 * simultaneous frames in flight. */
473 static constexpr uint mtl_max_scratch_buffers_ = MTL_NUM_SAFE_FRAMES;
474
475 public:
476 /* Maximum size of single scratch buffer allocation. When re-sizing, this is the maximum size the
477 * newly allocated buffers will grow to. Larger allocations are possible if
478 * `MTL_SCRATCH_BUFFER_ALLOW_TEMPORARY_EXPANSION` is enabled, but these will instead allocate new
479 * buffers from the memory pools on the fly. */
480 static constexpr uint mtl_scratch_buffer_max_size_ = 128 * 1024 * 1024;
481
482 /* Initial size of circular scratch buffers prior to growth. */
483 static constexpr uint mtl_scratch_buffer_initial_size_ = 16 * 1024 * 1024;
484
485 private:
486 /* Parent MTLContext. */
487 MTLContext &context_;
488 bool initialised_ = false;
489
490 /* Scratch buffer currently in-use. */
491 uint current_scratch_buffer_ = 0;
492
493 /* Scratch buffer pool. */
494 MTLCircularBuffer *scratch_buffers_[mtl_max_scratch_buffers_];
495
496 public:
497 MTLScratchBufferManager(MTLContext &context) : context_(context){};
499
500 /* Explicit initialization and freeing of resources.
501 * Initialization must occur after device creation. */
502 void init();
503 void free();
504
505 /* Allocation functions for creating temporary allocations from active circular buffer. */
508
509 /* Ensure a new scratch buffer is started if we move onto a new frame.
510 * Called when a new command buffer begins. */
512
513 /* Flush memory for active scratch buffer to GPU.
514 * This call will perform a partial flush of the buffer starting from
515 * the last offset the data was flushed from, to the current offset. */
517
519};
520
523} // namespace blender::gpu
unsigned int uint
void init()
void push_completed_safe_list(MTLSafeFreeList *list)
gpu::MTLBuffer * allocate_with_data(uint64_t size, bool cpu_visible, const void *data=nullptr)
MTLSafeFreeList * get_current_safe_list()
gpu::MTLBuffer * allocate(uint64_t size, bool cpu_visible)
Definition mtl_memory.mm:96
gpu::MTLBuffer * allocate_aligned(uint64_t size, uint alignment, bool cpu_visible)
gpu::MTLBuffer * allocate_aligned_with_data(uint64_t size, uint alignment, bool cpu_visible, const void *data=nullptr)
bool free_buffer(gpu::MTLBuffer *buffer)
void flag_in_use(bool used)
uint64_t get_size() const
void set_usage_size(uint64_t size_used)
gpu::MTLBuffer * next
uint64_t get_size_used() const
void * get_host_ptr() const
void flush_range(uint64_t offset, uint64_t length)
MTLBuffer(id< MTLDevice > device, uint64_t size, MTLResourceOptions options, uint alignment=1)
gpu::MTLBuffer * prev
void set_label(NSString *str)
MTLResourceOptions get_resource_options()
id< MTLBuffer > get_metal_buffer() const
MEM_CXX_CLASS_ALLOC_FUNCS("MTLBuffer")
MTLCircularBuffer(MTLContext &ctx, uint64_t initial_size, bool allow_grow)
MTLTemporaryBuffer allocate_range_aligned(uint64_t alloc_size, uint alignment)
MTLTemporaryBuffer allocate_range(uint64_t alloc_size)
MEM_CXX_CLASS_ALLOC_FUNCS("MTLSafeFreeList")
void insert_buffer(gpu::MTLBuffer *buffer)
static constexpr uint mtl_scratch_buffer_max_size_
static constexpr uint mtl_scratch_buffer_initial_size_
MTLScratchBufferManager(MTLContext &context)
MTLTemporaryBuffer scratch_buffer_allocate_range_aligned(uint64_t alloc_size, uint alignment)
MTLTemporaryBuffer scratch_buffer_allocate_range(uint64_t alloc_size)
local_group_size(16, 16) .push_constant(Type rhs
CCL_NAMESPACE_BEGIN struct Options options
#define str(s)
static int compare_size(void *user_data, const void *a1, const void *a2)
Definition filelist.cc:516
#define MTL_NUM_SAFE_FRAMES
Definition mtl_common.hh:17
unsigned __int64 uint64_t
Definition stdint.h:90
bool operator()(const MTLBufferHandle &lhs, const MTLBufferHandle &rhs) const
MTLBufferHandle(gpu::MTLBuffer *buf)
MTLBufferHandle(uint64_t compare_size)
MTLResourceOptions options