21#define MTL_SCRATCH_BUFFER_ALLOW_TEMPORARY_EXPANSION
42#if MTL_DEBUG_MEMORY_STATISTICS == 1
44 total_allocation_bytes_ = 0;
45 per_frame_allocation_count_ = 0;
49 allocations_in_pool_ = 0;
52 allocations_list_base_ =
nullptr;
53 allocations_list_size_ = 0;
/* Tear down the buffer pool: delete every tracked allocation, drain the queue
 * of completed safe-free lists, delete the current/previous free lists under
 * `safelist_lock_`, and clear the per-storage-mode buffer pools.
 * NOTE(review): this extraction has elided several original lines (the
 * embedded original line numbers jump) — closing braces and some loop bodies
 * are not visible here; the comments below describe only what the visible
 * statements demonstrably do. */
66void MTLBufferPool::free()
/* Guard the pool structures while tearing them down. */
68  buffer_pool_lock_.lock();
/* Delete every MTLBuffer tracked in the intrusive allocations list. */
71  allocations_list_delete_all();
/* Delete each safe-free list that has already completed. */
74  for (
int safe_pool_free_index = 0; safe_pool_free_index < completed_safelist_queue_.size();
75       safe_pool_free_index++)
77    delete completed_safelist_queue_[safe_pool_free_index];
79  completed_safelist_queue_.clear();
/* The current/previous free lists are shared with command-buffer submission,
 * so they get their own lock. */
81  safelist_lock_.lock();
82  if (current_free_list_ !=
nullptr) {
83    delete current_free_list_;
84    current_free_list_ =
nullptr;
86  if (prev_free_buffer_list_ !=
nullptr) {
87    delete prev_free_buffer_list_;
88    prev_free_buffer_list_ =
nullptr;
90  safelist_lock_.unlock();
/* Delete each per-resource-option multiset pool (loop body elided in this
 * extraction — presumably `delete buffer_pool`; TODO confirm against the
 * original file). */
93  for (std::multiset<blender::gpu::MTLBufferHandle, blender::gpu::CompareMTLBuffer> *buffer_pool :
94       buffer_pools_.values())
99  buffer_pools_.clear();
100  buffer_pool_lock_.unlock();
133 options = ([device_ hasUnifiedMemory]) ? MTLResourceStorageModeShared :
134 MTLResourceStorageModeManaged;
137 options = MTLResourceStorageModePrivate;
142 buffer_pool_lock_.lock();
144 std::multiset<MTLBufferHandle, CompareMTLBuffer> **pool_search = buffer_pools_.lookup_ptr(
147 if (pool_search !=
nullptr) {
148 std::multiset<MTLBufferHandle, CompareMTLBuffer> *pool = *pool_search;
150 auto result = pool->lower_bound(size_compare);
151 if (
result != pool->end()) {
159 if (found_size >= aligned_alloc_size &&
160 found_size <= (aligned_alloc_size * mtl_buffer_size_threshold_factor_))
163 "[MemoryAllocator] Suitable Buffer of size %lld found, for requested size: %lld",
167 new_buffer = found_buffer;
175 "[MemoryAllocator] Buffer of size %lld found, but was incompatible with requested "
179 new_buffer =
nullptr;
185 if (new_buffer ==
nullptr) {
189 allocations_list_insert(new_buffer);
195#if MTL_DEBUG_MEMORY_STATISTICS == 1
202 allocations_in_pool_ -= new_buffer->
get_size();
210#if MTL_DEBUG_MEMORY_STATISTICS == 1
211 per_frame_allocation_count_++;
215 buffer_pool_lock_.unlock();
264 safelist_lock_.lock();
265 buffer_pool_lock_.lock();
267#if MTL_DEBUG_MEMORY_STATISTICS == 1
268 int num_buffers_added = 0;
272 for (
int safe_pool_free_index = 0; safe_pool_free_index < completed_safelist_queue_.size();
273 safe_pool_free_index++)
275 MTLSafeFreeList *current_pool = completed_safelist_queue_[safe_pool_free_index];
278 while (current_pool !=
nullptr) {
279 current_pool->lock_.lock();
283 int size =
min_ii(current_pool->current_list_index_, MTLSafeFreeList::MAX_NUM_BUFFERS_);
286 while (counter <
size) {
295#if MTL_DEBUG_MEMORY_STATISTICS == 1
304 current_pool->lock_.unlock();
306 current_pool =
nullptr;
309 if (next_list !=
nullptr) {
310 current_pool = next_list;
321 const time_t time_now = std::time(
nullptr);
322 for (
auto buffer_pool_list : buffer_pools_.items()) {
323 MTLBufferPoolOrderedList *pool_allocations = buffer_pool_list.value;
324 MTLBufferPoolOrderedList::iterator pool_iterator = pool_allocations->begin();
325 while (pool_iterator != pool_allocations->end()) {
328 const time_t time_passed = time_now - handle.
insert_time;
333 time_t deletion_time_threshold_s = 600;
336 deletion_time_threshold_s = 2;
341 deletion_time_threshold_s = 4;
345 deletion_time_threshold_s = 15;
349 deletion_time_threshold_s = 60;
352 if (time_passed > deletion_time_threshold_s) {
355 allocations_list_delete(handle.
buffer);
358 pool_iterator = pool_allocations->erase(pool_iterator);
360#if MTL_DEBUG_MEMORY_STATISTICS == 1
369#if MTL_DEBUG_MEMORY_STATISTICS == 1
370 printf(
"--- Allocation Stats ---\n");
371 printf(
" Num buffers processed in pool (this frame): %u\n", num_buffers_added);
373 uint framealloc = (
uint)per_frame_allocation_count_;
374 printf(
" Allocations in frame: %u\n", framealloc);
375 printf(
" Total Buffers allocated: %u\n", allocations_list_size_);
376 printf(
" Total Memory allocated: %u MB\n", (
uint)total_allocation_bytes_ / (1024 * 1024));
378 uint allocs = (
uint)(allocations_in_pool_) / 1024 / 2024;
379 printf(
" Free memory in pools: %u MB\n", allocs);
381 uint buffs = (
uint)buffers_in_pool_;
382 printf(
" Buffers in pools: %u\n", buffs);
384 printf(
" Pools %u:\n", (
uint)buffer_pools_.size());
385 auto key_iterator = buffer_pools_.keys().begin();
386 auto value_iterator = buffer_pools_.values().begin();
387 while (key_iterator != buffer_pools_.keys().end()) {
390 for (
auto it = (*value_iterator)->begin(); it != (*value_iterator)->end(); it++) {
391 mem_in_pool += it->buffer_size;
395 printf(
" Buffers in pool (%u)(%llu): %u (%u MB)\n",
398 (
uint)((*value_iterator)->size()),
399 (
uint)mem_in_pool / 1024 / 1024);
404 per_frame_allocation_count_ = 0;
408 completed_safelist_queue_.clear();
409 buffer_pool_lock_.unlock();
410 safelist_lock_.unlock();
418 safe_list->lock_.lock();
420 BLI_assert(safe_list->reference_count_ == 0 &&
421 "Pool must be fully dereferenced by all in-use cmd buffers before returning.\n");
422 BLI_assert(safe_list->in_free_queue_ ==
false &&
"Pool must not already be in queue");
426 safelist_lock_.lock();
427 completed_safelist_queue_.append(safe_list);
428 safelist_lock_.unlock();
429 safe_list->lock_.unlock();
435 return current_free_list_;
440 safelist_lock_.lock();
444 prev_free_buffer_list_ = active_list;
445 safelist_lock_.unlock();
/* Lazily create the free-buffer pool (an ordered multiset keyed by buffer
 * size) for the given Metal resource-storage options, if one does not exist
 * yet. NOTE(review): the lookup key and the insertion of `pool` into
 * `buffer_pools_` are on lines elided from this extraction — presumably
 * `lookup_ptr(options)` followed by `buffer_pools_.add_new(options, pool)`;
 * TODO confirm against the original file. */
454void MTLBufferPool::ensure_buffer_pool(MTLResourceOptions
options)
456  std::multiset<MTLBufferHandle, CompareMTLBuffer> **pool_search = buffer_pools_.
lookup_ptr(
/* No pool registered for these options yet — create one. */
458  if (pool_search ==
nullptr) {
459    std::multiset<MTLBufferHandle, CompareMTLBuffer> *pool =
460        new std::multiset<MTLBufferHandle, CompareMTLBuffer>();
474 this->ensure_buffer_pool(
options);
484 std::multiset<MTLBufferHandle, CompareMTLBuffer> *pool = buffer_pools_.
lookup(
options);
486 allocations_in_pool_ += buffer->
get_size();
488#if MTL_DEBUG_MEMORY_STATISTICS == 1
/* Push `buffer` onto the head of the intrusive doubly-linked list of all
 * live allocations owned by this pool, and bump the list size plus (in
 * debug-statistics builds) the total allocated byte counter.
 * NOTE(review): some lines (e.g. locking/asserts, `#endif`) are elided in
 * this extraction. */
494void MTLBufferPool::allocations_list_insert(gpu::MTLBuffer *buffer)
/* Standard head-insert: new node points at the old head... */
501  gpu::MTLBuffer *current_head = allocations_list_base_;
502  buffer->
next = current_head;
503  buffer->
prev =
nullptr;
/* ...and the old head (if any) points back at the new node. */
505  if (current_head !=
nullptr) {
506    current_head->
prev = buffer;
509  allocations_list_base_ = buffer;
510  allocations_list_size_++;
/* Debug-only memory accounting. */
512#if MTL_DEBUG_MEMORY_STATISTICS == 1
513  total_allocation_bytes_ += buffer->
get_size();
/* Unlink `buffer` from the intrusive doubly-linked allocations list,
 * decrement the list size and (in debug-statistics builds) the total
 * allocated byte counter. NOTE(review): the bodies of the `prev`/`next`
 * relink branches are elided in this extraction — presumably
 * `prev->next = next` / `next->prev = prev`; TODO confirm against the
 * original file. */
517void MTLBufferPool::allocations_list_delete(gpu::MTLBuffer *buffer)
/* Capture neighbours before unlinking. */
525  gpu::MTLBuffer *
next = buffer->
next;
526  gpu::MTLBuffer *
prev = buffer->
prev;
528  if (prev !=
nullptr) {
533  if (
next !=
nullptr) {
/* If the deleted node was the head, advance the head pointer. */
538  if (allocations_list_base_ == buffer) {
539    allocations_list_base_ =
next;
542  allocations_list_size_--;
/* Debug-only memory accounting. */
544#if MTL_DEBUG_MEMORY_STATISTICS == 1
545  total_allocation_bytes_ -= buffer->
get_size();
/* Walk the intrusive allocations list from its base, releasing every entry,
 * then reset the list to empty and zero the debug byte counter.
 * NOTE(review): the per-node release inside the loop (presumably
 * `current->free()` or `delete current`, then `current = next`) is on lines
 * elided from this extraction — TODO confirm against the original file. */
552void MTLBufferPool::allocations_list_delete_all()
554  gpu::MTLBuffer *current = allocations_list_base_;
555  while (current !=
nullptr) {
/* Save the successor before the current node is destroyed. */
556    gpu::MTLBuffer *
next = current->
next;
/* List is now empty. */
560  allocations_list_size_ = 0;
561  allocations_list_base_ =
nullptr;
/* Debug-only memory accounting. */
563#if MTL_DEBUG_MEMORY_STATISTICS == 1
564  total_allocation_bytes_ = 0;
570 reference_count_ = 1;
571 in_free_queue_ =
false;
572 current_list_index_ = 0;
574 referenced_by_workload_ =
false;
582 uint insert_index = current_list_index_++;
586 if (insert_index >= MTLSafeFreeList::MAX_NUM_BUFFERS_) {
593 std::unique_lock
lock(lock_);
595 next_list = next_.load();
598 next_.store(next_list);
605 current_list_index_ = MTLSafeFreeList::MAX_NUM_BUFFERS_;
609 safe_free_pool_[insert_index] = buffer;
618 referenced_by_workload_ =
true;
628 int ref_count = --reference_count_;
630 if (ref_count == 0) {
643 return ((reference_count_ > 1 || referenced_by_workload_) &&
644 current_list_index_ > MIN_BUFFER_FLUSH_COUNT);
662 alignment_ = alignment;
663 device_ = mtl_device;
664 is_external_ =
false;
669 metal_buffer_ = [device_ newBufferWithLength:aligned_alloc_size
options:
options];
672 size_ = aligned_alloc_size;
674 if (!(options_ & MTLResourceStorageModePrivate)) {
675 data_ = [metal_buffer_ contents];
690 metal_buffer_ = external_buffer;
691 [metal_buffer_ retain];
697 options_ = [metal_buffer_ resourceOptions];
698 size_ = [metal_buffer_ allocatedSize];
700 data_ = [metal_buffer_ contents];
709 if (metal_buffer_ != nil) {
710 [metal_buffer_ release];
721 if (metal_buffer_ != nil) {
722 [metal_buffer_ release];
730 return metal_buffer_;
735 BLI_assert(!(options_ & MTLResourceStorageModePrivate));
753 return options_ & MTLResourceStorageModeManaged;
758 metal_buffer_.label =
str;
766 "Buffer should be marked as 'in-use' if being actively used by an instance. Buffer "
767 "has likely already been freed.");
774 [metal_buffer_ didModifyRange:NSMakeRange(0, size_)];
783 [metal_buffer_ didModifyRange:NSMakeRange(offset,
length)];
799 BLI_assert(size_used > 0 && size_used <= size_);
800 usage_size_ = size_used;
816 return this->options & MTLResourceStorageModeManaged;
844 if (!this->initialised_) {
848 for (
int sb = 0; sb < mtl_max_scratch_buffers_; sb++) {
852 BLI_assert(&(scratch_buffers_[sb]->own_context_) == &context_);
854 current_scratch_buffer_ = 0;
861 initialised_ =
false;
864 for (
int sb = 0; sb < mtl_max_scratch_buffers_; sb++) {
865 delete scratch_buffers_[sb];
866 scratch_buffers_[sb] =
nullptr;
868 current_scratch_buffer_ = 0;
880 alignment =
max_uu(alignment, 256);
882 BLI_assert_msg(current_scratch_buffer_ >= 0,
"Scratch Buffer index not set");
883 MTLCircularBuffer *current_scratch_buff = this->scratch_buffers_[current_scratch_buffer_];
884 BLI_assert_msg(current_scratch_buff !=
nullptr,
"Scratch Buffer does not exist");
887 BLI_assert(allocated_range.
size >= alloc_size && allocated_range.
size <= alloc_size + alignment);
889 return allocated_range;
895 MTLCircularBuffer *active_scratch_buf = scratch_buffers_[current_scratch_buffer_];
896 BLI_assert(&active_scratch_buf->own_context_ == &context_);
902 if (active_scratch_buf->used_frame_index_ < context_.get_current_frame_index()) {
903 current_scratch_buffer_ = (current_scratch_buffer_ + 1) % mtl_max_scratch_buffers_;
904 active_scratch_buf = scratch_buffers_[current_scratch_buffer_];
905 active_scratch_buf->
reset();
906 BLI_assert(&active_scratch_buf->own_context_ == &context_);
907 MTL_LOG_DEBUG(
"Scratch buffer %d reset - (ctx %p)(Frame index: %d)",
908 current_scratch_buffer_,
910 context_.get_current_frame_index());
917 MTLCircularBuffer *active_scratch_buf = scratch_buffers_[current_scratch_buffer_];
918 BLI_assert(&active_scratch_buf->own_context_ == &context_);
919 active_scratch_buf->
flush();
925 MTLCircularBuffer *active_scratch_buf = scratch_buffers_[current_scratch_buffer_];
926 BLI_assert(&active_scratch_buf->own_context_ == &context_);
927 active_scratch_buf->ssbo_source_->
bind(slot);
933 MTLCircularBuffer *active_scratch_buf = scratch_buffers_[current_scratch_buffer_];
934 BLI_assert(&active_scratch_buf->own_context_ == &context_);
935 active_scratch_buf->ssbo_source_->
unbind();
944 cbuffer_ = ssbo_source_->metal_buffer_;
946 can_resize_ = allow_grow;
947 cbuffer_->flag_in_use(
true);
950 last_flush_base_offset_ = 0;
954 cbuffer_->set_label(
@"Circular Scratch Buffer");
974 alignment =
max_uu(alignment, 256);
979 bool can_allocate = (aligned_current_offset + aligned_alloc_size) < cbuffer_->get_size();
981 BLI_assert(aligned_current_offset >= current_offset_);
984 BLI_assert(aligned_current_offset % alignment == 0);
985 BLI_assert(aligned_alloc_size % alignment == 0);
989 uint64_t new_size = cbuffer_->get_size();
996 aligned_current_offset + aligned_alloc_size);
998#ifdef MTL_SCRATCH_BUFFER_ALLOW_TEMPORARY_EXPANSION
1007 new_size = aligned_alloc_size;
1008 MTL_LOG_DEBUG(
"Temporarily growing Scratch buffer to %d MB",
1009 (
int)new_size / 1024 / 1024);
1013 MTL_LOG_DEBUG(
"Shrinking Scratch buffer back to %d MB", (
int)new_size / 1024 / 1024);
1020 if (aligned_alloc_size > new_size) {
1026 alloc_range.
data =
nullptr;
1028 alloc_range.
size = 0;
1029 alloc_range.
options = cbuffer_->options;
1035 "Performance Warning: Reached the end of circular buffer of size: %llu, but cannot "
1036 "resize. Starting new buffer",
1037 cbuffer_->get_size());
1043 alloc_range.
data =
nullptr;
1045 alloc_range.
size = 0;
1046 alloc_range.
options = cbuffer_->get_resource_options();
1054 delete ssbo_source_;
1056 cbuffer_ = ssbo_source_->metal_buffer_;
1057 cbuffer_->flag_in_use(
true);
1058 current_offset_ = 0;
1059 last_flush_base_offset_ = 0;
1063 cbuffer_->set_label(
@"Circular Scratch Buffer");
1065 MTL_LOG_DEBUG(
"Resized Metal circular buffer to %llu bytes", new_size);
1068 aligned_current_offset = 0;
1069 BLI_assert((aligned_current_offset + aligned_alloc_size) <= cbuffer_->get_size());
1074 alloc_range.
metal_buffer = cbuffer_->get_metal_buffer();
1075 alloc_range.
data = (
void *)((uint8_t *)([alloc_range.
metal_buffer contents]) +
1076 aligned_current_offset);
1078 alloc_range.
size = aligned_alloc_size;
1079 alloc_range.
options = cbuffer_->get_resource_options();
1083 current_offset_ = aligned_current_offset + aligned_alloc_size;
1084 BLI_assert(current_offset_ <= cbuffer_->get_size());
1092 uint64_t len = current_offset_ - last_flush_base_offset_;
1094 cbuffer_->flush_range(last_flush_base_offset_,
len);
1095 last_flush_base_offset_ = current_offset_;
1104 if (current_offset_ > 0) {
1107 BLI_assert((own_context_.get_current_frame_index() >=
1109 "Trying to reset Circular scratch buffer's while its data is still being used by "
1110 "an in-flight frame");
1112 current_offset_ = 0;
1113 last_flush_base_offset_ = 0;
1117 used_frame_index_ = own_context_.get_current_frame_index();
#define BLI_assert_msg(a, msg)
MINLINE int min_ii(int a, int b)
MINLINE uint max_uu(uint a, uint b)
MINLINE uint64_t ceil_to_multiple_ul(uint64_t a, uint64_t b)
BMesh const char void * data
unsigned long long int uint64_t
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
const Value * lookup_ptr(const Key &key) const
const Value & lookup(const Key &key) const
void add_new(const Key &key, const Value &value)
void push_completed_safe_list(MTLSafeFreeList *list)
void init(id< MTLDevice > device)
gpu::MTLBuffer * allocate_with_data(uint64_t size, bool cpu_visible, const void *data=nullptr)
MTLSafeFreeList * get_current_safe_list()
void update_memory_pools()
void begin_new_safe_list()
gpu::MTLBuffer * allocate(uint64_t size, bool cpu_visible)
gpu::MTLBuffer * allocate_aligned(uint64_t size, uint alignment, bool cpu_visible)
gpu::MTLBuffer * allocate_aligned_with_data(uint64_t size, uint alignment, bool cpu_visible, const void *data=nullptr)
bool free_buffer(gpu::MTLBuffer *buffer)
void flag_in_use(bool used)
uint64_t get_size() const
void set_usage_size(uint64_t size_used)
uint64_t get_size_used() const
void * get_host_ptr() const
void flush_range(uint64_t offset, uint64_t length)
MTLBuffer(id< MTLDevice > device, uint64_t size, MTLResourceOptions options, uint alignment=1)
void set_label(NSString *str)
MTLResourceOptions get_resource_options()
id< MTLBuffer > get_metal_buffer() const
MTLCircularBuffer(MTLContext &ctx, uint64_t initial_size, bool allow_grow)
MTLTemporaryBuffer allocate_range_aligned(uint64_t alloc_size, uint alignment)
MTLTemporaryBuffer allocate_range(uint64_t alloc_size)
uint get_current_frame_index()
static MTLBufferPool * get_global_memory_manager()
void increment_reference()
void decrement_reference()
void insert_buffer(gpu::MTLBuffer *buffer)
static constexpr uint mtl_scratch_buffer_max_size_
static constexpr uint mtl_scratch_buffer_initial_size_
MTLTemporaryBuffer scratch_buffer_allocate_range_aligned(uint64_t alloc_size, uint alignment)
void flush_active_scratch_buffer()
void ensure_increment_scratch_buffer()
~MTLScratchBufferManager()
MTLTemporaryBuffer scratch_buffer_allocate_range(uint64_t alloc_size)
void bind_as_ssbo(int slot)
void bind(int slot) override
CCL_NAMESPACE_BEGIN struct Options options
float length(VecOp< float, D >) RET
MINLINE unsigned long long min_ulul(unsigned long long a, unsigned long long b)
MINLINE unsigned long long max_ulul(unsigned long long a, unsigned long long b)
#define MTL_NUM_SAFE_FRAMES
#define MTL_LOG_WARNING(info,...)
#define MTL_LOG_DEBUG(info,...)
static constexpr size_t MEMORY_SIZE_1GB
static constexpr size_t MEMORY_SIZE_512MB
static constexpr size_t MEMORY_SIZE_2GB
static constexpr size_t MEMORY_SIZE_256MB
MTLBufferRange MTLTemporaryBuffer
SymEdge< T > * prev(const SymEdge< T > *se)
MTLResourceOptions options
id< MTLBuffer > metal_buffer