mtl_memory.mm
/* SPDX-FileCopyrightText: 2022-2023 Blender Authors
 *
 * SPDX-License-Identifier: GPL-2.0-or-later */

#include "BKE_global.hh"

#include "DNA_userdef_types.h"

#include "BLI_math_base.h"

#include "mtl_context.hh"
#include "mtl_debug.hh"
#include "mtl_memory.hh"
#include "mtl_storage_buffer.hh"

using namespace blender;
using namespace blender::gpu;

/* Allows a scratch buffer to temporarily grow beyond its maximum, which allows submission
 * of one-time-use data packets which are too large. */
#define MTL_SCRATCH_BUFFER_ALLOW_TEMPORARY_EXPANSION

/* Memory size in bytes macros, used as pool flushing frequency thresholds. */
constexpr static size_t MEMORY_SIZE_256MB = 256LL * (1024LL * 1024LL);
constexpr static size_t MEMORY_SIZE_512MB = 512LL * (1024LL * 1024LL);
constexpr static size_t MEMORY_SIZE_1GB = 1LL * (1024LL * 1024LL * 1024LL);
constexpr static size_t MEMORY_SIZE_2GB = 2LL * (1024LL * 1024LL * 1024LL);

namespace blender::gpu {

/* -------------------------------------------------------------------- */

void MTLBufferPool::init(id<MTLDevice> mtl_device)
{
  if (!initialized_) {
    BLI_assert(mtl_device);
    initialized_ = true;
    device_ = mtl_device;

#if MTL_DEBUG_MEMORY_STATISTICS == 1
    /* Debug statistics. */
    total_allocation_bytes_ = 0;
    per_frame_allocation_count_ = 0;
    buffers_in_pool_ = 0;
#endif
    /* Track pool allocation size. */
    allocations_in_pool_ = 0;

    /* Live allocations list. */
    allocations_list_base_ = nullptr;
    allocations_list_size_ = 0;

    /* Free pools -- Create initial safe free pool */
    BLI_assert(current_free_list_ == nullptr);
    this->begin_new_safe_list();
  }
}

MTLBufferPool::~MTLBufferPool()
{
  this->free();
}

void MTLBufferPool::free()
{
  buffer_pool_lock_.lock();

  /* Delete all existing allocations. */
  allocations_list_delete_all();

  /* Release safe free lists. */
  for (int safe_pool_free_index = 0; safe_pool_free_index < completed_safelist_queue_.size();
       safe_pool_free_index++)
  {
    delete completed_safelist_queue_[safe_pool_free_index];
  }
  completed_safelist_queue_.clear();

  safelist_lock_.lock();
  if (current_free_list_ != nullptr) {
    delete current_free_list_;
    current_free_list_ = nullptr;
  }
  if (prev_free_buffer_list_ != nullptr) {
    delete prev_free_buffer_list_;
    prev_free_buffer_list_ = nullptr;
  }
  safelist_lock_.unlock();

  /* Clear and release memory pools. */
  for (std::multiset<blender::gpu::MTLBufferHandle, blender::gpu::CompareMTLBuffer> *buffer_pool :
       buffer_pools_.values())
  {
    delete buffer_pool;
  }

  buffer_pools_.clear();
  buffer_pool_lock_.unlock();
}

gpu::MTLBuffer *MTLBufferPool::allocate(uint64_t size, bool cpu_visible)
{
  /* Allocate buffer with default HW-compatible alignment of 256 bytes.
   * See https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf for more. */
  return this->allocate_aligned(size, 256, cpu_visible);
}

gpu::MTLBuffer *MTLBufferPool::allocate_with_data(uint64_t size,
                                                  bool cpu_visible,
                                                  const void *data)
{
  /* Allocate buffer with default HW-compatible alignment of 256 bytes.
   * See https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf for more. */
  return this->allocate_aligned_with_data(size, 256, cpu_visible, data);
}

gpu::MTLBuffer *MTLBufferPool::allocate_aligned(uint64_t size,
                                                uint32_t alignment,
                                                bool cpu_visible)
{
  /* Check not required. Main GPU module usage considered thread-safe. */
  // BLI_assert(BLI_thread_is_main());

  /* Calculate aligned size */
  BLI_assert(alignment > 0);
  uint64_t aligned_alloc_size = ceil_to_multiple_ul(size, alignment);
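  /* Worked example (illustrative): a request of size = 1000 with the default 256-byte
   * alignment rounds up to ceil_to_multiple_ul(1000, 256) = 1024 bytes. */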

  /* Allocate new MTL Buffer */
  MTLResourceOptions options;
  if (cpu_visible) {
    options = ([device_ hasUnifiedMemory]) ? MTLResourceStorageModeShared :
                                             MTLResourceStorageModeManaged;
  }
  else {
    options = MTLResourceStorageModePrivate;
  }

  /* Check if we have a suitable buffer */
  gpu::MTLBuffer *new_buffer = nullptr;
  buffer_pool_lock_.lock();

  std::multiset<MTLBufferHandle, CompareMTLBuffer> **pool_search = buffer_pools_.lookup_ptr(
      (uint64_t)options);

  if (pool_search != nullptr) {
    std::multiset<MTLBufferHandle, CompareMTLBuffer> *pool = *pool_search;
    MTLBufferHandle size_compare(aligned_alloc_size);
    auto result = pool->lower_bound(size_compare);
    if (result != pool->end()) {
      /* Potential buffer found, check if within size threshold requirements. */
      gpu::MTLBuffer *found_buffer = result->buffer;
      BLI_assert(found_buffer);
      BLI_assert(found_buffer->get_metal_buffer());

      uint64_t found_size = found_buffer->get_size();

      if (found_size >= aligned_alloc_size &&
          found_size <= (aligned_alloc_size * mtl_buffer_size_threshold_factor_))
      {
        MTL_LOG_DEBUG(
            "[MemoryAllocator] Suitable Buffer of size %lld found, for requested size: %lld",
            found_size,
            aligned_alloc_size);

        new_buffer = found_buffer;
        BLI_assert(!new_buffer->get_in_use());

        /* Remove buffer from free set. */
        pool->erase(result);
      }
      else {
        MTL_LOG_DEBUG(
            "[MemoryAllocator] Buffer of size %lld found, but was incompatible with requested "
            "size: %lld",
            found_size,
            aligned_alloc_size);
        new_buffer = nullptr;
      }
    }
  }

  /* Allocate new buffer. */
  if (new_buffer == nullptr) {
    new_buffer = new gpu::MTLBuffer(device_, size, options, alignment);

    /* Track allocation in context. */
    allocations_list_insert(new_buffer);
  }
  else {
    /* Re-use suitable buffer. */
    new_buffer->set_usage_size(aligned_alloc_size);

#if MTL_DEBUG_MEMORY_STATISTICS == 1
    /* Debug. */
    buffers_in_pool_--;
#endif

    /* Decrement size of pool. */
    BLI_assert(allocations_in_pool_ >= 0);
    allocations_in_pool_ -= new_buffer->get_size();

    /* Ensure buffer memory is correctly backed. */
    BLI_assert(new_buffer->get_metal_buffer());
  }
  /* Flag buffer as actively in-use. */
  new_buffer->flag_in_use(true);

#if MTL_DEBUG_MEMORY_STATISTICS == 1
  per_frame_allocation_count_++;
#endif

  /* Release lock. */
  buffer_pool_lock_.unlock();

  return new_buffer;
}

gpu::MTLBuffer *MTLBufferPool::allocate_aligned_with_data(uint64_t size,
                                                          uint32_t /*alignment*/,
                                                          bool cpu_visible,
                                                          const void *data)
{
  gpu::MTLBuffer *buf = this->allocate_aligned(size, 256, cpu_visible);

  /* Upload initial data. */
  BLI_assert(data != nullptr);
  BLI_assert(!(buf->get_resource_options() & MTLResourceStorageModePrivate));
  BLI_assert(size <= buf->get_size());

  memcpy(buf->get_host_ptr(), data, size);
  buf->flush_range(0, size);
  return buf;
}

bool MTLBufferPool::free_buffer(gpu::MTLBuffer *buffer)
{
  /* Ensure buffer is flagged as in-use. I.e. has not already been returned to memory pools. */
  bool buffer_in_use = buffer->get_in_use();
  BLI_assert(buffer_in_use);
  if (buffer_in_use) {

    /* Fetch active safe pool from atomic ptr. */
    MTLSafeFreeList *current_pool = this->get_current_safe_list();

    /* Place buffer in safe_free_pool before returning to MemoryManager buffer pools. */
    BLI_assert(current_pool);
    current_pool->insert_buffer(buffer);
    buffer->flag_in_use(false);

    return true;
  }
  return false;
}
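
/* Illustrative lifecycle (not part of this file): callers obtain a buffer from the pool,
 * encode GPU work against it, then return it through the active safe-free list:
 *
 *   gpu::MTLBuffer *buf = pool.allocate_with_data(size, true, host_data);
 *   // ... encode commands reading buf->get_metal_buffer() ...
 *   pool.free_buffer(buf);  // Deferred: re-pooled once dependent GPU work completes.
 */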

void MTLBufferPool::update_memory_pools()
{
  /* Ensure thread-safe access to `completed_safelist_queue_`, which contains
   * the list of MTLSafeFreeList's whose buffers are ready to be
   * re-inserted into the Memory Manager pools.
   * We also need to lock access to general buffer pools, to ensure allocations
   * are not simultaneously happening on background threads. */
  safelist_lock_.lock();
  buffer_pool_lock_.lock();

#if MTL_DEBUG_MEMORY_STATISTICS == 1
  int num_buffers_added = 0;
#endif

  /* Always free oldest MTLSafeFreeList first. */
  for (int safe_pool_free_index = 0; safe_pool_free_index < completed_safelist_queue_.size();
       safe_pool_free_index++)
  {
    MTLSafeFreeList *current_pool = completed_safelist_queue_[safe_pool_free_index];

    /* Iterate through all MTLSafeFreeList linked-chunks. */
    while (current_pool != nullptr) {
      current_pool->lock_.lock();
      BLI_assert(current_pool);
      BLI_assert(current_pool->in_free_queue_);
      int counter = 0;
      int size = min_ii(current_pool->current_list_index_, MTLSafeFreeList::MAX_NUM_BUFFERS_);

      /* Re-add all buffers within frame index to MemoryManager pools. */
      while (counter < size) {

        gpu::MTLBuffer *buf = current_pool->safe_free_pool_[counter];

        /* Insert buffer back into open pools. */
        BLI_assert(buf->get_in_use() == false);
        this->insert_buffer_into_pool(buf->get_resource_options(), buf);
        counter++;

#if MTL_DEBUG_MEMORY_STATISTICS == 1
        num_buffers_added++;
#endif
      }

      /* Fetch next MTLSafeFreeList chunk, if any. */
      MTLSafeFreeList *next_list = current_pool->next_.load();

      /* Delete current MTLSafeFreeList */
      current_pool->lock_.unlock();
      delete current_pool;
      current_pool = nullptr;

      /* Move onto next chunk. */
      if (next_list != nullptr) {
        current_pool = next_list;
      }
    }
  }

  /* Release memory allocations which have not been used in a while.
   * This ensures memory pressure stays low for scenes with compounding complexity during
   * animation.
   * If memory is continually used, then we do not want to free this memory as it will be
   * re-allocated during a short time period. */

  const time_t time_now = std::time(nullptr);
  for (auto buffer_pool_list : buffer_pools_.items()) {
    MTLBufferPoolOrderedList *pool_allocations = buffer_pool_list.value;
    MTLBufferPoolOrderedList::iterator pool_iterator = pool_allocations->begin();
    while (pool_iterator != pool_allocations->end()) {

      const MTLBufferHandle handle = *pool_iterator;
      const time_t time_passed = time_now - handle.insert_time;

      /* Free allocations if a certain amount of time has passed.
       * Deletion frequency depends on how much excess memory
       * the application is using. */
      time_t deletion_time_threshold_s = 600;
      /* Spare pool memory >= 2GB. */
      if (allocations_in_pool_ >= MEMORY_SIZE_2GB) {
        deletion_time_threshold_s = 2;
      }
      /* Spare pool memory >= 1GB. */
      else if (allocations_in_pool_ >= MEMORY_SIZE_1GB) {
        deletion_time_threshold_s = 4;
      }
      /* Spare pool memory >= 512MB. */
      else if (allocations_in_pool_ >= MEMORY_SIZE_512MB) {
        deletion_time_threshold_s = 15;
      }
      /* Spare pool memory >= 256MB. */
      else if (allocations_in_pool_ >= MEMORY_SIZE_256MB) {
        deletion_time_threshold_s = 60;
      }

      if (time_passed > deletion_time_threshold_s) {

        /* Remove buffer from global allocations list and release resource. */
        allocations_list_delete(handle.buffer);

        /* Remove buffer from pool and update pool statistics. */
        pool_iterator = pool_allocations->erase(pool_iterator);
        allocations_in_pool_ -= handle.buffer_size;
#if MTL_DEBUG_MEMORY_STATISTICS == 1
        buffers_in_pool_--;
#endif
        continue;
      }
      pool_iterator++;
    }
  }

#if MTL_DEBUG_MEMORY_STATISTICS == 1
  printf("--- Allocation Stats ---\n");
  printf(" Num buffers processed in pool (this frame): %u\n", num_buffers_added);

  uint framealloc = (uint)per_frame_allocation_count_;
  printf(" Allocations in frame: %u\n", framealloc);
  printf(" Total Buffers allocated: %u\n", allocations_list_size_);
  printf(" Total Memory allocated: %u MB\n", (uint)total_allocation_bytes_ / (1024 * 1024));

  uint allocs = (uint)(allocations_in_pool_) / 1024 / 1024;
  printf(" Free memory in pools: %u MB\n", allocs);

  uint buffs = (uint)buffers_in_pool_;
  printf(" Buffers in pools: %u\n", buffs);

  printf(" Pools %u:\n", (uint)buffer_pools_.size());
  auto key_iterator = buffer_pools_.keys().begin();
  auto value_iterator = buffer_pools_.values().begin();
  while (key_iterator != buffer_pools_.keys().end()) {
    uint64_t mem_in_pool = 0;
    uint64_t iters = 0;
    for (auto it = (*value_iterator)->begin(); it != (*value_iterator)->end(); it++) {
      mem_in_pool += it->buffer_size;
      iters++;
    }

    printf(" Buffers in pool (%u)(%llu): %u (%u MB)\n",
           (uint)*key_iterator,
           iters,
           (uint)((*value_iterator)->size()),
           (uint)mem_in_pool / 1024 / 1024);
    ++key_iterator;
    ++value_iterator;
  }

  per_frame_allocation_count_ = 0;
#endif

  /* Clear safe pools list */
  completed_safelist_queue_.clear();
  buffer_pool_lock_.unlock();
  safelist_lock_.unlock();
}
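
/* Illustrative call pattern (not part of this file): the owning context is expected to
 * invoke update_memory_pools() once per frame, at a controlled point after command buffer
 * completion callbacks have pushed their MTLSafeFreeLists onto `completed_safelist_queue_`. */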

void MTLBufferPool::push_completed_safe_list(MTLSafeFreeList *safe_list)
{
  /* When an MTLSafeFreeList has been released by the GPU, and buffers are ready to
   * be re-inserted into the MemoryManager pools for future use, add the MTLSafeFreeList
   * to the `completed_safelist_queue_` for flushing at a controlled point in time. */
  safe_list->lock_.lock();
  BLI_assert(safe_list);
  BLI_assert(safe_list->reference_count_ == 0 &&
             "Pool must be fully dereferenced by all in-use cmd buffers before returning.\n");
  BLI_assert(safe_list->in_free_queue_ == false && "Pool must not already be in queue");

  /* Flag MTLSafeFreeList as having been added, and insert into SafeFreePool queue. */
  safe_list->flag_in_queue();
  safelist_lock_.lock();
  completed_safelist_queue_.append(safe_list);
  safelist_lock_.unlock();
  safe_list->lock_.unlock();
}

MTLSafeFreeList *MTLBufferPool::get_current_safe_list()
{
  /* Thread-safe access via atomic ptr. */
  return current_free_list_;
}

void MTLBufferPool::begin_new_safe_list()
{
  safelist_lock_.lock();
  MTLSafeFreeList *previous_list = prev_free_buffer_list_;
  MTLSafeFreeList *active_list = get_current_safe_list();
  current_free_list_ = new MTLSafeFreeList();
  prev_free_buffer_list_ = active_list;
  safelist_lock_.unlock();

  /* Release final reference for previous list.
   * NOTE: Outside of lock as this function itself locks. */
  if (previous_list) {
    previous_list->decrement_reference();
  }
}

void MTLBufferPool::ensure_buffer_pool(MTLResourceOptions options)
{
  std::multiset<MTLBufferHandle, CompareMTLBuffer> **pool_search = buffer_pools_.lookup_ptr(
      (uint64_t)options);
  if (pool_search == nullptr) {
    std::multiset<MTLBufferHandle, CompareMTLBuffer> *pool =
        new std::multiset<MTLBufferHandle, CompareMTLBuffer>();
    buffer_pools_.add_new((uint64_t)options, pool);
  }
}

void MTLBufferPool::insert_buffer_into_pool(MTLResourceOptions options, gpu::MTLBuffer *buffer)
{
  /* Ensure `safelist_lock_` is locked in calling code before modifying. */
  BLI_assert(buffer);

  /* Reset usage size to actual size of allocation. */
  buffer->set_usage_size(buffer->get_size());

  /* Ensure pool exists. */
  this->ensure_buffer_pool(options);

  /* TODO(Metal): Support purgeability - Allow buffer in pool to have its memory taken back by the
   * OS if needed. As we keep allocations around, they may not actually be in use, but we can
   * ensure they do not block other apps from using memory. Upon a buffer being needed again, we
   * can reset this state.
   * TODO(Metal): Purgeability state does not update instantly, so this requires a deferral. */
  BLI_assert(buffer->get_metal_buffer());
  // [buffer->get_metal_buffer() setPurgeableState:MTLPurgeableStateVolatile];

  std::multiset<MTLBufferHandle, CompareMTLBuffer> *pool = buffer_pools_.lookup(options);
  pool->insert(MTLBufferHandle(buffer));
  allocations_in_pool_ += buffer->get_size();

#if MTL_DEBUG_MEMORY_STATISTICS == 1
  /* Debug statistics. */
  buffers_in_pool_++;
#endif
}

void MTLBufferPool::allocations_list_insert(gpu::MTLBuffer *buffer)
{
  /* NOTE: Function should only be called while buffer_pool_lock_ is acquired. */
  BLI_assert(initialized_);
  BLI_assert(buffer != nullptr);

  /* Insert buffer at base of allocations list. */
  gpu::MTLBuffer *current_head = allocations_list_base_;
  buffer->next = current_head;
  buffer->prev = nullptr;

  if (current_head != nullptr) {
    current_head->prev = buffer;
  }

  allocations_list_base_ = buffer;
  allocations_list_size_++;

#if MTL_DEBUG_MEMORY_STATISTICS == 1
  total_allocation_bytes_ += buffer->get_size();
#endif
}

void MTLBufferPool::allocations_list_delete(gpu::MTLBuffer *buffer)
{
  /* NOTE: Function should only be called while buffer_pool_lock_ is acquired. */
  /* Remove a buffer link in the allocations chain. */
  BLI_assert(initialized_);
  BLI_assert(buffer != nullptr);
  BLI_assert(allocations_list_size_ >= 1);

  gpu::MTLBuffer *next = buffer->next;
  gpu::MTLBuffer *prev = buffer->prev;

  if (prev != nullptr) {
    BLI_assert(prev->next == buffer);
    prev->next = next;
  }

  if (next != nullptr) {
    BLI_assert(next->prev == buffer);
    next->prev = prev;
  }

  if (allocations_list_base_ == buffer) {
    allocations_list_base_ = next;
    BLI_assert(prev == nullptr);
  }
  allocations_list_size_--;

#if MTL_DEBUG_MEMORY_STATISTICS == 1
  total_allocation_bytes_ -= buffer->get_size();
#endif

  /* Delete buffer. */
  delete buffer;
}

void MTLBufferPool::allocations_list_delete_all()
{
  gpu::MTLBuffer *current = allocations_list_base_;
  while (current != nullptr) {
    gpu::MTLBuffer *next = current->next;
    delete current;
    current = next;
  }
  allocations_list_size_ = 0;
  allocations_list_base_ = nullptr;

#if MTL_DEBUG_MEMORY_STATISTICS == 1
  total_allocation_bytes_ = 0;
#endif
}

MTLSafeFreeList::MTLSafeFreeList()
{
  reference_count_ = 1;
  in_free_queue_ = false;
  current_list_index_ = 0;
  next_ = nullptr;
  referenced_by_workload_ = false;
}

void MTLSafeFreeList::insert_buffer(gpu::MTLBuffer *buffer)
{
  BLI_assert(in_free_queue_ == false);

  /* Lockless list insert. */
  uint insert_index = current_list_index_++;

  /* If the current MTLSafeFreeList size is exceeded, we ripple down the linked-list chain and
   * insert the buffer into the next available chunk. */
  if (insert_index >= MTLSafeFreeList::MAX_NUM_BUFFERS_) {

    /* Check if first caller to generate next pool in chain.
     * Otherwise, ensure pool exists or wait for first caller to create next pool. */
    MTLSafeFreeList *next_list = next_.load();

    if (!next_list) {
      std::unique_lock lock(lock_);

      next_list = next_.load();
      if (!next_list) {
        next_list = new MTLSafeFreeList();
        next_.store(next_list);
      }
    }
    BLI_assert(next_list);
    next_list->insert_buffer(buffer);

    /* Clamp index to chunk limit if overflowing. */
    current_list_index_ = MTLSafeFreeList::MAX_NUM_BUFFERS_;
    return;
  }

  safe_free_pool_[insert_index] = buffer;
}
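
/* Note on the lockless insert above (explanatory): the post-increment of
 * `current_list_index_` reserves a unique slot per concurrent caller (this relies on the
 * index being atomic, so the increment behaves as a fetch-add). Only the overflow path that
 * chains a new chunk takes `lock_`, re-checking `next_.load()` under the lock so that
 * exactly one new chunk is created. */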

/* Increments from active GPUContext thread. */
void MTLSafeFreeList::increment_reference()
{
  lock_.lock();
  BLI_assert(in_free_queue_ == false);
  reference_count_++;
  referenced_by_workload_ = true;
  lock_.unlock();
}

/* Reference decrements and addition to completed list queue can occur from MTLCommandBuffer
 * completion callback thread. */
void MTLSafeFreeList::decrement_reference()
{
  lock_.lock();
  BLI_assert(in_free_queue_ == false);
  int ref_count = --reference_count_;

  if (ref_count == 0) {
    MTLContext::get_global_memory_manager()->push_completed_safe_list(this);
  }
  lock_.unlock();
}

bool MTLSafeFreeList::should_flush()
{
  /* We should only consider refreshing a list if it has been referenced by active workloads, and
   * contains a sufficient buffer count to avoid overheads associated with flushing the list. If
   * the reference count is only equal to 1, buffers may have been added, but no command
   * submissions will have been issued, hence buffers could be returned to the pool prematurely if
   * associated workload submission occurs later. */
  return ((reference_count_ > 1 || referenced_by_workload_) &&
          current_list_index_ > MIN_BUFFER_FLUSH_COUNT);
}

/* -------------------------------------------------------------------- */

MTLBuffer::MTLBuffer(id<MTLDevice> mtl_device,
                     uint64_t size,
                     MTLResourceOptions options,
                     uint alignment)
{
  /* Calculate aligned allocation size. */
  BLI_assert(alignment > 0);
  uint64_t aligned_alloc_size = ceil_to_multiple_ul(size, alignment);

  alignment_ = alignment;
  device_ = mtl_device;
  is_external_ = false;

  options_ = options;
  this->flag_in_use(false);

  metal_buffer_ = [device_ newBufferWithLength:aligned_alloc_size options:options];
  BLI_assert(metal_buffer_);

  size_ = aligned_alloc_size;
  this->set_usage_size(size_);
  if (!(options_ & MTLResourceStorageModePrivate)) {
    data_ = [metal_buffer_ contents];
  }
  else {
    data_ = nullptr;
  }

  /* Linked resources. */
  next = prev = nullptr;
}

MTLBuffer::MTLBuffer(id<MTLBuffer> external_buffer)
{
  BLI_assert(external_buffer != nil);

  /* Ensure external_buffer remains referenced while in-use. */
  metal_buffer_ = external_buffer;
  [metal_buffer_ retain];

  /* Extract properties. */
  is_external_ = true;
  device_ = nil;
  alignment_ = 1;
  options_ = [metal_buffer_ resourceOptions];
  size_ = [metal_buffer_ allocatedSize];
  this->set_usage_size(size_);
  data_ = [metal_buffer_ contents];
  in_use_ = true;

  /* Linked resources. */
  next = prev = nullptr;
}

MTLBuffer::~MTLBuffer()
{
  if (metal_buffer_ != nil) {
    [metal_buffer_ release];
    metal_buffer_ = nil;
  }
}

void MTLBuffer::free()
{
  if (!is_external_) {
    MTLContext::get_global_memory_manager()->free_buffer(this);
  }
  else {
    if (metal_buffer_ != nil) {
      [metal_buffer_ release];
      metal_buffer_ = nil;
    }
  }
}

id<MTLBuffer> MTLBuffer::get_metal_buffer() const
{
  return metal_buffer_;
}

void *MTLBuffer::get_host_ptr() const
{
  BLI_assert(!(options_ & MTLResourceStorageModePrivate));
  BLI_assert(data_);
  return data_;
}

uint64_t MTLBuffer::get_size() const
{
  return size_;
}

uint64_t MTLBuffer::get_size_used() const
{
  return usage_size_;
}

bool MTLBuffer::requires_flush()
{
  /* We do not need to flush shared memory, as addressable buffer is shared. */
  return options_ & MTLResourceStorageModeManaged;
}

void MTLBuffer::set_label(NSString *str)
{
  metal_buffer_.label = str;
}

void MTLBuffer::debug_ensure_used()
{
  /* Debug: If buffer is not flagged as in-use, this is a problem. */
  BLI_assert_msg(
      in_use_,
      "Buffer should be marked as 'in-use' if being actively used by an instance. Buffer "
      "has likely already been freed.");
}

void MTLBuffer::flush()
{
  this->debug_ensure_used();
  if (this->requires_flush()) {
    [metal_buffer_ didModifyRange:NSMakeRange(0, size_)];
  }
}

void MTLBuffer::flush_range(uint64_t offset, uint64_t length)
{
  this->debug_ensure_used();
  if (this->requires_flush()) {
    BLI_assert((offset + length) <= size_);
    [metal_buffer_ didModifyRange:NSMakeRange(offset, length)];
  }
}
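
/* Note (illustrative): with MTLResourceStorageModeShared (unified memory), requires_flush()
 * is false and the flush calls are no-ops. With MTLResourceStorageModeManaged,
 * didModifyRange: tells Metal which CPU-modified byte range to synchronize to the GPU, e.g.
 *
 *   memcpy((uint8_t *)buf->get_host_ptr() + offset, src, len);
 *   buf->flush_range(offset, len);
 */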

void MTLBuffer::flag_in_use(bool used)
{
  in_use_ = used;
}

bool MTLBuffer::get_in_use()
{
  return in_use_;
}

void MTLBuffer::set_usage_size(uint64_t size_used)
{
  BLI_assert(size_used > 0 && size_used <= size_);
  usage_size_ = size_used;
}

MTLResourceOptions MTLBuffer::get_resource_options()
{
  return options_;
}

uint64_t MTLBuffer::get_alignment()
{
  return alignment_;
}

bool MTLBufferRange::requires_flush()
{
  /* We do not need to flush shared memory. */
  return this->options & MTLResourceStorageModeManaged;
}

void MTLBufferRange::flush()
{
  if (this->requires_flush()) {
    BLI_assert(this->metal_buffer);
    BLI_assert((this->buffer_offset + this->size) <= [this->metal_buffer length]);
    BLI_assert(this->buffer_offset >= 0);
    [this->metal_buffer
        didModifyRange:NSMakeRange(this->buffer_offset, this->size - this->buffer_offset)];
  }
}

/* -------------------------------------------------------------------- */

MTLScratchBufferManager::~MTLScratchBufferManager()
{
  this->free();
}

void MTLScratchBufferManager::init()
{

  if (!this->initialised_) {
    BLI_assert(context_.device);

    /* Initialize Scratch buffers. */
    for (int sb = 0; sb < mtl_max_scratch_buffers_; sb++) {
      scratch_buffers_[sb] = new MTLCircularBuffer(
          context_, mtl_scratch_buffer_initial_size_, true);
      BLI_assert(scratch_buffers_[sb]);
      BLI_assert(&(scratch_buffers_[sb]->own_context_) == &context_);
    }
    current_scratch_buffer_ = 0;
    initialised_ = true;
  }
}

void MTLScratchBufferManager::free()
{
  initialised_ = false;

  /* Release Scratch buffers */
  for (int sb = 0; sb < mtl_max_scratch_buffers_; sb++) {
    delete scratch_buffers_[sb];
    scratch_buffers_[sb] = nullptr;
  }
  current_scratch_buffer_ = 0;
}

MTLTemporaryBuffer MTLScratchBufferManager::scratch_buffer_allocate_range(uint64_t alloc_size)
{
  return this->scratch_buffer_allocate_range_aligned(alloc_size, 1);
}

MTLTemporaryBuffer MTLScratchBufferManager::scratch_buffer_allocate_range_aligned(
    uint64_t alloc_size, uint alignment)
{
  /* Ensure scratch buffer allocation alignment adheres to offset alignment requirements. */
  alignment = max_uu(alignment, 256);

  BLI_assert_msg(current_scratch_buffer_ >= 0, "Scratch Buffer index not set");
  MTLCircularBuffer *current_scratch_buff = this->scratch_buffers_[current_scratch_buffer_];
  BLI_assert_msg(current_scratch_buff != nullptr, "Scratch Buffer does not exist");
  MTLTemporaryBuffer allocated_range = current_scratch_buff->allocate_range_aligned(alloc_size,
                                                                                    alignment);
  BLI_assert(allocated_range.size >= alloc_size && allocated_range.size <= alloc_size + alignment);
  BLI_assert(allocated_range.metal_buffer != nil);
  return allocated_range;
}
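
/* Illustrative use (not part of this file): transient per-frame data can be written
 * directly into the returned range without a pooled allocation:
 *
 *   MTLTemporaryBuffer range = manager.scratch_buffer_allocate_range_aligned(data_size, 256);
 *   memcpy(range.data, src_data, data_size);
 *   // Bind range.metal_buffer at range.buffer_offset for the subsequent draw/dispatch.
 */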

void MTLScratchBufferManager::ensure_increment_scratch_buffer()
{
  /* Fetch active scratch buffer. */
  MTLCircularBuffer *active_scratch_buf = scratch_buffers_[current_scratch_buffer_];
  BLI_assert(&active_scratch_buf->own_context_ == &context_);

  /* Ensure existing scratch buffer is no longer in use. MTL_MAX_SCRATCH_BUFFERS specifies
   * the number of allocated scratch buffers. This value should be equal to the number of
   * simultaneous frames in-flight. I.e. the maximal number of scratch buffers which are
   * simultaneously in-use. */
  if (active_scratch_buf->used_frame_index_ < context_.get_current_frame_index()) {
    current_scratch_buffer_ = (current_scratch_buffer_ + 1) % mtl_max_scratch_buffers_;
    active_scratch_buf = scratch_buffers_[current_scratch_buffer_];
    active_scratch_buf->reset();
    BLI_assert(&active_scratch_buf->own_context_ == &context_);
    MTL_LOG_DEBUG("Scratch buffer %d reset - (ctx %p)(Frame index: %d)",
                  current_scratch_buffer_,
                  &context_,
                  context_.get_current_frame_index());
  }
}

void MTLScratchBufferManager::flush_active_scratch_buffer()
{
  /* Fetch active scratch buffer and verify context. */
  MTLCircularBuffer *active_scratch_buf = scratch_buffers_[current_scratch_buffer_];
  BLI_assert(&active_scratch_buf->own_context_ == &context_);
  active_scratch_buf->flush();
}

/* NOTE: Signature reconstructed from usage; the exact original name is unverified. */
void MTLScratchBufferManager::ssbo_bind_active_scratch_buffer(int slot)
{
  /* Fetch active scratch buffer and verify context. */
  MTLCircularBuffer *active_scratch_buf = scratch_buffers_[current_scratch_buffer_];
  BLI_assert(&active_scratch_buf->own_context_ == &context_);
  active_scratch_buf->ssbo_source_->bind(slot);
}

/* NOTE: As above, the name of this unbind counterpart is reconstructed. */
void MTLScratchBufferManager::ssbo_unbind_active_scratch_buffer()
{
  /* Fetch active scratch buffer and verify context. */
  MTLCircularBuffer *active_scratch_buf = scratch_buffers_[current_scratch_buffer_];
  BLI_assert(&active_scratch_buf->own_context_ == &context_);
  active_scratch_buf->ssbo_source_->unbind();
}

/* MTLCircularBuffer implementation. */
MTLCircularBuffer::MTLCircularBuffer(MTLContext &ctx, uint64_t initial_size, bool allow_grow)
    : own_context_(ctx)
{
  BLI_assert(this);
  ssbo_source_ = new gpu::MTLStorageBuf(initial_size);
  cbuffer_ = ssbo_source_->metal_buffer_;
  current_offset_ = 0;
  can_resize_ = allow_grow;
  cbuffer_->flag_in_use(true);

  used_frame_index_ = ctx.get_current_frame_index();
  last_flush_base_offset_ = 0;

  /* Debug label. */
  if (G.debug & G_DEBUG_GPU) {
    cbuffer_->set_label(@"Circular Scratch Buffer");
  }
}

MTLCircularBuffer::~MTLCircularBuffer()
{
  delete ssbo_source_;
}

MTLTemporaryBuffer MTLCircularBuffer::allocate_range(uint64_t alloc_size)
{
  return this->allocate_range_aligned(alloc_size, 1);
}

MTLTemporaryBuffer MTLCircularBuffer::allocate_range_aligned(uint64_t alloc_size, uint alignment)
{
  BLI_assert(this);

  /* Ensure alignment of an allocation is aligned to compatible offset boundaries. */
  BLI_assert(alignment > 0);
  alignment = max_uu(alignment, 256);

  /* Align current offset and allocation size to desired alignment */
  uint64_t aligned_current_offset = ceil_to_multiple_ul(current_offset_, alignment);
  uint64_t aligned_alloc_size = ceil_to_multiple_ul(alloc_size, alignment);
  bool can_allocate = (aligned_current_offset + aligned_alloc_size) < cbuffer_->get_size();

  BLI_assert(aligned_current_offset >= current_offset_);
  BLI_assert(aligned_alloc_size >= alloc_size);

  BLI_assert(aligned_current_offset % alignment == 0);
  BLI_assert(aligned_alloc_size % alignment == 0);
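
  /* Worked example (illustrative): with current_offset_ = 300 and alignment = 256, the
   * allocation begins at aligned_current_offset = 512; a 100-byte request occupies an
   * aligned_alloc_size of 256 bytes, leaving current_offset_ at 768. */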

  /* Recreate Buffer */
  if (!can_allocate) {
    uint64_t new_size = cbuffer_->get_size();
    if (can_resize_) {
      /* Resize to the maximum of basic resize heuristic OR the size of the current offset +
       * requested allocation -- we want the buffer to grow to a large enough size such that it
       * does not need to resize mid-frame. */
      new_size = max_ulul(min_ulul(mtl_scratch_buffer_max_size_,
                                   ceil_to_multiple_ul(new_size * 1.2f, 256)),
                          aligned_current_offset + aligned_alloc_size);

#ifdef MTL_SCRATCH_BUFFER_ALLOW_TEMPORARY_EXPANSION
      /* IF a requested allocation EXCEEDS the maximum supported size, temporarily allocate up to
       * this, but shrink down ASAP. */
      if (new_size > mtl_scratch_buffer_max_size_) {

        /* If new requested allocation is bigger than maximum allowed size, temporarily resize to
         * maximum allocation size -- Otherwise, clamp the buffer size back down to the defined
         * maximum */
        if (aligned_alloc_size > mtl_scratch_buffer_max_size_) {
          new_size = aligned_alloc_size;
          MTL_LOG_DEBUG("Temporarily growing Scratch buffer to %d MB",
                        (int)new_size / 1024 / 1024);
        }
        else {
          new_size = mtl_scratch_buffer_max_size_;
          MTL_LOG_DEBUG("Shrinking Scratch buffer back to %d MB", (int)new_size / 1024 / 1024);
        }
      }
      BLI_assert(aligned_alloc_size <= new_size);
#else
      new_size = min_ulul(new_size, mtl_scratch_buffer_max_size_);

      if (aligned_alloc_size > new_size) {
        BLI_assert(false);

        /* Cannot allocate */
        MTLTemporaryBuffer alloc_range;
        alloc_range.metal_buffer = nil;
        alloc_range.data = nullptr;
        alloc_range.buffer_offset = 0;
        alloc_range.size = 0;
        alloc_range.options = cbuffer_->get_resource_options();
        return alloc_range;
      }
#endif
    }
    else {
      MTL_LOG_WARNING(
          "Performance Warning: Reached the end of circular buffer of size: %llu, but cannot "
          "resize. Starting new buffer",
          cbuffer_->get_size());
      BLI_assert(aligned_alloc_size <= new_size);

      /* Cannot allocate. */
      MTLTemporaryBuffer alloc_range;
      alloc_range.metal_buffer = nil;
      alloc_range.data = nullptr;
      alloc_range.buffer_offset = 0;
      alloc_range.size = 0;
      alloc_range.options = cbuffer_->get_resource_options();
    }

    /* Flush current buffer to ensure changes are visible on the GPU. */
    this->flush();

    /* Discard old buffer and create a new one - Relying on Metal reference counting to track
     * in-use buffers */
    delete ssbo_source_;
    ssbo_source_ = new gpu::MTLStorageBuf(new_size);
    cbuffer_ = ssbo_source_->metal_buffer_;
    cbuffer_->flag_in_use(true);
    current_offset_ = 0;
    last_flush_base_offset_ = 0;

    /* Debug label. */
    if (G.debug & G_DEBUG_GPU) {
      cbuffer_->set_label(@"Circular Scratch Buffer");
    }
    MTL_LOG_DEBUG("Resized Metal circular buffer to %llu bytes", new_size);

    /* Reset allocation Status. */
    aligned_current_offset = 0;
    BLI_assert((aligned_current_offset + aligned_alloc_size) <= cbuffer_->get_size());
  }

  /* Allocate chunk. */
  MTLTemporaryBuffer alloc_range;
  alloc_range.metal_buffer = cbuffer_->get_metal_buffer();
  alloc_range.data = (void *)((uint8_t *)([alloc_range.metal_buffer contents]) +
                              aligned_current_offset);
  alloc_range.buffer_offset = aligned_current_offset;
  alloc_range.size = aligned_alloc_size;
  alloc_range.options = cbuffer_->get_resource_options();
  BLI_assert(alloc_range.data);

  /* Shift offset to match alignment. */
  current_offset_ = aligned_current_offset + aligned_alloc_size;
  BLI_assert(current_offset_ <= cbuffer_->get_size());
  return alloc_range;
}

void MTLCircularBuffer::flush()
{
  BLI_assert(this);

  uint64_t len = current_offset_ - last_flush_base_offset_;
  if (len > 0) {
    cbuffer_->flush_range(last_flush_base_offset_, len);
    last_flush_base_offset_ = current_offset_;
  }
}

void MTLCircularBuffer::reset()
{
  BLI_assert(this);

  /* If circular buffer has data written to it, offset will be greater than zero. */
  if (current_offset_ > 0) {

    /* Ensure the circular buffer is no longer being used by an in-flight frame. */
    BLI_assert((own_context_.get_current_frame_index() >=
                (used_frame_index_ + MTL_NUM_SAFE_FRAMES - 1)) &&
               "Trying to reset Circular scratch buffer while its data is still being used by "
               "an in-flight frame");

    current_offset_ = 0;
    last_flush_base_offset_ = 0;
  }

  /* Update used frame index to current. */
  used_frame_index_ = own_context_.get_current_frame_index();
}

}  // namespace blender::gpu