mtl_memory.mm
/* SPDX-FileCopyrightText: 2022-2023 Blender Authors
 *
 * SPDX-License-Identifier: GPL-2.0-or-later */

#include "BKE_global.hh"

#include "DNA_userdef_types.h"

#include "mtl_context.hh"
#include "mtl_debug.hh"
#include "mtl_memory.hh"

using namespace blender;
using namespace blender::gpu;

/* Memory size in bytes macros, used as pool flushing frequency thresholds. */
#define MEMORY_SIZE_2GB 2147483648LL
#define MEMORY_SIZE_1GB 1073741824LL
#define MEMORY_SIZE_512MB 536870912LL
#define MEMORY_SIZE_256MB 268435456LL

namespace blender::gpu {

/* -------------------------------------------------------------------- */
void MTLBufferPool::init(id<MTLDevice> mtl_device)
{
  if (!initialized_) {
    BLI_assert(mtl_device);
    initialized_ = true;
    device_ = mtl_device;

#if MTL_DEBUG_MEMORY_STATISTICS == 1
    /* Debug statistics. */
    total_allocation_bytes_ = 0;
    per_frame_allocation_count_ = 0;
    buffers_in_pool_ = 0;
#endif
    /* Track pool allocation size. */
    allocations_in_pool_ = 0;

    /* Live allocations list. */
    allocations_list_base_ = nullptr;
    allocations_list_size_ = 0;

    /* Free pools -- Create initial safe free pool. */
    BLI_assert(current_free_list_ == nullptr);
    this->begin_new_safe_list();
  }
}

MTLBufferPool::~MTLBufferPool()
{
  this->free();
}

void MTLBufferPool::free()
{
  buffer_pool_lock_.lock();

  /* Delete all existing allocations. */
  allocations_list_delete_all();

  /* Release safe free lists. */
  for (int safe_pool_free_index = 0; safe_pool_free_index < completed_safelist_queue_.size();
       safe_pool_free_index++)
  {
    delete completed_safelist_queue_[safe_pool_free_index];
  }
  completed_safelist_queue_.clear();

  safelist_lock_.lock();
  if (current_free_list_ != nullptr) {
    delete current_free_list_;
    current_free_list_ = nullptr;
  }
  if (prev_free_buffer_list_ != nullptr) {
    delete prev_free_buffer_list_;
    prev_free_buffer_list_ = nullptr;
  }
  safelist_lock_.unlock();

  /* Clear and release memory pools. */
  for (std::multiset<blender::gpu::MTLBufferHandle, blender::gpu::CompareMTLBuffer> *buffer_pool :
       buffer_pools_.values())
  {
    delete buffer_pool;
  }

  buffer_pools_.clear();
  buffer_pool_lock_.unlock();
}

gpu::MTLBuffer *MTLBufferPool::allocate(uint64_t size, bool cpu_visible)
{
  /* Allocate buffer with default HW-compatible alignment of 256 bytes.
   * See https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf for more. */
  return this->allocate_aligned(size, 256, cpu_visible);
}

gpu::MTLBuffer *MTLBufferPool::allocate_with_data(uint64_t size,
                                                  bool cpu_visible,
                                                  const void *data)
{
  /* Allocate buffer with default HW-compatible alignment of 256 bytes.
   * See https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf for more. */
  return this->allocate_aligned_with_data(size, 256, cpu_visible, data);
}

gpu::MTLBuffer *MTLBufferPool::allocate_aligned(uint64_t size,
                                                uint32_t alignment,
                                                bool cpu_visible)
{
  /* Check not required. Main GPU module usage considered thread-safe. */
  // BLI_assert(BLI_thread_is_main());

  /* Calculate aligned size. */
  BLI_assert(alignment > 0);
  uint64_t aligned_alloc_size = ceil_to_multiple_ul(size, alignment);

  /* Allocate new MTL Buffer. */
  MTLResourceOptions options;
  if (cpu_visible) {
    options = ([device_ hasUnifiedMemory]) ? MTLResourceStorageModeShared :
                                             MTLResourceStorageModeManaged;
  }
  else {
    options = MTLResourceStorageModePrivate;
  }

  /* Check if we have a suitable buffer. */
  gpu::MTLBuffer *new_buffer = nullptr;
  buffer_pool_lock_.lock();

  std::multiset<MTLBufferHandle, CompareMTLBuffer> **pool_search = buffer_pools_.lookup_ptr(
      (uint64_t)options);

  if (pool_search != nullptr) {
    std::multiset<MTLBufferHandle, CompareMTLBuffer> *pool = *pool_search;
    MTLBufferHandle size_compare(aligned_alloc_size);
    auto result = pool->lower_bound(size_compare);
    if (result != pool->end()) {
      /* Potential buffer found, check if within size threshold requirements. */
      gpu::MTLBuffer *found_buffer = result->buffer;
      BLI_assert(found_buffer);
      BLI_assert(found_buffer->get_metal_buffer());

      uint64_t found_size = found_buffer->get_size();

      if (found_size >= aligned_alloc_size &&
          found_size <= (aligned_alloc_size * mtl_buffer_size_threshold_factor_))
      {
        MTL_LOG_INFO(
            "[MemoryAllocator] Suitable Buffer of size %lld found, for requested size: %lld",
            found_size,
            aligned_alloc_size);

        new_buffer = found_buffer;
        BLI_assert(!new_buffer->get_in_use());

        /* Remove buffer from free set. */
        pool->erase(result);
      }
      else {
        MTL_LOG_INFO(
            "[MemoryAllocator] Buffer of size %lld found, but was incompatible with requested "
            "size: %lld",
            found_size,
            aligned_alloc_size);
        new_buffer = nullptr;
      }
    }
  }

  /* Allocate new buffer. */
  if (new_buffer == nullptr) {
    new_buffer = new gpu::MTLBuffer(device_, size, options, alignment);

    /* Track allocation in context. */
    allocations_list_insert(new_buffer);
  }
  else {
    /* Re-use suitable buffer. */
    new_buffer->set_usage_size(aligned_alloc_size);

#if MTL_DEBUG_MEMORY_STATISTICS == 1
    /* Debug. */
    buffers_in_pool_--;
#endif

    /* Decrement size of pool. */
    BLI_assert(allocations_in_pool_ >= 0);
    allocations_in_pool_ -= new_buffer->get_size();

    /* Ensure buffer memory is correctly backed. */
    BLI_assert(new_buffer->get_metal_buffer());
  }
  /* Flag buffer as actively in-use. */
  new_buffer->flag_in_use(true);

#if MTL_DEBUG_MEMORY_STATISTICS == 1
  per_frame_allocation_count_++;
#endif

  /* Release lock. */
  buffer_pool_lock_.unlock();

  return new_buffer;
}

gpu::MTLBuffer *MTLBufferPool::allocate_aligned_with_data(uint64_t size,
                                                          uint32_t /*alignment*/,
                                                          bool cpu_visible,
                                                          const void *data)
{
  gpu::MTLBuffer *buf = this->allocate_aligned(size, 256, cpu_visible);

  /* Upload initial data. */
  BLI_assert(data != nullptr);
  BLI_assert(!(buf->get_resource_options() & MTLResourceStorageModePrivate));
  BLI_assert(size <= buf->get_size());
  BLI_assert(size <= [buf->get_metal_buffer() length]);
  memcpy(buf->get_host_ptr(), data, size);
  buf->flush_range(0, size);
  return buf;
}

bool MTLBufferPool::free_buffer(gpu::MTLBuffer *buffer)
{
  /* Ensure buffer is flagged as in-use, i.e. has not already been returned to memory pools. */
  bool buffer_in_use = buffer->get_in_use();
  BLI_assert(buffer_in_use);
  if (buffer_in_use) {

    /* Fetch active safe pool from atomic ptr. */
    MTLSafeFreeList *current_pool = this->get_current_safe_list();

    /* Place buffer in safe_free_pool before returning to MemoryManager buffer pools. */
    BLI_assert(current_pool);
    current_pool->insert_buffer(buffer);
    buffer->flag_in_use(false);

    return true;
  }
  return false;
}
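
// Example round-trip through the pool (illustrative sketch only; `pool`, `src` and the
// 4 KiB size are assumptions for the example, not code from this file):
//
//   gpu::MTLBuffer *buf = pool.allocate(4096, true);
//   memcpy(buf->get_host_ptr(), src, 4096);
//   buf->flush_range(0, 4096);
//   /* ... encode GPU work referencing buf->get_metal_buffer() ... */
//   pool.free_buffer(buf);  // Deferred reuse via the active MTLSafeFreeList.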

void MTLBufferPool::update_memory_pools()
{
  /* Ensure thread-safe access to `completed_safelist_queue_`, which contains
   * the list of MTLSafeFreeList's whose buffers are ready to be
   * re-inserted into the Memory Manager pools.
   * We also need to lock access to general buffer pools, to ensure allocations
   * are not simultaneously happening on background threads. */
  safelist_lock_.lock();
  buffer_pool_lock_.lock();

#if MTL_DEBUG_MEMORY_STATISTICS == 1
  int num_buffers_added = 0;
#endif

  /* Always free oldest MTLSafeFreeList first. */
  for (int safe_pool_free_index = 0; safe_pool_free_index < completed_safelist_queue_.size();
       safe_pool_free_index++)
  {
    MTLSafeFreeList *current_pool = completed_safelist_queue_[safe_pool_free_index];

    /* Iterate through all MTLSafeFreeList linked-chunks. */
    while (current_pool != nullptr) {
      current_pool->lock_.lock();
      BLI_assert(current_pool);
      BLI_assert(current_pool->in_free_queue_);
      int counter = 0;
      int size = min_ii(current_pool->current_list_index_, MTLSafeFreeList::MAX_NUM_BUFFERS_);

      /* Re-add all buffers within frame index to MemoryManager pools. */
      while (counter < size) {

        gpu::MTLBuffer *buf = current_pool->safe_free_pool_[counter];

        /* Insert buffer back into open pools. */
        BLI_assert(buf->get_in_use() == false);
        this->insert_buffer_into_pool(buf->get_resource_options(), buf);
        counter++;

#if MTL_DEBUG_MEMORY_STATISTICS == 1
        num_buffers_added++;
#endif
      }

      /* Fetch next MTLSafeFreeList chunk, if any. */
      MTLSafeFreeList *next_list = current_pool->next_.load();

      /* Delete current MTLSafeFreeList. */
      current_pool->lock_.unlock();
      delete current_pool;
      current_pool = nullptr;

      /* Move onto next chunk. */
      if (next_list != nullptr) {
        current_pool = next_list;
      }
    }
  }

  /* Release memory allocations which have not been used in a while.
   * This ensures memory pressure stays low for scenes with compounding complexity during
   * animation.
   * If memory is continually used, then we do not want to free this memory as it will be
   * re-allocated during a short time period. */

  const time_t time_now = std::time(nullptr);
  for (auto buffer_pool_list : buffer_pools_.items()) {
    MTLBufferPoolOrderedList *pool_allocations = buffer_pool_list.value;
    MTLBufferPoolOrderedList::iterator pool_iterator = pool_allocations->begin();
    while (pool_iterator != pool_allocations->end()) {

      const MTLBufferHandle handle = *pool_iterator;
      const time_t time_passed = time_now - handle.insert_time;

      /* Free allocations if a certain amount of time has passed.
       * Deletion frequency depends on how much excess memory
       * the application is using. */
      time_t deletion_time_threshold_s = 600;
      /* Spare pool memory >= 2GB. */
      if (allocations_in_pool_ >= MEMORY_SIZE_2GB) {
        deletion_time_threshold_s = 2;
      }
      /* Spare pool memory >= 1GB. */
      else if (allocations_in_pool_ >= MEMORY_SIZE_1GB) {
        deletion_time_threshold_s = 4;
      }
      /* Spare pool memory >= 512MB. */
      else if (allocations_in_pool_ >= MEMORY_SIZE_512MB) {
        deletion_time_threshold_s = 15;
      }
      /* Spare pool memory >= 256MB. */
      else if (allocations_in_pool_ >= MEMORY_SIZE_256MB) {
        deletion_time_threshold_s = 60;
      }

      if (time_passed > deletion_time_threshold_s) {

        /* Remove buffer from global allocations list and release resource. */
        allocations_list_delete(handle.buffer);

        /* Remove buffer from pool and update pool statistics. */
        pool_iterator = pool_allocations->erase(pool_iterator);
        allocations_in_pool_ -= handle.buffer_size;
#if MTL_DEBUG_MEMORY_STATISTICS == 1
        buffers_in_pool_--;
#endif
        continue;
      }
      pool_iterator++;
    }
  }

#if MTL_DEBUG_MEMORY_STATISTICS == 1
  printf("--- Allocation Stats ---\n");
  printf(" Num buffers processed in pool (this frame): %u\n", num_buffers_added);

  uint framealloc = (uint)per_frame_allocation_count_;
  printf(" Allocations in frame: %u\n", framealloc);
  printf(" Total Buffers allocated: %u\n", allocations_list_size_);
  printf(" Total Memory allocated: %u MB\n", (uint)total_allocation_bytes_ / (1024 * 1024));

  uint allocs = (uint)(allocations_in_pool_) / 1024 / 1024;
  printf(" Free memory in pools: %u MB\n", allocs);

  uint buffs = (uint)buffers_in_pool_;
  printf(" Buffers in pools: %u\n", buffs);

  printf(" Pools %u:\n", (uint)buffer_pools_.size());
  auto key_iterator = buffer_pools_.keys().begin();
  auto value_iterator = buffer_pools_.values().begin();
  while (key_iterator != buffer_pools_.keys().end()) {
    uint64_t mem_in_pool = 0;
    uint64_t iters = 0;
    for (auto it = (*value_iterator)->begin(); it != (*value_iterator)->end(); it++) {
      mem_in_pool += it->buffer_size;
      iters++;
    }

    printf(" Buffers in pool (%u)(%llu): %u (%u MB)\n",
           (uint)*key_iterator,
           iters,
           (uint)((*value_iterator)->size()),
           (uint)mem_in_pool / 1024 / 1024);
    ++key_iterator;
    ++value_iterator;
  }

  per_frame_allocation_count_ = 0;
#endif

  /* Clear safe pools list. */
  completed_safelist_queue_.clear();
  buffer_pool_lock_.unlock();
  safelist_lock_.unlock();
}

void MTLBufferPool::push_completed_safe_list(MTLSafeFreeList *safe_list)
{
  /* When an MTLSafeFreeList has been released by the GPU, and buffers are ready to
   * be re-inserted into the MemoryManager pools for future use, add the MTLSafeFreeList
   * to the `completed_safelist_queue_` for flushing at a controlled point in time. */
  safe_list->lock_.lock();
  BLI_assert(safe_list);
  BLI_assert(safe_list->reference_count_ == 0 &&
             "Pool must be fully dereferenced by all in-use cmd buffers before returning.\n");
  BLI_assert(safe_list->in_free_queue_ == false && "Pool must not already be in queue");

  /* Flag MTLSafeFreeList as having been added, and insert into SafeFreePool queue. */
  safe_list->flag_in_queue();
  safelist_lock_.lock();
  completed_safelist_queue_.append(safe_list);
  safelist_lock_.unlock();
  safe_list->lock_.unlock();
}

MTLSafeFreeList *MTLBufferPool::get_current_safe_list()
{
  /* Thread-safe access via atomic ptr. */
  return current_free_list_;
}

void MTLBufferPool::begin_new_safe_list()
{
  safelist_lock_.lock();
  MTLSafeFreeList *previous_list = prev_free_buffer_list_;
  MTLSafeFreeList *active_list = get_current_safe_list();
  current_free_list_ = new MTLSafeFreeList();
  prev_free_buffer_list_ = active_list;
  safelist_lock_.unlock();

  /* Release final reference for previous list.
   * NOTE: Outside of lock as this function itself locks. */
  if (previous_list) {
    previous_list->decrement_reference();
  }
}
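
// Per-frame lifecycle sketch (illustrative; the call sites live elsewhere in the Metal
// backend, not in this file):
//
//   pool.begin_new_safe_list();   // Retire the active list, start a fresh one.
//   /* Submitted command buffers call increment_reference()/decrement_reference() on
//    * the retired list; when the final reference drops, decrement_reference() pushes
//    * it onto completed_safelist_queue_. */
//   pool.update_memory_pools();   // Recycle its buffers back into the free pools.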

void MTLBufferPool::ensure_buffer_pool(MTLResourceOptions options)
{
  std::multiset<MTLBufferHandle, CompareMTLBuffer> **pool_search = buffer_pools_.lookup_ptr(
      (uint64_t)options);
  if (pool_search == nullptr) {
    std::multiset<MTLBufferHandle, CompareMTLBuffer> *pool =
        new std::multiset<MTLBufferHandle, CompareMTLBuffer>();
    buffer_pools_.add_new((uint64_t)options, pool);
  }
}

void MTLBufferPool::insert_buffer_into_pool(MTLResourceOptions options, gpu::MTLBuffer *buffer)
{
  /* Ensure `safelist_lock_` is locked in calling code before modifying. */
  BLI_assert(buffer);

  /* Reset usage size to actual size of allocation. */
  buffer->set_usage_size(buffer->get_size());

  /* Ensure pool exists. */
  this->ensure_buffer_pool(options);

  /* TODO(Metal): Support purgeability - Allow buffer in pool to have its memory taken back by the
   * OS if needed. As we keep allocations around, they may not actually be in use, but we can
   * ensure they do not block other apps from using memory. Upon a buffer being needed again, we
   * can reset this state.
   * TODO(Metal): Purgeability state does not update instantly, so this requires a deferral. */
  BLI_assert(buffer->get_metal_buffer());
  // [buffer->metal_buffer setPurgeableState:MTLPurgeableStateVolatile];

  std::multiset<MTLBufferHandle, CompareMTLBuffer> *pool = buffer_pools_.lookup(options);
  pool->insert(MTLBufferHandle(buffer));
  allocations_in_pool_ += buffer->get_size();

#if MTL_DEBUG_MEMORY_STATISTICS == 1
  /* Debug statistics. */
  buffers_in_pool_++;
#endif
}

void MTLBufferPool::allocations_list_insert(gpu::MTLBuffer *buffer)
{
  /* NOTE: Function should only be called while buffer_pool_lock_ is acquired. */
  BLI_assert(initialized_);
  BLI_assert(buffer != nullptr);

  /* Insert buffer at base of allocations list. */
  gpu::MTLBuffer *current_head = allocations_list_base_;
  buffer->next = current_head;
  buffer->prev = nullptr;

  if (current_head != nullptr) {
    current_head->prev = buffer;
  }

  allocations_list_base_ = buffer;
  allocations_list_size_++;

#if MTL_DEBUG_MEMORY_STATISTICS == 1
  total_allocation_bytes_ += buffer->get_size();
#endif
}

void MTLBufferPool::allocations_list_delete(gpu::MTLBuffer *buffer)
{
  /* NOTE: Function should only be called while buffer_pool_lock_ is acquired. */
  /* Remove a buffer link in the allocations chain. */
  BLI_assert(initialized_);
  BLI_assert(buffer != nullptr);
  BLI_assert(allocations_list_size_ >= 1);

  gpu::MTLBuffer *next = buffer->next;
  gpu::MTLBuffer *prev = buffer->prev;

  if (prev != nullptr) {
    BLI_assert(prev->next == buffer);
    prev->next = next;
  }

  if (next != nullptr) {
    BLI_assert(next->prev == buffer);
    next->prev = prev;
  }

  if (allocations_list_base_ == buffer) {
    allocations_list_base_ = next;
    BLI_assert(prev == nullptr);
  }
  allocations_list_size_--;

#if MTL_DEBUG_MEMORY_STATISTICS == 1
  total_allocation_bytes_ -= buffer->get_size();
#endif

  /* Delete buffer. */
  delete buffer;
}

void MTLBufferPool::allocations_list_delete_all()
{
  gpu::MTLBuffer *current = allocations_list_base_;
  while (current != nullptr) {
    gpu::MTLBuffer *next = current->next;
    delete current;
    current = next;
  }
  allocations_list_size_ = 0;
  allocations_list_base_ = nullptr;

#if MTL_DEBUG_MEMORY_STATISTICS == 1
  total_allocation_bytes_ = 0;
#endif
}

MTLSafeFreeList::MTLSafeFreeList()
{
  reference_count_ = 1;
  in_free_queue_ = false;
  current_list_index_ = 0;
  next_ = nullptr;
}

void MTLSafeFreeList::insert_buffer(gpu::MTLBuffer *buffer)
{
  BLI_assert(in_free_queue_ == false);

  /* Lockless list insert. */
  uint insert_index = current_list_index_++;

  /* If the current MTLSafeFreeList size is exceeded, we ripple down the linked-list chain and
   * insert the buffer into the next available chunk. */
  if (insert_index >= MTLSafeFreeList::MAX_NUM_BUFFERS_) {

    /* Check if first caller to generate next pool in chain.
     * Otherwise, ensure pool exists or wait for first caller to create next pool. */
    MTLSafeFreeList *next_list = next_.load();

    if (!next_list) {
      std::unique_lock lock(lock_);

      next_list = next_.load();
      if (!next_list) {
        next_list = new MTLSafeFreeList();
        next_.store(next_list);
      }
    }
    BLI_assert(next_list);
    next_list->insert_buffer(buffer);

    /* Clamp index to chunk limit if overflowing. */
    current_list_index_ = MTLSafeFreeList::MAX_NUM_BUFFERS_;
    return;
  }

  safe_free_pool_[insert_index] = buffer;
}
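
// The lockless insert above reserves a unique slot per caller with an atomic fetch-add;
// only overflow into a new chunk takes the lock (double-checked creation). Minimal
// generic sketch of the same pattern (illustrative only, not Blender API):
//
//   std::atomic<uint> index{0};
//   uint slot = index++;                 // Atomic reserve; no lock on the hot path.
//   if (slot < capacity) {
//     pool[slot] = item;                 // This thread exclusively owns the slot.
//   }
//   else {
//     ensure_next_chunk_locked()->insert(item);  // Rare path, guarded by a mutex.
//   }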

/* Increments from active GPUContext thread. */
void MTLSafeFreeList::increment_reference()
{
  lock_.lock();
  BLI_assert(in_free_queue_ == false);
  reference_count_++;
  referenced_by_workload_ = true;
  lock_.unlock();
}

/* Reference decrements and addition to completed list queue can occur from MTLCommandBuffer
 * completion callback thread. */
void MTLSafeFreeList::decrement_reference()
{
  lock_.lock();
  BLI_assert(in_free_queue_ == false);
  int ref_count = --reference_count_;

  if (ref_count == 0) {
    MTLContext::get_global_memory_manager()->push_completed_safe_list(this);
  }
  lock_.unlock();
}

bool MTLSafeFreeList::should_flush()
{
  /* We should only consider refreshing a list if it has been referenced by active workloads, and
   * contains a sufficient buffer count to avoid overheads associated with flushing the list. If
   * the reference count is only equal to 1, buffers may have been added, but no command
   * submissions will have been issued, hence buffers could be returned to the pool prematurely if
   * associated workload submission occurs later. */
  return ((reference_count_ > 1 || referenced_by_workload_) &&
          current_list_index_ > MIN_BUFFER_FLUSH_COUNT);
}

/* -------------------------------------------------------------------- */

/* Construct a gpu::MTLBuffer wrapper around a newly created metal::MTLBuffer. */
MTLBuffer::MTLBuffer(id<MTLDevice> mtl_device,
                     uint64_t size,
                     MTLResourceOptions options,
                     uint alignment)
{
  /* Calculate aligned allocation size. */
  BLI_assert(alignment > 0);
  uint64_t aligned_alloc_size = ceil_to_multiple_ul(size, alignment);

  alignment_ = alignment;
  device_ = mtl_device;
  is_external_ = false;

  options_ = options;
  this->flag_in_use(false);

  metal_buffer_ = [device_ newBufferWithLength:aligned_alloc_size options:options];
  BLI_assert(metal_buffer_);

  size_ = aligned_alloc_size;
  this->set_usage_size(size_);
  if (!(options_ & MTLResourceStorageModePrivate)) {
    data_ = [metal_buffer_ contents];
  }
  else {
    data_ = nullptr;
  }

  /* Linked resources. */
  next = prev = nullptr;
}

MTLBuffer::MTLBuffer(id<MTLBuffer> external_buffer)
{
  BLI_assert(external_buffer != nil);

  /* Ensure external_buffer remains referenced while in-use. */
  metal_buffer_ = external_buffer;
  [metal_buffer_ retain];

  /* Extract properties. */
  is_external_ = true;
  device_ = nil;
  alignment_ = 1;
  options_ = [metal_buffer_ resourceOptions];
  size_ = [metal_buffer_ allocatedSize];
  this->set_usage_size(size_);
  data_ = [metal_buffer_ contents];
  in_use_ = true;

  /* Linked resources. */
  next = prev = nullptr;
}
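
// Illustrative only: wrapping a caller-owned Metal buffer (e.g. one shared by an
// external renderer). `ext_buf` is an assumption for the example:
//
//   id<MTLBuffer> ext_buf = /* buffer created elsewhere */;
//   gpu::MTLBuffer *wrapper = new gpu::MTLBuffer(ext_buf);  // Retains ext_buf.
//   /* ... use wrapper->get_metal_buffer() / wrapper->get_host_ptr() ... */
//   delete wrapper;  // ~MTLBuffer() releases the retained reference.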

MTLBuffer::~MTLBuffer()
{
  if (metal_buffer_ != nil) {
    [metal_buffer_ release];
    metal_buffer_ = nil;
  }
}

void MTLBuffer::free()
{
  if (!is_external_) {
    MTLContext::get_global_memory_manager()->free_buffer(this);
  }
  else {
    if (metal_buffer_ != nil) {
      [metal_buffer_ release];
      metal_buffer_ = nil;
    }
  }
}

id<MTLBuffer> MTLBuffer::get_metal_buffer() const
{
  return metal_buffer_;
}

void *MTLBuffer::get_host_ptr() const
{
  BLI_assert(!(options_ & MTLResourceStorageModePrivate));
  BLI_assert(data_);
  return data_;
}

uint64_t MTLBuffer::get_size() const
{
  return size_;
}

uint64_t MTLBuffer::get_size_used() const
{
  return usage_size_;
}

bool MTLBuffer::requires_flush()
{
  /* We do not need to flush shared memory, as addressable buffer is shared. */
  return options_ & MTLResourceStorageModeManaged;
}

void MTLBuffer::set_label(NSString *str)
{
  metal_buffer_.label = str;
}

void MTLBuffer::debug_ensure_used()
{
  /* Debug: If buffer is not flagged as in-use, this is a problem. */
  BLI_assert_msg(
      in_use_,
      "Buffer should be marked as 'in-use' if being actively used by an instance. Buffer "
      "has likely already been freed.");
}

void MTLBuffer::flush()
{
  this->debug_ensure_used();
  if (this->requires_flush()) {
    [metal_buffer_ didModifyRange:NSMakeRange(0, size_)];
  }
}

void MTLBuffer::flush_range(uint64_t offset, uint64_t length)
{
  this->debug_ensure_used();
  if (this->requires_flush()) {
    BLI_assert((offset + length) <= size_);
    [metal_buffer_ didModifyRange:NSMakeRange(offset, length)];
  }
}

void MTLBuffer::flag_in_use(bool used)
{
  in_use_ = used;
}

bool MTLBuffer::get_in_use()
{
  return in_use_;
}

void MTLBuffer::set_usage_size(uint64_t size_used)
{
  BLI_assert(size_used > 0 && size_used <= size_);
  usage_size_ = size_used;
}

MTLResourceOptions MTLBuffer::get_resource_options()
{
  return options_;
}

uint MTLBuffer::get_alignment()
{
  return alignment_;
}

bool MTLTemporaryBuffer::requires_flush()
{
  /* We do not need to flush shared memory. */
  return this->options & MTLResourceStorageModeManaged;
}

void MTLTemporaryBuffer::flush()
{
  if (this->requires_flush()) {
    BLI_assert((this->buffer_offset + this->size) <= [this->metal_buffer length]);
    BLI_assert(this->buffer_offset >= 0);
    [this->metal_buffer
        didModifyRange:NSMakeRange(this->buffer_offset, this->size - this->buffer_offset)];
  }
}

/* -------------------------------------------------------------------- */

void MTLScratchBufferManager::init()
{

  if (!this->initialised_) {
    BLI_assert(context_.device);

    /* Initialize Scratch buffers. */
    for (int sb = 0; sb < mtl_max_scratch_buffers_; sb++) {
      scratch_buffers_[sb] = new MTLCircularBuffer(
          context_, mtl_scratch_buffer_initial_size_, true);
      BLI_assert(scratch_buffers_[sb]);
      BLI_assert(&(scratch_buffers_[sb]->own_context_) == &context_);
    }
    current_scratch_buffer_ = 0;
    initialised_ = true;
  }
}

void MTLScratchBufferManager::free()
{
  initialised_ = false;

  /* Release Scratch buffers. */
  for (int sb = 0; sb < mtl_max_scratch_buffers_; sb++) {
    delete scratch_buffers_[sb];
    scratch_buffers_[sb] = nullptr;
  }
  current_scratch_buffer_ = 0;
}

MTLTemporaryBuffer MTLScratchBufferManager::scratch_buffer_allocate_range(uint64_t alloc_size)
{
  return this->scratch_buffer_allocate_range_aligned(alloc_size, 1);
}

MTLTemporaryBuffer MTLScratchBufferManager::scratch_buffer_allocate_range_aligned(
    uint64_t alloc_size, uint alignment)
{
  /* Ensure scratch buffer allocation alignment adheres to offset alignment requirements. */
  alignment = max_uu(alignment, 256);

  BLI_assert_msg(current_scratch_buffer_ >= 0, "Scratch Buffer index not set");
  MTLCircularBuffer *current_scratch_buff = this->scratch_buffers_[current_scratch_buffer_];
  BLI_assert_msg(current_scratch_buff != nullptr, "Scratch Buffer does not exist");
  MTLTemporaryBuffer allocated_range = current_scratch_buff->allocate_range_aligned(alloc_size,
                                                                                    alignment);
  BLI_assert(allocated_range.size >= alloc_size && allocated_range.size <= alloc_size + alignment);
  BLI_assert(allocated_range.metal_buffer != nil);
  return allocated_range;
}
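
// Illustrative usage of a scratch-range allocation (sketch only; `mgr`, `encoder` and
// the 64-byte `uniforms` struct are assumptions for the example):
//
//   MTLTemporaryBuffer range = mgr.scratch_buffer_allocate_range_aligned(64, 256);
//   memcpy(range.data, &uniforms, 64);
//   [encoder setVertexBuffer:range.metal_buffer offset:range.buffer_offset atIndex:0];
//   mgr.flush_active_scratch_buffer();  // For managed memory, flush before submission.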

void MTLScratchBufferManager::ensure_increment_scratch_buffer()
{
  /* Fetch active scratch buffer. */
  MTLCircularBuffer *active_scratch_buf = scratch_buffers_[current_scratch_buffer_];
  BLI_assert(&active_scratch_buf->own_context_ == &context_);

  /* Ensure existing scratch buffer is no longer in use. MTL_MAX_SCRATCH_BUFFERS specifies
   * the number of allocated scratch buffers. This value should be equal to the number of
   * simultaneous frames in-flight, i.e. the maximal number of scratch buffers which are
   * simultaneously in-use. */
  if (active_scratch_buf->used_frame_index_ < context_.get_current_frame_index()) {
    current_scratch_buffer_ = (current_scratch_buffer_ + 1) % mtl_max_scratch_buffers_;
    active_scratch_buf = scratch_buffers_[current_scratch_buffer_];
    active_scratch_buf->reset();
    BLI_assert(&active_scratch_buf->own_context_ == &context_);
    MTL_LOG_INFO("Scratch buffer %d reset - (ctx %p)(Frame index: %d)",
                 current_scratch_buffer_,
                 &context_,
                 context_.get_current_frame_index());
  }
}

void MTLScratchBufferManager::flush_active_scratch_buffer()
{
  /* Fetch active scratch buffer and verify context. */
  MTLCircularBuffer *active_scratch_buf = scratch_buffers_[current_scratch_buffer_];
  BLI_assert(&active_scratch_buf->own_context_ == &context_);
  active_scratch_buf->flush();
}

/* MTLCircularBuffer implementation. */
MTLCircularBuffer::MTLCircularBuffer(MTLContext &ctx, uint64_t initial_size, bool allow_grow)
    : own_context_(ctx)
{
  BLI_assert(this);
  MTLResourceOptions options = ([own_context_.device hasUnifiedMemory]) ?
                                   MTLResourceStorageModeShared :
                                   MTLResourceStorageModeManaged;
  cbuffer_ = new gpu::MTLBuffer(own_context_.device, initial_size, options, 256);
  current_offset_ = 0;
  can_resize_ = allow_grow;
  cbuffer_->flag_in_use(true);

  used_frame_index_ = ctx.get_current_frame_index();
  last_flush_base_offset_ = 0;

  /* Debug label. */
  if (G.debug & G_DEBUG_GPU) {
    cbuffer_->set_label(@"Circular Scratch Buffer");
  }
}

MTLCircularBuffer::~MTLCircularBuffer()
{
  delete cbuffer_;
}

MTLTemporaryBuffer MTLCircularBuffer::allocate_range(uint64_t alloc_size)
{
  return this->allocate_range_aligned(alloc_size, 1);
}

MTLTemporaryBuffer MTLCircularBuffer::allocate_range_aligned(uint64_t alloc_size, uint alignment)
{
  BLI_assert(this);

  /* Ensure alignment of an allocation is aligned to compatible offset boundaries. */
  BLI_assert(alignment > 0);
  alignment = max_uu(alignment, 256);

  /* Align current offset and allocation size to desired alignment. */
  uint64_t aligned_current_offset = ceil_to_multiple_ul(current_offset_, alignment);
  uint64_t aligned_alloc_size = ceil_to_multiple_ul(alloc_size, alignment);
  bool can_allocate = (aligned_current_offset + aligned_alloc_size) < cbuffer_->get_size();

  BLI_assert(aligned_current_offset >= current_offset_);
  BLI_assert(aligned_alloc_size >= alloc_size);

  BLI_assert(aligned_current_offset % alignment == 0);
  BLI_assert(aligned_alloc_size % alignment == 0);

  /* Recreate Buffer. */
  if (!can_allocate) {
    uint64_t new_size = cbuffer_->get_size();
    if (can_resize_) {
      /* Resize to the maximum of basic resize heuristic OR the size of the current offset +
       * requested allocation -- we want the buffer to grow to a large enough size such that it
       * does not need to resize mid-frame. */
      new_size = max_ulul(
          min_ulul(new_size * 2, MTLScratchBufferManager::mtl_scratch_buffer_max_size_),
          aligned_current_offset + aligned_alloc_size);

#if MTL_SCRATCH_BUFFER_ALLOW_TEMPORARY_EXPANSION == 1
      /* IF a requested allocation EXCEEDS the maximum supported size, temporarily allocate up to
       * this, but shrink down ASAP. */
      if (new_size > MTLScratchBufferManager::mtl_scratch_buffer_max_size_) {

        /* If new requested allocation is bigger than maximum allowed size, temporarily resize to
         * maximum allocation size -- Otherwise, clamp the buffer size back down to the defined
         * maximum. */
        if (aligned_alloc_size > MTLScratchBufferManager::mtl_scratch_buffer_max_size_) {
          new_size = aligned_alloc_size;
          MTL_LOG_INFO("Temporarily growing Scratch buffer to %d MB", (int)new_size / 1024 / 1024);
        }
        else {
          new_size = MTLScratchBufferManager::mtl_scratch_buffer_max_size_;
          MTL_LOG_INFO("Shrinking Scratch buffer back to %d MB", (int)new_size / 1024 / 1024);
        }
      }
      BLI_assert(aligned_alloc_size <= new_size);
#else
      new_size = min_ulul(new_size, MTLScratchBufferManager::mtl_scratch_buffer_max_size_);

      if (aligned_alloc_size > new_size) {
        BLI_assert(false);

        /* Cannot allocate. */
        MTLTemporaryBuffer alloc_range;
        alloc_range.metal_buffer = nil;
        alloc_range.data = nullptr;
        alloc_range.buffer_offset = 0;
        alloc_range.size = 0;
        alloc_range.options = cbuffer_->get_resource_options();
      }
#endif
    }
    else {
      MTL_LOG_WARNING(
          "Performance Warning: Reached the end of circular buffer of size: %llu, but cannot "
          "resize. Starting new buffer",
          cbuffer_->get_size());
      BLI_assert(aligned_alloc_size <= new_size);

      /* Cannot allocate. */
      MTLTemporaryBuffer alloc_range;
      alloc_range.metal_buffer = nil;
      alloc_range.data = nullptr;
      alloc_range.buffer_offset = 0;
      alloc_range.size = 0;
      alloc_range.options = cbuffer_->get_resource_options();
    }

    /* Flush current buffer to ensure changes are visible on the GPU. */
    this->flush();

    /* Discard old buffer and create a new one - relying on Metal reference counting to track
     * in-use buffers. */
    MTLResourceOptions prev_options = cbuffer_->get_resource_options();
    uint prev_alignment = cbuffer_->get_alignment();
    delete cbuffer_;
    cbuffer_ = new gpu::MTLBuffer(own_context_.device, new_size, prev_options, prev_alignment);
    cbuffer_->flag_in_use(true);
    current_offset_ = 0;
    last_flush_base_offset_ = 0;

    /* Debug label. */
    if (G.debug & G_DEBUG_GPU) {
      cbuffer_->set_label(@"Circular Scratch Buffer");
    }
    MTL_LOG_INFO("Resized Metal circular buffer to %llu bytes", new_size);

    /* Reset allocation status. */
    aligned_current_offset = 0;
    BLI_assert((aligned_current_offset + aligned_alloc_size) <= cbuffer_->get_size());
  }

  /* Allocate chunk. */
  MTLTemporaryBuffer alloc_range;
  alloc_range.metal_buffer = cbuffer_->get_metal_buffer();
  alloc_range.data = (void *)((uint8_t *)([alloc_range.metal_buffer contents]) +
                              aligned_current_offset);
  alloc_range.buffer_offset = aligned_current_offset;
  alloc_range.size = aligned_alloc_size;
  alloc_range.options = cbuffer_->get_resource_options();
  BLI_assert(alloc_range.data);

  /* Shift offset to match alignment. */
  current_offset_ = aligned_current_offset + aligned_alloc_size;
  BLI_assert(current_offset_ <= cbuffer_->get_size());
  return alloc_range;
}
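
// Worked example of the alignment arithmetic above (illustrative numbers): with
// current_offset_ = 300 and alignment = 256, ceil_to_multiple_ul(300, 256) = 512, and a
// 100-byte request rounds up to aligned_alloc_size = 256. The returned range covers
// bytes [512, 768) of the circular buffer, and current_offset_ advances to 768.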

void MTLCircularBuffer::flush()
{
  BLI_assert(this);

  uint64_t len = current_offset_ - last_flush_base_offset_;
  if (len > 0) {
    cbuffer_->flush_range(last_flush_base_offset_, len);
    last_flush_base_offset_ = current_offset_;
  }
}

void MTLCircularBuffer::reset()
{
  BLI_assert(this);

  /* If circular buffer has data written to it, offset will be greater than zero. */
  if (current_offset_ > 0) {

    /* Ensure the circular buffer is no longer being used by an in-flight frame. */
    BLI_assert((own_context_.get_current_frame_index() >=
                (used_frame_index_ + MTL_NUM_SAFE_FRAMES - 1)) &&
               "Trying to reset Circular scratch buffer while its data is still being used by "
               "an in-flight frame");

    current_offset_ = 0;
    last_flush_base_offset_ = 0;
  }

  /* Update used frame index to current. */
  used_frame_index_ = own_context_.get_current_frame_index();
}

}  // namespace blender::gpu