Blender V5.0
mtl_command_buffer.mm
/* SPDX-FileCopyrightText: 2022-2023 Blender Authors
 *
 * SPDX-License-Identifier: GPL-2.0-or-later */

#include "DNA_userdef_types.h"

#include "mtl_backend.hh"
#include "mtl_common.hh"
#include "mtl_context.hh"
#include "mtl_debug.hh"
#include "mtl_framebuffer.hh"

#include <fstream>

using namespace blender;
using namespace blender::gpu;
namespace blender::gpu {

/* Counter for active command buffers. */
volatile std::atomic<int> MTLCommandBufferManager::num_active_cmd_bufs_in_system = 0;

/* -------------------------------------------------------------------- */
/** \name MTLCommandBuffer initialization and render coordination
 * \{ */

void MTLCommandBufferManager::prepare(bool /*supports_render*/)
{
  render_pass_state_.reset_state();
  compute_state_.reset_state();
}

void MTLCommandBufferManager::register_encoder_counters()
{
  encoder_count_++;
  empty_ = false;
}

id<MTLCommandBuffer> MTLCommandBufferManager::ensure_begin()
{
  if (active_command_buffer_ == nil) {

    /* Verify the number of active command buffers is below the limit.
     * Exceeding this limit means we either have a command buffer leak or a GPU hang,
     * or we should increase the command buffer limit during MTLQueue creation.
     * Excessive command buffers can also be caused by frequent GPUContext switches, which cause
     * the GPU pipeline to flush. This is common during indirect light baking operations.
     *
     * NOTE: We currently stall until completion of GPU work upon ::submit if we have reached the
     * in-flight command buffer limit. */
    BLI_assert(MTLCommandBufferManager::num_active_cmd_bufs_in_system <
               GHOST_ContextMTL::max_command_buffer_count);

    if (G.debug & G_DEBUG_GPU) {
      /* Debug: Enable advanced errors for GPU work execution. */
      MTLCommandBufferDescriptor *desc = [[MTLCommandBufferDescriptor alloc] init];
      desc.errorOptions = MTLCommandBufferErrorOptionEncoderExecutionStatus;
      desc.retainedReferences = YES;
      BLI_assert(context_.queue != nil);
      active_command_buffer_ = [context_.queue commandBufferWithDescriptor:desc];
    }

    /* Ensure a command buffer is created if the debug command buffer is unavailable. */
    if (active_command_buffer_ == nil) {
      active_command_buffer_ = [context_.queue commandBuffer];
    }

    [active_command_buffer_ retain];
    context_.main_command_buffer.inc_active_command_buffer_count();

    /* Ensure we begin a new scratch buffer if we are on a new frame. */
    MTLScratchBufferManager &mem = context_.memory_manager;
    mem.ensure_increment_scratch_buffer();

    /* Reset command buffer heuristics. */
    this->reset_counters();

    /* Clear debug stacks. */
    debug_group_stack.clear();
    debug_group_pushed_stack.clear();
  }
  BLI_assert(active_command_buffer_ != nil);
  return active_command_buffer_;
}
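
/* NOTE: `MTLCommandBufferErrorOptionEncoderExecutionStatus` above opts the command buffer into
 * Metal's enhanced command buffer errors, which can attribute a GPU fault to the specific
 * encoder that triggered it. The resulting error is surfaced via `[active_command_buffer_ error]`
 * after `waitUntilCompleted` in `submit()` below. */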

/* If `wait` is true, the CPU will stall until GPU work has completed. */
bool MTLCommandBufferManager::submit(bool wait)
{
  /* Skip submission if the command buffer is empty. */
  if (empty_ || active_command_buffer_ == nil) {
    if (wait) {
      /* Wait for any previously submitted work on this context to complete.
       * (The wait function will yield, so this may need reworking if it hits a
       * performance-critical path which is sensitive to CPU<->GPU latency.) */
      wait_until_active_command_buffers_complete();
    }
    return false;
  }

  /* Ensure current encoders are finished. */
  this->end_active_command_encoder();
  BLI_assert(active_command_encoder_type_ == MTL_NO_COMMAND_ENCODER);

  /* Flush the active ScratchBuffer associated with the parent MTLContext. */
  context_.memory_manager.flush_active_scratch_buffer();

  /*** Submit Command Buffer. ***/
  /* Command buffer lifetime tracking.
   * Increment the current MTLSafeFreeList reference counter to flag MTLBuffers freed within
   * the current command buffer lifetime as used.
   * This ensures that in-use resources are not prematurely de-referenced and returned to the
   * available buffer pool while they are in use by the GPU. */
  MTLSafeFreeList *cmd_free_buffer_list =
      MTLContext::get_global_memory_manager()->get_current_safe_list();
  BLI_assert(cmd_free_buffer_list);
  cmd_free_buffer_list->increment_reference();

  id<MTLCommandBuffer> cmd_buffer_ref = active_command_buffer_;
  [cmd_buffer_ref retain];

  [cmd_buffer_ref addCompletedHandler:^(id<MTLCommandBuffer> /*cb*/) {
    /* Upon command buffer completion, decrement MTLSafeFreeList reference count
     * to allow buffers no longer in use by this CommandBuffer to be freed. */
    cmd_free_buffer_list->decrement_reference();

    /* Release command buffer after completion callback handled. */
    [cmd_buffer_ref release];

    /* Decrement count. */
    context_.main_command_buffer.dec_active_command_buffer_count();
  }];

  /* Submit command buffer to GPU. */
  [active_command_buffer_ commit];

  /* If we have too many active command buffers in flight, wait until completed to avoid running
   * out. We can increase GHOST_ContextMTL::max_command_buffer_count if this proves
   * insufficient. */
  if (MTLCommandBufferManager::num_active_cmd_bufs_in_system >=
      (GHOST_ContextMTL::max_command_buffer_count - 1))
  {
    wait = true;
    MTL_LOG_WARNING(
        "Maximum number of command buffers in flight. Host will wait until GPU work has "
        "completed. Consider increasing GHOST_ContextMTL::max_command_buffer_count or reducing "
        "work fragmentation to better utilize system hardware. Command buffers are flushed upon "
        "GPUContext switches, this is the most common cause of excessive command buffer "
        "generation.");
  }

  if (wait || (G.debug & G_DEBUG_GPU)) {
    /* Wait until current GPU work has finished executing. */
    [active_command_buffer_ waitUntilCompleted];

    /* Command buffer execution debugging can return an error message if
     * execution has failed or encountered GPU-side errors. */
    if (G.debug & G_DEBUG_GPU) {

      NSError *error = [active_command_buffer_ error];
      if (error != nil) {
        NSLog(@"%@", error);
        BLI_assert(false);
      }
    }
  }

  /* Release the previous frame's command buffer and reset the active command buffer. */
  if (last_submitted_command_buffer_ != nil) {

    BLI_assert(MTLBackend::get()->is_inside_render_boundary());
    [last_submitted_command_buffer_ autorelease];
    last_submitted_command_buffer_ = nil;
  }
  last_submitted_command_buffer_ = active_command_buffer_;
  active_command_buffer_ = nil;

  return true;
}
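
/* The submit path above uses a common Metal lifetime pattern: retain the command buffer and
 * increment the safe-free-list reference count before `commit`, then perform the matching
 * release/decrement inside the completion handler. A minimal standalone sketch of the same
 * pattern (illustrative only; `queue` and `safe_list` are hypothetical stand-ins):
 *
 *   id<MTLCommandBuffer> cb = [queue commandBuffer];
 *   [cb retain];
 *   safe_list->increment_reference();
 *   [cb addCompletedHandler:^(id<MTLCommandBuffer> cb_done) {
 *     safe_list->decrement_reference();  // Buffers freed during recording can now be pooled.
 *     [cb release];                      // Balances the retain taken before commit.
 *   }];
 *   [cb commit];
 */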

/** \} */

/* -------------------------------------------------------------------- */
/** \name Render Command Encoder Utility and Management
 * \{ */

/* Fetch/query current encoder. */
bool MTLCommandBufferManager::is_inside_render_pass()
{
  return (active_command_encoder_type_ == MTL_RENDER_COMMAND_ENCODER);
}

bool MTLCommandBufferManager::is_inside_blit()
{
  return (active_command_encoder_type_ == MTL_BLIT_COMMAND_ENCODER);
}

bool MTLCommandBufferManager::is_inside_compute()
{
  return (active_command_encoder_type_ == MTL_COMPUTE_COMMAND_ENCODER);
}

id<MTLRenderCommandEncoder> MTLCommandBufferManager::get_active_render_command_encoder()
{
  /* Calling code should check if inside a render pass. Otherwise nil. */
  return active_render_command_encoder_;
}

id<MTLBlitCommandEncoder> MTLCommandBufferManager::get_active_blit_command_encoder()
{
  /* Calling code should check if inside a blit pass. Otherwise nil. */
  return active_blit_command_encoder_;
}

id<MTLComputeCommandEncoder> MTLCommandBufferManager::get_active_compute_command_encoder()
{
  /* Calling code should check if inside a compute pass. Otherwise nil. */
  return active_compute_command_encoder_;
}

MTLFrameBuffer *MTLCommandBufferManager::get_active_framebuffer()
{
  /* If outside of a render pass, nullptr will be returned. */
  if (this->is_inside_render_pass()) {
    return active_frame_buffer_;
  }
  return nullptr;
}

/* Encoder and pass management. */
/* End the currently active MTLCommandEncoder. */
bool MTLCommandBufferManager::end_active_command_encoder(bool retain_framebuffers)
{

  /* End the active encoder if one is active. */
  if (active_command_encoder_type_ != MTL_NO_COMMAND_ENCODER) {

    switch (active_command_encoder_type_) {
      case MTL_RENDER_COMMAND_ENCODER: {
        /* Verify a RenderCommandEncoder is active and end. */
        BLI_assert(active_render_command_encoder_ != nil);

        /* Complete Encoding. */
        [active_render_command_encoder_ endEncoding];
        [active_render_command_encoder_ release];
        active_render_command_encoder_ = nil;
        active_command_encoder_type_ = MTL_NO_COMMAND_ENCODER;

        /* Reset associated frame-buffer flag. */
        if (!retain_framebuffers) {
          active_frame_buffer_ = nullptr;
          active_pass_descriptor_ = nullptr;
        }
        return true;
      }

      case MTL_BLIT_COMMAND_ENCODER: {
        /* Verify a BlitCommandEncoder is active and end. */
        BLI_assert(active_blit_command_encoder_ != nil);
        [active_blit_command_encoder_ endEncoding];
        [active_blit_command_encoder_ release];
        active_blit_command_encoder_ = nil;
        active_command_encoder_type_ = MTL_NO_COMMAND_ENCODER;
        return true;
      }

      case MTL_COMPUTE_COMMAND_ENCODER: {
        /* Verify a ComputeCommandEncoder is active and end. */
        BLI_assert(active_compute_command_encoder_ != nil);
        [active_compute_command_encoder_ endEncoding];
        [active_compute_command_encoder_ release];
        active_compute_command_encoder_ = nil;
        active_command_encoder_type_ = MTL_NO_COMMAND_ENCODER;
        return true;
      }

      default: {
        BLI_assert(false && "Invalid command encoder type");
        return false;
      }
    };
  }
  else {
    /* MTL_NO_COMMAND_ENCODER. */
    BLI_assert(active_render_command_encoder_ == nil);
    BLI_assert(active_blit_command_encoder_ == nil);
    BLI_assert(active_compute_command_encoder_ == nil);
    return false;
  }
}

id<MTLRenderCommandEncoder> MTLCommandBufferManager::ensure_begin_render_command_encoder(
    MTLFrameBuffer *ctx_framebuffer, bool force_begin, bool *r_new_pass)
{
  /* Ensure valid frame-buffer. */
  BLI_assert(ctx_framebuffer != nullptr);

  /* Ensure active command buffer. */
  id<MTLCommandBuffer> cmd_buf = this->ensure_begin();
  BLI_assert(cmd_buf);

  /* Begin a new command encoder if the currently active one is
   * incompatible or requires updating. */
  if (active_command_encoder_type_ != MTL_RENDER_COMMAND_ENCODER ||
      active_frame_buffer_ != ctx_framebuffer || force_begin)
  {
    this->end_active_command_encoder(true);

    /* Determine if this is a re-bind of the same frame-buffer. */
    bool is_rebind = (active_frame_buffer_ == ctx_framebuffer);

    /* Generate RenderPassDescriptor from the bound frame-buffer. */
    BLI_assert(ctx_framebuffer);
    active_frame_buffer_ = ctx_framebuffer;
    active_pass_descriptor_ = active_frame_buffer_->bake_render_pass_descriptor(
        is_rebind && (!active_frame_buffer_->get_pending_clear()));

    /* Determine if there is a visibility buffer assigned to the context. */
    gpu::MTLBuffer *visibility_buffer = context_.get_visibility_buffer();
    this->active_pass_descriptor_.visibilityResultBuffer =
        (visibility_buffer) ? visibility_buffer->get_metal_buffer() : nil;
    context_.clear_visibility_dirty();

    /* Ensure we have already cleaned up our previous render command encoder. */
    BLI_assert(active_render_command_encoder_ == nil);

    /* Unfold pending debug groups. */
    if (G.debug & G_DEBUG_GPU) {
      unfold_pending_debug_groups();
    }

    /* Create a new RenderCommandEncoder based on the descriptor (and begin encoding). */
    active_render_command_encoder_ = [cmd_buf
        renderCommandEncoderWithDescriptor:active_pass_descriptor_];
    [active_render_command_encoder_ retain];
    active_command_encoder_type_ = MTL_RENDER_COMMAND_ENCODER;

    /* Add debug label. */
    if (G.debug & G_DEBUG_GPU) {
      std::string debug_name = "RenderCmdEncoder: Unnamed";
      if (!debug_group_pushed_stack.empty()) {
        debug_name = "RenderCmdEncoder: " + debug_group_pushed_stack.back();
      }
      debug_name += " (FrameBuffer: " + std::string(active_frame_buffer_->name_get()) + ")";
      active_render_command_encoder_.label = [NSString stringWithUTF8String:debug_name.c_str()];
    }

    /* Update command buffer encoder heuristics. */
    this->register_encoder_counters();

    /* Apply initial state: update viewport and scissor state. */
    active_frame_buffer_->apply_state();

    /* Flag frame-buffer as cleared -- a clear only lasts for the pass in which it was specified.
     * After this, attachments revert to Load actions to parallel GL behavior. */
    active_frame_buffer_->mark_cleared();

    /* Reset RenderPassState to ensure resource bindings are re-applied. */
    render_pass_state_.reset_state();

    /* Return true as a new pass started. */
    *r_new_pass = true;
  }
  else {
    /* No new pass. */
    *r_new_pass = false;
  }

  BLI_assert(active_render_command_encoder_ != nil);
  return active_render_command_encoder_;
}
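
/* NOTE: In `ensure_begin_render_command_encoder` above, `bake_render_pass_descriptor()` is
 * passed `true` (load existing contents) only when re-binding the same frame-buffer with no
 * clear pending. A pass that is merely split and resumed therefore re-loads its attachments,
 * while a newly bound or explicitly cleared frame-buffer starts from its clear configuration. */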

id<MTLBlitCommandEncoder> MTLCommandBufferManager::ensure_begin_blit_encoder()
{
  /* Ensure active command buffer. */
  id<MTLCommandBuffer> cmd_buf = this->ensure_begin();
  BLI_assert(cmd_buf);

  /* Ensure no existing command encoder of a different type is active. */
  if (active_command_encoder_type_ != MTL_BLIT_COMMAND_ENCODER) {
    this->end_active_command_encoder();
  }

  /* Begin new Blit Encoder. */
  if (active_blit_command_encoder_ == nil) {
    /* Unfold pending debug groups. */
    if (G.debug & G_DEBUG_GPU) {
      unfold_pending_debug_groups();
    }

    active_blit_command_encoder_ = [cmd_buf blitCommandEncoder];
    BLI_assert(active_blit_command_encoder_ != nil);
    [active_blit_command_encoder_ retain];
    active_command_encoder_type_ = MTL_BLIT_COMMAND_ENCODER;

    /* Add debug label. */
    if (G.debug & G_DEBUG_GPU) {
      std::string debug_name = "BlitCmdEncoder: Unnamed";
      if (!debug_group_pushed_stack.empty()) {
        debug_name = "BlitCmdEncoder: " + debug_group_pushed_stack.back();
      }
      active_blit_command_encoder_.label = [NSString stringWithUTF8String:debug_name.c_str()];
    }

    /* Update command buffer encoder heuristics. */
    this->register_encoder_counters();
  }
  BLI_assert(active_blit_command_encoder_ != nil);
  return active_blit_command_encoder_;
}

id<MTLComputeCommandEncoder> MTLCommandBufferManager::ensure_begin_compute_encoder()
{
  /* Ensure active command buffer. */
  id<MTLCommandBuffer> cmd_buf = this->ensure_begin();
  BLI_assert(cmd_buf);

  /* Ensure no existing command encoder of a different type is active. */
  if (active_command_encoder_type_ != MTL_COMPUTE_COMMAND_ENCODER) {
    this->end_active_command_encoder();
  }

  /* Begin new Compute Encoder. */
  if (active_compute_command_encoder_ == nil) {
    /* Unfold pending debug groups. */
    if (G.debug & G_DEBUG_GPU) {
      unfold_pending_debug_groups();
    }

    active_compute_command_encoder_ = [cmd_buf computeCommandEncoder];
    BLI_assert(active_compute_command_encoder_ != nil);
    [active_compute_command_encoder_ retain];
    active_command_encoder_type_ = MTL_COMPUTE_COMMAND_ENCODER;

    /* Add debug label. */
    if (G.debug & G_DEBUG_GPU) {
      std::string debug_name = "ComputeCmdEncoder: Unnamed";
      if (!debug_group_pushed_stack.empty()) {
        debug_name = "ComputeCmdEncoder: " + debug_group_pushed_stack.back();
      }
      active_compute_command_encoder_.label = [NSString stringWithUTF8String:debug_name.c_str()];
    }

    /* Update command buffer encoder heuristics. */
    this->register_encoder_counters();

    /* Reset ComputeState to ensure resource bindings are re-applied. */
    compute_state_.reset_state();
  }
  BLI_assert(active_compute_command_encoder_ != nil);
  return active_compute_command_encoder_;
}

/** \} */

/* -------------------------------------------------------------------- */
/** \name Command Buffer Workload Heuristics
 * \{ */

/* Rendering heuristics. */
void MTLCommandBufferManager::register_draw_counters(int vertex_submission)
{
  current_draw_call_count_++;
  vertex_submitted_count_ += vertex_submission;
  empty_ = false;
}

/* Reset workload counters. */
void MTLCommandBufferManager::reset_counters()
{
  empty_ = true;
  current_draw_call_count_ = 0;
  encoder_count_ = 0;
  vertex_submitted_count_ = 0;
}

/* Workload evaluation. */
bool MTLCommandBufferManager::do_break_submission()
{
  /* Skip if no active command buffer. */
  if (active_command_buffer_ == nil) {
    return false;
  }

  /* Use an optimized heuristic to split heavy command buffer submissions to better saturate the
   * hardware and also reduce stalling from individual large submissions. */
  if (GPU_type_matches(GPU_DEVICE_INTEL, GPU_OS_ANY, GPU_DRIVER_ANY) ||
      GPU_type_matches(GPU_DEVICE_ATI, GPU_OS_ANY, GPU_DRIVER_ANY))
  {
    return ((current_draw_call_count_ > 30000) || (vertex_submitted_count_ > 100000000) ||
            (encoder_count_ > 25));
  }
  /* Apple Silicon is less efficient when splitting submissions. */
  return false;
}

/** \} */

/* -------------------------------------------------------------------- */
/** \name Command Buffer Debugging
 * \{ */

/* Debug. */
void MTLCommandBufferManager::push_debug_group(const char *name, int /*index*/)
{
  /* Only perform this operation if capturing. */
  MTLCaptureManager *capture_manager = [MTLCaptureManager sharedCaptureManager];
  if (![capture_manager isCapturing]) {
    return;
  }

  id<MTLCommandBuffer> cmd = this->ensure_begin();
  if (cmd != nil) {
    if (active_command_encoder_type_ != MTL_NO_COMMAND_ENCODER) {
      this->end_active_command_encoder();
    }

    debug_group_stack.emplace_back(name);
  }
}

void MTLCommandBufferManager::pop_debug_group()
{
  /* Only perform this operation if capturing. */
  MTLCaptureManager *capture_manager = [MTLCaptureManager sharedCaptureManager];
  if (![capture_manager isCapturing]) {
    return;
  }

  id<MTLCommandBuffer> cmd = this->ensure_begin();
  if (cmd != nil) {
    if (active_command_encoder_type_ != MTL_NO_COMMAND_ENCODER) {
      this->end_active_command_encoder();
    }

#if METAL_DEBUG_CAPTURE_HIDE_EMPTY == 0
    /* Unfold pending groups to display empty groups. */
    unfold_pending_debug_groups();
#endif

    /* If we have pending debug groups, first pop the last pending one. */
    if (!debug_group_stack.empty()) {
      debug_group_stack.pop_back();
    }
    else {
      /* Otherwise, close the last active pushed group. */
      if (!debug_group_pushed_stack.empty()) {
        debug_group_pushed_stack.pop_back();

        if (debug_group_pushed_stack.size() < uint(METAL_DEBUG_CAPTURE_MAX_NESTED_GROUPS)) {
          [cmd popDebugGroup];
        }
      }
    }
  }
}

void MTLCommandBufferManager::unfold_pending_debug_groups()
{
  /* Only perform this operation if capturing. */
  MTLCaptureManager *capture_manager = [MTLCaptureManager sharedCaptureManager];
  if (![capture_manager isCapturing]) {
    return;
  }

  if (active_command_buffer_ != nil) {
    for (const std::string &name : debug_group_stack) {
      if (debug_group_pushed_stack.size() < uint(METAL_DEBUG_CAPTURE_MAX_NESTED_GROUPS)) {
        [active_command_buffer_ pushDebugGroup:[NSString stringWithFormat:@"%s", name.c_str()]];
      }
      debug_group_pushed_stack.push_back(name);
    }
    debug_group_stack.clear();
  }
}
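
/* NOTE: Debug groups are recorded into `debug_group_stack` on push and only materialized onto
 * the command buffer here, once an encoder is about to begin. Groups that are pushed and popped
 * without any intervening work therefore never reach the capture (unless
 * METAL_DEBUG_CAPTURE_HIDE_EMPTY is disabled), which keeps GPU captures free of empty groups. */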

/* Workload synchronization. */
bool MTLCommandBufferManager::insert_memory_barrier(GPUBarrier barrier_bits,
                                                    GPUStageBarrierBits before_stages,
                                                    GPUStageBarrierBits after_stages)
{
  /* Apple Silicon does not support memory barriers for RenderCommandEncoders.
   * We do not currently need these due to implicit API guarantees. However, render->render
   * resource dependencies are only evaluated at RenderCommandEncoder boundaries due to work
   * execution on the TBDR architecture.
   *
   * NOTE: Render barriers are therefore inherently expensive. Where possible, opt for local
   * synchronization using raster order groups, or prefer compute, to avoid subsequent passes
   * re-loading pass attachments which are not needed. */
  const bool is_tile_based_arch = (GPU_platform_architecture() == GPU_ARCHITECTURE_TBDR);
  if (is_tile_based_arch) {
    if (active_command_encoder_type_ == MTL_RENDER_COMMAND_ENCODER) {
      /* Break the render pass to ensure final pass results are visible to subsequent calls. */
      this->end_active_command_encoder();
      return true;
    }
    /* Skip all barriers for compute and blit passes, as Metal will resolve these dependencies. */
    return false;
  }

  /* Resolve scope. */
  MTLBarrierScope scope = 0;
  if (barrier_bits & GPU_BARRIER_SHADER_IMAGE_ACCESS || barrier_bits & GPU_BARRIER_TEXTURE_FETCH) {
    bool is_compute = (active_command_encoder_type_ != MTL_RENDER_COMMAND_ENCODER);
    scope |= (is_compute ? 0 : MTLBarrierScopeRenderTargets) | MTLBarrierScopeTextures;
  }
  if (barrier_bits & GPU_BARRIER_SHADER_STORAGE ||
      barrier_bits & GPU_BARRIER_VERTEX_ATTRIB_ARRAY || barrier_bits & GPU_BARRIER_ELEMENT_ARRAY ||
      barrier_bits & GPU_BARRIER_UNIFORM || barrier_bits & GPU_BARRIER_BUFFER_UPDATE)
  {
    scope = scope | MTLBarrierScopeBuffers;
  }

  if (scope != 0) {
    /* Issue barrier based on the active encoder. */
    switch (active_command_encoder_type_) {
      case MTL_NO_COMMAND_ENCODER:
      case MTL_BLIT_COMMAND_ENCODER: {
        /* No barrier to be inserted. */
        return false;
      }

      /* Rendering. */
      case MTL_RENDER_COMMAND_ENCODER: {
        /* Currently flagging both stages -- the bits above could be used to filter on stage
         * type -- though a full barrier is safe for now. */
        MTLRenderStages before_stage_flags = 0;
        MTLRenderStages after_stage_flags = 0;
        if (before_stages & GPU_BARRIER_STAGE_VERTEX &&
            !(before_stages & GPU_BARRIER_STAGE_FRAGMENT))
        {
          before_stage_flags = before_stage_flags | MTLRenderStageVertex;
        }
        if (before_stages & GPU_BARRIER_STAGE_FRAGMENT) {
          before_stage_flags = before_stage_flags | MTLRenderStageFragment;
        }
        if (after_stages & GPU_BARRIER_STAGE_VERTEX) {
          after_stage_flags = after_stage_flags | MTLRenderStageVertex;
        }
        if (after_stages & GPU_BARRIER_STAGE_FRAGMENT) {
          after_stage_flags = after_stage_flags | MTLRenderStageFragment;
        }

        id<MTLRenderCommandEncoder> rec = this->get_active_render_command_encoder();
        BLI_assert(rec != nil);
        [rec memoryBarrierWithScope:scope
                        afterStages:after_stage_flags
                       beforeStages:before_stage_flags];
        return true;
      }

      /* Compute. */
      case MTL_COMPUTE_COMMAND_ENCODER: {
        id<MTLComputeCommandEncoder> rec = this->get_active_compute_command_encoder();
        BLI_assert(rec != nil);
        [rec memoryBarrierWithScope:scope];
        return true;
      }
    }
  }
  /* No barrier support. */
  return false;
}
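
/* Illustrative call site (hypothetical, not taken from this file): making shader storage
 * writes visible to the vertex stage of a later draw on an immediate-mode (non-TBDR) GPU:
 *
 *   cmd.insert_memory_barrier(GPU_BARRIER_SHADER_STORAGE,
 *                             GPU_BARRIER_STAGE_VERTEX,
 *                             GPU_BARRIER_STAGE_VERTEX);
 *
 * On TBDR devices the same call instead breaks the render pass (or is skipped for compute and
 * blit work), as handled at the top of the function. */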

void MTLCommandBufferManager::encode_signal_event(id<MTLEvent> event, uint64_t signal_value)
{
  /* Ensure active command buffer. */
  id<MTLCommandBuffer> cmd_buf = this->ensure_begin();
  BLI_assert(cmd_buf);
  this->end_active_command_encoder();
  [cmd_buf encodeSignalEvent:event value:signal_value];
  register_encoder_counters();
}

void MTLCommandBufferManager::encode_wait_for_event(id<MTLEvent> event, uint64_t signal_value)
{
  /* Ensure active command buffer. */
  id<MTLCommandBuffer> cmd_buf = this->ensure_begin();
  BLI_assert(cmd_buf);
  this->end_active_command_encoder();
  [cmd_buf encodeWaitForEvent:event value:signal_value];
  register_encoder_counters();
}
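
/* NOTE: `encodeSignalEvent:value:` and `encodeWaitForEvent:value:` are command-buffer-level
 * commands in Metal and cannot be encoded while a command encoder is open, hence both paths
 * end the active encoder first. A wait on a value that is never signaled will stall all
 * subsequent work on the queue. */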

/** \} */

/* -------------------------------------------------------------------- */
/** \name Render Pass State for Active RenderCommandEncoder
 * \{ */

/* Reset binding state when a new RenderCommandEncoder is bound, to ensure
 * pipeline resources are re-applied to the new encoder.
 * NOTE: In Metal, state is only persistent within an MTLCommandEncoder,
 * not globally. */
void MTLRenderPassState::reset_state()
{
  /* Reset cached pipeline state. */
  this->bound_pso = nil;
  this->bound_ds_state = nil;

  /* Clear shader binding. */
  this->last_bound_shader_state.set(nullptr, 0);

  /* Other states. */
  MTLFrameBuffer *fb = this->cmd.get_active_framebuffer();
  this->last_scissor_rect = {0,
                             0,
                             (uint)((fb != nullptr) ? fb->get_width() : 0),
                             (uint)((fb != nullptr) ? fb->get_height() : 0)};

  /* Reset cached resource binding state. */
  for (int ubo = 0; ubo < MTL_MAX_BUFFER_BINDINGS; ubo++) {
    this->cached_vertex_buffer_bindings[ubo].is_bytes = false;
    this->cached_vertex_buffer_bindings[ubo].metal_buffer = nil;
    this->cached_vertex_buffer_bindings[ubo].offset = -1;

    this->cached_fragment_buffer_bindings[ubo].is_bytes = false;
    this->cached_fragment_buffer_bindings[ubo].metal_buffer = nil;
    this->cached_fragment_buffer_bindings[ubo].offset = -1;
  }

  /* Reset cached texture and sampler state binding state. */
  for (int tex = 0; tex < MTL_MAX_TEXTURE_SLOTS; tex++) {
    this->cached_vertex_texture_bindings[tex].metal_texture = nil;
    this->cached_vertex_sampler_state_bindings[tex].sampler_state = nil;
    this->cached_vertex_sampler_state_bindings[tex].is_arg_buffer_binding = false;

    this->cached_fragment_texture_bindings[tex].metal_texture = nil;
    this->cached_fragment_sampler_state_bindings[tex].sampler_state = nil;
    this->cached_fragment_sampler_state_bindings[tex].is_arg_buffer_binding = false;
  }
}

void MTLComputeState::reset_state()
{
  /* Reset cached pipeline state. */
  this->bound_pso = nil;

  /* Reset cached resource binding state. */
  for (int ubo = 0; ubo < MTL_MAX_BUFFER_BINDINGS; ubo++) {
    this->cached_compute_buffer_bindings[ubo].is_bytes = false;
    this->cached_compute_buffer_bindings[ubo].metal_buffer = nil;
    this->cached_compute_buffer_bindings[ubo].offset = -1;
  }

  /* Reset cached texture and sampler state binding state. */
  for (int tex = 0; tex < MTL_MAX_TEXTURE_SLOTS; tex++) {
    this->cached_compute_texture_bindings[tex].metal_texture = nil;
    this->cached_compute_sampler_state_bindings[tex].sampler_state = nil;
    this->cached_compute_sampler_state_bindings[tex].is_arg_buffer_binding = false;
  }
}

/* Bind a texture to the current RenderCommandEncoder. */
void MTLRenderPassState::bind_vertex_texture(id<MTLTexture> tex, uint slot)
{
  if (this->cached_vertex_texture_bindings[slot].metal_texture != tex) {
    id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
    BLI_assert(rec != nil);
    [rec setVertexTexture:tex atIndex:slot];
    this->cached_vertex_texture_bindings[slot].metal_texture = tex;
  }
}

void MTLRenderPassState::bind_fragment_texture(id<MTLTexture> tex, uint slot)
{
  if (this->cached_fragment_texture_bindings[slot].metal_texture != tex) {
    id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
    BLI_assert(rec != nil);
    [rec setFragmentTexture:tex atIndex:slot];
    this->cached_fragment_texture_bindings[slot].metal_texture = tex;
  }
}

void MTLComputeState::bind_compute_texture(id<MTLTexture> tex, uint slot)
{
  if (this->cached_compute_texture_bindings[slot].metal_texture != tex) {
    id<MTLComputeCommandEncoder> rec = this->cmd.get_active_compute_command_encoder();
    BLI_assert(rec != nil);
    [rec setTexture:tex atIndex:slot];

    this->cached_compute_texture_bindings[slot].metal_texture = tex;
  }
}

void MTLRenderPassState::bind_vertex_sampler(MTLSamplerBinding &sampler_binding,
                                             bool use_argument_buffer_for_samplers,
                                             uint slot)
{
  /* Range check. */
  const MTLShaderInterface *shader_interface = ctx.pipeline_state.active_shader->get_interface();
  BLI_assert(slot >= 0);
  BLI_assert(slot <= shader_interface->get_max_texture_index());
  BLI_assert(slot < MTL_MAX_TEXTURE_SLOTS);
  UNUSED_VARS_NDEBUG(shader_interface);

  /* If the sampler state has not changed for the given slot, we do not need to fetch. */
  if (this->cached_vertex_sampler_state_bindings[slot].sampler_state == nil ||
      !(this->cached_vertex_sampler_state_bindings[slot].binding_state == sampler_binding.state) ||
      use_argument_buffer_for_samplers)
  {

    id<MTLSamplerState> sampler_state = (sampler_binding.state == DEFAULT_SAMPLER_STATE) ?
                                            ctx.get_default_sampler_state() :
                                            ctx.get_sampler_from_state(sampler_binding.state);
    if (!use_argument_buffer_for_samplers) {
      /* Update binding and cached state. */
      id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
      BLI_assert(rec != nil);
      [rec setVertexSamplerState:sampler_state atIndex:slot];
      this->cached_vertex_sampler_state_bindings[slot].binding_state = sampler_binding.state;
      this->cached_vertex_sampler_state_bindings[slot].sampler_state = sampler_state;
    }

    /* Flag the last binding type. */
    this->cached_vertex_sampler_state_bindings[slot].is_arg_buffer_binding =
        use_argument_buffer_for_samplers;

    /* Always assign to the argument buffer samplers binding array -- this efficiently ensures
     * the value in the samplers array is always up to date. */
    ctx.samplers_.mtl_sampler[slot] = sampler_state;
    ctx.samplers_.mtl_sampler_flags[slot] = sampler_binding.state;
  }
}

void MTLRenderPassState::bind_fragment_sampler(MTLSamplerBinding &sampler_binding,
                                               bool use_argument_buffer_for_samplers,
                                               uint slot)
{
  /* Range check. */
  const MTLShaderInterface *shader_interface = ctx.pipeline_state.active_shader->get_interface();
  BLI_assert(slot >= 0);
  BLI_assert(slot <= shader_interface->get_max_texture_index());
  BLI_assert(slot < MTL_MAX_TEXTURE_SLOTS);
  UNUSED_VARS_NDEBUG(shader_interface);

  /* If the sampler state has not changed for the given slot, we do not need to fetch. */
  if (this->cached_fragment_sampler_state_bindings[slot].sampler_state == nil ||
      !(this->cached_fragment_sampler_state_bindings[slot].binding_state ==
        sampler_binding.state) ||
      use_argument_buffer_for_samplers)
  {

    id<MTLSamplerState> sampler_state = (sampler_binding.state == DEFAULT_SAMPLER_STATE) ?
                                            ctx.get_default_sampler_state() :
                                            ctx.get_sampler_from_state(sampler_binding.state);
    if (!use_argument_buffer_for_samplers) {
      /* Update binding and cached state. */
      id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
      BLI_assert(rec != nil);
      [rec setFragmentSamplerState:sampler_state atIndex:slot];
      this->cached_fragment_sampler_state_bindings[slot].binding_state = sampler_binding.state;
      this->cached_fragment_sampler_state_bindings[slot].sampler_state = sampler_state;
    }

    /* Flag the last binding type. */
    this->cached_fragment_sampler_state_bindings[slot].is_arg_buffer_binding =
        use_argument_buffer_for_samplers;

    /* Always assign to the argument buffer samplers binding array -- this efficiently ensures
     * the value in the samplers array is always up to date. */
    ctx.samplers_.mtl_sampler[slot] = sampler_state;
    ctx.samplers_.mtl_sampler_flags[slot] = sampler_binding.state;
  }
}

void MTLComputeState::bind_compute_sampler(MTLSamplerBinding &sampler_binding,
                                           bool use_argument_buffer_for_samplers,
                                           uint slot)
{
  /* Range check. */
  const MTLShaderInterface *shader_interface = ctx.pipeline_state.active_shader->get_interface();
  BLI_assert(slot >= 0);
  BLI_assert(slot <= shader_interface->get_max_texture_index());
  BLI_assert(slot < MTL_MAX_TEXTURE_SLOTS);
  UNUSED_VARS_NDEBUG(shader_interface);

  /* If the sampler state has not changed for the given slot, we do not need to fetch. */
  if (this->cached_compute_sampler_state_bindings[slot].sampler_state == nil ||
      !(this->cached_compute_sampler_state_bindings[slot].binding_state ==
        sampler_binding.state) ||
      use_argument_buffer_for_samplers)
  {

    id<MTLSamplerState> sampler_state = (sampler_binding.state == DEFAULT_SAMPLER_STATE) ?
                                            ctx.get_default_sampler_state() :
                                            ctx.get_sampler_from_state(sampler_binding.state);
    if (!use_argument_buffer_for_samplers) {
      /* Update binding and cached state. */
      id<MTLComputeCommandEncoder> rec = this->cmd.get_active_compute_command_encoder();
      BLI_assert(rec != nil);
      [rec setSamplerState:sampler_state atIndex:slot];
      this->cached_compute_sampler_state_bindings[slot].binding_state = sampler_binding.state;
      this->cached_compute_sampler_state_bindings[slot].sampler_state = sampler_state;
    }

    /* Flag the last binding type. */
    this->cached_compute_sampler_state_bindings[slot].is_arg_buffer_binding =
        use_argument_buffer_for_samplers;

    /* Always assign to the argument buffer samplers binding array -- this efficiently ensures
     * the value in the samplers array is always up to date. */
    ctx.samplers_.mtl_sampler[slot] = sampler_state;
    ctx.samplers_.mtl_sampler_flags[slot] = sampler_binding.state;
  }
}

void MTLRenderPassState::bind_vertex_buffer(id<MTLBuffer> buffer,
                                            uint64_t buffer_offset,
                                            uint index)
{
  BLI_assert(index >= 0 && index < MTL_MAX_BUFFER_BINDINGS);
  BLI_assert(buffer_offset >= 0);
  BLI_assert(buffer != nil);

  BufferBindingCached &current_vert_ubo_binding = this->cached_vertex_buffer_bindings[index];
  if (current_vert_ubo_binding.offset != buffer_offset ||
      current_vert_ubo_binding.metal_buffer != buffer || current_vert_ubo_binding.is_bytes)
  {

    id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
    BLI_assert(rec != nil);

    if (current_vert_ubo_binding.metal_buffer == buffer) {
      /* If the buffer is the same, but the offset has changed. */
      [rec setVertexBufferOffset:buffer_offset atIndex:index];
    }
    else {
      /* Bind vertex buffer. */
      [rec setVertexBuffer:buffer offset:buffer_offset atIndex:index];
    }

    /* Update bind-state cache. */
    this->cached_vertex_buffer_bindings[index].is_bytes = false;
    this->cached_vertex_buffer_bindings[index].metal_buffer = buffer;
    this->cached_vertex_buffer_bindings[index].offset = buffer_offset;
  }
}

void MTLRenderPassState::bind_fragment_buffer(id<MTLBuffer> buffer,
                                              uint64_t buffer_offset,
                                              uint index)
{
  BLI_assert(index >= 0 && index < MTL_MAX_BUFFER_BINDINGS);
  BLI_assert(buffer_offset >= 0);
  BLI_assert(buffer != nil);

  BufferBindingCached &current_frag_ubo_binding = this->cached_fragment_buffer_bindings[index];
  if (current_frag_ubo_binding.offset != buffer_offset ||
      current_frag_ubo_binding.metal_buffer != buffer || current_frag_ubo_binding.is_bytes)
  {

    id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
    BLI_assert(rec != nil);

    if (current_frag_ubo_binding.metal_buffer == buffer) {
      /* If the buffer is the same, but the offset has changed. */
      [rec setFragmentBufferOffset:buffer_offset atIndex:index];
    }
    else {
      /* Bind fragment buffer. */
      [rec setFragmentBuffer:buffer offset:buffer_offset atIndex:index];
    }

    /* Update bind-state cache. */
    this->cached_fragment_buffer_bindings[index].is_bytes = false;
    this->cached_fragment_buffer_bindings[index].metal_buffer = buffer;
    this->cached_fragment_buffer_bindings[index].offset = buffer_offset;
  }
}

void MTLComputeState::bind_compute_buffer(id<MTLBuffer> buffer, uint64_t buffer_offset, uint index)
{
  BLI_assert(index >= 0 && index < MTL_MAX_BUFFER_BINDINGS);
  BLI_assert(buffer_offset >= 0);
  BLI_assert(buffer != nil);

  BufferBindingCached &current_comp_ubo_binding = this->cached_compute_buffer_bindings[index];
  if (current_comp_ubo_binding.offset != buffer_offset ||
      current_comp_ubo_binding.metal_buffer != buffer || current_comp_ubo_binding.is_bytes)
  {

    id<MTLComputeCommandEncoder> rec = this->cmd.get_active_compute_command_encoder();
    BLI_assert(rec != nil);

    if (current_comp_ubo_binding.metal_buffer == buffer) {
      /* If the buffer is the same, but the offset has changed. */
      [rec setBufferOffset:buffer_offset atIndex:index];
    }
    else {
      /* Bind compute buffer. */
      [rec setBuffer:buffer offset:buffer_offset atIndex:index];
    }

    /* Update bind-state cache. */
    this->cached_compute_buffer_bindings[index].is_bytes = false;
    this->cached_compute_buffer_bindings[index].metal_buffer = buffer;
    this->cached_compute_buffer_bindings[index].offset = buffer_offset;
  }
}
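
/* NOTE: The three buffer binding paths above avoid redundant Metal calls: when only the offset
 * changes on an already-bound buffer, the cheaper `set*BufferOffset:atIndex:` variant is used
 * instead of re-binding the buffer object itself. */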

void MTLRenderPassState::bind_vertex_bytes(const void *bytes, uint64_t length, uint index)
{
  /* Bytes are always updated, as the source data may have changed. */
  BLI_assert(index >= 0 && index < MTL_MAX_BUFFER_BINDINGS);
  BLI_assert(length > 0);
  BLI_assert(bytes != nullptr);

  if (length < MTL_MAX_SET_BYTES_SIZE) {
    id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
    [rec setVertexBytes:bytes length:length atIndex:index];

    /* Update bind-state cache. */
    this->cached_vertex_buffer_bindings[index].is_bytes = true;
    this->cached_vertex_buffer_bindings[index].metal_buffer = nil;
    this->cached_vertex_buffer_bindings[index].offset = -1;
  }
  else {
    /* We have run over the setBytes limit, bind a buffer instead. */
    MTLTemporaryBuffer range =
        ctx.get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(length, 256);
    memcpy(range.data, bytes, length);
    this->bind_vertex_buffer(range.metal_buffer, range.buffer_offset, index);
  }
}
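
/* NOTE: `setVertexBytes:length:atIndex:` (and the fragment/compute equivalents below) is meant
 * for small, transient data; Metal documents a 4 KB ceiling for set*Bytes payloads, which
 * MTL_MAX_SET_BYTES_SIZE presumably mirrors. Larger payloads are instead copied into a
 * 256-byte-aligned scratch buffer range and bound as a regular buffer. */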

void MTLRenderPassState::bind_fragment_bytes(const void *bytes, uint64_t length, uint index)
{
  /* Bytes are always updated, as the source data may have changed. */
  BLI_assert(index >= 0 && index < MTL_MAX_BUFFER_BINDINGS);
  BLI_assert(length > 0);
  BLI_assert(bytes != nullptr);

  if (length < MTL_MAX_SET_BYTES_SIZE) {
    id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
    [rec setFragmentBytes:bytes length:length atIndex:index];

    /* Update bind-state cache. */
    this->cached_fragment_buffer_bindings[index].is_bytes = true;
    this->cached_fragment_buffer_bindings[index].metal_buffer = nil;
    this->cached_fragment_buffer_bindings[index].offset = -1;
  }
  else {
    /* We have run over the setBytes limit, bind a buffer instead. */
    MTLTemporaryBuffer range =
        ctx.get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(length, 256);
    memcpy(range.data, bytes, length);
    this->bind_fragment_buffer(range.metal_buffer, range.buffer_offset, index);
  }
}

void MTLComputeState::bind_compute_bytes(const void *bytes, uint64_t length, uint index)
{
  /* Bytes are always updated, as the source data may have changed. */
  BLI_assert(index >= 0 && index < MTL_MAX_BUFFER_BINDINGS);
  BLI_assert(length > 0);
  BLI_assert(bytes != nullptr);

  if (length < MTL_MAX_SET_BYTES_SIZE) {
    id<MTLComputeCommandEncoder> rec = this->cmd.get_active_compute_command_encoder();
    [rec setBytes:bytes length:length atIndex:index];

    /* Update bind-state cache. */
    this->cached_compute_buffer_bindings[index].is_bytes = true;
    this->cached_compute_buffer_bindings[index].metal_buffer = nil;
    this->cached_compute_buffer_bindings[index].offset = -1;
  }
  else {
    /* We have run over the setBytes limit, bind a buffer instead. */
    MTLTemporaryBuffer range =
        ctx.get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(length, 256);
    memcpy(range.data, bytes, length);
    this->bind_compute_buffer(range.metal_buffer, range.buffer_offset, index);
  }
}

void MTLComputeState::bind_pso(id<MTLComputePipelineState> pso)
{
  if (this->bound_pso != pso) {
    id<MTLComputeCommandEncoder> rec = this->cmd.get_active_compute_command_encoder();
    [rec setComputePipelineState:pso];
    this->bound_pso = pso;
  }
}

/** \} */

}  // namespace blender::gpu