Blender V4.3
mtl_shader.mm
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2022-2023 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
9#include "BKE_global.hh"
10
11#include "DNA_userdef_types.h"
12
13#include "BLI_string.h"
14#include "BLI_time.h"
15
16#include <algorithm>
17#include <fstream>
18#include <iostream>
19#include <map>
20#include <mutex>
21#include <regex>
22#include <sstream>
23#include <string>
24
25#include <cstring>
26
27#include "GPU_platform.hh"
28#include "GPU_vertex_format.hh"
29
31#include "mtl_common.hh"
32#include "mtl_context.hh"
33#include "mtl_debug.hh"
35#include "mtl_shader.hh"
38#include "mtl_shader_log.hh"
39#include "mtl_texture.hh"
40#include "mtl_vertex_buffer.hh"
41
42#include "GHOST_C-api.h"
43
44extern const char datatoc_mtl_shader_common_msl[];
45
46using namespace blender;
47using namespace blender::gpu;
48using namespace blender::gpu::shader;
49
50namespace blender::gpu {
51
53{
54 switch (stage) {
56 return "Vertex Shader";
58 return "Fragment Shader";
60 return "Compute Shader";
62 break;
63 }
64 return "Unknown Shader Stage";
65}
66
67/* -------------------------------------------------------------------- */
71/* Create empty shader to be populated later. */
MTLShader::MTLShader(MTLContext *ctx, const char *name) : Shader(name)
{
  context_ = ctx;

  /* Allocate the temporary builder that holds intermediate sources and resources
   * until shader compilation has completed. */
  shd_builder_ = new MTLShaderBuilder();

#ifndef NDEBUG
  /* Sanitize the shader name: replace every character outside [A-Za-z0-9] with '_'
   * so the debug entry-point function name remains a valid identifier. */
  const size_t name_len = strlen(this->name);
  for (uint i = 0; i < name_len; i++) {
    const char c = this->name[i];
    const bool is_alphanumeric = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
                                 (c >= '0' && c <= '9');
    if (!is_alphanumeric) {
      this->name[i] = '_';
    }
  }
#endif
}
91
92/* Create shader from MSL source. */
94 MTLShaderInterface *interface,
95 const char *name,
96 NSString *input_vertex_source,
97 NSString *input_fragment_source,
98 NSString *vert_function_name,
99 NSString *frag_function_name)
100 : MTLShader(ctx, name)
101{
102 BLI_assert([vert_function_name length]);
103 BLI_assert([frag_function_name length]);
104
105 this->set_vertex_function_name(vert_function_name);
106 this->set_fragment_function_name(frag_function_name);
107 this->shader_source_from_msl(input_vertex_source, input_fragment_source);
108 this->set_interface(interface);
109 this->finalize(nullptr);
110}
111
113{
114 if (this->is_valid()) {
115
116 /* Free uniform data block. */
117 if (push_constant_data_ != nullptr) {
118 MEM_freeN(push_constant_data_);
119 push_constant_data_ = nullptr;
120 }
121
122 /* Free Metal resources.
123 * This is done in the order of:
124 * 1. PipelineState objects
125 * 2. MTLFunctions
126 * 3. MTLLibraries
   * So that each object releases its references to the one following it. */
128 if (pso_descriptor_ != nil) {
129 [pso_descriptor_ release];
130 pso_descriptor_ = nil;
131 }
132
133 /* Free Pipeline Cache. */
134 pso_cache_lock_.lock();
135 for (const MTLRenderPipelineStateInstance *pso_inst : pso_cache_.values()) {
136 /* Free pipeline state object. */
137 if (pso_inst->pso) {
138 [pso_inst->pso release];
139 }
140 /* Free vertex function. */
141 if (pso_inst->vert) {
142 [pso_inst->vert release];
143 }
144 /* Free fragment function. */
145 if (pso_inst->frag) {
146 [pso_inst->frag release];
147 }
148 delete pso_inst;
149 }
150 pso_cache_.clear();
151
152 /* Free Compute pipeline cache. */
153 for (const MTLComputePipelineStateInstance *pso_inst : compute_pso_cache_.values()) {
154 /* Free pipeline state object. */
155 if (pso_inst->pso) {
156 [pso_inst->pso release];
157 }
158 /* Free compute function. */
159 if (pso_inst->compute) {
160 [pso_inst->compute release];
161 }
162 }
163 compute_pso_cache_.clear();
164 pso_cache_lock_.unlock();
165
166 /* Free shader libraries. */
167 if (shader_library_vert_ != nil) {
168 [shader_library_vert_ release];
169 shader_library_vert_ = nil;
170 }
171 if (shader_library_frag_ != nil) {
172 [shader_library_frag_ release];
173 shader_library_frag_ = nil;
174 }
175 if (shader_library_compute_ != nil) {
176 [shader_library_compute_ release];
177 shader_library_compute_ = nil;
178 }
179
180 /* NOTE(Metal): #ShaderInterface deletion is handled in the super destructor `~Shader()`. */
181 }
182 valid_ = false;
183
184 if (shd_builder_ != nullptr) {
185 delete shd_builder_;
186 shd_builder_ = nullptr;
187 }
188}
189
/* Record whether this shader is being compiled as part of batch compilation.
 * The create-info parameter is unused in the Metal backend's init step. */
void MTLShader::init(const shader::ShaderCreateInfo & /*info*/, bool is_batch_compilation)
{
  async_compilation_ = is_batch_compilation;
}
194
197/* -------------------------------------------------------------------- */
202{
203 /* Flag source as not being compiled from native MSL. */
204 BLI_assert(shd_builder_ != nullptr);
205 shd_builder_->source_from_msl_ = false;
206
207 /* Remove #version tag entry. */
208 sources[SOURCES_INDEX_VERSION] = "";
209
210 /* Consolidate GLSL vertex sources. */
211 std::stringstream ss;
212 for (int i = 0; i < sources.size(); i++) {
213 ss << sources[i] << std::endl;
214 }
215 shd_builder_->glsl_vertex_source_ = ss.str();
216}
217
219{
220 MTL_LOG_ERROR("MTLShader::geometry_shader_from_glsl - Geometry shaders unsupported!");
221}
222
224{
225 /* Flag source as not being compiled from native MSL. */
226 BLI_assert(shd_builder_ != nullptr);
227 shd_builder_->source_from_msl_ = false;
228
229 /* Remove #version tag entry. */
230 sources[SOURCES_INDEX_VERSION] = "";
231
232 /* Consolidate GLSL fragment sources. */
233 std::stringstream ss;
234 int i;
235 for (i = 0; i < sources.size(); i++) {
236 /* Output preprocessor directive to improve shader log. */
238 if (name.is_empty()) {
239 ss << "#line 1 \"generated_code_" << i << "\"\n";
240 }
241 else {
242 ss << "#line 1 \"" << name << "\"\n";
243 }
244
245 ss << sources[i] << '\n';
246 }
247 ss << "#line 1 \"msl_wrapper_code\"\n";
248 shd_builder_->glsl_fragment_source_ = ss.str();
249}
250
252{
253 /* Flag source as not being compiled from native MSL. */
254 BLI_assert(shd_builder_ != nullptr);
255 shd_builder_->source_from_msl_ = false;
256
257 /* Remove #version tag entry. */
258 sources[SOURCES_INDEX_VERSION] = "";
259
260 /* Consolidate GLSL compute sources. */
261 std::stringstream ss;
262 for (int i = 0; i < sources.size(); i++) {
263 /* Output preprocessor directive to improve shader log. */
265 if (name.is_empty()) {
266 ss << "#line 1 \"generated_code_" << i << "\"\n";
267 }
268 else {
269 ss << "#line 1 \"" << name << "\"\n";
270 }
271 ss << sources[i] << std::endl;
272 }
273 shd_builder_->glsl_compute_source_ = ss.str();
274}
275
277{
278 /* Check if Shader has already been finalized. */
279 if (this->is_valid()) {
280 MTL_LOG_ERROR("Shader (%p) '%s' has already been finalized!", this, this->name_get());
281 }
282
283 /* Compute shaders. */
284 bool is_compute = false;
285 if (shd_builder_->glsl_compute_source_.size() > 0) {
286 BLI_assert_msg(info != nullptr, "Compute shaders must use CreateInfo.\n");
287 BLI_assert_msg(!shd_builder_->source_from_msl_, "Compute shaders must compile from GLSL.");
288 is_compute = true;
289 }
290
291 /* Perform GLSL to MSL source translation. */
292 BLI_assert(shd_builder_ != nullptr);
293 if (!shd_builder_->source_from_msl_) {
294 bool success = generate_msl_from_glsl(info);
295 if (!success) {
296 /* GLSL to MSL translation has failed, or is unsupported for this shader. */
297 valid_ = false;
298 BLI_assert_msg(false, "Shader translation from GLSL to MSL has failed. \n");
299
300 /* Create empty interface to allow shader to be silently used. */
301 MTLShaderInterface *mtl_interface = new MTLShaderInterface(this->name_get());
302 this->set_interface(mtl_interface);
303
304 /* Release temporary compilation resources. */
305 delete shd_builder_;
306 shd_builder_ = nullptr;
307 return false;
308 }
309 }
310
312 /* Tuning parameters for compute kernels. */
313 if (is_compute) {
314 int threadgroup_tuning_param = info->mtl_max_threads_per_threadgroup_;
315 if (threadgroup_tuning_param > 0) {
316 maxTotalThreadsPerThreadgroup_Tuning_ = threadgroup_tuning_param;
317 }
318 }
319
320 /* Ensure we have a valid shader interface. */
321 MTLShaderInterface *mtl_interface = this->get_interface();
322 BLI_assert(mtl_interface != nullptr);
323
324 /* Verify Context handle, fetch device and compile shader. */
325 BLI_assert(context_);
326 id<MTLDevice> device = context_->device;
327 BLI_assert(device != nil);
328
329 /* Ensure source and stage entry-point names are set. */
330 BLI_assert(shd_builder_ != nullptr);
331 if (is_compute) {
332 /* Compute path. */
333 BLI_assert([compute_function_name_ length] > 0);
334 BLI_assert([shd_builder_->msl_source_compute_ length] > 0);
335 }
336 else {
337 /* Vertex/Fragment path. */
338 BLI_assert([vertex_function_name_ length] > 0);
339 if (transform_feedback_type_ == GPU_SHADER_TFB_NONE) {
340 BLI_assert([fragment_function_name_ length] > 0);
341 }
342 BLI_assert([shd_builder_->msl_source_vert_ length] > 0);
343 }
344
345 @autoreleasepool {
346 MTLCompileOptions *options = [[[MTLCompileOptions alloc] init] autorelease];
347 options.languageVersion = MTLLanguageVersion2_2;
348 options.fastMathEnabled = YES;
349 options.preserveInvariance = YES;
350
351 /* Raster order groups for tile data in struct require Metal 2.3.
352 * Retaining Metal 2.2. for old shaders to maintain backwards
353 * compatibility for existing features. */
354 if (info->subpass_inputs_.size() > 0) {
355 options.languageVersion = MTLLanguageVersion2_3;
356 }
357#if defined(MAC_OS_VERSION_14_0)
358 if (@available(macOS 14.00, *)) {
359 /* Texture atomics require Metal 3.1. */
360 if (bool(info->builtins_ & BuiltinBits::TEXTURE_ATOMIC)) {
361 options.languageVersion = MTLLanguageVersion3_1;
362 }
363 }
364#endif
365
366 NSString *source_to_compile = shd_builder_->msl_source_vert_;
367
368 /* Vertex/Fragment compile stages 0 and/or 1.
369 * Compute shaders compile as stage 2. */
370 ShaderStage initial_stage = (is_compute) ? ShaderStage::COMPUTE : ShaderStage::VERTEX;
371 ShaderStage src_stage = initial_stage;
372 uint8_t total_stages = (is_compute) ? 1 : 2;
373
374 for (int stage_count = 0; stage_count < total_stages; stage_count++) {
375
376 source_to_compile = (src_stage == ShaderStage::VERTEX) ?
377 shd_builder_->msl_source_vert_ :
378 ((src_stage == ShaderStage::COMPUTE) ?
379 shd_builder_->msl_source_compute_ :
380 shd_builder_->msl_source_frag_);
381
382 /* Transform feedback, skip compilation. */
383 if (src_stage == ShaderStage::FRAGMENT && (transform_feedback_type_ != GPU_SHADER_TFB_NONE))
384 {
385 shader_library_frag_ = nil;
386 break;
387 }
388
389 /* Concatenate common source. */
390 NSString *str = [NSString stringWithUTF8String:datatoc_mtl_shader_common_msl];
391 NSString *source_with_header_a = [str stringByAppendingString:source_to_compile];
392
393 /* Inject unique context ID to avoid cross-context shader cache collisions.
394 * Required on macOS 11.0. */
395 NSString *source_with_header = source_with_header_a;
396 [source_with_header retain];
397
398 /* Prepare Shader Library. */
399 NSError *error = nullptr;
400 id<MTLLibrary> library = [device newLibraryWithSource:source_with_header
402 error:&error];
403 if (error) {
404 /* Only exit out if genuine error and not warning. */
405 if ([[error localizedDescription] rangeOfString:@"Compilation succeeded"].location ==
406 NSNotFound)
407 {
408 const char *errors_c_str = [[error localizedDescription] UTF8String];
409 const char *sources_c_str = (is_compute) ? shd_builder_->glsl_compute_source_.c_str() :
410 shd_builder_->glsl_fragment_source_.c_str();
411
412 MTLLogParser parser;
413 print_log(Span<const char *>(&sources_c_str, 1),
414 errors_c_str,
415 to_string(src_stage),
416 true,
417 &parser);
418
419 /* Release temporary compilation resources. */
420 delete shd_builder_;
421 shd_builder_ = nullptr;
422 return false;
423 }
424 }
425
426 BLI_assert(library != nil);
427
428 switch (src_stage) {
429 case ShaderStage::VERTEX: {
430 /* Store generated library and assign debug name. */
431 shader_library_vert_ = library;
432 shader_library_vert_.label = [NSString stringWithUTF8String:this->name];
433 } break;
435 /* Store generated library for fragment shader and assign debug name. */
436 shader_library_frag_ = library;
437 shader_library_frag_.label = [NSString stringWithUTF8String:this->name];
438 } break;
        /* Store generated library for compute shader and assign debug name. */
441 shader_library_compute_ = library;
442 shader_library_compute_.label = [NSString stringWithUTF8String:this->name];
443 } break;
444 case ShaderStage::ANY: {
445 /* Suppress warnings. */
447 } break;
448 }
449
450 [source_with_header autorelease];
451
452 /* Move onto next compilation stage. */
453 if (!is_compute) {
454 src_stage = ShaderStage::FRAGMENT;
455 }
456 else {
457 break;
458 }
459 }
460
461 /* Create descriptors.
462 * Each shader type requires a differing descriptor. */
463 if (!is_compute) {
464 /* Prepare Render pipeline descriptor. */
465 pso_descriptor_ = [[MTLRenderPipelineDescriptor alloc] init];
466 pso_descriptor_.label = [NSString stringWithUTF8String:this->name];
467 }
468
469 /* Shader has successfully been created. */
470 valid_ = true;
471
472 /* Prepare backing data storage for local uniforms. */
473 const MTLShaderBufferBlock &push_constant_block = mtl_interface->get_push_constant_block();
474 if (push_constant_block.size > 0) {
475 push_constant_data_ = MEM_callocN(push_constant_block.size, __func__);
477 }
478 else {
479 push_constant_data_ = nullptr;
480 }
481
482 /* If this is a compute shader, bake base PSO for compute straight-away.
483 * NOTE: This will compile the base unspecialized variant. */
484 if (is_compute) {
485 /* Set descriptor to default shader constants */
486 MTLComputePipelineStateDescriptor compute_pipeline_descriptor(this->constants.values);
487
488 this->bake_compute_pipeline_state(context_, compute_pipeline_descriptor);
489 }
490 }
491
492 /* Release temporary compilation resources. */
493 delete shd_builder_;
494 shd_builder_ = nullptr;
495 return true;
496}
497
499 const eGPUShaderTFBType geom_type)
500{
501 tf_output_name_list_.clear();
502 for (int i = 0; i < name_list.size(); i++) {
503 tf_output_name_list_.append(std::string(name_list[i]));
504 }
505 transform_feedback_type_ = geom_type;
506}
507
509{
510 BLI_assert(transform_feedback_type_ != GPU_SHADER_TFB_NONE);
511 BLI_assert(buf);
512 transform_feedback_active_ = true;
513 transform_feedback_vertbuf_ = buf;
514 BLI_assert(static_cast<MTLVertBuf *>(transform_feedback_vertbuf_)->get_usage_type() ==
516 return true;
517}
518
520{
521 transform_feedback_active_ = false;
522 transform_feedback_vertbuf_ = nullptr;
523}
524
527/* -------------------------------------------------------------------- */
532{
534 if (interface == nullptr || !this->is_valid()) {
536 "MTLShader::bind - Shader '%s' has no valid implementation in Metal, draw calls will be "
537 "skipped.",
538 this->name_get());
539 }
540 ctx->pipeline_state.active_shader = this;
541}
542
544{
546 ctx->pipeline_state.active_shader = nullptr;
547}
548
549void MTLShader::uniform_float(int location, int comp_len, int array_size, const float *data)
550{
551 BLI_assert(this);
552 if (!this->is_valid()) {
553 return;
554 }
555 MTLShaderInterface *mtl_interface = get_interface();
556 if (location < 0 || location >= mtl_interface->get_total_uniforms()) {
557 MTL_LOG_WARNING("Uniform location %d is not valid in Shader %s", location, this->name_get());
558 return;
559 }
560
561 /* Fetch more information about uniform from interface. */
562 const MTLShaderUniform &uniform = mtl_interface->get_uniform(location);
563
564 /* Prepare to copy data into local shader push constant memory block. */
565 BLI_assert(push_constant_data_ != nullptr);
566 uint8_t *dest_ptr = (uint8_t *)push_constant_data_;
567 dest_ptr += uniform.byte_offset;
568 uint32_t copy_size = sizeof(float) * comp_len * array_size;
569
570 /* Test per-element size. It is valid to copy less array elements than the total, but each
571 * array element needs to match. */
572 uint32_t source_per_element_size = sizeof(float) * comp_len;
573 uint32_t dest_per_element_size = uniform.size_in_bytes / uniform.array_len;
575 source_per_element_size <= dest_per_element_size,
576 "source Per-array-element size must be smaller than destination storage capacity for "
577 "that data");
578
579 if (source_per_element_size < dest_per_element_size) {
580 switch (uniform.type) {
581
582 /* Special case for handling 'vec3' array upload. */
583 case MTL_DATATYPE_FLOAT3: {
584 int numvecs = uniform.array_len;
585 uint8_t *data_c = (uint8_t *)data;
586
587 /* It is more efficient on the host to only modify data if it has changed.
588 * Data modifications are small, so memory comparison is cheap.
589 * If uniforms have remained unchanged, then we avoid both copying
590 * data into the local uniform struct, and upload of the modified uniform
591 * contents in the command stream. */
592 bool changed = false;
593 for (int i = 0; i < numvecs; i++) {
594 changed = changed || (memcmp((void *)dest_ptr, (void *)data_c, sizeof(float) * 3) != 0);
595 if (changed) {
596 memcpy((void *)dest_ptr, (void *)data_c, sizeof(float) * 3);
597 }
598 data_c += sizeof(float) * 3;
599 dest_ptr += sizeof(float) * 4;
600 }
601 if (changed) {
603 }
604 return;
605 }
606
607 /* Special case for handling 'mat3' upload. */
609 int numvecs = 3 * uniform.array_len;
610 uint8_t *data_c = (uint8_t *)data;
611
612 /* It is more efficient on the host to only modify data if it has changed.
613 * Data modifications are small, so memory comparison is cheap.
614 * If uniforms have remained unchanged, then we avoid both copying
615 * data into the local uniform struct, and upload of the modified uniform
616 * contents in the command stream. */
617 bool changed = false;
618 for (int i = 0; i < numvecs; i++) {
619 changed = changed || (memcmp((void *)dest_ptr, (void *)data_c, sizeof(float) * 3) != 0);
620 if (changed) {
621 memcpy((void *)dest_ptr, (void *)data_c, sizeof(float) * 3);
622 }
623 data_c += sizeof(float) * 3;
624 dest_ptr += sizeof(float) * 4;
625 }
626 if (changed) {
628 }
629 return;
630 }
631 default:
632 shader_debug_printf("INCOMPATIBLE UNIFORM TYPE: %d\n", uniform.type);
633 break;
634 }
635 }
636
637 /* Debug checks. */
639 copy_size <= uniform.size_in_bytes,
640 "Size of provided uniform data is greater than size specified in Shader interface\n");
641
642 /* Only flag UBO as modified if data is different -- This can avoid re-binding of unmodified
643 * local uniform data. */
644 bool data_changed = (memcmp((void *)dest_ptr, (void *)data, copy_size) != 0);
645 if (data_changed) {
647 memcpy((void *)dest_ptr, (void *)data, copy_size);
648 }
649}
650
651void MTLShader::uniform_int(int location, int comp_len, int array_size, const int *data)
652{
653 BLI_assert(this);
654 if (!this->is_valid()) {
655 return;
656 }
657
658 /* NOTE(Metal): Invalidation warning for uniform re-mapping of texture slots, unsupported in
659 * Metal, as we cannot point a texture binding at a different slot. */
660 MTLShaderInterface *mtl_interface = this->get_interface();
661 if (location >= mtl_interface->get_total_uniforms() &&
662 location < (mtl_interface->get_total_uniforms() + mtl_interface->get_total_textures()))
663 {
665 "Texture uniform location re-mapping unsupported in Metal. (Possibly also bad uniform "
666 "location %d)",
667 location);
668 return;
669 }
670
671 if (location < 0 || location >= mtl_interface->get_total_uniforms()) {
672 MTL_LOG_WARNING("Uniform is not valid at location %d - Shader %s", location, this->name_get());
673 return;
674 }
675
676 /* Fetch more information about uniform from interface. */
677 const MTLShaderUniform &uniform = mtl_interface->get_uniform(location);
678
679 /* Determine data location in uniform block. */
680 BLI_assert(push_constant_data_ != nullptr);
681 uint8_t *ptr = (uint8_t *)push_constant_data_;
682 ptr += uniform.byte_offset;
683
685 const char *data_to_copy = (char *)data;
686 uint data_size_to_copy = sizeof(int) * comp_len * array_size;
687
688 /* Special cases for small types support where storage is shader push constant buffer is smaller
689 * than the incoming data. */
690 ushort us;
691 uchar uc;
692 if (uniform.size_in_bytes == 1) {
693 /* Convert integer storage value down to uchar. */
694 data_size_to_copy = uniform.size_in_bytes;
695 uc = *data;
696 data_to_copy = (char *)&uc;
697 }
698 else if (uniform.size_in_bytes == 2) {
699 /* Convert integer storage value down to ushort. */
700 data_size_to_copy = uniform.size_in_bytes;
701 us = *data;
702 data_to_copy = (char *)&us;
703 }
704 else {
706 (mtl_get_data_type_alignment(uniform.type) % sizeof(int)) == 0,
707 "When uniform inputs are provided as integers, the underlying type must adhere "
708 "to alignment per-component. If this test fails, the input data cannot be directly copied "
709 "to the buffer. e.g. Array of small types uchar/bool/ushort etc; are currently not "
710 "handled.");
711 }
712
713 /* Copy data into local block. Only flag UBO as modified if data is different
714 * This can avoid re-binding of unmodified local uniform data, reducing
715 * the total number of copy operations needed and data transfers between
716 * CPU and GPU. */
717 bool data_changed = (memcmp((void *)ptr, (void *)data_to_copy, data_size_to_copy) != 0);
718 if (data_changed) {
720 memcpy((void *)ptr, (void *)data_to_copy, data_size_to_copy);
721 }
722}
723
725{
726 return push_constant_modified_;
727}
728
730{
731 push_constant_modified_ = is_dirty;
732}
733
734/* Attempts to pre-generate a PSO based on the parent shaders PSO
735 * (Render shaders only) */
737{
738 if (parent_shader_ != nullptr) {
740 MTLShader *parent_mtl = static_cast<MTLShader *>(parent_shader_);
741
742 /* Extract PSO descriptors from parent shader. */
745
746 parent_mtl->pso_cache_lock_.lock();
747 for (const auto &pso_entry : parent_mtl->pso_cache_.items()) {
748 const MTLRenderPipelineStateDescriptor &pso_descriptor = pso_entry.key;
749 const MTLRenderPipelineStateInstance *pso_inst = pso_entry.value;
750 descriptors.append(pso_descriptor);
751 prim_classes.append(pso_inst->prim_type);
752 }
753 parent_mtl->pso_cache_lock_.unlock();
754
755 /* Warm shader cache with applied limit.
756 * If limit is <= 0, compile all PSO permutations. */
757 limit = (limit > 0) ? limit : descriptors.size();
758 for (int i : IndexRange(min_ii(descriptors.size(), limit))) {
759 const MTLRenderPipelineStateDescriptor &pso_descriptor = descriptors[i];
760 const MTLPrimitiveTopologyClass &prim_class = prim_classes[i];
761 bake_pipeline_state(ctx, prim_class, pso_descriptor);
762 }
763 }
764}
765
768/* -------------------------------------------------------------------- */
/* Store the entry-point function name for the vertex stage.
 * Must be non-empty before `finalize()` is called (asserted there). */
void MTLShader::set_vertex_function_name(NSString *vert_function_name)
{
  vertex_function_name_ = vert_function_name;
}
776
/* Store the entry-point function name for the fragment stage.
 * Must be non-empty before `finalize()` unless transform feedback is active (asserted there). */
void MTLShader::set_fragment_function_name(NSString *frag_function_name)
{
  fragment_function_name_ = frag_function_name;
}
781
/* Store the entry-point function name for the compute stage.
 * Must be non-empty before `finalize()` is called for compute shaders (asserted there). */
void MTLShader::set_compute_function_name(NSString *compute_function_name)
{
  compute_function_name_ = compute_function_name;
}
786
/* Provide pre-written MSL sources for the vertex and fragment stages directly.
 * Setting `source_from_msl_` marks the builder so the GLSL -> MSL translation step
 * is skipped during `finalize()`. Requires the temporary shader builder to still exist
 * (i.e. the shader has not been finalized yet). */
void MTLShader::shader_source_from_msl(NSString *input_vertex_source,
                                       NSString *input_fragment_source)
{
  BLI_assert(shd_builder_ != nullptr);
  shd_builder_->msl_source_vert_ = input_vertex_source;
  shd_builder_->msl_source_frag_ = input_fragment_source;
  shd_builder_->source_from_msl_ = true;
}
795
/* Provide pre-written MSL source for the compute stage directly.
 * Setting `source_from_msl_` marks the builder so the GLSL -> MSL translation step
 * is skipped during `finalize()`. Requires the temporary shader builder to still exist. */
void MTLShader::shader_compute_source_from_msl(NSString *input_compute_source)
{
  BLI_assert(shd_builder_ != nullptr);
  shd_builder_->msl_source_compute_ = input_compute_source;
  shd_builder_->source_from_msl_ = true;
}
802
804{
805 /* Assign gpu::Shader super-class interface. */
806 BLI_assert(Shader::interface == nullptr);
808}
809
812/* -------------------------------------------------------------------- */
821 MTLFunctionConstantValues *values,
822 const Shader::Constants &shader_constants,
823 const SpecializationStateDescriptor &specialization_descriptor)
824{
825 for (auto i : shader_constants.types.index_range()) {
826 const Shader::Constants::Value &value = specialization_descriptor.values[i];
827
829 switch (shader_constants.types[i]) {
830 case Type::INT:
831 [values setConstantValue:&value.i type:MTLDataTypeInt atIndex:index];
832 break;
833 case Type::UINT:
834 [values setConstantValue:&value.u type:MTLDataTypeUInt atIndex:index];
835 break;
836 case Type::BOOL:
837 [values setConstantValue:&value.u type:MTLDataTypeBool atIndex:index];
838 break;
839 case Type::FLOAT:
840 [values setConstantValue:&value.f type:MTLDataTypeFloat atIndex:index];
841 break;
842 default:
843 BLI_assert_msg(false, "Unsupported custom constant type.");
844 break;
845 }
846 }
847}
850/* -------------------------------------------------------------------- */
867 MTLContext *ctx, MTLPrimitiveTopologyClass prim_type)
868{
870 /* NOTE(Metal): PSO cache can be accessed from multiple threads, though these operations should
871 * be thread-safe due to organization of high-level renderer. If there are any issues, then
872 * access can be guarded as appropriate. */
873 BLI_assert(this->is_valid());
874
875 /* NOTE(Metal): Vertex input assembly description will have been populated externally
876 * via #MTLBatch or #MTLImmediate during binding or draw. */
877
878 /* Resolve Context Frame-buffer state. */
879 MTLFrameBuffer *framebuffer = ctx->get_current_framebuffer();
880
881 /* Update global pipeline descriptor. */
882 MTLStateManager *state_manager = static_cast<MTLStateManager *>(
884 MTLRenderPipelineStateDescriptor &pipeline_descriptor = state_manager->get_pipeline_descriptor();
885
886 pipeline_descriptor.num_color_attachments = 0;
887 for (int attachment = 0; attachment < GPU_FB_MAX_COLOR_ATTACHMENT; attachment++) {
888 MTLAttachment color_attachment = framebuffer->get_color_attachment(attachment);
889
890 if (color_attachment.used) {
891 /* If SRGB is disabled and format is SRGB, use color data directly with no conversions
892 * between linear and SRGB. */
893 MTLPixelFormat mtl_format = gpu_texture_format_to_metal(
894 color_attachment.texture->format_get());
895 if (framebuffer->get_is_srgb() && !framebuffer->get_srgb_enabled()) {
896 mtl_format = MTLPixelFormatRGBA8Unorm;
897 }
898 pipeline_descriptor.color_attachment_format[attachment] = mtl_format;
899 }
900 else {
901 pipeline_descriptor.color_attachment_format[attachment] = MTLPixelFormatInvalid;
902 }
903
904 pipeline_descriptor.num_color_attachments += (color_attachment.used) ? 1 : 0;
905 }
906 MTLAttachment depth_attachment = framebuffer->get_depth_attachment();
907 MTLAttachment stencil_attachment = framebuffer->get_stencil_attachment();
908 pipeline_descriptor.depth_attachment_format = (depth_attachment.used) ?
910 depth_attachment.texture->format_get()) :
911 MTLPixelFormatInvalid;
912 pipeline_descriptor.stencil_attachment_format =
913 (stencil_attachment.used) ?
914 gpu_texture_format_to_metal(stencil_attachment.texture->format_get()) :
915 MTLPixelFormatInvalid;
916
917 /* Resolve Context Pipeline State (required by PSO). */
918 pipeline_descriptor.color_write_mask = ctx->pipeline_state.color_write_mask;
919 pipeline_descriptor.blending_enabled = ctx->pipeline_state.blending_enabled;
920 pipeline_descriptor.alpha_blend_op = ctx->pipeline_state.alpha_blend_op;
921 pipeline_descriptor.rgb_blend_op = ctx->pipeline_state.rgb_blend_op;
926 pipeline_descriptor.point_size = ctx->pipeline_state.point_size;
927
928 /* Resolve clipping plane enablement. */
929 pipeline_descriptor.clipping_plane_enable_mask = 0;
930 for (const int plane : IndexRange(6)) {
931 pipeline_descriptor.clipping_plane_enable_mask =
932 pipeline_descriptor.clipping_plane_enable_mask |
933 ((ctx->pipeline_state.clip_distance_enabled[plane]) ? (1 << plane) : 0);
934 }
935
936 /* Primitive Type -- Primitive topology class needs to be specified for layered rendering. */
937 bool requires_specific_topology_class = uses_gpu_layer || uses_gpu_viewport_index ||
938 prim_type == MTLPrimitiveTopologyClassPoint;
939 pipeline_descriptor.vertex_descriptor.prim_topology_class =
940 (requires_specific_topology_class) ? prim_type : MTLPrimitiveTopologyClassUnspecified;
941
942 /* Specialization configuration. */
943 pipeline_descriptor.specialization_state = {this->constants.values};
944
945 /* Bake pipeline state using global descriptor. */
946 return bake_pipeline_state(ctx, prim_type, pipeline_descriptor);
947}
948
949/* Variant which bakes a pipeline state based on an existing MTLRenderPipelineStateDescriptor.
950 * This function should be callable from a secondary compilation thread. */
952 MTLContext *ctx,
953 MTLPrimitiveTopologyClass prim_type,
954 const MTLRenderPipelineStateDescriptor &pipeline_descriptor)
955{
956 /* Fetch shader interface. */
957 MTLShaderInterface *mtl_interface = this->get_interface();
958 BLI_assert(mtl_interface);
959 BLI_assert(this->is_valid());
960
961 /* Check if current PSO exists in the cache. */
962 pso_cache_lock_.lock();
963 MTLRenderPipelineStateInstance **pso_lookup = pso_cache_.lookup_ptr(pipeline_descriptor);
964 MTLRenderPipelineStateInstance *pipeline_state = (pso_lookup) ? *pso_lookup : nullptr;
965 pso_cache_lock_.unlock();
966
967 if (pipeline_state != nullptr) {
968 return pipeline_state;
969 }
970
971 /* TODO: When fetching a specialized variant of a shader, if this does not yet exist, verify
972 * whether the base unspecialized variant exists:
973 * - If unspecialized version exists: Compile specialized PSO asynchronously, returning base PSO
974 * and flagging state of specialization in cache as being built.
975 * - If unspecialized does NOT exist, build specialized version straight away, as we pay the
976 * cost of compilation in both cases regardless. */
977
978 /* Generate new Render Pipeline State Object (PSO). */
979 @autoreleasepool {
980 /* Prepare Render Pipeline Descriptor. */
981
982 /* Setup function specialization constants, used to modify and optimize
983 * generated code based on current render pipeline configuration. */
984 MTLFunctionConstantValues *values = [[MTLFunctionConstantValues new] autorelease];
985
986 /* Custom function constant values: */
988 values, this->constants, pipeline_descriptor.specialization_state);
989
990 /* Prepare Vertex descriptor based on current pipeline vertex binding state. */
991 MTLRenderPipelineDescriptor *desc = pso_descriptor_;
992 [desc reset];
993 pso_descriptor_.label = [NSString stringWithUTF8String:this->name];
994
995 /* Offset the bind index for Uniform buffers such that they begin after the VBO
996 * buffer bind slots. `MTL_uniform_buffer_base_index` is passed as a function
997 * specialization constant, customized per unique pipeline state permutation.
998 *
999 * NOTE: For binding point compaction, we could use the number of VBOs present
1000 * in the current PSO configuration `pipeline_descriptors.vertex_descriptor.num_vert_buffers`).
1001 * However, it is more efficient to simply offset the uniform buffer base index to the
1002 * maximal number of VBO bind-points, as then UBO bind-points for similar draw calls
1003 * will align and avoid the requirement for additional binding. */
1004 int MTL_uniform_buffer_base_index = pipeline_descriptor.vertex_descriptor.num_vert_buffers + 1;
1005
1006 /* Null buffer index is used if an attribute is not found in the
1007 * bound VBOs #VertexFormat. */
1008 int null_buffer_index = pipeline_descriptor.vertex_descriptor.num_vert_buffers;
1009 bool using_null_buffer = false;
1010
1011 if (this->get_uses_ssbo_vertex_fetch()) {
1012 /* If using SSBO Vertex fetch mode, no vertex descriptor is required
1013 * as we wont be using stage-in. */
1014 desc.vertexDescriptor = nil;
1015 desc.inputPrimitiveTopology = MTLPrimitiveTopologyClassUnspecified;
1016
1017 /* We want to offset the uniform buffer base to allow for sufficient VBO binding slots - We
1018 * also require +1 slot for the Index buffer. */
1019 MTL_uniform_buffer_base_index = MTL_SSBO_VERTEX_FETCH_IBO_INDEX + 1;
1020 }
1021 else {
1022 for (const uint i :
1023 IndexRange(pipeline_descriptor.vertex_descriptor.max_attribute_value + 1))
1024 {
1025
1026 /* Metal back-end attribute descriptor state. */
1027 const MTLVertexAttributeDescriptorPSO &attribute_desc =
1028 pipeline_descriptor.vertex_descriptor.attributes[i];
1029
1030 /* Flag format conversion */
1031 /* In some cases, Metal cannot implicitly convert between data types.
1032 * In these instances, the fetch mode #GPUVertFetchMode as provided in the vertex format
1033 * is passed in, and used to populate function constants named: MTL_AttributeConvert0..15.
1034 *
1035 * It is then the responsibility of the vertex shader to perform any necessary type
1036 * casting.
1037 *
1038 * See `mtl_shader.hh` for more information. Relevant Metal API documentation:
1039 * https://developer.apple.com/documentation/metal/mtlvertexattributedescriptor/1516081-format?language=objc
1040 */
1041 if (attribute_desc.format == MTLVertexFormatInvalid) {
1042 /* If attributes are non-contiguous, we can skip over gaps. */
1044 "MTLShader: baking pipeline state for '%s'- skipping input attribute at "
1045 "index '%d' but none was specified in the current vertex state",
1046 mtl_interface->get_name(),
1047 i);
1048
1049 /* Write out null conversion constant if attribute unused. */
1050 int MTL_attribute_conversion_mode = 0;
1051 [values setConstantValue:&MTL_attribute_conversion_mode
1052 type:MTLDataTypeInt
1053 withName:[NSString stringWithFormat:@"MTL_AttributeConvert%d", i]];
1054 continue;
1055 }
1056
1057 int MTL_attribute_conversion_mode = (int)attribute_desc.format_conversion_mode;
1058 [values setConstantValue:&MTL_attribute_conversion_mode
1059 type:MTLDataTypeInt
1060 withName:[NSString stringWithFormat:@"MTL_AttributeConvert%d", i]];
1061 if (MTL_attribute_conversion_mode == GPU_FETCH_INT_TO_FLOAT_UNIT ||
1062 MTL_attribute_conversion_mode == GPU_FETCH_INT_TO_FLOAT)
1063 {
1065 "TODO(Metal): Shader %s needs to support internal format conversion\n",
1066 mtl_interface->get_name());
1067 }
1068
1069 /* Copy metal back-end attribute descriptor state into PSO descriptor.
1070 * NOTE: need to copy each element due to direct assignment restrictions.
1071 * Also note */
1072 MTLVertexAttributeDescriptor *mtl_attribute = desc.vertexDescriptor.attributes[i];
1073
1074 mtl_attribute.format = attribute_desc.format;
1075 mtl_attribute.offset = attribute_desc.offset;
1076 mtl_attribute.bufferIndex = attribute_desc.buffer_index;
1077 }
1078
1079 for (const uint i : IndexRange(pipeline_descriptor.vertex_descriptor.num_vert_buffers)) {
1080 /* Metal back-end state buffer layout. */
1081 const MTLVertexBufferLayoutDescriptorPSO &buf_layout =
1082 pipeline_descriptor.vertex_descriptor.buffer_layouts[i];
1083 /* Copy metal back-end buffer layout state into PSO descriptor.
1084 * NOTE: need to copy each element due to copying from internal
1085 * back-end descriptor to Metal API descriptor. */
1086 MTLVertexBufferLayoutDescriptor *mtl_buf_layout = desc.vertexDescriptor.layouts[i];
1087
1088 mtl_buf_layout.stepFunction = buf_layout.step_function;
1089 mtl_buf_layout.stepRate = buf_layout.step_rate;
1090 mtl_buf_layout.stride = buf_layout.stride;
1091 }
1092
1093 /* Mark empty attribute conversion. */
1094 for (int i = pipeline_descriptor.vertex_descriptor.max_attribute_value + 1;
1096 i++)
1097 {
1098 int MTL_attribute_conversion_mode = 0;
1099 [values setConstantValue:&MTL_attribute_conversion_mode
1100 type:MTLDataTypeInt
1101 withName:[NSString stringWithFormat:@"MTL_AttributeConvert%d", i]];
1102 }
1103
1104 /* DEBUG: Missing/empty attributes. */
1105 /* Attributes are normally mapped as part of the state setting based on the used
1106 * #GPUVertFormat, however, if attributes have not been set, we can sort them out here. */
1107 for (const uint i : IndexRange(mtl_interface->get_total_attributes())) {
1108 const MTLShaderInputAttribute &attribute = mtl_interface->get_attribute(i);
1109 MTLVertexAttributeDescriptor *current_attribute =
1110 desc.vertexDescriptor.attributes[attribute.location];
1111
1112 if (current_attribute.format == MTLVertexFormatInvalid) {
1113#if MTL_DEBUG_SHADER_ATTRIBUTES == 1
1114 printf("-> Filling in unbound attribute '%s' for shader PSO '%s' with location: %u\n",
1115 mtl_interface->get_name_at_offset(attribute.name_offset),
1116 mtl_interface->get_name(),
1117 attribute.location);
1118#endif
1119 current_attribute.format = attribute.format;
1120 current_attribute.offset = 0;
1121 current_attribute.bufferIndex = null_buffer_index;
1122
1123 /* Add Null vert buffer binding for invalid attributes. */
1124 if (!using_null_buffer) {
1125 MTLVertexBufferLayoutDescriptor *null_buf_layout =
1126 desc.vertexDescriptor.layouts[null_buffer_index];
1127
1128 /* Use constant step function such that null buffer can
1129 * contain just a singular dummy attribute. */
1130 null_buf_layout.stepFunction = MTLVertexStepFunctionConstant;
1131 null_buf_layout.stepRate = 0;
1132 null_buf_layout.stride = max_ii(null_buf_layout.stride, attribute.size);
1133
1134 /* If we are using the maximum number of vertex buffers, or tight binding indices,
1135 * MTL_uniform_buffer_base_index needs shifting to the bind slot after the null buffer
1136 * index. */
1137 if (null_buffer_index >= MTL_uniform_buffer_base_index) {
1138 MTL_uniform_buffer_base_index = null_buffer_index + 1;
1139 }
1140 using_null_buffer = true;
1141#if MTL_DEBUG_SHADER_ATTRIBUTES == 1
1142 MTL_LOG_INFO("Setting up buffer binding for null attribute with buffer index %d",
1143 null_buffer_index);
1144#endif
1145 }
1146 }
1147 }
1148
1149 /* Primitive Topology. */
1150 desc.inputPrimitiveTopology = pipeline_descriptor.vertex_descriptor.prim_topology_class;
1151 }
1152
1153 /* Update constant value for 'MTL_uniform_buffer_base_index'. */
1154 [values setConstantValue:&MTL_uniform_buffer_base_index
1155 type:MTLDataTypeInt
1156 withName:@"MTL_uniform_buffer_base_index"];
1157
1158 /* Storage buffer bind index.
1159 * This is always relative to MTL_uniform_buffer_base_index, plus the number of active buffers,
1160 * and an additional space for the push constant block.
1161 * If the shader does not have any uniform blocks, then we can place directly after the push
1162 * constant block. As we do not need an extra spot for the UBO at index '0'. */
1163 int MTL_storage_buffer_base_index = MTL_uniform_buffer_base_index + 1 +
1164 ((mtl_interface->get_total_uniform_blocks() > 0) ?
1165 mtl_interface->get_total_uniform_blocks() :
1166 0);
1167 [values setConstantValue:&MTL_storage_buffer_base_index
1168 type:MTLDataTypeInt
1169 withName:@"MTL_storage_buffer_base_index"];
1170
1171 /* Transform feedback constant.
1172 * Ensure buffer is placed after existing buffers, including default buffers. */
1173 int MTL_transform_feedback_buffer_index = -1;
1174 if (this->transform_feedback_type_ != GPU_SHADER_TFB_NONE) {
1175 /* If using argument buffers, insert index after argument buffer index. Otherwise, insert
1176 * after uniform buffer bindings. */
1177 MTL_transform_feedback_buffer_index =
1178 MTL_uniform_buffer_base_index +
1179 ((mtl_interface->uses_argument_buffer_for_samplers()) ?
1181 (mtl_interface->get_max_buffer_index() + 2));
1182 }
1183
1184 if (this->transform_feedback_type_ != GPU_SHADER_TFB_NONE) {
1185 [values setConstantValue:&MTL_transform_feedback_buffer_index
1186 type:MTLDataTypeInt
1187 withName:@"MTL_transform_feedback_buffer_index"];
1188 }
1189
1190 /* Clipping planes. */
1191 int MTL_clip_distances_enabled = (pipeline_descriptor.clipping_plane_enable_mask > 0) ? 1 : 0;
1192
1193 /* Only define specialization constant if planes are required.
1194 * We guard clip_planes usage on this flag. */
1195 [values setConstantValue:&MTL_clip_distances_enabled
1196 type:MTLDataTypeInt
1197 withName:@"MTL_clip_distances_enabled"];
1198
1199 if (MTL_clip_distances_enabled > 0) {
1200 /* Assign individual enablement flags. Only define a flag function constant
1201 * if it is used. */
1202 for (const int plane : IndexRange(6)) {
1203 int plane_enabled = ctx->pipeline_state.clip_distance_enabled[plane] ? 1 : 0;
1204 if (plane_enabled) {
1205 [values
1206 setConstantValue:&plane_enabled
1207 type:MTLDataTypeInt
1208 withName:[NSString stringWithFormat:@"MTL_clip_distance_enabled%d", plane]];
1209 }
1210 }
1211 }
1212
1213 /* gl_PointSize constant. */
1214 bool null_pointsize = true;
1215 float MTL_pointsize = pipeline_descriptor.point_size;
1216 if (pipeline_descriptor.vertex_descriptor.prim_topology_class ==
1217 MTLPrimitiveTopologyClassPoint)
1218 {
1219 /* `if pointsize is > 0.0`, PROGRAM_POINT_SIZE is enabled, and `gl_PointSize` shader keyword
1220 * overrides the value. Otherwise, if < 0.0, use global constant point size. */
1221 if (MTL_pointsize < 0.0) {
1222 MTL_pointsize = fabsf(MTL_pointsize);
1223 [values setConstantValue:&MTL_pointsize
1224 type:MTLDataTypeFloat
1225 withName:@"MTL_global_pointsize"];
1226 null_pointsize = false;
1227 }
1228 }
1229
1230 if (null_pointsize) {
1231 MTL_pointsize = 0.0f;
1232 [values setConstantValue:&MTL_pointsize
1233 type:MTLDataTypeFloat
1234 withName:@"MTL_global_pointsize"];
1235 }
1236
1237 /* Compile functions */
1238 NSError *error = nullptr;
1239 desc.vertexFunction = [shader_library_vert_ newFunctionWithName:vertex_function_name_
1240 constantValues:values
1241 error:&error];
1242 if (error) {
1243 bool has_error = (
1244 [[error localizedDescription] rangeOfString:@"Compilation succeeded"].location ==
1245 NSNotFound);
1246
1247 const char *errors_c_str = [[error localizedDescription] UTF8String];
1248 const char *sources_c_str = shd_builder_->glsl_fragment_source_.c_str();
1249
1250 MTLLogParser parser;
1251 print_log(
1252 Span<const char *>(&sources_c_str, 1), errors_c_str, "VertShader", has_error, &parser);
1253
1254 /* Only exit out if genuine error and not warning */
1255 if (has_error) {
1256 return nullptr;
1257 }
1258 }
1259
1260 /* If transform feedback is used, Vertex-only stage */
1261 if (transform_feedback_type_ == GPU_SHADER_TFB_NONE) {
1262 desc.fragmentFunction = [shader_library_frag_ newFunctionWithName:fragment_function_name_
1263 constantValues:values
1264 error:&error];
1265 if (error) {
1266 bool has_error = (
1267 [[error localizedDescription] rangeOfString:@"Compilation succeeded"].location ==
1268 NSNotFound);
1269
1270 const char *errors_c_str = [[error localizedDescription] UTF8String];
1271 const char *sources_c_str = shd_builder_->glsl_fragment_source_.c_str();
1272
1273 MTLLogParser parser;
1274 print_log(
1275 Span<const char *>(&sources_c_str, 1), errors_c_str, "FragShader", has_error, &parser);
1276
1277 /* Only exit out if genuine error and not warning */
1278 if (has_error) {
1279 return nullptr;
1280 }
1281 }
1282 }
1283 else {
1284 desc.fragmentFunction = nil;
1285 desc.rasterizationEnabled = false;
1286 }
1287
1288 /* Setup pixel format state */
1289 for (int color_attachment = 0; color_attachment < GPU_FB_MAX_COLOR_ATTACHMENT;
1290 color_attachment++)
1291 {
1292 /* Fetch color attachment pixel format in back-end pipeline state. */
1293 MTLPixelFormat pixel_format = pipeline_descriptor.color_attachment_format[color_attachment];
1294 /* Populate MTL API PSO attachment descriptor. */
1295 MTLRenderPipelineColorAttachmentDescriptor *col_attachment =
1296 desc.colorAttachments[color_attachment];
1297
1298 col_attachment.pixelFormat = pixel_format;
1299 if (pixel_format != MTLPixelFormatInvalid) {
1300 bool format_supports_blending = mtl_format_supports_blending(pixel_format);
1301
1302 col_attachment.writeMask = pipeline_descriptor.color_write_mask;
1303 col_attachment.blendingEnabled = pipeline_descriptor.blending_enabled &&
1304 format_supports_blending;
1305 if (format_supports_blending && pipeline_descriptor.blending_enabled) {
1306 col_attachment.alphaBlendOperation = pipeline_descriptor.alpha_blend_op;
1307 col_attachment.rgbBlendOperation = pipeline_descriptor.rgb_blend_op;
1308 col_attachment.destinationAlphaBlendFactor = pipeline_descriptor.dest_alpha_blend_factor;
1309 col_attachment.destinationRGBBlendFactor = pipeline_descriptor.dest_rgb_blend_factor;
1310 col_attachment.sourceAlphaBlendFactor = pipeline_descriptor.src_alpha_blend_factor;
1311 col_attachment.sourceRGBBlendFactor = pipeline_descriptor.src_rgb_blend_factor;
1312 }
1313 else {
1314 if (pipeline_descriptor.blending_enabled && !format_supports_blending) {
1316 "[Warning] Attempting to Bake PSO, but MTLPixelFormat %d does not support "
1317 "blending\n",
1318 *((int *)&pixel_format));
1319 }
1320 }
1321 }
1322 }
1323 desc.depthAttachmentPixelFormat = pipeline_descriptor.depth_attachment_format;
1324 desc.stencilAttachmentPixelFormat = pipeline_descriptor.stencil_attachment_format;
1325
1326 /* Bind-point range validation.
1327 * We need to ensure that the PSO will have valid bind-point ranges, or is using the
1328 * appropriate bindless fallback path if any bind limits are exceeded. */
1329#ifdef NDEBUG
1330 /* Ensure Buffer bindings are within range. */
1331 BLI_assert_msg((MTL_uniform_buffer_base_index + get_max_ubo_index() + 2) <
1333 "UBO and SSBO bindings exceed the fragment bind table limit.");
1334
1335 /* Transform feedback buffer. */
1336 if (transform_feedback_type_ != GPU_SHADER_TFB_NONE) {
1337 BLI_assert_msg(MTL_transform_feedback_buffer_index < MTL_MAX_BUFFER_BINDINGS,
1338 "Transform feedback buffer binding exceeds the fragment bind table limit.");
1339 }
1340
1341 /* Argument buffer. */
1342 if (mtl_interface->uses_argument_buffer_for_samplers()) {
1344 "Argument buffer binding exceeds the fragment bind table limit.");
1345 }
1346#endif
1347
1348 /* Compile PSO */
1349 MTLAutoreleasedRenderPipelineReflection reflection_data;
1350 id<MTLRenderPipelineState> pso = [ctx->device
1351 newRenderPipelineStateWithDescriptor:desc
1352 options:MTLPipelineOptionBufferTypeInfo
1353 reflection:&reflection_data
1354 error:&error];
1355 if (error) {
1356 NSLog(@"Failed to create PSO for shader: %s error %@\n", this->name, error);
1357 BLI_assert(false);
1358 return nullptr;
1359 }
1360 else if (!pso) {
1361 NSLog(@"Failed to create PSO for shader: %s, but no error was provided!\n", this->name);
1362 BLI_assert(false);
1363 return nullptr;
1364 }
1365 else {
1366#if 0
1367 NSLog(@"Successfully compiled PSO for shader: %s (Metal Context: %p)\n", this->name, ctx);
1368#endif
1369 }
1370
1371 /* Prepare pipeline state instance. */
1373 pso_inst->vert = desc.vertexFunction;
1374 pso_inst->frag = desc.fragmentFunction;
1375 pso_inst->pso = pso;
1376 pso_inst->base_uniform_buffer_index = MTL_uniform_buffer_base_index;
1377 pso_inst->base_storage_buffer_index = MTL_storage_buffer_base_index;
1378 pso_inst->null_attribute_buffer_index = (using_null_buffer) ? null_buffer_index : -1;
1379 pso_inst->transform_feedback_buffer_index = MTL_transform_feedback_buffer_index;
1380 pso_inst->prim_type = prim_type;
1381
1382 pso_inst->reflection_data_available = (reflection_data != nil);
1383 if (reflection_data != nil) {
1384
1385 /* Extract shader reflection data for buffer bindings.
1386 * This reflection data is used to contrast the binding information
1387 * we know about in the interface against the bindings in the finalized
1388 * PSO. This accounts for bindings which have been stripped out during
1389 * optimization, and allows us to both avoid over-binding and also
1390 * allows us to verify size-correctness for bindings, to ensure
1391 * that buffers bound are not smaller than the size of expected data. */
1392 NSArray<MTLArgument *> *vert_args = [reflection_data vertexArguments];
1393
1394 pso_inst->buffer_bindings_reflection_data_vert.clear();
1395 int buffer_binding_max_ind = 0;
1396
1397 for (int i = 0; i < [vert_args count]; i++) {
1398 MTLArgument *arg = [vert_args objectAtIndex:i];
1399 if ([arg type] == MTLArgumentTypeBuffer) {
1400 int buf_index = [arg index] - MTL_uniform_buffer_base_index;
1401 if (buf_index >= 0) {
1402 buffer_binding_max_ind = max_ii(buffer_binding_max_ind, buf_index);
1403 }
1404 }
1405 }
1406 pso_inst->buffer_bindings_reflection_data_vert.resize(buffer_binding_max_ind + 1);
1407 for (int i = 0; i < buffer_binding_max_ind + 1; i++) {
1408 pso_inst->buffer_bindings_reflection_data_vert[i] = {0, 0, 0, false};
1409 }
1410
1411 for (int i = 0; i < [vert_args count]; i++) {
1412 MTLArgument *arg = [vert_args objectAtIndex:i];
1413 if ([arg type] == MTLArgumentTypeBuffer) {
1414 int buf_index = [arg index] - MTL_uniform_buffer_base_index;
1415
1416 if (buf_index >= 0) {
1417 pso_inst->buffer_bindings_reflection_data_vert[buf_index] = {
1418 (uint32_t)([arg index]),
1419 (uint32_t)([arg bufferDataSize]),
1420 (uint32_t)([arg bufferAlignment]),
1421 ([arg isActive] == YES) ? true : false};
1422 }
1423 }
1424 }
1425
1426 NSArray<MTLArgument *> *frag_args = [reflection_data fragmentArguments];
1427
1428 pso_inst->buffer_bindings_reflection_data_frag.clear();
1429 buffer_binding_max_ind = 0;
1430
1431 for (int i = 0; i < [frag_args count]; i++) {
1432 MTLArgument *arg = [frag_args objectAtIndex:i];
1433 if ([arg type] == MTLArgumentTypeBuffer) {
1434 int buf_index = [arg index] - MTL_uniform_buffer_base_index;
1435 if (buf_index >= 0) {
1436 buffer_binding_max_ind = max_ii(buffer_binding_max_ind, buf_index);
1437 }
1438 }
1439 }
1440 pso_inst->buffer_bindings_reflection_data_frag.resize(buffer_binding_max_ind + 1);
1441 for (int i = 0; i < buffer_binding_max_ind + 1; i++) {
1442 pso_inst->buffer_bindings_reflection_data_frag[i] = {0, 0, 0, false};
1443 }
1444
1445 for (int i = 0; i < [frag_args count]; i++) {
1446 MTLArgument *arg = [frag_args objectAtIndex:i];
1447 if ([arg type] == MTLArgumentTypeBuffer) {
1448 int buf_index = [arg index] - MTL_uniform_buffer_base_index;
1449 shader_debug_printf(" BUF IND: %d (arg name: %s)\n", buf_index, [[arg name] UTF8String]);
1450 if (buf_index >= 0) {
1451 pso_inst->buffer_bindings_reflection_data_frag[buf_index] = {
1452 (uint32_t)([arg index]),
1453 (uint32_t)([arg bufferDataSize]),
1454 (uint32_t)([arg bufferAlignment]),
1455 ([arg isActive] == YES) ? true : false};
1456 }
1457 }
1458 }
1459 }
1460
1461 /* Insert into pso cache. */
1462 pso_cache_lock_.lock();
1463 pso_inst->shader_pso_index = pso_cache_.size();
1464 pso_cache_.add(pipeline_descriptor, pso_inst);
1465 pso_cache_lock_.unlock();
1467 "PSO CACHE: Stored new variant in PSO cache for shader '%s' Hash: '%llu'\n",
1468 this->name,
1469 pipeline_descriptor.hash());
1470 return pso_inst;
1471 }
1472}
1473
/* Bakes (or fetches from cache) a Metal compute pipeline state object (PSO) for this shader.
 * Thread-safety: cache lookups/insertions are guarded by `pso_cache_lock_`; the bake itself
 * runs outside the lock, so two threads may redundantly bake the same PSO (benign).
 * NOTE(review): this scrape is missing original line 1474, which presumably carries the
 * signature `MTLComputePipelineStateInstance *MTLShader::bake_compute_pipeline_state(` —
 * confirm against the repository source. */
 1475 MTLContext *ctx, MTLComputePipelineStateDescriptor &compute_pipeline_descriptor)
 1476{
 1477 /* NOTE(Metal): Bakes and caches a PSO for compute. */
 1478 BLI_assert(this);
 1479 MTLShaderInterface *mtl_interface = this->get_interface();
 1480 BLI_assert(mtl_interface);
 1481 BLI_assert(this->is_valid());
 1482 BLI_assert(shader_library_compute_ != nil);
 1483
 1484 /* Check if current PSO exists in the cache. */
 1485 pso_cache_lock_.lock();
 1486 MTLComputePipelineStateInstance **pso_lookup = compute_pso_cache_.lookup_ptr(
 1487 compute_pipeline_descriptor);
 1488 MTLComputePipelineStateInstance *pipeline_state = (pso_lookup) ? *pso_lookup : nullptr;
 1489 pso_cache_lock_.unlock();
 1490
 1491 if (pipeline_state != nullptr) {
 1492 /* Return cached PSO state. */
 1493 BLI_assert(pipeline_state->pso != nil);
 1494 return pipeline_state;
 1495 }
 1496 else {
 1497 /* Prepare Compute Pipeline Descriptor. */
 1498
 1499 /* Setup function specialization constants, used to modify and optimize
 1500 * generated code based on current render pipeline configuration. */
 1501 MTLFunctionConstantValues *values = [[MTLFunctionConstantValues new] autorelease];
 1502
 1503 /* TODO: Compile specialized shader variants asynchronously. */
 1504
 1505 /* Custom function constant values: */
 /* NOTE(review): original line 1506 is missing from this scrape — it presumably opens the
  * call (a specialization-constant population helper) whose arguments follow below. */
 1507 values, this->constants, compute_pipeline_descriptor.specialization_state);
 1508
 1509 /* Offset the bind index for Uniform buffers such that they begin after the VBO
 1510 * buffer bind slots. `MTL_uniform_buffer_base_index` is passed as a function
 1511 * specialization constant, customized per unique pipeline state permutation.
 1512 *
 1513 * For Compute shaders, this offset is always zero, but this needs setting as
 1514 * it is expected as part of the common Metal shader header. */
 1515 int MTL_uniform_buffer_base_index = 0;
 1516 [values setConstantValue:&MTL_uniform_buffer_base_index
 1517 type:MTLDataTypeInt
 1518 withName:@"MTL_uniform_buffer_base_index"];
 1519
 1520 /* Storage buffer bind index.
 1521 * This is always relative to MTL_uniform_buffer_base_index, plus the number of active buffers,
 1522 * and an additional space for the push constant block.
 1523 * If the shader does not have any uniform blocks, then we can place directly after the push
 1524 * constant block. As we do not need an extra spot for the UBO at index '0'. */
 1525 int MTL_storage_buffer_base_index = MTL_uniform_buffer_base_index + 1 +
 1526 ((mtl_interface->get_total_uniform_blocks() > 0) ?
 1527 mtl_interface->get_total_uniform_blocks() :
 1528 0);
 1529
 1530 [values setConstantValue:&MTL_storage_buffer_base_index
 1531 type:MTLDataTypeInt
 1532 withName:@"MTL_storage_buffer_base_index"];
 1533
 1534 /* Compile compute function. */
 1535 NSError *error = nullptr;
 1536 id<MTLFunction> compute_function = [shader_library_compute_
 1537 newFunctionWithName:compute_function_name_
 1538 constantValues:values
 1539 error:&error];
 1540 compute_function.label = [NSString stringWithUTF8String:this->name];
 1541
 1542 if (error) {
 1543 NSLog(@"Compile Error - Metal Shader compute function, error %@", error);
 1544
 1545 /* Only exit out if genuine error and not warning */
 1546 if ([[error localizedDescription] rangeOfString:@"Compilation succeeded"].location ==
 1547 NSNotFound)
 1548 {
 1549 BLI_assert(false);
 1550 return nullptr;
 1551 }
 1552 }
 1553
 1554 /* Compile PSO. */
 1555 MTLComputePipelineDescriptor *desc = [[MTLComputePipelineDescriptor alloc] init];
 1556 desc.label = [NSString stringWithUTF8String:this->name];
 1557 desc.computeFunction = compute_function;
 1558
 /* NOTE(review): original lines 1559-1566 are missing from this scrape — their content
  * (additional descriptor setup and/or an explanatory comment) cannot be determined from
  * here; confirm against the repository source. */
 /* On Apple Silicon M1/M2, optionally apply a per-shader tuning override for the
  * descriptor's max threads-per-threadgroup (only when a tuning value was provided). */
 1567 const MTLCapabilities &capabilities = MTLBackend::get_capabilities();
 1568 if (ELEM(capabilities.gpu, APPLE_GPU_M1, APPLE_GPU_M2)) {
 1569 if (maxTotalThreadsPerThreadgroup_Tuning_ > 0) {
 1570 desc.maxTotalThreadsPerThreadgroup = this->maxTotalThreadsPerThreadgroup_Tuning_;
 1571 MTL_LOG_INFO("Using custom parameter for shader %s value %u\n",
 1572 this->name,
 1573 maxTotalThreadsPerThreadgroup_Tuning_);
 1574 }
 1575 }
 1576
 1577 id<MTLComputePipelineState> pso = [ctx->device
 1578 newComputePipelineStateWithDescriptor:desc
 1579 options:MTLPipelineOptionNone
 1580 reflection:nullptr
 1581 error:&error];
 1582
 1583 /* If PSO has compiled but max theoretical threads-per-threadgroup is lower than required
 1584 * dispatch size, recompile with increased limit. NOTE: This will result in a performance drop,
 1585 * ideally the source shader should be modified to reduce local register pressure, or, local
 1586 * work-group size should be reduced.
 1587 * Similarly, the custom tuning parameter "mtl_max_total_threads_per_threadgroup" can be
 1588 * specified to a sufficiently large value to avoid this. */
 1589 if (pso) {
 1590 uint num_required_threads_per_threadgroup = compute_pso_common_state_.threadgroup_x_len *
 1591 compute_pso_common_state_.threadgroup_y_len *
 1592 compute_pso_common_state_.threadgroup_z_len;
 1593 if (pso.maxTotalThreadsPerThreadgroup < num_required_threads_per_threadgroup) {
 /* NOTE(review): original line 1594 is missing — presumably the opening of a warning-log
  * call whose format arguments follow below. */
 1595 "Shader '%s' requires %u threads per threadgroup, but PSO limit is: %lu. Recompiling "
 1596 "with increased limit on descriptor.\n",
 1597 this->name,
 1598 num_required_threads_per_threadgroup,
 1599 (unsigned long)pso.maxTotalThreadsPerThreadgroup);
 1600 [pso release];
 1601 pso = nil;
 1602 desc.maxTotalThreadsPerThreadgroup = 1024;
 1603 pso = [ctx->device newComputePipelineStateWithDescriptor:desc
 1604 options:MTLPipelineOptionNone
 1605 reflection:nullptr
 1606 error:&error];
 1607 }
 1608 }
 1609
 1610 if (error) {
 1611 NSLog(@"Failed to create PSO for compute shader: %s error %@\n", this->name, error);
 1612 BLI_assert(false);
 1613 return nullptr;
 1614 }
 1615 else if (!pso) {
 1616 NSLog(@"Failed to create PSO for compute shader: %s, but no error was provided!\n",
 1617 this->name);
 1618 BLI_assert(false);
 1619 return nullptr;
 1620 }
 1621 else {
 1622#if 0
 1623 NSLog(@"Successfully compiled compute PSO for shader: %s (Metal Context: %p)\n",
 1624 this->name,
 1625 ctx);
 1626#endif
 1627 }
 1628
 1629 [desc release];
 1630
 1631 /* Gather reflection data and create MTLComputePipelineStateInstance to store results. */
 /* NOTE(review): original line 1632 is missing — presumably the allocation of
  * `compute_pso_instance` (a new MTLComputePipelineStateInstance); confirm upstream. */
 1633 compute_pso_instance->compute = compute_function;
 1634 compute_pso_instance->pso = pso;
 1635 compute_pso_instance->base_uniform_buffer_index = MTL_uniform_buffer_base_index;
 1636 compute_pso_instance->base_storage_buffer_index = MTL_storage_buffer_base_index;
 1637 pso_cache_lock_.lock();
 1638 compute_pso_instance->shader_pso_index = compute_pso_cache_.size();
 1639 compute_pso_cache_.add(compute_pipeline_descriptor, compute_pso_instance);
 1640 pso_cache_lock_.unlock();
 1641
 1642 return compute_pso_instance;
 1643 }
 1644}
1647/* -------------------------------------------------------------------- */
/* Maps an MTLVertexFormat to the backend's SSBO-vertex-fetch attribute-type enum
 * (GPU_SHADER_ATTR_TYPE_*), used when vertex data is fetched manually from SSBOs
 * instead of via stage-in. Returns -1 (with a debug assert) for unsupported formats.
 * NOTE(review): this scrape is missing every alternating original line (1655, 1657, ...)
 * that carried the `return GPU_SHADER_ATTR_TYPE_*;` statement for each case below —
 * consult the repository source for the actual mappings. */
 1651int MTLShader::ssbo_vertex_type_to_attr_type(MTLVertexFormat attribute_type)
 1652{
 1653 switch (attribute_type) {
 1654 case MTLVertexFormatFloat:
 1656 case MTLVertexFormatInt:
 1658 case MTLVertexFormatUInt:
 1660 case MTLVertexFormatShort:
 1662 case MTLVertexFormatUChar:
 1664 case MTLVertexFormatUChar2:
 1666 case MTLVertexFormatUChar3:
 1668 case MTLVertexFormatUChar4:
 1670 case MTLVertexFormatFloat2:
 1672 case MTLVertexFormatFloat3:
 1674 case MTLVertexFormatFloat4:
 1676 case MTLVertexFormatUInt2:
 1678 case MTLVertexFormatUInt3:
 1680 case MTLVertexFormatUInt4:
 1682 case MTLVertexFormatInt2:
 1684 case MTLVertexFormatInt3:
 1686 case MTLVertexFormatInt4:
 1688 case MTLVertexFormatUCharNormalized:
 1690 case MTLVertexFormatUChar2Normalized:
 1692 case MTLVertexFormatUChar3Normalized:
 1694 case MTLVertexFormatUChar4Normalized:
 1696 case MTLVertexFormatInt1010102Normalized:
 1698 case MTLVertexFormatShort3Normalized:
 1700 default:
 1701 BLI_assert_msg(false,
 1702 "Not yet supported attribute type for SSBO vertex fetch -- Add entry "
 1703 "GPU_SHADER_ATTR_TYPE_** to shader defines, and in this table");
 1704 return -1;
 1705 }
 /* Unreachable in practice: every switch path returns; kept to satisfy the compiler. */
 1706 return -1;
 1707}
1708
/* Begins an SSBO vertex-fetch attribute binding pass: marks binding as active, builds a
 * bitmask with one bit set per shader attribute (cleared as attributes are bound), and
 * resets the per-VBO-slot usage tracking.
 * NOTE(review): the signature line (original 1709) is missing from this scrape —
 * presumably `void MTLShader::ssbo_vertex_fetch_bind_attributes_begin()`; confirm upstream. */
 1710{
 1711 MTLShaderInterface *mtl_interface = this->get_interface();
 1712 ssbo_vertex_attribute_bind_active_ = true;
 /* All-ones mask over `get_total_attributes()` bits: each set bit = attribute still unbound. */
 1713 ssbo_vertex_attribute_bind_mask_ = (1 << mtl_interface->get_total_attributes()) - 1;
 1714
 1715 /* Reset tracking of actively used VBO bind slots for SSBO vertex fetch mode. */
 1716 for (int i = 0; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) {
 1717 ssbo_vbo_slot_used_[i] = false;
 1718 }
 1719}
1720
/* Binds a single attribute for SSBO vertex-fetch mode: validates the attribute index,
 * clears its bit in the pending-bind mask (asserting against double-binding), then writes
 * the attribute's descriptor properties (offset, stride, instancing flag, VBO id, format)
 * into the shader's cached uniform slots, and marks the source VBO slot as used.
 * NOTE(review): the signature line (original 1721) is missing from this scrape —
 * presumably takes a `const MTLSSBOAttribute &ssbo_attr` parameter; confirm upstream. */
 1722{
 1723 /* Fetch attribute. */
 1724 MTLShaderInterface *mtl_interface = this->get_interface();
 1725 BLI_assert(ssbo_attr.mtl_attribute_index >= 0 &&
 1726 ssbo_attr.mtl_attribute_index < mtl_interface->get_total_attributes());
 1727 UNUSED_VARS_NDEBUG(mtl_interface);
 1728
 1729 /* Update bind-mask to verify this attribute has been used. */
 1730 BLI_assert((ssbo_vertex_attribute_bind_mask_ & (1 << ssbo_attr.mtl_attribute_index)) ==
 1731 (1 << ssbo_attr.mtl_attribute_index) &&
 1732 "Attribute has already been bound");
 1733 ssbo_vertex_attribute_bind_mask_ &= ~(1 << ssbo_attr.mtl_attribute_index);
 1734
 1735 /* Fetch attribute uniform addresses from cache. */
 1736 ShaderSSBOAttributeBinding &cached_ssbo_attribute =
 1737 cached_ssbo_attribute_bindings_[ssbo_attr.mtl_attribute_index];
 1738 BLI_assert(cached_ssbo_attribute.attribute_index >= 0);
 1739
 1740 /* Write attribute descriptor properties to shader uniforms. */
 1741 this->uniform_int(cached_ssbo_attribute.uniform_offset, 1, 1, &ssbo_attr.attribute_offset);
 1742 this->uniform_int(cached_ssbo_attribute.uniform_stride, 1, 1, &ssbo_attr.per_vertex_stride);
 1743 int inst_val = (ssbo_attr.is_instance ? 1 : 0);
 1744 this->uniform_int(cached_ssbo_attribute.uniform_fetchmode, 1, 1, &inst_val);
 1745 this->uniform_int(cached_ssbo_attribute.uniform_vbo_id, 1, 1, &ssbo_attr.vbo_id);
 1746 BLI_assert(ssbo_attr.attribute_format >= 0);
 1747 this->uniform_int(cached_ssbo_attribute.uniform_attr_type, 1, 1, &ssbo_attr.attribute_format);
 /* Record that this VBO slot feeds at least one attribute, so bind-end leaves it alone. */
 1748 ssbo_vbo_slot_used_[ssbo_attr.vbo_id] = true;
 1749}
1750
/* Ends an SSBO vertex-fetch binding pass. Any attribute left unbound (its bit still set in
 * the pending mask) is pointed at a shared NULL attribute buffer, bound to the first free
 * VBO slot, so the shader reads well-defined data instead of an unbound buffer.
 * NOTE(review): the first signature line (original 1751) is missing from this scrape —
 * presumably `void MTLShader::ssbo_vertex_fetch_bind_attributes_end(`; confirm upstream. */
 1752 id<MTLRenderCommandEncoder> /*active_encoder*/)
 1753{
 1754 ssbo_vertex_attribute_bind_active_ = false;
 1755
 1756 /* If our mask is non-zero, we have unassigned attributes. */
 1757 if (ssbo_vertex_attribute_bind_mask_ != 0) {
 1758 MTLShaderInterface *mtl_interface = this->get_interface();
 1759
 1760 /* Determine if there is a free slot we can bind the null buffer to -- We should have at
 1761 * least ONE free slot in this instance. */
 1762 int null_attr_buffer_slot = -1;
 1763 for (int i = 0; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) {
 1764 if (!ssbo_vbo_slot_used_[i]) {
 1765 null_attr_buffer_slot = i;
 1766 break;
 1767 }
 1768 }
 1769 BLI_assert_msg(null_attr_buffer_slot >= 0,
 1770 "No suitable bind location for a NULL buffer was found");
 1771
 1772 for (int i = 0; i < mtl_interface->get_total_attributes(); i++) {
 1773 if (ssbo_vertex_attribute_bind_mask_ & (1 << i)) {
 1774 const MTLShaderInputAttribute *mtl_shader_attribute = &mtl_interface->get_attribute(i);
 1775#if MTL_DEBUG_SHADER_ATTRIBUTES == 1
 /* NOTE(review): original line 1776 is missing — presumably the opening of a debug-log
  * call whose format arguments follow below. */
 1777 "SSBO Vertex Fetch missing attribute with index: %d. Shader: %s, Attr "
 1778 "Name: "
 1779 "%s - Null buffer bound",
 1780 i,
 1781 this->name_get(),
 1782 mtl_shader_attribute->name);
 1783#endif
 1784 /* Bind Attribute with NULL buffer index and stride zero (for constant access). */
 1785 MTLSSBOAttribute ssbo_attr(
 1786 i, null_attr_buffer_slot, 0, 0, GPU_SHADER_ATTR_TYPE_FLOAT, false);
 /* NOTE(review): original lines 1787-1788 are missing — presumably the call binding
  * `ssbo_attr` plus the opening of the warning-log call whose arguments follow. */
 1789 "Unassigned Shader attribute: %s, Attr Name: %s -- Binding NULL BUFFER to "
 1790 "slot %d",
 1791 this->name_get(),
 1792 mtl_interface->get_name_at_offset(mtl_shader_attribute->name_offset),
 1793 null_attr_buffer_slot);
 1794 }
 1795 }
 1796
 1797 /* Bind NULL buffer to given VBO slot. */
 1798 MTLContext *ctx = MTLContext::get();
 1799 id<MTLBuffer> null_buf = ctx->get_null_attribute_buffer();
 1800 BLI_assert(null_buf);
 1801
 /* NOTE(review): original line 1802 is missing — presumably fetches the render-pass state
  * reference `rps` used on the next line; confirm upstream. */
 1803 rps.bind_vertex_buffer(null_buf, 0, null_attr_buffer_slot);
 1804 }
 1805}
1806
/* Returns the transform-feedback vertex buffer when transform feedback is both supported by
 * this shader and currently active; nullptr otherwise.
 * NOTE(review): the signature line (original 1807) is missing from this scrape —
 * presumably a getter returning `transform_feedback_vertbuf_`'s type; confirm upstream. */
 1808{
 1809 if (transform_feedback_type_ == GPU_SHADER_TFB_NONE || !transform_feedback_active_) {
 1810 return nullptr;
 1811 }
 1812 return transform_feedback_vertbuf_;
 1813}
1814
/* Returns true if `str` names one of this shader's registered transform-feedback output
 * varyings. Always false when the shader has no transform-feedback support.
 * NOTE(review): the signature line (original 1815) is missing from this scrape —
 * presumably `bool MTLShader::has_transform_feedback_varying(std::string str)` or similar;
 * confirm upstream. */
 1816{
 1817 if (this->transform_feedback_type_ == GPU_SHADER_TFB_NONE) {
 1818 return false;
 1819 }
 1820
 /* Linear search is fine here: the varying list is small (per-shader outputs). */
 1821 return (std::find(tf_output_name_list_.begin(), tf_output_name_list_.end(), str) !=
 1822 tf_output_name_list_.end());
 1823}
1824
1827/* Since this is going to be compiling shaders in a multi-threaded fashion we
1828 * don't want to create an instance per context as we want to restrict the
1829 * number of simultaneous compilation threads to ensure system responsiveness.
1830 * Hence the global shared instance. */
1833
1846
1861
1862/* -------------------------------------------------------------------- */
/* Initializes the shared shader-compiler instance; clears the thread-termination flag so
 * worker threads (created lazily later) will run.
 * NOTE(review): the signature line (original 1866/1868 region) is missing from this scrape —
 * presumably the `MTLShaderCompiler` constructor; confirm upstream. */
 1867{
 1869
 1870 terminate_compile_threads = false;
 1871}
1872
/* Tears down the shared shader compiler: signals and joins all worker threads, flushes any
 * unprocessed queued work by force-marking it ready, then finalizes and deletes every
 * outstanding batch. Asserts that no batches remain afterwards.
 * NOTE(review): the signature line (original 1873) is missing from this scrape —
 * presumably the `MTLShaderCompiler` destructor; confirm upstream. */
 1874{
 1875 /* Shutdown the compiler threads. */
 /* Threads must be joined before touching the queue so no worker races the cleanup below. */
 1876 terminate_compile_threads = true;
 1877 cond_var.notify_all();
 1878
 1879 for (auto &thread : compile_threads) {
 1880 thread.join();
 1881 }
 1882
 1883 /* Mark any unprocessed work items as ready so we can move
 1884 * them into a batch for cleanup. */
 /* NOTE(review): the emptiness pre-check on the next line runs before the lock is taken;
  * safe here only because all workers have already been joined — confirm no other
  * producer threads exist at destruction time. */
 1885 if (!parallel_work_queue.empty()) {
 1886 std::unique_lock<std::mutex> lock(queue_mutex);
 1887 while (!parallel_work_queue.empty()) {
 1888 ParallelWork *work_item = parallel_work_queue.front();
 1889 work_item->is_ready = true;
 1890 parallel_work_queue.pop_front();
 1891 }
 1892 }
 1893
 1894 /* Clean up any outstanding batches. */
 1895 for (BatchHandle handle : batches.keys()) {
 1896 Vector<Shader *> shaders = batch_finalize(handle);
 1897 /* Delete any shaders in the batch. */
 1898 for (Shader *shader : shaders) {
 1899 if (shader) {
 1900 delete shader;
 1901 }
 1902 }
 1903 }
 /* `batch_finalize` is expected to remove each handle from `batches` as it completes. */
 1904 BLI_assert(batches.is_empty());
 1905}
1906
/* Lazily spawn the pool of shader-compilation worker threads (no-op when the
 * pool already exists). Each worker receives its own GPU context, created here
 * on the main thread since contexts cannot be created on worker threads.
 * NOTE(review): the signature line (original line 1907) is missing from this
 * extract — presumably `void MTLParallelShaderCompiler::create_compile_threads()`. */
1908{
1909 std::unique_lock<std::mutex> lock(queue_mutex);
1910
1911 /* Return if the compilation threads already exist */
1912 if (!compile_threads.empty()) {
1913 return;
1914 }
1915
1916 /* Limit to the number of compiler threads to (performance cores - 1) to
1917 * leave one thread free for main thread/UI responsiveness */
1918 const MTLCapabilities &capabilities = MTLBackend::get_capabilities();
1919 int max_mtlcompiler_threads = capabilities.num_performance_cores - 1;
1920
1921 /* Save the main thread context */
1922 GPUContext *main_thread_context = GPU_context_active_get();
1923 MTLContext *metal_context = static_cast<MTLContext *>(unwrap(main_thread_context));
1924 id<MTLDevice> metal_device = metal_context->device;
1925
1926#if defined(MAC_OS_VERSION_13_3)
1927 /* Clamp the number of threads if necessary. */
1928 if (@available(macOS 13.3, *)) {
1929 /* Check we've set the flag to allow more than 2 compile threads. */
1930 BLI_assert(metal_device.shouldMaximizeConcurrentCompilation);
 /* Don't spawn more workers than Metal will compile concurrently. */
1931 max_mtlcompiler_threads = MIN(int([metal_device maximumConcurrentCompilationTaskCount]),
1932 max_mtlcompiler_threads);
1933 }
1934#endif
1935
1936 /* GPU settings for context creation. */
1937 GHOST_GPUSettings gpuSettings = {0};
1938 gpuSettings.context_type = GHOST_kDrawingContextTypeMetal;
1939 if (G.debug & G_DEBUG_GPU) {
1940 gpuSettings.flags |= GHOST_gpuDebugContext;
1941 }
 /* Honor the user's preferred GPU so worker contexts match the main context. */
1942 gpuSettings.preferred_device.index = U.gpu_preferred_index;
1943 gpuSettings.preferred_device.vendor_id = U.gpu_preferred_vendor_id;
1944 gpuSettings.preferred_device.device_id = U.gpu_preferred_device_id;
1945
1946 /* Spawn the compiler threads. */
1947 for (int i = 0; i < max_mtlcompiler_threads; i++) {
1948
1949 /* Grab the system handle. */
 /* NOTE(review): original lines 1951-1952 are missing from this extract —
  * presumably the `GPU_backend_ghost_system_get())` argument closing this cast. */
1950 GHOST_SystemHandle ghost_system = reinterpret_cast<GHOST_SystemHandle>(
1953
1954 /* Create a Ghost GPU Context using the system handle. */
1955 GHOST_ContextHandle ghost_gpu_context = GHOST_CreateGPUContext(ghost_system, gpuSettings);
1956
1957 /* Create a GPU context for the compile thread to use. */
1958 GPUContext *per_thread_context = GPU_context_create(nullptr, ghost_gpu_context);
1959
1960 /* Restore the main thread context.
1961 * (required as the above context creation also makes it active). */
1962 GPU_context_active_set(main_thread_context);
1963
1964 /* Create a new thread */
1965 compile_threads.push_back(std::thread([this, per_thread_context] {
1966 this->parallel_compilation_thread_func(per_thread_context);
1967 }));
1968 }
1969}
1970
1971void MTLParallelShaderCompiler::parallel_compilation_thread_func(GPUContext *blender_gpu_context)
1972{
1973 /* Contexts can only be created on the main thread so we have to
1974 * pass one in and make it active here */
1975 GPU_context_active_set(blender_gpu_context);
1976
1977 MTLContext *metal_context = static_cast<MTLContext *>(unwrap(blender_gpu_context));
1978 MTLShaderCompiler *shader_compiler = static_cast<MTLShaderCompiler *>(metal_context->compiler);
1979
1980 /* This context is only for compilation, it does not need it's own instance of the compiler */
1981 shader_compiler->release_parallel_shader_compiler();
1982
1983 /* Loop until we get the terminate signal */
1984 while (!terminate_compile_threads) {
1985 /* Grab the next shader off of the queue or wait... */
1986 ParallelWork *work_item = nullptr;
1987 {
1988 std::unique_lock<std::mutex> lock(queue_mutex);
1989 cond_var.wait(lock,
1990 [&] { return terminate_compile_threads || !parallel_work_queue.empty(); });
1991 if (terminate_compile_threads || parallel_work_queue.empty()) {
1992 continue;
1993 }
1994 work_item = parallel_work_queue.front();
1995 parallel_work_queue.pop_front();
1996 }
1997
1998 /* Compile a shader */
1999 if (work_item->work_type == PARALLELWORKTYPE_COMPILE_SHADER) {
2000 BLI_assert(work_item->info);
2001
2002 const shader::ShaderCreateInfo *shader_info = work_item->info;
2003 work_item->shader = static_cast<MTLShader *>(
2004 work_item->shader_compiler->compile(*shader_info, true));
2005
2006 if (work_item->shader) {
2007 /* Generate and cache any render PSOs if possible (typically materials only)
2008 * (Finalize() will already bake a Compute PSO if possible) */
2009 work_item->shader->warm_cache(-1);
2010 }
2011 }
2012 /* Bake PSO */
2013 else if (work_item->work_type == PARALLELWORKTYPE_BAKE_PSO) {
2014 MTLShader *shader = work_item->shader;
2015 /* Currently only support Compute */
2016 BLI_assert(shader && shader->has_compute_shader_lib());
2017
2018 /* Create descriptor using these specialization constants. */
2019 MTLComputePipelineStateDescriptor compute_pipeline_descriptor(
2020 work_item->specialization_values);
2021
2022 shader->bake_compute_pipeline_state(metal_context, compute_pipeline_descriptor);
2023 }
2024 else {
2025 BLI_assert(false);
2026 }
2027 work_item->is_ready = true;
2028 }
2029
2030 GPU_context_discard(blender_gpu_context);
2031}
2032
2033BatchHandle MTLParallelShaderCompiler::create_batch(size_t batch_size)
2034{
2035 std::scoped_lock lock(batch_mutex);
2036 BatchHandle batch_handle = next_batch_handle++;
2037 batches.add(batch_handle, {});
2038 Batch &batch = batches.lookup(batch_handle);
2039 if (batch_size) {
2040 batch.items.reserve(batch_size);
2041 }
2042 batch.is_ready = false;
2043 shader_debug_printf("Created batch %llu\n", batch_handle);
2044 return batch_handle;
2045}
2046
2047void MTLParallelShaderCompiler::add_item_to_batch(ParallelWork *work_item,
2048 BatchHandle batch_handle)
2049{
2050 std::scoped_lock lock(batch_mutex);
2051 Batch &batch = batches.lookup(batch_handle);
2052 batch.items.append(work_item);
2053}
2054
/* Register `work_item` with its batch and push it onto the shared work queue,
 * waking one worker thread. Silently dropped when shutdown is in progress. */
2055void MTLParallelShaderCompiler::add_parallel_item_to_queue(ParallelWork *work_item,
2056 BatchHandle batch_handle)
2057{
2058 shader_debug_printf("Request add shader work\n");
2059 if (!terminate_compile_threads) {
2060
2061 /* Defer creation of compilation threads until required */
2062 if (compile_threads.empty()) {
 /* NOTE(review): original line 2063 is missing from this extract — presumably
  * the `create_compile_threads();` call that this empty branch is for. */
2064 }
2065
2066 add_item_to_batch(work_item, batch_handle);
 /* Queue push and wakeup happen under queue_mutex for the remainder of scope. */
2067 std::lock_guard<std::mutex> lock(queue_mutex);
2068 parallel_work_queue.push_back(work_item);
2069 cond_var.notify_one();
2070 }
2071}
2072
/* Create a batch and enqueue one compile work-item per ShaderCreateInfo,
 * returning the batch handle for later polling/finalization.
 * NOTE(review): the signature line (original 2074) and line 2076 are missing
 * from this extract; per the class declaration this is
 * `BatchHandle MTLParallelShaderCompiler::batch_compile(MTLShaderCompiler *shader_compiler,
 *                                                       Span<const shader::ShaderCreateInfo *> &infos)`. */
2075{
2077
2078 BatchHandle batch_handle = create_batch(infos.size());
2079
2080 shader_debug_printf("Batch compile %llu shaders (Batch = %llu)\n", infos.size(), batch_handle);
2081
2082 /* Have to finalize all shaderInfos *before* any parallel compilation as
2083 * ShaderCreateInfo::finalize() is not thread safe */
2084 for (const shader::ShaderCreateInfo *info : infos) {
2085 const_cast<ShaderCreateInfo *>(info)->finalize();
2086 }
2087
 /* One work item per shader; workers fill in item->shader when done. */
2088 for (const shader::ShaderCreateInfo *info : infos) {
2089 ParallelWork *work_item = new ParallelWork;
2090 work_item->info = info;
2091 work_item->shader_compiler = shader_compiler;
2092 work_item->is_ready = false;
2093 work_item->shader = nullptr;
2094 work_item->work_type = PARALLELWORKTYPE_COMPILE_SHADER;
2095 add_parallel_item_to_queue(work_item, batch_handle);
2096 }
2097
2098 return batch_handle;
2099}
2100
/* Poll whether every work item in the batch has completed; latches the batch's
 * is_ready flag once all items are done.
 * NOTE(review): the signature line is missing from this extract; per the class
 * declaration this is `bool MTLParallelShaderCompiler::batch_is_ready(BatchHandle handle)`. */
2102{
2103 std::scoped_lock lock(batch_mutex);
2104 Batch &batch = batches.lookup(handle);
2105 if (batch.is_ready) {
2106 return true;
2107 }
2108
 /* Any still-pending item means the batch as a whole is not ready. */
2109 for (ParallelWork *item : batch.items) {
2110 if (item->is_ready) {
2111 continue;
2112 }
2113 else {
2114 return false;
2115 }
2116 }
2117
2118 batch.is_ready = true;
2119 shader_debug_printf("Batch %llu is now ready\n", handle);
2120 return batch.is_ready;
2121}
2122
/* Block until the batch completes, then pop it, collect the compiled shaders
 * (some entries may be nullptr on compile failure), free the work items, and
 * zero the caller's handle.
 * NOTE(review): the signature line (original 2123) is missing from this extract;
 * per the class declaration this is
 * `Vector<Shader *> MTLParallelShaderCompiler::batch_finalize(BatchHandle &handle)`.
 * Lines 2126 (busy-wait loop body, likely a short sleep) and 2131 (presumably
 * `Vector<Shader *> result;`) are also missing. */
2124{
2125 while (!batch_is_ready(handle)) {
2127 }
2128 std::scoped_lock lock(batch_mutex);
2129
2130 Batch batch = batches.pop(handle);
2132 for (ParallelWork *item : batch.items) {
2133 result.append(item->shader);
2134 delete item;
2135 }
2136 handle = 0;
2137 return result;
2138}
2139
/* Queue background PSO bakes for the given shader specializations. Only compute
 * shaders are handled (see comment below); returns 0 when nothing was queued.
 * NOTE(review): the return-type/name line (original 2140) and line 2143 are
 * missing from this extract; per the class declaration this is
 * `SpecializationBatchHandle MTLParallelShaderCompiler::precompile_specializations(
 *      Span<ShaderSpecialization> specializations)`. */
2141 Span<ShaderSpecialization> specializations)
2142{
2144 /* Zero indicates no batch was created */
2145 SpecializationBatchHandle batch_handle = 0;
2146
2147 for (auto &specialization : specializations) {
2148 MTLShader *sh = static_cast<MTLShader *>(unwrap(specialization.shader));
2149
2150 /* Specialization constants only take effect when we create the PSO.
2151 * We don't have the relevant info to create a Render PSO Descriptor unless
2152 * the shader has a has_parent_shader() but in that case it would (currently) be
2153 * invalid to apply specialization constants. For those reasons we currently only
2154 * support pre-compilation of Compute shaders.
2155 * (technically we could call makeFunction but the benefit would likely be minimal) */
2156 if (!sh->has_compute_shader_lib()) {
2157 continue;
2158 }
2159
2160 BLI_assert_msg(sh->is_valid(), "Shader must be finalized before precompiling specializations");
2161
2162 /* Defer batch creation until we have some work to do */
2163 if (!batch_handle) {
2164 batch_handle = create_batch(1);
2165 }
2166
 /* Bake-PSO work items carry no create-info; the shader pointer is borrowed. */
2167 ParallelWork *work_item = new ParallelWork;
2168 work_item->info = nullptr;
2169 work_item->is_ready = false;
2170 work_item->shader = sh;
2171 work_item->work_type = PARALLELWORKTYPE_BAKE_PSO;
2172
2173 /* Add the specialization constants to the work-item */
2174 for (const SpecializationConstant &constant : specialization.constants) {
2175 const ShaderInput *input = sh->interface->constant_get(constant.name.c_str());
2176 BLI_assert_msg(input != nullptr, "The specialization constant doesn't exists");
2177 work_item->specialization_values[input->location].u = constant.value.u;
2178 }
 /* NOTE(review): marking the shader's constants dirty here presumably forces a
  * re-bind/PSO refresh on next use — confirm intent against Shader::Constants. */
2179 sh->constants.is_dirty = true;
2180
2181 add_parallel_item_to_queue(work_item, batch_handle);
2182 }
2183 return batch_handle;
2184}
2185
2187{
2188 /* Check empty batch case where we have no handle */
2189 if (!handle) {
2190 return true;
2191 }
2192
2193 std::scoped_lock lock(batch_mutex);
2194 Batch &batch = batches.lookup(handle);
2195 if (batch.is_ready) {
2196 return true;
2197 }
2198
2199 for (ParallelWork *item : batch.items) {
2200 if (item->is_ready) {
2201 continue;
2202 }
2203 else {
2204 return false;
2205 }
2206 }
2207
2208 /* Handle is zeroed once the batch is ready */
2209 handle = 0;
2210 batch.is_ready = true;
2211 shader_debug_printf("Specialization Batch %llu is now ready\n", handle);
2212 return batch.is_ready;
2213}
2214
2217/* -------------------------------------------------------------------- */
/* Constructor: acquire a reference to the global shared parallel compiler so all
 * contexts share one bounded pool of compile threads.
 * NOTE(review): the signature line is missing from this extract — presumably
 * `MTLShaderCompiler::MTLShaderCompiler()`. */
2222{
2223 parallel_shader_compiler = get_shared_parallel_shader_compiler();
2224}
2225
2230
/* Drop this compiler's reference to the shared parallel compiler (idempotent).
 * NOTE(review): the signature line (original 2231) and line 2234 are missing from
 * this extract — line 2234 is presumably `release_shared_parallel_shader_compiler();`
 * releasing the global shared instance. */
2232{
2233 if (parallel_shader_compiler) {
2235 parallel_shader_compiler = nullptr;
2236 }
2237}
2238
/* Forward batch compilation to the shared parallel compiler, passing `this` so
 * workers can reach this compiler instance.
 * NOTE(review): the signature line (original 2239) is missing from this extract —
 * per the class declaration this is the override
 * `BatchHandle MTLShaderCompiler::batch_compile(Span<const shader::ShaderCreateInfo *> &infos)`. */
2240{
2241 BLI_assert(parallel_shader_compiler);
2242 return parallel_shader_compiler->batch_compile(this, infos);
2243}
/* Forward batch readiness polling to the shared parallel compiler.
 * NOTE(review): the signature line (original 2244) is missing from this extract —
 * per the class declaration: `bool MTLShaderCompiler::batch_is_ready(BatchHandle handle)`. */
2245{
2246 return parallel_shader_compiler->batch_is_ready(handle);
2247}
/* Forward batch finalization to the shared parallel compiler.
 * NOTE(review): the signature line (original 2248) is missing from this extract —
 * per the class declaration: `Vector<Shader *> MTLShaderCompiler::batch_finalize(BatchHandle &handle)`. */
2249{
2250 return parallel_shader_compiler->batch_finalize(handle);
2251}
/* Forward specialization precompilation to the shared parallel compiler.
 * NOTE(review): the return-type/name line (original 2252) is missing from this
 * extract — per the class declaration this is the override
 * `SpecializationBatchHandle MTLShaderCompiler::precompile_specializations(
 *      Span<ShaderSpecialization> specializations)`. */
2253 Span<ShaderSpecialization> specializations)
2254{
2255 return parallel_shader_compiler->precompile_specializations(specializations);
2256}
2257
/* Forward specialization-batch readiness polling to the shared parallel compiler.
 * NOTE(review): the signature line (original 2258) is missing from this extract —
 * per the class declaration:
 * `bool MTLShaderCompiler::specialization_batch_is_ready(SpecializationBatchHandle &handle)`. */
2259{
2260 return parallel_shader_compiler->specialization_batch_is_ready(handle);
2261}
2262
2265} // namespace blender::gpu
@ G_DEBUG_GPU
#define BLI_assert_unreachable()
Definition BLI_assert.h:97
#define BLI_assert(a)
Definition BLI_assert.h:50
#define BLI_assert_msg(a, msg)
Definition BLI_assert.h:57
MINLINE int min_ii(int a, int b)
MINLINE int max_ii(int a, int b)
unsigned char uchar
unsigned short ushort
unsigned int uint
Platform independent time functions.
void BLI_time_sleep_ms(int ms)
Definition time.c:85
#define UNUSED_VARS_NDEBUG(...)
#define ELEM(...)
GHOST C-API function and type declarations.
GHOST_ContextHandle GHOST_CreateGPUContext(GHOST_SystemHandle systemhandle, GHOST_GPUSettings gpuSettings)
static GHOST_SystemCocoa * ghost_system
@ GHOST_gpuDebugContext
Definition GHOST_Types.h:77
bool GPU_use_parallel_compilation()
GPUContext * GPU_context_create(void *ghost_window, void *ghost_context)
void * GPU_backend_ghost_system_get()
GPUContext * GPU_context_active_get()
void GPU_context_discard(GPUContext *)
void GPU_context_active_set(GPUContext *)
int64_t BatchHandle
Definition GPU_shader.hh:68
eGPUShaderTFBType
@ GPU_SHADER_TFB_NONE
int64_t SpecializationBatchHandle
@ GPU_USAGE_DEVICE_ONLY
@ GPU_FETCH_INT_TO_FLOAT_UNIT
@ GPU_FETCH_INT_TO_FLOAT
#define GPU_VERT_ATTR_MAX_LEN
volatile int lock
struct GPUContext GPUContext
SIMD_FORCE_INLINE bool isActive() const
void init()
unsigned int U
Definition btGjkEpa3.h:78
#define MIN(_a, _b)
void reset()
clear internal cached data and reset random seed
Value pop(const Key &key)
Definition BLI_map.hh:378
KeyIterator keys() const
Definition BLI_map.hh:837
bool add(const Key &key, const Value &value)
Definition BLI_map.hh:271
const Value & lookup(const Key &key) const
Definition BLI_map.hh:506
bool is_empty() const
Definition BLI_map.hh:937
constexpr int64_t size() const
Definition BLI_span.hh:494
constexpr int64_t size() const
Definition BLI_span.hh:253
int64_t size() const
void append(const T &value)
IndexRange index_range() const
void reserve(const int64_t min_capacity)
static MTLCapabilities & get_capabilities()
MTLRenderPassState & get_render_pass_state()
MTLFrameBuffer * get_current_framebuffer()
static MTLContext * get()
id< MTLBuffer > get_null_attribute_buffer()
MTLContextGlobalShaderPipelineState pipeline_state
MTLCommandBufferManager main_command_buffer
MTLAttachment get_color_attachment(uint slot)
bool specialization_batch_is_ready(SpecializationBatchHandle &handle)
Vector< Shader * > batch_finalize(BatchHandle &handle)
bool batch_is_ready(BatchHandle handle)
BatchHandle batch_compile(MTLShaderCompiler *shade_compiler, Span< const shader::ShaderCreateInfo * > &infos)
SpecializationBatchHandle precompile_specializations(Span< ShaderSpecialization > specializations)
void bind_vertex_buffer(id< MTLBuffer > buffer, uint64_t buffer_offset, uint index)
virtual SpecializationBatchHandle precompile_specializations(Span< ShaderSpecialization > specializations) override
virtual Vector< Shader * > batch_finalize(BatchHandle &handle) override
virtual ~MTLShaderCompiler() override
virtual bool specialization_batch_is_ready(SpecializationBatchHandle &handle) override
virtual BatchHandle batch_compile(Span< const shader::ShaderCreateInfo * > &infos) override
virtual bool batch_is_ready(BatchHandle handle) override
const MTLShaderBufferBlock & get_push_constant_block() const
const char * get_name_at_offset(uint32_t offset) const
int get_argument_buffer_bind_index(ShaderStage stage) const
const MTLShaderInputAttribute & get_attribute(uint index) const
const MTLShaderUniform & get_uniform(uint index) const
void ssbo_vertex_fetch_bind_attributes_begin()
void set_fragment_function_name(NSString *fragment_function_name)
bool transform_feedback_enable(VertBuf *buf) override
void warm_cache(int limit) override
void ssbo_vertex_fetch_bind_attribute(const MTLSSBOAttribute &ssbo_attr)
void transform_feedback_names_set(Span< const char * > name_list, const eGPUShaderTFBType geom_type) override
MTLRenderPipelineStateInstance * bake_pipeline_state(MTLContext *ctx, MTLPrimitiveTopologyClass prim_type, const MTLRenderPipelineStateDescriptor &pipeline_descriptor)
void geometry_shader_from_glsl(MutableSpan< const char * > sources) override
MTLComputePipelineStateInstance * bake_compute_pipeline_state(MTLContext *ctx, MTLComputePipelineStateDescriptor &compute_pipeline_descriptor)
void shader_compute_source_from_msl(NSString *input_compute_source)
void uniform_int(int location, int comp_len, int array_size, const int *data) override
bool finalize(const shader::ShaderCreateInfo *info=nullptr) override
void transform_feedback_disable() override
void fragment_shader_from_glsl(MutableSpan< const char * > sources) override
void vertex_shader_from_glsl(MutableSpan< const char * > sources) override
static int ssbo_vertex_type_to_attr_type(MTLVertexFormat attribute_type)
bool has_transform_feedback_varying(std::string str)
void set_vertex_function_name(NSString *vetex_function_name)
void bind() override
void shader_source_from_msl(NSString *input_vertex_source, NSString *input_fragment_source)
VertBuf * get_transform_feedback_active_buffer()
void unbind() override
MTLShaderInterface * get_interface()
void uniform_float(int location, int comp_len, int array_size, const float *data) override
MTLRenderPipelineStateInstance * bake_current_pipeline_state(MTLContext *ctx, MTLPrimitiveTopologyClass prim_type)
void ssbo_vertex_fetch_bind_attributes_end(id< MTLRenderCommandEncoder > active_encoder)
MTLShader(MTLContext *ctx, const char *name)
Definition mtl_shader.mm:72
void compute_shader_from_glsl(MutableSpan< const char * > sources) override
void init(const shader::ShaderCreateInfo &, bool is_batch_compilation) override
bool get_uses_ssbo_vertex_fetch() const override
void set_interface(MTLShaderInterface *interface)
void push_constant_bindstate_mark_dirty(bool is_dirty)
void set_compute_function_name(NSString *compute_function_name)
MTLRenderPipelineStateDescriptor & get_pipeline_descriptor()
Definition mtl_state.hh:59
const ShaderInput * constant_get(const char *name) const
ShaderInterface * interface
const char *const name_get() const
struct blender::gpu::Shader::Constants constants
void print_log(Span< const char * > sources, const char *log, const char *stage, bool error, GPULogParser *parser)
eGPUTextureFormat format_get() const
bool join()
Definition thread.cpp:43
#define printf
CCL_NAMESPACE_BEGIN struct Options options
EvaluationStage stage
Definition deg_eval.cc:83
#define fabsf(x)
draw_view in_light_buf[] float
draw_view push_constant(Type::INT, "radiance_src") .push_constant(Type capture_info_buf storage_buf(1, Qualifier::READ, "ObjectBounds", "bounds_buf[]") .push_constant(Type draw_view int
#define str(s)
struct @620::@622 batch
#define GPU_FB_MAX_COLOR_ATTACHMENT
#define SOURCES_INDEX_VERSION
int count
void MEM_freeN(void *vmemh)
Definition mallocn.cc:105
void *(* MEM_callocN)(size_t len, const char *str)
Definition mallocn.cc:42
#define G(x, y, z)
static void error(const char *str)
#define MTL_MAX_BUFFER_BINDINGS
#define MTL_LOG_INFO(info,...)
Definition mtl_debug.hh:51
#define MTL_LOG_WARNING(info,...)
Definition mtl_debug.hh:44
#define MTL_LOG_ERROR(info,...)
Definition mtl_debug.hh:36
#define shader_debug_printf(...)
Definition mtl_shader.hh:49
#define MTL_SHADER_SPECIALIZATION_CONSTANT_BASE_ID
Definition mtl_shader.hh:53
const char datatoc_mtl_shader_common_msl[]
uint mtl_get_data_type_alignment(eMTLDataType type)
#define GPU_SHADER_ATTR_TYPE_CHAR4
#define GPU_SHADER_ATTR_TYPE_IVEC2
#define GPU_SHADER_ATTR_TYPE_UCHAR3_NORM
#define MTL_SSBO_VERTEX_FETCH_MAX_VBOS
#define GPU_SHADER_ATTR_TYPE_UCHAR_NORM
#define GPU_SHADER_ATTR_TYPE_INT1010102_NORM
#define GPU_SHADER_ATTR_TYPE_INT
#define GPU_SHADER_ATTR_TYPE_UVEC4
#define GPU_SHADER_ATTR_TYPE_CHAR2
#define GPU_SHADER_ATTR_TYPE_SHORT3_NORM
#define GPU_SHADER_ATTR_TYPE_VEC2
#define MTL_SSBO_VERTEX_FETCH_IBO_INDEX
#define GPU_SHADER_ATTR_TYPE_CHAR3
#define GPU_SHADER_ATTR_TYPE_IVEC4
#define GPU_SHADER_ATTR_TYPE_FLOAT
#define GPU_SHADER_ATTR_TYPE_VEC3
#define GPU_SHADER_ATTR_TYPE_SHORT
#define GPU_SHADER_ATTR_TYPE_IVEC3
#define GPU_SHADER_ATTR_TYPE_UCHAR4_NORM
#define GPU_SHADER_ATTR_TYPE_UINT
#define GPU_SHADER_ATTR_TYPE_UVEC3
#define GPU_SHADER_ATTR_TYPE_UVEC2
#define GPU_SHADER_ATTR_TYPE_UCHAR2_NORM
#define GPU_SHADER_ATTR_TYPE_CHAR
#define GPU_SHADER_ATTR_TYPE_VEC4
StringRefNull gpu_shader_dependency_get_filename_from_source_string(const StringRefNull source_string)
Find the name of the file from which the given string was generated.
const char * to_string(ShaderStage stage)
Definition mtl_shader.mm:52
static Context * unwrap(GPUContext *ctx)
MTLPixelFormat gpu_texture_format_to_metal(eGPUTextureFormat tex_format)
std::mutex g_shared_parallel_shader_compiler_mutex
MTLParallelShaderCompiler * g_shared_parallel_shader_compiler
void release_shared_parallel_shader_compiler()
bool mtl_format_supports_blending(MTLPixelFormat format)
static void populate_specialization_constant_values(MTLFunctionConstantValues *values, const Shader::Constants &shader_constants, const SpecializationStateDescriptor &specialization_descriptor)
MTLParallelShaderCompiler * get_shared_parallel_shader_compiler()
unsigned int uint32_t
Definition stdint.h:80
unsigned char uint8_t
Definition stdint.h:78
GHOST_TDrawingContextType context_type
GHOST_GPUDevice preferred_device
MTLPixelFormat color_attachment_format[GPU_FB_MAX_COLOR_ATTACHMENT]
blender::Vector< MTLBufferArgumentData > buffer_bindings_reflection_data_frag
blender::Vector< MTLBufferArgumentData > buffer_bindings_reflection_data_vert
Definition mtl_shader.hh:99
MTLVertexBufferLayoutDescriptorPSO buffer_layouts[GPU_BATCH_VBO_MAX_LEN+GPU_BATCH_INST_VBO_MAX_LEN]
MTLVertexAttributeDescriptorPSO attributes[GPU_VERT_ATTR_MAX_LEN]
Vector< gpu::shader::Type > types
Describe inputs & outputs, stage interfaces, resources and sources of a shader. If all data is correc...
PointerRNA * ptr
Definition wm_files.cc:4126