Blender V5.0
mtl_shader.mm
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2022-2023 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
8
9#include "BKE_global.hh"
10
11#include "DNA_userdef_types.h"
12
13#include "BLI_string.h"
14#include "BLI_time.h"
15
16#include <algorithm>
17#include <fmt/format.h>
18#include <fstream>
19#include <iostream>
20#include <map>
21#include <mutex>
22#include <regex>
23#include <sstream>
24#include <string>
25
26#include <cstring>
27
28#include "GPU_platform.hh"
29#include "GPU_vertex_format.hh"
30
32#include "mtl_common.hh"
33#include "mtl_context.hh"
34#include "mtl_debug.hh"
36#include "mtl_shader.hh"
39#include "mtl_shader_log.hh"
40#include "mtl_texture.hh"
41#include "mtl_vertex_buffer.hh"
42
43#include "GHOST_C-api.h"
44
45using namespace blender;
46using namespace blender::gpu;
47using namespace blender::gpu::shader;
48
49namespace blender::gpu {
50
/** Return a human-readable name for a #ShaderStage value, for use in logs
 * and compile-error output. Falls through to "Unknown Shader Stage" for
 * unrecognized values. */
51const char *to_string(ShaderStage stage)
52{
 53 switch (stage) {
 /* NOTE(review): the `case` labels (residual lines 54/56/58/60) are missing
  * from this extraction view; presumably VERTEX / FRAGMENT / COMPUTE /
  * default, in that order -- confirm against the repository source. */
 55 return "Vertex Shader";
 57 return "Fragment Shader";
 59 return "Compute Shader";
 61 break;
 62 }
 63 return "Unknown Shader Stage";
64}
65
66/* -------------------------------------------------------------------- */
69
70/* Create empty shader to be populated later. */
/* NOTE(review): the constructor signature (residual line 71) is missing from
 * this view; presumably `MTLShader::MTLShader(MTLContext *ctx, const char *name)`
 * -- confirm. Stores the owning context and allocates the temporary builder. */
72{
 73 context_ = ctx;
 74
 75 /* Create SHD builder to hold temporary resources until compilation is complete. */
 76 shd_builder_ = new MTLShaderBuilder();
 77
 78#ifndef NDEBUG
 79 /* Remove invalid symbols from shader name to ensure debug entry-point function name is valid. */
 80 for (uint i : IndexRange(strlen(this->name))) {
 81 char c = this->name[i];
 /* Keep alphanumeric characters unchanged; replace every other character
  * with '_' so the name is a valid MSL identifier fragment. */
 82 if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')) {
 83 }
 84 else {
 85 this->name[i] = '_';
 86 }
 87 }
 88#endif
89}
90
91/* Create shader from MSL source. */
/* NOTE(review): the first signature line (residual 92-93) is missing from this
 * view; presumably `MTLShader::MTLShader(MTLContext *ctx, MTLShaderInterface
 * *interface, ...)` given the `interface` use below -- confirm. Delegates to
 * the basic constructor, then assigns sources/entry points and finalizes
 * immediately (no ShaderCreateInfo). */
 94 const char *name,
 95 NSString *input_vertex_source,
 96 NSString *input_fragment_source,
 97 NSString *vert_function_name,
 98 NSString *frag_function_name)
 99 : MTLShader(ctx, name)
100{
 101 BLI_assert([vert_function_name length]);
 102 BLI_assert([frag_function_name length]);
 103
 104 this->set_vertex_function_name(vert_function_name);
 105 this->set_fragment_function_name(frag_function_name);
 106 this->shader_source_from_msl(input_vertex_source, input_fragment_source);
 107 this->set_interface(interface);
 108 this->finalize(nullptr);
109}
110
/* Destructor. NOTE(review): the signature line (residual 111) is missing from
 * this view; presumably `MTLShader::~MTLShader()`. Releases all Metal objects
 * owned by this shader in dependency order, then tears down any remaining
 * builder state. */
112{
 113 if (this->is_valid()) {
 114
 115 /* Free uniform data block. */
 116 if (push_constant_data_ != nullptr) {
 117 MEM_freeN(push_constant_data_);
 118 push_constant_data_ = nullptr;
 119 }
 120
 121 /* Free Metal resources.
 122 * This is done in the order of:
 123 * 1. PipelineState objects
 124 * 2. MTLFunctions
 125 * 3. MTLLibraries
 126 * So that each object releases it's references to the one following it. */
 127 if (pso_descriptor_ != nil) {
 128 [pso_descriptor_ release];
 129 pso_descriptor_ = nil;
 130 }
 131
 132 /* Free Pipeline Cache. */
 133 pso_cache_lock_.lock();
 134 for (const MTLRenderPipelineStateInstance *pso_inst : pso_cache_.values()) {
 135 /* Free pipeline state object. */
 136 if (pso_inst->pso) {
 137 [pso_inst->pso release];
 138 }
 139 /* Free vertex function. */
 140 if (pso_inst->vert) {
 141 [pso_inst->vert release];
 142 }
 143 /* Free fragment function. */
 144 if (pso_inst->frag) {
 145 [pso_inst->frag release];
 146 }
 147 delete pso_inst;
 148 }
 149 pso_cache_.clear();
 150
 151 /* Free Compute pipeline cache. */
 152 for (const MTLComputePipelineStateInstance *pso_inst : compute_pso_cache_.values()) {
 153 /* Free pipeline state object. */
 154 if (pso_inst->pso) {
 155 [pso_inst->pso release];
 156 }
 157 /* Free compute function. */
 158 if (pso_inst->compute) {
 159 [pso_inst->compute release];
 160 }
 161 }
 162 compute_pso_cache_.clear();
 163 pso_cache_lock_.unlock();
 164
 165 /* Free shader libraries. */
 166 if (shader_library_vert_ != nil) {
 167 [shader_library_vert_ release];
 168 shader_library_vert_ = nil;
 169 }
 170 if (shader_library_frag_ != nil) {
 171 [shader_library_frag_ release];
 172 shader_library_frag_ = nil;
 173 }
 174 if (shader_library_compute_ != nil) {
 175 [shader_library_compute_ release];
 176 shader_library_compute_ = nil;
 177 }
 178
 179 /* NOTE(Metal): #ShaderInterface deletion is handled in the super destructor `~Shader()`. */
 180 }
 181 valid_ = false;
 182
 /* The builder only exists if compilation never completed (or failed);
  * `finalize` normally deletes it on both success and failure paths. */
 183 if (shd_builder_ != nullptr) {
 184 delete shd_builder_;
 185 shd_builder_ = nullptr;
 186 }
187}
188
/* Initialization hook from the GPU module. The create-info is unused on the
 * Metal backend; only record whether this shader is compiled as part of a
 * batch (asynchronous) compilation. */
void MTLShader::init(const shader::ShaderCreateInfo & /*info*/, bool is_batch_compilation)
{
  this->async_compilation_ = is_batch_compilation;
}
193
195
196/* -------------------------------------------------------------------- */
199
/* Collect GLSL vertex-stage source chunks into one string for later GLSL->MSL
 * translation in `finalize`. NOTE(review): the signature line (residual 200)
 * is missing from this view; presumably
 * `void MTLShader::vertex_shader_from_glsl(MutableSpan<StringRefNull> sources)`
 * -- confirm. */
201{
 202 /* Flag source as not being compiled from native MSL. */
 203 BLI_assert(shd_builder_ != nullptr);
 204 shd_builder_->source_from_msl_ = false;
 205
 206 /* Remove #version tag entry. */
 207 sources[SOURCES_INDEX_VERSION] = "";
 208
 209 /* Consolidate GLSL vertex sources. */
 210 std::stringstream ss;
 211 for (int i = 0; i < sources.size(); i++) {
 212 ss << sources[i] << std::endl;
 213 }
 214 shd_builder_->glsl_vertex_source_ = ss.str();
215}
216
/* Geometry shaders are not supported by the Metal backend; this stub only
 * logs an error. NOTE(review): signature line (residual 217) missing from
 * this view. */
218{
 219 MTL_LOG_ERROR("MTLShader::geometry_shader_from_glsl - Geometry shaders unsupported!");
220}
221
/* Collect GLSL fragment-stage source chunks into one string for later
 * GLSL->MSL translation in `finalize`. NOTE(review): signature line (residual
 * 222) missing from this view; presumably mirrors `vertex_shader_from_glsl`. */
223{
 224 /* Flag source as not being compiled from native MSL. */
 225 BLI_assert(shd_builder_ != nullptr);
 226 shd_builder_->source_from_msl_ = false;
 227
 228 /* Remove #version tag entry. */
 229 sources[SOURCES_INDEX_VERSION] = "";
 230
 231 /* Consolidate GLSL fragment sources. */
 232 std::stringstream ss;
 233 int i;
 234 for (i = 0; i < sources.size(); i++) {
 235 ss << sources[i] << '\n';
 236 }
 237 shd_builder_->glsl_fragment_source_ = ss.str();
238}
239
/* Collect GLSL compute-stage source chunks into one string for later
 * GLSL->MSL translation in `finalize`. NOTE(review): signature line (residual
 * 240) missing from this view; presumably mirrors `vertex_shader_from_glsl`. */
241{
 242 /* Flag source as not being compiled from native MSL. */
 243 BLI_assert(shd_builder_ != nullptr);
 244 shd_builder_->source_from_msl_ = false;
 245
 246 /* Remove #version tag entry. */
 247 sources[SOURCES_INDEX_VERSION] = "";
 248
 249 /* Consolidate GLSL compute sources. */
 250 std::stringstream ss;
 251 for (int i = 0; i < sources.size(); i++) {
 252 ss << sources[i] << std::endl;
 253 }
 254 shd_builder_->glsl_compute_source_ = ss.str();
255}
256
/* Finalize shader compilation: translate GLSL to MSL when needed, compile the
 * per-stage MTLLibraries, and prepare PSO descriptors / push-constant storage.
 * Returns false on translation or compile failure (builder is freed on every
 * exit path). NOTE(review): the signature line (residual 257) and several
 * interior lines (switch `case` labels, a couple of declarations and the
 * `options:` argument of `newLibraryWithSource:`) are missing from this
 * extraction view -- confirm against repository source before relying on the
 * exact control flow. */
258{
 259 /* Check if Shader has already been finalized. */
 260 if (this->is_valid()) {
 261 MTL_LOG_ERROR("Shader (%p) '%s' has already been finalized!", this, this->name_get().c_str());
 262 }
 263
 264 /* Compute shaders. */
 265 bool is_compute = false;
 266 if (shd_builder_->glsl_compute_source_.empty() == false) {
 267 BLI_assert_msg(info != nullptr, "Compute shaders must use CreateInfo.\n");
 268 BLI_assert_msg(!shd_builder_->source_from_msl_, "Compute shaders must compile from GLSL.");
 269 is_compute = true;
 270 }
 271
 272 /* Perform GLSL to MSL source translation. */
 273 BLI_assert(shd_builder_ != nullptr);
 274 if (!shd_builder_->source_from_msl_) {
 275 bool success = generate_msl_from_glsl(info);
 276 if (!success) {
 277 /* GLSL to MSL translation has failed, or is unsupported for this shader. */
 278 valid_ = false;
 279 BLI_assert_msg(false, "Shader translation from GLSL to MSL has failed. \n");
 280
 281 /* Create empty interface to allow shader to be silently used. */
 282 MTLShaderInterface *mtl_interface = new MTLShaderInterface(this->name_get().c_str());
 283 this->set_interface(mtl_interface);
 284
 285 /* Release temporary compilation resources. */
 286 delete shd_builder_;
 287 shd_builder_ = nullptr;
 288 return false;
 289 }
 290 }
 291
 293 /* Tuning parameters for compute kernels. */
 294 if (is_compute) {
 295 int threadgroup_tuning_param = info->mtl_max_threads_per_threadgroup_;
 296 if (threadgroup_tuning_param > 0) {
 297 maxTotalThreadsPerThreadgroup_Tuning_ = threadgroup_tuning_param;
 298 }
 299 }
 300
 301 /* Ensure we have a valid shader interface. */
 302 MTLShaderInterface *mtl_interface = this->get_interface();
 303 BLI_assert(mtl_interface != nullptr);
 304
 305 /* Verify Context handle, fetch device and compile shader. */
 306 BLI_assert(context_);
 307 id<MTLDevice> device = context_->device;
 308 BLI_assert(device != nil);
 309
 310 /* Ensure source and stage entry-point names are set. */
 311 BLI_assert(shd_builder_ != nullptr);
 312 if (is_compute) {
 313 /* Compute path. */
 314 BLI_assert([compute_function_name_ length] > 0);
 315 BLI_assert([shd_builder_->msl_source_compute_ length] > 0);
 316 }
 317 else {
 318 /* Vertex/Fragment path. */
 319 BLI_assert([vertex_function_name_ length] > 0);
 320 BLI_assert([fragment_function_name_ length] > 0);
 321 BLI_assert([shd_builder_->msl_source_vert_ length] > 0);
 322 }
 323
 324 @autoreleasepool {
 325 MTLCompileOptions *options = [[[MTLCompileOptions alloc] init] autorelease];
 326 options.languageVersion = MTLLanguageVersion2_2;
 327 options.fastMathEnabled = YES;
 328 options.preserveInvariance = YES;
 329
 330 /* Raster order groups for tile data in struct require Metal 2.3.
 331 * Retaining Metal 2.2. for old shaders to maintain backwards
 332 * compatibility for existing features. */
 333 if (info->subpass_inputs_.is_empty() == false) {
 334 options.languageVersion = MTLLanguageVersion2_3;
 335 }
 336#if defined(MAC_OS_VERSION_14_0)
 337 if (@available(macOS 14.00, *)) {
 338 /* Texture atomics require Metal 3.1. */
 339 if (bool(info->builtins_ & BuiltinBits::TEXTURE_ATOMIC)) {
 340 options.languageVersion = MTLLanguageVersion3_1;
 341 }
 342 }
 343#endif
 344
 345 NSString *source_to_compile = shd_builder_->msl_source_vert_;
 346
 347 /* Vertex/Fragment compile stages 0 and/or 1.
 348 * Compute shaders compile as stage 2. */
 349 ShaderStage initial_stage = (is_compute) ? ShaderStage::COMPUTE : ShaderStage::VERTEX;
 350 ShaderStage src_stage = initial_stage;
 351 uint8_t total_stages = (is_compute) ? 1 : 2;
 352
 353 for (int stage_count = 0; stage_count < total_stages; stage_count++) {
 354 int arg_buf_samplers_size = 0;
 /* NOTE(review): `case` labels (residual 356/360/364) are missing here;
  * presumably VERTEX / FRAGMENT / COMPUTE respectively -- confirm. */
 355 switch (src_stage) {
 357 source_to_compile = shd_builder_->msl_source_vert_;
 358 arg_buf_samplers_size = arg_buf_samplers_vert_;
 359 break;
 361 source_to_compile = shd_builder_->msl_source_frag_;
 362 arg_buf_samplers_size = arg_buf_samplers_frag_;
 363 break;
 365 source_to_compile = shd_builder_->msl_source_compute_;
 366 arg_buf_samplers_size = arg_buf_samplers_comp_;
 367 break;
 368 default:
 370 break;
 371 };
 372
 373 std::stringstream ss;
 374 /* Inject constant work group sizes. */
 375 if (src_stage == ShaderStage::COMPUTE) {
 376 ss << "#define MTL_WORKGROUP_SIZE_X " << info->compute_layout_.local_size_x << "\n";
 377 ss << "#define MTL_WORKGROUP_SIZE_Y " << info->compute_layout_.local_size_y << "\n";
 378 ss << "#define MTL_WORKGROUP_SIZE_Z " << info->compute_layout_.local_size_z << "\n";
 379 }
 380 ss << "#define MTL_ARGUMENT_BUFFER_NUM_SAMPLERS " << arg_buf_samplers_size << "\n";
 381
 382 if (bool(info->builtins_ & BuiltinBits::TEXTURE_ATOMIC) &&
 383 MTLBackend::get_capabilities().supports_texture_atomics)
 384 {
 385 ss << "#define MTL_SUPPORTS_TEXTURE_ATOMICS 1\n";
 386 }
 387
 388 shader::GeneratedSource defines_src{"gpu_shader_msl_defines.msl", {}, ss.str()};
 389 shader::GeneratedSourceList generated_sources{defines_src};
 390
 391 /* Concatenate common source. */
 /* NOTE(review): the declaration of `compatibility_src` (residual 392) is
  * missing from this view. */
 393 "gpu_shader_compat_msl.msl", generated_sources);
 394 std::string compatibility_concat = fmt::to_string(fmt::join(compatibility_src, ""));
 395
 396 std::string final_src = compatibility_concat + [source_to_compile UTF8String];
 397 NSString *source_with_header = [NSString stringWithUTF8String:final_src.c_str()];
 398 [source_with_header retain];
 399
 400 /* Prepare Shader Library. */
 401 NSError *error = nullptr;
 /* NOTE(review): the `options:` keyword line (residual 403) is missing from
  * this view; presumably `options:options`. */
 402 id<MTLLibrary> library = [device newLibraryWithSource:source_with_header
 404 error:&error];
 405 if (error) {
 406 /* Only exit out if genuine error and not warning. */
 407 if ([[error localizedDescription] rangeOfString:@"Compilation succeeded"].location ==
 408 NSNotFound)
 409 {
 410 const char *errors_c_str = [[error localizedDescription] UTF8String];
 411 const StringRefNull source = [source_to_compile UTF8String];
 412
 /* NOTE(review): declaration of `parser` (residual 413) is missing. */
 414 print_log({source}, errors_c_str, to_string(src_stage), true, &parser);
 415
 416 /* Release temporary compilation resources. */
 417 delete shd_builder_;
 418 shd_builder_ = nullptr;
 419 return false;
 420 }
 421 }
 422
 423 BLI_assert(library != nil);
 424
 425 switch (src_stage) {
 426 case ShaderStage::VERTEX: {
 427 /* Store generated library and assign debug name. */
 428 shader_library_vert_ = library;
 429 shader_library_vert_.label = [NSString stringWithUTF8String:this->name];
 430 } break;
 432 /* Store generated library for fragment shader and assign debug name. */
 433 shader_library_frag_ = library;
 434 shader_library_frag_.label = [NSString stringWithUTF8String:this->name];
 435 } break;
 437 /* Store generated library for fragment shader and assign debug name. */
 438 shader_library_compute_ = library;
 439 shader_library_compute_.label = [NSString stringWithUTF8String:this->name];
 440 } break;
 441 case ShaderStage::ANY: {
 442 /* Suppress warnings. */
 444 } break;
 445 }
 446
 447 [source_with_header autorelease];
 448
 449 /* Move onto next compilation stage. */
 450 if (!is_compute) {
 451 src_stage = ShaderStage::FRAGMENT;
 452 }
 453 else {
 454 break;
 455 }
 456 }
 457
 458 /* Create descriptors.
 459 * Each shader type requires a differing descriptor. */
 460 if (!is_compute) {
 461 /* Prepare Render pipeline descriptor. */
 462 pso_descriptor_ = [[MTLRenderPipelineDescriptor alloc] init];
 463 pso_descriptor_.label = [NSString stringWithUTF8String:this->name];
 464 }
 465
 466 /* Shader has successfully been created. */
 467 valid_ = true;
 468
 469 /* Prepare backing data storage for local uniforms. */
 470 const MTLShaderBufferBlock &push_constant_block = mtl_interface->get_push_constant_block();
 471 if (push_constant_block.size > 0) {
 472 push_constant_data_ = MEM_callocN(push_constant_block.size, __func__);
 474 }
 475 else {
 476 push_constant_data_ = nullptr;
 477 }
 478
 479 /* If this is a compute shader, bake base PSO for compute straight-away.
 480 * NOTE: This will compile the base unspecialized variant. */
 481 if (is_compute) {
 482 /* Set descriptor to default shader constants */
 483 MTLComputePipelineStateDescriptor compute_pipeline_descriptor(this->constants->values);
 484
 485 this->bake_compute_pipeline_state(context_, compute_pipeline_descriptor);
 486 }
 487 }
 488
 489 /* Release temporary compilation resources. */
 490 delete shd_builder_;
 491 shd_builder_ = nullptr;
 492 return true;
493}
494
496
497/* -------------------------------------------------------------------- */
500
/* Make this shader the active shader in the context's pipeline state, copying
 * the supplied specialization-constant state. NOTE(review): the signature line
 * (residual 501) and two interior lines (residual 503, 508 -- presumably the
 * context fetch and an `MTL_LOG_WARNING(` call) are missing from this view. */
502{
 504 /* Copy constants state. */
 505 ctx->specialization_constants_set(constants_state);
 506
 /* A shader without a valid Metal implementation is still bound so draws are
  * skipped gracefully rather than crashing. */
 507 if (interface == nullptr || !this->is_valid()) {
 509 "MTLShader::bind - Shader '%s' has no valid implementation in Metal, draw calls will be "
 510 "skipped.",
 511 this->name_get().c_str());
 512 }
 513 ctx->pipeline_state.active_shader = this;
514}
515
/* Clear the context's active shader. NOTE(review): the signature line
 * (residual 516) and the context fetch (residual 518) are missing from this
 * view. */
517{
 519 ctx->pipeline_state.active_shader = nullptr;
520}
521
/* Copy float uniform data into the local push-constant backing store, flagging
 * the push-constant block dirty only when the incoming bytes actually differ.
 * Handles the float3 -> padded-float4 re-layout for `vec3`/`mat3` uniforms.
 * NOTE(review): several lines are missing from this extraction view (residual
 * 530, 548, 575-576, 600-601, 612, 620) -- presumably a log call, assert
 * messages and `push_constant_bindstate_mark_dirty(true);` calls -- confirm. */
522void MTLShader::uniform_float(int location, int comp_len, int array_size, const float *data)
523{
 524 BLI_assert(this);
 525 if (!this->is_valid()) {
 526 return;
 527 }
 528 MTLShaderInterface *mtl_interface = get_interface();
 529 if (location < 0 || location >= mtl_interface->get_total_uniforms()) {
 531 "Uniform location %d is not valid in Shader %s", location, this->name_get().c_str());
 532 return;
 533 }
 534
 535 /* Fetch more information about uniform from interface. */
 536 const MTLShaderUniform &uniform = mtl_interface->get_uniform(location);
 537
 538 /* Prepare to copy data into local shader push constant memory block. */
 539 BLI_assert(push_constant_data_ != nullptr);
 540 uint8_t *dest_ptr = (uint8_t *)push_constant_data_;
 541 dest_ptr += uniform.byte_offset;
 542 uint32_t copy_size = sizeof(float) * comp_len * array_size;
 543
 544 /* Test per-element size. It is valid to copy less array elements than the total, but each
 545 * array element needs to match. */
 546 uint32_t source_per_element_size = sizeof(float) * comp_len;
 547 uint32_t dest_per_element_size = uniform.size_in_bytes / uniform.array_len;
 549 source_per_element_size <= dest_per_element_size,
 550 "source Per-array-element size must be smaller than destination storage capacity for "
 551 "that data");
 552
 553 if (source_per_element_size < dest_per_element_size) {
 554 switch (uniform.type) {
 555
 556 /* Special case for handling 'vec3' array upload. */
 557 case MTL_DATATYPE_FLOAT3: {
 558 int numvecs = uniform.array_len;
 559 uint8_t *data_c = (uint8_t *)data;
 560
 561 /* It is more efficient on the host to only modify data if it has changed.
 562 * Data modifications are small, so memory comparison is cheap.
 563 * If uniforms have remained unchanged, then we avoid both copying
 564 * data into the local uniform struct, and upload of the modified uniform
 565 * contents in the command stream. */
 566 bool changed = false;
 567 for (int i = 0; i < numvecs; i++) {
 568 changed = changed || (memcmp((void *)dest_ptr, (void *)data_c, sizeof(float) * 3) != 0);
 569 if (changed) {
 570 memcpy((void *)dest_ptr, (void *)data_c, sizeof(float) * 3);
 571 }
 /* Source is tightly-packed float3; destination stride is float4. */
 572 data_c += sizeof(float) * 3;
 573 dest_ptr += sizeof(float) * 4;
 574 }
 575 if (changed) {
 577 }
 578 return;
 579 }
 580
 581 /* Special case for handling 'mat3' upload. */
 /* NOTE(review): the `case` label (residual 582, presumably
  * MTL_DATATYPE_FLOAT3x3 or similar) is missing from this view. */
 583 int numvecs = 3 * uniform.array_len;
 584 uint8_t *data_c = (uint8_t *)data;
 585
 586 /* It is more efficient on the host to only modify data if it has changed.
 587 * Data modifications are small, so memory comparison is cheap.
 588 * If uniforms have remained unchanged, then we avoid both copying
 589 * data into the local uniform struct, and upload of the modified uniform
 590 * contents in the command stream. */
 591 bool changed = false;
 592 for (int i = 0; i < numvecs; i++) {
 593 changed = changed || (memcmp((void *)dest_ptr, (void *)data_c, sizeof(float) * 3) != 0);
 594 if (changed) {
 595 memcpy((void *)dest_ptr, (void *)data_c, sizeof(float) * 3);
 596 }
 597 data_c += sizeof(float) * 3;
 598 dest_ptr += sizeof(float) * 4;
 599 }
 600 if (changed) {
 602 }
 603 return;
 604 }
 605 default:
 606 shader_debug_printf("INCOMPATIBLE UNIFORM TYPE: %d\n", uniform.type);
 607 break;
 608 }
 609 }
 610
 611 /* Debug checks. */
 613 copy_size <= uniform.size_in_bytes,
 614 "Size of provided uniform data is greater than size specified in Shader interface\n");
 615
 616 /* Only flag UBO as modified if data is different -- This can avoid re-binding of unmodified
 617 * local uniform data. */
 618 bool data_changed = (memcmp((void *)dest_ptr, (void *)data, copy_size) != 0);
 619 if (data_changed) {
 621 memcpy((void *)dest_ptr, (void *)data, copy_size);
 622 }
623}
624
/* Copy integer uniform data into the local push-constant backing store,
 * down-converting to uchar/ushort for 1/2-byte storage slots, and flagging the
 * block dirty only when the bytes differ. Texture-slot re-mapping via uniforms
 * is rejected (unsupported on Metal). NOTE(review): a few lines are missing
 * from this extraction view (residual 638, 646, 659, 680, 694) -- presumably
 * log-call openings, an assert macro and the dirty-mark call -- confirm. */
625void MTLShader::uniform_int(int location, int comp_len, int array_size, const int *data)
626{
 627 BLI_assert(this);
 628 if (!this->is_valid()) {
 629 return;
 630 }
 631
 632 /* NOTE(Metal): Invalidation warning for uniform re-mapping of texture slots, unsupported in
 633 * Metal, as we cannot point a texture binding at a different slot. */
 634 MTLShaderInterface *mtl_interface = this->get_interface();
 635 if (location >= mtl_interface->get_total_uniforms() &&
 636 location < (mtl_interface->get_total_uniforms() + mtl_interface->get_total_textures()))
 637 {
 639 "Texture uniform location re-mapping unsupported in Metal. (Possibly also bad uniform "
 640 "location %d)",
 641 location);
 642 return;
 643 }
 644
 645 if (location < 0 || location >= mtl_interface->get_total_uniforms()) {
 647 "Uniform is not valid at location %d - Shader %s", location, this->name_get().c_str());
 648 return;
 649 }
 650
 651 /* Fetch more information about uniform from interface. */
 652 const MTLShaderUniform &uniform = mtl_interface->get_uniform(location);
 653
 654 /* Determine data location in uniform block. */
 655 BLI_assert(push_constant_data_ != nullptr);
 656 uint8_t *ptr = (uint8_t *)push_constant_data_;
 657 ptr += uniform.byte_offset;
 658
 660 const char *data_to_copy = (char *)data;
 661 uint data_size_to_copy = sizeof(int) * comp_len * array_size;
 662
 663 /* Special cases for small types support where storage is shader push constant buffer is smaller
 664 * than the incoming data. */
 /* `us`/`uc` must outlive the memcpy below, hence declared at this scope. */
 665 ushort us;
 666 uchar uc;
 667 if (uniform.size_in_bytes == 1) {
 668 /* Convert integer storage value down to uchar. */
 669 data_size_to_copy = uniform.size_in_bytes;
 670 uc = *data;
 671 data_to_copy = (char *)&uc;
 672 }
 673 else if (uniform.size_in_bytes == 2) {
 674 /* Convert integer storage value down to ushort. */
 675 data_size_to_copy = uniform.size_in_bytes;
 676 us = *data;
 677 data_to_copy = (char *)&us;
 678 }
 679 else {
 681 (mtl_get_data_type_alignment(uniform.type) % sizeof(int)) == 0,
 682 "When uniform inputs are provided as integers, the underlying type must adhere "
 683 "to alignment per-component. If this test fails, the input data cannot be directly copied "
 684 "to the buffer. e.g. Array of small types uchar/bool/ushort etc; are currently not "
 685 "handled.");
 686 }
 687
 688 /* Copy data into local block. Only flag UBO as modified if data is different
 689 * This can avoid re-binding of unmodified local uniform data, reducing
 690 * the total number of copy operations needed and data transfers between
 691 * CPU and GPU. */
 692 bool data_changed = (memcmp((void *)ptr, (void *)data_to_copy, data_size_to_copy) != 0);
 693 if (data_changed) {
 695 memcpy((void *)ptr, (void *)data_to_copy, data_size_to_copy);
 696 }
697}
698
/* Return whether locally-stored push-constant data has been modified since it
 * was last flushed (see `push_constant_bindstate_mark_dirty`). NOTE(review):
 * the signature line (residual 699) is missing from this view. */
700{
 701 return push_constant_modified_;
702}
703
/* Set/clear the push-constant modified flag, controlling whether the local
 * uniform block gets re-uploaded at next bind. NOTE(review): the signature
 * line (residual 704) is missing from this view. */
705{
 706 push_constant_modified_ = is_dirty;
707}
708
709/* Attempts to pre-generate a PSO based on the parent shaders PSO
710 * (Render shaders only) */
/* NOTE(review): the signature line (residual 711) and the declarations of
 * `descriptors` / `prim_classes` (residual 718-719) are missing from this
 * view; presumably Vectors of descriptors and topology classes -- confirm. */
712{
 713 if (parent_shader_ != nullptr) {
 715 MTLShader *parent_mtl = static_cast<MTLShader *>(parent_shader_);
 716
 717 /* Extract PSO descriptors from parent shader. */
 720
 /* Snapshot the parent cache under its lock, then bake outside of it. */
 721 parent_mtl->pso_cache_lock_.lock();
 722 for (const auto &pso_entry : parent_mtl->pso_cache_.items()) {
 723 const MTLRenderPipelineStateDescriptor &pso_descriptor = pso_entry.key;
 724 const MTLRenderPipelineStateInstance *pso_inst = pso_entry.value;
 725 descriptors.append(pso_descriptor);
 726 prim_classes.append(pso_inst->prim_type);
 727 }
 728 parent_mtl->pso_cache_lock_.unlock();
 729
 730 /* Warm shader cache with applied limit.
 731 * If limit is <= 0, compile all PSO permutations. */
 732 limit = (limit > 0) ? limit : descriptors.size();
 733 for (int i : IndexRange(min_ii(descriptors.size(), limit))) {
 734 const MTLRenderPipelineStateDescriptor &pso_descriptor = descriptors[i];
 735 const MTLPrimitiveTopologyClass &prim_class = prim_classes[i];
 736 bake_pipeline_state(ctx, prim_class, pso_descriptor);
 737 }
 738 }
739}
740
742
743/* -------------------------------------------------------------------- */
746
/* Record the vertex-stage entry-point name. The NSString pointer is stored
 * as-is (no copy or retain taken here). */
void MTLShader::set_vertex_function_name(NSString *vert_function_name)
{
  this->vertex_function_name_ = vert_function_name;
}
751
/* Record the fragment-stage entry-point name. The NSString pointer is stored
 * as-is (no copy or retain taken here). */
void MTLShader::set_fragment_function_name(NSString *frag_function_name)
{
  this->fragment_function_name_ = frag_function_name;
}
756
/* Record the compute-stage entry-point name. The NSString pointer is stored
 * as-is (no copy or retain taken here). */
void MTLShader::set_compute_function_name(NSString *compute_function_name)
{
  this->compute_function_name_ = compute_function_name;
}
761
/* Supply pre-translated MSL sources for the vertex and fragment stages, and
 * mark the builder so `finalize` skips the GLSL -> MSL translation step. */
void MTLShader::shader_source_from_msl(NSString *input_vertex_source,
                                       NSString *input_fragment_source)
{
  BLI_assert(shd_builder_ != nullptr);
  shd_builder_->source_from_msl_ = true;
  shd_builder_->msl_source_vert_ = input_vertex_source;
  shd_builder_->msl_source_frag_ = input_fragment_source;
}
770
/* Supply a pre-translated MSL source for the compute stage, and mark the
 * builder so `finalize` skips the GLSL -> MSL translation step. */
void MTLShader::shader_compute_source_from_msl(NSString *input_compute_source)
{
  BLI_assert(shd_builder_ != nullptr);
  shd_builder_->source_from_msl_ = true;
  shd_builder_->msl_source_compute_ = input_compute_source;
}
777
/* Assign the shader interface exactly once. NOTE(review): the signature line
 * (residual 778) and the actual assignment (residual 782, presumably
 * `Shader::interface = interface;`) are missing from this extraction view --
 * confirm against repository source. */
779{
 780 /* Assign gpu::Shader super-class interface. */
 781 BLI_assert(Shader::interface == nullptr);
783}
784
786
787/* -------------------------------------------------------------------- */
791
/* Populate an MTLFunctionConstantValues object from the shader's declared
 * specialization constants, mapping each Blender shader::Type to the matching
 * MTLDataType. NOTE(review): the function-name line (residual 792-795) and
 * the computation of `index` (residual 803) are missing from this extraction
 * view -- confirm against repository source. */
 796 MTLFunctionConstantValues *values,
 797 const shader::SpecializationConstants &shader_constants,
 798 const SpecializationStateDescriptor &specialization_descriptor)
799{
 800 for (auto i : shader_constants.types.index_range()) {
 801 const shader::SpecializationConstant::Value &value = specialization_descriptor.values[i];
 802
 804 switch (shader_constants.types[i]) {
 805 case Type::int_t:
 806 [values setConstantValue:&value.i type:MTLDataTypeInt atIndex:index];
 807 break;
 808 case Type::uint_t:
 809 [values setConstantValue:&value.u type:MTLDataTypeUInt atIndex:index];
 810 break;
 /* Booleans are stored in the unsigned slot of the value union. */
 811 case Type::bool_t:
 812 [values setConstantValue:&value.u type:MTLDataTypeBool atIndex:index];
 813 break;
 814 case Type::float_t:
 815 [values setConstantValue:&value.f type:MTLDataTypeFloat atIndex:index];
 816 break;
 817 default:
 818 BLI_assert_msg(false, "Unsupported custom constant type.");
 819 break;
 820 }
 821 }
822}
823
824
825/* -------------------------------------------------------------------- */
828
/* Resolve the context's current frame-buffer and pipeline state into the
 * global pipeline descriptor, then bake (or fetch from cache) the matching
 * render PSO. NOTE(review): the function-name line (residual 841) and several
 * interior lines (residual 844, 858, 884, 896-900 -- presumably the state
 * manager fetch, a `gpu_texture_format_to_metal(` call and the blend-factor
 * assignments) are missing from this extraction view -- confirm. */
 842 MTLContext *ctx, MTLPrimitiveTopologyClass prim_type)
843{
 845 /* NOTE(Metal): PSO cache can be accessed from multiple threads, though these operations should
 846 * be thread-safe due to organization of high-level renderer. If there are any issues, then
 847 * access can be guarded as appropriate. */
 848 BLI_assert(this->is_valid());
 849
 850 /* NOTE(Metal): Vertex input assembly description will have been populated externally
 851 * via #MTLBatch or #MTLImmediate during binding or draw. */
 852
 853 /* Resolve Context Frame-buffer state. */
 854 MTLFrameBuffer *framebuffer = ctx->get_current_framebuffer();
 855
 856 /* Update global pipeline descriptor. */
 857 MTLStateManager *state_manager = static_cast<MTLStateManager *>(
 859 MTLRenderPipelineStateDescriptor &pipeline_descriptor = state_manager->get_pipeline_descriptor();
 860
 861 pipeline_descriptor.num_color_attachments = 0;
 862 for (int attachment = 0; attachment < GPU_FB_MAX_COLOR_ATTACHMENT; attachment++) {
 863 MTLAttachment color_attachment = framebuffer->get_color_attachment(attachment);
 864
 865 if (color_attachment.used) {
 866 /* If SRGB is disabled and format is SRGB, use color data directly with no conversions
 867 * between linear and SRGB. */
 868 MTLPixelFormat mtl_format = gpu_texture_format_to_metal(
 869 color_attachment.texture->format_get());
 870 if (framebuffer->get_is_srgb() && !framebuffer->get_srgb_enabled()) {
 871 mtl_format = MTLPixelFormatRGBA8Unorm;
 872 }
 873 pipeline_descriptor.color_attachment_format[attachment] = mtl_format;
 874 }
 875 else {
 876 pipeline_descriptor.color_attachment_format[attachment] = MTLPixelFormatInvalid;
 877 }
 878
 879 pipeline_descriptor.num_color_attachments += (color_attachment.used) ? 1 : 0;
 880 }
 881 MTLAttachment depth_attachment = framebuffer->get_depth_attachment();
 882 MTLAttachment stencil_attachment = framebuffer->get_stencil_attachment();
 883 pipeline_descriptor.depth_attachment_format = (depth_attachment.used) ?
 885 depth_attachment.texture->format_get()) :
 886 MTLPixelFormatInvalid;
 887 pipeline_descriptor.stencil_attachment_format =
 888 (stencil_attachment.used) ?
 889 gpu_texture_format_to_metal(stencil_attachment.texture->format_get()) :
 890 MTLPixelFormatInvalid;
 891
 892 /* Resolve Context Pipeline State (required by PSO). */
 893 pipeline_descriptor.color_write_mask = ctx->pipeline_state.color_write_mask;
 894 pipeline_descriptor.blending_enabled = ctx->pipeline_state.blending_enabled;
 895 pipeline_descriptor.alpha_blend_op = ctx->pipeline_state.alpha_blend_op;
 896 pipeline_descriptor.rgb_blend_op = ctx->pipeline_state.rgb_blend_op;
 901 pipeline_descriptor.point_size = ctx->pipeline_state.point_size;
 902
 903 /* Resolve clipping plane enablement. */
 904 pipeline_descriptor.clipping_plane_enable_mask = 0;
 905 for (const int plane : IndexRange(6)) {
 906 pipeline_descriptor.clipping_plane_enable_mask =
 907 pipeline_descriptor.clipping_plane_enable_mask |
 908 ((ctx->pipeline_state.clip_distance_enabled[plane]) ? (1 << plane) : 0);
 909 }
 910
 911 /* Primitive Type -- Primitive topology class needs to be specified for layered rendering. */
 912 bool requires_specific_topology_class = uses_gpu_layer || uses_gpu_viewport_index ||
 913 prim_type == MTLPrimitiveTopologyClassPoint;
 914 pipeline_descriptor.vertex_descriptor.prim_topology_class =
 915 (requires_specific_topology_class) ? prim_type : MTLPrimitiveTopologyClassUnspecified;
 916
 917 /* Specialization configuration. */
 918 pipeline_descriptor.specialization_state = {ctx->constants_state.values};
 919
 920 /* Bake pipeline state using global descriptor. */
 921 return bake_pipeline_state(ctx, prim_type, pipeline_descriptor);
922}
923
924/* Variant which bakes a pipeline state based on an existing MTLRenderPipelineStateDescriptor.
925 * This function should be callable from a secondary compilation thread. */
927 MTLContext *ctx,
928 MTLPrimitiveTopologyClass prim_type,
929 const MTLRenderPipelineStateDescriptor &pipeline_descriptor)
930{
931 /* Fetch shader interface. */
932 MTLShaderInterface *mtl_interface = this->get_interface();
933 BLI_assert(mtl_interface);
934 BLI_assert(this->is_valid());
935
936 /* Check if current PSO exists in the cache. */
937 pso_cache_lock_.lock();
938 MTLRenderPipelineStateInstance **pso_lookup = pso_cache_.lookup_ptr(pipeline_descriptor);
939 MTLRenderPipelineStateInstance *pipeline_state = (pso_lookup) ? *pso_lookup : nullptr;
940 pso_cache_lock_.unlock();
941
942 if (pipeline_state != nullptr) {
943 return pipeline_state;
944 }
945
946 /* TODO: When fetching a specialized variant of a shader, if this does not yet exist, verify
947 * whether the base unspecialized variant exists:
948 * - If unspecialized version exists: Compile specialized PSO asynchronously, returning base PSO
949 * and flagging state of specialization in cache as being built.
950 * - If unspecialized does NOT exist, build specialized version straight away, as we pay the
951 * cost of compilation in both cases regardless. */
952
953 /* Generate new Render Pipeline State Object (PSO). */
954 @autoreleasepool {
955 /* Prepare Render Pipeline Descriptor. */
956
957 /* Setup function specialization constants, used to modify and optimize
958 * generated code based on current render pipeline configuration. */
959 MTLFunctionConstantValues *values = [[MTLFunctionConstantValues new] autorelease];
960
961 /* Custom function constant values: */
963 values, *this->constants, pipeline_descriptor.specialization_state);
964
965 /* Prepare Vertex descriptor based on current pipeline vertex binding state. */
966 MTLRenderPipelineDescriptor *desc = pso_descriptor_;
967 [desc reset];
968 pso_descriptor_.label = [NSString stringWithUTF8String:this->name];
969
970 /* Offset the bind index for Uniform buffers such that they begin after the VBO
971 * buffer bind slots. `MTL_uniform_buffer_base_index` is passed as a function
972 * specialization constant, customized per unique pipeline state permutation.
973 *
974 * NOTE: For binding point compaction, we could use the number of VBOs present
975 * in the current PSO configuration `pipeline_descriptors.vertex_descriptor.num_vert_buffers`).
976 * However, it is more efficient to simply offset the uniform buffer base index to the
977 * maximal number of VBO bind-points, as then UBO bind-points for similar draw calls
978 * will align and avoid the requirement for additional binding. */
979 int MTL_uniform_buffer_base_index = pipeline_descriptor.vertex_descriptor.num_vert_buffers + 1;
980
981 /* Null buffer index is used if an attribute is not found in the
982 * bound VBOs #VertexFormat. */
983 int null_buffer_index = pipeline_descriptor.vertex_descriptor.num_vert_buffers;
984 bool using_null_buffer = false;
985
986 {
987 for (const uint i :
988 IndexRange(pipeline_descriptor.vertex_descriptor.max_attribute_value + 1))
989 {
990
991 /* Metal back-end attribute descriptor state. */
992 const MTLVertexAttributeDescriptorPSO &attribute_desc =
993 pipeline_descriptor.vertex_descriptor.attributes[i];
994
995 /* Flag format conversion */
996 /* In some cases, Metal cannot implicitly convert between data types.
997 * In these instances, the fetch mode #GPUVertFetchMode as provided in the vertex format
998 * is passed in, and used to populate function constants named: MTL_AttributeConvert0..15.
999 *
1000 * It is then the responsibility of the vertex shader to perform any necessary type
1001 * casting.
1002 *
1003 * See `mtl_shader.hh` for more information. Relevant Metal API documentation:
1004 * https://developer.apple.com/documentation/metal/mtlvertexattributedescriptor/1516081-format?language=objc
1005 */
1006 if (attribute_desc.format == MTLVertexFormatInvalid) {
1007#if 0 /* Disable warning as it is too verbose and is supported. */
1009 "MTLShader: baking pipeline state for '%s'- skipping input attribute at "
1010 "index '%d' but none was specified in the current vertex state",
1011 mtl_interface->get_name(),
1012 i);
1013#endif
1014 /* Write out null conversion constant if attribute unused. */
1015 int MTL_attribute_conversion_mode = 0;
1016 [values setConstantValue:&MTL_attribute_conversion_mode
1017 type:MTLDataTypeInt
1018 withName:[NSString stringWithFormat:@"MTL_AttributeConvert%d", i]];
1019 continue;
1020 }
1021
1022 int MTL_attribute_conversion_mode = (int)attribute_desc.format_conversion_mode;
1023 [values setConstantValue:&MTL_attribute_conversion_mode
1024 type:MTLDataTypeInt
1025 withName:[NSString stringWithFormat:@"MTL_AttributeConvert%d", i]];
1026 if (MTL_attribute_conversion_mode == GPU_FETCH_INT_TO_FLOAT_UNIT) {
1028 "TODO(Metal): Shader %s needs to support internal format conversion\n",
1029 mtl_interface->get_name());
1030 }
1031
1032 /* Copy metal back-end attribute descriptor state into PSO descriptor.
1033 * NOTE: need to copy each element due to direct assignment restrictions.
1034 * Also note */
1035 MTLVertexAttributeDescriptor *mtl_attribute = desc.vertexDescriptor.attributes[i];
1036
1037 mtl_attribute.format = attribute_desc.format;
1038 mtl_attribute.offset = attribute_desc.offset;
1039 mtl_attribute.bufferIndex = attribute_desc.buffer_index;
1040 }
1041
1042 for (const uint i : IndexRange(pipeline_descriptor.vertex_descriptor.num_vert_buffers)) {
1043 /* Metal back-end state buffer layout. */
1044 const MTLVertexBufferLayoutDescriptorPSO &buf_layout =
1045 pipeline_descriptor.vertex_descriptor.buffer_layouts[i];
1046 /* Copy metal back-end buffer layout state into PSO descriptor.
1047 * NOTE: need to copy each element due to copying from internal
1048 * back-end descriptor to Metal API descriptor. */
1049 MTLVertexBufferLayoutDescriptor *mtl_buf_layout = desc.vertexDescriptor.layouts[i];
1050
1051 mtl_buf_layout.stepFunction = buf_layout.step_function;
1052 mtl_buf_layout.stepRate = buf_layout.step_rate;
1053 mtl_buf_layout.stride = buf_layout.stride;
1054 }
1055
1056 /* Mark empty attribute conversion. */
1057 for (int i = pipeline_descriptor.vertex_descriptor.max_attribute_value + 1;
1059 i++)
1060 {
1061 int MTL_attribute_conversion_mode = 0;
1062 [values setConstantValue:&MTL_attribute_conversion_mode
1063 type:MTLDataTypeInt
1064 withName:[NSString stringWithFormat:@"MTL_AttributeConvert%d", i]];
1065 }
1066
1067 /* DEBUG: Missing/empty attributes. */
1068 /* Attributes are normally mapped as part of the state setting based on the used
1069 * #GPUVertFormat, however, if attributes have not been set, we can sort them out here. */
1070 for (const uint i : IndexRange(mtl_interface->get_total_attributes())) {
1071 const MTLShaderInputAttribute &attribute = mtl_interface->get_attribute(i);
1072 MTLVertexAttributeDescriptor *current_attribute =
1073 desc.vertexDescriptor.attributes[attribute.location];
1074
1075 if (current_attribute.format == MTLVertexFormatInvalid) {
1076#if MTL_DEBUG_SHADER_ATTRIBUTES == 1
1077 printf("-> Filling in unbound attribute '%s' for shader PSO '%s' with location: %u\n",
1078 mtl_interface->get_name_at_offset(attribute.name_offset),
1079 mtl_interface->get_name(),
1080 attribute.location);
1081#endif
1082 current_attribute.format = attribute.format;
1083 current_attribute.offset = 0;
1084 current_attribute.bufferIndex = null_buffer_index;
1085
1086 /* Add Null vert buffer binding for invalid attributes. */
1087 if (!using_null_buffer) {
1088 MTLVertexBufferLayoutDescriptor *null_buf_layout =
1089 desc.vertexDescriptor.layouts[null_buffer_index];
1090
1091 /* Use constant step function such that null buffer can
1092 * contain just a singular dummy attribute. */
1093 null_buf_layout.stepFunction = MTLVertexStepFunctionConstant;
1094 null_buf_layout.stepRate = 0;
1095 null_buf_layout.stride = max_ii(null_buf_layout.stride, attribute.size);
1096
1097 /* If we are using the maximum number of vertex buffers, or tight binding indices,
1098 * MTL_uniform_buffer_base_index needs shifting to the bind slot after the null buffer
1099 * index. */
1100 if (null_buffer_index >= MTL_uniform_buffer_base_index) {
1101 MTL_uniform_buffer_base_index = null_buffer_index + 1;
1102 }
1103 using_null_buffer = true;
1104#if MTL_DEBUG_SHADER_ATTRIBUTES == 1
1105 MTL_LOG_DEBUG("Setting up buffer binding for null attribute with buffer index %d",
1106 null_buffer_index);
1107#endif
1108 }
1109 }
1110 }
1111
1112 /* Primitive Topology. */
1113 desc.inputPrimitiveTopology = pipeline_descriptor.vertex_descriptor.prim_topology_class;
1114 }
1115
1116 /* Update constant value for 'MTL_uniform_buffer_base_index'. */
1117 [values setConstantValue:&MTL_uniform_buffer_base_index
1118 type:MTLDataTypeInt
1119 withName:@"MTL_uniform_buffer_base_index"];
1120
1121 /* Storage buffer bind index.
1122 * This is always relative to MTL_uniform_buffer_base_index, plus the number of active buffers,
1123 * and an additional space for the push constant block.
1124 * If the shader does not have any uniform blocks, then we can place directly after the push
1125 * constant block. As we do not need an extra spot for the UBO at index '0'. */
1126 int MTL_storage_buffer_base_index = MTL_uniform_buffer_base_index + 1 +
1127 ((mtl_interface->get_total_uniform_blocks() > 0) ?
1128 mtl_interface->get_total_uniform_blocks() :
1129 0);
1130 [values setConstantValue:&MTL_storage_buffer_base_index
1131 type:MTLDataTypeInt
1132 withName:@"MTL_storage_buffer_base_index"];
1133
1134 /* Clipping planes. */
1135 int MTL_clip_distances_enabled = (pipeline_descriptor.clipping_plane_enable_mask > 0) ? 1 : 0;
1136
1137 /* Only define specialization constant if planes are required.
1138 * We guard clip_planes usage on this flag. */
1139 [values setConstantValue:&MTL_clip_distances_enabled
1140 type:MTLDataTypeInt
1141 withName:@"MTL_clip_distances_enabled"];
1142
1143 if (MTL_clip_distances_enabled > 0) {
1144 /* Assign individual enablement flags. Only define a flag function constant
1145 * if it is used. */
1146 for (const int plane : IndexRange(6)) {
1147 int plane_enabled = ctx->pipeline_state.clip_distance_enabled[plane] ? 1 : 0;
1148 if (plane_enabled) {
1149 [values
1150 setConstantValue:&plane_enabled
1151 type:MTLDataTypeInt
1152 withName:[NSString stringWithFormat:@"MTL_clip_distance_enabled%d", plane]];
1153 }
1154 }
1155 }
1156
1157 /* gl_PointSize constant. */
1158 bool null_pointsize = true;
1159 float MTL_pointsize = pipeline_descriptor.point_size;
1160 if (pipeline_descriptor.vertex_descriptor.prim_topology_class ==
1161 MTLPrimitiveTopologyClassPoint)
1162 {
1163 /* `if pointsize is > 0.0`, PROGRAM_POINT_SIZE is enabled, and `gl_PointSize` shader keyword
1164 * overrides the value. Otherwise, if < 0.0, use global constant point size. */
1165 if (MTL_pointsize < 0.0) {
1166 MTL_pointsize = fabsf(MTL_pointsize);
1167 [values setConstantValue:&MTL_pointsize
1168 type:MTLDataTypeFloat
1169 withName:@"MTL_global_pointsize"];
1170 null_pointsize = false;
1171 }
1172 }
1173
1174 if (null_pointsize) {
1175 MTL_pointsize = 0.0f;
1176 [values setConstantValue:&MTL_pointsize
1177 type:MTLDataTypeFloat
1178 withName:@"MTL_global_pointsize"];
1179 }
1180
1181 /* Compile functions */
1182 NSError *error = nullptr;
1183 desc.vertexFunction = [shader_library_vert_ newFunctionWithName:vertex_function_name_
1184 constantValues:values
1185 error:&error];
1186 if (error) {
1187 bool has_error = (
1188 [[error localizedDescription] rangeOfString:@"Compilation succeeded"].location ==
1189 NSNotFound);
1190
1191 const char *errors_c_str = [[error localizedDescription] UTF8String];
1192 const StringRefNull source = shd_builder_->glsl_fragment_source_.c_str();
1193
1195 print_log({source}, errors_c_str, "VertShader", has_error, &parser);
1196
1197 /* Only exit out if genuine error and not warning */
1198 if (has_error) {
1199 return nullptr;
1200 }
1201 }
1202
1203 desc.fragmentFunction = [shader_library_frag_ newFunctionWithName:fragment_function_name_
1204 constantValues:values
1205 error:&error];
1206 if (error) {
1207 bool has_error = (
1208 [[error localizedDescription] rangeOfString:@"Compilation succeeded"].location ==
1209 NSNotFound);
1210
1211 const char *errors_c_str = [[error localizedDescription] UTF8String];
1212 const StringRefNull source = shd_builder_->glsl_fragment_source_;
1213
1215 print_log({source}, errors_c_str, "FragShader", has_error, &parser);
1216
1217 /* Only exit out if genuine error and not warning */
1218 if (has_error) {
1219 return nullptr;
1220 }
1221 }
1222
1223 /* Setup pixel format state */
1224 for (int color_attachment = 0; color_attachment < GPU_FB_MAX_COLOR_ATTACHMENT;
1225 color_attachment++)
1226 {
1227 /* Fetch color attachment pixel format in back-end pipeline state. */
1228 MTLPixelFormat pixel_format = pipeline_descriptor.color_attachment_format[color_attachment];
1229 /* Populate MTL API PSO attachment descriptor. */
1230 MTLRenderPipelineColorAttachmentDescriptor *col_attachment =
1231 desc.colorAttachments[color_attachment];
1232
1233 col_attachment.pixelFormat = pixel_format;
1234 if (pixel_format != MTLPixelFormatInvalid) {
1235 bool format_supports_blending = mtl_format_supports_blending(pixel_format);
1236
1237 col_attachment.writeMask = pipeline_descriptor.color_write_mask;
1238 col_attachment.blendingEnabled = pipeline_descriptor.blending_enabled &&
1239 format_supports_blending;
1240 if (format_supports_blending && pipeline_descriptor.blending_enabled) {
1241 col_attachment.alphaBlendOperation = pipeline_descriptor.alpha_blend_op;
1242 col_attachment.rgbBlendOperation = pipeline_descriptor.rgb_blend_op;
1243 col_attachment.destinationAlphaBlendFactor = pipeline_descriptor.dest_alpha_blend_factor;
1244 col_attachment.destinationRGBBlendFactor = pipeline_descriptor.dest_rgb_blend_factor;
1245 col_attachment.sourceAlphaBlendFactor = pipeline_descriptor.src_alpha_blend_factor;
1246 col_attachment.sourceRGBBlendFactor = pipeline_descriptor.src_rgb_blend_factor;
1247 }
1248 else {
1249 if (pipeline_descriptor.blending_enabled && !format_supports_blending) {
1251 "[Warning] Attempting to Bake PSO, but MTLPixelFormat %d does not support "
1252 "blending\n",
1253 *((int *)&pixel_format));
1254 }
1255 }
1256 }
1257 }
1258 desc.depthAttachmentPixelFormat = pipeline_descriptor.depth_attachment_format;
1259 desc.stencilAttachmentPixelFormat = pipeline_descriptor.stencil_attachment_format;
1260
1261 /* Bind-point range validation.
1262 * We need to ensure that the PSO will have valid bind-point ranges, or is using the
1263 * appropriate bindless fallback path if any bind limits are exceeded. */
1264#ifdef NDEBUG
1265 /* Ensure Buffer bindings are within range. */
1266 BLI_assert_msg((MTL_uniform_buffer_base_index + get_max_ubo_index() + 2) <
1268 "UBO and SSBO bindings exceed the fragment bind table limit.");
1269
1270 /* Argument buffer. */
1271 if (mtl_interface->uses_argument_buffer_for_samplers()) {
1273 "Argument buffer binding exceeds the fragment bind table limit.");
1274 }
1275#endif
1276
1277 /* Compile PSO */
1278 MTLAutoreleasedRenderPipelineReflection reflection_data;
1279 id<MTLRenderPipelineState> pso = [ctx->device
1280 newRenderPipelineStateWithDescriptor:desc
1281 options:MTLPipelineOptionBufferTypeInfo
1282 reflection:&reflection_data
1283 error:&error];
1284 if (error) {
1285 NSLog(@"Failed to create PSO for shader: %s error %@\n", this->name, error);
1286 BLI_assert(false);
1287 return nullptr;
1288 }
1289 else if (!pso) {
1290 NSLog(@"Failed to create PSO for shader: %s, but no error was provided!\n", this->name);
1291 BLI_assert(false);
1292 return nullptr;
1293 }
1294 else {
1295#if 0
1296 NSLog(@"Successfully compiled PSO for shader: %s (Metal Context: %p)\n", this->name, ctx);
1297#endif
1298 }
1299
1300 /* Prepare pipeline state instance. */
1302 pso_inst->vert = desc.vertexFunction;
1303 pso_inst->frag = desc.fragmentFunction;
1304 pso_inst->pso = pso;
1305 pso_inst->base_uniform_buffer_index = MTL_uniform_buffer_base_index;
1306 pso_inst->base_storage_buffer_index = MTL_storage_buffer_base_index;
1307 pso_inst->null_attribute_buffer_index = (using_null_buffer) ? null_buffer_index : -1;
1308 pso_inst->prim_type = prim_type;
1309
1310 pso_inst->reflection_data_available = (reflection_data != nil);
1311 if (reflection_data != nil) {
1312
1313 /* Extract shader reflection data for buffer bindings.
1314 * This reflection data is used to contrast the binding information
1315 * we know about in the interface against the bindings in the finalized
1316 * PSO. This accounts for bindings which have been stripped out during
1317 * optimization, and allows us to both avoid over-binding and also
1318 * allows us to verify size-correctness for bindings, to ensure
1319 * that buffers bound are not smaller than the size of expected data. */
1320 NSArray<MTLArgument *> *vert_args = [reflection_data vertexArguments];
1321
1322 pso_inst->buffer_bindings_reflection_data_vert.clear();
1323 int buffer_binding_max_ind = 0;
1324
1325 for (int i = 0; i < [vert_args count]; i++) {
1326 MTLArgument *arg = [vert_args objectAtIndex:i];
1327 if ([arg type] == MTLArgumentTypeBuffer) {
1328 int buf_index = [arg index] - MTL_uniform_buffer_base_index;
1329 if (buf_index >= 0) {
1330 buffer_binding_max_ind = max_ii(buffer_binding_max_ind, buf_index);
1331 }
1332 }
1333 }
1334 pso_inst->buffer_bindings_reflection_data_vert.resize(buffer_binding_max_ind + 1);
1335 for (int i = 0; i < buffer_binding_max_ind + 1; i++) {
1336 pso_inst->buffer_bindings_reflection_data_vert[i] = {0, 0, 0, false};
1337 }
1338
1339 for (int i = 0; i < [vert_args count]; i++) {
1340 MTLArgument *arg = [vert_args objectAtIndex:i];
1341 if ([arg type] == MTLArgumentTypeBuffer) {
1342 int buf_index = [arg index] - MTL_uniform_buffer_base_index;
1343
1344 if (buf_index >= 0) {
1345 pso_inst->buffer_bindings_reflection_data_vert[buf_index] = {
1346 (uint32_t)([arg index]),
1347 (uint32_t)([arg bufferDataSize]),
1348 (uint32_t)([arg bufferAlignment]),
1349 ([arg isActive] == YES) ? true : false};
1350 }
1351 }
1352 }
1353
1354 NSArray<MTLArgument *> *frag_args = [reflection_data fragmentArguments];
1355
1356 pso_inst->buffer_bindings_reflection_data_frag.clear();
1357 buffer_binding_max_ind = 0;
1358
1359 for (int i = 0; i < [frag_args count]; i++) {
1360 MTLArgument *arg = [frag_args objectAtIndex:i];
1361 if ([arg type] == MTLArgumentTypeBuffer) {
1362 int buf_index = [arg index] - MTL_uniform_buffer_base_index;
1363 if (buf_index >= 0) {
1364 buffer_binding_max_ind = max_ii(buffer_binding_max_ind, buf_index);
1365 }
1366 }
1367 }
1368 pso_inst->buffer_bindings_reflection_data_frag.resize(buffer_binding_max_ind + 1);
1369 for (int i = 0; i < buffer_binding_max_ind + 1; i++) {
1370 pso_inst->buffer_bindings_reflection_data_frag[i] = {0, 0, 0, false};
1371 }
1372
1373 for (int i = 0; i < [frag_args count]; i++) {
1374 MTLArgument *arg = [frag_args objectAtIndex:i];
1375 if ([arg type] == MTLArgumentTypeBuffer) {
1376 int buf_index = [arg index] - MTL_uniform_buffer_base_index;
1377 shader_debug_printf(" BUF IND: %d (arg name: %s)\n", buf_index, [[arg name] UTF8String]);
1378 if (buf_index >= 0) {
1379 pso_inst->buffer_bindings_reflection_data_frag[buf_index] = {
1380 (uint32_t)([arg index]),
1381 (uint32_t)([arg bufferDataSize]),
1382 (uint32_t)([arg bufferAlignment]),
1383 ([arg isActive] == YES) ? true : false};
1384 }
1385 }
1386 }
1387 }
1388
1389 /* Insert into pso cache. */
1390 pso_cache_lock_.lock();
1391 pso_inst->shader_pso_index = pso_cache_.size();
1392 pso_cache_.add(pipeline_descriptor, pso_inst);
1393 pso_cache_lock_.unlock();
1395 "PSO CACHE: Stored new variant in PSO cache for shader '%s' Hash: '%llu'\n",
1396 this->name,
1397 pipeline_descriptor.hash());
1398 return pso_inst;
1399 }
1400}
1401
/* NOTE(review): Bakes (or fetches from cache) a Metal compute pipeline state object
 * for this shader, keyed on `compute_pipeline_descriptor`. The declaration line
 * naming `MTLShader::bake_compute_pipeline_state` was lost in extraction (original
 * line 1402), as were several interior statements; the code below is preserved
 * byte-for-byte. */
 1403 MTLContext *ctx, MTLComputePipelineStateDescriptor &compute_pipeline_descriptor)
 1404{
 1405 /* NOTE(Metal): Bakes and caches a PSO for compute. */
 1406 BLI_assert(this);
 1407 MTLShaderInterface *mtl_interface = this->get_interface();
 1408 BLI_assert(mtl_interface);
 1409 BLI_assert(this->is_valid());
 1410 BLI_assert(shader_library_compute_ != nil);
 1411
 1412 /* Check if current PSO exists in the cache. */
 /* NOTE(review): cache lookup is guarded by `pso_cache_lock_`; the lock is released
  * before the (potentially slow) compile below, so two threads may compile the same
  * variant concurrently — presumably the later `add` tolerates this. TODO confirm. */
 1413 pso_cache_lock_.lock();
 1414 MTLComputePipelineStateInstance *const *pso_lookup = compute_pso_cache_.lookup_ptr(
 1415 compute_pipeline_descriptor);
 1416 MTLComputePipelineStateInstance *pipeline_state = (pso_lookup) ? *pso_lookup : nullptr;
 1417 pso_cache_lock_.unlock();
 1418
 1419 if (pipeline_state != nullptr) {
 1420 /* Return cached PSO state. */
 1421 BLI_assert(pipeline_state->pso != nil);
 1422 return pipeline_state;
 1423 }
 1424 else {
 1425 /* Prepare Compute Pipeline Descriptor. */
 1426
 1427 /* Setup function specialization constants, used to modify and optimize
 1428 * generated code based on current render pipeline configuration. */
 1429 MTLFunctionConstantValues *values = [[MTLFunctionConstantValues new] autorelease];
 1430
 1431 /* TODO: Compile specialized shader variants asynchronously. */
 1432
 1433 /* Custom function constant values: */
 /* NOTE(review): the call opening on the original line 1434 was stripped by
  * extraction — presumably `populate_specialization_constant_values(` per the
  * render-PSO path. Verify against the original file. */
 1435 values, *this->constants, compute_pipeline_descriptor.specialization_state);
 1436
 1437 /* Offset the bind index for Uniform buffers such that they begin after the VBO
 1438 * buffer bind slots. `MTL_uniform_buffer_base_index` is passed as a function
 1439 * specialization constant, customized per unique pipeline state permutation.
 1440 *
 1441 * For Compute shaders, this offset is always zero, but this needs setting as
 1442 * it is expected as part of the common Metal shader header. */
 1443 int MTL_uniform_buffer_base_index = 0;
 1444 [values setConstantValue:&MTL_uniform_buffer_base_index
 1445 type:MTLDataTypeInt
 1446 withName:@"MTL_uniform_buffer_base_index"];
 1447
 1448 /* Storage buffer bind index.
 1449 * This is always relative to MTL_uniform_buffer_base_index, plus the number of active buffers,
 1450 * and an additional space for the push constant block.
 1451 * If the shader does not have any uniform blocks, then we can place directly after the push
 1452 * constant block. As we do not need an extra spot for the UBO at index '0'. */
 1453 int MTL_storage_buffer_base_index = MTL_uniform_buffer_base_index + 1 +
 1454 ((mtl_interface->get_total_uniform_blocks() > 0) ?
 1455 mtl_interface->get_total_uniform_blocks() :
 1456 0);
 1457
 1458 [values setConstantValue:&MTL_storage_buffer_base_index
 1459 type:MTLDataTypeInt
 1460 withName:@"MTL_storage_buffer_base_index"];
 1461
 1462 /* Compile compute function. */
 1463 NSError *error = nullptr;
 1464 id<MTLFunction> compute_function = [shader_library_compute_
 1465 newFunctionWithName:compute_function_name_
 1466 constantValues:values
 1467 error:&error];
 1468 compute_function.label = [NSString stringWithUTF8String:this->name];
 1469
 1470 if (error) {
 /* NOTE(review): Metal may return a non-nil NSError containing only warnings;
  * the "Compilation succeeded" substring check below filters that case. */
 1471 NSLog(@"Compile Error - Metal Shader compute function, error %@", error);
 1472
 1473 /* Only exit out if genuine error and not warning */
 1474 if ([[error localizedDescription] rangeOfString:@"Compilation succeeded"].location ==
 1475 NSNotFound)
 1476 {
 1477 BLI_assert(false);
 1478 return nullptr;
 1479 }
 1480 }
 1481
 1482 /* Compile PSO. */
 1483 MTLComputePipelineDescriptor *desc = [[MTLComputePipelineDescriptor alloc] init];
 1484 desc.label = [NSString stringWithUTF8String:this->name];
 1485 desc.computeFunction = compute_function;
 1486
 /* NOTE(review): original lines 1487-1494 are missing from this capture (likely a
  * comment block explaining the M1/M2 threadgroup tuning below). */
 1495 const MTLCapabilities &capabilities = MTLBackend::get_capabilities();
 1496 if (ELEM(capabilities.gpu, APPLE_GPU_M1, APPLE_GPU_M2)) {
 1497 if (maxTotalThreadsPerThreadgroup_Tuning_ > 0) {
 1498 desc.maxTotalThreadsPerThreadgroup = this->maxTotalThreadsPerThreadgroup_Tuning_;
 1499 MTL_LOG_DEBUG("Using custom parameter for shader %s value %u\n",
 1500 this->name,
 1501 maxTotalThreadsPerThreadgroup_Tuning_);
 1502 }
 1503 }
 1504
 1505 id<MTLComputePipelineState> pso = [ctx->device
 1506 newComputePipelineStateWithDescriptor:desc
 1507 options:MTLPipelineOptionNone
 1508 reflection:nullptr
 1509 error:&error];
 1510
 1511 /* If PSO has compiled but max theoretical threads-per-threadgroup is lower than required
 1512 * dispatch size, recompile with increased limit. NOTE: This will result in a performance drop,
 1513 * ideally the source shader should be modified to reduce local register pressure, or, local
 1514 * work-group size should be reduced.
 1515 * Similarly, the custom tuning parameter "mtl_max_total_threads_per_threadgroup" can be
 1516 * specified to a sufficiently large value to avoid this. */
 1517 if (pso) {
 1518 uint num_required_threads_per_threadgroup = compute_pso_common_state_.threadgroup_x_len *
 1519 compute_pso_common_state_.threadgroup_y_len *
 1520 compute_pso_common_state_.threadgroup_z_len;
 1521 if (pso.maxTotalThreadsPerThreadgroup < num_required_threads_per_threadgroup) {
 /* NOTE(review): the log-macro invocation opening this message (original line 1522,
  * presumably MTL_LOG_WARNING) was stripped by extraction. */
 1523 "Shader '%s' requires %u threads per threadgroup, but PSO limit is: %lu. Recompiling "
 1524 "with increased limit on descriptor.\n",
 1525 this->name,
 1526 num_required_threads_per_threadgroup,
 1527 (unsigned long)pso.maxTotalThreadsPerThreadgroup);
 1528 [pso release];
 1529 pso = nil;
 1530 desc.maxTotalThreadsPerThreadgroup = 1024;
 1531 pso = [ctx->device newComputePipelineStateWithDescriptor:desc
 1532 options:MTLPipelineOptionNone
 1533 reflection:nullptr
 1534 error:&error];
 1535 }
 1536 }
 1537
 1538 if (error) {
 1539 NSLog(@"Failed to create PSO for compute shader: %s error %@\n", this->name, error);
 1540 BLI_assert(false);
 1541 return nullptr;
 1542 }
 1543 else if (!pso) {
 1544 NSLog(@"Failed to create PSO for compute shader: %s, but no error was provided!\n",
 1545 this->name);
 1546 BLI_assert(false);
 1547 return nullptr;
 1548 }
 1549 else {
 1550#if 0
 1551 NSLog(@"Successfully compiled compute PSO for shader: %s (Metal Context: %p)\n",
 1552 this->name,
 1553 ctx);
 1554#endif
 1555 }
 1556
 1557 [desc release];
 1558
 1559 /* Gather reflection data and create MTLComputePipelineStateInstance to store results. */
 /* NOTE(review): the allocation of `compute_pso_instance` (original line 1560) is
  * missing from this capture. */
 1561 compute_pso_instance->compute = compute_function;
 1562 compute_pso_instance->pso = pso;
 1563 compute_pso_instance->base_uniform_buffer_index = MTL_uniform_buffer_base_index;
 1564 compute_pso_instance->base_storage_buffer_index = MTL_storage_buffer_base_index;
 /* Insert into the compute PSO cache under lock; `shader_pso_index` records the
  * insertion order within the cache. */
 1565 pso_cache_lock_.lock();
 1566 compute_pso_instance->shader_pso_index = compute_pso_cache_.size();
 1567 compute_pso_cache_.add(compute_pipeline_descriptor, compute_pso_instance);
 1568 pso_cache_lock_.unlock();
 1569
 1570 return compute_pso_instance;
 1571 }
 1572}
1573
1574
1575/* -------------------------------------------------------------------- */
1578
1583
/* NOTE(review): body of `MTLShaderCompiler::compile_shader` (per the override
 * declaration in this file's interface listing); the signature line (original
 * line 1584) was stripped by extraction. Compiles a shader from its create-info
 * and pre-warms its render-PSO cache. */
 1585{
 1586 MTLShader *shader = static_cast<MTLShader *>(compile(info, true));
 1587
 /* Only warm the cache when compilation actually produced a shader;
  * `compile` may return nullptr on failure. */
 1588 if (shader) {
 1589 /* Generate and cache any render PSOs if possible (typically materials only)
 1590 * (Finalize() will already bake a Compute PSO if possible) */
 1591 shader->warm_cache(-1);
 1592 }
 1593
 1594 return shader;
 1595}
1596
/* NOTE(review): body of `MTLShaderCompiler::specialize_shader` (per the override
 * declaration in this file's interface listing); the signature line (original
 * line 1597) was stripped by extraction. Pre-bakes a compute PSO variant for the
 * given specialization-constant values so later dispatches avoid a compile stall. */
 1598{
 1599 MTLShader *shader = static_cast<MTLShader *>(specialization.shader);
 1600
 1601 BLI_assert_msg(shader->is_valid(),
 1602 "Shader must be finalized before precompiling specializations");
 1603
 /* Render-pipeline specializations are not precompiled here; only shaders with a
  * compute library proceed. */
 1604 if (!shader->has_compute_shader_lib()) {
 1605 /* Currently only support Compute */
 1606 return;
 1607 }
 1608
 1609 /* Create descriptor using these specialization constants. */
 1610 MTLComputePipelineStateDescriptor compute_pipeline_descriptor(specialization.constants.values);
 1611
 /* Bake (and cache) the specialized compute PSO against the active context. */
 1612 MTLContext *metal_context = static_cast<MTLContext *>(Context::get());
 1613 shader->bake_compute_pipeline_state(metal_context, compute_pipeline_descriptor);
 1614}
1615
1617
1618} // namespace blender::gpu
#define BLI_assert_unreachable()
Definition BLI_assert.h:93
#define BLI_assert(a)
Definition BLI_assert.h:46
#define BLI_assert_msg(a, msg)
Definition BLI_assert.h:53
MINLINE int min_ii(int a, int b)
MINLINE int max_ii(int a, int b)
unsigned char uchar
unsigned int uint
unsigned short ushort
Platform independent time functions.
#define ELEM(...)
GHOST C-API function and type declarations.
int GPU_max_parallel_compilations()
static constexpr int GPU_VERT_ATTR_MAX_LEN
@ GPU_FETCH_INT_TO_FLOAT_UNIT
BMesh const char void * data
return true
SIMD_FORCE_INLINE bool isActive() const
void reset()
clear internal cached data and reset random seed
IndexRange index_range() const
constexpr int64_t size() const
Definition BLI_span.hh:493
constexpr const char * c_str() const
int64_t size() const
void append(const T &value)
static Context * get()
static MTLCapabilities & get_capabilities()
MTLFrameBuffer * get_current_framebuffer()
shader::SpecializationConstants constants_state
static MTLContext * get()
MTLContextGlobalShaderPipelineState pipeline_state
void specialization_constants_set(const shader::SpecializationConstants *constants_state)
MTLAttachment get_color_attachment(uint slot)
Shader * compile_shader(const shader::ShaderCreateInfo &info) override
void specialize_shader(ShaderSpecialization &specialization) override
const MTLShaderBufferBlock & get_push_constant_block() const
const char * get_name_at_offset(uint32_t offset) const
int get_argument_buffer_bind_index(ShaderStage stage) const
const MTLShaderInputAttribute & get_attribute(uint index) const
const MTLShaderUniform & get_uniform(uint index) const
void set_fragment_function_name(NSString *fragment_function_name)
void warm_cache(int limit) override
void geometry_shader_from_glsl(MutableSpan< StringRefNull > sources) override
MTLRenderPipelineStateInstance * bake_pipeline_state(MTLContext *ctx, MTLPrimitiveTopologyClass prim_type, const MTLRenderPipelineStateDescriptor &pipeline_descriptor)
MTLComputePipelineStateInstance * bake_compute_pipeline_state(MTLContext *ctx, MTLComputePipelineStateDescriptor &compute_pipeline_descriptor)
void shader_compute_source_from_msl(NSString *input_compute_source)
void uniform_int(int location, int comp_len, int array_size, const int *data) override
void bind(const shader::SpecializationConstants *constants_state) override
bool finalize(const shader::ShaderCreateInfo *info=nullptr) override
void fragment_shader_from_glsl(MutableSpan< StringRefNull > sources) override
void compute_shader_from_glsl(MutableSpan< StringRefNull > sources) override
void set_vertex_function_name(NSString *vetex_function_name)
void shader_source_from_msl(NSString *input_vertex_source, NSString *input_fragment_source)
void unbind() override
MTLShaderInterface * get_interface()
void uniform_float(int location, int comp_len, int array_size, const float *data) override
void vertex_shader_from_glsl(MutableSpan< StringRefNull > sources) override
MTLRenderPipelineStateInstance * bake_current_pipeline_state(MTLContext *ctx, MTLPrimitiveTopologyClass prim_type)
MTLShader(MTLContext *ctx, const char *name)
Definition mtl_shader.mm:71
void init(const shader::ShaderCreateInfo &, bool is_batch_compilation) override
void set_interface(MTLShaderInterface *interface)
void push_constant_bindstate_mark_dirty(bool is_dirty)
void set_compute_function_name(NSString *compute_function_name)
MTLRenderPipelineStateDescriptor & get_pipeline_descriptor()
Definition mtl_state.hh:59
Shader * compile(const shader::ShaderCreateInfo &info, bool is_batch_compilation)
ShaderCompiler(uint32_t threads_count=1, GPUWorker::ContextType context_type=GPUWorker::ContextType::PerThread, bool support_specializations=false)
std::unique_ptr< const shader::SpecializationConstants > constants
ShaderInterface * interface
StringRefNull name_get() const
void print_log(Span< StringRefNull > sources, const char *log, const char *stage, bool error, GPULogParser *parser)
Shader(const char *name)
Definition gpu_shader.cc:57
TextureFormat format_get() const
nullptr float
CCL_NAMESPACE_BEGIN struct Options options
#define GPU_FB_MAX_COLOR_ATTACHMENT
#define printf(...)
float length(VecOp< float, D >) RET
#define SOURCES_INDEX_VERSION
int count
void * MEM_callocN(size_t len, const char *str)
Definition mallocn.cc:118
void MEM_freeN(void *vmemh)
Definition mallocn.cc:113
static void error(const char *str)
#define MTL_MAX_BUFFER_BINDINGS
#define MTL_LOG_WARNING(info,...)
Definition mtl_debug.hh:42
#define MTL_LOG_DEBUG(info,...)
Definition mtl_debug.hh:49
#define MTL_LOG_ERROR(info,...)
Definition mtl_debug.hh:34
#define shader_debug_printf(...)
Definition mtl_shader.hh:49
#define MTL_SHADER_SPECIALIZATION_CONSTANT_BASE_ID
Definition mtl_shader.hh:53
uint mtl_get_data_type_alignment(MTLInterfaceDataType type)
Vector< shader::GeneratedSource, 0 > GeneratedSourceList
Vector< StringRefNull > gpu_shader_dependency_get_resolved_source(const StringRefNull shader_source_name, const shader::GeneratedSourceList &generated_sources, const StringRefNull shader_name)
MTLPixelFormat gpu_texture_format_to_metal(TextureFormat tex_format)
const char * to_string(ShaderStage stage)
Definition mtl_shader.mm:51
static void populate_specialization_constant_values(MTLFunctionConstantValues *values, const shader::SpecializationConstants &shader_constants, const SpecializationStateDescriptor &specialization_descriptor)
bool mtl_format_supports_blending(MTLPixelFormat format)
#define fabsf
blender::gpu::shader::SpecializationConstants constants
blender::gpu::Shader * shader
MTLPixelFormat color_attachment_format[GPU_FB_MAX_COLOR_ATTACHMENT]
blender::Vector< MTLBufferArgumentData > buffer_bindings_reflection_data_frag
Definition mtl_shader.hh:98
blender::Vector< MTLBufferArgumentData > buffer_bindings_reflection_data_vert
Definition mtl_shader.hh:97
MTLVertexAttributeDescriptorPSO attributes[GPU_VERT_ATTR_MAX_LEN]
MTLVertexBufferLayoutDescriptorPSO buffer_layouts[GPU_BATCH_VBO_MAX_LEN]
MTLPrimitiveTopologyClass prim_topology_class
Vector< shader::SpecializationConstant::Value > values
Describe inputs & outputs, stage interfaces, resources and sources of a shader. If all data is correc...
Vector< SpecializationConstant::Value, 8 > values
i
Definition text_draw.cc:230
PointerRNA * ptr
Definition wm_files.cc:4238