265 bool is_compute =
false;
266 if (shd_builder_->glsl_compute_source_.empty() ==
false) {
267 BLI_assert_msg(info !=
nullptr,
"Compute shaders must use CreateInfo.\n");
268 BLI_assert_msg(!shd_builder_->source_from_msl_,
"Compute shaders must compile from GLSL.");
274 if (!shd_builder_->source_from_msl_) {
275 bool success = generate_msl_from_glsl(info);
279 BLI_assert_msg(
false,
"Shader translation from GLSL to MSL has failed. \n");
287 shd_builder_ =
nullptr;
295 int threadgroup_tuning_param = info->mtl_max_threads_per_threadgroup_;
296 if (threadgroup_tuning_param > 0) {
297 maxTotalThreadsPerThreadgroup_Tuning_ = threadgroup_tuning_param;
307 id<MTLDevice> device = context_->device;
325 MTLCompileOptions *
options = [[[MTLCompileOptions alloc]
init] autorelease];
326 options.languageVersion = MTLLanguageVersion2_2;
328 options.preserveInvariance = YES;
334 options.languageVersion = MTLLanguageVersion2_3;
336#if defined(MAC_OS_VERSION_14_0)
337 if (@available(macOS 14.00, *)) {
340 options.languageVersion = MTLLanguageVersion3_1;
345 NSString *source_to_compile = shd_builder_->msl_source_vert_;
351 uint8_t total_stages = (is_compute) ? 1 : 2;
353 for (
int stage_count = 0; stage_count < total_stages; stage_count++) {
354 int arg_buf_samplers_size = 0;
357 source_to_compile = shd_builder_->msl_source_vert_;
358 arg_buf_samplers_size = arg_buf_samplers_vert_;
361 source_to_compile = shd_builder_->msl_source_frag_;
362 arg_buf_samplers_size = arg_buf_samplers_frag_;
365 source_to_compile = shd_builder_->msl_source_compute_;
366 arg_buf_samplers_size = arg_buf_samplers_comp_;
373 std::stringstream ss;
380 ss <<
"#define MTL_ARGUMENT_BUFFER_NUM_SAMPLERS " << arg_buf_samplers_size <<
"\n";
385 ss <<
"#define MTL_SUPPORTS_TEXTURE_ATOMICS 1\n";
393 "gpu_shader_compat_msl.msl", generated_sources);
394 std::string compatibility_concat = fmt::to_string(fmt::join(compatibility_src,
""));
396 std::string final_src = compatibility_concat + [source_to_compile UTF8String];
397 NSString *source_with_header = [NSString stringWithUTF8String:final_src.c_str()];
398 [source_with_header retain];
401 NSError *
error =
nullptr;
402 id<MTLLibrary> library = [device newLibraryWithSource:source_with_header
407 if ([[
error localizedDescription] rangeOfString:
@"Compilation succeeded"].location ==
410 const char *errors_c_str = [[
error localizedDescription] UTF8String];
418 shd_builder_ =
nullptr;
428 shader_library_vert_ = library;
429 shader_library_vert_.label = [NSString stringWithUTF8String:this->
name];
433 shader_library_frag_ = library;
434 shader_library_frag_.label = [NSString stringWithUTF8String:this->
name];
438 shader_library_compute_ = library;
439 shader_library_compute_.label = [NSString stringWithUTF8String:this->
name];
447 [source_with_header autorelease];
462 pso_descriptor_ = [[MTLRenderPipelineDescriptor alloc]
init];
463 pso_descriptor_.label = [NSString stringWithUTF8String:this->
name];
471 if (push_constant_block.
size > 0) {
476 push_constant_data_ =
nullptr;
491 shd_builder_ =
nullptr;
928 MTLPrimitiveTopologyClass prim_type,
937 pso_cache_lock_.lock();
940 pso_cache_lock_.unlock();
942 if (pipeline_state !=
nullptr) {
943 return pipeline_state;
959 MTLFunctionConstantValues *values = [[MTLFunctionConstantValues
new] autorelease];
966 MTLRenderPipelineDescriptor *desc = pso_descriptor_;
968 pso_descriptor_.label = [NSString stringWithUTF8String:this->
name];
984 bool using_null_buffer =
false;
1006 if (attribute_desc.
format == MTLVertexFormatInvalid) {
1009 "MTLShader: baking pipeline state for '%s'- skipping input attribute at "
1010 "index '%d' but none was specified in the current vertex state",
1015 int MTL_attribute_conversion_mode = 0;
1016 [values setConstantValue:&MTL_attribute_conversion_mode
1018 withName:[NSString stringWithFormat:
@"MTL_AttributeConvert%d",
i]];
1023 [values setConstantValue:&MTL_attribute_conversion_mode
1025 withName:[NSString stringWithFormat:
@"MTL_AttributeConvert%d",
i]];
1028 "TODO(Metal): Shader %s needs to support internal format conversion\n",
1035 MTLVertexAttributeDescriptor *mtl_attribute = desc.vertexDescriptor.attributes[
i];
1037 mtl_attribute.format = attribute_desc.
format;
1038 mtl_attribute.offset = attribute_desc.
offset;
1039 mtl_attribute.bufferIndex = attribute_desc.
buffer_index;
1049 MTLVertexBufferLayoutDescriptor *mtl_buf_layout = desc.vertexDescriptor.layouts[
i];
1052 mtl_buf_layout.stepRate = buf_layout.
step_rate;
1053 mtl_buf_layout.stride = buf_layout.
stride;
1061 int MTL_attribute_conversion_mode = 0;
1062 [values setConstantValue:&MTL_attribute_conversion_mode
1064 withName:[NSString stringWithFormat:
@"MTL_AttributeConvert%d",
i]];
1072 MTLVertexAttributeDescriptor *current_attribute =
1073 desc.vertexDescriptor.attributes[attribute.
location];
1075 if (current_attribute.format == MTLVertexFormatInvalid) {
1076#if MTL_DEBUG_SHADER_ATTRIBUTES == 1
1077 printf(
"-> Filling in unbound attribute '%s' for shader PSO '%s' with location: %u\n",
1082 current_attribute.format = attribute.
format;
1083 current_attribute.offset = 0;
1084 current_attribute.bufferIndex = null_buffer_index;
1087 if (!using_null_buffer) {
1088 MTLVertexBufferLayoutDescriptor *null_buf_layout =
1089 desc.vertexDescriptor.layouts[null_buffer_index];
1093 null_buf_layout.stepFunction = MTLVertexStepFunctionConstant;
1094 null_buf_layout.stepRate = 0;
1095 null_buf_layout.stride =
max_ii(null_buf_layout.stride, attribute.
size);
1100 if (null_buffer_index >= MTL_uniform_buffer_base_index) {
1101 MTL_uniform_buffer_base_index = null_buffer_index + 1;
1103 using_null_buffer =
true;
1104#if MTL_DEBUG_SHADER_ATTRIBUTES == 1
1105 MTL_LOG_DEBUG(
"Setting up buffer binding for null attribute with buffer index %d",
1117 [values setConstantValue:&MTL_uniform_buffer_base_index
1119 withName:
@"MTL_uniform_buffer_base_index"];
1126 int MTL_storage_buffer_base_index = MTL_uniform_buffer_base_index + 1 +
1130 [values setConstantValue:&MTL_storage_buffer_base_index
1132 withName:
@"MTL_storage_buffer_base_index"];
1139 [values setConstantValue:&MTL_clip_distances_enabled
1141 withName:
@"MTL_clip_distances_enabled"];
1143 if (MTL_clip_distances_enabled > 0) {
1148 if (plane_enabled) {
1150 setConstantValue:&plane_enabled
1152 withName:[NSString stringWithFormat:
@"MTL_clip_distance_enabled%d", plane]];
1158 bool null_pointsize =
true;
1159 float MTL_pointsize = pipeline_descriptor.
point_size;
1161 MTLPrimitiveTopologyClassPoint)
1165 if (MTL_pointsize < 0.0) {
1166 MTL_pointsize =
fabsf(MTL_pointsize);
1167 [values setConstantValue:&MTL_pointsize
1168 type:MTLDataTypeFloat
1169 withName:
@"MTL_global_pointsize"];
1170 null_pointsize =
false;
1174 if (null_pointsize) {
1175 MTL_pointsize = 0.0f;
1176 [values setConstantValue:&MTL_pointsize
1177 type:MTLDataTypeFloat
1178 withName:
@"MTL_global_pointsize"];
1182 NSError *
error =
nullptr;
1183 desc.vertexFunction = [shader_library_vert_ newFunctionWithName:vertex_function_name_
1184 constantValues:values
1188 [[
error localizedDescription] rangeOfString:
@"Compilation succeeded"].location ==
1191 const char *errors_c_str = [[
error localizedDescription] UTF8String];
1203 desc.fragmentFunction = [shader_library_frag_ newFunctionWithName:fragment_function_name_
1204 constantValues:values
1208 [[
error localizedDescription] rangeOfString:
@"Compilation succeeded"].location ==
1211 const char *errors_c_str = [[
error localizedDescription] UTF8String];
1212 const StringRefNull source = shd_builder_->glsl_fragment_source_;
1230 MTLRenderPipelineColorAttachmentDescriptor *col_attachment =
1231 desc.colorAttachments[color_attachment];
1233 col_attachment.pixelFormat = pixel_format;
1234 if (pixel_format != MTLPixelFormatInvalid) {
1239 format_supports_blending;
1241 col_attachment.alphaBlendOperation = pipeline_descriptor.
alpha_blend_op;
1242 col_attachment.rgbBlendOperation = pipeline_descriptor.
rgb_blend_op;
1251 "[Warning] Attempting to Bake PSO, but MTLPixelFormat %d does not support "
1253 *((
int *)&pixel_format));
1266 BLI_assert_msg((MTL_uniform_buffer_base_index + get_max_ubo_index() + 2) <
1268 "UBO and SSBO bindings exceed the fragment bind table limit.");
1273 "Argument buffer binding exceeds the fragment bind table limit.");
1278 MTLAutoreleasedRenderPipelineReflection reflection_data;
1279 id<MTLRenderPipelineState> pso = [ctx->
device
1280 newRenderPipelineStateWithDescriptor:desc
1281 options:MTLPipelineOptionBufferTypeInfo
1282 reflection:&reflection_data
1285 NSLog(
@"Failed to create PSO for shader: %s error %@\n", this->
name,
error);
1290 NSLog(
@"Failed to create PSO for shader: %s, but no error was provided!\n", this->
name);
1296 NSLog(
@"Successfully compiled PSO for shader: %s (Metal Context: %p)\n", this->
name, ctx);
1302 pso_inst->
vert = desc.vertexFunction;
1303 pso_inst->
frag = desc.fragmentFunction;
1304 pso_inst->
pso = pso;
1311 if (reflection_data != nil) {
1320 NSArray<MTLArgument *> *vert_args = [reflection_data vertexArguments];
1323 int buffer_binding_max_ind = 0;
1325 for (
int i = 0;
i < [vert_args
count];
i++) {
1326 MTLArgument *arg = [vert_args objectAtIndex:
i];
1327 if ([arg type] == MTLArgumentTypeBuffer) {
1328 int buf_index = [arg index] - MTL_uniform_buffer_base_index;
1329 if (buf_index >= 0) {
1330 buffer_binding_max_ind =
max_ii(buffer_binding_max_ind, buf_index);
1335 for (
int i = 0;
i < buffer_binding_max_ind + 1;
i++) {
1339 for (
int i = 0;
i < [vert_args
count];
i++) {
1340 MTLArgument *arg = [vert_args objectAtIndex:
i];
1341 if ([arg type] == MTLArgumentTypeBuffer) {
1342 int buf_index = [arg index] - MTL_uniform_buffer_base_index;
1344 if (buf_index >= 0) {
1346 (uint32_t)([arg index]),
1347 (uint32_t)([arg bufferDataSize]),
1348 (uint32_t)([arg bufferAlignment]),
1349 ([arg
isActive] == YES) ?
true :
false};
1354 NSArray<MTLArgument *> *frag_args = [reflection_data fragmentArguments];
1357 buffer_binding_max_ind = 0;
1359 for (
int i = 0;
i < [frag_args
count];
i++) {
1360 MTLArgument *arg = [frag_args objectAtIndex:
i];
1361 if ([arg type] == MTLArgumentTypeBuffer) {
1362 int buf_index = [arg index] - MTL_uniform_buffer_base_index;
1363 if (buf_index >= 0) {
1364 buffer_binding_max_ind =
max_ii(buffer_binding_max_ind, buf_index);
1369 for (
int i = 0;
i < buffer_binding_max_ind + 1;
i++) {
1373 for (
int i = 0;
i < [frag_args
count];
i++) {
1374 MTLArgument *arg = [frag_args objectAtIndex:
i];
1375 if ([arg type] == MTLArgumentTypeBuffer) {
1376 int buf_index = [arg index] - MTL_uniform_buffer_base_index;
1378 if (buf_index >= 0) {
1380 (uint32_t)([arg index]),
1381 (uint32_t)([arg bufferDataSize]),
1382 (uint32_t)([arg bufferAlignment]),
1383 ([arg
isActive] == YES) ?
true :
false};
1390 pso_cache_lock_.lock();
1392 pso_cache_.add(pipeline_descriptor, pso_inst);
1393 pso_cache_lock_.unlock();
1395 "PSO CACHE: Stored new variant in PSO cache for shader '%s' Hash: '%llu'\n",
1397 pipeline_descriptor.
hash());
1413 pso_cache_lock_.lock();
1415 compute_pipeline_descriptor);
1417 pso_cache_lock_.unlock();
1419 if (pipeline_state !=
nullptr) {
1422 return pipeline_state;
1429 MTLFunctionConstantValues *values = [[MTLFunctionConstantValues
new] autorelease];
1443 int MTL_uniform_buffer_base_index = 0;
1444 [values setConstantValue:&MTL_uniform_buffer_base_index
1446 withName:
@"MTL_uniform_buffer_base_index"];
1453 int MTL_storage_buffer_base_index = MTL_uniform_buffer_base_index + 1 +
1458 [values setConstantValue:&MTL_storage_buffer_base_index
1460 withName:
@"MTL_storage_buffer_base_index"];
1463 NSError *
error =
nullptr;
1464 id<MTLFunction> compute_function = [shader_library_compute_
1465 newFunctionWithName:compute_function_name_
1466 constantValues:values
1468 compute_function.label = [NSString stringWithUTF8String:this->
name];
1471 NSLog(
@"Compile Error - Metal Shader compute function, error %@",
error);
1474 if ([[
error localizedDescription] rangeOfString:
@"Compilation succeeded"].location ==
1483 MTLComputePipelineDescriptor *desc = [[MTLComputePipelineDescriptor alloc]
init];
1484 desc.label = [NSString stringWithUTF8String:this->
name];
1485 desc.computeFunction = compute_function;
1497 if (maxTotalThreadsPerThreadgroup_Tuning_ > 0) {
1498 desc.maxTotalThreadsPerThreadgroup = this->maxTotalThreadsPerThreadgroup_Tuning_;
1499 MTL_LOG_DEBUG(
"Using custom parameter for shader %s value %u\n",
1501 maxTotalThreadsPerThreadgroup_Tuning_);
1505 id<MTLComputePipelineState> pso = [ctx->
device
1506 newComputePipelineStateWithDescriptor:desc
1518 uint num_required_threads_per_threadgroup = compute_pso_common_state_.threadgroup_x_len *
1519 compute_pso_common_state_.threadgroup_y_len *
1520 compute_pso_common_state_.threadgroup_z_len;
1521 if (pso.maxTotalThreadsPerThreadgroup < num_required_threads_per_threadgroup) {
1523 "Shader '%s' requires %u threads per threadgroup, but PSO limit is: %lu. Recompiling "
1524 "with increased limit on descriptor.\n",
1526 num_required_threads_per_threadgroup,
1527 (
unsigned long)pso.maxTotalThreadsPerThreadgroup);
1530 desc.maxTotalThreadsPerThreadgroup = 1024;
1531 pso = [ctx->
device newComputePipelineStateWithDescriptor:desc
1539 NSLog(
@"Failed to create PSO for compute shader: %s error %@\n", this->
name,
error);
1544 NSLog(
@"Failed to create PSO for compute shader: %s, but no error was provided!\n",
1551 NSLog(
@"Successfully compiled compute PSO for shader: %s (Metal Context: %p)\n",
1561 compute_pso_instance->
compute = compute_function;
1562 compute_pso_instance->
pso = pso;
1565 pso_cache_lock_.lock();
1567 compute_pso_cache_.add(compute_pipeline_descriptor, compute_pso_instance);
1568 pso_cache_lock_.unlock();
1570 return compute_pso_instance;