Blender V5.0
mtl_backend.mm
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2022-2023 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
8
9#include <cstring>
10
11#include "BLI_threads.h"
12
13#include "BKE_global.hh"
14
15#include "gpu_backend.hh"
16#include "mtl_backend.hh"
17#include "mtl_batch.hh"
18#include "mtl_context.hh"
19#include "mtl_framebuffer.hh"
20#include "mtl_immediate.hh"
21#include "mtl_index_buffer.hh"
22#include "mtl_query.hh"
23#include "mtl_shader.hh"
24#include "mtl_storage_buffer.hh"
25#include "mtl_uniform_buffer.hh"
26#include "mtl_vertex_buffer.hh"
27
30
31#include <Cocoa/Cocoa.h>
32#include <Metal/Metal.h>
33#include <QuartzCore/QuartzCore.h>
34#include <sys/sysctl.h>
35
36namespace blender::gpu {
37
38/* Global per-thread AutoReleasePool. */
39thread_local NSAutoreleasePool *g_autoreleasepool = nil;
40thread_local int g_autoreleasepool_depth = 0;
41
42/* -------------------------------------------------------------------- */
45
47{
48 compiler_ = MEM_new<MTLShaderCompiler>(__func__);
49}
50
52{
53 MEM_delete(compiler_);
54}
55
57 /* Placeholder -- Handled in MTLContext. */
58};
59
Context *MTLBackend::context_alloc(void *ghost_window, void *ghost_context)
{
  /* Create a Metal GPU context bound to the given GHOST window/context handles. */
  MTLContext *metal_context = new MTLContext(ghost_window, ghost_context);
  return metal_context;
};
64
66{
67 return new MTLBatch();
68};
69
71{
72 return new MTLFence();
73};
74
79
81{
82 return new MTLIndexBuf();
83};
84
89
94
96{
97 return new MTLShader(MTLContext::get(), name);
98};
99
101{
102 return new gpu::MTLTexture(name);
103}
104
106{
107 return new MTLUniformBuf(size, name);
108};
109
111{
112 return new MTLStorageBuf(size, usage, name);
113}
114
116{
117 return new MTLVertBuf();
118}
119
121{
122 /* All Rendering must occur within a render boundary */
123 /* Track a call-count for nested calls, used to ensure we are inside an
124 * autoreleasepool from all rendering path. */
126
127 if (g_autoreleasepool == nil) {
128 g_autoreleasepool = [[NSAutoreleasePool alloc] init];
129 }
132}
133
135{
136 /* If call-count reaches zero, drain auto release pool.
137 * Ensures temporary objects are freed within a frame's lifetime. */
141
142 if (g_autoreleasepool_depth == 0) {
143 [g_autoreleasepool drain];
144 g_autoreleasepool = nil;
145 }
146}
147
148void MTLBackend::render_step(bool force_resource_release)
149{
150 /* NOTE(Metal): Primarily called from main thread, but below data-structures
151 * and operations are thread-safe, and GPUContext rendering coordination
152 * is also thread-safe. */
153
154 /* Flush any MTLSafeFreeLists which have previously been released by any MTLContext. */
156
157 /* End existing MTLSafeFreeList and begin new list --
158 * Buffers wont `free` until all associated in-flight command buffers have completed.
159 * Decrement final reference count for ensuring the previous list is certainly
160 * released. */
161 MTLSafeFreeList *cmd_free_buffer_list =
163 if (cmd_free_buffer_list->should_flush()) {
165 }
166
167 if (force_resource_release && g_autoreleasepool) {
168 [g_autoreleasepool drain];
169 g_autoreleasepool = [[NSAutoreleasePool alloc] init];
170 }
171}
172
174{
175 return (g_autoreleasepool != nil);
176}
177
179
180/* -------------------------------------------------------------------- */
183
184/* For Metal, platform_init needs to be called after MTLContext initialization. */
185void MTLBackend::platform_init(MTLContext *ctx)
186{
187 if (GPG.initialized) {
188 return;
189 }
190
195
196 BLI_assert(ctx);
197 id<MTLDevice> mtl_device = ctx->device;
198 BLI_assert(device);
199
200 NSString *gpu_name = [mtl_device name];
201 const char *vendor = [gpu_name UTF8String];
202 const char *renderer = "Metal API";
203 const char *version = "1.2";
204 if (G.debug & G_DEBUG_GPU) {
205 printf("METAL API - DETECTED GPU: %s\n", vendor);
206 }
207
208 /* macOS is the only supported platform, but check to ensure we are not building with Metal
209 * enablement on another platform. */
210 BLI_assert_msg(os == GPU_OS_MAC, "Platform must be macOS");
211
212 /* Determine Vendor from name. */
213 if (strstr(vendor, "ATI") || strstr(vendor, "AMD")) {
214 device = GPU_DEVICE_ATI;
215 driver = GPU_DRIVER_OFFICIAL;
216 }
217 else if (strstr(vendor, "NVIDIA")) {
218 device = GPU_DEVICE_NVIDIA;
219 driver = GPU_DRIVER_OFFICIAL;
220 }
221 else if (strstr(vendor, "Intel")) {
222 device = GPU_DEVICE_INTEL;
223 driver = GPU_DRIVER_OFFICIAL;
224 support_level = GPU_SUPPORT_LEVEL_LIMITED;
225 }
226 else if (strstr(vendor, "Apple") || strstr(vendor, "APPLE")) {
227 /* Apple Silicon. */
228 device = GPU_DEVICE_APPLE;
229 driver = GPU_DRIVER_OFFICIAL;
230 }
231 else if (strstr(renderer, "Apple Software Renderer")) {
232 device = GPU_DEVICE_SOFTWARE;
233 driver = GPU_DRIVER_SOFTWARE;
234 }
235 else if (strstr(renderer, "llvmpipe") || strstr(renderer, "softpipe")) {
236 device = GPU_DEVICE_SOFTWARE;
237 driver = GPU_DRIVER_SOFTWARE;
238 }
239 else if (G.debug & G_DEBUG_GPU) {
240 printf("Warning: Could not find a matching GPU name. Things may not behave as expected.\n");
241 printf("Detected configuration:\n");
242 printf("Vendor: %s\n", vendor);
243 printf("Renderer: %s\n", renderer);
244 }
245
246 GPUArchitectureType architecture_type = (mtl_device.hasUnifiedMemory &&
247 device == GPU_DEVICE_APPLE) ?
250
251 GPG.init(device,
252 os,
253 driver,
254 support_level,
256 vendor,
257 renderer,
258 version,
259 architecture_type);
260
261 /* UUID is not supported on Metal. */
263
264 /* LUID is registryID on Metal, or at least this is what libraries like OIDN expects. */
265 const uint64_t luid = mtl_device.registryID;
266 GPG.device_luid.reinitialize(sizeof(luid));
267 std::memcpy(GPG.device_luid.data(), &luid, sizeof(luid));
268
269 /* Metal only has one device per LUID, so only the first bit will always be active.. */
271}
272
/* Tear down the global platform information populated by `platform_init`.
 * NOTE(review): counterpart to `platform_init`; assumes GPG was initialized —
 * confirm callers only invoke this after a successful init. */
void MTLBackend::platform_exit()
{
  /* Reset the global GPU platform descriptor (vendor/renderer/device info). */
  GPG.clear();
}
278
280
281/* -------------------------------------------------------------------- */
285
/* Extension-string lookup stub: Metal has no GL-style extension list, so every
 * index resolves to null. */
static const char *mtl_extensions_get_null(int /*i*/)
{
  return nullptr;
}
290
/* Whitelist check for shader barycentric-coordinate support, based on the GPU
 * vendor name plus the required Metal feature-set family. */
bool supports_barycentric_whitelist(id<MTLDevice> device)
{
  /* Fetch GPU name to match against known-good vendor strings. */
  NSString *gpu_name = [device name];
  BLI_assert([gpu_name length]);
  const char *vendor = [gpu_name UTF8String];

  /* Hardware must expose the Mac2 GPU family. */
  const bool supported_gpu = [device supportsFamily:MTLGPUFamilyMac2];

  /* Known-good vendors: AMD and Apple Silicon. */
  bool should_support_barycentrics = strstr(vendor, "AMD") || strstr(vendor, "Apple") ||
                                     strstr(vendor, "APPLE");

  /* Intel-based platforms are explicitly supported as well. */
  if (strstr(vendor, "Intel") || strstr(vendor, "INTEL")) {
    should_support_barycentrics = true;
  }

  return supported_gpu && should_support_barycentrics;
}
312
/* True when the device name identifies an Apple GPU (Apple Silicon). */
bool is_apple_sillicon(id<MTLDevice> device)
{
  NSString *gpu_name = [device name];
  BLI_assert([gpu_name length]);

  const char *vendor_str = [gpu_name UTF8String];

  /* Apple GPUs report "Apple" (case varies) in the device name. */
  return (strstr(vendor_str, "Apple") || strstr(vendor_str, "APPLE"));
}
323
/* Return the number of performance ("P") CPU cores.
 * On Apple Silicon this queries the perflevel0 cluster; on Intel there is no
 * performance/efficiency split, so the total logical core count is returned.
 * Returns -1 (after asserting in debug builds) if the sysctl query fails. */
static int get_num_performance_cpu_cores(id<MTLDevice> device)
{
  int num_performance_cores = -1;

  /* These sysctl keys yield a native `int`. Read into a typed value with
   * `sizeof` rather than byte `[0]` of a raw buffer, which would truncate
   * counts >= 256 and assume little-endian byte order. */
  int core_count = 0;
  size_t core_count_length = sizeof(core_count);

  /* Pick the query key based on CPU architecture. */
  const char *sysctl_key = is_apple_sillicon(device) ? "hw.perflevel0.logicalcpu" :
                                                       "hw.logicalcpu";

  if (sysctlbyname(sysctl_key, &core_count, &core_count_length, nullptr, 0) == 0) {
    num_performance_cores = core_count;
  }

  BLI_assert(num_performance_cores != -1);
  return num_performance_cores;
}
348
/* Return the number of efficiency ("E") CPU cores.
 * Only Apple Silicon has an efficiency cluster (perflevel1); Intel Macs
 * return 0. Returns -1 (after asserting in debug builds) if the sysctl
 * query fails on Apple Silicon. */
static int get_num_efficiency_cpu_cores(id<MTLDevice> device)
{
  if (!is_apple_sillicon(device)) {
    /* No efficiency cores on Intel-based Macs. */
    return 0;
  }

  int num_efficiency_cores = -1;

  /* The sysctl yields a native `int`; read it into a typed value with
   * `sizeof` rather than byte `[0]` of a raw buffer, which would truncate
   * counts >= 256 and assume little-endian byte order. */
  int core_count = 0;
  size_t core_count_length = sizeof(core_count);
  if (sysctlbyname("hw.perflevel1.logicalcpu", &core_count, &core_count_length, nullptr, 0) == 0)
  {
    num_efficiency_cores = core_count;
  }

  BLI_assert(num_efficiency_cores != -1);
  return num_efficiency_cores;
}
368
370{
371 /* Device compatibility information using Metal Feature-set tables.
372 * See: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf */
373
374 NSOperatingSystemVersion version = [[NSProcessInfo processInfo] operatingSystemVersion];
375
376 /* Metal Viewport requires macOS Version 10.15 onward. */
377 bool supported_os_version = version.majorVersion >= 11 ||
378 (version.majorVersion == 10 ? version.minorVersion >= 15 : false);
379 if (!supported_os_version) {
380 printf(
381 "OS Version too low to run minimum required metal version. Required at least 10.15, got "
382 "%ld.%ld \n",
383 (long)version.majorVersion,
384 (long)version.minorVersion);
385 return false;
386 }
387
388 id<MTLDevice> device = MTLCreateSystemDefaultDevice();
389
390 /* Debug: Enable low power GPU with Environment Var: METAL_FORCE_INTEL. */
391 static const char *forceIntelStr = getenv("METAL_FORCE_INTEL");
392 bool forceIntel = forceIntelStr ? (atoi(forceIntelStr) != 0) : false;
393
394 if (forceIntel) {
395 NSArray<id<MTLDevice>> *allDevices = MTLCopyAllDevices();
396 for (id<MTLDevice> _device in allDevices) {
397 if (_device.lowPower) {
398 device = _device;
399 }
400 }
401 }
402
403 /* Metal Viewport requires argument buffer tier-2 support and Barycentric Coordinates.
404 * These are available on most hardware configurations supporting Metal 2.2. */
405 bool supports_argument_buffers_tier2 = ([device argumentBuffersSupport] ==
406 MTLArgumentBuffersTier2);
407 bool supports_barycentrics = [device supportsShaderBarycentricCoordinates] ||
409 bool supported_metal_version = [device supportsFamily:MTLGPUFamilyMac2];
410
411 bool result = supports_argument_buffers_tier2 && supports_barycentrics && supported_os_version &&
412 supported_metal_version;
413
414 if (G.debug & G_DEBUG_GPU) {
415 if (!supports_argument_buffers_tier2) {
416 printf("[Metal] Device does not support argument buffers tier 2\n");
417 }
418 if (!supports_barycentrics) {
419 printf("[Metal] Device does not support barycentrics coordinates\n");
420 }
421 if (!supported_metal_version) {
422 printf("[Metal] Device does not support metal 2.2 or higher\n");
423 }
424
425 if (result) {
426 printf("Device with name %s supports metal minimum requirements\n",
427 [[device name] UTF8String]);
428 }
429 }
430
431 return result;
432}
433
434void MTLBackend::capabilities_init(MTLContext *ctx)
435{
436 BLI_assert(ctx);
437 id<MTLDevice> device = ctx->device;
438 BLI_assert(device);
439
440 /* Initialize Capabilities. */
441 MTLBackend::capabilities.supports_argument_buffers_tier2 = ([device argumentBuffersSupport] ==
442 MTLArgumentBuffersTier2);
443 MTLBackend::capabilities.supports_family_mac1 = [device supportsFamily:MTLGPUFamilyMac1];
444 MTLBackend::capabilities.supports_family_mac2 = [device supportsFamily:MTLGPUFamilyMac2];
445 MTLBackend::capabilities.supports_family_mac_catalyst1 = [device
446 supportsFamily:MTLGPUFamilyMacCatalyst1];
447 MTLBackend::capabilities.supports_family_mac_catalyst2 = [device
448 supportsFamily:MTLGPUFamilyMacCatalyst2];
449 /* NOTE(Metal): Texture gather is supported on AMD, but results are non consistent
450 * with Apple Silicon GPUs. Disabling for now to avoid erroneous rendering. */
451 MTLBackend::capabilities.supports_texture_gather = [device hasUnifiedMemory];
452
453 /* GPU Type. */
454 const char *gpu_name = [device.name UTF8String];
455 if (strstr(gpu_name, "M1")) {
457 }
458 else if (strstr(gpu_name, "M2")) {
460 }
461 else if (strstr(gpu_name, "M3")) {
463 }
464 else {
466 }
467
468 /* Texture atomics supported in Metal 3.1. */
470#if defined(MAC_OS_VERSION_14_0)
471 if (@available(macOS 14.0, *)) {
473 }
474#endif
475
477 const bool is_tile_based_arch = (GPU_platform_architecture() == GPU_ARCHITECTURE_TBDR);
478 if (is_tile_based_arch) {
480 }
481 else {
482 /* NOTE: If emulating tile input reads, we must ensure we also expose position data. */
484 }
485
486 /* CPU Info */
489
490 /* Common Global Capabilities. */
491 GCaps.max_texture_size = ([device supportsFamily:MTLGPUFamilyApple3] ||
493 16384 :
494 8192;
499 128 :
500 (([device supportsFamily:MTLGPUFamilyApple4]) ? 96 : 31);
501 if (GCaps.max_textures <= 32) {
502 BLI_assert(false);
503 }
505
507 GCaps.max_textures_geom = 0; /* N/A geometry shaders not supported. */
509
511
512 /* Conservative uniform data limit is 4KB per-stage -- This is the limit of setBytes.
513 * MTLBuffer path is also supported but not as efficient. */
516
517 GCaps.max_batch_indices = 1 << 31;
518 GCaps.max_batch_vertices = 1 << 31;
521
522 /* Feature support */
523 GCaps.mem_stats_support = false;
525
527
528 /* Compile shaders on performance cores but leave one free so UI is still responsive.
529 * Also respect command line option to reduce number of threads. */
531 MTLBackend::capabilities.num_performance_cores - 1);
532
533 /* Maximum buffer bindings: 31. Consider required slot for uniforms/UBOs/Vertex attributes.
534 * Can use argument buffers if a higher limit is required. */
537 GCaps.max_storage_buffer_size = size_t(ctx->device.maxBufferLength);
538 GCaps.max_uniform_buffer_size = size_t(ctx->device.maxBufferLength);
539 GCaps.storage_buffer_alignment = 256; /* TODO(fclem): But also unused. */
540
541 GCaps.max_work_group_count[0] = 65535;
542 GCaps.max_work_group_count[1] = 65535;
543 GCaps.max_work_group_count[2] = 65535;
544 /* In Metal, total_thread_count is 512 or 1024, such that
545 * threadgroup `width*height*depth <= total_thread_count` */
546 uint max_threads_per_threadgroup_per_dim = ([device supportsFamily:MTLGPUFamilyApple4] ||
548 1024 :
549 512;
550 GCaps.max_work_group_size[0] = max_threads_per_threadgroup_per_dim;
551 GCaps.max_work_group_size[1] = max_threads_per_threadgroup_per_dim;
552 GCaps.max_work_group_size[2] = max_threads_per_threadgroup_per_dim;
553
555
556 /* OPENGL Related workarounds -- none needed for Metal. */
561
562 /* Metal related workarounds. */
563 /* Minimum per-vertex stride is 4 bytes in Metal.
564 * A bound vertex buffer must contribute at least 4 bytes per vertex. */
566
567 /* Force workarounds when starting blender with `--debug-gpu-force-workarounds`.
568 *
569 * Not all workarounds are listed here as some capabilities are currently assumed to be present
570 * on all devices. */
571 if (G.debug & G_DEBUG_GPU_FORCE_WORKAROUNDS) {
572 /* Texture gather is supported on AMD, but results are non consistent with Apple Silicon GPUs
573 * and can be disabled. */
577 }
578}
579
581
582/* -------------------------------------------------------------------- */
585
586void MTLBackend::compute_dispatch(int groups_x_len, int groups_y_len, int groups_z_len)
587{
588 /* Fetch Context.
589 * With Metal, workload submission and resource management occurs within the context.
590 * Call compute dispatch on valid context. */
592 BLI_assert(ctx != nullptr);
593 if (ctx) {
594 ctx->compute_dispatch(groups_x_len, groups_y_len, groups_z_len);
595 }
596}
597
599{
600 /* Fetch Context.
601 * With Metal, workload submission and resource management occurs within the context.
602 * Call compute dispatch on valid context. */
604 BLI_assert(ctx != nullptr);
605 if (ctx) {
606 ctx->compute_dispatch_indirect(indirect_buf);
607 }
608}
609
611
612} // namespace blender::gpu
@ G_DEBUG_GPU
@ G_DEBUG_GPU_FORCE_WORKAROUNDS
#define BLI_assert(a)
Definition BLI_assert.h:46
#define BLI_assert_msg(a, msg)
Definition BLI_assert.h:53
unsigned int uint
int BLI_system_thread_count(void)
Definition threads.cc:253
GPUDeviceType
@ GPU_DEVICE_UNKNOWN
@ GPU_DEVICE_ATI
@ GPU_DEVICE_SOFTWARE
@ GPU_DEVICE_NVIDIA
@ GPU_DEVICE_APPLE
@ GPU_DEVICE_INTEL
GPUArchitectureType
@ GPU_ARCHITECTURE_TBDR
@ GPU_ARCHITECTURE_IMR
GPUSupportLevel
@ GPU_SUPPORT_LEVEL_LIMITED
@ GPU_SUPPORT_LEVEL_SUPPORTED
GPUDriverType
@ GPU_DRIVER_ANY
@ GPU_DRIVER_OFFICIAL
@ GPU_DRIVER_SOFTWARE
GPUOSType
@ GPU_OS_MAC
GPUArchitectureType GPU_platform_architecture()
unsigned long long int uint64_t
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition btDbvt.cpp:52
const T * data() const
Definition BLI_array.hh:312
void reinitialize(const int64_t new_size)
Definition BLI_array.hh:419
ShaderCompiler * compiler_
void init(GPUDeviceType gpu_device, GPUOSType os_type, GPUDriverType driver_type, GPUSupportLevel gpu_support_level, GPUBackendType backend, const char *vendor_str, const char *renderer_str, const char *version_str, GPUArchitectureType arch_type)
void render_begin() override
QueryPool * querypool_alloc() override
void compute_dispatch_indirect(StorageBuf *indirect_buf) override
Context * context_alloc(void *ghost_window, void *ghost_context) override
PixelBuffer * pixelbuf_alloc(size_t size) override
FrameBuffer * framebuffer_alloc(const char *name) override
static bool metal_is_supported()
IndexBuf * indexbuf_alloc() override
StorageBuf * storagebuf_alloc(size_t size, GPUUsageType usage, const char *name) override
void render_end() override
void samplers_update() override
Batch * batch_alloc() override
Fence * fence_alloc() override
void init_resources() override
void compute_dispatch(int groups_x_len, int groups_y_len, int groups_z_len) override
static MTLCapabilities capabilities
UniformBuf * uniformbuf_alloc(size_t size, const char *name) override
VertBuf * vertbuf_alloc() override
void delete_resources() override
Texture * texture_alloc(const char *name) override
Shader * shader_alloc(const char *name) override
void render_step(bool force_resource_release=false) override
MTLSafeFreeList * get_current_safe_list()
static MTLContext * get()
void compute_dispatch_indirect(StorageBuf *indirect_buf)
void compute_dispatch(int groups_x_len, int groups_y_len, int groups_z_len)
static MTLBufferPool * get_global_memory_manager()
#define in
#define printf(...)
float length(VecOp< float, D >) RET
#define UINT_MAX
Definition hash_md5.cc:44
#define G(x, y, z)
GPUPlatformGlobal GPG
static const char * mtl_extensions_get_null(int)
static int get_num_performance_cpu_cores(id< MTLDevice > device)
NSAutoreleasePool * g_autoreleasepool
bool supports_barycentric_whitelist(id< MTLDevice > device)
static int get_num_efficiency_cpu_cores(id< MTLDevice > device)
GPUCapabilities GCaps
int g_autoreleasepool_depth
bool is_apple_sillicon(id< MTLDevice > device)
static void init(bNodeTree *, bNode *node)
const char * name