Blender V4.3
mtl_backend.mm
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2022-2023 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
9#include "BKE_global.hh"
10
11#include "gpu_backend.hh"
12#include "mtl_backend.hh"
13#include "mtl_batch.hh"
14#include "mtl_context.hh"
15#include "mtl_drawlist.hh"
16#include "mtl_framebuffer.hh"
17#include "mtl_immediate.hh"
18#include "mtl_index_buffer.hh"
19#include "mtl_query.hh"
20#include "mtl_shader.hh"
21#include "mtl_storage_buffer.hh"
22#include "mtl_uniform_buffer.hh"
23#include "mtl_vertex_buffer.hh"
24
27
28#include <Cocoa/Cocoa.h>
29#include <Metal/Metal.h>
30#include <QuartzCore/QuartzCore.h>
31#include <sys/sysctl.h>
32
33namespace blender::gpu {
34
35/* Global per-thread AutoReleasePool. */
36thread_local NSAutoreleasePool *g_autoreleasepool = nil;
37thread_local int g_autoreleasepool_depth = 0;
38
39/* -------------------------------------------------------------------- */
44 /* Placeholder -- Handled in MTLContext. */
45};
46
/**
 * Create the Metal implementation of a GPU context.
 *
 * \param ghost_window: Opaque window handle, forwarded unchanged to the #MTLContext constructor.
 * \param ghost_context: Opaque context handle, forwarded unchanged to the #MTLContext constructor.
 * \return A heap-allocated #MTLContext. Nothing in this function releases it.
 */
Context *MTLBackend::context_alloc(void *ghost_window, void *ghost_context)
{
  return new MTLContext(ghost_window, ghost_context);
}
51
53{
54 return new MTLBatch();
55};
56
58{
59 return new MTLDrawList(list_length);
60};
61
63{
64 return new MTLFence();
65};
66
68{
69 return new MTLFrameBuffer(MTLContext::get(), name);
70};
71
73{
74 return new MTLIndexBuf();
75};
76
78{
79 return new MTLPixelBuffer(size);
80};
81
86
88{
89 return new MTLShader(MTLContext::get(), name);
90};
91
93{
94 return new gpu::MTLTexture(name);
95}
96
/**
 * Create the Metal implementation of a uniform buffer.
 *
 * \param size: Size value forwarded unchanged to the #MTLUniformBuf constructor.
 * \param name: Name forwarded unchanged to the #MTLUniformBuf constructor.
 * \return A heap-allocated #MTLUniformBuf. Nothing in this function releases it.
 */
UniformBuf *MTLBackend::uniformbuf_alloc(size_t size, const char *name)
{
  return new MTLUniformBuf(size, name);
}
101
/**
 * Create the Metal implementation of a storage buffer.
 *
 * \param size: Size value forwarded unchanged to the #MTLStorageBuf constructor.
 * \param usage: Usage hint forwarded unchanged to the #MTLStorageBuf constructor.
 * \param name: Name forwarded unchanged to the #MTLStorageBuf constructor.
 * \return A heap-allocated #MTLStorageBuf. Nothing in this function releases it.
 */
StorageBuf *MTLBackend::storagebuf_alloc(size_t size, GPUUsageType usage, const char *name)
{
  MTLStorageBuf *storage_buffer = new MTLStorageBuf(size, usage, name);
  return storage_buffer;
}
106
108{
109 return new MTLVertBuf();
110}
111
113{
114 /* All Rendering must occur within a render boundary */
115 /* Track a call-count for nested calls, used to ensure we are inside an
116 * autoreleasepool from all rendering path. */
118
119 if (g_autoreleasepool == nil) {
120 g_autoreleasepool = [[NSAutoreleasePool alloc] init];
121 }
124}
125
127{
128 /* If call-count reaches zero, drain auto release pool.
129 * Ensures temporary objects are freed within a frame's lifetime. */
133
134 if (g_autoreleasepool_depth == 0) {
135 [g_autoreleasepool drain];
136 g_autoreleasepool = nil;
137 }
138}
139
141{
142 /* NOTE(Metal): Primarily called from main thread, but below data-structures
143 * and operations are thread-safe, and GPUContext rendering coordination
144 * is also thread-safe. */
145
146 /* Flush any MTLSafeFreeLists which have previously been released by any MTLContext. */
148
149 /* End existing MTLSafeFreeList and begin new list --
150 * Buffers wont `free` until all associated in-flight command buffers have completed.
151 * Decrement final reference count for ensuring the previous list is certainly
152 * released. */
153 MTLSafeFreeList *cmd_free_buffer_list =
155 if (cmd_free_buffer_list->should_flush()) {
157 }
158}
159
161{
162 return (g_autoreleasepool != nil);
163}
164
167/* -------------------------------------------------------------------- */
171/* For Metal, platform_init needs to be called after MTLContext initialization. */
/**
 * Fill the global platform description `GPG` using the Metal device owned by `ctx`.
 *
 * Returns immediately when `GPG.initialized` is already set. The device type and driver are
 * chosen by substring-matching the Metal device name, then everything is handed to `GPG.init`.
 *
 * \param ctx: Context providing `ctx->device`; asserted to be non-null.
 */
172void MTLBackend::platform_init(MTLContext *ctx)
173{
174 if (GPG.initialized) {
175 return;
176 }
177
182
183 BLI_assert(ctx);
184 id<MTLDevice> mtl_device = ctx->device;
/* NOTE(review): this asserts `device`, not the `mtl_device` handle fetched on the previous line.
 * The declaration of `device` is not visible here; presumably `mtl_device` was intended --
 * confirm. */
185 BLI_assert(device);
186
/* The Metal device name is used as the `vendor` string: it is what the substring tests below
 * inspect and what is passed to `GPG.init` as the vendor. */
187 NSString *gpu_name = [mtl_device name];
188 const char *vendor = [gpu_name UTF8String];
189 const char *renderer = "Metal API";
190 const char *version = "1.2";
191 if (G.debug & G_DEBUG_GPU) {
192 printf("METAL API - DETECTED GPU: %s\n", vendor);
193 }
194
195 /* macOS is the only supported platform, but check to ensure we are not building with Metal
196 * enablement on another platform. */
197 BLI_assert_msg(os == GPU_OS_MAC, "Platform must be macOS");
198
199 /* Determine Vendor from name. */
200 if (strstr(vendor, "ATI") || strstr(vendor, "AMD")) {
201 device = GPU_DEVICE_ATI;
202 driver = GPU_DRIVER_OFFICIAL;
203 }
204 else if (strstr(vendor, "NVIDIA")) {
205 device = GPU_DEVICE_NVIDIA;
206 driver = GPU_DRIVER_OFFICIAL;
207 }
/* Intel is the only branch that also changes `support_level` (to limited). */
208 else if (strstr(vendor, "Intel")) {
209 device = GPU_DEVICE_INTEL;
210 driver = GPU_DRIVER_OFFICIAL;
211 support_level = GPU_SUPPORT_LEVEL_LIMITED;
212 }
213 else if (strstr(vendor, "Apple") || strstr(vendor, "APPLE")) {
214 /* Apple Silicon. */
215 device = GPU_DEVICE_APPLE;
216 driver = GPU_DRIVER_OFFICIAL;
217 }
/* NOTE(review): `renderer` is the constant "Metal API" assigned above and is not reassigned in
 * the visible code, so the two `renderer` substring tests below cannot match -- confirm whether
 * these software-renderer branches are still needed. */
218 else if (strstr(renderer, "Apple Software Renderer")) {
219 device = GPU_DEVICE_SOFTWARE;
220 driver = GPU_DRIVER_SOFTWARE;
221 }
222 else if (strstr(renderer, "llvmpipe") || strstr(renderer, "softpipe")) {
223 device = GPU_DEVICE_SOFTWARE;
224 driver = GPU_DRIVER_SOFTWARE;
225 }
/* No match: only reported when GPU debugging is enabled; `device` and `driver` keep whatever
 * values they held before the chain. */
226 else if (G.debug & G_DEBUG_GPU) {
227 printf("Warning: Could not find a matching GPU name. Things may not behave as expected.\n");
228 printf("Detected configuration:\n");
229 printf("Vendor: %s\n", vendor);
230 printf("Renderer: %s\n", renderer);
231 }
232
/* The architecture type depends on the device reporting unified memory AND having been
 * classified as an Apple GPU above. */
233 GPUArchitectureType architecture_type = (mtl_device.hasUnifiedMemory &&
234 device == GPU_DEVICE_APPLE) ?
237
238 GPG.init(device,
239 os,
240 driver,
241 support_level,
243 vendor,
244 renderer,
245 version,
246 architecture_type);
247}
248
/** Tear down the global platform description by calling `GPG.clear()`. */
249void MTLBackend::platform_exit()
250{
252 GPG.clear();
253}
254
257/* -------------------------------------------------------------------- */
260MTLCapabilities MTLBackend::capabilities = {};
261
/**
 * Extension-name lookup stub: reports "no extension" for every index.
 *
 * \return Always `nullptr`; the index argument is ignored.
 */
static const char *mtl_extensions_get_null(int /*extension_index*/)
{
  const char *no_extension_name = nullptr;
  return no_extension_name;
}
266
/**
 * Name-based allow-list check for barycentric coordinate support.
 *
 * \param device: Metal device to inspect; its name is asserted to be non-empty.
 * \return True when the device supports `MTLGPUFamilyMac2` and its name contains one of the
 * accepted vendor tags ("AMD", "Apple", "APPLE", "Intel", "INTEL").
 */
bool supports_barycentric_whitelist(id<MTLDevice> device)
{
  NSString *device_name = [device name];
  BLI_assert([device_name length]);
  const char *name_cstr = [device_name UTF8String];

  /* Hardware family requirement, queried unconditionally. */
  const bool is_family_mac2 = [device supportsFamily:MTLGPUFamilyMac2];

  /* Vendor tags accepted as known-good configurations (matching is case-sensitive). */
  static const char *const allowed_vendor_tags[] = {"AMD", "Apple", "APPLE", "Intel", "INTEL"};
  bool vendor_allowed = false;
  for (const char *tag : allowed_vendor_tags) {
    if (strstr(name_cstr, tag) != nullptr) {
      vendor_allowed = true;
      break;
    }
  }

  return is_family_mac2 && vendor_allowed;
}
288
/**
 * Name-based Apple GPU detection.
 *
 * \param device: Metal device to inspect; its name is asserted to be non-empty.
 * \return True when the device name contains "Apple" or "APPLE" (case-sensitive match).
 */
bool is_apple_sillicon(id<MTLDevice> device)
{
  NSString *device_name = [device name];
  BLI_assert([device_name length]);

  const char *name_cstr = [device_name UTF8String];

  if (strstr(name_cstr, "Apple") != nullptr) {
    return true;
  }
  return strstr(name_cstr, "APPLE") != nullptr;
}
299
/**
 * Query the number of CPU cores to treat as "performance" cores.
 *
 * When the device name identifies an Apple GPU, the `hw.perflevel0.logicalcpu` sysctl is read;
 * otherwise the total `hw.logicalcpu` count is used.
 *
 * \param device: Metal device whose name selects which sysctl key is queried.
 * \return The reported core count, or -1 when the query fails (an assert fires first).
 */
static int get_num_performance_cpu_cores(id<MTLDevice> device)
{
  const char *sysctl_key = is_apple_sillicon(device) ? "hw.perflevel0.logicalcpu" :
                                                       "hw.logicalcpu";

  /* Read the value as a whole `int`: decoding only the first byte of the result would truncate
   * counts above 255 and depend on host byte order. */
  int core_count = 0;
  size_t core_count_size = sizeof(core_count);

  int num_performance_cores = -1;
  if (sysctlbyname(sysctl_key, &core_count, &core_count_size, nullptr, 0) == 0 &&
      core_count_size == sizeof(core_count))
  {
    num_performance_cores = core_count;
  }

  BLI_assert(num_performance_cores != -1);
  return num_performance_cores;
}
324
/**
 * Query the number of efficiency CPU cores.
 *
 * \param device: Metal device whose name decides whether efficiency cores are queried at all.
 * \return 0 when the device name does not identify an Apple GPU. Otherwise the value of the
 * `hw.perflevel1.logicalcpu` sysctl, or -1 when that query fails (an assert fires first).
 */
static int get_num_efficiency_cpu_cores(id<MTLDevice> device)
{
  if (!is_apple_sillicon(device)) {
    return 0;
  }

  /* Read the value as a whole `int`: decoding only the first byte of the result would truncate
   * counts above 255 and depend on host byte order. */
  int core_count = 0;
  size_t core_count_size = sizeof(core_count);

  int num_efficiency_cores = -1;
  if (sysctlbyname("hw.perflevel1.logicalcpu", &core_count, &core_count_size, nullptr, 0) == 0 &&
      core_count_size == sizeof(core_count))
  {
    num_efficiency_cores = core_count;
  }

  BLI_assert(num_efficiency_cores != -1);
  return num_efficiency_cores;
}
346
348{
349 /* Device compatibility information using Metal Feature-set tables.
350 * See: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf */
351
352 NSOperatingSystemVersion version = [[NSProcessInfo processInfo] operatingSystemVersion];
353
354 /* Metal Viewport requires macOS Version 10.15 onward. */
355 bool supported_os_version = version.majorVersion >= 11 ||
356 (version.majorVersion == 10 ? version.minorVersion >= 15 : false);
357 if (!supported_os_version) {
358 printf(
359 "OS Version too low to run minimum required metal version. Required at least 10.15, got "
360 "%ld.%ld \n",
361 (long)version.majorVersion,
362 (long)version.minorVersion);
363 return false;
364 }
365
366 id<MTLDevice> device = MTLCreateSystemDefaultDevice();
367
368 /* Debug: Enable low power GPU with Environment Var: METAL_FORCE_INTEL. */
369 static const char *forceIntelStr = getenv("METAL_FORCE_INTEL");
370 bool forceIntel = forceIntelStr ? (atoi(forceIntelStr) != 0) : false;
371
372 if (forceIntel) {
373 NSArray<id<MTLDevice>> *allDevices = MTLCopyAllDevices();
374 for (id<MTLDevice> _device in allDevices) {
375 if (_device.lowPower) {
376 device = _device;
377 }
378 }
379 }
380
381 /* Metal Viewport requires argument buffer tier-2 support and Barycentric Coordinates.
382 * These are available on most hardware configurations supporting Metal 2.2. */
383 bool supports_argument_buffers_tier2 = ([device argumentBuffersSupport] ==
384 MTLArgumentBuffersTier2);
385 bool supports_barycentrics = [device supportsShaderBarycentricCoordinates] ||
387 bool supported_metal_version = [device supportsFamily:MTLGPUFamilyMac2];
388
389 bool result = supports_argument_buffers_tier2 && supports_barycentrics && supported_os_version &&
390 supported_metal_version;
391
392 if (G.debug & G_DEBUG_GPU) {
393 if (!supports_argument_buffers_tier2) {
394 printf("[Metal] Device does not support argument buffers tier 2\n");
395 }
396 if (!supports_barycentrics) {
397 printf("[Metal] Device does not support barycentrics coordinates\n");
398 }
399 if (!supported_metal_version) {
400 printf("[Metal] Device does not support metal 2.2 or higher\n");
401 }
402
403 if (result) {
404 printf("Device with name %s supports metal minimum requirements\n",
405 [[device name] UTF8String]);
406 }
407 }
408
409 return result;
410}
411
/**
 * Fill `MTLBackend::capabilities` and the global `GCaps` limits from the Metal device owned by
 * `ctx`.
 *
 * \param ctx: Context providing `ctx->device`; both the context and its device are asserted to
 * be non-null.
 */
412void MTLBackend::capabilities_init(MTLContext *ctx)
413{
414 BLI_assert(ctx);
415 id<MTLDevice> device = ctx->device;
416 BLI_assert(device);
417
418 /* Initialize Capabilities. */
419 MTLBackend::capabilities.supports_argument_buffers_tier2 = ([device argumentBuffersSupport] ==
420 MTLArgumentBuffersTier2);
421 MTLBackend::capabilities.supports_family_mac1 = [device supportsFamily:MTLGPUFamilyMac1];
422 MTLBackend::capabilities.supports_family_mac2 = [device supportsFamily:MTLGPUFamilyMac2];
424 supportsFamily:MTLGPUFamilyMacCatalyst1];
426 supportsFamily:MTLGPUFamilyMacCatalyst2];
427 /* NOTE(Metal): Texture gather is supported on AMD, but results are non consistent
428 * with Apple Silicon GPUs. Disabling for now to avoid erroneous rendering. */
/* In effect: texture gather is enabled only for devices that report unified memory. */
429 MTLBackend::capabilities.supports_texture_gather = [device hasUnifiedMemory];
430
431 /* GPU Type. */
/* The generation is picked by case-sensitive substring match on the device name. */
432 const char *gpu_name = [device.name UTF8String];
433 if (strstr(gpu_name, "M1")) {
435 }
436 else if (strstr(gpu_name, "M2")) {
438 }
439 else if (strstr(gpu_name, "M3")) {
441 }
442 else {
444 }
445
446 /* Texture atomics supported in Metal 3.1. */
448#if defined(MAC_OS_VERSION_14_0)
449 if (@available(macOS 14.0, *)) {
451 }
452#endif
453
454 /* CPU Info */
457
458 /* Common Global Capabilities. */
459 GCaps.max_texture_size = ([device supportsFamily:MTLGPUFamilyApple3] ||
461 16384 :
462 8192;
466 128 :
467 (([device supportsFamily:MTLGPUFamilyApple4]) ? 96 : 31);
/* Asserts whenever `GCaps.max_textures` is 32 or lower. */
468 if (GCaps.max_textures <= 32) {
469 BLI_assert(false);
470 }
472
474 GCaps.max_textures_geom = 0; /* N/A geometry shaders not supported. */
476
478
479 /* Conservative uniform data limit is 4KB per-stage -- This is the limit of setBytes.
480 * MTLBuffer path is also supported but not as efficient. */
483
/* NOTE(review): `1 << 31` shifts a signed `int` literal into its sign bit. If these fields are
 * signed `int`, the stored value is negative rather than 2^31 -- confirm the field type and the
 * intended limit. */
484 GCaps.max_batch_indices = 1 << 31;
485 GCaps.max_batch_vertices = 1 << 31;
488
489 /* Feature support */
490 GCaps.mem_stats_support = false;
493
495
496 /* Compile shaders on performance cores but leave one free so UI is still responsive */
498
499 /* Maximum buffer bindings: 31. Consider required slot for uniforms/UBOs/Vertex attributes.
500 * Can use argument buffers if a higher limit is required. */
502 GCaps.max_storage_buffer_size = size_t(ctx->device.maxBufferLength);
503
504 GCaps.max_work_group_count[0] = 65535;
505 GCaps.max_work_group_count[1] = 65535;
506 GCaps.max_work_group_count[2] = 65535;
507 /* In Metal, total_thread_count is 512 or 1024, such that
508 * threadgroup `width*height*depth <= total_thread_count` */
/* The same per-dimension limit is written to all three work-group size entries. */
509 uint max_threads_per_threadgroup_per_dim = ([device supportsFamily:MTLGPUFamilyApple4] ||
511 1024 :
512 512;
513 GCaps.max_work_group_size[0] = max_threads_per_threadgroup_per_dim;
514 GCaps.max_work_group_size[1] = max_threads_per_threadgroup_per_dim;
515 GCaps.max_work_group_size[2] = max_threads_per_threadgroup_per_dim;
516
519
520 /* OPENGL Related workarounds -- none needed for Metal. */
526 GCaps.broken_amd_driver = false;
528
529 /* Metal related workarounds. */
530 /* Minimum per-vertex stride is 4 bytes in Metal.
531 * A bound vertex buffer must contribute at least 4 bytes per vertex. */
533}
534
537/* -------------------------------------------------------------------- */
/**
 * Forward a compute dispatch to the context.
 *
 * Asserts that `ctx` is non-null. If execution continues past the assert with a null `ctx`,
 * the call does nothing.
 *
 * \param groups_x_len: Group count passed through unchanged as the first argument.
 * \param groups_y_len: Group count passed through unchanged as the second argument.
 * \param groups_z_len: Group count passed through unchanged as the third argument.
 */
541void MTLBackend::compute_dispatch(int groups_x_len, int groups_y_len, int groups_z_len)
542{
543 /* Fetch Context.
544 * With Metal, workload submission and resource management occurs within the context.
545 * Call compute dispatch on valid context. */
/* NOTE(review): the statement that declares `ctx` is not visible here -- confirm how it is
 * obtained. */
547 BLI_assert(ctx != nullptr);
548 if (ctx) {
549 ctx->compute_dispatch(groups_x_len, groups_y_len, groups_z_len);
550 }
551}
552
554{
555 /* Fetch Context.
556 * With Metal, workload submission and resource management occurs within the context.
557 * Call compute dispatch on valid context. */
559 BLI_assert(ctx != nullptr);
560 if (ctx) {
561 ctx->compute_dispatch_indirect(indirect_buf);
562 }
563}
564
567} // namespace blender::gpu
@ G_DEBUG_GPU
#define BLI_assert(a)
Definition BLI_assert.h:50
#define BLI_assert_msg(a, msg)
Definition BLI_assert.h:57
unsigned int uint
eGPUDriverType
@ GPU_DRIVER_ANY
@ GPU_DRIVER_OFFICIAL
@ GPU_DRIVER_SOFTWARE
GPUArchitectureType
@ GPU_ARCHITECTURE_TBDR
@ GPU_ARCHITECTURE_IMR
eGPUSupportLevel
@ GPU_SUPPORT_LEVEL_LIMITED
@ GPU_SUPPORT_LEVEL_SUPPORTED
eGPUOSType
@ GPU_OS_MAC
eGPUDeviceType
@ GPU_DEVICE_UNKNOWN
@ GPU_DEVICE_ATI
@ GPU_DEVICE_SOFTWARE
@ GPU_DEVICE_NVIDIA
@ GPU_DEVICE_APPLE
@ GPU_DEVICE_INTEL
void init()
void init(eGPUDeviceType gpu_device, eGPUOSType os_type, eGPUDriverType driver_type, eGPUSupportLevel gpu_support_level, eGPUBackendType backend, const char *vendor_str, const char *renderer_str, const char *version_str, GPUArchitectureType arch_type)
void render_step() override
void render_begin() override
QueryPool * querypool_alloc() override
void compute_dispatch_indirect(StorageBuf *indirect_buf) override
Context * context_alloc(void *ghost_window, void *ghost_context) override
PixelBuffer * pixelbuf_alloc(size_t size) override
FrameBuffer * framebuffer_alloc(const char *name) override
static bool metal_is_supported()
IndexBuf * indexbuf_alloc() override
StorageBuf * storagebuf_alloc(size_t size, GPUUsageType usage, const char *name) override
void render_end() override
void samplers_update() override
Batch * batch_alloc() override
Fence * fence_alloc() override
void compute_dispatch(int groups_x_len, int groups_y_len, int groups_z_len) override
static MTLCapabilities capabilities
UniformBuf * uniformbuf_alloc(size_t size, const char *name) override
VertBuf * vertbuf_alloc() override
Texture * texture_alloc(const char *name) override
DrawList * drawlist_alloc(int list_length) override
Shader * shader_alloc(const char *name) override
MTLSafeFreeList * get_current_safe_list()
static MTLContext * get()
void compute_dispatch_indirect(StorageBuf *indirect_buf)
void compute_dispatch(int groups_x_len, int groups_y_len, int groups_z_len)
static MTLBufferPool * get_global_memory_manager()
#define printf
#define NULL
#define G(x, y, z)
GPUPlatformGlobal GPG
static const char * mtl_extensions_get_null(int)
static int get_num_performance_cpu_cores(id< MTLDevice > device)
bool supports_barycentric_whitelist(id< MTLDevice > device)
static int get_num_efficiency_cpu_cores(id< MTLDevice > device)
GPUCapabilities GCaps
thread_local int g_autoreleasepool_depth
bool is_apple_sillicon(id< MTLDevice > device)
thread_local NSAutoreleasePool * g_autoreleasepool