Blender V4.3
util.mm
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2021-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#ifdef WITH_METAL
6
7# include "device/metal/util.h"
9# include "util/md5.h"
10# include "util/path.h"
11# include "util/string.h"
12# include "util/time.h"
13
14# include <IOKit/IOKitLib.h>
15# include <pwd.h>
16# include <sys/shm.h>
17# include <time.h>
18
20
/* Build the display label for `device`: the Metal device name followed by a " (GPU ...)"
 * suffix. */
string MetalInfo::get_device_name(id<MTLDevice> device)
{
  /* The core count is part of the label so that GPU variants sharing the same name can be
   * told apart in benchmarks. When the count is zero the shorter suffix is used. */
  const int num_cores = get_apple_gpu_core_count(device);
  const char *suffix_format = num_cores ? " (GPU - %d cores)" : " (GPU)";

  string label = [device.name UTF8String];
  label += string_printf(suffix_format, num_cores);
  return label;
}
31
/* Read the "gpu-core-count" property of the IORegistry entry that matches `device`.
 *
 * Returns 0 when running on macOS older than 12.0, when no matching registry entry is found,
 * or when the property is missing or is not a CFNumber. */
int MetalInfo::get_apple_gpu_core_count(id<MTLDevice> device)
{
  int core_count = 0;
  if (@available(macos 12.0, *)) {
    /* The matching dictionary created by IORegistryEntryIDMatching() is consumed by
     * IOServiceGetMatchingService(), so only the returned service needs releasing. */
    io_service_t gpu_service = IOServiceGetMatchingService(
        kIOMainPortDefault, IORegistryEntryIDMatching(device.registryID));
    if (gpu_service == IO_OBJECT_NULL) {
      /* No registry entry for this device: nothing to query and nothing to release. */
      return core_count;
    }

    if (CFNumberRef numberRef = (CFNumberRef)IORegistryEntryCreateCFProperty(
            gpu_service, CFSTR("gpu-core-count"), kCFAllocatorDefault, 0))
    {
      /* The property is created as a generic CFTypeRef, so verify it really is a number
       * before reading it as one. */
      if (CFGetTypeID(numberRef) == CFNumberGetTypeID()) {
        CFNumberGetValue(numberRef, kCFNumberSInt32Type, &core_count);
      }
      CFRelease(numberRef);
    }

    /* The caller owns the service object returned by IOServiceGetMatchingService(); without
     * this release every call leaks one IOKit object reference. */
    IOObjectRelease(gpu_service);
  }
  return core_count;
}
49
/* Classify the Apple GPU generation of `device` by looking for "M1", "M2" or "M3" in its
 * name. Anything else is reported as APPLE_UNKNOWN. */
AppleGPUArchitecture MetalInfo::get_apple_gpu_architecture(id<MTLDevice> device)
{
  const char *name = [device.name UTF8String];
  auto name_contains = [name](const char *token) { return strstr(name, token) != nullptr; };

  if (name_contains("M1")) {
    return APPLE_M1;
  }

  if (name_contains("M2")) {
    /* M2 parts are split by GPU core count: up to 10 cores is the base M2, more is "big". */
    if (get_apple_gpu_core_count(device) <= 10) {
      return APPLE_M2;
    }
    return APPLE_M2_BIG;
  }

  if (name_contains("M3")) {
    return APPLE_M3;
  }

  return APPLE_UNKNOWN;
}
64
/* Number of elements per partition used when sorting active indices.
 *
 * The CYCLES_METAL_SORT_PARTITION_ELEMENTS environment variable, when set, overrides the
 * built-in default (its value is parsed with atoi()). */
int MetalInfo::optimal_sort_partition_elements()
{
  const char *override_value = getenv("CYCLES_METAL_SORT_PARTITION_ELEMENTS");
  if (override_value != nullptr) {
    return atoi(override_value);
  }

  /* Partitioning the active indices before sorting each partition by material gives better
   * cache utilization on M1 and M2 GPUs. Chunks of 65536 elements yield an overall render time
   * speedup of up to 15%. */
  return 65536;
}
77
/* Return the list of Metal devices that Cycles can render on.
 *
 * The devices are enumerated on the first call and cached in function-local statics; later
 * calls return the cached list. Each device kept in the list is retained for the lifetime of
 * the process.
 *
 * NOTE(review): the cache flag is a plain static bool with no locking here; presumably the
 * first call is not made concurrently from several threads -- confirm against callers. */
vector<id<MTLDevice>> const &MetalInfo::get_usable_devices()
{
  static vector<id<MTLDevice>> usable_devices;
  static bool already_enumerated = false;

  if (already_enumerated) {
    return usable_devices;
  }

  metal_printf("Usable Metal devices:\n");

  /* MTLCopyAllDevices() follows the Copy rule: the returned array is owned by the caller. This
   * file uses manual retain/release, so keep a handle on the array in order to release it once
   * enumeration is done instead of leaking it. */
  NSArray<id<MTLDevice>> *all_devices = MTLCopyAllDevices();
  for (id<MTLDevice> device in all_devices) {
    string device_name = get_device_name(device);
    bool usable = false;

    if (@available(macos 12.2, *)) {
      const char *device_name_char = [device.name UTF8String];
      if (!(strstr(device_name_char, "Intel") || strstr(device_name_char, "AMD")) &&
          strstr(device_name_char, "Apple"))
      {
        /* TODO: Implement a better way to identify device vendor instead of relying on name. */
        usable = true;
      }
    }

    if (usable) {
      metal_printf("- %s\n", device_name.c_str());
      /* Take our own reference so the device outlives the array released below. */
      [device retain];
      usable_devices.push_back(device);
    }
    else {
      metal_printf(" (skipping \"%s\")\n", device_name.c_str());
    }
  }
  [all_devices release];

  if (usable_devices.empty()) {
    metal_printf(" No usable Metal devices found\n");
  }
  already_enumerated = true;

  return usable_devices;
}
118
/* Hand out a temporary buffer of exactly `length` bytes that is tied to `command_buffer`.
 *
 * An idle pooled buffer with the same length, storage mode and CPU cache mode is reused when
 * one exists; otherwise a new buffer is allocated on `device`, accounted for in `stats` and
 * added to the pool. When `pointer` is non-null, `length` bytes are copied from it into the
 * buffer before it is returned. */
id<MTLBuffer> MetalBufferPool::get_buffer(id<MTLDevice> device,
                                          id<MTLCommandBuffer> command_buffer,
                                          NSUInteger length,
                                          MTLResourceOptions options,
                                          const void *pointer,
                                          Stats &stats)
{
  /* Unpack the storage and CPU cache modes that are encoded in the resource options. */
  const MTLStorageMode wanted_storage_mode = MTLStorageMode(
      (options & MTLResourceStorageModeMask) >> MTLResourceStorageModeShift);
  const MTLCPUCacheMode wanted_cache_mode = MTLCPUCacheMode(
      (options & MTLResourceCPUCacheModeMask) >> MTLResourceCPUCacheModeShift);

  id<MTLBuffer> result = nil;

  {
    thread_scoped_lock pool_lock(buffer_mutex);

    /* Look for a pooled buffer that is not attached to any command buffer and matches the
     * requested size and modes. */
    for (MetalBufferListEntry &entry : temp_buffers) {
      if (entry.command_buffer != nil) {
        continue;
      }
      if (entry.buffer.length != length || entry.buffer.storageMode != wanted_storage_mode ||
          entry.buffer.cpuCacheMode != wanted_cache_mode)
      {
        continue;
      }
      entry.command_buffer = command_buffer;
      result = entry.buffer;
      break;
    }

    if (result == nil) {
      /* Nothing suitable is idle: allocate a fresh buffer and register it with the pool.
       * Typically the pool only grows to a handful of entries. */
      result = [device newBufferWithLength:length options:options];
      const NSUInteger allocated_size = result.allocatedSize;
      stats.mem_alloc(allocated_size);
      total_temp_mem_size += allocated_size;
      temp_buffers.push_back(MetalBufferListEntry{result, command_buffer});
    }
  }

  /* Upload the caller's data, if any. */
  if (pointer != nullptr) {
    memcpy(result.contents, pointer, length);
    if (result.storageMode == MTLStorageModeManaged) {
      /* Managed buffers must be told which range was written by the CPU. */
      [result didModifyRange:NSMakeRange(0, length)];
    }
  }

  return result;
}
165
/* Detach every pooled buffer from `command_buffer`, making those buffers available for reuse
 * by get_buffer(). `command_buffer` must not be nil. */
void MetalBufferPool::process_command_buffer_completion(id<MTLCommandBuffer> command_buffer)
{
  assert(command_buffer);

  thread_scoped_lock pool_lock(buffer_mutex);
  for (MetalBufferListEntry &entry : temp_buffers) {
    if (entry.command_buffer != command_buffer) {
      continue;
    }
    /* A nil command buffer marks the entry as unused. */
    entry.command_buffer = nil;
  }
}
177
/* Tear down the pool: release every pooled buffer, subtract its allocated size from the
 * running total, and empty the list. */
MetalBufferPool::~MetalBufferPool()
{
  thread_scoped_lock pool_lock(buffer_mutex);

  for (MetalBufferListEntry &entry : temp_buffers) {
    /* Read the size before releasing, while the buffer is still valid. */
    total_temp_mem_size -= entry.buffer.allocatedSize;
    [entry.buffer release];
    entry.buffer = nil;
  }

  temp_buffers.clear();
}
189
191
192#endif /* WITH_METAL */
volatile int lock
void mem_alloc(size_t size)
Definition util/stats.h:20
CCL_NAMESPACE_BEGIN struct Options options
#define CCL_NAMESPACE_END
#define str(s)
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
Definition string.cpp:23
std::unique_lock< std::mutex > thread_scoped_lock
Definition thread.h:30