Blender V5.0
util.mm
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2021-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#ifdef WITH_METAL
6
7# include "device/metal/util.h"
9# include "util/md5.h"
10# include "util/path.h"
11# include "util/string.h"
12# include "util/time.h"
13
14# include <IOKit/IOKitLib.h>
15# include <ctime>
16# include <pwd.h>
17# include <sys/shm.h>
18
20
21/* Comment this out to test workaround for getting gpuAddress and gpuResourceID on macOS < 13.0. */
22# define CYCLES_USE_TIER2D_BINDLESS
23
/* Build the display name for a Metal device: the name reported by the device followed by a
 * "(GPU ...)" suffix, which includes the GPU core count when that count is non-zero. */
string MetalInfo::get_device_name(id<MTLDevice> device)
{
  /* The core count lets benchmarks distinguish between GPU variants sharing the same name. */
  const int num_cores = get_apple_gpu_core_count(device);
  const char *suffix_format = num_cores ? " (GPU - %d cores)" : " (GPU)";

  string result = [device.name UTF8String];
  result += string_printf(suffix_format, num_cores);
  return result;
}
34
/* Look up the "gpu-core-count" property of the IORegistry entry matching the given Metal device.
 *
 * Returns 0 when the count cannot be determined: on macOS older than 12.0, when no registry
 * entry matches the device, or when the property is missing or is not a CFNumber. */
int MetalInfo::get_apple_gpu_core_count(id<MTLDevice> device)
{
  int core_count = 0;
  if (@available(macos 12.0, *)) {
    /* The matching dictionary is consumed by IOServiceGetMatchingService, so it is not released
     * here. */
    io_service_t gpu_service = IOServiceGetMatchingService(
        kIOMainPortDefault, IORegistryEntryIDMatching(device.registryID));
    if (gpu_service == IO_OBJECT_NULL) {
      /* No registry entry for this device: nothing to query and nothing to release. */
      return core_count;
    }

    if (CFTypeRef property = IORegistryEntryCreateCFProperty(
            gpu_service, CFSTR("gpu-core-count"), nullptr, 0))
    {
      /* Only trust the value if the property really is a number. */
      if (CFGetTypeID(property) == CFNumberGetTypeID()) {
        CFNumberGetValue((CFNumberRef)property, kCFNumberSInt32Type, &core_count);
      }
      CFRelease(property);
    }

    /* The service object is returned with a reference owned by the caller; release it so the
     * lookup does not leak a kernel object handle on every call. */
    IOObjectRelease(gpu_service);
  }
  return core_count;
}
52
/* Classify the GPU architecture from substrings of the device name.
 *
 * "M1" maps to APPLE_M1, "M3" to APPLE_M3, and "M2" to APPLE_M2 or APPLE_M2_BIG depending on
 * whether the GPU has at most 10 cores. Any other name yields APPLE_UNKNOWN. */
AppleGPUArchitecture MetalInfo::get_apple_gpu_architecture(id<MTLDevice> device)
{
  const char *name = [device.name UTF8String];
  const auto name_contains = [name](const char *token) -> bool {
    return strstr(name, token) != nullptr;
  };

  if (name_contains("M1")) {
    return APPLE_M1;
  }
  if (name_contains("M2")) {
    /* M2 variants are told apart by their GPU core count. */
    const int num_cores = get_apple_gpu_core_count(device);
    if (num_cores <= 10) {
      return APPLE_M2;
    }
    return APPLE_M2_BIG;
  }
  if (name_contains("M3")) {
    return APPLE_M3;
  }
  return APPLE_UNKNOWN;
}
67
/* Number of elements per partition used when sorting active indices.
 *
 * The CYCLES_METAL_SORT_PARTITION_ELEMENTS environment variable, when set, overrides the default
 * (its value is parsed with atoi). */
int MetalInfo::optimal_sort_partition_elements()
{
  const char *override_value = getenv("CYCLES_METAL_SORT_PARTITION_ELEMENTS");
  if (override_value != nullptr) {
    return atoi(override_value);
  }

  /* On M1 and M2 GPUs, cache utilization is better when the active indices are partitioned before
   * each partition is sorted by material. Chunks of 65536 elements give an overall render time
   * speedup of up to 15%. */
  const int default_partition_elements = 65536;
  return default_partition_elements;
}
80
/* Return the list of Metal devices that can be used for rendering.
 *
 * The devices are enumerated on the first call only; the result is cached in a function-local
 * static and returned by reference on every later call. A device is considered usable when
 * running on macOS 12.2 or newer, its name contains "Apple" but neither "Intel" nor "AMD", and it
 * reports unified memory. Each usable device is retained before being stored in the list. */
const vector<id<MTLDevice>> &MetalInfo::get_usable_devices()
{
  static vector<id<MTLDevice>> usable_devices;
  static bool already_enumerated = false;

  if (already_enumerated) {
    return usable_devices;
  }

  metal_printf("Usable Metal devices:");

  /* MTLCopyAllDevices follows the "Copy" ownership rule: the returned array is owned by the
   * caller and has to be released once the enumeration is done. */
  NSArray<id<MTLDevice>> *all_devices = MTLCopyAllDevices();
  for (id<MTLDevice> device in all_devices) {
    string device_name = get_device_name(device);
    bool usable = false;

    if (@available(macos 12.2, *)) {
      const char *device_name_char = [device.name UTF8String];
      if (!(strstr(device_name_char, "Intel") || strstr(device_name_char, "AMD")) &&
          strstr(device_name_char, "Apple"))
      {
        /* TODO: Implement a better way to identify device vendor instead of relying on name. */
        /* We only support Apple Silicon GPUs which all have unified memory, but explicitly check
         * just in case it ever changes. */
        usable = [device hasUnifiedMemory];
      }
    }

    if (usable) {
      metal_printf("- %s", device_name.c_str());
      /* Keep the device alive independently of the enumeration array released below. */
      [device retain];
      usable_devices.push_back(device);
    }
    else {
      metal_printf("  (skipping \"%s\")", device_name.c_str());
    }
  }
  /* Usable devices were retained individually above, so dropping the array is safe. */
  [all_devices release];

  if (usable_devices.empty()) {
    metal_printf("   No usable Metal devices found");
  }
  already_enumerated = true;

  return usable_devices;
}
123
/* Helper for obtaining the 64-bit GPU address / resource ID of Metal resources.
 *
 * When CYCLES_USE_TIER2D_BINDLESS is defined and the code runs on macOS 13.0 or newer, the values
 * are read directly from the `gpuAddress` / `gpuResourceID` properties. Otherwise the resource is
 * encoded with an argument encoder into a small shared buffer, and the encoded 64-bit value is
 * read back from that buffer's contents. `init()` must have been called before using the fallback
 * path, since it creates the buffer and the encoder. */
struct GPUAddressHelper {
  /* 8-byte shared buffer the argument encoder writes into (fallback path only). */
  id<MTLBuffer> resource_buffer = nil;
  /* Argument encoder bound to `resource_buffer` (fallback path only). */
  id<MTLArgumentEncoder> address_encoder = nil;

  /* One time setup of arg encoder. */
  void init(id<MTLDevice> device)
  {
    if (resource_buffer) {
      /* No setup required - already initialized. */
      return;
    }

# ifdef CYCLES_USE_TIER2D_BINDLESS
    if (@available(macos 13.0, *)) {
      /* No setup required - there's an API now! */
      return;
    }
# endif

    /* Setup a tiny buffer to encode the GPU address / resourceID into. */
    resource_buffer = [device newBufferWithLength:8 options:MTLResourceStorageModeShared];

    /* Create an encoder to extract a gpuAddress from a MTLBuffer. */
    MTLArgumentDescriptor *encoder_params = [[MTLArgumentDescriptor alloc] init];
    encoder_params.arrayLength = 1;
    encoder_params.access = MTLBindingAccessReadWrite;
    encoder_params.dataType = MTLDataTypePointer;
    address_encoder = [device newArgumentEncoderWithArguments:@[ encoder_params ]];
    /* The descriptor was created with alloc/init and is only needed to build the encoder, so
     * balance that ownership here instead of leaking it. */
    [encoder_params release];
    [address_encoder setArgumentBuffer:resource_buffer offset:0];
  }

  /* Return the GPU address of `buffer`. */
  uint64_t gpuAddress(id<MTLBuffer> buffer)
  {
# ifdef CYCLES_USE_TIER2D_BINDLESS
    if (@available(macos 13.0, *)) {
      return buffer.gpuAddress;
    }
# endif
    /* Fallback: encode the buffer and read back the encoded 64-bit value. */
    [address_encoder setBuffer:buffer offset:0 atIndex:0];
    return *(uint64_t *)[resource_buffer contents];
  }

  /* Return the GPU resource ID of `texture` as a raw 64-bit value. */
  uint64_t gpuResourceID(id<MTLTexture> texture)
  {
# ifdef CYCLES_USE_TIER2D_BINDLESS
    if (@available(macos 13.0, *)) {
      MTLResourceID resourceID = texture.gpuResourceID;
      /* Reinterpret the MTLResourceID storage as a 64-bit integer. */
      return (uint64_t &)resourceID;
    }
# endif
    [address_encoder setTexture:texture atIndex:0];
    return *(uint64_t *)[resource_buffer contents];
  }

  /* Return the GPU resource ID of `accel_struct` as a raw 64-bit value. */
  uint64_t gpuResourceID(id<MTLAccelerationStructure> accel_struct)
  {
# ifdef CYCLES_USE_TIER2D_BINDLESS
    if (@available(macos 13.0, *)) {
      MTLResourceID resourceID = accel_struct.gpuResourceID;
      return (uint64_t &)resourceID;
    }
# endif
    [address_encoder setAccelerationStructure:accel_struct atIndex:0];
    return *(uint64_t *)[resource_buffer contents];
  }

  /* Return the GPU resource ID of the intersection function table `ift` as a raw 64-bit value. */
  uint64_t gpuResourceID(id<MTLIntersectionFunctionTable> ift)
  {
# ifdef CYCLES_USE_TIER2D_BINDLESS
    if (@available(macos 13.0, *)) {
      MTLResourceID resourceID = ift.gpuResourceID;
      return (uint64_t &)resourceID;
    }
# endif
    [address_encoder setIntersectionFunctionTable:ift atIndex:0];
    return *(uint64_t *)[resource_buffer contents];
  }
};

/* Single shared helper instance used by the free functions below. */
GPUAddressHelper g_gpu_address_helper;
204
/* Run the one-time setup of the global GPU address helper for `device`. */
void metal_gpu_address_helper_init(id<MTLDevice> device)
{
  GPUAddressHelper &helper = g_gpu_address_helper;
  helper.init(device);
}
209
/* Return the GPU address of `buffer`, as computed by the global GPU address helper. */
uint64_t metal_gpuAddress(id<MTLBuffer> buffer)
{
  const uint64_t address = g_gpu_address_helper.gpuAddress(buffer);
  return address;
}
214
/* Return the GPU resource ID of `texture`, as computed by the global GPU address helper. */
uint64_t metal_gpuResourceID(id<MTLTexture> texture)
{
  const uint64_t resource_id = g_gpu_address_helper.gpuResourceID(texture);
  return resource_id;
}
219
/* Return the GPU resource ID of `accel_struct`, as computed by the global GPU address helper. */
uint64_t metal_gpuResourceID(id<MTLAccelerationStructure> accel_struct)
{
  const uint64_t resource_id = g_gpu_address_helper.gpuResourceID(accel_struct);
  return resource_id;
}
224
/* Return the GPU resource ID of the intersection function table `ift`, as computed by the global
 * GPU address helper. */
uint64_t metal_gpuResourceID(id<MTLIntersectionFunctionTable> ift)
{
  const uint64_t resource_id = g_gpu_address_helper.gpuResourceID(ift);
  return resource_id;
}
229
231
232#endif /* WITH_METAL */
void init()
unsigned long long int uint64_t
CCL_NAMESPACE_BEGIN struct Options options
#define CCL_NAMESPACE_END
#define str(s)
#define in
TEX_TEMPLATE DataVec texture(T, FltCoord, float=0.0f) RET
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
Definition string.cpp:23