Blender V4.3
device/metal/kernel.h
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2021-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#pragma once
6
7#ifdef WITH_METAL
8
9# include "device/kernel.h"
10# include <Metal/Metal.h>
11
13
14class MetalDevice;
15
/* Indices identifying the individual MetalRT tables. The enumerators are numbered consecutively
 * from 0, so METALRT_TABLE_NUM equals the number of tables; this header uses it as the size of
 * the per-table arrays (`table_functions`, `intersection_func_table`). */
16enum {
17 METALRT_TABLE_DEFAULT,
18 METALRT_TABLE_SHADOW,
19 METALRT_TABLE_SHADOW_ALL,
20 METALRT_TABLE_VOLUME,
21 METALRT_TABLE_LOCAL,
22 METALRT_TABLE_LOCAL_MBLUR,
23 METALRT_TABLE_LOCAL_SINGLE_HIT,
24 METALRT_TABLE_LOCAL_SINGLE_HIT_MBLUR,
25 METALRT_TABLE_NUM
26};
27
28/* Pipeline State Object (PSO) types. */
29enum MetalPipelineType {
30 /* A kernel that can be used with all scenes, supporting all features.
31 * It is slow to compile, but only needs to be compiled once and is then
32 * cached for future render sessions. This allows a render to get underway
33 * on the GPU quickly.
34 */
35 PSO_GENERIC,
36
37 /* An intersection kernel that is very quick to specialize and results in faster intersection
38 * kernel performance. It uses Metal function constants to replace several KernelData variables
39 * with fixed constants.
40 */
41 PSO_SPECIALIZED_INTERSECT,
42
43 /* A shading kernel that is slow to specialize, but results in faster shading kernel
44 * performance. It uses Metal function constants to replace several KernelData variables with
45 * fixed constants and short-circuit all unused SVM node case handlers.
46 */
47 PSO_SPECIALIZED_SHADE,
48
 /* Number of pipeline types listed above; not a pipeline type itself. */
49 PSO_NUM
50};
51
/* Bitwise OR of the KERNEL_FEATURE_HAIR, KERNEL_FEATURE_HAIR_THICK and
 * KERNEL_FEATURE_POINTCLOUD flags.
 * NOTE(review): presumably the subset of kernel features that matters for MetalRT pipelines -
 * confirm at the use sites, which are not visible in this header. */
52# define METALRT_FEATURE_MASK \
53 (KERNEL_FEATURE_HAIR | KERNEL_FEATURE_HAIR_THICK | KERNEL_FEATURE_POINTCLOUD)
54
/* Maps a MetalPipelineType value to a C string. Declaration only: the definition, and therefore
 * the exact strings returned, is not visible in this header. */
55const char *kernel_type_as_string(MetalPipelineType pso_type);
56
57/* A pipeline object that can be shared between multiple instances of MetalDeviceQueue. */
58class MetalKernelPipeline {
59 public:
 /* Declaration only; the definition is not visible in this header. */
60 void compile();
61
 /* NOTE(review): neither ID has an in-class initializer - confirm the creator assigns both
  * before they are read. */
62 int pipeline_id;
63 int originating_device_id;
64
65 id<MTLLibrary> mtlLibrary = nil;
 /* NOTE(review): no in-class initializer, unlike most neighbouring members. */
66 MetalPipelineType pso_type;
67 string kernels_md5;
68 size_t usage_count = 0;
69
70 KernelData kernel_data_;
 /* NOTE(review): no in-class initializer - confirm it is always set before being read. */
71 bool use_metalrt;
72 uint32_t kernel_features = 0;
73
 /* NOTE(review): no in-class initializer, whereas `num_threads_per_block` below defaults to 0. */
74 int threads_per_threadgroup;
75
 /* NOTE(review): no in-class initializer - confirm the creator assigns it. */
76 DeviceKernel device_kernel;
 /* Metal objects and state below all start out empty: `false`, `nil` or 0. */
77 bool loaded = false;
78 id<MTLDevice> mtlDevice = nil;
79 id<MTLFunction> function = nil;
80 id<MTLComputePipelineState> pipeline = nil;
81 int num_threads_per_block = 0;
82
 /* Declarations only; the definitions are not visible in this header. */
83 bool should_use_binary_archive() const;
84 id<MTLFunction> make_intersection_function(const char *function_name);
85
86 string error_str;
87
 /* One NSArray slot per METALRT_TABLE_* index; every slot starts out as nil. */
88 NSArray *table_functions[METALRT_TABLE_NUM] = {nil};
89};
90
91/* An actively instanced pipeline that can only be used by a single instance of MetalDeviceQueue.
92 */
93class MetalDispatchPipeline {
94 public:
95 ~MetalDispatchPipeline();
96
 /* Declarations only; the definitions are not visible in this header.
  * NOTE(review): the meaning of update()'s bool result is not documented here - confirm against
  * the definition. */
97 bool update(MetalDevice *metal_device, DeviceKernel kernel);
98 void free_intersection_function_tables();
99
100 private:
 /* MetalDeviceQueue and ShaderCache are granted access to the private members below. */
101 friend class MetalDeviceQueue;
102 friend struct ShaderCache;
103
 /* Starts at -1. NOTE(review): presumably means "no pipeline assigned yet" - confirm in
  * update(). */
104 int pipeline_id = -1;
105
 /* NOTE(review): no in-class initializer, unlike the other members of this class. */
106 MetalPipelineType pso_type;
107 id<MTLComputePipelineState> pipeline = nil;
108 int num_threads_per_block = 0;
109
 /* One intersection function table slot per METALRT_TABLE_* index, all initially nil.
  * The member is annotated as available from macOS 11.0. */
110 API_AVAILABLE(macos(11.0))
111 id<MTLIntersectionFunctionTable> intersection_func_table[METALRT_TABLE_NUM] = {nil};
112};
113
114/* Cache of Metal kernels for each DeviceKernel. */
115namespace MetalDeviceKernels {
116
/* All functions in this namespace are declared here only; their definitions are not visible in
 * this header.
 * NOTE(review): the return-value semantics (e.g. what `false` from load() means, or whether
 * get_best_pipeline() can return null) are not documented here - confirm against the
 * definitions before relying on them. */
117int num_incomplete_specialization_requests();
118int get_loaded_kernel_count(MetalDevice const *device, MetalPipelineType pso_type);
119bool should_load_kernels(MetalDevice const *device, MetalPipelineType pso_type);
120bool load(MetalDevice *device, MetalPipelineType pso_type);
121const MetalKernelPipeline *get_best_pipeline(const MetalDevice *device, DeviceKernel kernel);
122void wait_for_all();
123bool is_benchmark_warmup();
124
125/* Deinitialize all static variables, so that no code would run on application exit. */
126void static_deinitialize();
127
128} /* namespace MetalDeviceKernels */
129
131
132#endif /* WITH_METAL */
#define CCL_NAMESPACE_END
KernelData
DeviceKernel
void load(const VolumeGridData &grid)
static void update(bNodeTree *ntree)
unsigned int uint32_t
Definition stdint.h:80