11# include "device/metal/bvh.h"
17# include <Metal/Metal.h>
// Device implementation for Metal: derives publicly from `Device`.
// NOTE(review): every line of this listing begins with a bare number that looks like
// line-number residue from the listing this text was taken from, and several
// declarations are cut off — confirm against the real header before relying on it.
23class MetalDevice :
public Device {
// Handle to the Metal device; nil until assigned.
25 id<MTLDevice> mtlDevice = nil;
// Array of Metal libraries sized by PSO_NUM (presumably one per MetalPipelineType — confirm).
26 id<MTLLibrary> mtlLibrary[PSO_NUM] = {nil};
// NOTE(review): the initializer after '=' on the next line is missing in this view — TODO restore.
27 id<MTLArgumentEncoder> mtlBufferKernelParamsEncoder =
// Two separate command queues, named for compute and for general work; both start as nil.
29 id<MTLCommandQueue> mtlComputeCommandQueue = nil;
30 id<MTLCommandQueue> mtlGeneralCommandQueue = nil;
// NOTE(review): the initializer after '=' on the next line is missing in this view — TODO restore.
31 id<MTLArgumentEncoder> mtlAncillaryArgEncoder =
// Counter sample buffer handle; nil until assigned.
33 id<MTLCounterSampleBuffer> mtlCounterSampleBuffer = nil;
// Three string arrays sized by PSO_NUM: a source string plus two MD5-named strings.
// NOTE(review): presumably hashes of the kernels and of the global defines — confirm
// with refresh_source_and_kernels_md5().
34 string source[PSO_NUM];
35 string kernels_md5[PSO_NUM];
36 string global_defines_md5[PSO_NUM];
// Defaults to false.
38 bool capture_enabled =
false;
// Kernel launch parameter block, brace-initialized with nullptr.
40 KernelParamsMetal launch_params = {
nullptr};
// Three feature switches, all false by default.
// NOTE(review): where and when they are set is not visible in this view.
43 bool use_metalrt =
false;
44 bool motion_blur =
false;
45 bool use_pcmi =
false;
// NOTE(review): the initializer after '=' on the next line is missing in this view — TODO restore.
46 id<MTLArgumentEncoder> mtlASArgEncoder =
// Argument encoder and buffer named for BLAS data; both start as nil.
49 id<MTLArgumentEncoder> mtlBlasArgEncoder = nil;
50 id<MTLBuffer> blas_buffer = nil;
// Both acceleration-structure members below are annotated as available from macOS 11.0.
// A vector of acceleration-structure handles (presumably de-duplicated BLAS entries — confirm).
52 API_AVAILABLE(macos(11.0))
53 vector<
id<MTLAccelerationStructure>> unique_blas_array;
// A single acceleration-structure handle; nil until assigned.
55 API_AVAILABLE(macos(11.0))
56 id<MTLAccelerationStructure> accel_struct = nil;
// Kernel feature value, initially 0 (presumably a bitmask matching the
// `kernel_features` argument of load_kernels() — confirm).
59 uint kernel_features = 0;
// Defaults to false.
60 bool using_nanovdb =
false;
// NOTE(review): declared without an initializer, unlike the neighbouring members;
// it must be assigned before it is read — confirm the constructor does so.
61 int max_threads_per_threadgroup;
// Defaults to false (presumably raised by set_error() — confirm).
64 bool has_error =
false;
// NOTE(review): these five fields look like the members of a per-allocation record
// (presumably the `MetalMem` type stored in `MetalMemMap`); the line that opens that
// type and its closing brace are not present in this view — TODO confirm against the
// real header.
// Pointer to a device_memory object; null by default.
67 device_memory *mem =
nullptr;
// Index defaulting to -1 (presumably meaning "not assigned" — confirm).
68 int pointer_index = -1;
// Metal buffer and texture handles; both nil by default.
69 id<MTLBuffer> mtlBuffer = nil;
70 id<MTLTexture> mtlTexture = nil;
// Untyped host-side pointer; null by default.
73 void *hostPtr =
nullptr;
// Map from a device_memory pointer to a uniquely owned MetalMem record.
75 using MetalMemMap = map<device_memory *, unique_ptr<MetalMem>>;
76 MetalMemMap metal_mem_map;
// Metal resources queued for later release (presumably drained by
// flush_delayed_free_list() — confirm).
77 std::vector<id<MTLResource>> delayed_free_list;
// Recursive mutex named after the map (presumably guards metal_mem_map — the locking
// sites are not visible in this view).
78 std::recursive_mutex metal_mem_map_mutex;
// Predicate over a TextureInfo; the definition is not visible in this view.
81 bool is_texture(
const TextureInfo &tex);
// Device-side vector of TextureInfo entries.
82 device_vector<TextureInfo> texture_info;
// Defaults to false (presumably marks texture_info as needing a reload via
// load_texture_info() — confirm).
83 bool need_texture_info =
false;
// Argument encoders for texture and buffer arguments; both start as nil.
84 id<MTLArgumentEncoder> mtlTextureArgEncoder = nil;
85 id<MTLArgumentEncoder> mtlBufferArgEncoder = nil;
// Binding buffers named for 1D buffers and for 2D / 3D textures; all start as nil.
86 id<MTLBuffer> buffer_bindings_1d = nil;
87 id<MTLBuffer> texture_bindings_2d = nil;
88 id<MTLBuffer> texture_bindings_3d = nil;
// Vector of texture handles (presumably indexed by texture slot — confirm).
89 std::vector<id<MTLTexture>> texture_slot_map;
// Pipeline type used as the kernel specialization level; defaults to PSO_GENERIC.
91 MetalPipelineType kernel_specialization_level = PSO_GENERIC;
// Class-wide (static) map from an integer id to a MetalDevice pointer.
96 static std::map<int, MetalDevice *> active_device_ids;
// Static query taking a device id and returning bool (presumably whether that device
// has been cancelled — the definition is not visible in this view).
98 static bool is_device_cancelled(
const int device_id);
// Static lookup returning a MetalDevice pointer for an id.
// NOTE(review): this declaration is cut off after its first parameter, and the
// parameter name `device_idID` looks garbled — TODO restore from the real header.
100 static MetalDevice *get_device_by_ID(
const int device_idID,
// Override of a base-class virtual: const readiness query that takes a mutable
// `status` string reference (presumably filled with a description — confirm).
103 bool is_ready(
string &status)
const override;
// Override of a base-class virtual taking no arguments.
105 void cancel()
override;
// Override of a base-class virtual taking the error message by const reference.
109 void set_error(
const string &
error)
override;
// Constructor: takes the device description, stats and profiler objects by reference,
// plus a `headless` flag.
111 MetalDevice(
const DeviceInfo &info, Stats &stats, Profiler &profiler,
bool headless);
// Destructor; overrides the base-class destructor.
113 ~MetalDevice()
override;
// Takes one unnamed `const uint` argument and returns bool; the definition is not
// visible in this view.
115 bool support_device(
const uint );
// Override of a base-class virtual taking a pointer to another Device.
117 bool check_peer_access(Device *peer_device)
override;
// Boolean query with no arguments; the definition is not visible in this view.
119 bool use_adaptive_compilation();
// Const boolean query with no arguments; the definition is not visible in this view.
121 bool use_local_atomic_sort()
const;
// Returns a string for the given pipeline type and kernel feature value.
// `source` is an optional string pointer, null by default (how it is used is not
// visible in this view).
123 string preprocess_source(MetalPipelineType pso_type,
124 const uint kernel_features,
125 string *source =
nullptr);
// Takes a pipeline type (presumably updates the source / MD5 string arrays for it — confirm).
127 void refresh_source_and_kernels_md5(MetalPipelineType pso_type);
// Takes a pipeline type and a kernel feature value; the definition is not visible in this view.
129 void make_source(MetalPipelineType pso_type,
const uint kernel_features);
// Override of a base-class virtual: takes a kernel feature value and returns bool.
131 bool load_kernels(
const uint kernel_features)
override;
// No arguments; the definition is not visible in this view.
133 void load_texture_info();
// Takes a device_memory reference (presumably drops its entry from metal_mem_map — confirm).
135 void erase_allocation(device_memory &mem);
// Override of a base-class virtual: takes a description of the interop device and a
// `log` flag, and returns bool.
137 bool should_use_graphics_interop(
const GraphicsInteropDevice &interop_device,
138 const bool log)
override;
// Override of a base-class virtual: returns a uniquely owned DeviceQueue.
142 unique_ptr<DeviceQueue> gpu_queue_create()
override;
// Override of a base-class virtual taking a Scene pointer.
146 void optimize_for_scene(
Scene *scene)
override;
// Static entry point taking a device id rather than a MetalDevice pointer, plus a
// pipeline type (presumably resolves the device through the static registry — confirm).
148 static void compile_and_load(
const int device_id, MetalPipelineType pso_type);
// Const boolean query; `safety_margin` defaults to 8 * 1024 * 1024
// (8 MiB if the unit is bytes — TODO confirm).
153 bool max_working_set_exceeded(
const size_t safety_margin = 8 * 1024 * 1024)
const;
// Non-virtual helpers operating on a device_memory reference; generic_alloc returns a
// MetalMem pointer. Their definitions are not visible in this view.
155 MetalMem *generic_alloc(device_memory &mem);
157 void generic_copy_to(device_memory &mem);
159 void generic_free(device_memory &mem);
// Overrides of base-class virtuals, each taking a device_memory reference.
161 void mem_alloc(device_memory &mem)
override;
163 void mem_copy_to(device_memory &mem)
override;
165 void mem_move_to_host(device_memory &mem)
override;
// Single-argument convenience form: forwards to the five-argument mem_copy_from,
// passing -1 for every extra argument.
// NOTE(review): the braces around this inline body are missing in this view. If the
// callee's extra parameters are the size_t ones listed below, each -1 converts to the
// largest size_t value — confirm that is the intended "whole range" sentinel.
167 void mem_copy_from(device_memory &mem)
169 mem_copy_from(mem, -1, -1, -1, -1);
// NOTE(review): the start of this declaration (return type, name, opening parenthesis)
// is missing in this view; judging by the five-argument call above it is presumably
// the five-argument mem_copy_from override — TODO restore from the real header.
172 device_memory &mem,
const size_t y,
size_t w,
const size_t h,
size_t elem)
override;
// Overrides of base-class virtuals, each taking a device_memory reference.
174 void mem_zero(device_memory &mem)
override;
176 void mem_free(device_memory &mem)
override;
// Override of a base-class virtual: returns a device_ptr for the given memory and
// offset. The third size_t parameter is unnamed in this declaration.
178 device_ptr mem_alloc_sub_ptr(device_memory &mem,
const size_t offset,
size_t )
override;
// Override of a base-class virtual: takes a name, an untyped host pointer and a size
// (presumably copies `size` bytes from `host` into the constant called `name` — confirm).
180 void const_copy_to(
const char *name,
void *host,
const size_t size)
override;
// Allocate / free pair taking a device_memory reference; definitions are not visible
// in this view.
182 void global_alloc(device_memory &mem);
183 void global_free(device_memory &mem);
// Texture helpers, each taking a device_texture reference; definitions are not visible
// in this view.
185 void tex_alloc(device_texture &mem);
186 void tex_alloc_as_buffer(device_texture &mem);
187 void tex_copy_to(device_texture &mem);
188 void tex_free(device_texture &mem);
// No arguments (presumably releases the resources queued in delayed_free_list — confirm).
190 void flush_delayed_free_list();
// Takes a pointer to a BVHMetal; the definition is not visible in this view.
194 void update_bvh(BVHMetal *bvh_metal);
unsigned long long int uint64_t
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
void refit(btStridingMeshInterface *triangles, const btVector3 &aabbMin, const btVector3 &aabbMax)
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
#define CCL_NAMESPACE_END
ccl_device bool BVH_FUNCTION_FULL_NAME BVH(KernelGlobals kg, const ccl_private Ray *ray, ccl_private LocalIntersection *local_isect, const int local_object, ccl_private uint *lcg_state, const int max_hits)
static void error(const char *str)
std::unique_lock< std::mutex > thread_scoped_lock