11# include "device/metal/bvh.h"
17# include <Metal/Metal.h>
// MetalDevice: class declared as deriving publicly from Device.
// NOTE(review): the integers fused to the start of the lines below (23, 25, ...)
// look like line numbers left over from a rendered source listing, not part of
// the declarations themselves — confirm against the real header.
23class MetalDevice :
public Device {
// Metal device handle (id<MTLDevice>), nil until assigned.
25 id<MTLDevice> mtlDevice = nil;
// Array of PSO_NUM library handles, brace-initialised with nil.
26 id<MTLLibrary> mtlLibrary[PSO_NUM] = {nil};
// Two separate command-queue handles; by their names one is presumably for
// compute work and one for general work — confirm at the use sites.
27 id<MTLCommandQueue> mtlComputeCommandQueue = nil;
28 id<MTLCommandQueue> mtlGeneralCommandQueue = nil;
// Counter sample buffer handle, nil until assigned.
29 id<MTLCounterSampleBuffer> mtlCounterSampleBuffer = nil;
// Three string arrays with one entry per PSO_NUM slot. The "_md5" suffix
// suggests the latter two hold MD5 digests — TODO confirm.
30 string source[PSO_NUM];
31 string kernels_md5[PSO_NUM];
32 string global_defines_md5[PSO_NUM];
// Boolean flag, defaults to false.
34 bool capture_enabled =
false;
// Metal buffer handle and a raw KernelParamsMetal pointer, both null by default.
// NOTE(review): the shared "launch_params" prefix suggests the pointer refers to
// the buffer's contents — not established by this declaration, confirm.
37 id<MTLBuffer> launch_params_buffer = nil;
38 KernelParamsMetal *launch_params =
nullptr;
// Feature toggles; every one of them defaults to false.
41 bool use_metalrt =
false;
42 bool use_metalrt_extended_limits =
false;
43 bool motion_blur =
false;
44 bool use_pcmi =
false;
// Metal buffer handle, nil by default. "blas" presumably stands for
// bottom-level acceleration structure — TODO confirm.
46 id<MTLBuffer> blas_buffer = nil;
// The three members below are each annotated API_AVAILABLE(macos(11.0)),
// i.e. their declarations are marked as available from macOS 11.0 onwards.
// Two vectors of acceleration-structure handles:
48 API_AVAILABLE(macos(11.0))
49 vector<
id<MTLAccelerationStructure>> unique_blas_array;
51 API_AVAILABLE(macos(11.0))
52 vector<
id<MTLAccelerationStructure>> blas_array;
// A single acceleration-structure handle, nil by default.
54 API_AVAILABLE(macos(11.0))
55 id<MTLAccelerationStructure> accel_struct = nil;
// Unsigned value initialised to 0; presumably a bitmask of kernel feature
// flags — TODO confirm against the code that sets it.
58 uint kernel_features = 0;
// Boolean flag, defaults to false.
59 bool using_nanovdb =
false;
// NOTE: no default initialiser here, unlike the neighbouring members; the
// value must be assigned elsewhere before it is read.
60 int max_threads_per_threadgroup;
// Boolean error flag, defaults to false.
63 bool has_error =
false;
// NOTE(review): the five fields below (mem, pointer_index, mtlBuffer,
// mtlTexture, hostPtr) may belong to a nested record — MetalMem is referenced
// further down — whose opening line is not visible in this text. Confirm
// against the real header.
// Pointer to a device_memory, null by default.
66 device_memory *mem =
nullptr;
// Integer index, defaults to -1 (presumably meaning "no index" — confirm).
67 int pointer_index = -1;
// Metal buffer and texture handles, both nil by default.
68 id<MTLBuffer> mtlBuffer = nil;
69 id<MTLTexture> mtlTexture = nil;
// Untyped host-side pointer, null by default.
72 void *hostPtr =
nullptr;
// Map type keyed by device_memory pointer; each value is a unique_ptr<MetalMem>.
74 using MetalMemMap = map<device_memory *, unique_ptr<MetalMem>>;
75 MetalMemMap metal_mem_map;
// List of Metal resource handles; by its name presumably resources whose
// release is deferred — confirm at the use sites.
76 std::vector<id<MTLResource>> delayed_free_list;
// Recursive mutex; by its name presumably guards metal_mem_map — confirm.
77 std::recursive_mutex metal_mem_map_mutex;
// Predicate taking a TextureInfo by const reference and returning bool.
// Only the declaration is visible here.
80 bool is_texture(
const TextureInfo &tex);
// device_vector of TextureInfo records.
81 device_vector<TextureInfo> texture_info;
// Metal buffer handle, nil by default.
82 id<MTLBuffer> texture_bindings = nil;
// Vector of Metal resource handles; presumably indexed by texture slot — confirm.
83 std::vector<id<MTLResource>> texture_slot_map;
// Pipeline type selector, initialised to PSO_GENERIC.
85 MetalPipelineType kernel_specialization_level = PSO_GENERIC;
// Static map from an int key to a MetalDevice pointer, shared by all instances.
// Presumably keyed by device id — confirm.
90 static std::map<int, MetalDevice *> active_device_ids;
// Static predicate taking an integer device id.
92 static bool is_device_cancelled(
const int device_id);
// Static lookup returning a MetalDevice pointer.
// NOTE(review): this declaration is cut off after the first parameter (it ends
// on a comma), and the parameter name "device_idID" looks garbled. The rest of
// the signature is not visible here — restore it from the real header.
94 static MetalDevice *get_device_by_ID(
const int device_idID,
// The three members below are marked `override`, so each overrides a virtual
// member of a base class.
// Const member returning bool; takes `status` by non-const reference, so it
// can write a status message back to the caller.
97 bool is_ready(
string &
status)
const override;
// Takes no arguments and returns nothing.
99 void cancel()
override;
// Takes the error message by const reference.
103 void set_error(
const string &
error)
override;
// Constructor: takes the device info by const reference, Stats and Profiler by
// non-const reference, and a `headless` flag.
105 MetalDevice(
const DeviceInfo &info, Stats &stats, Profiler &profiler,
bool headless);
// Destructor, marked `override`.
107 ~MetalDevice()
override;
// Returns bool; its single `const uint` parameter is left unnamed in this
// declaration.
109 bool support_device(
const uint );
// Marked `override`; takes a pointer to another Device.
111 bool check_peer_access(Device *peer_device)
override;
// Argument-less bool queries; the second is a const member.
113 bool use_adaptive_compilation();
115 bool use_local_atomic_sort()
const;
// Returns a string. `source` is an optional string pointer that defaults to
// nullptr; how a null value is treated is not visible in this declaration.
117 string preprocess_source(MetalPipelineType pso_type,
118 const uint kernel_features,
119 string *source =
nullptr);
// Takes the pipeline type only.
121 void refresh_source_and_kernels_md5(MetalPipelineType pso_type);
// Takes the pipeline type and a kernel-features value.
123 void make_source(MetalPipelineType pso_type,
const uint kernel_features);
// Marked `override`; returns bool.
125 bool load_kernels(
const uint kernel_features)
override;
// Takes no arguments and returns nothing.
127 void load_texture_info();
// Takes the device_memory by non-const reference.
129 void erase_allocation(device_memory &mem);
// The first four members below are marked `override`.
// Returns bool; takes the interop device by const reference plus a `log` flag.
131 bool should_use_graphics_interop(
const GraphicsInteropDevice &interop_device,
132 const bool log)
override;
// Returns a unique_ptr<DeviceQueue>, i.e. ownership of the queue passes to the
// caller.
136 unique_ptr<DeviceQueue> gpu_queue_create()
override;
// Returns bool; takes an instance count and a maximum primitive count.
140 bool set_bvh_limits(
size_t instance_count,
size_t max_prim_count)
override;
// Takes a Scene pointer.
142 void optimize_for_scene(
Scene *scene)
override;
// Static: identifies the device by integer id rather than by `this`.
144 static void compile_and_load(
const int device_id, MetalPipelineType pso_type);
// Const member returning bool. `safety_margin` defaults to 8 * 1024 * 1024
// (presumably bytes, i.e. 8 MiB — confirm the unit).
149 bool max_working_set_exceeded(
const size_t safety_margin = 8 * 1024 * 1024)
const;
// Non-virtual helpers operating on a device_memory reference;
// generic_alloc returns a MetalMem pointer.
151 MetalMem *generic_alloc(device_memory &mem);
153 void generic_copy_to(device_memory &mem);
155 void generic_free(device_memory &mem);
// The three members below are marked `override` and each takes a
// device_memory reference.
157 void mem_alloc(device_memory &mem)
override;
159 void mem_copy_to(device_memory &mem)
override;
161 void mem_move_to_host(device_memory &mem)
override;
// NOTE(review): the text of this region is visibly damaged. The braces around
// the call are missing, and the parameter list that follows has lost its
// "void mem_copy_from(" head. What remains looks like two overloads: a
// one-argument form that forwards to the five-argument form with -1 for each
// of the four trailing arguments, followed by the five-argument form itself,
// marked `override`. Restore from the real header rather than from this text.
163 void mem_copy_from(device_memory &mem)
165 mem_copy_from(mem, -1, -1, -1, -1);
168 device_memory &mem,
const size_t y,
size_t w,
const size_t h,
size_t elem)
override;
// All four members below are marked `override`.
// Each of these two takes a device_memory reference and returns nothing.
170 void mem_zero(device_memory &mem)
override;
172 void mem_free(device_memory &mem)
override;
// Returns a device_ptr; takes an offset plus a second size_t parameter that is
// left unnamed in this declaration.
174 device_ptr mem_alloc_sub_ptr(device_memory &mem,
const size_t offset,
size_t )
override;
// Takes a name, a host pointer and a size.
176 void const_copy_to(
const char *
name,
void *host,
const size_t size)
override;
// Pair of functions taking a device_memory reference.
178 void global_alloc(device_memory &mem);
179 void global_free(device_memory &mem);
// Four functions taking a device_texture reference.
181 void tex_alloc(device_texture &mem);
182 void tex_alloc_as_buffer(device_texture &mem);
183 void tex_copy_to(device_texture &mem);
184 void tex_free(device_texture &mem);
// Takes no arguments. Presumably processes the delayed_free_list member —
// confirm against the definition.
186 void flush_delayed_free_list();
// Takes a BVHMetal pointer.
190 void update_bvh(BVHMetal *bvh_metal);
unsigned long long int uint64_t
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
void refit(btStridingMeshInterface *triangles, const btVector3 &aabbMin, const btVector3 &aabbMax)
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
#define CCL_NAMESPACE_END
ccl_device bool BVH_FUNCTION_FULL_NAME BVH(KernelGlobals kg, const ccl_private Ray *ray, ccl_private LocalIntersection *local_isect, const int local_object, ccl_private uint *lcg_state, const int max_hits)
static void error(const char *str)
std::unique_lock< std::mutex > thread_scoped_lock