/* Preprocessor constants. Only the values (4096 and 11) are visible here; the
 * names suggest a cap on a sample-buffer length and a count of ancillary slots
 * -- TODO confirm against the use sites.
 * NOTE(review): the leading "16" / "19" look like listing line numbers fused
 * into the text rather than code -- confirm against the original header. */
16# define MAX_SAMPLE_BUFFER_LENGTH 4096
19# define ANCILLARY_SLOT_COUNT 11
/* Constructor taking a MetalDevice pointer. Declaration only.
 * NOTE(review): presumably the device this queue submits work to -- confirm in
 * the implementation. */
28 MetalDeviceQueue(MetalDevice *device);
/* Destructor. Marked `override`, so it overrides a virtual destructor of a
 * base class that is not visible in this excerpt. */
29 ~MetalDeviceQueue()
override;
40 const DeviceKernelArguments &args)
override;
/* Returns a Metal compute command encoder for the given kernel. Declaration
 * only; how the encoder is created or reused is not visible here.
 * NOTE(review): likely related to the mtlComputeEncoder_ member -- confirm. */
59 id<MTLComputeCommandEncoder> get_compute_encoder(
DeviceKernel kernel);
/* Returns a Metal blit command encoder. Declaration only.
 * NOTE(review): likely related to the mtlBlitEncoder_ member -- confirm. */
60 id<MTLBlitCommandEncoder> get_blit_encoder();
/* Pointer to a MetalDevice, with no in-class initializer.
 * NOTE(review): presumably the pointer handed to the constructor -- the
 * assignment is not visible here, confirm in the implementation. */
62 MetalDevice *metal_device_;
/* Command buffer descriptor, null by default. The declaration carries an
 * availability annotation of macOS 11.0 / iOS 14.0. */
64 API_AVAILABLE(macos(11.0), ios(14.0))
65 MTLCommandBufferDescriptor *command_buffer_desc_ =
nullptr;
/* Metal object handles (device, command queue, current command buffer and the
 * two encoder kinds), all nil by default. */
66 id<MTLDevice> mtlDevice_ = nil;
67 id<MTLCommandQueue> mtlCommandQueue_ = nil;
68 id<MTLCommandBuffer> mtlCommandBuffer_ = nil;
69 id<MTLComputeCommandEncoder> mtlComputeEncoder_ = nil;
70 id<MTLBlitCommandEncoder> mtlBlitEncoder_ = nil;
/* Shared event and its listener, both nil by default and both annotated as
 * available from macOS 10.14 / iOS 14.0. */
71 API_AVAILABLE(macos(10.14), ios(14.0))
72 id<MTLSharedEvent> shared_event_ = nil;
73 API_AVAILABLE(macos(10.14), ios(14.0))
74 MTLSharedEventListener *shared_event_listener_ = nil;
/* Dispatch queue and semaphore. Unlike the neighbouring members these have no
 * in-class initializer.
 * NOTE(review): presumably set up in the constructor and used together with
 * the shared event for waiting on completion -- confirm. */
77 dispatch_queue_t event_queue_;
78 dispatch_semaphore_t wait_semaphore_;
/* 64-bit counters starting at 0; by name, command buffers submitted vs.
 * completed. These are plain (non-atomic) integers.
 * NOTE(review): which threads update them is not visible here -- confirm. */
81 uint64_t command_buffers_submitted_ = 0;
82 uint64_t command_buffers_completed_ = 0;
/* Helpers taking no arguments and returning nothing. Declarations only.
 * NOTE(review): the names suggest they end the currently open compute / blit
 * encoder -- confirm in the implementation. */
85 void close_compute_encoder();
86 void close_blit_encoder();
/* Boolean switches, both off by default. What turns them on is not visible
 * in this excerpt. */
88 bool verbose_tracing_ =
false;
89 bool label_command_encoders_ =
false;
/* Collection of TimingData entries; TimingData is declared outside this
 * excerpt.
 * NOTE(review): presumably one entry per labelled command encoder -- confirm. */
98 std::vector<TimingData> command_encoder_labels_;
/* Profiling switch, off by default. */
99 bool profiling_enabled_ =
false;
/* Atomic 64-bit index starting at 0.
 * NOTE(review): by name, the current write position in a counter sample
 * buffer; which threads touch it is not visible here -- confirm. */
102 std::atomic<uint64_t> counter_sample_buffer_curr_idx_ = 0;
/* Declaration only; the body is not visible here. */
104 void flush_timing_stats();
/* Floating-point timestamp-like value starting at 0.0.
 * NOTE(review): units and clock source are not visible here -- confirm. */
112 double last_completion_time_ = 0.0;
/* Capture-related state. Every member below has an in-class default (nil, 0 or
 * false); the code that changes them is not visible in this excerpt. */
/* Metal capture scope handle, nil by default. */
116 id<MTLCaptureScope> mtlCaptureScope_ = nil;
/* Integer counters starting at 0 and a boolean switch starting off.
 * NOTE(review): by name these count dispatches / resets and select
 * sample-based capture -- confirm against the implementation. */
118 int capture_dispatch_counter_ = 0;
119 bool capture_samples_ =
false;
120 int capture_reset_counter_ = 0;
/* Capture status flags, all false by default. */
121 bool is_capturing_ =
false;
122 bool is_capturing_to_disk_ =
false;
123 bool has_captured_to_disk_ =
false;
/* NOTE(review): from here on the lines are bare signatures without terminating
 * semicolons (plus a typedef-like line with no `typedef` keyword), which looks
 * like cross-reference residue from a generated listing rather than header
 * code -- confirm against the original file before relying on them.
 * As written, the entries ending in `=0` are pure-virtual declarations and the
 * remaining `virtual` entries are not; the final `#define` expands to nothing. */
unsigned long long int uint64_t
virtual int num_concurrent_busy_states(const size_t state_size) const =0
virtual void copy_from_device(device_memory &mem)=0
virtual bool supports_local_atomic_sort() const
virtual int num_concurrent_states(const size_t state_size) const =0
virtual bool enqueue(DeviceKernel kernel, const int work_size, const DeviceKernelArguments &args)=0
virtual void init_execution()=0
virtual void copy_to_device(device_memory &mem)=0
virtual unique_ptr< DeviceGraphicsInterop > graphics_interop_create()
virtual int num_sort_partitions(int max_num_paths, uint max_scene_shaders) const
virtual bool synchronize()=0
virtual void * native_queue()
virtual void zero_to_device(device_memory &mem)=0
#define CCL_NAMESPACE_END
ccl_gpu_kernel_postfix const ccl_global int ccl_global float const int work_size