Blender V4.5
device/metal/queue.h
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2021-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#pragma once
6
7#ifdef WITH_METAL
8
9# include "device/kernel.h"
10# include "device/memory.h"
11# include "device/queue.h"
12
13# include "device/metal/util.h"
15
16# define MAX_SAMPLE_BUFFER_LENGTH 4096
17
19
20class MetalDevice;
21
22/* Base class for Metal queues. */
23class MetalDeviceQueue : public DeviceQueue {
24 public:
25 MetalDeviceQueue(MetalDevice *device);
26 ~MetalDeviceQueue() override;
27
28 int num_concurrent_states(const size_t /*state_size*/) const override;
29 int num_concurrent_busy_states(const size_t /*state_size*/) const override;
30 int num_sort_partitions(int max_num_paths, uint max_scene_shaders) const override;
31 bool supports_local_atomic_sort() const override;
32
33 void init_execution() override;
34
35 bool enqueue(DeviceKernel kernel,
36 const int work_size,
37 const DeviceKernelArguments &args) override;
38
39 bool synchronize() override;
40
41 void zero_to_device(device_memory &mem) override;
42 void copy_to_device(device_memory &mem) override;
43 void copy_from_device(device_memory &mem) override;
44
45 void *native_queue() override;
46
47 unique_ptr<DeviceGraphicsInterop> graphics_interop_create() override;
48
49 protected:
50 void setup_capture();
51 void update_capture(DeviceKernel kernel);
52 void begin_capture();
53 void end_capture();
54 void prepare_resources(DeviceKernel kernel);
55
56 id<MTLComputeCommandEncoder> get_compute_encoder(DeviceKernel kernel);
57 id<MTLBlitCommandEncoder> get_blit_encoder();
58
59 MetalDevice *metal_device_;
60 MetalBufferPool temp_buffer_pool_;
61
62 API_AVAILABLE(macos(11.0), ios(14.0))
63 MTLCommandBufferDescriptor *command_buffer_desc_ = nullptr;
64 id<MTLDevice> mtlDevice_ = nil;
65 id<MTLCommandQueue> mtlCommandQueue_ = nil;
66 id<MTLCommandBuffer> mtlCommandBuffer_ = nil;
67 id<MTLComputeCommandEncoder> mtlComputeEncoder_ = nil;
68 id<MTLBlitCommandEncoder> mtlBlitEncoder_ = nil;
69 API_AVAILABLE(macos(10.14), ios(14.0))
70 id<MTLSharedEvent> shared_event_ = nil;
71 API_AVAILABLE(macos(10.14), ios(14.0))
72 MTLSharedEventListener *shared_event_listener_ = nil;
73 MetalDispatchPipeline active_pipelines_[DEVICE_KERNEL_NUM];
74
75 dispatch_queue_t event_queue_;
76 dispatch_semaphore_t wait_semaphore_;
77
78 uint64_t shared_event_id_;
79 uint64_t command_buffers_submitted_ = 0;
80 uint64_t command_buffers_completed_ = 0;
81 Stats &stats_;
82
83 void close_compute_encoder();
84 void close_blit_encoder();
85
86 bool verbose_tracing_ = false;
87 bool label_command_encoders_ = false;
88
89 /* Per-kernel profiling (see CYCLES_METAL_PROFILING). */
90
91 struct TimingData {
92 DeviceKernel kernel;
93 int work_size;
94 uint64_t timing_id;
95 };
96 std::vector<TimingData> command_encoder_labels_;
97 bool profiling_enabled_ = false;
98 uint64_t current_encoder_idx_ = 0;
99
100 std::atomic<uint64_t> counter_sample_buffer_curr_idx_ = 0;
101
102 void flush_timing_stats();
103
104 struct TimingStats {
105 double total_time = 0.0;
106 uint64_t total_work_size = 0;
107 uint64_t num_dispatches = 0;
108 };
109 TimingStats timing_stats_[DEVICE_KERNEL_NUM];
110 double last_completion_time_ = 0.0;
111
112 /* .gputrace capture (see CYCLES_DEBUG_METAL_CAPTURE_...). */
113
114 id<MTLCaptureScope> mtlCaptureScope_ = nil;
115 DeviceKernel capture_kernel_;
116 int capture_dispatch_counter_ = 0;
117 bool capture_samples_ = false;
118 int capture_reset_counter_ = 0;
119 bool is_capturing_ = false;
120 bool is_capturing_to_disk_ = false;
121 bool has_captured_to_disk_ = false;
122};
123
125
126#endif /* WITH_METAL */
unsigned int uint
unsigned long long int uint64_t
virtual int num_concurrent_busy_states(const size_t state_size) const =0
virtual void copy_from_device(device_memory &mem)=0
virtual bool supports_local_atomic_sort() const
virtual int num_concurrent_states(const size_t state_size) const =0
virtual bool enqueue(DeviceKernel kernel, const int work_size, const DeviceKernelArguments &args)=0
virtual void init_execution()=0
virtual void copy_to_device(device_memory &mem)=0
virtual unique_ptr< DeviceGraphicsInterop > graphics_interop_create()
virtual int num_sort_partitions(int max_num_paths, uint max_scene_shaders) const
virtual bool synchronize()=0
virtual void * native_queue()
virtual void zero_to_device(device_memory &mem)=0
#define CCL_NAMESPACE_END
ccl_gpu_kernel_postfix const ccl_global int ccl_global float const int work_size
DeviceKernel
@ DEVICE_KERNEL_NUM
double total_time