Blender V5.0
device/metal/queue.h
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2021-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#pragma once
6
7#ifdef WITH_METAL
8
9# include "device/kernel.h"
10# include "device/memory.h"
11# include "device/queue.h"
12
13# include "device/metal/util.h"
15
16# define MAX_SAMPLE_BUFFER_LENGTH 4096
17
18/* The number of resources to be contiguously encoded into the MetalAncillaries struct. */
19# define ANCILLARY_SLOT_COUNT 11
20
22
23class MetalDevice;
24
25/* Base class for Metal queues. */
26class MetalDeviceQueue : public DeviceQueue {
27 public:
28 MetalDeviceQueue(MetalDevice *device);
29 ~MetalDeviceQueue() override;
30
31 int num_concurrent_states(const size_t /*state_size*/) const override;
32 int num_concurrent_busy_states(const size_t /*state_size*/) const override;
33 int num_sort_partitions(int max_num_paths, uint max_scene_shaders) const override;
34 bool supports_local_atomic_sort() const override;
35
36 void init_execution() override;
37
38 bool enqueue(DeviceKernel kernel,
39 const int work_size,
40 const DeviceKernelArguments &args) override;
41
42 bool synchronize() override;
43
44 void zero_to_device(device_memory &mem) override;
45 void copy_to_device(device_memory &mem) override;
46 void copy_from_device(device_memory &mem) override;
47
48 void *native_queue() override;
49
50 unique_ptr<DeviceGraphicsInterop> graphics_interop_create() override;
51
52 protected:
53 void setup_capture();
54 void update_capture(DeviceKernel kernel);
55 void begin_capture();
56 void end_capture();
57 void prepare_resources(DeviceKernel kernel);
58
59 id<MTLComputeCommandEncoder> get_compute_encoder(DeviceKernel kernel);
60 id<MTLBlitCommandEncoder> get_blit_encoder();
61
62 MetalDevice *metal_device_;
63
64 API_AVAILABLE(macos(11.0), ios(14.0))
65 MTLCommandBufferDescriptor *command_buffer_desc_ = nullptr;
66 id<MTLDevice> mtlDevice_ = nil;
67 id<MTLCommandQueue> mtlCommandQueue_ = nil;
68 id<MTLCommandBuffer> mtlCommandBuffer_ = nil;
69 id<MTLComputeCommandEncoder> mtlComputeEncoder_ = nil;
70 id<MTLBlitCommandEncoder> mtlBlitEncoder_ = nil;
71 API_AVAILABLE(macos(10.14), ios(14.0))
72 id<MTLSharedEvent> shared_event_ = nil;
73 API_AVAILABLE(macos(10.14), ios(14.0))
74 MTLSharedEventListener *shared_event_listener_ = nil;
75 MetalDispatchPipeline active_pipelines_[DEVICE_KERNEL_NUM];
76
77 dispatch_queue_t event_queue_;
78 dispatch_semaphore_t wait_semaphore_;
79
80 uint64_t shared_event_id_;
81 uint64_t command_buffers_submitted_ = 0;
82 uint64_t command_buffers_completed_ = 0;
83 Stats &stats_;
84
85 void close_compute_encoder();
86 void close_blit_encoder();
87
88 bool verbose_tracing_ = false;
89 bool label_command_encoders_ = false;
90
91 /* Per-kernel profiling (see CYCLES_METAL_PROFILING). */
92
93 struct TimingData {
94 DeviceKernel kernel;
95 int work_size;
96 uint64_t timing_id;
97 };
98 std::vector<TimingData> command_encoder_labels_;
99 bool profiling_enabled_ = false;
100 uint64_t current_encoder_idx_ = 0;
101
102 std::atomic<uint64_t> counter_sample_buffer_curr_idx_ = 0;
103
104 void flush_timing_stats();
105
106 struct TimingStats {
107 double total_time = 0.0;
108 uint64_t total_work_size = 0;
109 uint64_t num_dispatches = 0;
110 };
111 TimingStats timing_stats_[DEVICE_KERNEL_NUM];
112 double last_completion_time_ = 0.0;
113
114 /* .gputrace capture (see CYCLES_DEBUG_METAL_CAPTURE_...). */
115
116 id<MTLCaptureScope> mtlCaptureScope_ = nil;
117 DeviceKernel capture_kernel_;
118 int capture_dispatch_counter_ = 0;
119 bool capture_samples_ = false;
120 int capture_reset_counter_ = 0;
121 bool is_capturing_ = false;
122 bool is_capturing_to_disk_ = false;
123 bool has_captured_to_disk_ = false;
124};
125
127
128#endif /* WITH_METAL */
unsigned int uint
unsigned long long int uint64_t
virtual int num_concurrent_busy_states(const size_t state_size) const =0
virtual void copy_from_device(device_memory &mem)=0
virtual bool supports_local_atomic_sort() const
virtual int num_concurrent_states(const size_t state_size) const =0
virtual bool enqueue(DeviceKernel kernel, const int work_size, const DeviceKernelArguments &args)=0
virtual void init_execution()=0
virtual void copy_to_device(device_memory &mem)=0
virtual unique_ptr< DeviceGraphicsInterop > graphics_interop_create()
virtual int num_sort_partitions(int max_num_paths, uint max_scene_shaders) const
virtual bool synchronize()=0
virtual void * native_queue()
virtual void zero_to_device(device_memory &mem)=0
#define CCL_NAMESPACE_END
ccl_gpu_kernel_postfix const ccl_global int ccl_global float const int work_size
DeviceKernel
@ DEVICE_KERNEL_NUM
double total_time