Blender V4.3
device/metal/queue.h
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2021-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#pragma once
6
7#ifdef WITH_METAL
8
9# include "device/kernel.h"
10# include "device/memory.h"
11# include "device/queue.h"
12
13# include "device/metal/util.h"
15
17
18class MetalDevice;
19
/* Base class for Metal queues.
 *
 * Implements the DeviceQueue interface on top of Metal objects: it holds the command queue,
 * the current command buffer and its compute/blit encoders, plus bookkeeping for optional
 * per-kernel profiling and .gputrace capture. Only declarations live here; the behavior of
 * each member function is defined in the corresponding implementation file. */
class MetalDeviceQueue : public DeviceQueue {
 public:
  MetalDeviceQueue(MetalDevice *device);
  ~MetalDeviceQueue();

  /* DeviceQueue interface.
   * `override` already implies `virtual`, so the keyword is not repeated on these. */

  int num_concurrent_states(const size_t) const override;
  int num_concurrent_busy_states(const size_t) const override;
  int num_sort_partition_elements() const override;
  bool supports_local_atomic_sort() const override;

  void init_execution() override;

  /* Queue `kernel` for `work_size` work items with the given arguments. */
  bool enqueue(DeviceKernel kernel,
               const int work_size,
               DeviceKernelArguments const &args) override;

  bool synchronize() override;

  void zero_to_device(device_memory &mem) override;
  void copy_to_device(device_memory &mem) override;
  void copy_from_device(device_memory &mem) override;

  void *native_queue() override;

 protected:
  /* Capture helpers, see the `.gputrace capture` members at the end of the class. */
  void setup_capture();
  void update_capture(DeviceKernel kernel);
  void begin_capture();
  void end_capture();

  void prepare_resources(DeviceKernel kernel);

  /* Accessors for the command encoders.
   * NOTE(review): presumably these create the encoder on demand and close the other kind
   * first - confirm against the implementation. */
  id<MTLComputeCommandEncoder> get_compute_encoder(DeviceKernel kernel);
  id<MTLBlitCommandEncoder> get_blit_encoder();

  MetalDevice *metal_device_;
  MetalBufferPool temp_buffer_pool_;

  /* Metal objects. Members tagged with API_AVAILABLE only exist on the listed OS versions. */
  API_AVAILABLE(macos(11.0), ios(14.0))
  MTLCommandBufferDescriptor *command_buffer_desc_ = nullptr;
  id<MTLDevice> mtlDevice_ = nil;
  id<MTLCommandQueue> mtlCommandQueue_ = nil;
  id<MTLCommandBuffer> mtlCommandBuffer_ = nil;
  id<MTLComputeCommandEncoder> mtlComputeEncoder_ = nil;
  id<MTLBlitCommandEncoder> mtlBlitEncoder_ = nil;
  API_AVAILABLE(macos(10.14), ios(14.0))
  id<MTLSharedEvent> shared_event_ = nil;
  API_AVAILABLE(macos(10.14), ios(14.0))
  MTLSharedEventListener *shared_event_listener_ = nil;
  MetalDispatchPipeline active_pipelines_[DEVICE_KERNEL_NUM];

  /* Given explicit null defaults like the Metal handles above, so they never hold an
   * indeterminate value before the constructor assigns them. */
  dispatch_queue_t event_queue_ = nil;
  dispatch_semaphore_t wait_semaphore_ = nil;

  /* Pairing of a host pointer with a GPU-side pointer.
   * NOTE(review): the listing this block was recovered from skips a line inside this struct;
   * confirm against the real header that no field (for example a byte count) is missing. */
  struct CopyBack {
    void *host_pointer;
    void *gpu_mem;
  };
  std::vector<CopyBack> copy_back_mem_;

  /* Defaulted to zero for the same reason as the handles above; the constructor may still
   * assign a different starting value. */
  uint64_t shared_event_id_ = 0;
  uint64_t command_buffers_submitted_ = 0;
  uint64_t command_buffers_completed_ = 0;
  Stats &stats_;

  void close_compute_encoder();
  void close_blit_encoder();

  bool verbose_tracing_ = false;
  bool label_command_encoders_ = false;

  /* Per-kernel profiling (see CYCLES_METAL_PROFILING). */

  /* One record per encoded dispatch: which kernel, how much work, and an id used to look up
   * its timing. */
  struct TimingData {
    DeviceKernel kernel;
    int work_size;
    uint64_t timing_id;
  };
  std::vector<TimingData> command_encoder_labels_;
  bool profiling_enabled_ = false;
  uint64_t current_encoder_idx_ = 0;

  id<MTLCounterSampleBuffer> counter_sample_buffer_ = nil;
  /* Atomic, so presumably touched from more than one thread - TODO confirm where. */
  std::atomic<uint64_t> counter_sample_buffer_curr_idx_ = 0;

  void flush_timing_stats();

  /* Accumulated totals, stored once per kernel in `timing_stats_`. */
  struct TimingStats {
    double total_time = 0.0;
    uint64_t total_work_size = 0;
    uint64_t num_dispatches = 0;
  };
  TimingStats timing_stats_[DEVICE_KERNEL_NUM];
  double last_completion_time_ = 0.0;

  /* .gputrace capture (see CYCLES_DEBUG_METAL_CAPTURE_...). */

  id<MTLCaptureScope> mtlCaptureScope_ = nil;
  /* DEVICE_KERNEL_NUM is the kernel count rather than a real kernel, so it is used here as a
   * neutral placeholder until a capture target is assigned. */
  DeviceKernel capture_kernel_ = DEVICE_KERNEL_NUM;
  int capture_dispatch_counter_ = 0;
  bool capture_samples_ = false;
  int capture_reset_counter_ = 0;
  bool is_capturing_ = false;
  bool is_capturing_to_disk_ = false;
  bool has_captured_to_disk_ = false;
};
127
129
130#endif /* WITH_METAL */
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition btDbvt.cpp:52
virtual int num_sort_partition_elements() const
virtual int num_concurrent_busy_states(const size_t state_size) const =0
virtual void copy_from_device(device_memory &mem)=0
virtual bool supports_local_atomic_sort() const
virtual int num_concurrent_states(const size_t state_size) const =0
virtual void init_execution()=0
virtual void copy_to_device(device_memory &mem)=0
virtual bool synchronize()=0
virtual bool enqueue(DeviceKernel kernel, const int work_size, DeviceKernelArguments const &args)=0
virtual void * native_queue()
virtual void zero_to_device(device_memory &mem)=0
#define CCL_NAMESPACE_END
ccl_gpu_kernel_postfix ccl_global const int ccl_global float const int work_size
DeviceKernel
@ DEVICE_KERNEL_NUM
unsigned __int64 uint64_t
Definition stdint.h:90
double total_time