Blender V5.0
vk_device_submission.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2025 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
8
9#include <chrono>
10#include <condition_variable>
11#include <thread>
12
13#include "BLI_mutex.hh"
14#include "BLI_task.h"
15
16#include "vk_device.hh"
17
18#include "CLG_log.h"
19
20static CLG_LogRef LOG = {"gpu.vulkan"};
21
22namespace blender::gpu {
23
24/* -------------------------------------------------------------------- */
27
33
44
/* Hand a recorded render graph over to the submission worker thread.
 *
 * NOTE(review): doc-line 45 (the first line of the signature) was lost in
 * extraction. Per the cross-reference list the full prototype is
 * `TimelineValue VKDevice::render_graph_submit(render_graph::VKRenderGraph
 * *render_graph, VKDiscardPool &context_discard_pool, ...)` -- confirm
 * against upstream sources.
 *
 * Returns the timeline value that the device timeline semaphore will be
 * signaled with when this submission finishes (see `wait_for_timeline`). */
46 VKDiscardPool &context_discard_pool,
47 bool submit_to_device,
48 bool wait_for_completion,
49 VkPipelineStageFlags wait_dst_stage_mask,
50 VkSemaphore wait_semaphore,
51 VkSemaphore signal_semaphore,
52 VkFence signal_fence)
53{
/* Early-out: nothing recorded, nothing to submit.
 * NOTE(review): doc-lines 56-57 were lost in extraction; presumably they
 * returned the (reset) render graph to the unused queue -- confirm against
 * upstream sources. */
54 if (render_graph->is_empty()) {
55 render_graph->reset();
58 return timeline_value_;
59 }
60
/* Package everything the worker thread needs into a heap-allocated task;
 * ownership of the task (and the render graph) moves to the worker. */
61 VKRenderGraphSubmitTask *submit_task = MEM_new<VKRenderGraphSubmitTask>(__func__);
62 submit_task->render_graph = render_graph;
63 submit_task->submit_to_device = submit_to_device;
64 submit_task->wait_dst_stage_mask = wait_dst_stage_mask;
65 submit_task->wait_semaphore = wait_semaphore;
66 submit_task->signal_semaphore = signal_semaphore;
67 submit_task->signal_fence = signal_fence;
68 submit_task->wait_for_submission = nullptr;
69
70 /* We need to wait for submission as otherwise the signal semaphore can still not be in an
71 * initial state. */
72 const bool wait_for_submission = signal_semaphore != VK_NULL_HANDLE && !wait_for_completion;
/* `wait_condition` lives on this stack frame; the worker signals it after
 * `vkQueueSubmit`, and we block on it below before returning. */
73 VKRenderGraphWait wait_condition{};
74 if (wait_for_submission) {
75 submit_task->wait_for_submission = &wait_condition;
76 }
/* Reserve the timeline value and move context discards into the device
 * orphan pool under the orphan-pool mutex, so discard ordering matches
 * submission ordering. When not submitting to the device the next timeline
 * value is used without incrementing.
 * NOTE(review): doc-line 83 (the start of the `BLI_thread_queue_push(` call)
 * was lost in extraction. */
77 TimelineValue timeline = 0;
78 {
79 std::scoped_lock lock(orphaned_data.mutex_get());
80 timeline = submit_task->timeline = submit_to_device ? ++timeline_value_ : timeline_value_ + 1;
81 orphaned_data.timeline_ = timeline;
82 orphaned_data.move_data(context_discard_pool, timeline);
84 submitted_render_graphs_, submit_task, BLI_THREAD_QUEUE_WORK_PRIORITY_NORMAL);
85 }
/* Task is owned by the worker thread from here on; don't touch it again. */
86 submit_task = nullptr;
87
88 if (wait_for_submission) {
89 std::unique_lock<blender::Mutex> lock(wait_condition.is_submitted_mutex);
90 wait_condition.is_submitted_condition.wait(lock, [&] { return wait_condition.is_submitted; });
91 }
92
93 if (wait_for_completion) {
94 wait_for_timeline(timeline);
95 }
96 return timeline;
97}
98
100{
101 if (timeline == 0) {
102 return;
103 }
104 VkSemaphoreWaitInfo vk_semaphore_wait_info = {
105 VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, nullptr, 0, 1, &vk_timeline_semaphore_, &timeline};
106 vkWaitSemaphores(vk_device_, &vk_semaphore_wait_info, UINT64_MAX);
107}
108
/* NOTE(review): the signature line of this method (doc-line 109) was lost in
 * extraction and its name is not recoverable from the cross-reference list.
 * The body waits for the Vulkan queue to drain while holding the queue
 * mutex, so no other thread can submit concurrently -- confirm the method
 * name against upstream sources. */
110{
111 std::scoped_lock lock(*queue_mutex_);
112 vkQueueWaitIdle(vk_queue_);
113}
114
116{
118 BLI_thread_queue_pop_timeout(unused_render_graphs_, 0));
119 if (render_graph) {
120 return render_graph;
121 }
122
123 std::scoped_lock lock(resources.mutex);
124 render_graph = MEM_new<render_graph::VKRenderGraph>(__func__, resources);
125 render_graphs_.append(render_graph);
126 return render_graph;
127}
128
/* Entry point of the submission worker. Runs on the background task pool
 * (see `init_submission_pool`), pops `VKRenderGraphSubmitTask`s from
 * `submitted_render_graphs_`, records their nodes into reusable command
 * buffers and submits them to the Vulkan queue until the pool is canceled. */
129void VKDevice::submission_runner(TaskPool *__restrict pool, void *task_data)
130{
131 CLOG_TRACE(&LOG, "Submission runner has started");
132 UNUSED_VARS(task_data);
133
134 VKDevice *device = static_cast<VKDevice *>(BLI_task_pool_user_data(pool));
/* Command pool local to this thread. TRANSIENT + RESET_COMMAND_BUFFER lets
 * individual command buffers be reset and re-recorded when reused. */
135 VkCommandPool vk_command_pool = VK_NULL_HANDLE;
136 VkCommandPoolCreateInfo vk_command_pool_create_info = {
137 VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
138 nullptr,
139 VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
140 device->vk_queue_family_};
141 vkCreateCommandPool(device->vk_device_, &vk_command_pool_create_info, nullptr, &vk_command_pool);
142
/* NOTE(review): doc-line 143 was lost in extraction. The `node_handles`
 * used below suggest it declared a scheduler (`select_nodes` appears in the
 * cross-reference list) -- confirm against upstream sources. */
144 render_graph::VKCommandBuilder command_builder;
/* Command buffer recycling: `command_buffers_unused` can be picked up
 * immediately; `command_buffers_in_use` are keyed by the timeline value
 * that must complete before they may be reused. */
145 Vector<VkCommandBuffer> command_buffers_unused;
146 TimelineResources<VkCommandBuffer> command_buffers_in_use;
147 VkCommandBuffer vk_command_buffer = VK_NULL_HANDLE;
148 Vector<VkCommandBuffer> unsubmitted_command_buffers;
149 Vector<VkSubmitInfo> submit_infos;
150 submit_infos.reserve(2);
151 std::optional<render_graph::VKCommandBufferWrapper> command_buffer;
152 uint64_t previous_gc_timeline = 0;
153
154 CLOG_TRACE(&LOG, "Submission runner initialized");
155 while (!BLI_task_pool_current_canceled(pool)) {
/* Short (1 ms) pop timeout so pool cancellation is noticed promptly. */
156 VKRenderGraphSubmitTask *submit_task = static_cast<VKRenderGraphSubmitTask *>(
157 BLI_thread_queue_pop_timeout(device->submitted_render_graphs_, 1));
158 if (submit_task == nullptr) {
159 continue;
160 }
/* Garbage-collect discarded resources, but only when the finished timeline
 * actually advanced since the last pass. */
161 uint64_t current_timeline = device->submission_finished_timeline_get();
162 if (assign_if_different(previous_gc_timeline, current_timeline)) {
163 device->orphaned_data.destroy_discarded_resources(*device, current_timeline);
164 }
165
166 /* End current command buffer when we need to wait for a semaphore. In this case all previous
167 * recorded commands can run before the wait semaphores. The commands that must be guarded by
168 * the semaphores are part of the new submitted render graph. */
169 if (submit_task->wait_semaphore != VK_NULL_HANDLE && command_buffer.has_value()) {
170 command_buffer->end_recording();
171 unsubmitted_command_buffers.append(vk_command_buffer);
172 command_buffer.reset();
173 }
174
175 if (!command_buffer.has_value()) {
176 /* Check for completed command buffers that can be reused. */
177 if (command_buffers_unused.is_empty()) {
178 command_buffers_in_use.remove_old(current_timeline,
179 [&](VkCommandBuffer vk_command_buffer) {
180 command_buffers_unused.append(vk_command_buffer);
181 });
182 }
183
184 /* Create new command buffers when there are no left to be reused. */
/* Allocates a batch of 10 primary command buffers at once to amortize
 * allocation overhead. */
185 if (command_buffers_unused.is_empty()) {
186 command_buffers_unused.resize(10, VK_NULL_HANDLE);
187 VkCommandBufferAllocateInfo vk_command_buffer_allocate_info = {
188 VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
189 nullptr,
190 vk_command_pool,
191 VK_COMMAND_BUFFER_LEVEL_PRIMARY,
192 10};
193 vkAllocateCommandBuffers(
194 device->vk_device_, &vk_command_buffer_allocate_info, command_buffers_unused.data());
/* NOTE(review): stray `;` after the closing brace (harmless empty
 * statement); could be removed upstream. */
195 };
196
197 vk_command_buffer = command_buffers_unused.pop_last();
198 command_buffer = std::make_optional<render_graph::VKCommandBufferWrapper>(
199 vk_command_buffer, device->extensions_);
200 command_buffer->begin_recording();
201 }
202
203 BLI_assert(vk_command_buffer != VK_NULL_HANDLE);
/* NOTE(review): doc-lines 205-206 were lost in extraction; presumably they
 * bound a `render_graph` reference to `submit_task->render_graph` and
 * selected the `node_handles` used below -- confirm against upstream. */
/* Building nodes touches shared resource state and is guarded by the
 * resource mutex; recording commands happens outside the lock. */
207 {
208 std::scoped_lock lock_resources(device->resources.mutex);
209 command_builder.build_nodes(render_graph, *command_buffer, node_handles);
210 }
211 command_builder.record_commands(render_graph, *command_buffer, node_handles);
212
213 if (submit_task->submit_to_device) {
214 /* Create submit infos for previous command buffers. */
215 submit_infos.clear();
216 if (!unsubmitted_command_buffers.is_empty()) {
217 VkSubmitInfo vk_submit_info = {VK_STRUCTURE_TYPE_SUBMIT_INFO,
218 nullptr,
219 0,
220 nullptr,
221 nullptr,
222 uint32_t(unsubmitted_command_buffers.size()),
223 unsubmitted_command_buffers.data(),
224 0,
225 nullptr};
226 submit_infos.append(vk_submit_info);
227 }
228
229 /* Finalize current command buffer. */
230 command_buffer->end_recording();
231 unsubmitted_command_buffers.append(vk_command_buffer);
232
/* The device timeline semaphore is always signaled (first slot); the
 * optional binary `signal_semaphore` is the second slot, hence the signal
 * count is 1 or 2. Its timeline value slot stays 0 (ignored for binary
 * semaphores). */
233 uint32_t wait_semaphore_len = submit_task->wait_semaphore == VK_NULL_HANDLE ? 0 : 1;
234 uint32_t signal_semaphore_len = submit_task->signal_semaphore == VK_NULL_HANDLE ? 1 : 2;
235 VkSemaphore signal_semaphores[2] = {device->vk_timeline_semaphore_,
236 submit_task->signal_semaphore};
237 uint64_t signal_semaphore_values[2] = {submit_task->timeline, 0};
238
239 VkTimelineSemaphoreSubmitInfo vk_timeline_semaphore_submit_info = {
240 VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
241 nullptr,
242 0,
243 nullptr,
244 signal_semaphore_len,
245 signal_semaphore_values};
246 VkSubmitInfo vk_submit_info = {VK_STRUCTURE_TYPE_SUBMIT_INFO,
247 &vk_timeline_semaphore_submit_info,
248 wait_semaphore_len,
249 &submit_task->wait_semaphore,
250 &submit_task->wait_dst_stage_mask,
251 1,
252 &unsubmitted_command_buffers.last(),
253 signal_semaphore_len,
254 signal_semaphores};
255 submit_infos.append(vk_submit_info);
256
257 {
258 std::scoped_lock lock_queue(*device->queue_mutex_);
259 vkQueueSubmit(device->vk_queue_,
260 submit_infos.size(),
261 submit_infos.data(),
262 submit_task->signal_fence);
263 }
/* Wake up the context thread blocked in `render_graph_submit` waiting for
 * the submission to reach the queue.
 * NOTE(review): doc-line 266 (the mutex argument of this lock) was lost in
 * extraction; presumably `is_submitted_mutex` of the wait condition --
 * confirm against upstream sources. */
264 if (submit_task->wait_for_submission != nullptr) {
265 std::unique_lock<blender::Mutex> lock(
267 submit_task->wait_for_submission->is_submitted = true;
268 submit_task->wait_for_submission->is_submitted_condition.notify_one();
269 }
/* All submitted command buffers become reusable once this task's timeline
 * value completes. */
270 vk_command_buffer = VK_NULL_HANDLE;
271 for (VkCommandBuffer vk_command_buffer : unsubmitted_command_buffers) {
272 command_buffers_in_use.append_timeline(submit_task->timeline, vk_command_buffer);
273 }
274 unsubmitted_command_buffers.clear();
275 command_buffer.reset();
276 }
277
/* Reset the render graph and return it to the unused queue for reuse; the
 * task wrapper itself is freed here.
 * NOTE(review): doc-line 281 (the queue-priority argument of this push)
 * was lost in extraction. */
278 render_graph.reset();
279 BLI_thread_queue_push(device->unused_render_graphs_,
280 std::move(submit_task->render_graph),
282 MEM_delete<VKRenderGraphSubmitTask>(submit_task);
283 }
284 CLOG_TRACE(&LOG, "Submission runner is being canceled");
285
286 /* Clear command buffers and pool */
287 {
288 std::scoped_lock lock(*device->queue_mutex_);
289 vkDeviceWaitIdle(device->vk_device_);
290 }
/* After device idle every in-flight command buffer has completed; collect
 * them all (UINT64_MAX) and free everything together with the pool. */
291 command_buffers_in_use.remove_old(UINT64_MAX, [&](VkCommandBuffer vk_command_buffer) {
292 command_buffers_unused.append(vk_command_buffer);
293 });
294 vkFreeCommandBuffers(device->vk_device_,
295 vk_command_pool,
296 command_buffers_unused.size(),
297 command_buffers_unused.data());
298 vkDestroyCommandPool(device->vk_device_, vk_command_pool, nullptr);
299 CLOG_TRACE(&LOG, "Submission runner finished");
300}
301
302void VKDevice::init_submission_pool()
303{
304 CLOG_TRACE(&LOG, "Create submission pool");
306 submitted_render_graphs_ = BLI_thread_queue_init();
307 unused_render_graphs_ = BLI_thread_queue_init();
308
309 VkSemaphoreTypeCreateInfo vk_semaphore_type_create_info = {
310 VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, nullptr, VK_SEMAPHORE_TYPE_TIMELINE, 0};
311 VkSemaphoreCreateInfo vk_semaphore_create_info = {
312 VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, &vk_semaphore_type_create_info, 0};
313 vkCreateSemaphore(vk_device_, &vk_semaphore_create_info, nullptr, &vk_timeline_semaphore_);
314
315 BLI_task_pool_push(submission_pool_, VKDevice::submission_runner, nullptr, false, nullptr);
316}
317
318void VKDevice::deinit_submission_pool()
319{
320 CLOG_TRACE(&LOG, "Cancelling submission pool");
321 BLI_task_pool_cancel(submission_pool_);
322 CLOG_TRACE(&LOG, "Waiting for completion");
323 BLI_task_pool_work_and_wait(submission_pool_);
324 CLOG_TRACE(&LOG, "Freeing submission pool");
325 BLI_task_pool_free(submission_pool_);
326 submission_pool_ = nullptr;
327
328 while (!BLI_thread_queue_is_empty(submitted_render_graphs_)) {
329 VKRenderGraphSubmitTask *submit_task = static_cast<VKRenderGraphSubmitTask *>(
330 BLI_thread_queue_pop(submitted_render_graphs_));
331 MEM_delete<VKRenderGraphSubmitTask>(submit_task);
332 }
333 BLI_thread_queue_free(submitted_render_graphs_);
334 submitted_render_graphs_ = nullptr;
335 BLI_thread_queue_free(unused_render_graphs_);
336 unused_render_graphs_ = nullptr;
337
338 vkDestroySemaphore(vk_device_, vk_timeline_semaphore_, nullptr);
339 vk_timeline_semaphore_ = VK_NULL_HANDLE;
340}
341
343
344} // namespace blender::gpu
#define BLI_assert(a)
Definition BLI_assert.h:46
@ TASK_PRIORITY_HIGH
Definition BLI_task.h:53
void * BLI_task_pool_user_data(TaskPool *pool)
Definition task_pool.cc:550
bool BLI_task_pool_current_canceled(TaskPool *pool)
Definition task_pool.cc:545
void BLI_task_pool_work_and_wait(TaskPool *pool)
Definition task_pool.cc:535
void BLI_task_pool_cancel(TaskPool *pool)
Definition task_pool.cc:540
TaskPool * BLI_task_pool_create_background_serial(void *userdata, eTaskPriority priority)
Definition task_pool.cc:516
void BLI_task_pool_free(TaskPool *pool)
Definition task_pool.cc:521
void BLI_task_pool_push(TaskPool *pool, TaskRunFunction run, void *taskdata, bool free_taskdata, TaskFreeFunction freedata)
Definition task_pool.cc:526
@ BLI_THREAD_QUEUE_WORK_PRIORITY_NORMAL
void * BLI_thread_queue_pop(ThreadQueue *queue)
Definition threads.cc:712
ThreadQueue * BLI_thread_queue_init(void)
Definition threads.cc:624
void BLI_thread_queue_free(ThreadQueue *queue)
Definition threads.cc:635
bool BLI_thread_queue_is_empty(ThreadQueue *queue)
Definition threads.cc:839
uint64_t BLI_thread_queue_push(ThreadQueue *queue, void *work, ThreadQueueWorkPriority priority)
Definition threads.cc:645
void * BLI_thread_queue_pop_timeout(ThreadQueue *queue, int ms)
Definition threads.cc:782
#define UNUSED_VARS(...)
#define CLOG_TRACE(clg_ref,...)
Definition CLG_log.h:192
volatile int lock
unsigned long long int uint64_t
int64_t size() const
void append(const T &value)
const T & last(const int64_t n=0) const
bool is_empty() const
void resize(const int64_t new_size)
void reserve(const int64_t min_capacity)
void remove_old(TimelineValue current_timeline, Deleter deleter)
void append_timeline(TimelineValue timeline, Item item)
render_graph::VKResourceStateTracker resources
Definition vk_device.hh:217
TimelineValue submission_finished_timeline_get() const
Definition vk_device.hh:402
render_graph::VKRenderGraph * render_graph_new()
VKDiscardPool orphaned_data
Definition vk_device.hh:218
static void submission_runner(TaskPool *__restrict pool, void *task_data)
TimelineValue render_graph_submit(render_graph::VKRenderGraph *render_graph, VKDiscardPool &context_discard_pool, bool submit_to_device, bool wait_for_completion, VkPipelineStageFlags wait_dst_stage_mask, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore, VkFence signal_fence)
void wait_for_timeline(TimelineValue timeline)
void destroy_discarded_resources(VKDevice &device, TimelineValue current_timeline)
void record_commands(VKRenderGraph &render_graph, VKCommandBufferInterface &command_buffer, Span< NodeHandle > node_handles)
void build_nodes(VKRenderGraph &render_graph, VKCommandBufferInterface &command_buffer, Span< NodeHandle > node_handles)
Span< NodeHandle > select_nodes(const VKRenderGraph &render_graph)
#define UINT64_MAX
#define LOG(level)
Definition log.h:97
uint64_t TimelineValue
Definition vk_common.hh:36
static CLG_LogRef LOG
bool assign_if_different(T &old_value, T new_value)
std::mutex Mutex
Definition BLI_mutex.hh:47
render_graph::VKRenderGraph * render_graph
std::condition_variable_any is_submitted_condition