Blender V4.3
queue.cpp
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#include "device/queue.h"
6
7#include "util/algorithm.h"
8#include "util/log.h"
9#include "util/time.h"
10
11#include <iomanip>
12
14
/* Constructor of DeviceQueue (the signature line is not visible in this listing): stores the
 * device pointer and starts with an empty kernel mask, a zero sync time and per-kernel
 * performance reporting disabled. */
16 : device(device),
17 last_kernels_enqueued_(0),
18 last_sync_time_(0.0),
19 is_per_kernel_performance_(false)
20{
21 DCHECK_NE(device, nullptr);
/* `getenv()` returns a pointer which is converted to bool here: the flag becomes true whenever
 * the variable exists in the environment, whatever its value (even "0" or an empty string). */
22 is_per_kernel_performance_ = getenv("CYCLES_DEBUG_PER_KERNEL_PERFORMANCE");
23}
24
/* Destructor body: logs the accumulated per-kernel-mask timings, largest first.
 * NOTE(review): this listing skips source lines 27, 29, 43 and 49, so the declaration of
 * `stats_sorted`, any update of `total_time` and the conditions opening the two extra scopes
 * closed below are not visible here -- check them against the real file. */
26{
28 /* Print kernel execution times sorted by time. */
/* Copy the (mask, time) entries out of the map so that they can be reordered. */
30 for (const auto &stat : stats_kernel_time_) {
31 stats_sorted.push_back(stat);
32 }
33
/* Descending order: entries with the largest time come first. */
34 sort(stats_sorted.begin(),
35 stats_sorted.end(),
36 [](const pair<DeviceKernelMask, double> &a, const pair<DeviceKernelMask, double> &b) {
37 return a.second > b.second;
38 });
39
40 VLOG_DEVICE_STATS << "GPU queue stats:";
41 double total_time = 0.0;
/* One log line per entry: the time right-aligned in a 10 character wide field with 5 decimals,
 * followed by the string form of the kernel mask. */
42 for (const auto &[mask, time] : stats_sorted) {
44 VLOG_DEVICE_STATS << " " << std::setfill(' ') << std::setw(10) << std::fixed
45 << std::setprecision(5) << std::right << time
46 << "s: " << device_kernel_mask_as_string(mask);
47 }
48
/* Summary line with the value of `total_time`, printed with 5 decimals. */
50 VLOG_DEVICE_STATS << "GPU queue total time: " << std::fixed << std::setprecision(5)
51 << total_time;
52 }
53 }
54}
55
64
/* Body of debug_enqueue_begin(kernel, work_size); the signature (source line 65) and the line
 * opening the inner scope (source line 67) are not visible in this listing. */
66{
/* Log the name of the kernel being launched together with its work size. */
68 VLOG_DEVICE_STATS << "GPU queue launch " << device_kernel_as_string(kernel) << ", work_size "
69 << work_size;
70 }
71
/* Set the bit whose index is the numeric value of `kernel` in the mask of enqueued kernels.
 * NOTE(review): assumes every DeviceKernel value is below 64, otherwise the shift is
 * undefined -- confirm against the enum definition. */
72 last_kernels_enqueued_ |= (uint64_t(1) << (uint64_t)kernel);
73}
74
81
/* Body of debug_synchronize(); the signature (source line 82) and source lines 84, 92 and 98
 * are not visible in this listing. */
83{
/* Time elapsed between the previously stored sync timestamp and now. */
85 const double new_time = time_dt();
86 const double elapsed_time = new_time - last_sync_time_;
87 VLOG_DEVICE_STATS << "GPU queue synchronize, elapsed " << std::setw(10) << elapsed_time << "s";
88
89 /* There is no sense in having entries in the performance data
90 * container without related kernel information. */
91 if (last_kernels_enqueued_ != 0) {
93 }
94
/* Keep this timestamp as the reference point for the next elapsed-time measurement. */
95 last_sync_time_ = new_time;
96 }
97
99}
100
105
static DBVT_INLINE btDbvtNode * sort(btDbvtNode *n, btDbvtNode *&r)
Definition btDbvt.cpp:418
double last_sync_time_
DeviceKernelMask last_kernels_enqueued_
void debug_enqueue_end()
Definition queue.cpp:75
void debug_synchronize()
Definition queue.cpp:82
bool is_per_kernel_performance_
Device * device
map< DeviceKernelMask, double > stats_kernel_time_
string debug_active_kernels()
Definition queue.cpp:101
virtual ~DeviceQueue()
Definition queue.cpp:25
virtual bool synchronize()=0
void debug_init_execution()
Definition queue.cpp:56
DeviceQueue(Device *device)
Definition queue.cpp:15
void debug_enqueue_begin(DeviceKernel kernel, const int work_size)
Definition queue.cpp:65
local_group_size(16, 16) .push_constant(Type b
double time
#define CCL_NAMESPACE_END
string device_kernel_mask_as_string(DeviceKernelMask mask)
const char * device_kernel_as_string(DeviceKernel kernel)
ccl_gpu_kernel_postfix ccl_global const int ccl_global float const int work_size
DeviceKernel
#define VLOG_DEVICE_STATS_IS_ON
Definition log.h:79
#define VLOG_DEVICE_STATS
Definition log.h:78
#define DCHECK_NE(a, b)
Definition log.h:58
unsigned __int64 uint64_t
Definition stdint.h:90
CCL_NAMESPACE_BEGIN double time_dt()
Definition time.cpp:36
double total_time