Blender V4.5
queue.cpp
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#include "device/queue.h"
6#include "device/kernel.h"
7
8#include "util/algorithm.h"
9#include "util/log.h"
10#include "util/time.h"
11
12#include <iomanip>
13
15
17{
18 DCHECK_NE(device, nullptr);
19 is_per_kernel_performance_ = getenv("CYCLES_DEBUG_PER_KERNEL_PERFORMANCE");
20}
21
23{
25 /* Print kernel execution times sorted by time. */
27 for (const auto &stat : stats_kernel_time_) {
28 stats_sorted.push_back(stat);
29 }
30
31 sort(stats_sorted.begin(),
32 stats_sorted.end(),
33 [](const pair<DeviceKernelMask, double> &a, const pair<DeviceKernelMask, double> &b) {
34 return a.second > b.second;
35 });
36
37 VLOG_DEVICE_STATS << "GPU queue stats:";
38 double total_time = 0.0;
39 for (const auto &[mask, time] : stats_sorted) {
40 total_time += time;
41 VLOG_DEVICE_STATS << " " << std::setfill(' ') << std::setw(10) << std::fixed
42 << std::setprecision(5) << std::right << time
44 }
45
47 VLOG_DEVICE_STATS << "GPU queue total time: " << std::fixed << std::setprecision(5)
48 << total_time;
49 }
50 }
51}
52
61
63{
65 VLOG_DEVICE_STATS << "GPU queue launch " << device_kernel_as_string(kernel) << ", work_size "
66 << work_size;
67 }
68
69 last_kernels_enqueued_.set(kernel, true);
70}
71
78
80{
82 const double new_time = time_dt();
83 const double elapsed_time = new_time - last_sync_time_;
84 VLOG_DEVICE_STATS << "GPU queue synchronize, elapsed " << std::setw(10) << elapsed_time << "s";
85
86 /* There is no sense in having entries in the performance data
87 * container without related kernel information. */
88 if (last_kernels_enqueued_.any()) {
90 }
91
92 last_sync_time_ = new_time;
93 }
94
96}
97
102
static DBVT_INLINE btDbvtNode * sort(btDbvtNode *n, btDbvtNode *&r)
Definition btDbvt.cpp:418
double last_sync_time_
DeviceKernelMask last_kernels_enqueued_
void debug_enqueue_end()
Definition queue.cpp:72
void debug_synchronize()
Definition queue.cpp:79
bool is_per_kernel_performance_
Device * device
map< DeviceKernelMask, double > stats_kernel_time_
string debug_active_kernels()
Definition queue.cpp:98
virtual ~DeviceQueue()
Definition queue.cpp:22
virtual bool synchronize()=0
void debug_init_execution()
Definition queue.cpp:53
DeviceQueue(Device *device)
Definition queue.cpp:16
void debug_enqueue_begin(DeviceKernel kernel, const int work_size)
Definition queue.cpp:62
#define CCL_NAMESPACE_END
string device_kernel_mask_as_string(DeviceKernelMask mask)
const char * device_kernel_as_string(DeviceKernel kernel)
ccl_gpu_kernel_postfix const ccl_global int ccl_global float const int work_size
DeviceKernel
#define VLOG_DEVICE_STATS_IS_ON
Definition log.h:78
#define VLOG_DEVICE_STATS
Definition log.h:77
#define DCHECK_NE(a, b)
Definition log.h:57
ccl_device_inline float2 mask(const MaskType mask, const float2 a)
CCL_NAMESPACE_BEGIN double time_dt()
Definition time.cpp:38
double total_time