Blender V5.0
queue.cpp
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#include <iomanip>
6
7#include "device/kernel.h"
8#include "device/queue.h"
9
10#include "util/algorithm.h"
11#include "util/log.h"
12#include "util/time.h"
13
15
17{
18 DCHECK_NE(device, nullptr);
19 is_per_kernel_performance_ = getenv("CYCLES_DEBUG_PER_KERNEL_PERFORMANCE");
20}
21
23{
25 /* Print kernel execution times sorted by time. */
27 for (const auto &stat : stats_kernel_time_) {
28 stats_sorted.push_back(stat);
29 }
30
31 sort(stats_sorted.begin(),
32 stats_sorted.end(),
33 [](const pair<DeviceKernelMask, double> &a, const pair<DeviceKernelMask, double> &b) {
34 return a.second > b.second;
35 });
36
37 LOG_TRACE << "GPU queue stats:";
38 double total_time = 0.0;
39 for (const auto &[mask, time] : stats_sorted) {
40 total_time += time;
41 LOG_TRACE << " " << std::setfill(' ') << std::setw(10) << std::fixed << std::setprecision(5)
42 << std::right << time << "s: " << device_kernel_mask_as_string(mask);
43 }
44
46 LOG_TRACE << "GPU queue total time: " << std::fixed << std::setprecision(5) << total_time;
47 }
48 }
49}
50
59
61{
63 LOG_TRACE << "GPU queue launch " << device_kernel_as_string(kernel) << ", work_size "
64 << work_size;
65 }
66
67 last_kernels_enqueued_.set(kernel, true);
68}
69
76
78{
80 const double new_time = time_dt();
81 const double elapsed_time = new_time - last_sync_time_;
82 LOG_TRACE << "GPU queue synchronize, elapsed " << std::setw(10) << elapsed_time << "s";
83
84 /* It makes no sense to have entries in the performance data
85 * container without related kernel information. */
86 if (last_kernels_enqueued_.any()) {
88 }
89
90 last_sync_time_ = new_time;
91 }
92
94}
95
100
static DBVT_INLINE btDbvtNode * sort(btDbvtNode *n, btDbvtNode *&r)
Definition btDbvt.cpp:418
double last_sync_time_
DeviceKernelMask last_kernels_enqueued_
void debug_enqueue_end()
Definition queue.cpp:70
void debug_synchronize()
Definition queue.cpp:77
bool is_per_kernel_performance_
Device * device
map< DeviceKernelMask, double > stats_kernel_time_
string debug_active_kernels()
Definition queue.cpp:96
virtual ~DeviceQueue()
Definition queue.cpp:22
virtual bool synchronize()=0
void debug_init_execution()
Definition queue.cpp:51
DeviceQueue(Device *device)
Definition queue.cpp:16
void debug_enqueue_begin(DeviceKernel kernel, const int work_size)
Definition queue.cpp:60
#define CCL_NAMESPACE_END
string device_kernel_mask_as_string(DeviceKernelMask mask)
const char * device_kernel_as_string(DeviceKernel kernel)
ccl_gpu_kernel_postfix const ccl_global int ccl_global float const int work_size
DeviceKernel
#define LOG_IS_ON(level)
Definition log.h:113
@ LOG_LEVEL_TRACE
Definition log.h:27
#define DCHECK_NE(a, b)
Definition log.h:143
#define LOG_TRACE
Definition log.h:108
ccl_device_inline float2 mask(const MaskType mask, const float2 a)
CCL_NAMESPACE_BEGIN double time_dt()
Definition time.cpp:47
double total_time