28 stats_sorted.push_back(stat);
31 sort(stats_sorted.begin(),
33 [](
const pair<DeviceKernelMask, double> &a,
const pair<DeviceKernelMask, double> &
b) {
34 return a.second > b.second;
39 for (
const auto &[
mask, time] : stats_sorted) {
41 LOG_TRACE <<
" " << std::setfill(
' ') << std::setw(10) << std::fixed << std::setprecision(5)
80 const double new_time =
time_dt();
82 LOG_TRACE <<
"GPU queue synchronize, elapsed " << std::setw(10) << elapsed_time <<
"s";
static DBVT_INLINE btDbvtNode * sort(btDbvtNode *n, btDbvtNode *&r)
DeviceKernelMask last_kernels_enqueued_
bool is_per_kernel_performance_
map< DeviceKernelMask, double > stats_kernel_time_
string debug_active_kernels()
virtual bool synchronize()=0
void debug_init_execution()
DeviceQueue(Device *device)
void debug_enqueue_begin(DeviceKernel kernel, const int work_size)
#define CCL_NAMESPACE_END
string device_kernel_mask_as_string(DeviceKernelMask mask)
const char * device_kernel_as_string(DeviceKernel kernel)
ccl_gpu_kernel_postfix const ccl_global int ccl_global float const int work_size
ccl_device_inline float2 mask(const MaskType mask, const float2 a)
CCL_NAMESPACE_BEGIN double time_dt()