Sierra Toolkit
Version of the Day
/*------------------------------------------------------------------------*/
/*                Copyright 2010 - 2011 Sandia Corporation.                */
/*  Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive    */
/*  license for use of this work by or on behalf of the U.S. Government.   */
/*  Export of this program may require a license from the                  */
/*  United States Government.                                              */
/*------------------------------------------------------------------------*/

#include <stk_util/diag/PrintTimer.hpp>
#include <stk_util/diag/PrintTable.hpp>

#include <iomanip>
#include <ostream>
#include <stdexcept>
#include <typeinfo>
#include <utility>
#include <algorithm>
#include <limits>

#include <stk_util/diag/Writer.hpp>
#include <stk_util/diag/WriterManip.hpp>
#include <stk_util/diag/WriterExt.hpp>
#include <stk_util/util/string_case_compare.hpp>
#include <stk_util/util/Marshal.hpp>

namespace stk_classic {
namespace diag {
namespace {
struct ParallelTimer;
}}

template <class T>
Marshal &operator<<(Marshal &mout, const diag::Timer::Metric<T> &t);

Marshal &operator<<(Marshal &mout, const diag::Timer &t);

Marshal &operator>>(Marshal &min, diag::ParallelTimer &t);
}

namespace stk_classic {
namespace diag {

namespace {

// Formats a numerator/denominator pair as a parenthesized percentage.
struct Percent
{
  Percent(double numerator, double denominator)
    : m_numerator(numerator),
      m_denominator(denominator)
  {}

  std::ostream &operator()(std::ostream &os) const;

private:
  double    m_numerator;
  double    m_denominator;
};


std::ostream &
Percent::operator()(
  std::ostream &        os) const
{
  std::ostringstream strout;

  if (m_numerator == 0.0)
    strout << "(0.00%)";
  else if (m_denominator == 0.0)
    strout << "( NaN)";
  else {
    double ratio = m_numerator/m_denominator*100.0;
    if (ratio < 0.01)
      strout << "(<0.01%)";
    else if (ratio >= 100.0)
      strout << "(" << std::setw(5) << std::setprecision(1) << std::fixed << ratio << "%)";
    else
      strout << "(" << std::setw(5) << std::setprecision(2) << std::fixed << ratio << "%)";
  }

  return os << strout.str();
}


inline std::ostream &operator<<(std::ostream &os, const Percent &p) {
  return p(os);
}

// Per-timer data gathered to the root processor; each metric carries the local
// value/checkpoint plus the sum, min and max accumulated across processors.
struct ParallelTimer
{
  template <typename T>
  struct Metric
  {
    Metric()
      : m_value(0),
        m_checkpoint(0),
        m_sum(0.0),
        m_min(std::numeric_limits<double>::max()),
        m_max(0.0)
    {}

    typename MetricTraits<T>::Type    m_value;
    typename MetricTraits<T>::Type    m_checkpoint;
    double                            m_sum;
    double                            m_min;
    double                            m_max;

    void accumulate(const Metric<T> &metric, bool checkpoint) {
      double value = static_cast<double>(metric.m_value);
      if (checkpoint)
        value -= static_cast<double>(metric.m_checkpoint);

      m_sum += value;
      m_min = std::min(m_min, value);
      m_max = std::max(m_max, value);
    }

    Writer &dump(Writer &dout) const {
      if (dout.shouldPrint()) {
        dout << "Metric<" << typeid(typename MetricTraits<T>::Type) << ">" << push << dendl;
        dout << "m_value " << m_value << dendl;
        dout << "m_checkpoint " << m_checkpoint << dendl;
        dout << "m_sum " << m_sum << dendl;
        dout << "m_min " << m_min << dendl;
        dout << "m_max " << m_max << dendl;
        dout << pop;
      }
      return dout;
    }
  };

  ParallelTimer()
    : m_name(),
      m_timerMask(0),
      m_subtimerLapCount(0),
      m_lapCount(),
      m_cpuTime(),
      m_wallTime(),
      m_MPICount(),
      m_MPIByteCount(),
      m_heapAlloc(),
      m_subtimerList()
  {}

  ParallelTimer(const ParallelTimer &parallel_timer)
    : m_name(parallel_timer.m_name),
      m_timerMask(parallel_timer.m_timerMask),
      m_subtimerLapCount(parallel_timer.m_subtimerLapCount),
      m_lapCount(parallel_timer.m_lapCount),
      m_cpuTime(parallel_timer.m_cpuTime),
      m_wallTime(parallel_timer.m_wallTime),
      m_MPICount(parallel_timer.m_MPICount),
      m_MPIByteCount(parallel_timer.m_MPIByteCount),
      m_heapAlloc(parallel_timer.m_heapAlloc),
      m_subtimerList(parallel_timer.m_subtimerList)
  {}

  ParallelTimer &operator=(const ParallelTimer &parallel_timer) {
    m_name = parallel_timer.m_name;
    m_timerMask = parallel_timer.m_timerMask;
    m_subtimerLapCount = parallel_timer.m_subtimerLapCount;
    m_lapCount = parallel_timer.m_lapCount;
    m_cpuTime = parallel_timer.m_cpuTime;
    m_wallTime = parallel_timer.m_wallTime;
    m_MPICount = parallel_timer.m_MPICount;
    m_MPIByteCount = parallel_timer.m_MPIByteCount;
    m_heapAlloc = parallel_timer.m_heapAlloc;
    m_subtimerList = parallel_timer.m_subtimerList;

    return *this;
  }

  template <class T>
  const Metric<T> &getMetric() const;

  std::string                 m_name;
  TimerMask                   m_timerMask;
  double                      m_subtimerLapCount;

  Metric<LapCount>            m_lapCount;
  Metric<CPUTime>             m_cpuTime;
  Metric<WallTime>            m_wallTime;
  Metric<MPICount>            m_MPICount;
  Metric<MPIByteCount>        m_MPIByteCount;
  Metric<HeapAlloc>           m_heapAlloc;

  std::list<ParallelTimer>    m_subtimerList;

  Writer &dump(Writer &dout) const;
};

template<>
const ParallelTimer::Metric<LapCount> &
ParallelTimer::getMetric<LapCount>() const {
  return m_lapCount;
}


template<>
const ParallelTimer::Metric<CPUTime> &
ParallelTimer::getMetric<CPUTime>() const {
  return m_cpuTime;
}


template<>
const ParallelTimer::Metric<WallTime> &
ParallelTimer::getMetric<WallTime>() const {
  return m_wallTime;
}


template<>
const ParallelTimer::Metric<MPICount> &
ParallelTimer::getMetric<MPICount>() const {
  return m_MPICount;
}


template<>
const ParallelTimer::Metric<MPIByteCount> &
ParallelTimer::getMetric<MPIByteCount>() const {
  return m_MPIByteCount;
}


template<>
const ParallelTimer::Metric<HeapAlloc> &
ParallelTimer::getMetric<HeapAlloc>() const {
  return m_heapAlloc;
}


template <typename T>
Writer &operator<<(Writer &dout, const ParallelTimer::Metric<T> &t) {
  return t.dump(dout);
}

Writer &operator<<(Writer &dout, const ParallelTimer &parallel_timer) {
  return parallel_timer.dump(dout);
}

Writer &
ParallelTimer::dump(Writer &dout) const {
  if (dout.shouldPrint()) {
    dout << "ParallelTimer " << m_name << push << dendl;
    dout << "m_name " << m_name << dendl;
    dout << "m_timerMask " << hex << m_timerMask << dendl;
    dout << "m_subtimerLapCount " << m_subtimerLapCount << dendl;
    dout << "m_lapCount " << m_lapCount << dendl;
    dout << "m_cpuTime " << m_cpuTime << dendl;
    dout << "m_wallTime " << m_wallTime << dendl;
    dout << "m_MPICount " << m_MPICount << dendl;
    dout << "m_MPIByteCount " << m_MPIByteCount << dendl;
    dout << "m_heapAlloc " << m_heapAlloc << dendl;
    dout << "m_subtimerList " << m_subtimerList << dendl;
    dout << pop;
  }
  return dout;
}

#ifdef __INTEL_COMPILER
#pragma warning(push)
#pragma warning(disable: 444)
#endif
// Predicate for locating a subtimer by name (uses equal_case from string_case_compare).
class finder : public std::unary_function<ParallelTimer, bool>
{
public:
  finder(const std::string &name)
    : m_name(name)
  {}

  bool operator()(const ParallelTimer &parallel_timer) const {
    return equal_case(parallel_timer.m_name, m_name);
  }

private:
  std::string    m_name;
};
#ifdef __INTEL_COMPILER
#pragma warning(pop)
#endif


void
merge_parallel_timer(
  ParallelTimer &       p0,
  const ParallelTimer & p1,
  bool                  checkpoint)
{
  p0.m_timerMask = p1.m_timerMask;
  p0.m_subtimerLapCount += p1.m_subtimerLapCount;
  p0.m_lapCount.accumulate(p1.m_lapCount, checkpoint);
  p0.m_cpuTime.accumulate(p1.m_cpuTime, checkpoint);
  p0.m_wallTime.accumulate(p1.m_wallTime, checkpoint);
  p0.m_MPICount.accumulate(p1.m_MPICount, checkpoint);
  p0.m_MPIByteCount.accumulate(p1.m_MPIByteCount, checkpoint);
  p0.m_heapAlloc.accumulate(p1.m_heapAlloc, checkpoint);

  for (std::list<ParallelTimer>::const_iterator p1_it = p1.m_subtimerList.begin(); p1_it != p1.m_subtimerList.end(); ++p1_it) {
    std::list<ParallelTimer>::iterator p0_it = std::find_if(p0.m_subtimerList.begin(), p0.m_subtimerList.end(), finder((*p1_it).m_name));
    if (p0_it == p0.m_subtimerList.end()) {
      p0.m_subtimerList.push_back((*p1_it));
      p0_it = --p0.m_subtimerList.end();
      merge_parallel_timer(*p0_it, *p1_it, checkpoint);
    }
    else
      merge_parallel_timer(*p0_it, *p1_it, checkpoint);
  }
}


void
collect_timers(
  Timer &               root_timer,
  ParallelTimer &       parallel_timer,
  bool                  checkpoint,
  ParallelMachine       comm)
{
  Marshal mout;
  mout << root_timer;

#ifdef STK_HAS_MPI
  const int parallel_root = 0 ;
  const int parallel_size = parallel_machine_size(comm);
  const int parallel_rank = parallel_machine_rank(comm);

  // Gather the send counts on root processor
  std::string send_string(mout.str());

  ParallelTimer root_parallel_timer;

  // We need to gather the timer data in a number of 'cycles' where we
  // only receive from a portion of the other processors each cycle.
  // This is because buffer allocation failures have been observed for
  // runs on very large numbers of processors if the 'root' processor tries
  // to allocate a buffer large enough to hold timing data from all other
  // processors.
  int num_cycles = 16;
  if (parallel_size < 1024) {
    // If fewer than 1024 processors, just do them all at once.
    num_cycles = 1;
  }

  std::vector<char> buffer;

  for(int ii=0; ii<num_cycles; ++ii) {
    std::vector<int> recv_count(parallel_size, 0);
    int * const recv_count_ptr = &recv_count[0] ;

    // send_count is the amount of data this processor needs to send.
    int send_count = send_string.size();

    // Should this processor send on the current cycle?  If not, set send_count to 0.
    if ((parallel_rank+ii)%num_cycles!=0) {
      send_count = 0;
    }

    int result = MPI_Gather(&send_count, 1, MPI_INT,
                            recv_count_ptr, 1, MPI_INT,
                            parallel_root, comm);
    if (MPI_SUCCESS != result) {
      std::ostringstream message ;
      message << "stk_classic::diag::collect_timers FAILED: MPI_Gather = " << result ;
      throw std::runtime_error(message.str());
    }

    // Receive counts are only non-zero on the root processor:
    std::vector<int> recv_displ(parallel_size + 1, 0);

    for (int i = 0 ; i < parallel_size ; ++i) {
      recv_displ[i + 1] = recv_displ[i] + recv_count[i] ;
    }

    const int recv_size = recv_displ[parallel_size] ;

    buffer.assign(recv_size, 0);

    {
      const char * const send_ptr = send_string.data();
      char * const recv_ptr = recv_size ? & buffer[0] : 0;
      int * const recv_displ_ptr = & recv_displ[0] ;

      result = MPI_Gatherv((void *) send_ptr, send_count, MPI_CHAR,
                           recv_ptr, recv_count_ptr, recv_displ_ptr, MPI_CHAR,
                           parallel_root, comm);
      if (MPI_SUCCESS != result) {
        std::ostringstream message ;
        message << "stk_classic::diag::collect_timers FAILED: MPI_Gatherv = " << result ;
        throw std::runtime_error(message.str());
      }

      std::vector<ParallelTimer> parallel_timer_vector;
      parallel_timer_vector.reserve(parallel_size);

      if (parallel_rank == parallel_root) {
        for (int j = 0; j < parallel_size; ++j) {
          int received_count = recv_displ[j+1] - recv_displ[j];
          if (received_count > 0) {
            // Grow parallel_timer_vector by 1:
            parallel_timer_vector.resize(parallel_timer_vector.size()+1);
            Marshal min(std::string(recv_ptr + recv_displ[j], recv_ptr + recv_displ[j + 1]));
            // Put this data into the last entry of parallel_timer_vector:
            min >> parallel_timer_vector[parallel_timer_vector.size()-1];
          }
        }

        if (parallel_rank==parallel_root && send_count>0) root_parallel_timer = parallel_timer_vector[0];

        for (size_t j = 0; j < parallel_timer_vector.size(); ++j)
          merge_parallel_timer(root_parallel_timer, parallel_timer_vector[j], checkpoint);
      }
    }
  }
  parallel_timer = root_parallel_timer;
#endif
}

// PrintTable &printTable(PrintTable &table, MPI_Comm mpi_comm, MetricsMask metrics_mask) const;

PrintTable &
printSubtable(
  PrintTable &          table,
  const Timer &         root_timer,
  const Timer &         timer,
  MetricsMask           metrics_mask,
  int                   depth,
  bool                  timer_checkpoint)
{
  if (timer.getSubtimerLapCount() != 0.0) {
    if (timer.shouldRecord()) {
      if (timer.getTimerMask() == 0 || timer.getMetric<LapCount>().getAccumulatedLap(timer_checkpoint) > 0) {
        table << justify(PrintTable::Cell::LEFT) << indent(depth) << timer.getName() << end_col
              << justify(PrintTable::Cell::RIGHT) << timer.getMetric<LapCount>().getAccumulatedLap(timer_checkpoint) << end_col;

        if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<CPUTime>::METRIC)
          table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<CPUTime>::format(timer.getMetric<CPUTime>().getAccumulatedLap(timer_checkpoint))
                << " " << std::setw(8) << Percent(timer.getMetric<CPUTime>().getAccumulatedLap(timer_checkpoint), root_timer.getMetric<CPUTime>().getAccumulatedLap(timer_checkpoint)) << end_col;
        if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<WallTime>::METRIC)
          table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<WallTime>::format(timer.getMetric<WallTime>().getAccumulatedLap(timer_checkpoint))
                << " " << std::setw(8) << Percent(timer.getMetric<WallTime>().getAccumulatedLap(timer_checkpoint), root_timer.getMetric<WallTime>().getAccumulatedLap(timer_checkpoint)) << end_col;
        if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPICount>::METRIC)
          table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<MPICount>::format(timer.getMetric<MPICount>().getAccumulatedLap(timer_checkpoint))
                << " " << std::setw(8) << Percent(timer.getMetric<MPICount>().getAccumulatedLap(timer_checkpoint), root_timer.getMetric<MPICount>().getAccumulatedLap(timer_checkpoint)) << end_col;
        if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPIByteCount>::METRIC)
          table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<MPIByteCount>::format(timer.getMetric<MPIByteCount>().getAccumulatedLap(timer_checkpoint))
                << " " << std::setw(8) << Percent(timer.getMetric<MPIByteCount>().getAccumulatedLap(timer_checkpoint), root_timer.getMetric<MPIByteCount>().getAccumulatedLap(timer_checkpoint)) << end_col;
        if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<HeapAlloc>::METRIC)
          table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<HeapAlloc>::format(timer.getMetric<HeapAlloc>().getAccumulatedLap(timer_checkpoint))
                << " " << std::setw(8) << Percent(timer.getMetric<HeapAlloc>().getAccumulatedLap(timer_checkpoint), root_timer.getMetric<HeapAlloc>().getAccumulatedLap(timer_checkpoint)) << end_col;
      }
      else
        table << justify(PrintTable::Cell::LEFT) << indent(depth) << span << timer.getName() << end_col;

      table << end_row;
      depth++;
    }

    for (TimerList::const_iterator it = timer.begin(); it != timer.end(); ++it)
      printSubtable(table, root_timer, *it, metrics_mask, depth, timer_checkpoint);
  }

  return table;
}


PrintTable &
printSubtable(
  PrintTable &            table,
  const ParallelTimer &   root_timer,
  const ParallelTimer &   timer,
  MetricsMask             metrics_mask,
  int                     depth,
  bool                    timer_checkpoint)
{
  if (timer.m_subtimerLapCount != 0.0) {
    if (timer.m_timerMask == 0 || timer.getMetric<LapCount>().m_sum > 0) {
      table << justify(PrintTable::Cell::LEFT) << indent(depth) << timer.m_name << end_col
            << justify(PrintTable::Cell::RIGHT) << timer.getMetric<LapCount>().m_sum << end_col;

      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<CPUTime>::METRIC)
        table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<CPUTime>::format(timer.getMetric<CPUTime>().m_sum)
              << " " << std::setw(8) << Percent(timer.getMetric<CPUTime>().m_sum, root_timer.getMetric<CPUTime>().m_sum) << end_col
              << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<CPUTime>::format(timer.getMetric<CPUTime>().m_min)
              << " " << std::setw(8) << Percent(timer.getMetric<CPUTime>().m_min, root_timer.getMetric<CPUTime>().m_sum) << end_col
              << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<CPUTime>::format(timer.getMetric<CPUTime>().m_max)
              << " " << std::setw(8) << Percent(timer.getMetric<CPUTime>().m_max, root_timer.getMetric<CPUTime>().m_sum) << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<WallTime>::METRIC)
        table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<WallTime>::format(timer.getMetric<WallTime>().m_sum)
              << " " << std::setw(8) << Percent(timer.getMetric<WallTime>().m_sum, root_timer.getMetric<WallTime>().m_sum) << end_col
              << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<WallTime>::format(timer.getMetric<WallTime>().m_min)
              << " " << std::setw(8) << Percent(timer.getMetric<WallTime>().m_min, root_timer.getMetric<WallTime>().m_sum) << end_col
              << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<WallTime>::format(timer.getMetric<WallTime>().m_max)
              << " " << std::setw(8) << Percent(timer.getMetric<WallTime>().m_max, root_timer.getMetric<WallTime>().m_sum) << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPICount>::METRIC)
        table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<MPICount>::format(timer.getMetric<MPICount>().m_sum)
              << " " << std::setw(8) << Percent(timer.getMetric<MPICount>().m_sum, root_timer.getMetric<MPICount>().m_sum) << end_col
              << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<MPICount>::format(timer.getMetric<MPICount>().m_min)
              << " " << std::setw(8) << Percent(timer.getMetric<MPICount>().m_min, root_timer.getMetric<MPICount>().m_sum) << end_col
              << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<MPICount>::format(timer.getMetric<MPICount>().m_max)
              << " " << std::setw(8) << Percent(timer.getMetric<MPICount>().m_max, root_timer.getMetric<MPICount>().m_sum) << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPIByteCount>::METRIC)
        table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<MPIByteCount>::format(timer.getMetric<MPIByteCount>().m_sum)
              << " " << std::setw(8) << Percent(timer.getMetric<MPIByteCount>().m_sum, root_timer.getMetric<MPIByteCount>().m_sum) << end_col
              << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<MPIByteCount>::format(timer.getMetric<MPIByteCount>().m_min)
              << " " << std::setw(8) << Percent(timer.getMetric<MPIByteCount>().m_min, root_timer.getMetric<MPIByteCount>().m_sum) << end_col
              << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<MPIByteCount>::format(timer.getMetric<MPIByteCount>().m_max)
              << " " << std::setw(8) << Percent(timer.getMetric<MPIByteCount>().m_max, root_timer.getMetric<MPIByteCount>().m_sum) << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<HeapAlloc>::METRIC)
        table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<HeapAlloc>::format(timer.getMetric<HeapAlloc>().m_sum)
              << " " << std::setw(8) << Percent(timer.getMetric<HeapAlloc>().m_sum, root_timer.getMetric<HeapAlloc>().m_sum) << end_col
              << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<HeapAlloc>::format(timer.getMetric<HeapAlloc>().m_min)
              << " " << std::setw(8) << Percent(timer.getMetric<HeapAlloc>().m_min, root_timer.getMetric<HeapAlloc>().m_sum) << end_col
              << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<HeapAlloc>::format(timer.getMetric<HeapAlloc>().m_max)
              << " " << std::setw(8) << Percent(timer.getMetric<HeapAlloc>().m_max, root_timer.getMetric<HeapAlloc>().m_sum) << end_col;
    }
    else
      table << justify(PrintTable::Cell::LEFT) << indent(depth) << span << timer.m_name << end_col;

    table << end_row;
    depth++;
  }

  for (std::list<ParallelTimer>::const_iterator it = timer.m_subtimerList.begin(); it != timer.m_subtimerList.end(); ++it)
    printSubtable(table, root_timer, *it, metrics_mask, depth, timer_checkpoint);

  return table;
}


PrintTable &
printTable(
  PrintTable &          table,
  Timer &               root_timer,
  MetricsMask           metrics_mask,
  size_t                name_width,
  bool                  timer_checkpoint)
{
  updateRootTimer(root_timer);

  root_timer.accumulateSubtimerLapCounts();

  if (metrics_mask & getEnabledTimerMetricsMask()) {
    table.setAutoEndCol(false);

    table << cell_width(name_width) << justify(PrintTable::Cell::CENTER) << "Timer" << (timer_checkpoint ? " (delta time)" : "") << end_col
          << justify(PrintTable::Cell::CENTER) << "Count" << end_col;

    if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<CPUTime>::METRIC)
      table << justify(PrintTable::Cell::CENTER) << MetricTraits<CPUTime>::table_header() << end_col;
    if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<WallTime>::METRIC)
      table << justify(PrintTable::Cell::CENTER) << MetricTraits<WallTime>::table_header() << end_col;
    if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPICount>::METRIC)
      table << justify(PrintTable::Cell::CENTER) << MetricTraits<MPICount>::table_header() << end_col;
    if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPIByteCount>::METRIC)
      table << justify(PrintTable::Cell::CENTER) << MetricTraits<MPIByteCount>::table_header() << end_col;
    if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<HeapAlloc>::METRIC)
      table << justify(PrintTable::Cell::CENTER) << MetricTraits<HeapAlloc>::table_header() << end_col;

    table << end_header;

    printSubtable(table, root_timer, root_timer, metrics_mask, 0, timer_checkpoint);

    if (timer_checkpoint)
      root_timer.checkpoint();
  }

  return table;
}


PrintTable &
printTable(
  PrintTable &          table,
  Timer &               root_timer,
  MetricsMask           metrics_mask,
  size_t                name_width,
  bool                  timer_checkpoint,
  ParallelMachine       parallel_machine)
{
  updateRootTimer(root_timer);

  root_timer.accumulateSubtimerLapCounts();

  ParallelTimer parallel_timer;

  stk_classic::diag::collect_timers(root_timer, parallel_timer, timer_checkpoint, parallel_machine);

  int parallel_rank = parallel_machine_rank(parallel_machine);
  if (parallel_rank == 0) {
    if (metrics_mask & getEnabledTimerMetricsMask()) {
      table.setAutoEndCol(false);

      table << end_col << end_col;

      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<CPUTime>::METRIC)
        table << justify(PrintTable::Cell::CENTER) << MetricTraits<CPUTime>::table_header() << end_col
              << justify(PrintTable::Cell::CENTER) << MetricTraits<CPUTime>::table_header() << end_col
              << justify(PrintTable::Cell::CENTER) << MetricTraits<CPUTime>::table_header() << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<WallTime>::METRIC)
        table << justify(PrintTable::Cell::CENTER) << MetricTraits<WallTime>::table_header() << end_col
              << justify(PrintTable::Cell::CENTER) << MetricTraits<WallTime>::table_header() << end_col
              << justify(PrintTable::Cell::CENTER) << MetricTraits<WallTime>::table_header() << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPICount>::METRIC)
        table << justify(PrintTable::Cell::CENTER) << MetricTraits<MPICount>::table_header() << end_col
              << justify(PrintTable::Cell::CENTER) << MetricTraits<MPICount>::table_header() << end_col
              << justify(PrintTable::Cell::CENTER) << MetricTraits<MPICount>::table_header() << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPIByteCount>::METRIC)
        table << justify(PrintTable::Cell::CENTER) << MetricTraits<MPIByteCount>::table_header() << end_col
              << justify(PrintTable::Cell::CENTER) << MetricTraits<MPIByteCount>::table_header() << end_col
              << justify(PrintTable::Cell::CENTER) << MetricTraits<MPIByteCount>::table_header() << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<HeapAlloc>::METRIC)
        table << justify(PrintTable::Cell::CENTER) << MetricTraits<HeapAlloc>::table_header() << end_col
              << justify(PrintTable::Cell::CENTER) << MetricTraits<HeapAlloc>::table_header() << end_col
              << justify(PrintTable::Cell::CENTER) << MetricTraits<HeapAlloc>::table_header() << end_col;

      table << end_header;
      table << cell_width(name_width) << justify(PrintTable::Cell::CENTER) << "Timer" << (timer_checkpoint ? " (delta time)" : "") << end_col
            << justify(PrintTable::Cell::CENTER) << "Count" << end_col;

      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<CPUTime>::METRIC)
        table << justify(PrintTable::Cell::CENTER) << "Sum (% of System)" << end_col
              << justify(PrintTable::Cell::CENTER) << "Min (% of System)" << end_col
              << justify(PrintTable::Cell::CENTER) << "Max (% of System)" << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<WallTime>::METRIC)
        table << justify(PrintTable::Cell::CENTER) << "Sum (% of System)" << end_col
              << justify(PrintTable::Cell::CENTER) << "Min (% of System)" << end_col
              << justify(PrintTable::Cell::CENTER) << "Max (% of System)" << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPICount>::METRIC)
        table << justify(PrintTable::Cell::CENTER) << "Sum (% of System)" << end_col
              << justify(PrintTable::Cell::CENTER) << "Min (% of System)" << end_col
              << justify(PrintTable::Cell::CENTER) << "Max (% of System)" << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPIByteCount>::METRIC)
        table << justify(PrintTable::Cell::CENTER) << "Sum (% of System)" << end_col
              << justify(PrintTable::Cell::CENTER) << "Min (% of System)" << end_col
              << justify(PrintTable::Cell::CENTER) << "Max (% of System)" << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<HeapAlloc>::METRIC)
        table << justify(PrintTable::Cell::CENTER) << "Sum (% of System)" << end_col
              << justify(PrintTable::Cell::CENTER) << "Min (% of System)" << end_col
              << justify(PrintTable::Cell::CENTER) << "Max (% of System)" << end_col;

      table << end_header;

      printSubtable(table, parallel_timer, parallel_timer, metrics_mask, 0, timer_checkpoint);
    }

    if (timer_checkpoint)
      root_timer.checkpoint();
  }
  return table;
}

} // namespace <empty>


std::ostream &printTimersTable(std::ostream& os, Timer root_timer, MetricsMask metrics_mask, bool timer_checkpoint)
{
  stk_classic::PrintTable print_table;

  printTable(print_table, root_timer, metrics_mask, 40, timer_checkpoint);

  os << print_table;

  return os;
}


std::ostream &printTimersTable(std::ostream& os, Timer root_timer, MetricsMask metrics_mask, bool timer_checkpoint, ParallelMachine parallel_machine)
{
  stk_classic::PrintTable print_table;

  int parallel_size = parallel_machine_size(parallel_machine);
  if (parallel_size == 1)
    printTable(print_table, root_timer, metrics_mask, 40, timer_checkpoint);
  else
    printTable(print_table, root_timer, metrics_mask, 40, timer_checkpoint, parallel_machine);

  os << print_table;

  return os;
}


// std::ostream &printXML(std::ostream &os, MPI_Comm mpi_comm, MetricsMask metrics_mask) const;
std::ostream &printXML(std::ostream &os, MetricsMask metrics_mask, bool timer_checkpoint);

std::ostream &printSubXML(std::ostream &os, MetricsMask metrics_mask, int depth, bool timer_checkpoint);

} // namespace diag

Marshal &operator<<(stk_classic::Marshal &mout, const diag::Timer &t);

template <class T>
Marshal &operator<<(Marshal &mout, const diag::Timer::Metric<T> &t) {
  mout << t.getAccumulatedLap(false) << t.getAccumulatedLap(true);

  return mout;
}

Marshal &operator<<(Marshal &mout, const diag::Timer &t) {
  mout << t.getName() << t.getTimerMask() << t.getSubtimerLapCount()
       << t.getMetric<diag::LapCount>() << t.getMetric<diag::CPUTime>() << t.getMetric<diag::WallTime>()
       << t.getMetric<diag::MPICount>() << t.getMetric<diag::MPIByteCount>() << t.getMetric<diag::HeapAlloc>();

  mout << t.getTimerList();

  return mout;
}

Marshal &operator>>(Marshal &min, diag::ParallelTimer &t) {
  min >> t.m_name >> t.m_timerMask >> t.m_subtimerLapCount
      >> t.m_lapCount.m_value
      >> t.m_lapCount.m_checkpoint
      >> t.m_cpuTime.m_value
      >> t.m_cpuTime.m_checkpoint
      >> t.m_wallTime.m_value
      >> t.m_wallTime.m_checkpoint
      >> t.m_MPICount.m_value
      >> t.m_MPICount.m_checkpoint
      >> t.m_MPIByteCount.m_value
      >> t.m_MPIByteCount.m_checkpoint
      >> t.m_heapAlloc.m_value
      >> t.m_heapAlloc.m_checkpoint;

  min >> t.m_subtimerList;

  return min;
}

} // namespace stk_classic
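
The two printTimersTable() overloads above are the public entry points of this file; everything else is support code for gathering and formatting the timer data. The sketch below is not part of the original source: it assumes the caller already owns a root stk_classic::diag::Timer and a communicator of type stk_classic::ParallelMachine, and it assumes the Timer, MetricTraits, and MetricsMask declarations live in stk_util/diag/Timer.hpp. The helper name report_timers and the particular metric mask are illustrative only.

// Illustrative usage sketch (hypothetical helper, not part of PrintTimer.cpp).
#include <iostream>

#include <stk_util/diag/PrintTimer.hpp>   // printTimersTable() declarations (assumed location)
#include <stk_util/diag/Timer.hpp>        // Timer, MetricTraits, MetricsMask (assumed location)

void report_timers(stk_classic::diag::Timer &root_timer,
                   stk_classic::ParallelMachine comm,
                   bool checkpoint)
{
  using namespace stk_classic::diag;

  // Request CPU and wall-clock columns; other metrics stay masked off.
  MetricsMask mask = MetricTraits<CPUTime>::METRIC | MetricTraits<WallTime>::METRIC;

  // Serial overload: each rank formats its own timer tree.
  printTimersTable(std::cout, root_timer, mask, checkpoint);

  // Parallel overload: timer trees are marshalled to rank 0 and printed with
  // Sum/Min/Max columns; non-root ranks emit an empty table.
  printTimersTable(std::cout, root_timer, mask, checkpoint, comm);
}

Passing checkpoint as true reports only the laps accumulated since the previous checkpoint() call (the "delta time" column label in the table header), which is how the code above supports periodic incremental reports.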