|
Teuchos Package Browser (Single Doxygen Collection)
Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Teuchos: Common Tools Package 00005 // Copyright (2004) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00038 // 00039 // *********************************************************************** 00040 // @HEADER 00041 00042 #include "Teuchos_TimeMonitor.hpp" 00043 #include "Teuchos_CommHelpers.hpp" 00044 #include "Teuchos_DefaultComm.hpp" 00045 #include "Teuchos_TableColumn.hpp" 00046 #include "Teuchos_TableFormat.hpp" 00047 #include "Teuchos_StandardParameterEntryValidators.hpp" 00048 #include "Teuchos_ScalarTraits.hpp" 00049 #include <functional> 00050 00051 00052 namespace Teuchos { 00105 template<class Ordinal, class ScalarType, class IndexType> 00106 class MaxLoc : 00107 public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > { 00108 public: 00109 void 00110 reduce (const Ordinal count, 00111 const std::pair<ScalarType, IndexType> inBuffer[], 00112 std::pair<ScalarType, IndexType> inoutBuffer[]) const; 00113 }; 00114 00115 template<class Ordinal> 00116 class MaxLoc<Ordinal, double, int> : 00117 public ValueTypeReductionOp<Ordinal, std::pair<double, int> > { 00118 public: 00119 void 00120 reduce (const Ordinal count, 00121 const std::pair<double, int> inBuffer[], 00122 std::pair<double, int> inoutBuffer[]) const 00123 { 00124 for (Ordinal ind = 0; ind < count; ++ind) { 00125 const std::pair<double, int>& in = inBuffer[ind]; 00126 std::pair<double, int>& inout = inoutBuffer[ind]; 00127 00128 if (in.first > inout.first) { 00129 inout.first = in.first; 00130 inout.second = in.second; 00131 } else if (in.first < inout.first) { 00132 // Don't need to do anything; inout has the values. 00133 } else { // equal, or at least one is NaN. 00134 inout.first = in.first; 00135 inout.second = std::min (in.second, inout.second); 00136 } 00137 } 00138 } 00139 }; 00140 00167 template<class Ordinal, class ScalarType, class IndexType> 00168 class MinLoc : 00169 public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > { 00170 public: 00171 void 00172 reduce (const Ordinal count, 00173 const std::pair<ScalarType, IndexType> inBuffer[], 00174 std::pair<ScalarType, IndexType> inoutBuffer[]) const; 00175 }; 00176 00177 template<class Ordinal> 00178 class MinLoc<Ordinal, double, int> : 00179 public ValueTypeReductionOp<Ordinal, std::pair<double, int> > { 00180 public: 00181 void 00182 reduce (const Ordinal count, 00183 const std::pair<double, int> inBuffer[], 00184 std::pair<double, int> inoutBuffer[]) const 00185 { 00186 for (Ordinal ind = 0; ind < count; ++ind) { 00187 const std::pair<double, int>& in = inBuffer[ind]; 00188 std::pair<double, int>& inout = inoutBuffer[ind]; 00189 00190 if (in.first < inout.first) { 00191 inout.first = in.first; 00192 inout.second = in.second; 00193 } else if (in.first > inout.first) { 00194 // Don't need to do anything; inout has the values. 00195 } else { // equal, or at least one is NaN. 00196 inout.first = in.first; 00197 inout.second = std::min (in.second, inout.second); 00198 } 00199 } 00200 } 00201 }; 00202 00206 template<class Ordinal, class ScalarType, class IndexType> 00207 class MinLocNonzero : 00208 public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > { 00209 public: 00210 void 00211 reduce (const Ordinal count, 00212 const std::pair<ScalarType, IndexType> inBuffer[], 00213 std::pair<ScalarType, IndexType> inoutBuffer[]) const; 00214 }; 00215 00216 template<class Ordinal> 00217 class MinLocNonzero<Ordinal, double, int> : 00218 public ValueTypeReductionOp<Ordinal, std::pair<double, int> > { 00219 public: 00220 void 00221 reduce (const Ordinal count, 00222 const std::pair<double, int> inBuffer[], 00223 std::pair<double, int> inoutBuffer[]) const 00224 { 00225 for (Ordinal ind = 0; ind < count; ++ind) { 00226 const std::pair<double, int>& in = inBuffer[ind]; 00227 std::pair<double, int>& inout = inoutBuffer[ind]; 00228 00229 if ( (in.first < inout.first && in.first != 0) || (inout.first == 0 && in.first != 0) ) { 00230 inout.first = in.first; 00231 inout.second = in.second; 00232 } else if (in.first > inout.first) { 00233 // Don't need to do anything; inout has the values. 00234 } else { // equal, or at least one is NaN. 00235 inout.first = in.first; 00236 inout.second = std::min (in.second, inout.second); 00237 } 00238 } 00239 } 00240 }; 00241 00242 // Typedef used internally by TimeMonitor::summarize() and its 00243 // helper functions. The map is keyed on timer label (a string). 00244 // Each value is a pair: (total number of seconds over all calls to 00245 // that timer, total number of calls to that timer). 00246 typedef std::map<std::string, std::pair<double, int> > timer_map_t; 00247 00248 TimeMonitor::TimeMonitor (Time& timer, bool reset) 00249 : PerformanceMonitorBase<Time>(timer, reset) 00250 { 00251 if (!isRecursiveCall()) counter().start(reset); 00252 } 00253 00254 TimeMonitor::~TimeMonitor() { 00255 if (!isRecursiveCall()) counter().stop(); 00256 } 00257 00258 void 00259 TimeMonitor::disableTimer (const std::string& name) 00260 { 00261 RCP<Time> timer = lookupCounter (name); 00262 TEUCHOS_TEST_FOR_EXCEPTION( 00263 timer == null, std::invalid_argument, 00264 "TimeMonitor::disableTimer: Invalid timer \"" << name << "\""); 00265 timer->disable (); 00266 } 00267 00268 void 00269 TimeMonitor::enableTimer (const std::string& name) 00270 { 00271 RCP<Time> timer = lookupCounter (name); 00272 TEUCHOS_TEST_FOR_EXCEPTION( 00273 timer == null, std::invalid_argument, 00274 "TimeMonitor::enableTimer: Invalid timer \"" << name << "\""); 00275 timer->enable (); 00276 } 00277 00278 void 00279 TimeMonitor::zeroOutTimers() 00280 { 00281 typedef std::map<std::string, RCP<Time> > map_type; 00282 typedef map_type::iterator iter_type; 00283 map_type& ctrs = counters (); 00284 00285 // In debug mode, loop first to check whether any of the timers 00286 // are running, before resetting them. This ensures that this 00287 // method satisfies the strong exception guarantee (either it 00288 // completes normally, or there are no side effects). 00289 #ifdef TEUCHOS_DEBUG 00290 for (iter_type it = ctrs.begin(); it != ctrs.end(); ++it) { 00291 // We throw a runtime_error rather than a logic_error, because 00292 // logic_error suggests a bug in the implementation of 00293 // TimeMonitor. Calling zeroOutTimers() when a timer is running 00294 // is not TimeMonitor's fault. 00295 TEUCHOS_TEST_FOR_EXCEPTION( 00296 it->second->isRunning (), std::runtime_error, 00297 "Timer \"" << it->second->name () << "\" is currently running. " 00298 "You are not allowed to reset running timers."); 00299 } 00300 #endif // TEUCHOS_DEBUG 00301 00302 for (iter_type it = ctrs.begin(); it != ctrs.end(); ++it) { 00303 it->second->reset (); 00304 } 00305 } 00306 00307 // An anonymous namespace is the standard way of limiting linkage of 00308 // its contained routines to file scope. 00309 namespace { 00310 // \brief Return an "empty" local timer datum. 00311 // 00312 // "Empty" means the datum has zero elapsed time and zero call 00313 // count. This function does not actually create a timer. 00314 // 00315 // \param name The timer's name. 00316 std::pair<std::string, std::pair<double, int> > 00317 makeEmptyTimerDatum (const std::string& name) 00318 { 00319 return std::make_pair (name, std::make_pair (double(0), int(0))); 00320 } 00321 00322 // \fn collectLocalTimerData 00323 // \brief Collect and sort local timer data by timer names. 00324 // 00325 // \param localData [out] Map whose keys are the timer names, and 00326 // whose value for each key is the total elapsed time (in 00327 // seconds) and the call count for the timer with that name. 00328 // 00329 // \param localCounters [in] Timers from which to extract data. 00330 // 00331 // \param filter [in] Filter for timer labels. If filter is not 00332 // empty, this method will only collect data for local timers 00333 // whose labels begin with this string. 00334 // 00335 // Extract the total elapsed time and call count from each timer 00336 // in the given array. Merge results for timers with duplicate 00337 // labels, by summing their total elapsed times and call counts 00338 // pairwise. 00339 void 00340 collectLocalTimerData (timer_map_t& localData, 00341 const std::map<std::string, RCP<Time> >& localCounters, 00342 const std::string& filter="") 00343 { 00344 using std::make_pair; 00345 typedef timer_map_t::iterator iter_t; 00346 00347 timer_map_t theLocalData; 00348 for (std::map<std::string, RCP<Time> >::const_iterator it = localCounters.begin(); 00349 it != localCounters.end(); ++it) { 00350 const std::string& name = it->second->name (); 00351 00352 // Filter current timer name, if provided filter is nonempty. 00353 // Filter string must _start_ the timer label, not just be in it. 00354 const bool skipThisOne = (filter != "" && name.find (filter) != 0); 00355 if (! skipThisOne) { 00356 const double timing = it->second->totalElapsedTime (); 00357 const int numCalls = it->second->numCalls (); 00358 00359 // Merge timers with duplicate labels, by summing their 00360 // total elapsed times and call counts. 00361 iter_t loc = theLocalData.find (name); 00362 if (loc == theLocalData.end()) { 00363 // Use loc as an insertion location hint. 00364 theLocalData.insert (loc, make_pair (name, make_pair (timing, numCalls))); 00365 } 00366 else { 00367 loc->second.first += timing; 00368 loc->second.second += numCalls; 00369 } 00370 } 00371 } 00372 // This avoids copying the map, and also makes this method 00373 // satisfy the strong exception guarantee. 00374 localData.swap (theLocalData); 00375 } 00376 00377 // \brief Locally filter out timer data with zero call counts. 00378 // 00379 // \param timerData [in/out] 00380 void 00381 filterZeroData (timer_map_t& timerData) 00382 { 00383 // FIXME (mfh 15 Mar 2013) Should use std::map::erase with 00384 // iterator hint, instead of rebuilding the map completely. 00385 timer_map_t newTimerData; 00386 for (timer_map_t::const_iterator it = timerData.begin(); 00387 it != timerData.end(); ++it) { 00388 if (it->second.second > 0) { 00389 newTimerData[it->first] = it->second; 00390 } 00391 } 00392 timerData.swap (newTimerData); 00393 } 00394 00416 void 00417 collectLocalTimerDataAndNames (timer_map_t& localTimerData, 00418 Array<std::string>& localTimerNames, 00419 const std::map<std::string, RCP<Time> >& localTimers, 00420 const bool writeZeroTimers, 00421 const std::string& filter="") 00422 { 00423 // Collect and sort local timer data by timer names. 00424 collectLocalTimerData (localTimerData, localTimers, filter); 00425 00426 // Filter out zero data locally first. This ensures that if we 00427 // are writing global stats, and if a timer name exists in the 00428 // set of global names, then that timer has a nonzero call count 00429 // on at least one MPI process. 00430 if (! writeZeroTimers) { 00431 filterZeroData (localTimerData); 00432 } 00433 00434 // Extract the set of local timer names. The std::map keeps 00435 // them sorted alphabetically. 00436 localTimerNames.reserve (localTimerData.size()); 00437 for (timer_map_t::const_iterator it = localTimerData.begin(); 00438 it != localTimerData.end(); ++it) { 00439 localTimerNames.push_back (it->first); 00440 } 00441 } 00442 00477 void 00478 collectGlobalTimerData (timer_map_t& globalTimerData, 00479 Array<std::string>& globalTimerNames, 00480 timer_map_t& localTimerData, 00481 Array<std::string>& localTimerNames, 00482 Ptr<const Comm<int> > comm, 00483 const bool alwaysWriteLocal, 00484 const ECounterSetOp setOp) 00485 { 00486 // There may be some global timers that are not local timers on 00487 // the calling MPI process(es). In that case, if 00488 // alwaysWriteLocal is true, then we need to fill in the 00489 // "missing" local timers. That will ensure that both global 00490 // and local timer columns in the output table have the same 00491 // number of rows. The collectLocalTimerDataAndNames() method 00492 // may have already filtered out local timers with zero call 00493 // counts (if its writeZeroTimers argument was false), but we 00494 // won't be filtering again. Thus, any local timer data we 00495 // insert here won't get filtered out. 00496 // 00497 // Note that calling summarize() with writeZeroTimers == false 00498 // will still do what it says, even if we insert local timers 00499 // with zero call counts here. 00500 00501 // This does the correct and inexpensive thing (just copies the 00502 // timer data) if numProcs == 1. Otherwise, it initiates a 00503 // communication with \f$O(\log P)\f$ messages along the 00504 // critical path, where \f$P\f$ is the number of participating 00505 // processes. 00506 mergeCounterNames (*comm, localTimerNames, globalTimerNames, setOp); 00507 00508 #ifdef TEUCHOS_DEBUG 00509 { 00510 // Sanity check that all processes have the name number of 00511 // global timer names. 00512 const timer_map_t::size_type myNumGlobalNames = globalTimerNames.size(); 00513 timer_map_t::size_type minNumGlobalNames = 0; 00514 timer_map_t::size_type maxNumGlobalNames = 0; 00515 reduceAll (*comm, REDUCE_MIN, myNumGlobalNames, 00516 outArg (minNumGlobalNames)); 00517 reduceAll (*comm, REDUCE_MAX, myNumGlobalNames, 00518 outArg (maxNumGlobalNames)); 00519 TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalNames != maxNumGlobalNames, 00520 std::logic_error, "Min # global timer names = " << minNumGlobalNames 00521 << " != max # global timer names = " << maxNumGlobalNames 00522 << ". Please report this bug to the Teuchos developers."); 00523 TEUCHOS_TEST_FOR_EXCEPTION(myNumGlobalNames != minNumGlobalNames, 00524 std::logic_error, "My # global timer names = " << myNumGlobalNames 00525 << " != min # global timer names = " << minNumGlobalNames 00526 << ". Please report this bug to the Teuchos developers."); 00527 } 00528 #endif // TEUCHOS_DEBUG 00529 00530 // mergeCounterNames() just merges the counters' names, not 00531 // their actual data. Now we need to fill globalTimerData with 00532 // this process' timer data for the timers in globalTimerNames. 00533 // 00534 // All processes need the full list of global timers, since 00535 // there may be some global timers that are not local timers. 00536 // That's why mergeCounterNames() has to be an all-reduce, not 00537 // just a reduction to Proc 0. 00538 // 00539 // Insertion optimization: if the iterator given to map::insert 00540 // points right before where we want to insert, insertion is 00541 // O(1). globalTimerNames is sorted, so feeding the iterator 00542 // output of map::insert into the next invocation's input should 00543 // make the whole insertion O(N) where N is the number of 00544 // entries in globalTimerNames. 00545 timer_map_t::iterator globalMapIter = globalTimerData.begin(); 00546 timer_map_t::iterator localMapIter; 00547 for (Array<string>::const_iterator it = globalTimerNames.begin(); 00548 it != globalTimerNames.end(); ++it) { 00549 const std::string& globalName = *it; 00550 localMapIter = localTimerData.find (globalName); 00551 00552 if (localMapIter == localTimerData.end()) { 00553 if (alwaysWriteLocal) { 00554 // If there are some global timers that are not local 00555 // timers, and if we want to print local timers, we insert 00556 // a local timer datum with zero elapsed time and zero 00557 // call count into localTimerData as well. This will 00558 // ensure that both global and local timer columns in the 00559 // output table have the same number of rows. 00560 // 00561 // We really only need to do this on Proc 0, which is the 00562 // only process that currently may print local timers. 00563 // However, we do it on all processes, just in case 00564 // someone later wants to modify this function to print 00565 // out local timer data for some process other than Proc 00566 // 0. This extra computation won't affect the cost along 00567 // the critical path, for future computations in which 00568 // Proc 0 participates. 00569 localMapIter = localTimerData.insert (localMapIter, makeEmptyTimerDatum (globalName)); 00570 00571 // Make sure the missing global name gets added to the 00572 // list of local names. We'll re-sort the list of local 00573 // names below. 00574 localTimerNames.push_back (globalName); 00575 } 00576 // There's a global timer that's not a local timer. Add it 00577 // to our pre-merge version of the global timer data so that 00578 // we can safely merge the global timer data later. 00579 globalMapIter = globalTimerData.insert (globalMapIter, makeEmptyTimerDatum (globalName)); 00580 } 00581 else { 00582 // We have this global timer name in our local timer list. 00583 // Fill in our pre-merge version of the global timer data 00584 // with our local data. 00585 globalMapIter = globalTimerData.insert (globalMapIter, std::make_pair (globalName, localMapIter->second)); 00586 } 00587 } 00588 00589 if (alwaysWriteLocal) { 00590 // Re-sort the list of local timer names, since we may have 00591 // inserted "missing" names above. 00592 std::sort (localTimerNames.begin(), localTimerNames.end()); 00593 } 00594 00595 #ifdef TEUCHOS_DEBUG 00596 { 00597 // Sanity check that all processes have the name number of 00598 // global timers. 00599 const timer_map_t::size_type myNumGlobalTimers = globalTimerData.size(); 00600 timer_map_t::size_type minNumGlobalTimers = 0; 00601 timer_map_t::size_type maxNumGlobalTimers = 0; 00602 reduceAll (*comm, REDUCE_MIN, myNumGlobalTimers, 00603 outArg (minNumGlobalTimers)); 00604 reduceAll (*comm, REDUCE_MAX, myNumGlobalTimers, 00605 outArg (maxNumGlobalTimers)); 00606 TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalTimers != maxNumGlobalTimers, 00607 std::logic_error, "Min # global timers = " << minNumGlobalTimers 00608 << " != max # global timers = " << maxNumGlobalTimers 00609 << ". Please report this bug to the Teuchos developers."); 00610 TEUCHOS_TEST_FOR_EXCEPTION(myNumGlobalTimers != minNumGlobalTimers, 00611 std::logic_error, "My # global timers = " << myNumGlobalTimers 00612 << " != min # global timers = " << minNumGlobalTimers 00613 << ". Please report this bug to the Teuchos developers."); 00614 } 00615 #endif // TEUCHOS_DEBUG 00616 } 00617 00664 void 00665 computeGlobalTimerStats (stat_map_type& statData, 00666 std::vector<std::string>& statNames, 00667 Ptr<const Comm<int> > comm, 00668 const timer_map_t& globalTimerData, 00669 const bool ignoreZeroTimers) 00670 { 00671 using Teuchos::ScalarTraits; 00672 00673 const int numTimers = static_cast<int> (globalTimerData.size()); 00674 const int numProcs = comm->getSize(); 00675 00676 // Extract pre-reduction timings and call counts into a 00677 // sequential array. This array will be in the same order as 00678 // the global timer names are in the map. 00679 Array<std::pair<double, int> > timingsAndCallCounts; 00680 timingsAndCallCounts.reserve (numTimers); 00681 for (timer_map_t::const_iterator it = globalTimerData.begin(); 00682 it != globalTimerData.end(); ++it) { 00683 timingsAndCallCounts.push_back (it->second); 00684 } 00685 00686 // For each timer name, compute the min timing and its 00687 // corresponding call count. If two processes have the same 00688 // timing but different call counts, the minimum call count will 00689 // be used. 00690 Array<std::pair<double, int> > minTimingsAndCallCounts (numTimers); 00691 if (numTimers > 0) { 00692 if (ignoreZeroTimers) 00693 reduceAll (*comm, MinLocNonzero<int, double, int>(), numTimers, 00694 &timingsAndCallCounts[0], &minTimingsAndCallCounts[0]); 00695 else 00696 reduceAll (*comm, MinLoc<int, double, int>(), numTimers, 00697 &timingsAndCallCounts[0], &minTimingsAndCallCounts[0]); 00698 } 00699 00700 // For each timer name, compute the max timing and its 00701 // corresponding call count. If two processes have the same 00702 // timing but different call counts, the minimum call count will 00703 // be used. 00704 Array<std::pair<double, int> > maxTimingsAndCallCounts (numTimers); 00705 if (numTimers > 0) { 00706 reduceAll (*comm, MaxLoc<int, double, int>(), numTimers, 00707 &timingsAndCallCounts[0], &maxTimingsAndCallCounts[0]); 00708 } 00709 00710 // For each timer name, compute the mean-over-processes timing, 00711 // the mean call count, and the mean-over-call-counts timing. 00712 // The mean call count is reported as a double to allow a 00713 // fractional value. 00714 // 00715 // Each local timing is really the total timing over all local 00716 // invocations. The number of local invocations is the call 00717 // count. Thus, the mean-over-call-counts timing is the sum of 00718 // all the timings (over all processes), divided by the sum of 00719 // all the call counts (over all processes). We compute it in a 00720 // different way to over unnecessary overflow. 00721 Array<double> meanOverCallCountsTimings (numTimers); 00722 Array<double> meanOverProcsTimings (numTimers); 00723 Array<double> meanCallCounts (numTimers); 00724 Array<int> ICallThisTimer (numTimers); 00725 Array<int> numProcsCallingEachTimer (numTimers); 00726 { 00727 // Figure out how many processors actually call each timer. 00728 if (ignoreZeroTimers) { 00729 for (int k = 0; k < numTimers; ++k) { 00730 const double callCount = static_cast<double> (timingsAndCallCounts[k].second); 00731 if (callCount > 0) ICallThisTimer[k] = 1; 00732 else ICallThisTimer[k] = 0; 00733 } 00734 if (numTimers > 0) { 00735 reduceAll (*comm, REDUCE_SUM, numTimers, &ICallThisTimer[0], 00736 &numProcsCallingEachTimer[0]); 00737 } 00738 } 00739 00740 // When summing, first scale by the number of processes. This 00741 // avoids unnecessary overflow, and also gives us the mean 00742 // call count automatically. 00743 Array<double> scaledTimings (numTimers); 00744 Array<double> scaledCallCounts (numTimers); 00745 const double P = static_cast<double> (numProcs); 00746 00747 if (ignoreZeroTimers) { 00748 for (int k = 0; k < numTimers; ++k) { 00749 const double timing = timingsAndCallCounts[k].first; 00750 const double callCount = static_cast<double> (timingsAndCallCounts[k].second); 00751 00752 scaledTimings[k] = timing / numProcsCallingEachTimer[k]; 00753 scaledCallCounts[k] = callCount / numProcsCallingEachTimer[k]; 00754 } 00755 } 00756 else { 00757 for (int k = 0; k < numTimers; ++k) { 00758 const double timing = timingsAndCallCounts[k].first; 00759 const double callCount = static_cast<double> (timingsAndCallCounts[k].second); 00760 00761 scaledTimings[k] = timing / P; 00762 scaledCallCounts[k] = callCount / P; 00763 } 00764 } 00765 00766 if (numTimers > 0) { 00767 reduceAll (*comm, REDUCE_SUM, numTimers, &scaledTimings[0], 00768 &meanOverProcsTimings[0]); 00769 reduceAll (*comm, REDUCE_SUM, numTimers, &scaledCallCounts[0], 00770 &meanCallCounts[0]); 00771 } 00772 // We don't have to undo the scaling for the mean timings; 00773 // just divide by the scaled call count. 00774 for (int k = 0; k < numTimers; ++k) { 00775 if (meanCallCounts[k] > ScalarTraits<double>::zero ()) { 00776 meanOverCallCountsTimings[k] = meanOverProcsTimings[k] / meanCallCounts[k]; 00777 } 00778 else { 00779 meanOverCallCountsTimings[k] = ScalarTraits<double>::zero (); 00780 } 00781 } 00782 } 00783 00784 // Reformat the data into the map of statistics. Be sure that 00785 // each value (the std::vector of (timing, call count) pairs, 00786 // each entry of which is a different statistic) preserves the 00787 // order of statNames. 00788 statNames.resize (4); 00789 statNames[0] = "MinOverProcs"; 00790 statNames[1] = "MeanOverProcs"; 00791 statNames[2] = "MaxOverProcs"; 00792 statNames[3] = "MeanOverCallCounts"; 00793 00794 stat_map_type::iterator statIter = statData.end(); 00795 timer_map_t::const_iterator it = globalTimerData.begin(); 00796 for (int k = 0; it != globalTimerData.end(); ++k, ++it) { 00797 std::vector<std::pair<double, double> > curData (4); 00798 curData[0] = minTimingsAndCallCounts[k]; 00799 curData[1] = std::make_pair (meanOverProcsTimings[k], meanCallCounts[k]); 00800 curData[2] = maxTimingsAndCallCounts[k]; 00801 curData[3] = std::make_pair (meanOverCallCountsTimings[k], meanCallCounts[k]); 00802 00803 // statIter gives an insertion location hint that makes each 00804 // insertion O(1), since we remember the location of the last 00805 // insertion. 00806 statIter = statData.insert (statIter, std::make_pair (it->first, curData)); 00807 } 00808 } 00809 00810 00827 RCP<const Comm<int> > 00828 getDefaultComm () 00829 { 00830 // The default communicator. If Trilinos was built with MPI 00831 // enabled, this should be MPI_COMM_WORLD. (If MPI has not yet 00832 // been initialized, it's not valid to use the communicator!) 00833 // Otherwise, this should be a "serial" (no MPI, one "process") 00834 // communicator. 00835 RCP<const Comm<int> > comm = DefaultComm<int>::getComm (); 00836 00837 #ifdef HAVE_MPI 00838 { 00839 int mpiHasBeenStarted = 0; 00840 MPI_Initialized (&mpiHasBeenStarted); 00841 if (! mpiHasBeenStarted) { 00842 // Make pComm a new "serial communicator." 00843 comm = rcp_implicit_cast<const Comm<int> > (rcp (new SerialComm<int> ())); 00844 } 00845 } 00846 #endif // HAVE_MPI 00847 return comm; 00848 } 00849 00850 } // namespace (anonymous) 00851 00852 00853 void 00854 TimeMonitor::computeGlobalTimerStatistics (stat_map_type& statData, 00855 std::vector<std::string>& statNames, 00856 Ptr<const Comm<int> > comm, 00857 const ECounterSetOp setOp, 00858 const std::string& filter) 00859 { 00860 // Collect local timer data and names. Filter out timers with 00861 // zero call counts if writeZeroTimers is false. Also, apply the 00862 // timer label filter at this point, so we don't have to compute 00863 // statistics on timers we don't want to display anyway. 00864 timer_map_t localTimerData; 00865 Array<std::string> localTimerNames; 00866 const bool writeZeroTimers = false; 00867 collectLocalTimerDataAndNames (localTimerData, localTimerNames, 00868 counters(), writeZeroTimers, filter); 00869 // Merge the local timer data and names into global timer data and 00870 // names. 00871 timer_map_t globalTimerData; 00872 Array<std::string> globalTimerNames; 00873 const bool alwaysWriteLocal = false; 00874 collectGlobalTimerData (globalTimerData, globalTimerNames, 00875 localTimerData, localTimerNames, 00876 comm, alwaysWriteLocal, setOp); 00877 // Compute statistics on the data. 00878 computeGlobalTimerStats (statData, statNames, comm, globalTimerData, false); 00879 } 00880 00881 00882 void 00883 TimeMonitor::summarize (Ptr<const Comm<int> > comm, 00884 std::ostream& out, 00885 const bool alwaysWriteLocal, 00886 const bool writeGlobalStats, 00887 const bool writeZeroTimers, 00888 const ECounterSetOp setOp, 00889 const std::string& filter, 00890 const bool ignoreZeroTimers) 00891 { 00892 // 00893 // We can't just call computeGlobalTimerStatistics(), since 00894 // summarize() has different options that affect whether global 00895 // statistics are computed and printed. 00896 // 00897 const int numProcs = comm->getSize(); 00898 const int myRank = comm->getRank(); 00899 00900 // Collect local timer data and names. Filter out timers with 00901 // zero call counts if writeZeroTimers is false. Also, apply the 00902 // timer label filter at this point, so we don't have to compute 00903 // statistics on timers we don't want to display anyway. 00904 timer_map_t localTimerData; 00905 Array<std::string> localTimerNames; 00906 collectLocalTimerDataAndNames (localTimerData, localTimerNames, 00907 counters(), writeZeroTimers, filter); 00908 00909 // If we're computing global statistics, merge the local timer 00910 // data and names into global timer data and names, and compute 00911 // global timer statistics. Otherwise, leave the global data 00912 // empty. 00913 timer_map_t globalTimerData; 00914 Array<std::string> globalTimerNames; 00915 stat_map_type statData; 00916 std::vector<std::string> statNames; 00917 if (writeGlobalStats) { 00918 collectGlobalTimerData (globalTimerData, globalTimerNames, 00919 localTimerData, localTimerNames, 00920 comm, alwaysWriteLocal, setOp); 00921 // Compute statistics on the data, but only if the communicator 00922 // contains more than one process. Otherwise, statistics don't 00923 // make sense and we don't print them (see below). 00924 if (numProcs > 1) { 00925 computeGlobalTimerStats (statData, statNames, comm, globalTimerData, ignoreZeroTimers); 00926 } 00927 } 00928 00929 // Precision of floating-point numbers in the table. 00930 const int precision = format().precision(); 00931 00932 // All columns of the table, in order. 00933 Array<TableColumn> tableColumns; 00934 00935 // Labels of all the columns of the table. 00936 // We will append to this when we add each column. 00937 Array<std::string> titles; 00938 00939 // Widths (in number of characters) of each column. 00940 // We will append to this when we add each column. 00941 Array<int> columnWidths; 00942 00943 // Table column containing all timer names. If writeGlobalStats 00944 // is true, we use the global timer names, otherwise we use the 00945 // local timer names. We build the table on all processes 00946 // redundantly, but only print on Rank 0. 00947 { 00948 titles.append ("Timer Name"); 00949 00950 // The column labels depend on whether we are computing global statistics. 00951 TableColumn nameCol (writeGlobalStats ? globalTimerNames : localTimerNames); 00952 tableColumns.append (nameCol); 00953 00954 // Each column is as wide as it needs to be to hold both its 00955 // title and all of the column data. This column's title is the 00956 // current last entry of the titles array. 00957 columnWidths.append (format().computeRequiredColumnWidth (titles.back(), nameCol)); 00958 } 00959 00960 // Table column containing local timer stats, if applicable. We 00961 // only write local stats if asked, only on MPI Proc 0, and only 00962 // if there is more than one MPI process in the communicator 00963 // (otherwise local stats == global stats, so we just print the 00964 // global stats). In this case, we've padded the local data on 00965 // Proc 0 if necessary to match the global timer list, so that the 00966 // columns have the same number of rows. 00967 if (alwaysWriteLocal && numProcs > 1 && myRank == 0) { 00968 titles.append ("Local time (num calls)"); 00969 00970 // Copy local timer data out of the array-of-structs into 00971 // separate arrays, for display in the table. 00972 Array<double> localTimings; 00973 Array<double> localNumCalls; 00974 for (timer_map_t::const_iterator it = localTimerData.begin(); 00975 it != localTimerData.end(); ++it) { 00976 localTimings.push_back (it->second.first); 00977 localNumCalls.push_back (static_cast<double> (it->second.second)); 00978 } 00979 TableColumn timeAndCalls (localTimings, localNumCalls, precision, true); 00980 tableColumns.append (timeAndCalls); 00981 columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls)); 00982 } 00983 00984 if (writeGlobalStats) { 00985 // If there's only 1 process in the communicator, don't display 00986 // statistics; statistics don't make sense in that case. Just 00987 // display the timings and call counts. If there's more than 1 00988 // process, do display statistics. 00989 if (numProcs == 1) { 00990 // Extract timings and the call counts from globalTimerData. 00991 Array<double> globalTimings; 00992 Array<double> globalNumCalls; 00993 for (timer_map_t::const_iterator it = globalTimerData.begin(); 00994 it != globalTimerData.end(); ++it) { 00995 globalTimings.push_back (it->second.first); 00996 globalNumCalls.push_back (static_cast<double> (it->second.second)); 00997 } 00998 // Print the table column. 00999 titles.append ("Global time (num calls)"); 01000 TableColumn timeAndCalls (globalTimings, globalNumCalls, precision, true); 01001 tableColumns.append (timeAndCalls); 01002 columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls)); 01003 } 01004 else { // numProcs > 1 01005 // Print a table column for each statistic. statNames and 01006 // each value in statData use the same ordering, so we can 01007 // iterate over valid indices of statNames to display the 01008 // statistics in the right order. 01009 const timer_map_t::size_type numGlobalTimers = globalTimerData.size(); 01010 for (std::vector<std::string>::size_type statInd = 0; statInd < statNames.size(); ++statInd) { 01011 // Extract lists of timings and their call counts for the 01012 // current statistic. 01013 Array<double> statTimings (numGlobalTimers); 01014 Array<double> statCallCounts (numGlobalTimers); 01015 stat_map_type::const_iterator it = statData.begin(); 01016 for (int k = 0; it != statData.end(); ++it, ++k) { 01017 statTimings[k] = (it->second[statInd]).first; 01018 statCallCounts[k] = (it->second[statInd]).second; 01019 } 01020 // Print the table column. 01021 const std::string& statisticName = statNames[statInd]; 01022 const std::string titleString = statisticName; 01023 titles.append (titleString); 01024 TableColumn timeAndCalls (statTimings, statCallCounts, precision, true); 01025 tableColumns.append (timeAndCalls); 01026 columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls)); 01027 } 01028 } 01029 } 01030 01031 // Print the whole table to the given output stream on MPI Rank 0. 01032 format().setColumnWidths (columnWidths); 01033 if (myRank == 0) { 01034 std::ostringstream theTitle; 01035 theTitle << "TimeMonitor results over " << numProcs << " processor" 01036 << (numProcs > 1 ? "s" : ""); 01037 format().writeWholeTable (out, theTitle.str(), titles, tableColumns); 01038 } 01039 } 01040 01041 void 01042 TimeMonitor::summarize (std::ostream &out, 01043 const bool alwaysWriteLocal, 01044 const bool writeGlobalStats, 01045 const bool writeZeroTimers, 01046 const ECounterSetOp setOp, 01047 const std::string& filter, 01048 const bool ignoreZeroTimers) 01049 { 01050 // The default communicator. If Trilinos was built with MPI 01051 // enabled, this should be MPI_COMM_WORLD. Otherwise, this should 01052 // be a "serial" (no MPI, one "process") communicator. 01053 RCP<const Comm<int> > comm = getDefaultComm(); 01054 01055 summarize (comm.ptr(), out, alwaysWriteLocal, 01056 writeGlobalStats, writeZeroTimers, setOp, filter, ignoreZeroTimers); 01057 } 01058 01059 void 01060 TimeMonitor::computeGlobalTimerStatistics (stat_map_type& statData, 01061 std::vector<std::string>& statNames, 01062 const ECounterSetOp setOp, 01063 const std::string& filter) 01064 { 01065 // The default communicator. If Trilinos was built with MPI 01066 // enabled, this should be MPI_COMM_WORLD. Otherwise, this should 01067 // be a "serial" (no MPI, one "process") communicator. 01068 RCP<const Comm<int> > comm = getDefaultComm(); 01069 01070 computeGlobalTimerStatistics (statData, statNames, comm.ptr(), setOp, filter); 01071 } 01072 01073 01074 namespace { 01098 std::string 01099 quoteLabelForYaml (const std::string& label) 01100 { 01101 // YAML allows empty keys in key: value pairs. See Section 7.2 01102 // of the YAML 1.2 spec. We thus let an empty label pass 01103 // through without quoting or other special treatment. 01104 if (label.empty ()) { 01105 return label; 01106 } 01107 01108 // Check whether the label is already quoted. If so, we don't 01109 // need to quote it again. However, we do need to quote any 01110 // quote symbols in the string inside the outer quotes. 01111 const bool alreadyQuoted = label.size () >= 2 && 01112 label[0] == '"' && label[label.size() - 1] == '"'; 01113 01114 // We need to quote if there are any colons or (inner) quotes in 01115 // the string. We'll determine this as we read through the 01116 // string and escape any characters that need escaping. 01117 bool needToQuote = false; 01118 01119 std::string out; // To fill with the return value 01120 out.reserve (label.size ()); 01121 01122 const size_t startPos = alreadyQuoted ? 1 : 0; 01123 const size_t endPos = alreadyQuoted ? label.size () - 1 : label.size (); 01124 for (size_t i = startPos; i < endPos; ++i) { 01125 const char c = label[i]; 01126 if (c == '"' || c == '\\') { 01127 out.push_back ('\\'); // Escape the quote or backslash. 01128 needToQuote = true; 01129 } 01130 else if (c == ':') { 01131 needToQuote = true; 01132 } 01133 out.push_back (c); 01134 } 01135 01136 if (needToQuote || alreadyQuoted) { 01137 // If the input string was already quoted, then out doesn't 01138 // include its quotes, so we have to add them back in. 01139 return "\"" + out + "\""; 01140 } 01141 else { 01142 return out; 01143 } 01144 } 01145 01146 } // namespace (anonymous) 01147 01148 01149 void TimeMonitor:: 01150 summarizeToYaml (Ptr<const Comm<int> > comm, 01151 std::ostream &out, 01152 const ETimeMonitorYamlFormat yamlStyle, 01153 const std::string& filter) 01154 { 01155 using Teuchos::FancyOStream; 01156 using Teuchos::fancyOStream; 01157 using Teuchos::getFancyOStream; 01158 using Teuchos::OSTab; 01159 using Teuchos::RCP; 01160 using Teuchos::rcpFromRef; 01161 using std::endl; 01162 typedef std::vector<std::string>::size_type size_type; 01163 01164 const bool compact = (yamlStyle == YAML_FORMAT_COMPACT); 01165 01166 // const bool writeGlobalStats = true; 01167 // const bool writeZeroTimers = true; 01168 // const bool alwaysWriteLocal = false; 01169 const ECounterSetOp setOp = Intersection; 01170 01171 stat_map_type statData; 01172 std::vector<std::string> statNames; 01173 computeGlobalTimerStatistics (statData, statNames, comm, setOp, filter); 01174 01175 const int numProcs = comm->getSize(); 01176 01177 // HACK (mfh 20 Aug 2012) For some reason, creating OSTab with "- 01178 // " as the line prefix does not work, else I would prefer that 01179 // method for printing each line of a YAML block sequence (see 01180 // Section 8.2.1 of the YAML 1.2 spec). 01181 // 01182 // Also, I have to set the tab indent string here, rather than in 01183 // OSTab's constructor. This is because line prefix (which for 01184 // some reason is what OSTab's constructor takes, rather than tab 01185 // indent string) means something different from tab indent 01186 // string, and turning on the line prefix prints all sorts of 01187 // things including "|" for some reason. 01188 RCP<FancyOStream> pfout = getFancyOStream (rcpFromRef (out)); 01189 pfout->setTabIndentStr (" "); 01190 FancyOStream& fout = *pfout; 01191 01192 fout << "# Teuchos::TimeMonitor report" << endl 01193 << "---" << endl; 01194 01195 // mfh 19 Aug 2012: An important goal of our chosen output format 01196 // was to minimize the nesting depth. We have managed to keep the 01197 // nesting depth to 3, which is the limit that the current version 01198 // of PylotDB imposes for its YAML input. 01199 01200 // Outermost level is a dictionary. (Individual entries of a 01201 // dictionary do _not_ begin with "- ".) We always print the 01202 // outermost level in standard style, not flow style, for better 01203 // readability. We begin the outermost level with metadata. 01204 fout << "Output mode: " << (compact ? "compact" : "spacious") << endl 01205 << "Number of processes: " << numProcs << endl 01206 << "Time unit: s" << endl; 01207 // For a key: value pair where the value is a sequence or 01208 // dictionary on the following line, YAML requires a space after 01209 // the colon. 01210 fout << "Statistics collected: "; 01211 // Print list of the names of all the statistics we collected. 01212 if (compact) { 01213 fout << " ["; 01214 for (size_type i = 0; i < statNames.size (); ++i) { 01215 fout << quoteLabelForYaml (statNames[i]); 01216 if (i + 1 < statNames.size ()) { 01217 fout << ", "; 01218 } 01219 } 01220 fout << "]" << endl; 01221 } 01222 else { 01223 fout << endl; 01224 OSTab tab1 (pfout); 01225 for (size_type i = 0; i < statNames.size (); ++i) { 01226 fout << "- " << quoteLabelForYaml (statNames[i]) << endl; 01227 } 01228 } 01229 01230 // Print the list of timer names. 01231 // 01232 // It might be nicer instead to print a map from timer name to all 01233 // of its data, but keeping the maximum nesting depth small 01234 // ensures better compatibility with different parsing tools. 01235 fout << "Timer names: "; 01236 if (compact) { 01237 fout << " ["; 01238 size_type ind = 0; 01239 for (stat_map_type::const_iterator it = statData.begin(); 01240 it != statData.end(); ++it, ++ind) { 01241 fout << quoteLabelForYaml (it->first); 01242 if (ind + 1 < statData.size ()) { 01243 fout << ", "; 01244 } 01245 } 01246 fout << "]" << endl; 01247 } 01248 else { 01249 fout << endl; 01250 OSTab tab1 (pfout); 01251 for (stat_map_type::const_iterator it = statData.begin(); 01252 it != statData.end(); ++it) { 01253 fout << "- " << quoteLabelForYaml (it->first) << endl; 01254 } 01255 } 01256 01257 // Print times for each timer, as a map from statistic name to its time. 01258 fout << "Total times: "; 01259 if (compact) { 01260 fout << " {"; 01261 size_type outerInd = 0; 01262 for (stat_map_type::const_iterator outerIter = statData.begin(); 01263 outerIter != statData.end(); ++outerIter, ++outerInd) { 01264 // Print timer name. 01265 fout << quoteLabelForYaml (outerIter->first) << ": "; 01266 // Print that timer's data. 01267 const std::vector<std::pair<double, double> >& curData = outerIter->second; 01268 fout << "{"; 01269 for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) { 01270 fout << quoteLabelForYaml (statNames[innerInd]) << ": " 01271 << curData[innerInd].first; 01272 if (innerInd + 1 < curData.size ()) { 01273 fout << ", "; 01274 } 01275 } 01276 fout << "}"; 01277 if (outerInd + 1 < statData.size ()) { 01278 fout << ", "; 01279 } 01280 } 01281 fout << "}" << endl; 01282 } 01283 else { 01284 fout << endl; 01285 OSTab tab1 (pfout); 01286 size_type outerInd = 0; 01287 for (stat_map_type::const_iterator outerIter = statData.begin(); 01288 outerIter != statData.end(); ++outerIter, ++outerInd) { 01289 // Print timer name. 01290 fout << quoteLabelForYaml (outerIter->first) << ": " << endl; 01291 // Print that timer's data. 01292 OSTab tab2 (pfout); 01293 const std::vector<std::pair<double, double> >& curData = outerIter->second; 01294 for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) { 01295 fout << quoteLabelForYaml (statNames[innerInd]) << ": " 01296 << curData[innerInd].first << endl; 01297 } 01298 } 01299 } 01300 01301 // Print call counts for each timer, for each statistic name. 01302 fout << "Call counts:"; 01303 if (compact) { 01304 fout << " {"; 01305 size_type outerInd = 0; 01306 for (stat_map_type::const_iterator outerIter = statData.begin(); 01307 outerIter != statData.end(); ++outerIter, ++outerInd) { 01308 // Print timer name. 01309 fout << quoteLabelForYaml (outerIter->first) << ": "; 01310 // Print that timer's data. 01311 const std::vector<std::pair<double, double> >& curData = outerIter->second; 01312 fout << "{"; 01313 for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) { 01314 fout << quoteLabelForYaml (statNames[innerInd]) << ": " 01315 << curData[innerInd].second; 01316 if (innerInd + 1 < curData.size ()) { 01317 fout << ", "; 01318 } 01319 } 01320 fout << "}"; 01321 if (outerInd + 1 < statData.size ()) { 01322 fout << ", "; 01323 } 01324 } 01325 fout << "}" << endl; 01326 } 01327 else { 01328 fout << endl; 01329 OSTab tab1 (pfout); 01330 size_type outerInd = 0; 01331 for (stat_map_type::const_iterator outerIter = statData.begin(); 01332 outerIter != statData.end(); ++outerIter, ++outerInd) { 01333 // Print timer name. 01334 fout << quoteLabelForYaml (outerIter->first) << ": " << endl; 01335 // Print that timer's data. 01336 OSTab tab2 (pfout); 01337 const std::vector<std::pair<double, double> >& curData = outerIter->second; 01338 for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) { 01339 fout << quoteLabelForYaml (statNames[innerInd]) << ": " 01340 << curData[innerInd].second << endl; 01341 } 01342 } 01343 } 01344 } 01345 01346 void TimeMonitor:: 01347 summarizeToYaml (std::ostream &out, 01348 const ETimeMonitorYamlFormat yamlStyle, 01349 const std::string& filter) 01350 { 01351 // The default communicator. If Trilinos was built with MPI 01352 // enabled, this should be MPI_COMM_WORLD. Otherwise, this should 01353 // be a "serial" (no MPI, one "process") communicator. 01354 RCP<const Comm<int> > comm = getDefaultComm (); 01355 01356 summarizeToYaml (comm.ptr (), out, yamlStyle, filter); 01357 } 01358 01359 // Default value is false. We'll set to true once 01360 // setReportParameters() completes successfully. 01361 bool TimeMonitor::setParams_ = false; 01362 01363 // We have to declare all of these here in order to avoid linker errors. 01364 TimeMonitor::ETimeMonitorReportFormat TimeMonitor::reportFormat_ = TimeMonitor::REPORT_FORMAT_TABLE; 01365 TimeMonitor::ETimeMonitorYamlFormat TimeMonitor::yamlStyle_ = TimeMonitor::YAML_FORMAT_SPACIOUS; 01366 ECounterSetOp TimeMonitor::setOp_ = Intersection; 01367 bool TimeMonitor::alwaysWriteLocal_ = false; 01368 bool TimeMonitor::writeGlobalStats_ = true; 01369 bool TimeMonitor::writeZeroTimers_ = true; 01370 01371 void 01372 TimeMonitor::setReportFormatParameter (ParameterList& plist) 01373 { 01374 const std::string name ("Report format"); 01375 const std::string defaultValue ("Table"); 01376 const std::string docString ("Output format for report of timer statistics"); 01377 Array<std::string> strings; 01378 Array<std::string> docs; 01379 Array<ETimeMonitorReportFormat> values; 01380 01381 strings.push_back ("YAML"); 01382 docs.push_back ("YAML (see yaml.org) format"); 01383 values.push_back (REPORT_FORMAT_YAML); 01384 strings.push_back ("Table"); 01385 docs.push_back ("Tabular format via Teuchos::TableFormat"); 01386 values.push_back (REPORT_FORMAT_TABLE); 01387 01388 setStringToIntegralParameter<ETimeMonitorReportFormat> (name, defaultValue, 01389 docString, 01390 strings (), docs (), 01391 values (), &plist); 01392 } 01393 01394 void 01395 TimeMonitor::setYamlFormatParameter (ParameterList& plist) 01396 { 01397 const std::string name ("YAML style"); 01398 const std::string defaultValue ("spacious"); 01399 const std::string docString ("YAML-specific output format"); 01400 Array<std::string> strings; 01401 Array<std::string> docs; 01402 Array<ETimeMonitorYamlFormat> values; 01403 01404 strings.push_back ("compact"); 01405 docs.push_back ("Compact format: use \"flow style\" (see YAML 1.2 spec at " 01406 "yaml.org) for most sequences except the outermost sequence"); 01407 values.push_back (YAML_FORMAT_COMPACT); 01408 01409 strings.push_back ("spacious"); 01410 docs.push_back ("Spacious format: avoid flow style"); 01411 values.push_back (YAML_FORMAT_SPACIOUS); 01412 01413 setStringToIntegralParameter<ETimeMonitorYamlFormat> (name, defaultValue, 01414 docString, 01415 strings (), docs (), 01416 values (), &plist); 01417 } 01418 01419 void 01420 TimeMonitor::setSetOpParameter (ParameterList& plist) 01421 { 01422 const std::string name ("How to merge timer sets"); 01423 const std::string defaultValue ("Intersection"); 01424 const std::string docString ("How to merge differing sets of timers " 01425 "across processes"); 01426 Array<std::string> strings; 01427 Array<std::string> docs; 01428 Array<ECounterSetOp> values; 01429 01430 strings.push_back ("Intersection"); 01431 docs.push_back ("Compute intersection of timer sets over processes"); 01432 values.push_back (Intersection); 01433 strings.push_back ("Union"); 01434 docs.push_back ("Compute union of timer sets over processes"); 01435 values.push_back (Union); 01436 01437 setStringToIntegralParameter<ECounterSetOp> (name, defaultValue, docString, 01438 strings (), docs (), values (), 01439 &plist); 01440 } 01441 01442 RCP<const ParameterList> 01443 TimeMonitor::getValidReportParameters () 01444 { 01445 // Our implementation favors recomputation over persistent 01446 // storage. That is, we simply recreate the list every time we 01447 // need it. 01448 RCP<ParameterList> plist = parameterList ("TimeMonitor::report"); 01449 01450 const bool alwaysWriteLocal = false; 01451 const bool writeGlobalStats = true; 01452 const bool writeZeroTimers = true; 01453 01454 setReportFormatParameter (*plist); 01455 setYamlFormatParameter (*plist); 01456 setSetOpParameter (*plist); 01457 plist->set ("alwaysWriteLocal", alwaysWriteLocal, 01458 "Always output local timers' values on Proc 0"); 01459 plist->set ("writeGlobalStats", writeGlobalStats, "Always output global " 01460 "statistics, even if there is only one process in the " 01461 "communicator"); 01462 plist->set ("writeZeroTimers", writeZeroTimers, "Generate output for " 01463 "timers that have never been called"); 01464 01465 return rcp_const_cast<const ParameterList> (plist); 01466 } 01467 01468 void 01469 TimeMonitor::setReportParameters (const RCP<ParameterList>& params) 01470 { 01471 ETimeMonitorReportFormat reportFormat = REPORT_FORMAT_TABLE; 01472 ETimeMonitorYamlFormat yamlStyle = YAML_FORMAT_SPACIOUS; 01473 ECounterSetOp setOp = Intersection; 01474 bool alwaysWriteLocal = false; 01475 bool writeGlobalStats = true; 01476 bool writeZeroTimers = true; 01477 01478 if (params.is_null ()) { 01479 // If we've set parameters before, leave their current values. 01480 // Otherwise, set defaults (below). 01481 if (setParams_) { 01482 return; 01483 } 01484 } 01485 else { // params is nonnull. Let's read it! 01486 params->validateParametersAndSetDefaults (*getValidReportParameters ()); 01487 01488 reportFormat = getIntegralValue<ETimeMonitorReportFormat> (*params, "Report format"); 01489 yamlStyle = getIntegralValue<ETimeMonitorYamlFormat> (*params, "YAML style"); 01490 setOp = getIntegralValue<ECounterSetOp> (*params, "How to merge timer sets"); 01491 alwaysWriteLocal = params->get<bool> ("alwaysWriteLocal"); 01492 writeGlobalStats = params->get<bool> ("writeGlobalStats"); 01493 writeZeroTimers = params->get<bool> ("writeZeroTimers"); 01494 } 01495 // Defer setting state until here, to ensure the strong exception 01496 // guarantee for this method (either it throws with no externally 01497 // visible state changes, or it returns normally). 01498 reportFormat_ = reportFormat; 01499 yamlStyle_ = yamlStyle; 01500 setOp_ = setOp; 01501 alwaysWriteLocal_ = alwaysWriteLocal; 01502 writeGlobalStats_ = writeGlobalStats; 01503 writeZeroTimers_ = writeZeroTimers; 01504 01505 setParams_ = true; // Yay, we successfully set parameters! 01506 } 01507 01508 void 01509 TimeMonitor::report (Ptr<const Comm<int> > comm, 01510 std::ostream& out, 01511 const std::string& filter, 01512 const RCP<ParameterList>& params) 01513 { 01514 setReportParameters (params); 01515 01516 if (reportFormat_ == REPORT_FORMAT_YAML) { 01517 summarizeToYaml (comm, out, yamlStyle_, filter); 01518 } 01519 else if (reportFormat_ == REPORT_FORMAT_TABLE) { 01520 summarize (comm, out, alwaysWriteLocal_, writeGlobalStats_, 01521 writeZeroTimers_, setOp_, filter); 01522 } 01523 else { 01524 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "TimeMonitor::report: " 01525 "Invalid report format. This should never happen; ParameterList " 01526 "validation should have caught this. Please report this bug to the " 01527 "Teuchos developers."); 01528 } 01529 } 01530 01531 void 01532 TimeMonitor::report (Ptr<const Comm<int> > comm, 01533 std::ostream& out, 01534 const RCP<ParameterList>& params) 01535 { 01536 report (comm, out, "", params); 01537 } 01538 01539 void 01540 TimeMonitor::report (std::ostream& out, 01541 const std::string& filter, 01542 const RCP<ParameterList>& params) 01543 { 01544 RCP<const Comm<int> > comm = getDefaultComm (); 01545 report (comm.ptr (), out, filter, params); 01546 } 01547 01548 void 01549 TimeMonitor::report (std::ostream& out, 01550 const RCP<ParameterList>& params) 01551 { 01552 RCP<const Comm<int> > comm = getDefaultComm (); 01553 report (comm.ptr (), out, "", params); 01554 } 01555 01556 } // namespace Teuchos
1.7.6.1