|
IterationPack: General framework for building iterative algorithms
Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Moocho: Multi-functional Object-Oriented arCHitecture for Optimization 00005 // Copyright (2003) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Roscoe A. Bartlett (rabartl@sandia.gov) 00038 // 00039 // *********************************************************************** 00040 // @HEADER 00041 00042 #include <signal.h> 00043 00044 #include <iterator> 00045 #include <numeric> 00046 00047 #include "IterationPack_Algorithm.hpp" 00048 #include "StopWatchPack_stopwatch.hpp" 00049 #include "Teuchos_Assert.hpp" 00050 #include "Teuchos_TypeNameTraits.hpp" 00051 #include "Teuchos_GlobalMPISession.hpp" 00052 00053 #ifdef HAVE_MPI 00054 #include "mpi.h" 00055 #endif 00056 00057 // Define to see MPI/interrupt deugging output 00058 //#define ITERATION_PACK_ALGORITHM_SHOW_MPI_DEBUG_INFO 00059 00060 // Define of the MPI implementation receives signals on all processes 00061 //#define ITERATION_PACK_ALGORITHM_SIGNALS_ON_ALL_PROCESSES; 00062 00063 extern "C" { 00064 00065 void sig_handler_interrupt_algorithm( int signum ) 00066 { 00067 IterationPack::Algorithm::interrupt(); 00068 } 00069 00070 } // extern "C" 00071 00072 namespace { 00073 00074 // Helper functions 00075 00076 template< class T > 00077 inline 00078 T my_max( const T& v1, const T& v2 ) { return v1 > v2 ? v1 : v2; } 00079 00080 // Private static data for IterationPack::Algorithm. 00081 // I put it here so that I can modify it without affecting the 00082 // header file and avoiding unnecessary recompilations. 00083 00084 enum EInterruptStatus { NOT_INTERRUPTED=0, STOP_END_STEP=1, STOP_END_ITER=2, ABORT_PROGRAM=3 }; 00085 00086 int static_mpi_initialized = false; 00087 int static_num_running_algorithms = 0; 00088 int static_num_proc = 0; // Flag that no algorithm has been even allocated yet! 00089 int static_proc_rank = 0; 00090 bool static_interrupt_called = false; 00091 bool static_processed_user_interrupt = false; 00092 EInterruptStatus static_interrupt_status = NOT_INTERRUPTED; 00093 bool static_interrupt_terminate_return = false; 00094 00095 } // end namespace 00096 00097 // ToDo: change step_itr and assoc_step_itr to just return iterators without 00098 // asserting if the names exist. This will be more useful. 00099 00100 namespace IterationPack { 00101 00102 // constructors / destructor 00103 00104 Algorithm::Algorithm() 00105 :running_state_(NOT_RUNNING), max_iter_(100) 00106 ,max_run_time_(std::numeric_limits<double>::max()) 00107 ,next_step_name_(0), do_step_next_called_(false), reconfigured_(false) 00108 ,time_stats_computed_(false) 00109 { 00110 // Set MPI info 00111 static_num_proc = 1; 00112 static_proc_rank = 0; 00113 #ifdef HAVE_MPI 00114 // If MPI is not initialized then this must be because the code was 00115 // compiled with support for MPI but it not actually using it. 00116 // Therefore, we will initialize MPI but not bother to finialize it. 00117 if(!static_mpi_initialized) { 00118 int mpi_initialized = false; 00119 MPI_Initialized(&mpi_initialized); 00120 if(!mpi_initialized) { 00121 int argc = 1; 00122 char arg_str[] = "dummy_prg"; 00123 char *arg_str_ptr = arg_str; 00124 char **argv = &arg_str_ptr; 00125 MPI_Init( &argc, &argv ); 00126 } 00127 static_mpi_initialized = true; 00128 } 00129 // ToDo: Allow the specification of another communicator if needed! 00130 MPI_Comm_size( MPI_COMM_WORLD, &static_num_proc ); 00131 MPI_Comm_rank( MPI_COMM_WORLD, &static_proc_rank ); 00132 #ifdef ITERATION_PACK_ALGORITHM_SHOW_MPI_DEBUG_INFO 00133 std::cerr << "\np=" << static_proc_rank << ": Algorithm::Algorithm() being called (num_proc = "<<static_num_proc<<") ... \n"; 00134 #endif 00135 #endif // HAVE_MPI 00136 } 00137 00138 Algorithm::~Algorithm() 00139 {} 00140 00141 // maximum iterations 00142 00143 void Algorithm::max_iter(size_t max_iter) 00144 { max_iter_ = max_iter; } 00145 00146 size_t Algorithm::max_iter() const 00147 { return max_iter_; } 00148 00149 // maximum run tine 00150 00151 void Algorithm::max_run_time(double max_run_time) 00152 { max_run_time_ = max_run_time; } 00153 00154 double Algorithm::max_run_time() const 00155 { return max_run_time_; } 00156 00157 00158 // step information / access 00159 00160 int Algorithm::num_steps() const 00161 { return steps_.size(); } 00162 00163 Algorithm::poss_type Algorithm::get_step_poss(const std::string& step_name) const 00164 { 00165 steps_t::const_iterator itr = step_itr(step_name); 00166 return itr == steps_.end() ? DOES_NOT_EXIST : std::distance( steps_.begin(), itr ) + 1; 00167 } 00168 00169 const std::string& Algorithm::get_step_name(poss_type step_poss) const 00170 { return steps_[validate(step_poss) - 1].name; } 00171 00172 Algorithm::step_ptr_t& Algorithm::get_step(poss_type step_poss) 00173 { return steps_[validate(step_poss) - 1].step_ptr; } 00174 00175 const Algorithm::step_ptr_t& Algorithm::get_step(poss_type step_poss) const 00176 { return steps_[validate(step_poss) - 1].step_ptr; } 00177 00178 // pre/post step information / access 00179 00180 int Algorithm::num_assoc_steps(poss_type step_poss, EAssocStepType type) const 00181 { return assoc_steps_[validate(step_poss) - 1][type].size(); } 00182 00183 Algorithm::poss_type Algorithm::get_assoc_step_poss(poss_type step_poss, EAssocStepType type 00184 ,const std::string& assoc_step_name) const 00185 { 00186 // ToDo: change to return DOES_NOT_EXIST if it does not exist. 00187 const assoc_steps_ele_list_t &assoc_list = assoc_steps_[validate(step_poss) - 1][type]; 00188 assoc_steps_ele_list_t::const_iterator itr = assoc_step_itr(assoc_list,assoc_step_name); 00189 return itr == assoc_list.end() ? DOES_NOT_EXIST : std::distance( assoc_list.begin() , itr ) + 1; 00190 } 00191 00192 const std::string& Algorithm::get_assoc_step_name(poss_type step_poss, EAssocStepType type 00193 , poss_type assoc_step_poss) const 00194 { 00195 const assoc_steps_ele_list_t &assoc_list= assoc_steps_[validate(step_poss) - 1][type]; 00196 validate(assoc_list,assoc_step_poss); 00197 assoc_steps_ele_list_t::const_iterator itr = assoc_list.begin(); 00198 std::advance( itr, assoc_step_poss - 1 ); 00199 return (*itr).name; 00200 } 00201 00202 Algorithm::step_ptr_t& Algorithm::get_assoc_step(poss_type step_poss, EAssocStepType type 00203 , poss_type assoc_step_poss) 00204 { 00205 assoc_steps_ele_list_t &assoc_list= assoc_steps_[validate(step_poss) - 1][type]; 00206 validate(assoc_list,assoc_step_poss); 00207 assoc_steps_ele_list_t::iterator itr = assoc_list.begin(); 00208 std::advance( itr, assoc_step_poss - 1 ); 00209 return (*itr).step_ptr; 00210 } 00211 00212 const Algorithm::step_ptr_t& Algorithm::get_assoc_step(poss_type step_poss, EAssocStepType type 00213 , poss_type assoc_step_poss) const 00214 { 00215 const assoc_steps_ele_list_t &assoc_list= assoc_steps_[validate(step_poss) - 1][type]; 00216 validate(assoc_list,assoc_step_poss); 00217 assoc_steps_ele_list_t::const_iterator itr = assoc_list.begin(); 00218 std::advance( itr, assoc_step_poss - 1 ); 00219 return (*itr).step_ptr; 00220 } 00221 00222 // step manipulation 00223 00224 void Algorithm::insert_step(poss_type step_poss, const std::string& step_name, const step_ptr_t& step) 00225 { 00226 validate_not_in_state(RUNNING); 00227 TEUCHOS_TEST_FOR_EXCEPTION( 00228 step.get() == NULL, std::invalid_argument 00229 ,"Algorithm::insert_step(...) : A step with the name = \'" << step_name 00230 << "\' being inserted into the position = " << step_poss 00231 << " has step.get() == NULL!" ); 00232 // Make sure a step with this name does not already exist. 00233 steps_t::iterator itr; 00234 if( steps_.end() != ( itr = step_itr(step_name) ) ) 00235 TEUCHOS_TEST_FOR_EXCEPTION( 00236 true, AlreadyExists 00237 ,"Algorithm::insert_step(...) : A step with the name = " << step_name 00238 << " already exists at step_poss = " << std::distance(steps_.begin(),itr) + 1 ); 00239 // insert the step in such a way that any container can be used for steps_ 00240 itr = steps_.begin(); 00241 std::advance ( itr , validate(step_poss,+1) - 1 ); 00242 steps_.insert( itr , steps_ele_t(step,step_name) ); 00243 // insert the assoc_step element in such a way that any container can be used for assoc_steps_ 00244 assoc_steps_t::iterator a_itr = assoc_steps_.begin(); 00245 std::advance ( a_itr , step_poss - 1 ); 00246 assoc_steps_.insert( a_itr , assoc_steps_ele_t() ); 00247 } 00248 00249 void Algorithm::change_step_name(poss_type step_poss, const std::string& new_name) 00250 { 00251 validate_not_in_state(RUNNING); 00252 if(running_state() == RUNNING_BEING_CONFIGURED) { 00253 validate_not_curr_step(validate(step_poss)); 00254 validate_not_next_step(steps_[step_poss - 1].name); 00255 } 00256 steps_[step_poss - 1].name = new_name; 00257 } 00258 00259 void Algorithm::replace_step(poss_type step_poss, const step_ptr_t& step) 00260 { 00261 validate_not_in_state(RUNNING); 00262 if(running_state() == RUNNING_BEING_CONFIGURED) validate_not_curr_step(validate(step_poss)); 00263 steps_[step_poss - 1].step_ptr = step; 00264 } 00265 00266 void Algorithm::remove_step(poss_type step_poss) 00267 { 00268 validate_not_in_state(RUNNING); 00269 if(running_state() == RUNNING_BEING_CONFIGURED) { 00270 validate_not_curr_step(validate(step_poss)); 00271 validate_not_next_step(steps_[step_poss - 1].name); 00272 } 00273 // remove the step in such a way that any container can be used for steps_ 00274 steps_t::iterator itr = steps_.begin(); 00275 std::advance ( itr , validate(step_poss) - 1 ); 00276 steps_.erase( itr ); 00277 // remove the assoc_step element in such a way that any container can be used for assoc_steps_ 00278 assoc_steps_t::iterator a_itr = assoc_steps_.begin(); 00279 std::advance ( a_itr , step_poss - 1 ); 00280 assoc_steps_.erase( a_itr ); 00281 } 00282 00283 // pre/post step manipulation 00284 00285 void Algorithm::insert_assoc_step(poss_type step_poss, EAssocStepType type, poss_type assoc_step_poss 00286 , const std::string& assoc_step_name, const step_ptr_t& assoc_step) 00287 { 00288 validate_not_in_state(RUNNING); 00289 TEUCHOS_TEST_FOR_EXCEPTION( 00290 assoc_step.get() == NULL, std::invalid_argument 00291 ,"Algorithm::insert_assoc_step(...) : A step with the name = \'" << assoc_step_name 00292 << "\' being inserted into the position = " << step_poss 00293 << "." << ( type == PRE_STEP 00294 ? (int)assoc_step_poss - num_assoc_steps(step_poss,type) - 1 00295 : assoc_step_poss ) 00296 << " has assoc_step.get() == NULL!" ); 00297 if(running_state() == RUNNING_BEING_CONFIGURED) validate_not_curr_step(validate(step_poss)); 00298 // Make sure an associated step with this name does not already exist. 00299 assoc_steps_ele_list_t &assoc_list = assoc_steps_[step_poss - 1][type]; 00300 validate(assoc_list,assoc_step_poss,+1); 00301 assoc_steps_ele_list_t::iterator itr = assoc_list.begin(); 00302 char assoc_type_name[2][10] = { "PRE_STEP" , "POST_STEP" }; 00303 if( assoc_list.end() != ( itr = assoc_step_itr(assoc_list,assoc_step_name) ) ) 00304 TEUCHOS_TEST_FOR_EXCEPTION( 00305 true, AlreadyExists 00306 ,"Algorithm::insert_assoc_step(...) : An associated step of type = " 00307 << assoc_type_name[type] 00308 << " with the name = " << assoc_step_name 00309 << " already exists at step_poss = " << step_poss 00310 << " and assoc_step_poss = " << std::distance(assoc_list.begin(),itr) + 1 ); 00311 // insert an associated step in such a way that any container could be used. 00312 itr = assoc_list.begin(); 00313 std::advance( itr, assoc_step_poss - 1 ); 00314 assoc_list.insert( itr , assoc_steps_ele_list_ele_t(assoc_step,assoc_step_name) ); 00315 } 00316 00317 void Algorithm::remove_assoc_step(poss_type step_poss, EAssocStepType type, poss_type assoc_step_poss) 00318 { 00319 validate_not_in_state(RUNNING); 00320 if(running_state() == RUNNING_BEING_CONFIGURED) validate_not_curr_step(validate(step_poss)); 00321 validate(step_poss); 00322 assoc_steps_ele_list_t &assos_list = assoc_steps_[step_poss - 1][type]; 00323 validate(assos_list,assoc_step_poss); 00324 assoc_steps_ele_list_t::iterator itr = assos_list.begin(); 00325 std::advance( itr, assoc_step_poss - 1 ); 00326 assos_list.erase( itr ); 00327 } 00328 00329 // runtime configuration updating control 00330 00331 void Algorithm::begin_config_update() 00332 { 00333 validate_in_state(RUNNING); 00334 saved_next_step_name_ = *next_step_name_; 00335 saved_curr_step_name_ = steps_[curr_step_poss_ - 1].name; 00336 change_running_state(RUNNING_BEING_CONFIGURED); 00337 } 00338 00339 void Algorithm::end_config_update() 00340 { 00341 validate_in_state(RUNNING_BEING_CONFIGURED); 00342 00343 // update next_step_poss_ and next_step_name_. 00344 steps_t::iterator itr = step_itr(saved_next_step_name_); 00345 TEUCHOS_TEST_FOR_EXCEPT( !( itr != steps_.end() ) ); // the step with this name should not have been deleted or changed. 00346 next_step_poss_ = std::distance( steps_.begin() , itr ) + 1; 00347 next_step_name_ = &(*itr).name; 00348 00349 // update curr_step_poss_ 00350 itr = step_itr(saved_curr_step_name_); 00351 TEUCHOS_TEST_FOR_EXCEPT( !( itr != steps_.end() ) ); // the step with this name should not have been deleted or changed. 00352 curr_step_poss_ = std::distance( steps_.begin() , itr ) + 1; 00353 00354 // inform the step objects that *this has changes. 00355 imp_inform_steps( &AlgorithmStep::inform_updated ); 00356 00357 change_running_state(RUNNING); 00358 reconfigured_ = true; 00359 } 00360 00361 // algorithmic control 00362 00363 void Algorithm::do_step_next(const std::string& step_name) 00364 { 00365 validate_in_state(RUNNING); 00366 steps_t::iterator itr = step_itr_and_assert(step_name); 00367 next_step_poss_ = std::distance( steps_.begin() , itr ) + 1; 00368 next_step_name_ = &(*itr).name; 00369 do_step_next_called_ = true; 00370 } 00371 00372 void Algorithm::do_step_next(poss_type step_poss) 00373 { 00374 validate_in_state(RUNNING); 00375 const steps_ele_t &ele = steps_[validate(step_poss) - 1]; 00376 next_step_poss_ = step_poss; 00377 next_step_name_ = &ele.name; 00378 do_step_next_called_ = true; 00379 } 00380 00381 const std::string& Algorithm::what_is_next_step_name() const 00382 { 00383 validate_in_state(RUNNING); 00384 return *next_step_name_; 00385 } 00386 00387 Algorithm::poss_type Algorithm::what_is_next_step_poss() const 00388 { 00389 validate_in_state(RUNNING); 00390 return next_step_poss_; 00391 } 00392 00393 bool Algorithm::do_step(const std::string& step_name) 00394 { 00395 validate_in_state(RUNNING); 00396 return imp_do_step( std::distance( steps_.begin() , step_itr_and_assert(step_name) ) + 1 ); 00397 } 00398 00399 bool Algorithm::do_step(poss_type step_poss) 00400 { 00401 validate_in_state(RUNNING); 00402 return imp_do_step(step_poss); 00403 } 00404 00405 void Algorithm::terminate(bool success) 00406 { 00407 validate_in_state(RUNNING); 00408 terminate_status_ = success ? STATUS_TERMINATE_TRUE : STATUS_TERMINATE_FALSE; 00409 } 00410 00411 // start iterations 00412 00413 EAlgoReturn Algorithm::do_algorithm(poss_type step_poss) 00414 { 00415 using StopWatchPack::stopwatch; 00416 00417 validate_in_state(NOT_RUNNING); 00418 00419 track().initialize(); 00420 00421 try{ 00422 00423 terminate_status_ = STATUS_KEEP_RUNNING; 00424 change_running_state(RUNNING); 00425 00426 first_k_ = state().k(); 00427 next_step_poss_ = validate(step_poss); 00428 next_step_name_ = &steps_[step_poss - 1].name; 00429 00430 // Prepair for timing algorithm 00431 step_times_.resize( algo_timing_ ? (num_steps()+1) * (max_iter()+1+NUM_STEP_TIME_STATS) : 0 ); 00432 if( algo_timing_ ) { 00433 // step_times_[ max_iter() ] = 0.0; // flag for statistics not calc. yet. 00434 // // set iteration totals to zero 00435 // if( step_times_[(max_iter() + 1 + 5) * num_steps()] != 0.0 ) 00436 // std::fill_n( step_times_.begin() + (max_iter() + 1 + 5) * num_steps(), max_iter(), 0.0 ); 00437 std::fill_n( step_times_.begin(), step_times_.size(), 0.0 ); // Try setting everything to zero? 00438 time_stats_computed_ = false; 00439 } 00440 stopwatch step_timer; 00441 stopwatch overall_timer; 00442 00443 imp_inform_steps( &AlgorithmStep::initialize_step ); 00444 00445 overall_timer.start(); 00446 for(;;) { 00447 00448 curr_step_poss_ = next_step_poss_; 00449 // Note that curr_step_poss_ may change if there is a runtime 00450 // change in the configuration of the steps. 00451 00452 bool keep_on = true; 00453 00454 // Execute the steps for this step 00455 00456 if( algo_timing_ ) { 00457 step_timer.reset(); 00458 step_timer.start(); 00459 } 00460 00461 keep_on = imp_do_step(curr_step_poss_); 00462 00463 if( algo_timing_ ) { 00464 const double time = my_max(step_timer.stop(),-1e-50); // negative somehow (g++ -O2 ?) 00465 // time for step k for the iteration 00466 step_times_[state().k()-first_k_+(curr_step_poss_-1)*(max_iter()+1+NUM_STEP_TIME_STATS)] = time; 00467 // Add to time for the full iteration 00468 step_times_[state().k()-first_k_+(num_steps())*(max_iter()+1+NUM_STEP_TIME_STATS)] += time; 00469 } 00470 00471 // See if a step object called terminate(...) 00472 if(terminate_status_ != STATUS_KEEP_RUNNING) { 00473 EAlgoReturn algo_return; 00474 if( static_interrupt_status == STOP_END_STEP ) { 00475 algo_return = ( terminate_status_ == STATUS_TERMINATE_TRUE 00476 ? INTERRUPTED_TERMINATE_TRUE 00477 : INTERRUPTED_TERMINATE_FALSE ); 00478 static_interrupt_status = NOT_INTERRUPTED; 00479 } 00480 else { 00481 algo_return = ( terminate_status_ == STATUS_TERMINATE_TRUE 00482 ? TERMINATE_TRUE 00483 : TERMINATE_FALSE ); 00484 } 00485 return finalize_algorithm(algo_return); 00486 } 00487 00488 if(keep_on) { 00489 00490 // All the step objects returned true so increment the step and loop around 00491 00492 if( curr_step_poss_ == static_cast<poss_type>(num_steps()) ) { 00493 00494 // 00495 // This is the last step in the algorithm 00496 // 00497 00498 // Output this iteration 00499 track().output_iteration(*this); 00500 00501 // Check if the maximum number of iterations has been exceeded. 00502 if( state().k() - first_k_ >= max_iter() ) { 00503 return finalize_algorithm(MAX_ITER_EXCEEDED); 00504 } 00505 00506 // Check if the maximum runtime has been exceeded. 00507 if( ( overall_timer.read() / 60 ) >= max_run_time() ) { 00508 return finalize_algorithm(MAX_RUN_TIME_EXCEEDED); 00509 } 00510 00511 // Set if the algorithm was interrupted 00512 if( static_interrupt_status == STOP_END_ITER ) { 00513 static_interrupt_status = NOT_INTERRUPTED; 00514 const EAlgoReturn algo_return = ( static_interrupt_terminate_return 00515 ? INTERRUPTED_TERMINATE_TRUE 00516 : INTERRUPTED_TERMINATE_FALSE ); 00517 return finalize_algorithm(algo_return); 00518 } 00519 00520 // Transition the iteration quantities to k = k + 1 00521 state().next_iteration(); 00522 00523 // Setup to start the major loop over again 00524 next_step_poss_ = 1; 00525 next_step_name_ = &steps_[0].name; 00526 00527 } 00528 else { 00529 00530 // else just increment the step 00531 ++next_step_poss_; 00532 next_step_name_ = &steps_[next_step_poss_ - 1].name; 00533 00534 } 00535 00536 continue; // loop around 00537 00538 } 00539 else { 00540 // some step object returned false from its do_step(..) operation so it 00541 // should have called do_step_next(...) to request a jump to 00542 // a specific operation. 00543 if(!do_step_next_called_) 00544 TEUCHOS_TEST_FOR_EXCEPTION( 00545 true, InvalidControlProtocal 00546 ,"EAlgoReturn Algorithm::do_algorithm(...) :" 00547 " A step object returned false from its do_step(...) operation" 00548 " without calling do_step_next(...) to request jump to a specific" 00549 " step." ); 00550 do_step_next_called_ = false; 00551 // just loop around and do the step that the step object requested 00552 // by changing next_step_poss_ by its call to do_step_next(...). 00553 } 00554 } // end for(;;) 00555 00556 } // end try 00557 catch(...) { 00558 try { 00559 finalize_algorithm(TERMINATE_FALSE); 00560 } 00561 catch(...) { 00562 // We tried to finalize gracefully but we failed! 00563 } 00564 throw; 00565 } 00566 } 00567 00568 // algorithm information output 00569 00570 void Algorithm::print_steps(std::ostream& out) const 00571 { 00572 out << "\n*** Algorithm Steps ***\n\n"; 00573 imp_print_algorithm(out,false); 00574 out << std::endl; 00575 } 00576 00577 void Algorithm::print_algorithm(std::ostream& out) const 00578 { 00579 out << "\n*** Iteration Quantities ***\n\n"; 00580 state().dump_iter_quant(out); 00581 out << std::endl; 00582 out << "\n*** Algorithm Description ***\n\n"; 00583 imp_print_algorithm(out,true); 00584 out << std::endl; 00585 } 00586 00587 // Algorithm Timing. 00588 00589 void Algorithm::set_algo_timing( bool algo_timing ) { 00590 validate_not_in_state(RUNNING); 00591 algo_timing_ = algo_timing; 00592 } 00593 00594 bool Algorithm::algo_timing() const { 00595 return algo_timing_; 00596 } 00597 00598 void Algorithm::print_algorithm_times( std::ostream& out ) const 00599 { 00600 using std::setw; 00601 using std::endl; 00602 00603 validate_not_in_state(RUNNING); 00604 00605 if( step_times_.size() == 0 ) { 00606 out << "No step timing was performed\n"; 00607 return; 00608 } 00609 00610 const int w = 10; 00611 const int prec = 4; 00612 const int n = num_steps(); // Total steps 00613 const int m = state().k() - first_k_ + 1; // Total number of iterations performed 00614 const int mm = max_iter()+1; // Total number of possible iterations 00615 const int mmm = mm + NUM_STEP_TIME_STATS; // total entries in a step_i row 00616 00617 // Print the header. 00618 out << "\n\n**************************************\n" 00619 << "*** Algorithm step CPU times (sec) ***\n"; 00620 00621 // Print the step names. 00622 out << "\nStep names" 00623 << "\n----------\n"; 00624 {for( int i = 1; i <= n; ++i ) { 00625 out << i << ") \"" << get_step_name(i) << "\"\n"; 00626 }} 00627 out << n+1 << ") Iteration total\n"; 00628 out << endl; 00629 00630 out << std::right << std::setprecision(prec); 00631 00632 // Print table header 00633 out << setw(w) << "" << " steps 1..." << n+1 << " ->\n\n"; 00634 00635 // print step numbers 00636 out << setw(w) << " iter k"; 00637 {for( int i = 1; i <= n+1; ++i ) { 00638 out << setw(w) << i; 00639 }} 00640 out << endl; 00641 out << setw(w) << "--------"; 00642 {for( int i = 1; i <= n+1; ++i ) { 00643 out << setw(w) << "--------"; 00644 }} 00645 out << endl; 00646 // Print the step times. 00647 {for( int k = 0; k < m; ++k ) { 00648 out << setw(w) << first_k_ + k; 00649 {for( int i = 0; i < n+1; ++i ) { 00650 out << setw(w) << step_times_[k+i*mmm]; 00651 }} 00652 out << endl; 00653 }} 00654 00655 // Compute the (1) totals for each step, the (2) average, (3) min and (4) max times 00656 // per iteration for each step and the (5) precentages for each step. 00657 00658 compute_final_time_stats(); 00659 00660 // Ouput time statistics. 00661 00662 out << setw(w) << "--------"; 00663 {for( int i = 1; i <= n+1; ++i ) { 00664 out << setw(w) << "--------"; 00665 }} 00666 00667 // Output the total times for each step. 00668 out << endl; 00669 out << setw(w) << "total(sec)"; 00670 {for( int i = 0; i < n+1; ++i ) { 00671 const double *step_i_times = &step_times_[i*mmm]; 00672 out << setw(w) << step_i_times[ mm + TIME_STAT_TOTALS_OFFSET ]; 00673 }} 00674 out << endl; 00675 00676 // Output the average times per iteration 00677 out << setw(w) << "av(sec)/k"; 00678 {for( int i = 0; i < n+1; ++i ) { 00679 const double *step_i_times = &step_times_[i*mmm]; 00680 out << setw(w) << step_i_times[ mm + TIME_STAT_AV_OFFSET ]; 00681 }} 00682 out << endl; 00683 00684 // Output the min times per iteration 00685 out << setw(w) << "min(sec)"; 00686 {for( int i = 0; i < n+1; ++i ) { 00687 const double *step_i_times = &step_times_[i*mmm]; 00688 out << setw(w) << step_i_times[ mm + TIME_STAT_MIN_OFFSET ]; 00689 }} 00690 out << endl; 00691 00692 // Output the max times per iteration 00693 out << setw(w) << "max(sec)"; 00694 {for( int i = 0; i < n+1; ++i ) { 00695 const double *step_i_times = &step_times_[i*mmm]; 00696 out << setw(w) << step_i_times[ mm + TIME_STAT_MAX_OFFSET ]; 00697 }} 00698 out << endl; 00699 00700 // Output the precentage times for each step. 00701 out << setw(w) << "% total"; 00702 {for( int i = 0; i < n+1; ++i ) { 00703 const double *step_i_times = &step_times_[i*mmm]; 00704 out << setw(w) << step_i_times[ mm + TIME_STAT_PERCENT_OFFSET ] * 100.0; 00705 }} 00706 out << endl; 00707 00708 00709 // Print total time for entire algorithm. 00710 out << "------------------------------" << endl 00711 << "total CPU time = " << total_time_ << " sec\n";; 00712 } 00713 00714 00715 void Algorithm::get_step_times_k( int offset, double step_times[] ) const 00716 { 00717 TEUCHOS_TEST_FOR_EXCEPTION( 00718 step_times_.size() == 0, std::logic_error 00719 ,"Algorithm::get_step_times_k(...) : times requested, but no times calculated!" 00720 ); 00721 TEUCHOS_TEST_FOR_EXCEPTION( 00722 offset > 0, std::invalid_argument 00723 ,"Algorithm::get_step_times_k(...) : Can\'t get times for an iteratin that has not occured yet!." 00724 ); 00725 00726 const int n = num_steps(); // Total steps 00727 //const int m = state().k() - first_k_ + 1; // Total number of iterations performed 00728 const int mm = max_iter()+1; // Total number of possible iterations 00729 const int mmm = mm + NUM_STEP_TIME_STATS; // total entries in a step_i row 00730 00731 const int k = state().k() + offset; 00732 {for (int step = 0; step < n+1; ++step) { 00733 step_times[step] = step_times_[step*mmm + k]; 00734 }} 00735 00736 } 00737 00738 void Algorithm::get_final_step_stats( size_t step, double* total, double* average, double* min, double* max, double* percent) const 00739 { 00740 // Compute the (1) totals for each step, the (2) average, (3) min and (4) max times 00741 // per iteration for each step and the (5) precentages for each step. 00742 compute_final_time_stats(); 00743 00744 //const int n = num_steps(); // Total steps 00745 //const int m = state().k() - first_k_ + 1; // Total number of iterations performed 00746 const int mm = max_iter()+1; // Total number of possible iterations 00747 const int mmm = mm + NUM_STEP_TIME_STATS; // total entries in a step_i row 00748 00749 double* step_i_times = &const_cast<step_times_t&>(step_times_)[step*mmm]; 00750 if (total) { 00751 *total = step_i_times[mm + TIME_STAT_TOTALS_OFFSET]; 00752 } 00753 if (average) { 00754 *average = step_i_times[mm + TIME_STAT_AV_OFFSET]; 00755 } 00756 if (min) { 00757 *min = step_i_times[mm + TIME_STAT_MIN_OFFSET]; 00758 } 00759 if (max) { 00760 *max = step_i_times[mm + TIME_STAT_MAX_OFFSET]; 00761 } 00762 if (percent) { 00763 *percent = step_i_times[mm + TIME_STAT_PERCENT_OFFSET]; 00764 } 00765 } 00766 00767 EAlgoReturn Algorithm::finalize_algorithm( EAlgoReturn algo_return ) 00768 { 00769 change_running_state(NOT_RUNNING); 00770 imp_inform_steps( &AlgorithmStep::finalize_step ); 00771 track().output_final(*this,algo_return); 00772 return algo_return; 00773 } 00774 00775 void Algorithm::compute_final_time_stats() const 00776 { 00777 if (!time_stats_computed_) { 00778 time_stats_computed_ = true; 00779 00780 const int n = num_steps(); // Total steps 00781 const int m = state().k() - first_k_ + 1; // Total number of iterations performed 00782 const int mm = max_iter()+1; // Total number of possible iterations 00783 const int mmm = mm + NUM_STEP_TIME_STATS; // total entries in a step_i row 00784 00785 // compute totals for each step (1...n) and the full iteration (n+1) 00786 double &_total_time = const_cast<double&>(total_time_); 00787 _total_time = 0.0; 00788 00789 {for( int i = 0; i < n+1; ++i ) { 00790 double *step_i_times = &const_cast<step_times_t&>(step_times_)[i*mmm]; 00791 // compute total step times (and total algorithm time) 00792 const double 00793 step_time = std::accumulate( step_i_times, step_i_times + m, (double)0.0 ); 00794 if(i < n) 00795 _total_time += step_time; 00796 step_i_times[ mm + TIME_STAT_TOTALS_OFFSET ] = step_time; 00797 // compute average per step. 00798 step_i_times[ mm + TIME_STAT_AV_OFFSET ] = step_time / m; 00799 // compute min per step 00800 step_i_times[ mm + TIME_STAT_MIN_OFFSET ]= *std::min_element( step_i_times, step_i_times + m ); 00801 // compute max per step 00802 step_i_times[ mm + TIME_STAT_MAX_OFFSET ]= *std::max_element( step_i_times, step_i_times + m ); 00803 }} 00804 00805 {for( int i = 0; i < n+1; ++i ) { 00806 double *step_i_times = &const_cast<step_times_t&>(step_times_)[i*mmm]; 00807 // compute fractions for each step. 00808 step_i_times[ mm + TIME_STAT_PERCENT_OFFSET ] 00809 = step_i_times[ mm + TIME_STAT_TOTALS_OFFSET ] / total_time_; 00810 }} 00811 } 00812 } 00813 00814 // private 00815 00816 void Algorithm::change_running_state(ERunningState _running_state) 00817 { 00818 if( running_state() != RUNNING && _running_state == RUNNING ) { 00819 if( static_num_running_algorithms == 0 ) { 00820 // Register the signal handler for the SIGINT 00821 signal( SIGINT, &sig_handler_interrupt_algorithm ); 00822 static_interrupt_called = false; 00823 static_processed_user_interrupt = false; 00824 } 00825 ++static_num_running_algorithms; 00826 } 00827 else if( running_state() != NOT_RUNNING && _running_state == NOT_RUNNING ) { 00828 --static_num_running_algorithms; 00829 if( static_num_running_algorithms == 0 ) { 00830 // Put back the default signal handler 00831 signal( SIGINT, SIG_DFL ); 00832 static_interrupt_called = false; 00833 static_processed_user_interrupt = false; 00834 } 00835 } 00836 running_state_ = _running_state; 00837 } 00838 00839 void Algorithm::validate_in_state(ERunningState _running_state) const { 00840 const char running_state_name[3][25] = { "NOT_RUNNING" , "RUNNING", "RUNNING_BEING_CONFIGURED" }; 00841 if(running_state() != _running_state) 00842 TEUCHOS_TEST_FOR_EXCEPTION( 00843 true, InvalidRunningState 00844 ,"Algorithm::validate_in_state(...) : The condition running_state() == " 00845 << running_state_name[running_state()] << " has been violated with " 00846 << " running_state = " << running_state_name[_running_state] ); 00847 } 00848 00849 void Algorithm::validate_not_in_state(ERunningState _running_state) const { 00850 const char running_state_name[3][25] = { "NOT_RUNNING" , "RUNNING", "RUNNING_BEING_CONFIGURED" }; 00851 if(running_state() == _running_state) 00852 TEUCHOS_TEST_FOR_EXCEPTION( 00853 true, InvalidRunningState 00854 ,"Algorithm::validate_not_in_state(...) : The condition running_state() != " 00855 << running_state_name[running_state()] << " has been violated" ); 00856 } 00857 00858 void Algorithm::validate_not_curr_step(poss_type step_poss) const { 00859 if(step_poss == curr_step_poss_) 00860 TEUCHOS_TEST_FOR_EXCEPTION( 00861 true, InvalidConfigChange 00862 ,"Algorithm::validate_not_curr_step(step_poss="<<step_poss<<") : " 00863 "Error, You can not modify the step being currently executed" ); 00864 } 00865 00866 void Algorithm::validate_not_next_step(const std::string& step_name) const { 00867 if( step_name == saved_next_step_name_ ) 00868 TEUCHOS_TEST_FOR_EXCEPTION( 00869 true, InvalidConfigChange, 00870 "Algorithm::validate_not_next_step(step_name): " 00871 "Error, You can not modify name or remove the step given by " 00872 "step_name = what_is_next_name() = " << step_name ); 00873 } 00874 00875 Algorithm::steps_t::iterator Algorithm::step_itr_and_assert(const std::string& step_name) 00876 { 00877 steps_t::iterator itr = step_itr(step_name); 00878 if(itr == steps_.end()) 00879 TEUCHOS_TEST_FOR_EXCEPTION( 00880 true, DoesNotExist 00881 ,"Algorithm::step_itr(...) : A step with the name " 00882 << step_name << " does not exist." ); 00883 return itr; 00884 } 00885 00886 Algorithm::steps_t::const_iterator Algorithm::step_itr_and_assert(const std::string& step_name) const 00887 { 00888 steps_t::const_iterator itr = step_itr(step_name); 00889 if(itr == steps_.end()) 00890 TEUCHOS_TEST_FOR_EXCEPTION( 00891 true, DoesNotExist 00892 ,"Algorithm::step_itr(...) : A step with the name " 00893 << step_name << " does not exist." ); 00894 return itr; 00895 } 00896 00897 bool Algorithm::imp_do_step(poss_type step_poss) { 00898 curr_step_poss_ = step_poss; 00899 // do the pre steps in order 00900 if( !imp_do_assoc_steps(PRE_STEP) ) return false; 00901 // do the main step 00902 if( !steps_[curr_step_poss_-1].step_ptr->do_step(*this, curr_step_poss_, DO_MAIN_STEP, 0) ) return false; 00903 // do the post steps in order 00904 if( !imp_do_assoc_steps(POST_STEP) ) return false; 00905 // if you get here all the pre steps, step, and post steps returned true. 00906 if( static_interrupt_status == NOT_INTERRUPTED ) 00907 look_for_interrupt(); 00908 if( static_interrupt_status == STOP_END_STEP ) { 00909 terminate( static_interrupt_terminate_return ); 00910 return false; 00911 } 00912 return true; 00913 } 00914 00915 bool Algorithm::imp_do_assoc_steps(EAssocStepType type) { 00916 assoc_steps_ele_list_t *assoc_list = &assoc_steps_[curr_step_poss_ - 1][type]; 00917 assoc_steps_ele_list_t::iterator itr = assoc_list->begin(); 00918 int n = assoc_list->size(); 00919 for(int i = 1; i <= n; ++itr, ++i) { 00920 if(reconfigured_) { 00921 // The associated step just has reconfigured *this 00922 // so we must update our pointers and iterators. 00923 // Since it is not allowed for this step or its associated steps 00924 // to have been changed, the next associated step to 00925 // execute will not change. 00926 assoc_list = &assoc_steps_[curr_step_poss_ - 1][type]; 00927 itr = assoc_list->begin(); 00928 std::advance( itr, i - 1 ); 00929 reconfigured_ = false; // This works as long as no one else needs to know 00930 // if *this has been reconfigured. 00931 } 00932 if( !(*(*itr).step_ptr).do_step(*this, curr_step_poss_, do_step_type(type), i) ) return false; 00933 } 00934 return true; // All the associated steps returned true. 00935 } 00936 00937 void Algorithm::imp_inform_steps(inform_func_ptr_t inform_func_ptr) 00938 { 00939 steps_t::const_iterator s_itr = steps_.begin(); 00940 assoc_steps_t::const_iterator a_itr = assoc_steps_.begin(); 00941 poss_type step_i = 1; 00942 for(; step_i <= static_cast<poss_type>(num_steps()); ++step_i, ++s_itr, ++a_itr) { 00943 // pre_steps (e.q. 2.-3, 2.-2, 2.-1) 00944 const assoc_steps_ele_list_t &pre_steps = (*a_itr)[PRE_STEP]; 00945 assoc_steps_ele_list_t::const_iterator pre_step_itr = pre_steps.begin(); 00946 for(int pre_step_i = - pre_steps.size(); pre_step_i < 0; ++pre_step_i, ++pre_step_itr) { 00947 ((&*(*pre_step_itr).step_ptr)->*inform_func_ptr)( 00948 *this, step_i, DO_PRE_STEP, pre_steps.size()+pre_step_i+1 00949 ); 00950 } 00951 // The main step. 00952 ((&*(*s_itr).step_ptr)->*inform_func_ptr)( *this, step_i, DO_MAIN_STEP, 0 ); 00953 // post_steps (e.q. 2.1, 2.2, 2.3) 00954 const assoc_steps_ele_list_t &post_steps = (*a_itr)[POST_STEP]; 00955 assoc_steps_ele_list_t::const_iterator post_step_itr = post_steps.begin(); 00956 for(int post_step_i = 1; post_step_i <= static_cast<int>(post_steps.size()); ++post_step_i, ++post_step_itr) { 00957 ((&*(*post_step_itr).step_ptr)->*inform_func_ptr)( 00958 *this, step_i, DO_POST_STEP, post_step_i 00959 ); 00960 } 00961 } 00962 } 00963 00964 void Algorithm::imp_print_algorithm(std::ostream& out, bool print_steps) const 00965 { 00966 using Teuchos::typeName; 00967 const std::string leading_str = " "; 00968 00969 steps_t::const_iterator s_itr = steps_.begin(); 00970 assoc_steps_t::const_iterator a_itr = assoc_steps_.begin(); 00971 poss_type step_i = 1; 00972 for(; step_i <= static_cast<poss_type>(num_steps()); ++step_i, ++s_itr, ++a_itr) { 00973 // list pre_steps (e.q. 2.-3, 2.-2, 2.-1) 00974 const assoc_steps_ele_list_t &pre_steps = (*a_itr)[PRE_STEP]; 00975 assoc_steps_ele_list_t::const_iterator pre_step_itr = pre_steps.begin(); 00976 for(int pre_step_i = - pre_steps.size(); pre_step_i < 0; ++pre_step_i, ++pre_step_itr) { 00977 out << step_i << "." << pre_step_i << ". \"" 00978 << (*pre_step_itr).name << "\"\n" 00979 << leading_str << "(" << typeName(*(*pre_step_itr).step_ptr) << ")\n"; 00980 if(print_steps) { 00981 (*(*pre_step_itr).step_ptr).print_step( *this, step_i, DO_PRE_STEP 00982 , pre_steps.size()+pre_step_i+1, out, leading_str ); 00983 out << std::endl; 00984 } 00985 } 00986 // The main step. 00987 out << step_i << ". \"" << (*s_itr).name 00988 << "\"\n" 00989 << leading_str << "(" << typeName(*(*s_itr).step_ptr) << ")\n"; 00990 if(print_steps) { 00991 (*(*s_itr).step_ptr).print_step( *this, step_i, DO_MAIN_STEP, 0, out, leading_str ); 00992 out << std::endl; 00993 } 00994 // list post_steps (e.q. 2.1, 2.2, 2.3) 00995 const assoc_steps_ele_list_t &post_steps = (*a_itr)[POST_STEP]; 00996 assoc_steps_ele_list_t::const_iterator post_step_itr = post_steps.begin(); 00997 for(int post_step_i = 1; post_step_i <= static_cast<poss_type>(post_steps.size()); ++post_step_i, ++post_step_itr) { 00998 out << step_i << "." << post_step_i << ". \"" 00999 << (*post_step_itr).name << "\"\n" 01000 << leading_str << "(" << typeName(*(*post_step_itr).step_ptr) << ")\n"; 01001 if(print_steps) { 01002 (*(*post_step_itr).step_ptr).print_step( *this, step_i, DO_POST_STEP, post_step_i 01003 , out, leading_str ); 01004 out << std::endl; 01005 } 01006 } 01007 } 01008 if(print_steps) { 01009 out 01010 << step_i << ". \"Major Loop\" :\n" 01011 << " if k >= max_iter then\n" 01012 << " terminate the algorithm\n" 01013 << " elseif run_time() >= max_run_time then\n" 01014 << " terminate the algorithm\n" 01015 << " else\n" 01016 << " k = k + 1\n" 01017 << " goto 1\n" 01018 << " end\n"; 01019 } 01020 } 01021 01022 // validate poss 01023 01024 Algorithm::poss_type Algorithm::validate(poss_type step_poss, int past_end) const 01025 { 01026 01027 TEUCHOS_TEST_FOR_EXCEPTION( 01028 step_poss < 1 || steps_.size() + past_end < step_poss, DoesNotExist 01029 ,"Algorithm::validate(step_poss) : The step_poss = " << step_poss 01030 << " is not in range of 1 to " << steps_.size() + past_end ); 01031 return step_poss; 01032 } 01033 01034 Algorithm::poss_type Algorithm::validate(const assoc_steps_ele_list_t& assoc_list 01035 , poss_type assoc_step_poss, int past_end) const 01036 { 01037 TEUCHOS_TEST_FOR_EXCEPTION( 01038 assoc_step_poss < 1 || assoc_list.size() + past_end < assoc_step_poss, DoesNotExist 01039 ,"Algorithm::validate(assoc_list,assoc_step_poss) : The assoc_step_poss = " 01040 << assoc_step_poss << " is not in range of 1 to " << assoc_list.size() + past_end ); 01041 return assoc_step_poss; 01042 } 01043 01044 void Algorithm::look_for_interrupt() 01045 { 01046 // 01047 // Get the mode of aborting from the user! 01048 // 01049 if( static_interrupt_called && !static_processed_user_interrupt && static_proc_rank == 0 ) { 01050 // Allow for another interrupt possibly 01051 static_interrupt_called = false; 01052 // 01053 // Get the response from the user 01054 // 01055 enum EResponse { R_ABORT_NOW, R_CONTINUE, R_STOP_END_STEP, R_STOP_END_ITER }; 01056 EResponse response = R_ABORT_NOW; 01057 const int max_tries = 3; 01058 bool valid_response = false; 01059 for( int tries = 0; !valid_response && tries < max_tries; ++tries ) { 01060 std::cerr 01061 << "\nIterationPack::Algorithm: Received signal SIGINT." 01062 << "\nJust completed current step curr_step_name = \"" 01063 << get_step_name(curr_step_poss_) << "\", curr_step_poss = " 01064 << curr_step_poss_ << " of steps [1..." << num_steps() << "]." 01065 << "\nDo you want to:\n" 01066 << " (a) Abort the program immediately?\n" 01067 << " (c) Continue with the algorithm?\n" 01068 << " (s) Gracefully terminate the algorithm at the end of this step?\n" 01069 << " (i) Gracefully terminate the algorithm at the end of this iteration?\n" 01070 << "Answer a, c, s or i ? "; 01071 char abort_mode = 'a'; 01072 std::cin >> abort_mode; 01073 if( abort_mode == 'a' ) { 01074 response = R_ABORT_NOW; 01075 valid_response = true; 01076 } 01077 else if( abort_mode == 'c' ) { 01078 response = R_CONTINUE; 01079 valid_response = true; 01080 } 01081 else if( abort_mode == 's' || abort_mode == 'i' ) { 01082 if( abort_mode == 's') 01083 response = R_STOP_END_STEP; 01084 else 01085 response = R_STOP_END_ITER; 01086 std::cerr 01087 << "\nTerminate the algorithm with true (t) or false (f) ? "; 01088 std::cin >> abort_mode; 01089 if( abort_mode == 't' ) { 01090 static_interrupt_terminate_return = true; 01091 valid_response = true; 01092 } 01093 else if( abort_mode == 'f' ) { 01094 static_interrupt_terminate_return = false; 01095 valid_response = true; 01096 } 01097 else { 01098 std::cerr << "Invalid response! Expecting \'t\' or \'f\'\n"; 01099 } 01100 } 01101 else { 01102 std::cerr << "\nInvalid response! Expecting \'a\', \'c\', \'s\' or \'i\'\n"; 01103 } 01104 std::cerr << std::endl; 01105 } 01106 if(!valid_response) { 01107 std::cerr << "Three strikes, you are out!\n"; 01108 } 01109 // 01110 // Interpret the response 01111 // 01112 switch(response) { 01113 case R_ABORT_NOW: { 01114 static_interrupt_status = ABORT_PROGRAM; 01115 break; 01116 } 01117 case R_CONTINUE: { 01118 static_interrupt_status = NOT_INTERRUPTED; 01119 break; 01120 } 01121 case R_STOP_END_STEP: { 01122 static_interrupt_status = STOP_END_STEP; 01123 break; 01124 } 01125 case R_STOP_END_ITER: { 01126 static_interrupt_status = STOP_END_ITER; 01127 break; 01128 } 01129 default: { 01130 TEUCHOS_TEST_FOR_EXCEPT(true); 01131 } 01132 } 01133 static_processed_user_interrupt = true; 01134 } 01135 else if( interrupt_file_name().length() && !static_processed_user_interrupt && static_proc_rank == 0 ) { 01136 // 01137 // If there was not an interactive interrupt then look for an 01138 // interrupt file if we have not already done this 01139 // (static_processed_user_interrupt). 01140 // 01141 std::ifstream interrupt_file(interrupt_file_name().c_str()); 01142 if(interrupt_file) { 01143 std::cerr 01144 << "\nIterationPack::Algorithm: Found the interrupt file \""<<interrupt_file_name()<<"\"!" 01145 << "\nJust completed current step curr_step_name = \"" 01146 << get_step_name(curr_step_poss_) << "\", curr_step_poss = " 01147 << curr_step_poss_ << " of steps [1..." << num_steps() << "].\n"; 01148 char abort_mode = 0; 01149 interrupt_file >> abort_mode; 01150 std::cerr << "Read a value of abort_mode = \'"<<abort_mode<<"\': "; 01151 if( abort_mode == 'a' ) { 01152 std::cerr << "Will abort the program immediatly!\n"; 01153 static_interrupt_status = ABORT_PROGRAM; 01154 } 01155 else if( abort_mode == 's' || abort_mode == 'i' ) { 01156 if( abort_mode == 's') { 01157 std::cerr << "Will abort the program gracefully at the end of this step!\n"; 01158 static_interrupt_status = STOP_END_STEP; 01159 } 01160 else { 01161 std::cerr << "Will abort the program gracefully at the end of this iteration!\n"; 01162 static_interrupt_status = STOP_END_ITER; 01163 } 01164 TEUCHOS_TEST_FOR_EXCEPTION( 01165 interrupt_file.eof(), std::logic_error, 01166 "IterationPack::Algorithm: Error, expected input for terminate_bool option from the " 01167 "file \""<<interrupt_file_name()<<"\"!" 01168 ); 01169 char terminate_bool = 0; 01170 interrupt_file >> terminate_bool; 01171 std::cerr << "Read a value of terminate_bool = \'"<<terminate_bool<<"\': "; 01172 if( terminate_bool == 't' ) { 01173 std::cerr << "Will return a success flag!\n"; 01174 static_interrupt_terminate_return = true; 01175 } 01176 else if( terminate_bool == 'f' ) { 01177 std::cerr << "Will return a failure flag!\n"; 01178 static_interrupt_terminate_return = false; 01179 } 01180 else { 01181 TEUCHOS_TEST_FOR_EXCEPTION( 01182 true, std::logic_error 01183 ,"Error, the value of terminate_bool = \'"<<terminate_bool<<"\' is not " 01184 "valid! Valid values include only \'t\' or \'f\'\n" 01185 ); 01186 } 01187 } 01188 else { 01189 TEUCHOS_TEST_FOR_EXCEPTION( 01190 true, std::logic_error 01191 ,"Error, the value of abort_mode = \'"<<abort_mode<<"\' is not " 01192 "valid! Valid values include only \'a\', \'s\' or \'i\'\n" 01193 ); 01194 } 01195 std::cerr << std::endl; 01196 static_processed_user_interrupt = true; 01197 } 01198 } 01199 // 01200 // Make sure that all of the processes get the same 01201 // response 01202 // 01203 #ifdef HAVE_MPI 01204 const bool query_for_interrupt = true; // ToDo: Make this an external option! 01205 if( static_num_proc > 1 && query_for_interrupt ) { 01206 // 01207 // Here we will do a global reduction to see of a processor has 01208 // recieved an interrupt. Here we will do a sum operation since only the 01209 // root process should be getting these options. 01210 // 01211 int sendbuf[2] = { 0, 0 }; 01212 int recvbuf[2] = { 0, 0 }; 01213 if(static_proc_rank == 0) { 01214 sendbuf[0] = (int)static_interrupt_status; 01215 sendbuf[1] = static_interrupt_terminate_return ? 1 : 0; 01216 } 01217 // Note: this global reduction will synchronize all of the processors! 01218 #ifdef ITERATION_PACK_ALGORITHM_SHOW_MPI_DEBUG_INFO 01219 std::cerr << "\np="<<static_proc_rank<<": IterationPack::Algorithm::interrupt(): Calling MPI_Allreduce(...) ...\n"; 01220 #endif 01221 MPI_Allreduce( 01222 sendbuf // sendbuf 01223 ,recvbuf // recvbuf 01224 ,2 // count 01225 ,MPI_INT // datatype 01226 ,MPI_SUM // op 01227 ,MPI_COMM_WORLD // comm (ToDo: Make more general?) 01228 ); 01229 #ifdef ITERATION_PACK_ALGORITHM_SHOW_MPI_DEBUG_INFO 01230 std::cerr 01231 << "\np="<<static_proc_rank<<": IterationPack::Algorithm::interrupt(): After MPI_Allreduce(...)" 01232 << "\np="<<static_proc_rank<<": recvbuf[0] = " << recvbuf[0] << ", recvbuf[1] = " << recvbuf[1] << std::endl; 01233 #endif 01234 // Set static_interrupt_status 01235 switch( (EInterruptStatus)recvbuf[0] ) { 01236 case NOT_INTERRUPTED: 01237 static_interrupt_status = NOT_INTERRUPTED; 01238 break; 01239 case STOP_END_STEP: 01240 static_interrupt_status = STOP_END_STEP; 01241 break; 01242 case STOP_END_ITER: 01243 static_interrupt_status = STOP_END_ITER; 01244 break; 01245 case ABORT_PROGRAM: 01246 static_interrupt_status = ABORT_PROGRAM; 01247 break; 01248 default: 01249 std::cerr 01250 << "p=" << static_proc_rank << ": Algorithm::look_for_interrupt(): Error, the globally reduced value of " 01251 "recvbuf[0] = " << recvbuf[0] << " is not valid!"; 01252 std::abort(); 01253 } 01254 // Set static_interrupt_terminate_return 01255 static_interrupt_terminate_return = ( recvbuf[1] == 0 ? false : true ); 01256 } 01257 // 01258 // Abort the program now if the user did not already press Ctrl-C again! 01259 // 01260 if( static_interrupt_status == ABORT_PROGRAM ) { 01261 if( static_proc_rank == 0 ) { 01262 std::cerr << "\nAborting the program now!\n"; 01263 } 01264 std::abort(); 01265 } 01266 #endif 01267 } 01268 01269 // static 01270 01271 void Algorithm::interrupt() 01272 { 01273 // 01274 // This function assumes that every process will recieve the same 01275 // signal which I found to be the case with MPICH. I am not clear 01276 // what the MPI standard says about interrupts so I can not 01277 // guarantee that this is 100% portable. If other behavior is 01278 // needed, this will have to be compiled in differently. 01279 // 01280 // Note: I have found that on MPICH that you can not guarantee that 01281 // only a single signal will be sent to a slave process so this 01282 // function will ignore interupts for slave processes. 01283 // 01284 // Note that you have to be very careful what you do inside of a 01285 // signal handler and in general you should only be setting flags or 01286 // aborting. 01287 // 01288 static_processed_user_interrupt = false; 01289 #ifdef ITERATION_PACK_ALGORITHM_SHOW_MPI_DEBUG_INFO 01290 std::cerr << "\np="<<static_proc_rank<<": IterationPack::Algorithm::interrupt() called!\n"; 01291 #endif 01292 // 01293 // See if an algorithm is possibly even running yet! 01294 // 01295 if( static_num_proc == 0 ) { 01296 if( static_proc_rank == 0 ) 01297 std::cerr 01298 << "\nIterationPack::Algorithm::interrupt(): Received signal SIGINT but an Algorithm " 01299 << "object has not been allocated yet and no algorithm is running.\n" 01300 << "\nAborting the program now!\n"; 01301 std::abort(); 01302 return; // Should not be called! 01303 } 01304 // 01305 // See if we are going to query for an interrupt when running in MPI mode 01306 // 01307 const bool query_for_interrupt = true; // ToDo: Make this an external option! 01308 if( !query_for_interrupt && static_num_proc > 1 ) { 01309 if( static_proc_rank == 0 ) { 01310 std::cerr 01311 << "\nIterationPack::Algorithm::interrupt(): Received signal SIGINT but num_proc = " 01312 << static_num_proc << " > 1 and query_for_interrupt = false so:\n" 01313 << "\nAborting the program now!\n"; 01314 } 01315 std::abort(); 01316 return; // Should not be called! 01317 } 01318 // 01319 // Remember that this interrupt has been called! 01320 // 01321 if( static_proc_rank == 0 ) { 01322 std::cerr 01323 << "\nIterationPack::Algorithm::interrupt(): Received signal SIGINT. " 01324 << "Wait for the end of the current step and respond to an interactive query, " 01325 << "kill the process by sending another signal (i.e. SIGKILL).\n"; 01326 } 01327 static_interrupt_called = true; 01328 } 01329 01330 } // end namespace IterationPack
1.7.6.1