|
Sierra Toolkit
Version of the Day
|
00001 00010 #include <stdexcept> 00011 #include <exception> 00012 #include <new> 00013 #include <typeinfo> 00014 #include <ios> 00015 #include <string> 00016 #include <sstream> 00017 #include <iostream> 00018 00019 #include <assert.h> 00020 00021 #include <stk_util/diag/Env.hpp> 00022 #include <stk_util/diag/Platform.hpp> 00023 #include <stk_util/parallel/Exception.hpp> 00024 #include <stk_util/parallel/ExceptionReport.hpp> 00025 #include <stk_util/parallel/ExceptionIos.hpp> 00026 #include <stk_util/diag/String.hpp> 00027 #include <stk_util/diag/Trace.hpp> 00028 00029 #include <stk_util/parallel/mpih.hpp> 00030 00031 namespace sierra { 00032 00033 void 00034 sierra_exception_throw() 00035 {} 00036 00037 00038 ParallelThrowRegistry & 00039 ParallelThrowRegistry::instance() 00040 { 00041 static ParallelThrowRegistry s_parallelThrowRegistry; 00042 00043 return s_parallelThrowRegistry; 00044 } 00045 00046 00047 ParallelThrowRegistry::Registry::Registry() 00048 {} 00049 00050 00051 ParallelThrowRegistry::Registry::~Registry() 00052 { 00053 // Truely sick. Each is registered twice, once for the parallel version of the 00054 // exception and once for the <stdexcept> base class version. The double increment 00055 // keeps from deleting it twice. See ParallelThrowRegistry::registerException. 00056 for (iterator it = begin(); it != end(); ++it, ++it) 00057 delete (*it).second; 00058 } 00059 00060 00061 ExParallel & 00062 ParallelThrowRegistry::register_exception_a( 00063 const std::type_info & exception_type, 00064 ExParallel * exception) 00065 { 00066 if (!findException(exception_type)) { 00067 m_registry.push_back(Registry::value_type(&exception_type, exception)); 00068 mpih::Add_Handle(*exception); 00069 } 00070 return *exception; 00071 } 00072 00073 ExParallel * 00074 ParallelThrowRegistry::findException( 00075 const std::type_info & exception_type) 00076 { 00077 for (Registry::iterator it = m_registry.begin(); it != m_registry.end(); ++it) 00078 if (*(*it).first == exception_type) 00079 return (*it).second; 00080 00081 return NULL; 00082 } 00083 00084 00085 void 00086 ExParallel::parallel_handler() 00087 {} 00088 00089 00090 void 00091 throw_copy( 00092 const std::exception & x, 00093 const std::string & append_message) 00094 { 00095 ExParallel *exception = ParallelThrowRegistry::instance().findException(typeid(x)); 00096 if (!exception) 00097 exception = ParallelThrowRegistry::instance().findException(typeid(Exception)); 00098 00099 exception->clear(); 00100 *exception << x.what() << append_message; 00101 00102 exception->throw_copy(); 00103 } 00104 00105 00106 void 00107 set_exception() 00108 { 00109 BadException x; 00110 x << "Unknown exception"; 00111 set_exception(static_cast<ExParallel &>(x)); 00112 } 00113 00114 00115 void 00116 set_exception( 00117 std::exception & x) 00118 { 00119 ExParallel *registered_exception = ParallelThrowRegistry::instance().findException(typeid(x)); 00120 00121 if (!registered_exception) 00122 registered_exception = ParallelThrowRegistry::instance().findException(typeid(Exception)); 00123 00124 registered_exception->setDescription(x.what()); 00125 registered_exception->setTraceback(Diag::Traceback::printTraceback(Diag::Traceback::snapshot())); 00126 00127 // std::cerr << "Exception " << demangle(typeid(*registered_exception).name()) << " will be thrown from processor " << Env::parallel_rank() << " on the next MPIH function:" << std::endl 00128 // << registered_exception->getDescription() << std::endl 00129 // << registered_exception->getTraceback() << std::endl; 00130 00131 mpih::Set_Local_Handle(const_cast<ExParallel &>(*registered_exception)); 00132 } 00133 00134 00135 void 00136 set_exception( 00137 ExParallel & x) 00138 { 00139 ExParallel *registered_exception = ParallelThrowRegistry::instance().findException(typeid(x)); 00140 00141 if (!registered_exception) 00142 registered_exception = ParallelThrowRegistry::instance().findException(typeid(Exception)); 00143 00144 registered_exception->setDescription(x.getDescription()); 00145 registered_exception->setTraceback(Diag::Traceback::printTraceback(Diag::Traceback::snapshot())); 00146 00147 // std::cerr << "Exception " << demangle(typeid(*registered_exception).name()) << " will be thrown from processor " << Env::parallel_rank() << " on the next MPIH function:" << std::endl 00148 // << registered_exception->getDescription() << std::endl 00149 // << registered_exception->getTraceback() << std::endl; 00150 00151 mpih::Set_Local_Handle(const_cast<ExParallel &>(*registered_exception)); 00152 } 00153 00154 00155 void 00156 register_stl_parallel_exceptions() 00157 { 00158 mpih::Enable(); 00159 00160 Exception::registerException(); 00161 BadAlloc::registerException(); 00162 BadCast::registerException(); 00163 BadTypeid::registerException(); 00164 LogicError::registerException(); 00165 DomainError::registerException(); 00166 InvalidArgument::registerException(); 00167 LengthError::registerException(); 00168 OutOfRange::registerException(); 00169 RuntimeError::registerException(); 00170 RangeError::registerException(); 00171 OverflowError::registerException(); 00172 UnderflowError::registerException(); 00173 BadException::registerException(); 00174 00175 mpih::Activate_Handles(); 00176 } 00177 00178 00179 void 00180 parallel_throw( 00181 MPI_Comm mpi_comm) 00182 { 00183 int nprocs; 00184 MPI_Comm_size(mpi_comm, &nprocs); 00185 00186 ExParallel **handles = new ExParallel* [nprocs]; 00187 00188 mpih::Get_Global_Handles(handles); 00189 00190 MPIH_Handler_compete handler_compete_fn; 00191 MPIH_Handler_execute handler_execute_fn; 00192 mpih::Get_Functions(&handler_compete_fn , 00193 &handler_execute_fn); 00194 00195 /* Now that we have the handles, 00196 * reset the handles so we don't throw again. This way 00197 * whatever function catches the exception we are about to 00198 * throw can call mpih and mpih will not just throw again. 00199 */ 00200 mpih::Reset_Local_Handle(); 00201 00202 /* First iterate through all of the exceptions thrown on all of 00203 * the processors, and if any of them were thrown on this 00204 * processor, print an error message and a traceback. 00205 * only the owning processor will have the traceback information. 00206 */ 00207 /* Iterate through all of the exceptions thrown on all of the processors 00208 * and call the parallel_handler() function defined by any derived from 00209 * ExParallel. This is done across all processors so that 00210 * it is valid to do collective communication inside of parallel_handler() 00211 */ 00212 for (int i = 0; i < nprocs; ++i) { 00213 if (handles[i]) { 00214 ExParallel *x = dynamic_cast<ExParallel *>(handles[i]); 00215 if (x) 00216 x->parallel_handler(); 00217 } 00218 } 00219 00220 /* Iterate through all of the exceptions thrown on all of the processors 00221 * and select the one to throw in parallel on all processors. We would 00222 * like to find one derived from ExParallel. 00223 */ 00224 00225 ExParallel *the_exception = NULL; 00226 int originating_processor = -1; 00227 00228 for (int i = 0; i < nprocs; ++i) { 00229 if (handles[i]) { 00230 ExParallel *x = dynamic_cast<ExParallel *>(handles[i]); 00231 if (x) { 00232 if (handler_compete_fn) 00233 (handler_compete_fn) (reinterpret_cast<void **>(&handles[i]), the_exception); 00234 if ( handles[i] != the_exception ) { 00235 the_exception = x; 00236 originating_processor = i; 00237 } 00238 } 00239 } 00240 } 00241 00242 delete [] handles; 00243 00244 /* Since this function is called in parallel, it is possible 00245 * to perform collective communication. Here the traceback 00246 * and error messages are broadcast and set on all processors. 00247 * These are the only two fields that are guarenteeded to be 00248 * in each exception class. Other data stored in specialized 00249 * derived classes will have to be communicated seperately. 00250 * If needed this communication could be added to a virtual 00251 * base class. That is a future enhancements depending on 00252 * the demand. 00253 */ 00254 if (the_exception) { 00255 // Copy the description from the originating process to everywhere. 00256 std::string description(the_exception->getDescriptionStream().str()); 00257 int description_len = description.length(); 00258 MPI_Bcast(&description_len, 00259 1, 00260 MPI_INT, 00261 originating_processor, 00262 mpi_comm); 00263 00264 char *description_buf = new char[description_len]; 00265 description.copy(description_buf, description_len); 00266 00267 MPI_Bcast(description_buf, 00268 description_len, 00269 MPI_CHAR, 00270 originating_processor, 00271 mpi_comm); 00272 00273 // Copy the traceback stack from the originating process to everywhere. 00274 const std::string &traceback(the_exception->getTraceback()); 00275 int traceback_len = traceback.length(); 00276 MPI_Bcast(&traceback_len, 00277 1, 00278 MPI_INT, 00279 originating_processor, 00280 mpi_comm); 00281 00282 char *traceback_buf = new char[traceback_len]; 00283 traceback.copy(traceback_buf, traceback_len); 00284 00285 MPI_Bcast(traceback_buf, 00286 traceback_len, 00287 MPI_CHAR, 00288 originating_processor, 00289 mpi_comm); 00290 00291 // Rebuild the exception from the broadcasted data 00292 the_exception->setDescription(std::string(description_buf, description_len)); 00293 the_exception->setTraceback(std::string(traceback_buf, traceback_len)); 00294 the_exception->setParallel(originating_processor); 00295 00296 // std::cerr << "Throwing exception " << demangle(typeid(*the_exception).name()) << " in parallel" << std::endl 00297 // << the_exception->getDescription() << std::endl 00298 // << the_exception->getTraceback() << std::endl; 00299 00300 #ifdef SIERRA_MPIH_VERBOSE 00301 Env::outputP0() 00302 <<"*************** Exception handling ***************"<<endl 00303 <<" A parallel exception of type "<< typeid(*the_exception).name()<<endl 00304 <<" will be thrown on all processors."<<endl; 00305 #endif 00306 00307 delete [] traceback_buf; 00308 delete [] description_buf; 00309 the_exception->throw_copy(); 00310 } 00311 else { 00312 #ifdef SIERRA_MPIH_VERBOSE 00313 Env::outputP0() 00314 <<"*************** Exception handling ***************"<<endl 00315 <<" A parallel exception of type Unknown_Exception"<<endl 00316 <<" will be thrown on all processors."<<endl; 00317 #endif 00318 throw Exception(); 00319 } 00320 } 00321 00322 } // namespace sierra