|
Sierra Toolkit
Version of the Day
|
#include <pwd.h>
#include <unistd.h>

#include <ostream>
#include <fstream>
#include <sstream>
#include <string>
#include <cstring>
#include <cstdlib>
#include <stdexcept>
#include <numeric>
#include <iomanip>
#include <algorithm>
#include <locale>
#include <map>

#include <stk_util/util/Null_Streambuf.hpp>
#include <stk_util/parallel/mpi_filebuf.hpp>

#include <stk_util/diag/Timer.hpp>
#include <stk_util/diag/Writer.hpp>
#include <stk_util/diag/WriterRegistry.hpp>
#include <stk_util/diag/Env.hpp>
#include <stk_util/diag/Platform.hpp>
#include <stk_util/diag/Signal.hpp>
#include <stk_util/parallel/Exception.hpp>
#include <stk_util/parallel/ExceptionReport.hpp>
#include <stk_util/parallel/MPI.hpp>
#include <stk_util/environment/OutputLog.hpp>
#include <stk_util/environment/ProductRegistry.hpp>
#include <stk_util/diag/StringUtil.hpp>
#include <stk_util/diag/UserPlugin.hpp>
#include <stk_util/parallel/mpih.hpp>
#include <stk_util/diag/PreParse.hpp>

#include <stk_util/environment/OutputLog.hpp>
#include <stk_util/environment/ProgramOptions.hpp>
#include <stk_util/environment/RuntimeMessage.hpp>
#include <stk_util/parallel/BroadcastArg.hpp>
#include <stk_util/parallel/ParallelReduce.hpp>
#include <stk_util/util/Bootstrap.hpp>
#include <stk_util/util/IndentStreambuf.hpp>

namespace sierra {
namespace Env {

namespace {

// Registers the "Runtime environment" command line options with the shared
// stk_classic options description.  Invoked through the Bootstrap object
// defined right after this function.
void bootstrap()
{
  // Add my command line options to the option descriptions.
  boost::program_options::options_description desc("Runtime environment", 120);
  desc.add_options()
    ("help,h", "Display command line options")
    ("directory,d", boost::program_options::value<std::string>()->default_value("./"), "Set working directory")
    ("output-log,o", boost::program_options::value<std::string>()->default_value(""), "Output log file path, one of : 'cout', 'cerr', or a file path")
    ("logfile,l", boost::program_options::value<std::string>()->default_value(""), "Output log file path, one of : 'cout', 'cerr', or a file path")
    ("pout", boost::program_options::value<std::string>()->implicit_value("-"), "Per-processor log file path")
    ("dout", boost::program_options::value<std::string>()->implicit_value("out"), "Diagnostic output stream one of: 'cout', 'cerr', 'out' or a file path")
//    ("timer", boost::program_options::value<std::string>(), "Wall and CPU time options") // , &Diag::Timer::theTimerParser())
    ("version", "Display version information")
    ("jamsub", boost::program_options::value<std::string>(), "Display user subroutine build command")
    ("runtest", boost::program_options::value<std::string>()->implicit_value("pid"), "Record process host and pid to this file")
    ("developer-mode", "Activate developer specific features")
    ("architecture", boost::program_options::value<std::string>(), "Specifies the architecture running the sierra application");

  stk_classic::get_options_description().add(desc);
}

// File-scope Bootstrap object that is handed the bootstrap() callback above.
stk_classic::Bootstrap x(&bootstrap);

// Process-wide environment state for sierra::Env.  A single instance is
// obtained through instance(); the free functions below are thin accessors
// over its members.
struct EnvData
{
  typedef std::map<ExecType, ExecInfo>    ExecMap;

  // Returns the single EnvData object (function-local static, so it is
  // constructed the first time instance() is called).
  static EnvData &instance() {
    static EnvData s_env;

    return s_env;
  }

  // Sets every member to its "not yet started" value (null communicators,
  // size/rank of -1), marks the LAG and FLUID exec entries as unassigned,
  // and registers the standard and sierra output streams by name.
  EnvData()
    : m_productName("not specified"),
      m_vm(stk_classic::get_variables_map()),
      m_nullBuf(),
      m_outputNull(&m_nullBuf),
      m_outputP0(&std::cout),
      m_output(),
      m_startTime((double) ::time(NULL)),
      m_executablePath(),
      m_shutdownRequested(false),
      m_inputFileRequired(true),
      m_checkSubCycle(false),
      m_worldComm(MPI_COMM_NULL),
      m_parallelComm(MPI_COMM_NULL),
      m_parallelSize(-1),
      m_parallelRank(-1),
      m_emptyString(),
      m_onString(PARAM_ON),
      m_inputFile("")
  {
    m_execMap[EXEC_TYPE_LAG].m_master      = -1;
    m_execMap[EXEC_TYPE_LAG].m_groupComm   = MPI_COMM_NULL;
    m_execMap[EXEC_TYPE_FLUID].m_master    = -1;
    m_execMap[EXEC_TYPE_FLUID].m_groupComm = MPI_COMM_NULL;
    stk_classic::register_log_ostream(std::cout, "cout");
    stk_classic::register_log_ostream(std::cerr, "cerr");

    stk_classic::register_ostream(sierra::out(), "out");
    stk_classic::register_ostream(sierra::pout(), "pout");
    stk_classic::register_ostream(sierra::dout(), "dout");
    stk_classic::register_ostream(sierra::tout(), "tout");

    // Send the dwout indent buffer's output to dout's buffer.
    static_cast<stk_classic::indent_streambuf *>(sierra::dwout().rdbuf())->redirect(sierra::dout().rdbuf());
  }

  // Undoes the constructor's stream setup in reverse order: dwout is pointed
  // back at std::cout's buffer, then the streams are unregistered.
  ~EnvData()
  {
    static_cast<stk_classic::indent_streambuf *>(sierra::dwout().rdbuf())->redirect(std::cout.rdbuf());

    stk_classic::unregister_ostream(tout());
    stk_classic::unregister_ostream(dout());
    stk_classic::unregister_ostream(pout());
    stk_classic::unregister_ostream(out());

    stk_classic::unregister_log_ostream(std::cerr);
    stk_classic::unregister_log_ostream(std::cout);
  }

  std::string           m_productName;          // "not specified" until StartupSierra() sets it

  boost::program_options::variables_map & m_vm; // reference to the shared parsed-options map

  null_streambuf        m_nullBuf;              // buffer behind m_outputNull
  std::ostream          m_outputNull;           // stream returned by outputNull()
  std::ostream *        m_outputP0;             // stream returned by outputP0(); starts as std::cout
  std::ostringstream    m_output;               // stream returned by output()

  double                m_startTime;            // ::time(NULL) at construction
  std::string           m_executablePath;       // set by StartupSierra() via get_program_path()

  bool                  m_shutdownRequested;
  bool                  m_inputFileRequired;    // see set_input_file_required()
  bool                  m_checkSubCycle;        // see set_check_subcycle()

  MPI_Comm              m_worldComm;

  MPI_Comm              m_parallelComm;
  int                   m_parallelSize;         // -1 until set
  int                   m_parallelRank;         // -1 until set

  ExecMap               m_execMap;              // per-ExecType master and group communicator

  const std::string     m_emptyString;
  const std::string     m_onString;             // initialised from PARAM_ON

  std::string           m_inputFile;            // see setInputFileName() / getInputFileName()
};

} // namespace <unnamed>

// Product name recorded in EnvData ("not specified" until startup sets it).
const std::string &
product_name()
{
  return EnvData::instance().m_productName;
}


// Executable path recorded in EnvData.
const std::string &
executable_file()
{
  return EnvData::instance().m_executablePath;
}


// Build time attribute of the current product, looked up in the product
// registry on first use and cached in a function-local static.
const std::string &
executable_date()
{
  static std::string executable_date;

  if (executable_date.empty())
    executable_date = ProductRegistry::instance().getProductAttribute(EnvData::instance().m_productName, ProductRegistry::BUILD_TIME);

  return executable_date;
}


// Formatted form of the recorded start time, computed on first use and
// cached in a function-local static.
const std::string &
startup_date()
{
  static std::string startup_date;

  if (startup_date.empty())
    startup_date = format_time(EnvData::instance().m_startTime).c_str();

  return startup_date;
}


// Start time recorded when EnvData was constructed.
double
start_time()
{
  return EnvData::instance().m_startTime;
}


// True when the "developer-mode" parameter is non-empty.
bool
developer_mode()
{
  return !get_param("developer-mode").empty();
}


// Stores the input file name in EnvData.
void setInputFileName(std::string name) {
  EnvData::instance().m_inputFile = name;
}

// Returns a copy of the stored input file name.
std::string getInputFileName() {
  return EnvData::instance().m_inputFile;
}

// Controls whether StartupSierra() throws (true) or only warns (false) when
// no input file was given.
void set_input_file_required(bool value)
{
  EnvData::instance().m_inputFileRequired = value;
}

// Controls whether StartupSierra() inspects the input deck for subcycling.
void set_check_subcycle(bool value)
{
  EnvData::instance().m_checkSubCycle = value;
}


// Value of the "architecture" parameter.
const std::string &
architecture()
{
  return get_param("architecture");
}


// Returns the "directory" parameter when it starts with '/'.  Otherwise, if
// getcwd() succeeds, returns the current working directory followed by '/'
// (the relative parameter value itself is not appended); if getcwd() fails
// the parameter value is returned unchanged.
const std::string
working_directory() {
  char cwd[PATH_MAX];
  std::string directory = get_param("directory");
  if (directory[0] != '/' && getcwd(cwd, PATH_MAX) != NULL) {
    directory = cwd;
    directory += '/';
  }
  return directory;
}


// The string stream held in EnvData.
std::ostream &
output()
{
  return EnvData::instance().m_output;
}


// The stream m_outputP0 currently points at.
std::ostream &
outputP0()
{
  return *EnvData::instance().m_outputP0;
}


// Stream backed by the null stream buffer.
std::ostream &
outputNull() {
  return EnvData::instance().m_outputNull;
}


// Fixed separator line used for section headings.
const char *
section_separator()
{
  static const char *s_sectionSeparator = "+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----";

  return s_sectionSeparator;
}


// Fixed separator line used for subsection headings.
const char *
subsection_separator()
{
  static const char *s_subsectionSeparator = "---------------------------------------------------";

  return s_subsectionSeparator;
}


// Builds a heading line: the title left-justified, followed by the formatted
// current wall time right-justified in the last 20 columns, so the line is
// as wide as section_separator().
std::string
section_title(
  const std::string &   title)
{
  static size_t s_sectionSeparatorLength = std::strlen(section_separator());

  std::ostringstream strout;

  strout << std::left << std::setw(s_sectionSeparatorLength - 20) << title << std::right << std::setw(20) << format_time(Env::wall_now());
  return strout.str();
}


// Recorded parallel size (-1 until set).
int parallel_size() {
  return EnvData::instance().m_parallelSize;
}

// Recorded parallel rank (-1 until set).
int parallel_rank() {
  return EnvData::instance().m_parallelRank;
}

// Recorded parallel communicator.
MPI_Comm
parallel_comm()
{
  return EnvData::instance().m_parallelComm;
}

// Recorded world communicator.
MPI_Comm
parallel_world_comm()
{
  return EnvData::instance().m_worldComm;
}

// Master recorded for the EXEC_TYPE_LAG entry (-1 until set).
int parallel_lag_master() {
  return EnvData::instance().m_execMap[EXEC_TYPE_LAG].m_master;
}

// Master recorded for the EXEC_TYPE_FLUID entry (-1 until set).
// NOTE: the closing brace of this function is the first token of the next
// source line.
int parallel_fluid_master() {
  return EnvData::instance().m_execMap[EXEC_TYPE_FLUID].m_master;
00338 } 00339 00340 int peer_group() { 00341 return EnvData::instance().m_execMap[EXEC_TYPE_PEER].m_master; 00342 } 00343 00344 std::string 00345 get_program_path(const char *program) 00346 { 00347 // If we already have the full path, just return it 00348 if (program[0] == '/') 00349 return program; 00350 00351 char full_path[PATH_MAX]; 00352 if (strchr(program, '/') != NULL) { 00353 realpath(program, full_path); 00354 return full_path; 00355 } 00356 00357 char *PATH = getenv("PATH"); 00358 while (PATH && *PATH) { 00359 // Get the character past the end of the next directory in PATH, i.e. 00360 // either the '/' or the '\0' 00361 char *end = strchr(PATH, ':'); 00362 if (!end) { 00363 end = PATH+strlen(PATH); 00364 } 00365 00366 // Set current = directory + '/' + program 00367 strncpy(full_path, PATH, end-PATH); 00368 full_path[end-PATH] = '/'; 00369 strcpy(&full_path[end-PATH+1], program); 00370 00371 // Check whether possible exists 00372 if (access(full_path, X_OK) == 0) 00373 return full_path; 00374 00375 // Advance to the next directory 00376 PATH = *end ? end+1 : end; 00377 } 00378 00379 // Not found; this shouldn't happen, but maybe the executable got deleted 00380 // after it was invoked before we got here -- or we have some crazy 00381 // parallel machine where the executable is inaccessible on the compute 00382 // nodes despite it somehow having been loaded. No big deal, just return 00383 // the non-absolute path. 
00384 return program; 00385 } 00386 00387 void parse_options(MPI_Comm comm, int *argc, char ***argv); 00388 void startup_multi_exec(MPI_Comm world_comm, ExecType my_executable_type, const std::vector<int> *peer_sizes); 00389 00390 00391 00392 bool StartupSierra(int * argc, 00393 char *** argv, 00394 const char * product_name, 00395 const char * build_time, 00396 ExecType mpi_key, 00397 const std::vector<int> *peer_sizes) { 00398 bool returnValue = false; 00399 00400 stk_classic::Bootstrap::bootstrap(); 00401 00402 EnvData &env_data = EnvData::instance(); 00403 00404 env_data.m_executablePath = get_program_path(*argv[0]); 00405 env_data.m_productName = product_name; 00406 00407 ProductRegistry::instance().setProductName(product_name); 00408 00409 ProductRegistry::AttributeMap &product_attributes = ProductRegistry::instance().getProductAttributeMap(product_name); 00410 product_attributes[ProductRegistry::BUILD_TIME] = build_time; 00411 product_attributes[ProductRegistry::EXECUTABLE] = env_data.m_executablePath; 00412 00413 // Add Utility runtime library to the product registry 00414 sierra::register_product(); 00415 00416 // Add mpih to the product registry 00417 sierra::mpih::register_product(); 00418 00419 // Add operating system information to the product registry. 
00420 ProductRegistry::AttributeMap &attr_map = ProductRegistry::instance().addProduct(osname().c_str()); 00421 attr_map[ProductRegistry::VERSION] = osversion().c_str(); 00422 00423 // Process the broadcast command line arguments 00424 namespace opt = boost::program_options; 00425 00426 opt::variables_map &vm = stk_classic::get_variables_map(); 00427 opt::options_description &od = stk_classic::get_options_description(); 00428 { 00429 boost::program_options::options_description desc("Diagnostic writers", 120); 00430 00431 for (Diag::WriterRegistry::iterator it = Diag::getWriterRegistry().begin(); it != Diag::getWriterRegistry().end(); ++it) { 00432 std::ostringstream str; 00433 str << "Diagnostic writer " << (*it).first << std::endl; 00434 (*it).second.second->describe(str); 00435 desc.add_options()((*it).first.c_str(), boost::program_options::value<std::string>(), str.str().c_str()); 00436 } 00437 00438 std::ostringstream str; 00439 str << "Wall and CPU time options" << std::endl; 00440 Diag::theTimerParser().describe(str); 00441 desc.add_options()("timer", boost::program_options::value<std::string>(), str.str().c_str()); 00442 00443 od.add(desc); 00444 } 00445 00446 for (int i = 0; i < *argc; ++i) { 00447 const std::string s((*argv)[i]); 00448 if (s == "-h" || s == "-help" || s == "--help") { 00449 std::cout << std::endl 00450 << "Sierra Usage: sierra " << lower(product_name) << " [sierra-options...] 
-O \"[" << lower(product_name) << "-options...]\"" << std::endl << std::endl 00451 // << "Usage: (MPI run) " << env_data.m_executablePath << " [options...]" << std::endl 00452 << "For example:" << std::endl 00453 << "" << std::endl 00454 << " sierra " << lower(product_name) << " -i input_deck.i -o sierra.log" << std::endl 00455 << " This creates the normal output file sierra.log" << std::endl 00456 << "" << std::endl 00457 << " sierra " << lower(product_name) << " -i input_deck.i -o sierra.log -O \"--pout=pp.log\"" << std::endl 00458 << " The per-processor output is written to pp.log.n.r for each rank, r, of n processors." << std::endl 00459 << "" << std::endl 00460 << " sierra " << lower(product_name) << " -i input_deck.i -o sierra.log -O \"--fmwkout=field,parameters\"" << std::endl 00461 << " Enable the framework field and parameter diagnostics" << std::endl 00462 << "" << std::endl 00463 << " sierra " << lower(product_name) << " -i input_deck.i -o sierra.log -O \"--timer=all\"" << std::endl 00464 << " Enable the all timers" << std::endl 00465 << std::endl 00466 << " For additional information see:" << std::endl 00467 << " http://sierra-dev.sandia.gov/stk/group__stk__util__output__log__detail.html#stk_util_output_log_howto_use_in_sierra_app" << std::endl << std::endl 00468 << product_name << " options are:" << std::endl 00469 << stk_classic::get_options_description() << std::endl; 00470 std::exit(0); 00471 } 00472 } 00473 00474 for (int i = 0; i < *argc; ++i) { 00475 const std::string s((*argv)[i]); 00476 if (s == "-jamsub" || s == "--jamsub") { 00477 const char *t = (*argv)[i + 1]; 00478 const char **symbol = sierra::Plugin::Registry::getsym<const char **>(t); 00479 if (symbol) { 00480 std::cout << *symbol << std::endl; 00481 std::exit(0); 00482 } 00483 else 00484 std::exit(1); 00485 } 00486 } 00487 00488 try { 00489 startup_preparallel_platform(); 00490 00491 // Communicator has not been set, initialize MPI if not already initialized 00492 int mpi_init_val = 0 
; 00493 if ( MPI_SUCCESS != MPI_Initialized( &mpi_init_val ) ) { 00494 throw RuntimeError() << "MPI_Initialized failed"; 00495 } 00496 00497 // Default startup communicator 00498 MPI_Comm startup_mpi_comm = MPI_COMM_WORLD; 00499 00500 // If we are initializing the comm, see if there are differing 00501 // executables running. If there are, find our partition and the 00502 // leads of the other partitions. 00503 if ( mpi_init_val == 0 ) { 00504 if ( MPI_SUCCESS != MPI_Init( argc , argv ) ) { 00505 throw RuntimeError() << "MPI_Init failed"; 00506 } 00507 00508 returnValue = true ; 00509 00510 if (mpi_key != EXEC_TYPE_WORLD) startup_multi_exec(startup_mpi_comm, mpi_key, peer_sizes); 00511 } 00512 00513 // Ready to reset the environment from NULL, we are the Lagrangian application at this point. 00514 MPI_Comm new_comm = mpi_key != EXEC_TYPE_WORLD ? env_data.m_execMap[mpi_key].m_groupComm : MPI_COMM_WORLD; 00515 reset(new_comm); 00516 } 00517 catch (const std::exception &x) { 00518 std::cerr << "SIERRA execution failed during mpi initialization with the following exception:" << std::endl 00519 << x.what() << std::endl; 00520 MPI_Abort(env_data.m_parallelComm , MPI_ERR_OTHER); 00521 } 00522 catch (...) 
{ 00523 std::cerr << "SIERRA execution failed during mpi initialization with unknown exception:" << std::endl; 00524 00525 MPI_Abort(env_data.m_parallelComm, MPI_ERR_OTHER); 00526 } 00527 00528 parse_options(env_data.m_parallelComm, argc, argv); 00529 00530 { 00531 std::ostringstream output_description; 00532 00533 // On processor 0: 00534 // [outfile=path] [poutfile=path.n.r] [doutfile=path.n.r] out>{-|cout|cerr|outfile}+pout pout>{null|poutfile} dout>{out|doutfile} 00535 00536 // On processor 1..n: 00537 // [poutfile=path.n.r] [doutfile=path.n.r] out>pout pout>{null|poutfile} dout>{out|doutfile} 00538 00539 std::string out_path1 = vm["output-log"].as<std::string>(); 00540 std::string out_path2 = vm["logfile"].as<std::string>(); 00541 00542 00543 00544 std::string originalFileName = Env::get_param("input-deck"); 00545 std::string modifiedFileName = originalFileName; 00546 00547 if(originalFileName == "") { 00548 // 00549 // If no input file specified, error out (unless just running the --version or --help option) 00550 // 00551 if ( get_param("version").empty() && get_param("help").empty() ) { 00552 if (env_data.m_inputFileRequired) { 00553 throw RuntimeError() << "No input file specified. An input file must be specified with the '-i' option"; 00554 } else { 00555 std::cerr << "WARNING: No input file specified. An input file should be specified with the '-i' option!" 
<< std::endl; 00556 } 00557 } 00558 } else if ( env_data.m_checkSubCycle ) { 00559 // Alter input-deck if subcycle present 00560 bool debugSubCycleSplit = false; 00561 std::string subCycleRegexp("^\\s*subcycle\\s+blocks\\s*="); 00562 bool subCycleSet = CaseInSensitiveRegexInFile(subCycleRegexp, originalFileName, debugSubCycleSplit); 00563 std::string coarseRegionRegexp("^\\s*begin\\s+presto\\s+region\\s+\\w+_AutoCoarseRegion\\>"); 00564 bool coarseRegionMade = CaseInSensitiveRegexInFile( coarseRegionRegexp, originalFileName, debugSubCycleSplit); 00565 std::string fineRegionRegexp("^\\s*begin\\s+presto\\s+region\\s+\\w+_AutoFineRegion\\>"); 00566 bool fineRegionMade = CaseInSensitiveRegexInFile( fineRegionRegexp, originalFileName, debugSubCycleSplit); 00567 if ( subCycleSet ) { 00568 if ( !coarseRegionMade && !fineRegionMade ) { 00569 modifiedFileName = CreateSubCycleInputFile( originalFileName ); 00570 } else { 00571 if(Env::parallel_rank() == 0) { 00572 std::cout<<"Input File: " << originalFileName << " Appears to have already been converted for subcycling. 
"; 00573 std::cout<<"Skipping input conversion " << std::endl; 00574 } 00575 } 00576 } 00577 } 00578 00579 setInputFileName(modifiedFileName); 00580 00581 00582 std::string trueOut; 00583 if(out_path2 != "") { 00584 trueOut = out_path2; 00585 } else if(out_path1 != "") { 00586 // 00587 // Old syntax compatibility, access the old output-file executable option if the logfile is not defined 00588 // 00589 trueOut = out_path1; 00590 } else { 00591 // 00592 // If log file name is unspecified, default it to (Base Input File Name).log 00593 // Use the following logic: 00594 // If the input file has an extension, replace the last ".extension" with ".log" 00595 // If the input file has no extension, append ".log" to the input file name 00596 // If the input file contains the word '.aprepro', assume aprepro was used to convert and strip out the aprepro 00597 // If the input file contains any directory movement (like ../) strip them out so log file is written to current director 00598 // 00599 00600 int dotPos = originalFileName.rfind("."); 00601 00602 if(dotPos == -1) { //No extension 00603 trueOut = originalFileName + ".log"; 00604 } else { //Extension found 00605 trueOut = originalFileName.substr(0, dotPos) + ".log"; 00606 } 00607 // 00608 // If the output path contains a ".aprepro" tag get rid of it 00609 // 00610 int apreproPos = trueOut.rfind(".aprepro"); 00611 if(apreproPos != -1) { 00612 trueOut.erase(apreproPos, 8); 00613 } 00614 // 00615 // If the output path contains a "aaa/input.i" pull off the initial directory redirects so that the log file is written int the current directory 00616 // 00617 int lastSlashPos = trueOut.rfind("/"); 00618 00619 if(lastSlashPos != -1) { 00620 trueOut.erase(0,lastSlashPos+1); 00621 } 00622 00623 00624 } 00625 00626 std::string out_path = trueOut; 00627 00628 if (out_path == "-") 00629 out_path = "cout"; 00630 00631 std::string out_ostream; 00632 00633 if (!stk_classic::get_log_ostream(out_path)) 00634 if (out_path.size() && 
out_path[0] != '/') 00635 out_path = working_directory() + out_path; 00636 00637 if (parallel_rank() == 0) { 00638 if (!stk_classic::get_log_ostream(out_path)) { 00639 output_description << "outfile=\"" << out_path << "\""; 00640 out_ostream = "outfile"; 00641 } 00642 else { 00643 out_ostream = out_path; 00644 } 00645 } 00646 else 00647 out_ostream = "null"; 00648 00649 std::string pout_ostream = "null"; 00650 if (vm.count("pout")) { 00651 std::string pout_path = vm["pout"].as<std::string>(); 00652 if (pout_path == "-") { 00653 std::ostringstream s; 00654 00655 if (stk_classic::get_log_ostream(out_path)) 00656 s << working_directory() << "sierra.log." << parallel_size() << "." << parallel_rank(); 00657 else 00658 s << out_path << "." << parallel_size() << "." << parallel_rank(); 00659 pout_path = s.str(); 00660 } 00661 else if (pout_path.find("/") == std::string::npos && !stk_classic::get_log_ostream(pout_path)) { 00662 std::ostringstream s; 00663 00664 s << working_directory() << pout_path << "." << parallel_size() << "." << parallel_rank(); 00665 pout_path = s.str(); 00666 } 00667 00668 if (!stk_classic::get_log_ostream(pout_path)) { 00669 output_description << " poutfile=\"" << pout_path << "\""; 00670 pout_ostream = "poutfile"; 00671 } 00672 else 00673 pout_ostream = pout_path; 00674 } 00675 00676 00677 std::string dout_ostream; 00678 if (vm.count("dout")) { 00679 std::string dout_path = vm["dout"].as<std::string>(); 00680 if (!dout_path.empty() && stk_classic::is_registered_ostream(dout_path)) 00681 dout_ostream = dout_path; 00682 else { 00683 std::ostringstream s; 00684 if (dout_path.size() && dout_path[0] != '/') 00685 s << working_directory() << dout_path << "." << parallel_size() << "." << parallel_rank(); 00686 else 00687 s << dout_path << parallel_size() << "." 
<< parallel_rank();; 00688 dout_path = s.str(); 00689 output_description << " doutfile=\"" << dout_path << "\""; 00690 dout_ostream = "doutfile"; 00691 } 00692 } 00693 else 00694 dout_ostream = "out"; 00695 00696 if (parallel_rank() == 0) 00697 output_description << " out>" << out_ostream << "+pout"; 00698 else 00699 output_description << " out>pout"; 00700 00701 output_description << " pout>" << pout_ostream << " dout>" << dout_ostream; 00702 00703 00704 stk_classic::bind_output_streams(output_description.str()); 00705 } 00706 00707 env_data.m_outputP0 = &sierra::out(); 00708 00709 #ifdef SIERRA_EXPORT_CONTROL_EAR99 00710 // If you are using an EAR99 export controlled version of Sierra, 00711 // any attempt to modify or bypass this section of code is a 00712 // violation of U.S. Export Control Regulations and subject to 00713 // criminal prosecution. 00714 if (parallel_size() > SIERRA_EXPORT_CONTROL_EAR99) { 00715 if (parallel_rank() == 0) { 00716 std::cerr << "ERROR: You are running an EAR99 export controlled version of\n"; 00717 std::cerr << " Sierra. For this export control level, a maximum of\n"; 00718 std::cerr << " "<<SIERRA_EXPORT_CONTROL_EAR99<<" processors is permitted\n"; 00719 } 00720 MPI_Abort(env_data.m_parallelComm, MPI_ERR_OTHER); 00721 } 00722 #endif 00723 00724 try { 00725 // Create pid file if runtest command line option specified 00726 if ( !get_param("runtest").empty() ) { 00727 00728 mpi_filebuf mpi_buf; 00729 00730 mpi_buf.open(env_data.m_parallelComm, 0, std::ios::out, get_param("runtest").c_str()); 00731 00732 if ( ! 
mpi_buf.is_open() ) 00733 throw RuntimeError() << "failed to open pid file " << get_param("runtest"); 00734 00735 std::ostream s( &mpi_buf ); 00736 s << parallel_rank() << ":" << hostname() << domainname() << ":" << pid() << ":" << pgrp() << std::endl; 00737 } 00738 00739 // Enable the timers 00740 if (!get_param("timer").empty()) { 00741 Diag::TimerParser parser; 00742 00743 Diag::sierraTimerSet().setEnabledTimerMask(parser.parse(get_param("timer").c_str())); 00744 } 00745 00746 // Enable parallel exception handling, waited until now because it needs the Env output streams 00747 register_stl_parallel_exceptions(); 00748 } 00749 catch (const std::exception &x) { 00750 std::cerr << "SIERRA execution failed during diagnostic and timer initialization with the following exception:" << std::endl 00751 << x.what() << std::endl; 00752 abort(); 00753 } 00754 catch (...) { 00755 std::cerr << "SIERRA execution failed during diagnostic and timer initialization with unknown exception:" << std::endl; 00756 abort(); 00757 } 00758 00759 // Setup the hangup, segmentation violation, illegal instruction, bus error and 00760 // terminate signal handlers. 
// (Tail of StartupSierra(); the rest of the function is on the preceding source lines.)
  // Signal handlers are installed unless the "nosignal" parameter is set.
  if (get_param("nosignal").empty())
    activate_signals();


  // True when StartupSierra() itself called MPI_Init.
  return returnValue;
}




// Runs StartupSierra() with the given arguments and remembers whether it
// initialised MPI, so that the destructor can pass that on to
// ShutDownSierra().
void
Startup::startup(
  int *                 argc,
  char ***              argv,
  const char *          product_name,
  const char *          build_time,
  ExecType              mpi_key,
  const std::vector<int> *peer_sizes) {
  m_mpiInitFlag = StartupSierra(argc, argv, product_name, build_time, mpi_key, peer_sizes);
}


// Constructing a Startup object starts the environment; see startup().
Startup::Startup(
  int *                 argc,
  char ***              argv,
  const char *          product_name,
  const char *          build_date_time,
  ExecType              mpi_key,
  const std::vector<int> *peer_sizes)
  : m_mpiInitFlag(false)
{
  startup(argc, argv, product_name, build_date_time, mpi_key, peer_sizes);
}


// Shuts the environment down: deactivates the signal handlers (unless the
// "nosignal" parameter is set), deletes the mpih handles and the keyval on
// the parallel communicator, resets the environment to MPI_COMM_NULL and,
// when mpiInitFlag is true, calls MPI_Finalize.
void ShutDownSierra(bool mpiInitFlag) {
  if (get_param("nosignal").empty())
    deactivate_signals();

  mpih::Delete_Handles();

  EnvData &env_data = EnvData::instance();
  // Uses the communicator before reset() below replaces it.
  mpih::Keyval_delete(env_data.m_parallelComm);

  reset(MPI_COMM_NULL);

  if (mpiInitFlag)
    MPI_Finalize();
}



// Destroying a Startup object shuts the environment down.
Startup::~Startup() {
  ShutDownSierra(m_mpiInitFlag);
}


// Parses the command line into the shared variables map.
// Only the first step is on this source line: a copy of argv is built in
// which every argument longer than two characters that starts with a single
// '-' gets a second '-' prepended (e.g. "-help" becomes "--help").  The
// function continues on the following source line.
void parse_options(MPI_Comm comm,
                   int *    argc,
                   char *** argv)
{
  try {
    char ** argv2 = new char *[*argc];
    for (int i = 0; i < *argc; ++i) {
      if (std::strlen((*argv)[i]) > 2 && (*argv)[i][0] == '-' && (*argv)[i][1] != '-') {
        // One extra character for the added '-' and one for the terminator.
        argv2[i] = new char[std::strlen((*argv)[i]) + 2];
        argv2[i][0] = '-';
        std::strcpy(&argv2[i][1], (*argv)[i]);
      }
      else {
        argv2[i] = new char[std::strlen((*argv)[i]) + 1];
        std::strcpy(argv2[i], (*argv)[i]);
      }
    }

    // Broadcast argc and argv to all processors.
00837 stk_classic::BroadcastArg b_arg(comm, *argc, argv2); 00838 00839 for (int i = 0; i < *argc; ++i) 00840 delete[] argv2[i]; 00841 delete[] argv2; 00842 00843 namespace opt = boost::program_options; 00844 opt::variables_map &vm = stk_classic::get_variables_map(); 00845 opt::options_description &od = stk_classic::get_options_description(); 00846 opt::store(opt::parse_command_line(b_arg.m_argc, b_arg.m_argv, od, opt::command_line_style::unix_style), vm); 00847 opt::notify(vm); 00848 00849 for (Diag::WriterRegistry::iterator it = Diag::getWriterRegistry().begin(); it != Diag::getWriterRegistry().end(); ++it) 00850 if (vm.count((*it).first.c_str())) 00851 (*it).second.second->parse(vm[(*it).first.c_str()].as<std::string>().c_str()); 00852 00853 00854 // Must have a working directory 00855 const std::string &working_dir = get_param("directory"); 00856 if ( working_dir.empty() || working_dir == PARAM_ON ) 00857 throw RuntimeError() << "working directory must be specified"; 00858 if (working_dir[working_dir.length() - 1] != '/') 00859 const_cast<std::string &>(working_dir) += '/'; 00860 00861 } 00862 catch (const std::exception &x) { 00863 std::cerr << "SIERRA execution failed during command line processing with the following exception:" << std::endl 00864 << x.what() << std::endl; 00865 MPI_Abort(comm, MPI_ERR_OTHER); 00866 } 00867 catch (...) { 00868 std::cerr << "SIERRA execution failed during command line processing with unknown exception:" << std::endl; 00869 00870 MPI_Abort(comm, MPI_ERR_OTHER); 00871 } 00872 } 00873 00874 void 00875 startup_multi_exec(MPI_Comm world_comm, 00876 ExecType my_executable_type, 00877 const std::vector<int> *peer_sizes) // can be NULL. 
00878 { 00879 EnvData &env_data = EnvData::instance(); 00880 00881 // MPI interface construction 00882 int world_size = -1 ; 00883 int world_rank = -1 ; 00884 00885 if ( MPI_Comm_size(world_comm, &world_size) != MPI_SUCCESS) 00886 throw RuntimeError() << "MPI_Comm_size failed"; 00887 00888 if ( MPI_Comm_rank(world_comm, &world_rank) != MPI_SUCCESS || -1 == world_rank ) 00889 throw RuntimeError() << "MPI_Comm_rank failed"; 00890 00891 if (my_executable_type == EXEC_TYPE_FLUID || my_executable_type == EXEC_TYPE_LAG) { 00892 // This is specific for gemini. Gemini performs three broadcasts, one for the 00893 // EXEC_TYPE_FLUID and one for the EXEC_TYPE_LAG. Also note that the ranks of processors must 00894 // be ordered such that all gemini processors come first. Gemini mandates that it;s master is 00895 // processor 0 and use ranks through its size. 00896 int lag_master = 0; 00897 int lag_rank_size = -1; 00898 int fluid_master = 0; 00899 00900 if (world_rank == 0) { 00901 typedef std::map<ExecType, std::vector<int> > ExecTypeRanks; 00902 00903 ExecTypeRanks exec_type_ranks; 00904 00905 exec_type_ranks[my_executable_type].push_back(0); 00906 00907 for (int i = 1; i < world_size; ++i) { 00908 MPI_Status status; 00909 int proc_stat[2]; // rank, ExecType 00910 if (MPI_Recv(proc_stat, 2, MPI_INTEGER, i, MPI_ANY_TAG, world_comm, &status) != MPI_SUCCESS) 00911 throw RuntimeError() << "MPI_Recv failed"; 00912 00913 exec_type_ranks[(ExecType) proc_stat[1]].push_back(proc_stat[0]); 00914 } 00915 00916 std::vector<int> &fluid_ranks = exec_type_ranks[EXEC_TYPE_FLUID]; 00917 if (fluid_ranks.size()) 00918 fluid_master = fluid_ranks.front(); 00919 00920 if (MPI_Bcast(&fluid_master, 1, MPI_INTEGER, 0, world_comm) != MPI_SUCCESS) 00921 throw RuntimeError() << "MPI_Bcast failed"; 00922 00923 std::vector<int> &lag_ranks = exec_type_ranks[EXEC_TYPE_LAG]; 00924 if (lag_ranks.size()) 00925 lag_master = lag_ranks.front(); 00926 00927 if (MPI_Bcast(&lag_master, 1, MPI_INTEGER, 0, world_comm) 
!= MPI_SUCCESS) 00928 throw RuntimeError() << "MPI_Bcast failed"; 00929 00930 lag_rank_size = lag_ranks.size(); 00931 if (MPI_Bcast(&lag_rank_size, 1, MPI_INTEGER, 0, world_comm) != MPI_SUCCESS) 00932 throw RuntimeError() << "MPI_Bcast failed"; 00933 } 00934 else { 00935 int proc_stat[2]; 00936 proc_stat[0] = world_rank; 00937 proc_stat[1] = my_executable_type; 00938 00939 if (MPI_Send(proc_stat, 2, MPI_INTEGER, 0, 0, world_comm) != MPI_SUCCESS) 00940 throw RuntimeError() << "MPI_Send failed"; 00941 00942 if (MPI_Bcast(&fluid_master, 1, MPI_INTEGER, 0, world_comm) != MPI_SUCCESS) 00943 throw RuntimeError() << "MPI_Bcast failed"; 00944 00945 if (MPI_Bcast(&lag_master, 1, MPI_INTEGER, 0, world_comm) != MPI_SUCCESS) 00946 throw RuntimeError() << "MPI_Bcast failed"; 00947 00948 if (MPI_Bcast(&lag_rank_size, 1, MPI_INTEGER, 0, world_comm) != MPI_SUCCESS) 00949 throw RuntimeError() << "MPI_Bcast failed"; 00950 } 00951 00952 MPI_Comm lag_comm = world_comm; 00953 MPI_Comm fluid_comm = MPI_COMM_NULL; 00954 const int fluid_rank_size = world_size - lag_rank_size; 00955 if (fluid_rank_size) { 00956 00957 MPI_Group world_group; 00958 MPI_Group lag_group; 00959 MPI_Group fluid_group; 00960 00961 if (MPI_Comm_group(world_comm, &world_group) != MPI_SUCCESS) 00962 throw RuntimeError() << "MPI_Comm_group failed"; 00963 00964 std::vector<int> lag_ranks; 00965 for (int i = 0; i < lag_rank_size; ++i) 00966 lag_ranks.push_back(lag_master + i); 00967 00968 if (MPI_Group_incl(world_group, lag_ranks.size(), &lag_ranks[0], &lag_group) != MPI_SUCCESS) 00969 throw RuntimeError() << "MPI_Group_incl failed"; 00970 if (MPI_Comm_create(world_comm, lag_group, &lag_comm) != MPI_SUCCESS) 00971 throw RuntimeError() << "MPI_Comm_create failed"; 00972 00973 std::vector<int> fluid_ranks; 00974 for (int i = 0; i < fluid_rank_size; ++i) 00975 fluid_ranks.push_back(fluid_master + i); 00976 00977 if (MPI_Group_incl(world_group, fluid_ranks.size(), &fluid_ranks[0], &fluid_group) != MPI_SUCCESS) 00978 throw 
RuntimeError() << "MPI_Group_incl failed"; 00979 if (MPI_Comm_create(world_comm, fluid_group, &fluid_comm) != MPI_SUCCESS) 00980 throw RuntimeError() << "MPI_Comm_create failed"; 00981 } 00982 00983 env_data.m_worldComm = world_comm; 00984 env_data.m_execMap[EXEC_TYPE_LAG].m_master = lag_master; 00985 env_data.m_execMap[EXEC_TYPE_LAG].m_groupComm = lag_comm; 00986 env_data.m_execMap[EXEC_TYPE_FLUID].m_master = fluid_master; 00987 env_data.m_execMap[EXEC_TYPE_FLUID].m_groupComm = fluid_comm; 00988 } 00989 else if (my_executable_type == EXEC_TYPE_PEER) { 00990 // This executable will run on 2 or more communicators. 00991 00992 // NOTE: Only 2 communicators is currently supported... 00993 00994 // If peer_sizes is NULL, then split world_comm into two equal 00995 // size communicators (peer(1) is larger if world_comm size is 00996 // odd) 00997 // If peer_sizes is not NULL, then split world_comm into 00998 // peer_sizes.size() sub communicators with peer(i) of size 00999 // peer_sizes(i). 01000 01001 // Sync 'peer_sizes' across all processors if non-null 01002 // For now, we limit the number of peer applications to 2. 01003 01004 if (peer_sizes != NULL && peer_sizes->size() > 2) { 01005 throw RuntimeError() << "The total number of peer application processor sizes specfied is " 01006 << peer_sizes->size() 01007 << ", but the current limit is 2."; 01008 } 01009 01010 // Peer sizes is only set correctly on processor 0 since it was passed in by the 01011 // main routine prior to MPI_Init being called. Broadcast the values to all processors. 
01012 int peers[2]; 01013 if (world_rank == 0) { 01014 if (peer_sizes != NULL) { 01015 peers[0] = (*peer_sizes)[0]; 01016 peers[1] = (*peer_sizes)[1]; 01017 } else { 01018 peers[0] = world_size / 2; 01019 peers[1] = world_size - world_size/2; 01020 } 01021 } 01022 if (MPI_Bcast(peers, 2, MPI_INTEGER, 0, world_comm) != MPI_SUCCESS) 01023 throw RuntimeError() << "MPI_Broadcast -- peers failed"; 01024 01025 // Check that the number of processes specified is equal to the 01026 // total number of processes 01027 int peer_proc_count = peers[0] + peers[1]; 01028 if (peer_proc_count != world_size) { 01029 throw RuntimeError() << "The total number of peer processors specfied is " << peer_proc_count 01030 << " which is not equal to the total number of processors (" << world_size << ")."; 01031 } 01032 01033 int my_peer_group = MPI_UNDEFINED; 01034 int sum = 0; 01035 for (size_t i=0; i < 2; i++) { 01036 sum += peers[i]; 01037 if (world_rank < sum) { 01038 my_peer_group = i; 01039 break; 01040 } 01041 } 01042 01043 MPI_Comm peer_comm; 01044 if (MPI_Comm_split(world_comm, my_peer_group, world_rank, &peer_comm) != MPI_SUCCESS) { 01045 throw RuntimeError() << "MPI_Comm_split failed"; 01046 } 01047 env_data.m_worldComm = world_comm; 01048 env_data.m_execMap[EXEC_TYPE_PEER].m_groupComm = peer_comm; 01049 env_data.m_execMap[EXEC_TYPE_PEER].m_master = my_peer_group; // Overloading meaning to peer group. 
01050 } 01051 } 01052 01053 bool 01054 is_comm_valid() 01055 { 01056 EnvData &env_data = EnvData::instance(); 01057 if (env_data.m_parallelComm == MPI_COMM_NULL) { 01058 return false; 01059 } else { 01060 return true; 01061 } 01062 } 01063 01064 void 01065 reset( 01066 MPI_Comm new_comm) 01067 { 01068 EnvData &env_data = EnvData::instance(); 01069 01070 // Destroy old comm 01071 if (env_data.m_parallelComm != MPI_COMM_NULL) { 01072 01073 if (new_comm != MPI_COMM_NULL) { 01074 mpih::Sub_Communicator(env_data.m_parallelComm, new_comm); 01075 } 01076 01077 env_data.m_parallelComm = MPI_COMM_NULL ; 01078 env_data.m_parallelSize = -1; 01079 env_data.m_parallelRank = -1 ; 01080 } 01081 01082 setMpiCommunicator(new_comm); 01083 } 01084 01085 void setMpiCommunicator(MPI_Comm communicator) 01086 { 01087 EnvData &env_data = EnvData::instance(); 01088 if(communicator != MPI_COMM_NULL) 01089 { 01090 env_data.m_parallelComm = communicator; 01091 01092 if(MPI_Comm_size(env_data.m_parallelComm, &env_data.m_parallelSize) != MPI_SUCCESS 01093 || MPI_Comm_rank(env_data.m_parallelComm, &env_data.m_parallelRank) != MPI_SUCCESS 01094 || env_data.m_parallelSize == -1 01095 || env_data.m_parallelRank == -1) 01096 { 01097 throw RuntimeError() << "reset given bad MPI communicator"; 01098 } 01099 } 01100 } 01101 01102 void 01103 output_flush() 01104 { 01105 EnvData &env_data = EnvData::instance(); 01106 01107 stk_classic::report_deferred_messages(Env::parallel_comm()); 01108 01109 stk_classic::all_write_string(Env::parallel_comm(), *env_data.m_outputP0, env_data.m_output.str()); 01110 env_data.m_output.str(""); 01111 } 01112 01113 01114 void 01115 request_shutdown(bool shutdown) 01116 { 01117 EnvData::instance().m_shutdownRequested = shutdown; 01118 } 01119 01120 01121 bool 01122 is_shutdown_requested() 01123 { 01124 int shutdown_requested_in = EnvData::instance().m_shutdownRequested || Env::HUP_received(); 01125 int shutdown_requested; 01126 01127 MPI_Allreduce(&shutdown_requested_in, 
&shutdown_requested, 1, MPI_INT, MPI_SUM, Env::parallel_comm()); 01128 01129 return shutdown_requested != 0; 01130 } 01131 01132 01133 void abort() { 01134 EnvData &env_data = EnvData::instance(); 01135 01136 // Cannot be sure of parallel synchronization status; therefore, no communications can 01137 // occur. Grab and dump all pending output buffers to 'std::cerr'. 01138 std::cerr << std::endl 01139 << "*** SIERRA ABORT on P" << EnvData::instance().m_parallelRank << " ***" 01140 << std::endl 01141 << "*** check " << get_param("output-log") 01142 << " file for more information ***" 01143 << std::endl ; 01144 01145 if (!env_data.m_output.str().empty()) { 01146 std::cerr << "Buffer contents of deferred output stream on processor " << parallel_rank() 01147 << std::endl ; 01148 std::cerr << env_data.m_output.str(); 01149 } 01150 01151 std::cerr.flush(); 01152 std::cout.flush(); 01153 01154 ::sleep(1); // Give the other processors a chance at 01155 // catching up, seems to help hanging problems. 
01156 MPI_Abort(env_data.m_parallelComm, MPI_ERR_OTHER); // First try to die 01157 std::exit( EXIT_FAILURE ); // Second try to die 01158 } 01159 01160 01161 const std::string & 01162 get_param( 01163 const char * const option) 01164 { 01165 if (EnvData::instance().m_vm.count(option)) { 01166 if (EnvData::instance().m_vm[option].as<std::string>().empty()) 01167 return EnvData::instance().m_onString; 01168 else 01169 return EnvData::instance().m_vm[option].as<std::string>(); 01170 } 01171 else 01172 return EnvData::instance().m_emptyString; 01173 } 01174 01175 01176 void 01177 set_param( 01178 const char * option, 01179 const std::string & value) { 01180 01181 01182 namespace opt = boost::program_options; 01183 01184 opt::variables_map &vm = stk_classic::get_variables_map(); 01185 opt::options_description &od = stk_classic::get_options_description(); 01186 01187 int argc = 1; 01188 char *s = std::strcpy(new char[std::strlen(option) + 1], option); 01189 01190 opt::store(opt::parse_command_line(argc, &s, od), vm); 01191 opt::notify(vm); 01192 01193 delete [] s; 01194 } 01195 01196 } // namespace Env 01197 } // namespace sierra