00001 /* @HEADER@ */ 00002 // ************************************************************************ 00003 // 00004 // Playa: Programmable Linear Algebra 00005 // Copyright 2012 Sandia Corporation 00006 // 00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, 00008 // the U.S. Government retains certain rights in this software. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Kevin Long (kevin.long@ttu.edu) 00038 // 00039 00040 /* @HEADER@ */ 00041 00042 #ifndef Playa_ERRORPOLLING_H 00043 #define Playa_ERRORPOLLING_H 00044 00045 #include "Teuchos_ConfigDefs.hpp" 00046 #include "Teuchos_Assert.hpp" 00047 00048 /*! \defgroup ErrorPolling_grp Utility code for synchronizing std::exception detection across processors. 00049 */ 00050 //@{ 00051 00052 namespace Playa 00053 { 00054 class MPIComm; 00055 00056 /** \brief ErrorPolling provides utilities for establishing agreement 00057 * between processors on whether an std::exception has been detected on any one 00058 * processor. 00059 * 00060 * The two functions must be used in a coordinated way. The simplest use 00061 * case is to embed a call to reportFailure() whenever an std::exception is 00062 * detected at the top-level try/catch block, and then to do a call to 00063 * pollForFailures() whenever it is desired to check for off-processor 00064 * errors before proceeding. The macro 00065 00066 \code 00067 TEUCHOS_TEST_FOR_FAILURE(comm); 00068 \endcode 00069 00070 * calls pollForFailures() and throws an std::exception if the return value is 00071 * true. 00072 * 00073 * Polling is a collective operation (an MPI_Reduce) and so incurs some 00074 * performance overhead. It can be disabled with a call to 00075 * \code 00076 * Teuchos::ErrorPolling::disable(); 00077 * \endcode 00078 * IMPORTANT: all processors must agree on whether collective error checking 00079 * is enabled or disabled. If there are inconsistent states, the reduction 00080 * operations in pollForFailures() will hang because some processors cannot be 00081 * contacted. 00082 */ 00083 class ErrorPolling 00084 { 00085 public: 00086 /** Call this function upon catching an std::exception in order to 00087 * inform other processors of the error. This function will do an 00088 * AllReduce in conjunction with calls to either this function or 00089 * its partner, pollForFailures(), on the other processors. This 00090 * procedure has the effect of communicating to the other 00091 * processors that an std::exception has been detected on this one. */ 00092 static void reportFailure(const MPIComm& comm); 00093 00094 /** Call this function after std::exception-free completion of a 00095 * try/catch block. This function will do an AllReduce in 00096 * conjunction with calls to either this function or its partner, 00097 * reportFailure(), on the other processors. If a failure has been 00098 * reported by another processor, the call to pollForFailures() 00099 * will return true and an std::exception can be thrown. */ 00100 static bool pollForFailures(const MPIComm& comm); 00101 00102 /** Activate error polling */ 00103 static void enable() {isActive()=true;} 00104 00105 /** Disable error polling */ 00106 static void disable() {isActive()=false;} 00107 00108 private: 00109 /** Set or check whether error polling is active */ 00110 static bool& isActive() {static bool rtn = true; return rtn;} 00111 }; 00112 00113 /** 00114 * This macro polls all processors in the given communicator to find 00115 * out whether an error has been reported by a call to 00116 * ErrorPolling::reportFailure(comm). 00117 * 00118 * @param comm [in] The communicator on which polling will be done 00119 */ 00120 #define TEUCHOS_POLL_FOR_FAILURES(comm) \ 00121 TEUCHOS_TEST_FOR_EXCEPTION(Playa::ErrorPolling::pollForFailures(comm), \ 00122 std::runtime_error, \ 00123 "off-processor error detected by proc=" << (comm).getRank()); 00124 } 00125 00126 //@} 00127 00128 #endif