PlayaErrorPolling.hpp
Go to the documentation of this file.
00001 // @HEADER
00002 // @HEADER
00003 
00004 #ifndef Playa_ERRORPOLLING_H
00005 #define Playa_ERRORPOLLING_H
00006 
00007 #include "Teuchos_ConfigDefs.hpp"
00008 #include "Teuchos_Assert.hpp"
00009 
00010 /*! \defgroup ErrorPolling_grp Utility code for synchronizing std::exception detection across processors. 
00011 */
00012 //@{
00013 
00014 namespace Playa
00015 {
00016   class MPIComm;
00017 
00018   /** \brief ErrorPolling provides utilities for establishing agreement
00019    * between processors on whether an std::exception has been detected on any one
00020    * processor.
00021    *
00022    * The two functions must be used in a coordinated way. The simplest use
00023    * case is to embed a call to reportFailure() whenever an std::exception is
00024    * detected at the top-level try/catch block, and then to do a call to
00025    * pollForFailures() whenever it is desired to check for off-processor
00026    * errors before proceeding. The macro
00027 
00028     \code
00029     TEUCHOS_POLL_FOR_FAILURES(comm);
00030     \endcode  
00031 
00032    * calls pollForFailures() and throws an std::exception if the return value is
00033    * true.
00034    *
00035    * Polling is a collective operation (an MPI_Allreduce) and so incurs some
00036    * performance overhead. It can be disabled with a call to 
00037    * \code
00038    * Playa::ErrorPolling::disable();
00039    * \endcode 
00040    * IMPORTANT: all processors must agree on whether collective error checking
00041    * is enabled or disabled. If there are inconsistent states, the reduction
00042    * operations in pollForFailures() will hang because some processors cannot be 
00043    * contacted. 
00044    */
00045   class TEUCHOS_LIB_DLL_EXPORT ErrorPolling
00046   {
00047   public:
00048     /** Call this function upon catching an std::exception in order to
00049      * inform other processors of the error. This function will do an
00050      * AllReduce in conjunction with calls to either this function or
00051      * its partner, pollForFailures(), on the other processors. This
00052      * procedure has the effect of communicating to the other
00053      * processors that an std::exception has been detected on this one. @param comm [in] The communicator whose processors are informed of the failure. */
00054     static void reportFailure(const MPIComm& comm);
00055     
00056     /** Call this function after exception-free completion of a
00057      * try/catch block. This function will do an AllReduce in
00058      * conjunction with calls to either this function or its partner,
00059      * reportFailure(), on the other processors. If a failure has been
00060      * reported by another processor, the call to pollForFailures()
00061      * will return true and an std::exception can be thrown. @param comm [in] The communicator on which polling is done. @return true if a failure has been reported by another processor. */
00062     static bool pollForFailures(const MPIComm& comm);
00063     
00064     /** Activate error polling by setting the flag returned by isActive() to true. */
00065     static void enable() {isActive()=true;}
00066 
00067     /** Disable error polling by setting the flag returned by isActive() to false. */
00068     static void disable() {isActive()=false;}
00069 
00070   private:
00071     /** Set or check whether error polling is active. Returns a mutable reference to a function-local static bool that is initialized to true, so polling is active until disable() is called. */
00072     static bool& isActive() {static bool rtn = true; return rtn;}
00073   };
00074 
00075   /** 
00076    * This macro polls all processors in the given communicator to find
00077    * out whether an error has been reported by a call to 
00078    * ErrorPolling::reportFailure(comm).
00079    * It passes Playa::ErrorPolling::pollForFailures(comm) as the test to TEUCHOS_TEST_FOR_EXCEPTION with std::runtime_error and a message containing (comm).getRank(). The expansion uses comm twice and already ends in a semicolon.
00080    * @param comm [in] The communicator on which polling will be done
00081    */
00082 #define TEUCHOS_POLL_FOR_FAILURES(comm)                                  \
00083   TEUCHOS_TEST_FOR_EXCEPTION(Playa::ErrorPolling::pollForFailures(comm), \
00084                      std::runtime_error,                                     \
00085                      "off-processor error detected by proc=" << (comm).getRank());
00086 }
00087 
00088 //@}
00089 
00090 #endif

Site Contact