|
Teuchos Package Browser (Single Doxygen Collection)
Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Teuchos: Common Tools Package 00005 // Copyright (2004) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00038 // 00039 // *********************************************************************** 00040 // @HEADER 00041 00042 #ifndef TEUCHOS_MPI_COMM_HPP 00043 #define TEUCHOS_MPI_COMM_HPP 00044 00045 00046 #include "Teuchos_Comm.hpp" 00047 #include "Teuchos_CommUtilities.hpp" 00048 #include "Teuchos_OrdinalTraits.hpp" 00049 #include "Teuchos_OpaqueWrapper.hpp" 00050 #include "Teuchos_MpiReductionOpSetter.hpp" 00051 #include "Teuchos_SerializationTraitsHelpers.hpp" 00052 #include "Teuchos_Workspace.hpp" 00053 #include "Teuchos_TypeNameTraits.hpp" 00054 #include "Teuchos_as.hpp" 00055 #include "Teuchos_Assert.hpp" 00056 #include "mpi.h" 00057 #include <iterator> 00058 00059 // This must be defined globally for the whole program! 00060 //#define TEUCHOS_MPI_COMM_DUMP 00061 00062 00063 #ifdef TEUCHOS_MPI_COMM_DUMP 00064 # include "Teuchos_VerboseObject.hpp" 00065 #endif 00066 00067 00068 namespace Teuchos { 00069 00071 std::string 00072 mpiErrorCodeToString (const int err); 00073 00074 namespace details { 00088 void safeCommFree (MPI_Comm* comm); 00089 00094 int setCommErrhandler (MPI_Comm comm, MPI_Errhandler handler); 00095 00096 } // namespace details 00097 00098 #ifdef TEUCHOS_MPI_COMM_DUMP 00099 template<typename Ordinal, typename T> 00100 void dumpBuffer( 00101 const std::string &funcName, const std::string &buffName 00102 ,const Ordinal bytes, const T buff[] 00103 ) 00104 { 00105 Teuchos::RCP<Teuchos::FancyOStream> 00106 out = Teuchos::VerboseObjectBase::getDefaultOStream(); 00107 Teuchos::OSTab tab(out); 00108 *out 00109 << "\n" << funcName << "::" << buffName << ":\n"; 00110 tab.incrTab(); 00111 for( Ordinal i = 0; i < bytes; ++i ) { 00112 *out << buffName << "[" << i << "] = '" << buff[i] << "'\n"; 00113 } 00114 *out << "\n"; 00115 } 00116 #endif // TEUCHOS_MPI_COMM_DUMP 00117 00118 00130 template<class OrdinalType> 00131 class MpiCommStatus : public CommStatus<OrdinalType> { 00132 public: 00133 MpiCommStatus (MPI_Status status) : status_ (status) {} 00134 00136 virtual ~MpiCommStatus() {} 00137 00139 OrdinalType getSourceRank () { return status_.MPI_SOURCE; } 00140 00142 OrdinalType getTag () { return status_.MPI_TAG; } 00143 00145 OrdinalType getError () { return status_.MPI_ERROR; } 00146 00147 private: 00149 MpiCommStatus (); 00150 00152 MPI_Status status_; 00153 }; 00154 00158 template<class OrdinalType> 00159 inline RCP<MpiCommStatus<OrdinalType> > 00160 mpiCommStatus (MPI_Status rawMpiStatus) 00161 { 00162 return rcp (new MpiCommStatus<OrdinalType> (rawMpiStatus)); 00163 } 00164 00165 00181 template<class OrdinalType> 00182 class MpiCommRequestBase : public CommRequest<OrdinalType> { 00183 public: 00185 MpiCommRequestBase () : 00186 rawMpiRequest_ (MPI_REQUEST_NULL) 00187 {} 00188 00190 MpiCommRequestBase (MPI_Request rawMpiRequest) : 00191 rawMpiRequest_ (rawMpiRequest) 00192 {} 00193 00201 MPI_Request releaseRawMpiRequest() 00202 { 00203 MPI_Request tmp_rawMpiRequest = rawMpiRequest_; 00204 rawMpiRequest_ = MPI_REQUEST_NULL; 00205 return tmp_rawMpiRequest; 00206 } 00207 00209 bool isNull() const { 00210 return rawMpiRequest_ == MPI_REQUEST_NULL; 00211 } 00212 00218 RCP<CommStatus<OrdinalType> > wait () { 00219 MPI_Status rawMpiStatus; 00220 // Whether this function satisfies the strong exception guarantee 00221 // depends on whether MPI_Wait modifies its input request on error. 00222 const int err = MPI_Wait (&rawMpiRequest_, &rawMpiStatus); 00223 TEUCHOS_TEST_FOR_EXCEPTION( 00224 err != MPI_SUCCESS, std::runtime_error, 00225 "Teuchos: MPI_Wait() failed with error \"" 00226 << mpiErrorCodeToString (err)); 00227 // MPI_Wait sets the MPI_Request to MPI_REQUEST_NULL on success. 00228 return mpiCommStatus<OrdinalType> (rawMpiStatus); 00229 } 00230 00235 RCP<CommStatus<OrdinalType> > cancel () { 00236 if (rawMpiRequest_ == MPI_REQUEST_NULL) { 00237 return null; 00238 } 00239 else { 00240 int err = MPI_Cancel (&rawMpiRequest_); 00241 TEUCHOS_TEST_FOR_EXCEPTION( 00242 err != MPI_SUCCESS, std::runtime_error, 00243 "Teuchos: MPI_Cancel failed with the following error: " 00244 << mpiErrorCodeToString (err)); 00245 00246 // Wait on the request. If successful, MPI_Wait will set the 00247 // MPI_Request to MPI_REQUEST_NULL. The returned status may 00248 // still be useful; for example, one may call MPI_Test_cancelled 00249 // to test an MPI_Status from a nonblocking send. 00250 MPI_Status status; 00251 err = MPI_Wait (&rawMpiRequest_, &status); 00252 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, 00253 "Teuchos::MpiCommStatus::cancel: MPI_Wait failed with the following " 00254 "error: " << mpiErrorCodeToString (err)); 00255 return mpiCommStatus<OrdinalType> (status); 00256 } 00257 } 00258 00260 virtual ~MpiCommRequestBase () { 00261 if (rawMpiRequest_ != MPI_REQUEST_NULL) { 00262 // We're in a destructor, so don't throw errors. However, if 00263 // MPI_Cancel fails, it's probably a bad idea to call MPI_Wait. 00264 const int err = MPI_Cancel (&rawMpiRequest_); 00265 if (err == MPI_SUCCESS) { 00266 // The MPI_Cancel succeeded. Now wait on the request. Ignore 00267 // any reported error, since we can't do anything about those 00268 // in the destructor (other than kill the program). If 00269 // successful, MPI_Wait will set the MPI_Request to 00270 // MPI_REQUEST_NULL. We ignore the returned MPI_Status, since 00271 // if the user let the request fall out of scope, she must not 00272 // care about the status. 00273 // 00274 // mfh 21 Oct 2012: The MPI standard requires completing a 00275 // canceled request by calling a function like MPI_Wait, 00276 // MPI_Test, or MPI_Request_free. MPI_Wait on a canceled 00277 // request behaves like a local operation (it does not 00278 // communicate or block waiting for communication). One could 00279 // also call MPI_Request_free instead of MPI_Wait, but 00280 // MPI_Request_free is intended more for persistent requests 00281 // (created with functions like MPI_Recv_init). 00282 (void) MPI_Wait (&rawMpiRequest_, MPI_STATUS_IGNORE); 00283 } 00284 } 00285 } 00286 00287 private: 00289 MPI_Request rawMpiRequest_; 00290 }; 00291 00292 00308 template<class OrdinalType> 00309 class MpiCommRequest : public MpiCommRequestBase<OrdinalType> { 00310 public: 00312 MpiCommRequest () : 00313 MpiCommRequestBase<OrdinalType> (MPI_REQUEST_NULL), 00314 numBytes_ (0) 00315 {} 00316 00318 MpiCommRequest (MPI_Request rawMpiRequest, 00319 const ArrayView<char>::size_type numBytesInMessage) : 00320 MpiCommRequestBase<OrdinalType> (rawMpiRequest), 00321 numBytes_ (numBytesInMessage) 00322 {} 00323 00329 ArrayView<char>::size_type numBytes () const { 00330 return numBytes_; 00331 } 00332 00334 virtual ~MpiCommRequest () {} 00335 00336 private: 00338 ArrayView<char>::size_type numBytes_; 00339 }; 00340 00349 template<class OrdinalType> 00350 inline RCP<MpiCommRequest<OrdinalType> > 00351 mpiCommRequest (MPI_Request rawMpiRequest, 00352 const ArrayView<char>::size_type numBytes) 00353 { 00354 return rcp (new MpiCommRequest<OrdinalType> (rawMpiRequest, numBytes)); 00355 } 00356 00372 template<typename Ordinal> 00373 class MpiComm : public Comm<Ordinal> { 00374 public: 00375 00377 00378 00399 explicit MpiComm (MPI_Comm rawMpiComm); 00400 00415 MpiComm( 00416 const RCP<const OpaqueWrapper<MPI_Comm> > &rawMpiComm 00417 ); 00418 00436 MpiComm (const RCP<const OpaqueWrapper<MPI_Comm> >& rawMpiComm, 00437 const int defaultTag); 00438 00439 public: 00440 00457 MpiComm(const MpiComm<Ordinal>& other); 00458 00460 RCP<const OpaqueWrapper<MPI_Comm> > getRawMpiComm() const 00461 {return rawMpiComm_;} 00462 00527 void setErrorHandler (const RCP<const OpaqueWrapper<MPI_Errhandler> >& errHandler); 00528 00530 00531 00532 00534 virtual int getRank() const; 00536 virtual int getSize() const; 00538 virtual void barrier() const; 00540 virtual void broadcast( 00541 const int rootRank, const Ordinal bytes, char buffer[] 00542 ) const; 00544 virtual void 00545 gather (const Ordinal sendBytes, const char sendBuffer[], 00546 const Ordinal recvBytes, char recvBuffer[], 00547 const int root) const; 00549 virtual void gatherAll( 00550 const Ordinal sendBytes, const char sendBuffer[] 00551 ,const Ordinal recvBytes, char recvBuffer[] 00552 ) const; 00554 virtual void reduceAll( 00555 const ValueTypeReductionOp<Ordinal,char> &reductOp 00556 ,const Ordinal bytes, const char sendBuffer[], char globalReducts[] 00557 ) const; 00559 virtual void reduceAllAndScatter( 00560 const ValueTypeReductionOp<Ordinal,char> &reductOp 00561 ,const Ordinal sendBytes, const char sendBuffer[] 00562 ,const Ordinal recvCounts[], char myGlobalReducts[] 00563 ) const; 00565 virtual void scan( 00566 const ValueTypeReductionOp<Ordinal,char> &reductOp 00567 ,const Ordinal bytes, const char sendBuffer[], char scanReducts[] 00568 ) const; 00570 virtual void send( 00571 const Ordinal bytes, const char sendBuffer[], const int destRank 00572 ) const; 00574 virtual void 00575 send (const Ordinal bytes, 00576 const char sendBuffer[], 00577 const int destRank, 00578 const int tag) const; 00580 virtual void ssend( 00581 const Ordinal bytes, const char sendBuffer[], const int destRank 00582 ) const; 00584 virtual void 00585 ssend (const Ordinal bytes, 00586 const char sendBuffer[], 00587 const int destRank, 00588 const int tag) const; 00590 virtual int receive( 00591 const int sourceRank, const Ordinal bytes, char recvBuffer[] 00592 ) const; 00594 virtual void readySend( 00595 const ArrayView<const char> &sendBuffer, 00596 const int destRank 00597 ) const; 00599 virtual void 00600 readySend (const Ordinal bytes, 00601 const char sendBuffer[], 00602 const int destRank, 00603 const int tag) const; 00605 virtual RCP<CommRequest<Ordinal> > isend( 00606 const ArrayView<const char> &sendBuffer, 00607 const int destRank 00608 ) const; 00610 virtual RCP<CommRequest<Ordinal> > 00611 isend (const ArrayView<const char> &sendBuffer, 00612 const int destRank, 00613 const int tag) const; 00615 virtual RCP<CommRequest<Ordinal> > ireceive( 00616 const ArrayView<char> &Buffer, 00617 const int sourceRank 00618 ) const; 00620 virtual RCP<CommRequest<Ordinal> > 00621 ireceive (const ArrayView<char> &Buffer, 00622 const int sourceRank, 00623 const int tag) const; 00625 virtual void waitAll( 00626 const ArrayView<RCP<CommRequest<Ordinal> > > &requests 00627 ) const; 00629 virtual void 00630 waitAll (const ArrayView<RCP<CommRequest<Ordinal> > >& requests, 00631 const ArrayView<RCP<CommStatus<Ordinal> > >& statuses) const; 00633 virtual RCP<CommStatus<Ordinal> > 00634 wait (const Ptr<RCP<CommRequest<Ordinal> > >& request) const; 00636 virtual RCP< Comm<Ordinal> > duplicate() const; 00638 virtual RCP< Comm<Ordinal> > split(const int color, const int key) const; 00640 virtual RCP< Comm<Ordinal> > createSubcommunicator( 00641 const ArrayView<const int>& ranks) const; 00642 00644 00645 00646 00648 std::string description() const; 00649 00651 00652 // These should be private but the PGI compiler requires them be public 00653 00654 static int const minTag_ = 26000; // These came from Teuchos::MpiComm??? 00655 static int const maxTag_ = 26099; // "" 00656 00662 int getTag () const { return tag_; } 00663 00664 private: 00665 00669 void setupMembersFromComm(); 00670 static int tagCounter_; 00671 00679 RCP<const OpaqueWrapper<MPI_Comm> > rawMpiComm_; 00680 00682 int rank_; 00683 00685 int size_; 00686 00694 int tag_; 00695 00697 RCP<const OpaqueWrapper<MPI_Errhandler> > customErrorHandler_; 00698 00699 void assertRank(const int rank, const std::string &rankName) const; 00700 00701 // Not defined and not to be called! 00702 MpiComm(); 00703 00704 #ifdef TEUCHOS_MPI_COMM_DUMP 00705 public: 00706 static bool show_dump; 00707 #endif // TEUCHOS_MPI_COMM_DUMP 00708 00709 }; 00710 00711 00725 template<typename Ordinal> 00726 RCP<MpiComm<Ordinal> > 00727 createMpiComm( 00728 const RCP<const OpaqueWrapper<MPI_Comm> > &rawMpiComm 00729 ); 00730 00731 00732 // //////////////////////// 00733 // Implementations 00734 00735 00736 // Static members 00737 00738 00739 template<typename Ordinal> 00740 int MpiComm<Ordinal>::tagCounter_ = MpiComm<Ordinal>::minTag_; 00741 00742 00743 // Constructors 00744 00745 00746 template<typename Ordinal> 00747 MpiComm<Ordinal>:: 00748 MpiComm (const RCP<const OpaqueWrapper<MPI_Comm> >& rawMpiComm) 00749 { 00750 TEUCHOS_TEST_FOR_EXCEPTION( 00751 rawMpiComm.get () == NULL, std::invalid_argument, 00752 "Teuchos::MpiComm constructor: The input RCP is null."); 00753 TEUCHOS_TEST_FOR_EXCEPTION( 00754 *rawMpiComm == MPI_COMM_NULL, std::invalid_argument, 00755 "Teuchos::MpiComm constructor: The given MPI_Comm is MPI_COMM_NULL."); 00756 00757 rawMpiComm_ = rawMpiComm; 00758 00759 // mfh 09 Jul 2013: Please resist the temptation to modify the given 00760 // MPI communicator's error handler here. See Bug 5943. Note that 00761 // an MPI communicator's default error handler is 00762 // MPI_ERRORS_ARE_FATAL, which immediately aborts on error (without 00763 // returning an error code from the MPI function). Users who want 00764 // MPI functions instead to return an error code if they encounter 00765 // an error, should set the error handler to MPI_ERRORS_RETURN. DO 00766 // NOT SET THE ERROR HANDLER HERE!!! Teuchos' MPI wrappers should 00767 // always check the error code returned by an MPI function, 00768 // regardless of the error handler. Users who want to set the error 00769 // handler on an MpiComm may call its setErrorHandler method. 00770 00771 setupMembersFromComm (); 00772 } 00773 00774 00775 template<typename Ordinal> 00776 MpiComm<Ordinal>:: 00777 MpiComm (const RCP<const OpaqueWrapper<MPI_Comm> >& rawMpiComm, 00778 const int defaultTag) 00779 { 00780 TEUCHOS_TEST_FOR_EXCEPTION( 00781 rawMpiComm.get () == NULL, std::invalid_argument, 00782 "Teuchos::MpiComm constructor: The input RCP is null."); 00783 TEUCHOS_TEST_FOR_EXCEPTION( 00784 *rawMpiComm == MPI_COMM_NULL, std::invalid_argument, 00785 "Teuchos::MpiComm constructor: The given MPI_Comm is MPI_COMM_NULL."); 00786 00787 rawMpiComm_ = rawMpiComm; 00788 // Set size_ (the number of processes in the communicator). 00789 int err = MPI_Comm_size (*rawMpiComm_, &size_); 00790 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, 00791 "Teuchos::MpiComm constructor: MPI_Comm_size failed with " 00792 "error \"" << mpiErrorCodeToString (err) << "\"."); 00793 // Set rank_ (the calling process' rank). 00794 err = MPI_Comm_rank (*rawMpiComm_, &rank_); 00795 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, 00796 "Teuchos::MpiComm constructor: MPI_Comm_rank failed with " 00797 "error \"" << mpiErrorCodeToString (err) << "\"."); 00798 tag_ = defaultTag; // set the default message tag 00799 } 00800 00801 00802 template<typename Ordinal> 00803 MpiComm<Ordinal>::MpiComm (MPI_Comm rawMpiComm) 00804 { 00805 TEUCHOS_TEST_FOR_EXCEPTION(rawMpiComm == MPI_COMM_NULL, 00806 std::invalid_argument, "Teuchos::MpiComm constructor: The given MPI_Comm " 00807 "is MPI_COMM_NULL."); 00808 // We don't supply a "free" function here, since this version of the 00809 // constructor makes the caller responsible for freeing rawMpiComm 00810 // after use if necessary. 00811 rawMpiComm_ = opaqueWrapper<MPI_Comm> (rawMpiComm); 00812 00813 // mfh 09 Jul 2013: Please resist the temptation to modify the given 00814 // MPI communicator's error handler here. See Bug 5943. Note that 00815 // an MPI communicator's default error handler is 00816 // MPI_ERRORS_ARE_FATAL, which immediately aborts on error (without 00817 // returning an error code from the MPI function). Users who want 00818 // MPI functions instead to return an error code if they encounter 00819 // an error, should set the error handler to MPI_ERRORS_RETURN. DO 00820 // NOT SET THE ERROR HANDLER HERE!!! Teuchos' MPI wrappers should 00821 // always check the error code returned by an MPI function, 00822 // regardless of the error handler. Users who want to set the error 00823 // handler on an MpiComm may call its setErrorHandler method. 00824 00825 setupMembersFromComm (); 00826 } 00827 00828 00829 template<typename Ordinal> 00830 MpiComm<Ordinal>::MpiComm (const MpiComm<Ordinal>& other) : 00831 rawMpiComm_ (opaqueWrapper<MPI_Comm> (MPI_COMM_NULL)) // <- This will be set below 00832 { 00833 // These are logic errors, since they violate MpiComm's invariants. 00834 RCP<const OpaqueWrapper<MPI_Comm> > origCommPtr = other.getRawMpiComm (); 00835 TEUCHOS_TEST_FOR_EXCEPTION(origCommPtr == null, std::logic_error, 00836 "Teuchos::MpiComm copy constructor: " 00837 "The input's getRawMpiComm() method returns null."); 00838 MPI_Comm origComm = *origCommPtr; 00839 TEUCHOS_TEST_FOR_EXCEPTION(origComm == MPI_COMM_NULL, std::logic_error, 00840 "Teuchos::MpiComm copy constructor: " 00841 "The input's raw MPI_Comm is MPI_COMM_NULL."); 00842 00843 // mfh 19 Oct 2012: Don't change the behavior of MpiComm's copy 00844 // constructor for now. Later, we'll switch to the version that 00845 // calls MPI_Comm_dup. For now, we just copy other's handle over. 00846 // Note that the new MpiComm's tag is still different than the input 00847 // MpiComm's tag. See Bug 5740. 00848 if (true) { 00849 rawMpiComm_ = origCommPtr; 00850 } 00851 else { // false (not run) 00852 MPI_Comm newComm; 00853 const int err = MPI_Comm_dup (origComm, &newComm); 00854 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, 00855 "Teuchos::MpiComm copy constructor: MPI_Comm_dup failed with " 00856 "the following error: " << mpiErrorCodeToString (err)); 00857 // No side effects until after everything has succeeded. 00858 rawMpiComm_ = opaqueWrapper (newComm, details::safeCommFree); 00859 } 00860 00861 setupMembersFromComm (); 00862 } 00863 00864 00865 template<typename Ordinal> 00866 void MpiComm<Ordinal>::setupMembersFromComm () 00867 { 00868 int err = MPI_Comm_size (*rawMpiComm_, &size_); 00869 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, 00870 "Teuchos::MpiComm constructor: MPI_Comm_size failed with " 00871 "error \"" << mpiErrorCodeToString (err) << "\"."); 00872 err = MPI_Comm_rank (*rawMpiComm_, &rank_); 00873 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, 00874 "Teuchos::MpiComm constructor: MPI_Comm_rank failed with " 00875 "error \"" << mpiErrorCodeToString (err) << "\"."); 00876 00877 // Set the default tag to make unique across all communicators 00878 if (tagCounter_ > maxTag_) { 00879 tagCounter_ = minTag_; 00880 } 00881 tag_ = tagCounter_++; 00882 // Ensure that the same tag is used on all processes. 00883 // 00884 // FIXME (mfh 09 Jul 2013) This would not be necessary if MpiComm 00885 // were just to call MPI_Comm_dup (as every library should) when 00886 // given its communicator. Of course, MPI_Comm_dup may also be 00887 // implemented as a collective, and may even be more expensive than 00888 // a broadcast. If we do decide to use MPI_Comm_dup, we can get rid 00889 // of the broadcast below, and also get rid of tag_, tagCounter_, 00890 // minTag_, and maxTag_. 00891 MPI_Bcast (&tag_, 1, MPI_INT, 0, *rawMpiComm_); 00892 } 00893 00894 00895 template<typename Ordinal> 00896 void 00897 MpiComm<Ordinal>:: 00898 setErrorHandler (const RCP<const OpaqueWrapper<MPI_Errhandler> >& errHandler) 00899 { 00900 if (! is_null (errHandler)) { 00901 const int err = details::setCommErrhandler (*getRawMpiComm (), *errHandler); 00902 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, 00903 "Teuchos::MpiComm: Setting the MPI_Comm's error handler failed with " 00904 "error \"" << mpiErrorCodeToString (err) << "\"."); 00905 } 00906 // Wait to set this until the end, in case setting the error handler 00907 // doesn't succeed. 00908 customErrorHandler_ = errHandler; 00909 } 00910 00911 // 00912 // Overridden from Comm 00913 // 00914 00915 template<typename Ordinal> 00916 int MpiComm<Ordinal>::getRank() const 00917 { 00918 return rank_; 00919 } 00920 00921 00922 template<typename Ordinal> 00923 int MpiComm<Ordinal>::getSize() const 00924 { 00925 return size_; 00926 } 00927 00928 00929 template<typename Ordinal> 00930 void MpiComm<Ordinal>::barrier() const 00931 { 00932 TEUCHOS_COMM_TIME_MONITOR( 00933 "Teuchos::MpiComm<"<<OrdinalTraits<Ordinal>::name()<<">::barrier()" 00934 ); 00935 const int err = MPI_Barrier (*rawMpiComm_); 00936 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, 00937 "Teuchos::MpiComm::barrier: MPI_Barrier failed with error \"" 00938 << mpiErrorCodeToString (err) << "\"."); 00939 } 00940 00941 00942 template<typename Ordinal> 00943 void MpiComm<Ordinal>::broadcast( 00944 const int rootRank, const Ordinal bytes, char buffer[] 00945 ) const 00946 { 00947 TEUCHOS_COMM_TIME_MONITOR( 00948 "Teuchos::MpiComm<"<<OrdinalTraits<Ordinal>::name()<<">::broadcast(...)" 00949 ); 00950 const int err = MPI_Bcast (buffer, bytes, MPI_CHAR, rootRank, *rawMpiComm_); 00951 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, 00952 "Teuchos::MpiComm::broadcast: MPI_Bcast failed with error \"" 00953 << mpiErrorCodeToString (err) << "\"."); 00954 } 00955 00956 00957 template<typename Ordinal> 00958 void MpiComm<Ordinal>::gatherAll( 00959 const Ordinal sendBytes, const char sendBuffer[], 00960 const Ordinal recvBytes, char recvBuffer[] 00961 ) const 00962 { 00963 TEUCHOS_COMM_TIME_MONITOR( 00964 "Teuchos::MpiComm<"<<OrdinalTraits<Ordinal>::name()<<">::gatherAll(...)" 00965 ); 00966 TEUCHOS_ASSERT_EQUALITY((sendBytes*size_), recvBytes ); 00967 const int err = 00968 MPI_Allgather (const_cast<char *>(sendBuffer), sendBytes, MPI_CHAR, 00969 recvBuffer, sendBytes, MPI_CHAR, *rawMpiComm_); 00970 // NOTE: 'sendBytes' is being sent above for the MPI arg recvcount (which is 00971 // very confusing in the MPI documentation) for MPI_Allgether(...). 00972 00973 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, 00974 "Teuchos::MpiComm::gatherAll: MPI_Allgather failed with error \"" 00975 << mpiErrorCodeToString (err) << "\"."); 00976 } 00977 00978 00979 template<typename Ordinal> 00980 void 00981 MpiComm<Ordinal>::gather (const Ordinal sendBytes, 00982 const char sendBuffer[], 00983 const Ordinal recvBytes, 00984 char recvBuffer[], 00985 const int root) const 00986 { 00987 (void) recvBytes; // silence compile warning for "unused parameter" 00988 00989 TEUCHOS_COMM_TIME_MONITOR( 00990 "Teuchos::MpiComm<"<<OrdinalTraits<Ordinal>::name()<<">::gather(...)" 00991 ); 00992 const int err = 00993 MPI_Gather (const_cast<char *> (sendBuffer), sendBytes, MPI_CHAR, 00994 recvBuffer, sendBytes, MPI_CHAR, root, *rawMpiComm_); 00995 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, 00996 "Teuchos::MpiComm::gather: MPI_Gather failed with error \"" 00997 << mpiErrorCodeToString (err) << "\"."); 00998 } 00999 01000 01001 template<typename Ordinal> 01002 void 01003 MpiComm<Ordinal>:: 01004 reduceAll (const ValueTypeReductionOp<Ordinal,char> &reductOp, 01005 const Ordinal bytes, 01006 const char sendBuffer[], 01007 char globalReducts[]) const 01008 { 01009 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::reduceAll(...)" ); 01010 01011 MpiReductionOpSetter op(mpiReductionOp(rcp(&reductOp,false))); 01012 MPI_Datatype char_block; 01013 01014 // TODO (mfh 26 Mar 2012) Check returned error codes of the MPI 01015 // custom datatype functions. 01016 MPI_Type_contiguous(bytes, MPI_CHAR, &char_block); 01017 MPI_Type_commit(&char_block); 01018 01019 const int err = 01020 MPI_Allreduce (const_cast<char*>(sendBuffer), globalReducts, 1, char_block, 01021 op.mpi_op(), *rawMpiComm_); 01022 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, 01023 "Teuchos::MpiComm::reduceAll (custom op): MPI_Allreduce failed with error \"" 01024 << mpiErrorCodeToString (err) << "\"."); 01025 01026 // TODO (mfh 26 Mar 2012) Check returned error codes of the MPI 01027 // custom datatype functions. 01028 MPI_Type_free(&char_block); 01029 } 01030 01031 01032 template<typename Ordinal> 01033 void MpiComm<Ordinal>::reduceAllAndScatter( 01034 const ValueTypeReductionOp<Ordinal,char> &reductOp 01035 ,const Ordinal sendBytes, const char sendBuffer[] 01036 ,const Ordinal recvCounts[], char myGlobalReducts[] 01037 ) const 01038 { 01039 01040 (void)sendBytes; // Ignore if not in debug mode 01041 01042 TEUCHOS_COMM_TIME_MONITOR( 01043 "Teuchos::MpiComm<"<<OrdinalTraits<Ordinal>::name()<<">::reduceAllAndScatter(...)" 01044 ); 01045 01046 #ifdef TEUCHOS_DEBUG 01047 Ordinal sumRecvBytes = 0; 01048 for( Ordinal i = 0; i < size_; ++i ) { 01049 sumRecvBytes += recvCounts[i]; 01050 } 01051 TEUCHOS_TEST_FOR_EXCEPT(!(sumRecvBytes==sendBytes)); 01052 #endif // TEUCHOS_DEBUG 01053 01054 #ifdef TEUCHOS_MPI_COMM_DUMP 01055 if(show_dump) { 01056 dumpBuffer<Ordinal,char>( 01057 "Teuchos::MpiComm<Ordinal>::reduceAllAndScatter(...)", 01058 "sendBuffer", sendBytes, sendBuffer ); 01059 dumpBuffer<Ordinal,Ordinal>( 01060 "Teuchos::MpiComm<Ordinal>::reduceAllAndScatter(...)", 01061 "recvCounts", as<Ordinal>(size_), recvCounts ); 01062 dumpBuffer<Ordinal,char>( 01063 "Teuchos::MpiComm<Ordinal>::reduceAllAndScatter(...)", 01064 "myGlobalReducts", as<char>(recvCounts[rank_]), myGlobalReducts ); 01065 } 01066 #endif // TEUCHOS_MPI_COMM_DUMP 01067 01068 // Create a new recvCount[] if Ordinal!=int 01069 WorkspaceStore* wss = get_default_workspace_store().get(); 01070 const bool Ordinal_is_int = typeid(int)==typeid(Ordinal); 01071 Workspace<int> ws_int_recvCounts(wss,Ordinal_is_int?0:size_); 01072 const int *int_recvCounts = 0; 01073 if(Ordinal_is_int) { 01074 int_recvCounts = reinterpret_cast<const int*>(recvCounts); 01075 // Note: We must do an reinterpet cast since this must 01076 // compile even if it is not executed. I could implement 01077 // code that would not need to do this using template 01078 // conditionals but I don't want to bother. 01079 } 01080 else { 01081 std::copy(recvCounts, recvCounts+size_, &ws_int_recvCounts[0]); 01082 int_recvCounts = &ws_int_recvCounts[0]; 01083 } 01084 01085 // Perform the operation 01086 MpiReductionOpSetter op(mpiReductionOp(rcp(&reductOp, false))); 01087 01088 const int err = MPI_Reduce_scatter( 01089 const_cast<char*>(sendBuffer), myGlobalReducts, 01090 const_cast<int*>(int_recvCounts), 01091 MPI_CHAR, 01092 op.mpi_op(), 01093 *rawMpiComm_ 01094 ); 01095 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, 01096 "Teuchos::MpiComm::reduceAllAndScatter: MPI_Reduce_scatter failed with " 01097 "error \"" << mpiErrorCodeToString (err) << "\"."); 01098 } 01099 01100 01101 template<typename Ordinal> 01102 void MpiComm<Ordinal>::scan( 01103 const ValueTypeReductionOp<Ordinal,char> &reductOp 01104 ,const Ordinal bytes, const char sendBuffer[], char scanReducts[] 01105 ) const 01106 { 01107 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::scan(...)" ); 01108 01109 MpiReductionOpSetter op(mpiReductionOp(rcp(&reductOp,false))); 01110 const int err = 01111 MPI_Scan (const_cast<char*>(sendBuffer), scanReducts, bytes, MPI_CHAR, 01112 op.mpi_op(), *rawMpiComm_); 01113 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, 01114 "Teuchos::MpiComm::scan: MPI_Scan() failed with error \"" 01115 << mpiErrorCodeToString (err) << "\"."); 01116 } 01117 01118 01119 template<typename Ordinal> 01120 void 01121 MpiComm<Ordinal>::send (const Ordinal bytes, 01122 const char sendBuffer[], 01123 const int destRank) const 01124 { 01125 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::send(...)" ); 01126 01127 #ifdef TEUCHOS_MPI_COMM_DUMP 01128 if(show_dump) { 01129 dumpBuffer<Ordinal,char>( 01130 "Teuchos::MpiComm<Ordinal>::send(...)" 01131 ,"sendBuffer", bytes, sendBuffer 01132 ); 01133 } 01134 #endif // TEUCHOS_MPI_COMM_DUMP 01135 01136 const int err = MPI_Send (const_cast<char*>(sendBuffer), bytes, MPI_CHAR, 01137 destRank, tag_, *rawMpiComm_); 01138 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, 01139 "Teuchos::MpiComm::send: MPI_Send() failed with error \"" 01140 << mpiErrorCodeToString (err) << "\"."); 01141 } 01142 01143 01144 template<typename Ordinal> 01145 void 01146 MpiComm<Ordinal>::send (const Ordinal bytes, 01147 const char sendBuffer[], 01148 const int destRank, 01149 const int tag) const 01150 { 01151 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::send(...)" ); 01152 const int err = MPI_Send (const_cast<char*> (sendBuffer), bytes, MPI_CHAR, 01153 destRank, tag, *rawMpiComm_); 01154 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, 01155 "Teuchos::MpiComm::send: MPI_Send() failed with error \"" 01156 << mpiErrorCodeToString (err) << "\"."); 01157 } 01158 01159 01160 template<typename Ordinal> 01161 void 01162 MpiComm<Ordinal>::ssend (const Ordinal bytes, 01163 const char sendBuffer[], 01164 const int destRank) const 01165 { 01166 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::ssend(...)" ); 01167 01168 #ifdef TEUCHOS_MPI_COMM_DUMP 01169 if(show_dump) { 01170 dumpBuffer<Ordinal,char>( 01171 "Teuchos::MpiComm<Ordinal>::send(...)" 01172 ,"sendBuffer", bytes, sendBuffer 01173 ); 01174 } 01175 #endif // TEUCHOS_MPI_COMM_DUMP 01176 01177 const int err = MPI_Ssend (const_cast<char*>(sendBuffer), bytes, MPI_CHAR, 01178 destRank, tag_, *rawMpiComm_); 01179 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, 01180 "Teuchos::MpiComm::send: MPI_Ssend() failed with error \"" 01181 << mpiErrorCodeToString (err) << "\"."); 01182 } 01183 01184 template<typename Ordinal> 01185 void 01186 MpiComm<Ordinal>::ssend (const Ordinal bytes, 01187 const char sendBuffer[], 01188 const int destRank, 01189 const int tag) const 01190 { 01191 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::ssend(...)" ); 01192 const int err = 01193 MPI_Ssend (const_cast<char*>(sendBuffer), bytes, MPI_CHAR, 01194 destRank, tag, *rawMpiComm_); 01195 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, 01196 "Teuchos::MpiComm::send: MPI_Ssend() failed with error \"" 01197 << mpiErrorCodeToString (err) << "\"."); 01198 } 01199 01200 template<typename Ordinal> 01201 void MpiComm<Ordinal>::readySend( 01202 const ArrayView<const char> &sendBuffer, 01203 const int destRank 01204 ) const 01205 { 01206 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::readySend" ); 01207 01208 #ifdef TEUCHOS_MPI_COMM_DUMP 01209 if(show_dump) { 01210 dumpBuffer<Ordinal,char>( 01211 "Teuchos::MpiComm<Ordinal>::readySend(...)" 01212 ,"sendBuffer", bytes, sendBuffer 01213 ); 01214 } 01215 #endif // TEUCHOS_MPI_COMM_DUMP 01216 01217 const int err = 01218 MPI_Rsend (const_cast<char*>(sendBuffer.getRawPtr()), sendBuffer.size(), 01219 MPI_CHAR, destRank, tag_, *rawMpiComm_); 01220 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, 01221 "Teuchos::MpiComm::readySend: MPI_Rsend() failed with error \"" 01222 << mpiErrorCodeToString (err) << "\"."); 01223 } 01224 01225 01226 template<typename Ordinal> 01227 void MpiComm<Ordinal>:: 01228 readySend (const Ordinal bytes, 01229 const char sendBuffer[], 01230 const int destRank, 01231 const int tag) const 01232 { 01233 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::readySend" ); 01234 const int err = 01235 MPI_Rsend (const_cast<char*> (sendBuffer), bytes, 01236 MPI_CHAR, destRank, tag, *rawMpiComm_); 01237 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, 01238 "Teuchos::MpiComm::readySend: MPI_Rsend() failed with error \"" 01239 << mpiErrorCodeToString (err) << "\"."); 01240 } 01241 01242 01243 template<typename Ordinal> 01244 int 01245 MpiComm<Ordinal>::receive (const int sourceRank, 01246 const Ordinal bytes, 01247 char recvBuffer[]) const 01248 { 01249 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::receive(...)" ); 01250 01251 // A negative source rank indicates MPI_ANY_SOURCE, namely that we 01252 // will take an incoming message from any process, as long as the 01253 // tag matches. 01254 const int theSrcRank = (sourceRank < 0) ? MPI_ANY_SOURCE : sourceRank; 01255 01256 MPI_Status status; 01257 const int err = MPI_Recv (recvBuffer, bytes, MPI_CHAR, theSrcRank, tag_, 01258 *rawMpiComm_, &status); 01259 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, 01260 "Teuchos::MpiComm::receive: MPI_Recv() failed with error \"" 01261 << mpiErrorCodeToString (err) << "\"."); 01262 01263 #ifdef TEUCHOS_MPI_COMM_DUMP 01264 if (show_dump) { 01265 dumpBuffer<Ordinal,char> ("Teuchos::MpiComm<Ordinal>::receive(...)", 01266 "recvBuffer", bytes, recvBuffer); 01267 } 01268 #endif // TEUCHOS_MPI_COMM_DUMP 01269 01270 // Returning the source rank is useful in the MPI_ANY_SOURCE case. 01271 return status.MPI_SOURCE; 01272 } 01273 01274 01275 template<typename Ordinal> 01276 RCP<CommRequest<Ordinal> > 01277 MpiComm<Ordinal>::isend (const ArrayView<const char> &sendBuffer, 01278 const int destRank) const 01279 { 01280 using Teuchos::as; 01281 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::isend(...)" ); 01282 01283 MPI_Request rawMpiRequest = MPI_REQUEST_NULL; 01284 const int err = 01285 MPI_Isend (const_cast<char*> (sendBuffer.getRawPtr ()), 01286 as<Ordinal> (sendBuffer.size ()), MPI_CHAR, 01287 destRank, tag_, *rawMpiComm_, &rawMpiRequest); 01288 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, 01289 "Teuchos::MpiComm::isend: MPI_Isend() failed with error \"" 01290 << mpiErrorCodeToString (err) << "\"."); 01291 01292 return mpiCommRequest<Ordinal> (rawMpiRequest, sendBuffer.size ()); 01293 } 01294 01295 01296 template<typename Ordinal> 01297 RCP<CommRequest<Ordinal> > 01298 MpiComm<Ordinal>:: 01299 isend (const ArrayView<const char> &sendBuffer, 01300 const int destRank, 01301 const int tag) const 01302 { 01303 using Teuchos::as; 01304 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::isend(...)" ); 01305 01306 MPI_Request rawMpiRequest = MPI_REQUEST_NULL; 01307 const int err = 01308 MPI_Isend (const_cast<char*> (sendBuffer.getRawPtr ()), 01309 as<Ordinal> (sendBuffer.size ()), MPI_CHAR, 01310 destRank, tag, *rawMpiComm_, &rawMpiRequest); 01311 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, 01312 "Teuchos::MpiComm::isend: MPI_Isend() failed with error \"" 01313 << mpiErrorCodeToString (err) << "\"."); 01314 01315 return mpiCommRequest<Ordinal> (rawMpiRequest, sendBuffer.size ()); 01316 } 01317 01318 01319 template<typename Ordinal> 01320 RCP<CommRequest<Ordinal> > 01321 MpiComm<Ordinal>::ireceive (const ArrayView<char> &recvBuffer, 01322 const int sourceRank) const 01323 { 01324 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::ireceive(...)" ); 01325 01326 // A negative source rank indicates MPI_ANY_SOURCE, namely that we 01327 // will take an incoming message from any process, as long as the 01328 // tag matches. 01329 const int theSrcRank = (sourceRank < 0) ? MPI_ANY_SOURCE : sourceRank; 01330 01331 MPI_Request rawMpiRequest = MPI_REQUEST_NULL; 01332 const int err = 01333 MPI_Irecv (const_cast<char*>(recvBuffer.getRawPtr()), recvBuffer.size(), 01334 MPI_CHAR, theSrcRank, tag_, *rawMpiComm_, &rawMpiRequest); 01335 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, 01336 "Teuchos::MpiComm::ireceive: MPI_Irecv() failed with error \"" 01337 << mpiErrorCodeToString (err) << "\"."); 01338 01339 return mpiCommRequest<Ordinal> (rawMpiRequest, recvBuffer.size()); 01340 } 01341 01342 template<typename Ordinal> 01343 RCP<CommRequest<Ordinal> > 01344 MpiComm<Ordinal>::ireceive (const ArrayView<char> &recvBuffer, 01345 const int sourceRank, 01346 const int tag) const 01347 { 01348 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::ireceive(...)" ); 01349 01350 // A negative source rank indicates MPI_ANY_SOURCE, namely that we 01351 // will take an incoming message from any process, as long as the 01352 // tag matches. 01353 const int theSrcRank = (sourceRank < 0) ? MPI_ANY_SOURCE : sourceRank; 01354 01355 MPI_Request rawMpiRequest = MPI_REQUEST_NULL; 01356 const int err = 01357 MPI_Irecv (const_cast<char*> (recvBuffer.getRawPtr ()), recvBuffer.size (), 01358 MPI_CHAR, theSrcRank, tag, *rawMpiComm_, &rawMpiRequest); 01359 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, 01360 "Teuchos::MpiComm::ireceive: MPI_Irecv() failed with error \"" 01361 << mpiErrorCodeToString (err) << "\"."); 01362 01363 return mpiCommRequest<Ordinal> (rawMpiRequest, recvBuffer.size ()); 01364 } 01365 01366 namespace { 01367 // Called by the two-argument MpiComm::waitAll() variant. 01368 template<typename Ordinal> 01369 void 01370 waitAllImpl (const ArrayView<RCP<CommRequest<Ordinal> > >& requests, 01371 const ArrayView<MPI_Status>& rawMpiStatuses) 01372 { 01373 typedef typename ArrayView<RCP<CommRequest<Ordinal> > >::size_type size_type; 01374 const size_type count = requests.size(); 01375 // waitAllImpl() is not meant to be called by users, so it's a bug 01376 // for the two views to have different lengths. 01377 TEUCHOS_TEST_FOR_EXCEPTION(rawMpiStatuses.size() != count, 01378 std::logic_error, "Teuchos::MpiComm's waitAllImpl: rawMpiStatus.size() = " 01379 << rawMpiStatuses.size() << " != requests.size() = " << requests.size() 01380 << ". Please report this bug to the Tpetra developers."); 01381 if (count == 0) { 01382 return; // No requests on which to wait 01383 } 01384 01385 // MpiComm wraps MPI and can't expose any MPI structs or opaque 01386 // objects. Thus, we have to unpack requests into a separate array. 01387 // If that's too slow, then your code should just call into MPI 01388 // directly. 01389 // 01390 // Pull out the raw MPI requests from the wrapped requests. 01391 // MPI_Waitall should not fail if a request is MPI_REQUEST_NULL, but 01392 // we keep track just to inform the user. 01393 bool someNullRequests = false; 01394 Array<MPI_Request> rawMpiRequests (count, MPI_REQUEST_NULL); 01395 for (int i = 0; i < count; ++i) { 01396 RCP<CommRequest<Ordinal> > request = requests[i]; 01397 if (! is_null (request)) { 01398 RCP<MpiCommRequestBase<Ordinal> > mpiRequest = 01399 rcp_dynamic_cast<MpiCommRequestBase<Ordinal> > (request); 01400 // releaseRawMpiRequest() sets the MpiCommRequest's raw 01401 // MPI_Request to MPI_REQUEST_NULL. This makes waitAll() not 01402 // satisfy the strong exception guarantee. That's OK because 01403 // MPI_Waitall() doesn't promise that it satisfies the strong 01404 // exception guarantee, and we would rather conservatively 01405 // invalidate the handles than leave dangling requests around 01406 // and risk users trying to wait on the same request twice. 01407 rawMpiRequests[i] = mpiRequest->releaseRawMpiRequest(); 01408 } 01409 else { // Null requests map to MPI_REQUEST_NULL 01410 rawMpiRequests[i] = MPI_REQUEST_NULL; 01411 someNullRequests = true; 01412 } 01413 } 01414 01415 // This is the part where we've finally peeled off the wrapper and 01416 // we can now interact with MPI directly. 01417 // 01418 // One option in the one-argument version of waitAll() is to ignore 01419 // the statuses completely. MPI lets you pass in the named constant 01420 // MPI_STATUSES_IGNORE for the MPI_Status array output argument in 01421 // MPI_Waitall(), which would tell MPI not to bother with the 01422 // statuses. However, we want the statuses because we can use them 01423 // for detailed error diagnostics in case something goes wrong. 01424 const int err = MPI_Waitall (count, rawMpiRequests.getRawPtr(), 01425 rawMpiStatuses.getRawPtr()); 01426 01427 // In MPI_Waitall(), an error indicates that one or more requests 01428 // failed. In that case, there could be requests that completed 01429 // (their MPI_Status' error field is MPI_SUCCESS), and other 01430 // requests that have not completed yet but have not necessarily 01431 // failed (MPI_PENDING). We make no attempt here to wait on the 01432 // pending requests. It doesn't make sense for us to do so, because 01433 // in general Teuchos::Comm doesn't attempt to provide robust 01434 // recovery from failed messages. 01435 if (err != MPI_SUCCESS) { 01436 if (err == MPI_ERR_IN_STATUS) { 01437 // 01438 // When MPI_Waitall returns MPI_ERR_IN_STATUS (a standard error 01439 // class), it's telling us to check the error codes in the 01440 // returned statuses. In that case, we do so and generate a 01441 // detailed exception message. 01442 // 01443 // Figure out which of the requests failed. 01444 Array<std::pair<size_type, int> > errorLocationsAndCodes; 01445 for (size_type k = 0; k < rawMpiStatuses.size(); ++k) { 01446 const int curErr = rawMpiStatuses[k].MPI_ERROR; 01447 if (curErr != MPI_SUCCESS) { 01448 errorLocationsAndCodes.push_back (std::make_pair (k, curErr)); 01449 } 01450 } 01451 const size_type numErrs = errorLocationsAndCodes.size(); 01452 if (numErrs > 0) { 01453 // There was at least one error. Assemble a detailed 01454 // exception message reporting which requests failed, 01455 // their error codes, and their source 01456 std::ostringstream os; 01457 os << "Teuchos::MpiComm::waitAll: MPI_Waitall() failed with error \"" 01458 << mpiErrorCodeToString (err) << "\". Of the " << count 01459 << " total request" << (count != 1 ? "s" : "") << ", " << numErrs 01460 << " failed. Here are the indices of the failed requests, and the " 01461 "error codes extracted from their returned MPI_Status objects:" 01462 << std::endl; 01463 for (size_type k = 0; k < numErrs; ++k) { 01464 const size_type errInd = errorLocationsAndCodes[k].first; 01465 os << "Request " << errInd << ": MPI_ERROR = " 01466 << mpiErrorCodeToString (rawMpiStatuses[errInd].MPI_ERROR) 01467 << std::endl; 01468 } 01469 if (someNullRequests) { 01470 os << " On input to MPI_Waitall, there was at least one MPI_" 01471 "Request that was MPI_REQUEST_NULL. MPI_Waitall should not " 01472 "normally fail in that case, but we thought we should let you know " 01473 "regardless."; 01474 } 01475 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, os.str()); 01476 } 01477 // If there were no actual errors in the returned statuses, 01478 // well, then I guess everything is OK. Just keep going. 01479 } 01480 else { 01481 std::ostringstream os; 01482 os << "Teuchos::MpiComm::waitAll: MPI_Waitall() failed with error \"" 01483 << mpiErrorCodeToString (err) << "\"."; 01484 if (someNullRequests) { 01485 os << " On input to MPI_Waitall, there was at least one MPI_Request " 01486 "that was MPI_REQUEST_NULL. MPI_Waitall should not normally fail in " 01487 "that case, but we thought we should let you know regardless."; 01488 } 01489 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, os.str()); 01490 } 01491 } 01492 01493 // Invalidate the input array of requests by setting all entries 01494 // to null. 01495 std::fill (requests.begin(), requests.end(), null); 01496 } 01497 01498 01499 01500 // Called by the one-argument MpiComm::waitAll() variant. 01501 template<typename Ordinal> 01502 void 01503 waitAllImpl (const ArrayView<RCP<CommRequest<Ordinal> > >& requests) 01504 { 01505 typedef typename ArrayView<RCP<CommRequest<Ordinal> > >::size_type size_type; 01506 const size_type count = requests.size (); 01507 if (count == 0) { 01508 return; // No requests on which to wait 01509 } 01510 01511 // MpiComm wraps MPI and can't expose any MPI structs or opaque 01512 // objects. Thus, we have to unpack requests into a separate 01513 // array. If that's too slow, then your code should just call 01514 // into MPI directly. 01515 // 01516 // Pull out the raw MPI requests from the wrapped requests. 01517 // MPI_Waitall should not fail if a request is MPI_REQUEST_NULL, 01518 // but we keep track just to inform the user. 01519 bool someNullRequests = false; 01520 Array<MPI_Request> rawMpiRequests (count, MPI_REQUEST_NULL); 01521 for (int i = 0; i < count; ++i) { 01522 RCP<CommRequest<Ordinal> > request = requests[i]; 01523 if (! request.is_null ()) { 01524 RCP<MpiCommRequestBase<Ordinal> > mpiRequest = 01525 rcp_dynamic_cast<MpiCommRequestBase<Ordinal> > (request); 01526 // releaseRawMpiRequest() sets the MpiCommRequest's raw 01527 // MPI_Request to MPI_REQUEST_NULL. This makes waitAll() not 01528 // satisfy the strong exception guarantee. That's OK because 01529 // MPI_Waitall() doesn't promise that it satisfies the strong 01530 // exception guarantee, and we would rather conservatively 01531 // invalidate the handles than leave dangling requests around 01532 // and risk users trying to wait on the same request twice. 01533 rawMpiRequests[i] = mpiRequest->releaseRawMpiRequest (); 01534 } 01535 else { // Null requests map to MPI_REQUEST_NULL 01536 rawMpiRequests[i] = MPI_REQUEST_NULL; 01537 someNullRequests = true; 01538 } 01539 } 01540 01541 // This is the part where we've finally peeled off the wrapper and 01542 // we can now interact with MPI directly. 01543 // 01544 // MPI lets us pass in the named constant MPI_STATUSES_IGNORE for 01545 // the MPI_Status array output argument in MPI_Waitall(), which 01546 // tells MPI not to bother writing out the statuses. 01547 const int err = MPI_Waitall (count, rawMpiRequests.getRawPtr(), 01548 MPI_STATUSES_IGNORE); 01549 01550 // In MPI_Waitall(), an error indicates that one or more requests 01551 // failed. In that case, there could be requests that completed 01552 // (their MPI_Status' error field is MPI_SUCCESS), and other 01553 // requests that have not completed yet but have not necessarily 01554 // failed (MPI_PENDING). We make no attempt here to wait on the 01555 // pending requests. It doesn't make sense for us to do so, 01556 // because in general Teuchos::Comm doesn't attempt to provide 01557 // robust recovery from failed messages. 01558 if (err != MPI_SUCCESS) { 01559 std::ostringstream os; 01560 os << "Teuchos::MpiComm::waitAll: MPI_Waitall() failed with error \"" 01561 << mpiErrorCodeToString (err) << "\"."; 01562 if (someNullRequests) { 01563 os << std::endl << "On input to MPI_Waitall, there was at least one " 01564 "MPI_Request that was MPI_REQUEST_NULL. MPI_Waitall should not " 01565 "normally fail in that case, but we thought we should let you know " 01566 "regardless."; 01567 } 01568 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, os.str()); 01569 } 01570 01571 // Invalidate the input array of requests by setting all entries 01572 // to null. We delay this until the end, since some 01573 // implementations of CommRequest might hold the only reference to 01574 // the communication buffer, and we don't want that to go away 01575 // until we've waited on the communication operation. 01576 std::fill (requests.begin(), requests.end(), null); 01577 } 01578 01579 } // namespace (anonymous) 01580 01581 01582 01583 template<typename Ordinal> 01584 void 01585 MpiComm<Ordinal>:: 01586 waitAll (const ArrayView<RCP<CommRequest<Ordinal> > >& requests) const 01587 { 01588 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::waitAll(requests)" ); 01589 // Call the one-argument version of waitAllImpl, to avoid overhead 01590 // of handling statuses (which the user didn't want anyway). 01591 waitAllImpl<Ordinal> (requests); 01592 } 01593 01594 01595 template<typename Ordinal> 01596 void 01597 MpiComm<Ordinal>:: 01598 waitAll (const ArrayView<RCP<CommRequest<Ordinal> > >& requests, 01599 const ArrayView<RCP<CommStatus<Ordinal> > >& statuses) const 01600 { 01601 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::waitAll(requests, statuses)" ); 01602 01603 typedef typename ArrayView<RCP<CommRequest<Ordinal> > >::size_type size_type; 01604 const size_type count = requests.size(); 01605 01606 TEUCHOS_TEST_FOR_EXCEPTION(count != statuses.size(), 01607 std::invalid_argument, "Teuchos::MpiComm::waitAll: requests.size() = " 01608 << count << " != statuses.size() = " << statuses.size() << "."); 01609 01610 Array<MPI_Status> rawMpiStatuses (count); 01611 waitAllImpl<Ordinal> (requests, rawMpiStatuses()); 01612 01613 // Repackage the raw MPI_Status structs into the wrappers. 01614 for (size_type i = 0; i < count; ++i) { 01615 statuses[i] = mpiCommStatus<Ordinal> (rawMpiStatuses[i]); 01616 } 01617 } 01618 01619 01620 template<typename Ordinal> 01621 RCP<CommStatus<Ordinal> > 01622 MpiComm<Ordinal>::wait (const Ptr<RCP<CommRequest<Ordinal> > >& request) const 01623 { 01624 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::wait(...)" ); 01625 01626 if (is_null (*request)) { 01627 return null; // Nothing to wait on ... 01628 } 01629 else { 01630 RCP<CommStatus<Ordinal> > status = (*request)->wait (); 01631 // mfh 22 Oct 2012: The unit tests expect waiting on the 01632 // CommRequest to invalidate it by setting it to null. 01633 *request = null; 01634 return status; 01635 } 01636 } 01637 01638 template<typename Ordinal> 01639 RCP< Comm<Ordinal> > 01640 MpiComm<Ordinal>::duplicate() const 01641 { 01642 MPI_Comm origRawComm = *rawMpiComm_; 01643 MPI_Comm newRawComm = MPI_COMM_NULL; 01644 const int err = MPI_Comm_dup (origRawComm, &newRawComm); 01645 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, "Teuchos" 01646 "::MpiComm::duplicate: MPI_Comm_dup failed with the following error: " 01647 << mpiErrorCodeToString (err)); 01648 01649 // Wrap the raw communicator, and pass the (const) wrapped 01650 // communicator to MpiComm's constructor. We created the raw comm, 01651 // so we have to supply a function that frees it after use. 01652 RCP<OpaqueWrapper<MPI_Comm> > wrapped = 01653 opaqueWrapper<MPI_Comm> (newRawComm, details::safeCommFree); 01654 // Since newComm's raw MPI_Comm is the result of an MPI_Comm_dup, 01655 // its messages cannot collide with those of any other MpiComm. 01656 // This means we can assign its tag without an MPI_Bcast. 01657 RCP<MpiComm<Ordinal> > newComm = 01658 rcp (new MpiComm<Ordinal> (wrapped.getConst (), minTag_)); 01659 return rcp_implicit_cast<Comm<Ordinal> > (newComm); 01660 } 01661 01662 01663 template<typename Ordinal> 01664 RCP< Comm<Ordinal> > 01665 MpiComm<Ordinal>::split(const int color, const int key) const 01666 { 01667 MPI_Comm newComm; 01668 const int splitReturn = 01669 MPI_Comm_split (*rawMpiComm_, 01670 color < 0 ? MPI_UNDEFINED : color, 01671 key, 01672 &newComm); 01673 TEUCHOS_TEST_FOR_EXCEPTION( 01674 splitReturn != MPI_SUCCESS, 01675 std::logic_error, 01676 "Teuchos::MpiComm::split: Failed to create communicator with color " 01677 << color << "and key " << key << ". MPI_Comm_split failed with error \"" 01678 << mpiErrorCodeToString (splitReturn) << "\"."); 01679 if (newComm == MPI_COMM_NULL) { 01680 return RCP< Comm<Ordinal> >(); 01681 } else { 01682 RCP<const OpaqueWrapper<MPI_Comm> > wrapped = 01683 opaqueWrapper<MPI_Comm> (newComm, details::safeCommFree); 01684 // Since newComm's raw MPI_Comm is the result of an 01685 // MPI_Comm_split, its messages cannot collide with those of any 01686 // other MpiComm. This means we can assign its tag without an 01687 // MPI_Bcast. 01688 return rcp (new MpiComm<Ordinal> (wrapped, minTag_)); 01689 } 01690 } 01691 01692 01693 template<typename Ordinal> 01694 RCP< Comm<Ordinal> > 01695 MpiComm<Ordinal>::createSubcommunicator(const ArrayView<const int> &ranks) const 01696 { 01697 int err = MPI_SUCCESS; // For error codes returned by MPI functions 01698 01699 // Get the group that this communicator is in. 01700 MPI_Group thisGroup; 01701 err = MPI_Comm_group (*rawMpiComm_, &thisGroup); 01702 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::logic_error, 01703 "Failed to obtain the current communicator's group. " 01704 "MPI_Comm_group failed with error \"" 01705 << mpiErrorCodeToString (err) << "\"."); 01706 01707 // Create a new group with the specified members. 01708 MPI_Group newGroup; 01709 // It's rude to cast away const, but MPI functions demand it. 01710 // 01711 // NOTE (mfh 14 Aug 2012) Please don't ask for &ranks[0] unless you 01712 // know that ranks.size() > 0. That's why I'm using getRawPtr(). 01713 err = MPI_Group_incl (thisGroup, ranks.size(), 01714 const_cast<int*> (ranks.getRawPtr ()), &newGroup); 01715 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::logic_error, 01716 "Failed to create subgroup. MPI_Group_incl failed with error \"" 01717 << mpiErrorCodeToString (err) << "\"."); 01718 01719 // Create a new communicator from the new group. 01720 MPI_Comm newComm; 01721 try { 01722 err = MPI_Comm_create (*rawMpiComm_, newGroup, &newComm); 01723 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::logic_error, 01724 "Failed to create subcommunicator. MPI_Comm_create failed with error \"" 01725 << mpiErrorCodeToString (err) << "\"."); 01726 } catch (...) { 01727 // Attempt to free the new group before rethrowing. If 01728 // successful, this will prevent a memory leak due to the "lost" 01729 // group that was allocated successfully above. Since we're 01730 // throwing std::logic_error anyway, we can only promise 01731 // best-effort recovery; thus, we don't check the error code. 01732 (void) MPI_Group_free (&newGroup); 01733 (void) MPI_Group_free (&thisGroup); 01734 throw; 01735 } 01736 01737 // We don't need the group any more, so free it. 01738 err = MPI_Group_free (&newGroup); 01739 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::logic_error, 01740 "Failed to free subgroup. MPI_Group_free failed with error \"" 01741 << mpiErrorCodeToString (err) << "\"."); 01742 err = MPI_Group_free (&thisGroup); 01743 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::logic_error, 01744 "Failed to free subgroup. MPI_Group_free failed with error \"" 01745 << mpiErrorCodeToString (err) << "\"."); 01746 01747 if (newComm == MPI_COMM_NULL) { 01748 return RCP<Comm<Ordinal> > (); 01749 } else { 01750 using Teuchos::details::safeCommFree; 01751 typedef OpaqueWrapper<MPI_Comm> ow_type; 01752 RCP<const ow_type> wrapper = 01753 rcp_implicit_cast<const ow_type> (opaqueWrapper (newComm, safeCommFree)); 01754 // Since newComm's raw MPI_Comm is the result of an 01755 // MPI_Comm_create, its messages cannot collide with those of any 01756 // other MpiComm. This means we can assign its tag without an 01757 // MPI_Bcast. 01758 return rcp (new MpiComm<Ordinal> (wrapper, minTag_)); 01759 } 01760 } 01761 01762 01763 // Overridden from Describable 01764 01765 01766 template<typename Ordinal> 01767 std::string MpiComm<Ordinal>::description() const 01768 { 01769 std::ostringstream oss; 01770 oss 01771 << typeName(*this) 01772 << "{" 01773 << "size="<<size_ 01774 << ",rank="<<rank_ 01775 << ",rawMpiComm="<<static_cast<MPI_Comm>(*rawMpiComm_) 01776 <<"}"; 01777 return oss.str(); 01778 } 01779 01780 01781 #ifdef TEUCHOS_MPI_COMM_DUMP 01782 template<typename Ordinal> 01783 bool MpiComm<Ordinal>::show_dump = false; 01784 #endif 01785 01786 01787 // private 01788 01789 01790 template<typename Ordinal> 01791 void MpiComm<Ordinal>::assertRank(const int rank, const std::string &rankName) const 01792 { 01793 TEUCHOS_TEST_FOR_EXCEPTION( 01794 ! ( 0 <= rank && rank < size_ ), std::logic_error 01795 ,"Error, "<<rankName<<" = " << rank << " is not < 0 or is not" 01796 " in the range [0,"<<size_-1<<"]!" 01797 ); 01798 } 01799 01800 01801 } // namespace Teuchos 01802 01803 01804 template<typename Ordinal> 01805 Teuchos::RCP<Teuchos::MpiComm<Ordinal> > 01806 Teuchos::createMpiComm( 01807 const RCP<const OpaqueWrapper<MPI_Comm> > &rawMpiComm 01808 ) 01809 { 01810 if( rawMpiComm.get()!=NULL && *rawMpiComm != MPI_COMM_NULL ) 01811 return rcp(new MpiComm<Ordinal>(rawMpiComm)); 01812 return Teuchos::null; 01813 } 01814 01815 01816 #endif // TEUCHOS_MPI_COMM_HPP
1.7.6.1