Tpetra_ComputeGatherMap.hpp
// @HEADER
// ***********************************************************************
//
//          Tpetra: Templated Linear Algebra Services Package
//                 Copyright (2008) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
//
// ************************************************************************
// @HEADER

#ifndef __Tpetra_ComputeGatherMap_hpp
#define __Tpetra_ComputeGatherMap_hpp

#include "Tpetra_Map.hpp"
#include <algorithm> // std::copy, std::fill
#include <numeric>   // std::partial_sum

// Macro that marks a function as "possibly unused," in order to
// suppress build warnings.
#if ! defined(TRILINOS_UNUSED_FUNCTION)
#  if defined(__GNUC__) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER))
#    define TRILINOS_UNUSED_FUNCTION __attribute__((__unused__))
#  elif defined(__clang__)
#    if __has_attribute(unused)
#      define TRILINOS_UNUSED_FUNCTION __attribute__((__unused__))
#    else
#      define TRILINOS_UNUSED_FUNCTION
#    endif // Clang has 'unused' attribute
#  elif defined(__IBMCPP__)
// IBM's C++ compiler for Blue Gene/Q (V12.1) implements 'used' but not 'unused'.
//
// http://pic.dhe.ibm.com/infocenter/compbg/v121v141/index.jsp
#    define TRILINOS_UNUSED_FUNCTION
#  else // some other compiler
#    define TRILINOS_UNUSED_FUNCTION
#  endif
#endif // ! defined(TRILINOS_UNUSED_FUNCTION)


namespace Tpetra {
  namespace Details {

    namespace {
#ifdef HAVE_MPI
      // We're communicating integers of type IntType.  Figure
      // out the right MPI_Datatype for IntType.  Usually it
      // is int or long, so these are good enough for now.
      template<class IntType> TRILINOS_UNUSED_FUNCTION MPI_Datatype
      getMpiDatatype () {
        TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
          "Not implemented for IntType != int, long, or long long");
      }

      template<> TRILINOS_UNUSED_FUNCTION MPI_Datatype
      getMpiDatatype<int> () { return MPI_INT; }

      template<> TRILINOS_UNUSED_FUNCTION MPI_Datatype
      getMpiDatatype<long> () { return MPI_LONG; }

      template<> TRILINOS_UNUSED_FUNCTION MPI_Datatype
      getMpiDatatype<long long> () { return MPI_LONG_LONG; }
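
      // A minimal sketch of how support for another index type could be
      // added, should one ever be needed.  The 'unsigned long' case below
      // is a hypothetical example, not something this file currently uses:
      //
      //   template<> TRILINOS_UNUSED_FUNCTION MPI_Datatype
      //   getMpiDatatype<unsigned long> () { return MPI_UNSIGNED_LONG; }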
#endif // HAVE_MPI

      template<class IntType>
      void
      gather (const IntType sendbuf[], const int sendcnt,
              IntType recvbuf[], const int recvcnt,
              const int root, const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
      {
        using Teuchos::RCP;
        using Teuchos::rcp_dynamic_cast;
#ifdef HAVE_MPI
        using Teuchos::MpiComm;

        // Get the raw MPI communicator, so we don't have to wrap MPI_Gather in Teuchos.
        RCP<const MpiComm<int> > mpiComm = rcp_dynamic_cast<const MpiComm<int> > (comm);
        if (! mpiComm.is_null ()) {
          MPI_Datatype sendtype = getMpiDatatype<IntType> ();
          MPI_Datatype recvtype = sendtype;
          MPI_Comm rawMpiComm = * (mpiComm->getRawMpiComm ());
          const int err = MPI_Gather (reinterpret_cast<void*> (const_cast<IntType*> (sendbuf)),
                                      sendcnt,
                                      sendtype,
                                      reinterpret_cast<void*> (recvbuf),
                                      recvcnt,
                                      recvtype,
                                      root,
                                      rawMpiComm);
          TEUCHOS_TEST_FOR_EXCEPTION(
            err != MPI_SUCCESS, std::runtime_error, "MPI_Gather failed");
          return;
        }
#endif // HAVE_MPI

        TEUCHOS_TEST_FOR_EXCEPTION(
          recvcnt > sendcnt, std::invalid_argument,
          "gather: If the input communicator contains only one process, "
          "then you cannot receive more entries than you send.  "
          "You aim to receive " << recvcnt << " entries, "
          "but to send " << sendcnt << ".");
        // Serial communicator case: just copy.  recvcnt is the
        // amount to receive, so it's the amount to copy.
        std::copy (sendbuf, sendbuf + recvcnt, recvbuf);
      }

      template<class IntType>
      void
      gatherv (const IntType sendbuf[], const int sendcnt,
               IntType recvbuf[], const int recvcnts[], const int displs[],
               const int root, const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
      {
        using Teuchos::RCP;
        using Teuchos::rcp_dynamic_cast;
#ifdef HAVE_MPI
        using Teuchos::MpiComm;

        // Get the raw MPI communicator, so we don't have to wrap
        // MPI_Gatherv in Teuchos.
        RCP<const MpiComm<int> > mpiComm =
          rcp_dynamic_cast<const MpiComm<int> > (comm);
        if (! mpiComm.is_null ()) {
          MPI_Datatype sendtype = getMpiDatatype<IntType> ();
          MPI_Datatype recvtype = sendtype;
          MPI_Comm rawMpiComm = * (mpiComm->getRawMpiComm ());
          const int err = MPI_Gatherv (reinterpret_cast<void*> (const_cast<IntType*> (sendbuf)),
                                       sendcnt,
                                       sendtype,
                                       reinterpret_cast<void*> (recvbuf),
                                       const_cast<int*> (recvcnts),
                                       const_cast<int*> (displs),
                                       recvtype,
                                       root,
                                       rawMpiComm);
          TEUCHOS_TEST_FOR_EXCEPTION(
            err != MPI_SUCCESS, std::runtime_error, "MPI_Gatherv failed");
          return;
        }
#endif // HAVE_MPI
        TEUCHOS_TEST_FOR_EXCEPTION(
          recvcnts[0] > sendcnt,
          std::invalid_argument,
          "gatherv: If the input communicator contains only one process, "
          "then you cannot receive more entries than you send.  "
          "You aim to receive " << recvcnts[0] << " entries, "
          "but to send " << sendcnt << ".");
        // Serial communicator case: just copy.  recvcnts[0] is the
        // amount to receive, so it's the amount to copy.  Start
        // writing to recvbuf at the offset displs[0].
        std::copy (sendbuf, sendbuf + recvcnts[0], recvbuf + displs[0]);
      }
    } // namespace (anonymous)


    // Given an arbitrary Map, compute a Map containing all the GIDs
    // in the same order as in (the one-to-one version of) map, but
    // all owned exclusively by Proc 0.
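    //
    // A minimal usage sketch (the type alias 'map_type' and the
    // distributed Map 'rowMap' below are hypothetical names, not part
    // of this file):
    //
    //   typedef Tpetra::Map<> map_type;
    //   Teuchos::RCP<const map_type> rowMap = ...; // some distributed Map
    //   Teuchos::RCP<const map_type> gatherMap =
    //     Tpetra::Details::computeGatherMap (rowMap, Teuchos::null);
    //   // On Proc 0, gatherMap owns every GID of (the one-to-one
    //   // version of) rowMap, in the same order; on all other
    //   // processes it owns no GIDs.  Passing Teuchos::null for the
    //   // output stream disables debug output.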
    template<class MapType>
    Teuchos::RCP<const MapType>
    computeGatherMap (Teuchos::RCP<const MapType> map,
                      const Teuchos::RCP<Teuchos::FancyOStream>& err,
                      const bool dbg=false)
    {
      using Tpetra::createOneToOne;
      using Tpetra::global_size_t;
      using Teuchos::arcp;
      using Teuchos::Array;
      using Teuchos::ArrayRCP;
      using Teuchos::ArrayView;
      using Teuchos::as;
      using Teuchos::Comm;
      using Teuchos::RCP;
      using Teuchos::rcp;
      using std::endl;
      typedef typename MapType::local_ordinal_type LO;
      typedef typename MapType::global_ordinal_type GO;
      typedef typename MapType::node_type NT;

      RCP<const Comm<int> > comm = map->getComm ();
      const int numProcs = comm->getSize ();
      const int myRank = comm->getRank ();

      if (! err.is_null ()) {
        err->pushTab ();
      }
      if (dbg) {
        *err << myRank << ": computeGatherMap:" << endl;
      }
      if (! err.is_null ()) {
        err->pushTab ();
      }

      RCP<const MapType> oneToOneMap;
      if (map->isContiguous ()) {
        oneToOneMap = map; // contiguous Maps are always 1-to-1
      } else {
        if (dbg) {
          *err << myRank << ": computeGatherMap: Calling createOneToOne" << endl;
        }
        // The input Map could already be one-to-one, but the Map class
        // doesn't give us a way to test this, other than to create the
        // one-to-one Map.
        oneToOneMap = createOneToOne<LO, GO, NT> (map);
      }

      RCP<const MapType> gatherMap;
      if (numProcs == 1) {
        gatherMap = oneToOneMap;
      } else {
        if (dbg) {
          *err << myRank << ": computeGatherMap: Gathering Map counts" << endl;
        }
        // Gather each process' count of Map elements to Proc 0,
        // into the recvCounts array.  This will tell Proc 0 how
        // many GIDs to expect from each process when calling
        // MPI_Gatherv.  Counts and offsets are all int, because
        // that's what MPI uses.  Teuchos::as will at least prevent
        // bad casts to int in a debug build.
        //
        // This is not memory scalable: we are about to bring ALL
        // the data to Proc 0, not just the receive counts, so this
        // first gather is only the beginning.  To make this memory
        // scalable, the right thing to do (after the Export to the
        // one-to-one Map) would be to go round robin through the
        // processes, having each send a chunk of data (with its
        // GIDs, to get the order of rows right) at a time, and
        // overlapping writing to the file (resp. reading from it)
        // with receiving (resp. sending) the next chunk.
        const int myEltCount = as<int> (oneToOneMap->getNodeNumElements ());
        Array<int> recvCounts (numProcs);
        const int rootProc = 0;
        gather<int> (&myEltCount, 1, recvCounts.getRawPtr (), 1, rootProc, comm);
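        // After this call, on Proc 0, recvCounts[p] holds the number
        // of GIDs that process p owns in the one-to-one Map; on the
        // other processes, the contents of recvCounts are not used.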

        ArrayView<const GO> myGlobalElts = oneToOneMap->getNodeElementList ();
        const int numMyGlobalElts = as<int> (myGlobalElts.size ());
        // Only Proc 0 needs to receive and store all the GIDs (from
        // all processes).
        ArrayRCP<GO> allGlobalElts;
        if (myRank == 0) {
          allGlobalElts = arcp<GO> (oneToOneMap->getGlobalNumElements ());
          std::fill (allGlobalElts.begin (), allGlobalElts.end (), 0);
        }

        if (dbg) {
          *err << myRank << ": computeGatherMap: Computing MPI_Gatherv "
            "displacements" << endl;
        }
        // Displacements for gatherv() in this case (where we are
        // gathering into a contiguous array) are an exclusive
        // prefix sum of recvCounts: the first entry is zero, and
        // each subsequent entry is the sum of all previous counts.
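        //
        // For example (with three processes; the numbers are
        // illustrative only): recvCounts = {3, 1, 2} would give
        // displs = {0, 3, 4}.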
        Array<int> displs (numProcs, 0);
        std::partial_sum (recvCounts.begin (), recvCounts.end () - 1,
                          displs.begin () + 1);
        if (dbg) {
          *err << myRank << ": computeGatherMap: Calling MPI_Gatherv" << endl;
        }
        gatherv<GO> (myGlobalElts.getRawPtr (), numMyGlobalElts,
                     allGlobalElts.getRawPtr (), recvCounts.getRawPtr (),
                     displs.getRawPtr (), rootProc, comm);

        if (dbg) {
          *err << myRank << ": computeGatherMap: Creating gather Map" << endl;
        }
        // Create a Map with all the GIDs, in the same order as in
        // the one-to-one Map, but owned by Proc 0.
        ArrayView<const GO> allElts (NULL, 0);
        if (myRank == 0) {
          allElts = allGlobalElts ();
        }
        const global_size_t INVALID = Teuchos::OrdinalTraits<global_size_t>::invalid ();
        gatherMap = rcp (new MapType (INVALID, allElts,
                                      oneToOneMap->getIndexBase (),
                                      comm, map->getNode ()));
      }
      if (! err.is_null ()) {
        err->popTab ();
      }
      if (dbg) {
        *err << myRank << ": computeGatherMap: done" << endl;
      }
      if (! err.is_null ()) {
        err->popTab ();
      }
      return gatherMap;
    }

  } // namespace Details
} // namespace Tpetra

#endif // __Tpetra_ComputeGatherMap_hpp