// @HEADER
// ***********************************************************************
//
//          Tpetra: Templated Linear Algebra Services Package
//                 Copyright (2008) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
//
// ************************************************************************
// @HEADER

#ifndef __Tpetra_ComputeGatherMap_hpp
#define __Tpetra_ComputeGatherMap_hpp

#include "Tpetra_Map.hpp"
#include <algorithm> // std::copy, std::fill
#include <numeric>   // std::partial_sum

// Macro that marks a function as "possibly unused," in order to
// suppress build warnings.
#if ! defined(TRILINOS_UNUSED_FUNCTION)
#  if defined(__GNUC__) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER))
#    define TRILINOS_UNUSED_FUNCTION __attribute__((__unused__))
#  elif defined(__clang__)
#    if __has_attribute(unused)
#      define TRILINOS_UNUSED_FUNCTION __attribute__((__unused__))
#    else
#      define TRILINOS_UNUSED_FUNCTION
#    endif // Clang has 'unused' attribute
#  elif defined(__IBMCPP__)
//   IBM's C++ compiler for Blue Gene/Q (V12.1) implements 'used' but not
//   'unused'.
//
//   http://pic.dhe.ibm.com/infocenter/compbg/v121v141/index.jsp
#    define TRILINOS_UNUSED_FUNCTION
#  else // some other compiler
#    define TRILINOS_UNUSED_FUNCTION
#  endif
#endif // ! defined(TRILINOS_UNUSED_FUNCTION)

namespace Tpetra {
  namespace Details {

    namespace {
#ifdef HAVE_MPI
      // We're communicating integers of type IntType.  Figure
      // out the right MPI_Datatype for IntType.  Usually it
      // is int or long, so these are good enough for now.
      template<class IntType> TRILINOS_UNUSED_FUNCTION MPI_Datatype
      getMpiDatatype () {
        TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
          "Not implemented for IntType != int, long, or long long");
      }

      template<> TRILINOS_UNUSED_FUNCTION MPI_Datatype
      getMpiDatatype<int> () { return MPI_INT; }

      template<> TRILINOS_UNUSED_FUNCTION MPI_Datatype
      getMpiDatatype<long> () { return MPI_LONG; }

      template<> TRILINOS_UNUSED_FUNCTION MPI_Datatype
      getMpiDatatype<long long> () { return MPI_LONG_LONG; }
#endif // HAVE_MPI

      template<class IntType>
      void
      gather (const IntType sendbuf[], const int sendcnt,
              IntType recvbuf[], const int recvcnt,
              const int root, const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
      {
        using Teuchos::RCP;
        using Teuchos::rcp_dynamic_cast;
#ifdef HAVE_MPI
        using Teuchos::MpiComm;

        // Get the raw MPI communicator, so we don't have to wrap
        // MPI_Gather in Teuchos.
        RCP<const MpiComm<int> > mpiComm =
          rcp_dynamic_cast<const MpiComm<int> > (comm);
        if (! mpiComm.is_null ()) {
          MPI_Datatype sendtype = getMpiDatatype<IntType> ();
          MPI_Datatype recvtype = sendtype;
          MPI_Comm rawMpiComm = * (mpiComm->getRawMpiComm ());
          const int err =
            MPI_Gather (reinterpret_cast<void*> (const_cast<IntType*> (sendbuf)),
                        sendcnt, sendtype,
                        reinterpret_cast<void*> (recvbuf),
                        recvcnt, recvtype,
                        root, rawMpiComm);
          TEUCHOS_TEST_FOR_EXCEPTION(
            err != MPI_SUCCESS, std::runtime_error, "MPI_Gather failed");
          return;
        }
#endif // HAVE_MPI

        TEUCHOS_TEST_FOR_EXCEPTION(
          recvcnt > sendcnt, std::invalid_argument,
          "gather: If the input communicator contains only one process, "
          "then you cannot receive more entries than you send.  "
          "You aim to receive " << recvcnt << " entries, "
          "but to send " << sendcnt << ".");
        // Serial communicator case: just copy.  recvcnt is the
        // amount to receive, so it's the amount to copy.
        std::copy (sendbuf, sendbuf + recvcnt, recvbuf);
      }

      template<class IntType>
      void
      gatherv (const IntType sendbuf[], const int sendcnt,
               IntType recvbuf[], const int recvcnts[], const int displs[],
               const int root, const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
      {
        using Teuchos::RCP;
        using Teuchos::rcp_dynamic_cast;
#ifdef HAVE_MPI
        using Teuchos::MpiComm;

        // Get the raw MPI communicator, so we don't have to wrap
        // MPI_Gatherv in Teuchos.
        RCP<const MpiComm<int> > mpiComm =
          rcp_dynamic_cast<const MpiComm<int> > (comm);
        if (! mpiComm.is_null ()) {
          MPI_Datatype sendtype = getMpiDatatype<IntType> ();
          MPI_Datatype recvtype = sendtype;
          MPI_Comm rawMpiComm = * (mpiComm->getRawMpiComm ());
          const int err =
            MPI_Gatherv (reinterpret_cast<void*> (const_cast<IntType*> (sendbuf)),
                         sendcnt, sendtype,
                         reinterpret_cast<void*> (recvbuf),
                         const_cast<int*> (recvcnts),
                         const_cast<int*> (displs),
                         recvtype, root, rawMpiComm);
          TEUCHOS_TEST_FOR_EXCEPTION(
            err != MPI_SUCCESS, std::runtime_error, "MPI_Gatherv failed");
          return;
        }
#endif // HAVE_MPI

        TEUCHOS_TEST_FOR_EXCEPTION(
          recvcnts[0] > sendcnt, std::invalid_argument,
          "gatherv: If the input communicator contains only one process, "
          "then you cannot receive more entries than you send.  "
          "You aim to receive " << recvcnts[0] << " entries, "
          "but to send " << sendcnt << ".");
        // Serial communicator case: just copy.  recvcnts[0] is the
        // amount to receive, so it's the amount to copy.  Start
        // writing to recvbuf at the offset displs[0].
        std::copy (sendbuf, sendbuf + recvcnts[0], recvbuf + displs[0]);
      }
    } // namespace (anonymous)


    // Given an arbitrary Map, compute a Map containing all the GIDs
    // in the same order as in (the one-to-one version of) map, but
    // all owned exclusively by Proc 0.
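    //
    // For example: if a Map distributes GIDs {0, 1, 2} to Proc 0 and
    // {3, 4, 5} to Proc 1, the returned "gather Map" owns all of
    // {0, 1, 2, 3, 4, 5} on Proc 0, in that order, and owns no GIDs
    // on any other process.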
    template<class MapType>
    Teuchos::RCP<const MapType>
    computeGatherMap (Teuchos::RCP<const MapType> map,
                      const Teuchos::RCP<Teuchos::FancyOStream>& err,
                      const bool dbg=false)
    {
      using Tpetra::createOneToOne;
      using Tpetra::global_size_t;
      using Teuchos::arcp;
      using Teuchos::Array;
      using Teuchos::ArrayRCP;
      using Teuchos::ArrayView;
      using Teuchos::as;
      using Teuchos::Comm;
      using Teuchos::RCP;
      using Teuchos::rcp;
      using std::endl;
      typedef typename MapType::local_ordinal_type LO;
      typedef typename MapType::global_ordinal_type GO;
      typedef typename MapType::node_type NT;

      RCP<const Comm<int> > comm = map->getComm ();
      const int numProcs = comm->getSize ();
      const int myRank = comm->getRank ();

      if (! err.is_null ()) {
        err->pushTab ();
      }
      if (dbg) {
        *err << myRank << ": computeGatherMap:" << endl;
      }
      if (! err.is_null ()) {
        err->pushTab ();
      }

      RCP<const MapType> oneToOneMap;
      if (map->isContiguous ()) {
        oneToOneMap = map; // contiguous Maps are always 1-to-1
      } else {
        if (dbg) {
          *err << myRank << ": computeGatherMap: Calling createOneToOne" << endl;
        }
        // It could be that Map is one-to-one, but the class doesn't
        // give us a way to test this, other than to create the
        // one-to-one Map.
        oneToOneMap = createOneToOne<LO, GO, NT> (map);
      }

      RCP<const MapType> gatherMap;
      if (numProcs == 1) {
        gatherMap = oneToOneMap;
      } else {
        if (dbg) {
          *err << myRank << ": computeGatherMap: Gathering Map counts" << endl;
        }
        // Gather each process' count of Map elements to Proc 0,
        // into the recvCounts array.  This will tell Proc 0 how
        // many GIDs to expect from each process when calling
        // MPI_Gatherv.  Counts and offsets are all int, because
        // that's what MPI uses.  Teuchos::as will at least prevent
        // bad casts to int in a debug build.
        //
        // Yeah, it's not memory scalable.  Guess what: We're going
        // to bring ALL the data to Proc 0, not just the receive
        // counts.  The first MPI_Gather is only the beginning...
        // Seriously, if you want to make this memory scalable, the
        // right thing to do (after the Export to the one-to-one
        // Map) is to go round robin through the processes, having
        // each send a chunk of data (with its GIDs, to get the
        // order of rows right) at a time, and overlapping writing
        // to the file (resp. reading from it) with receiving (resp.
        // sending) the next chunk.
        const int myEltCount = as<int> (oneToOneMap->getNodeNumElements ());
        Array<int> recvCounts (numProcs);
        const int rootProc = 0;
        gather<int> (&myEltCount, 1, recvCounts.getRawPtr (), 1, rootProc, comm);

        ArrayView<const GO> myGlobalElts = oneToOneMap->getNodeElementList ();
        const int numMyGlobalElts = as<int> (myGlobalElts.size ());
        // Only Proc 0 needs to receive and store all the GIDs (from
        // all processes).
        ArrayRCP<GO> allGlobalElts;
        if (myRank == 0) {
          allGlobalElts = arcp<GO> (oneToOneMap->getGlobalNumElements ());
          std::fill (allGlobalElts.begin (), allGlobalElts.end (), 0);
        }

        if (dbg) {
          *err << myRank << ": computeGatherMap: Computing MPI_Gatherv "
            "displacements" << endl;
        }
        // Displacements for gatherv() in this case (where we are
        // gathering into a contiguous array) are an exclusive
        // partial sum (first entry is zero, second starts the
        // partial sum) of recvCounts.
        Array<int> displs (numProcs, 0);
        std::partial_sum (recvCounts.begin (), recvCounts.end () - 1,
                          displs.begin () + 1);
        if (dbg) {
          *err << myRank << ": computeGatherMap: Calling MPI_Gatherv" << endl;
        }
        gatherv<GO> (myGlobalElts.getRawPtr (), numMyGlobalElts,
                     allGlobalElts.getRawPtr (), recvCounts.getRawPtr (),
                     displs.getRawPtr (), rootProc, comm);

        if (dbg) {
          *err << myRank << ": computeGatherMap: Creating gather Map" << endl;
        }
        // Create a Map with all the GIDs, in the same order as in
        // the one-to-one Map, but owned by Proc 0.
        ArrayView<const GO> allElts (NULL, 0);
        if (myRank == 0) {
          allElts = allGlobalElts ();
        }
        const global_size_t INVALID =
          Teuchos::OrdinalTraits<global_size_t>::invalid ();
        gatherMap = rcp (new MapType (INVALID, allElts,
                                      oneToOneMap->getIndexBase (),
                                      comm, map->getNode ()));
      }
      if (! err.is_null ()) {
        err->popTab ();
      }
      if (dbg) {
        *err << myRank << ": computeGatherMap: done" << endl;
      }
      if (! err.is_null ()) {
        err->popTab ();
      }
      return gatherMap;
    }

  } // namespace Details
} // namespace Tpetra

#endif // __Tpetra_ComputeGatherMap_hpp
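A minimal usage sketch (not part of the header itself), assuming Tpetra::Map's default template arguments and an already-constructed Teuchos communicator named comm; the variable names and the Map constructor arguments below are illustrative only:

    typedef Tpetra::Map<> map_type;
    // A Map that distributes 100 GIDs uniformly over all processes
    // of comm, with index base 0.
    const Tpetra::global_size_t numGlobalEntries = 100;
    Teuchos::RCP<const map_type> map =
      Teuchos::rcp (new map_type (numGlobalEntries, 0, comm));
    // Gather Map: the same GIDs in the same order, but all owned by
    // Proc 0.  Passing a null FancyOStream (with dbg left at its
    // default of false) disables debug output.
    Teuchos::RCP<const map_type> gatherMap =
      Tpetra::Details::computeGatherMap<map_type> (map, Teuchos::null);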