|
Tpetra Matrix/Vector Services
Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Tpetra: Templated Linear Algebra Services Package 00005 // Copyright (2008) Sandia Corporation 00006 // 00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, 00008 // the U.S. Government retains certain rights in this software. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00038 // 00039 // ************************************************************************ 00040 // @HEADER 00041 00042 #ifndef TPETRA_DISTOBJECT_DEF_HPP 00043 #define TPETRA_DISTOBJECT_DEF_HPP 00044 00045 #include "Tpetra_ConfigDefs.hpp" 00046 #include "Tpetra_Map.hpp" 00047 #include "Tpetra_Import.hpp" 00048 #include "Tpetra_Export.hpp" 00049 #include "Tpetra_Distributor.hpp" 00050 00051 #ifdef DOXYGEN_USE_ONLY 00052 # include "Tpetra_DistObjectKA_decl.hpp" 00053 #endif // DOXYGEN_USE_ONLY 00054 00055 #if TPETRA_USE_KOKKOS_DISTOBJECT 00056 00057 namespace Tpetra { 00058 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node> 00059 DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>:: 00060 DistObjectKA (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map) 00061 : map_ (map) 00062 { 00063 #ifdef HAVE_TPETRA_TRANSFER_TIMERS 00064 using Teuchos::RCP; 00065 using Teuchos::Time; 00066 using Teuchos::TimeMonitor; 00067 00068 RCP<Time> doXferTimer = 00069 TimeMonitor::lookupCounter ("Tpetra::DistObject::doTransfer"); 00070 if (doXferTimer.is_null ()) { 00071 doXferTimer = 00072 TimeMonitor::getNewCounter ("Tpetra::DistObject::doTransfer"); 00073 } 00074 doXferTimer_ = doXferTimer; 00075 00076 RCP<Time> copyAndPermuteTimer = 00077 TimeMonitor::lookupCounter ("Tpetra::DistObject::copyAndPermute"); 00078 if (copyAndPermuteTimer.is_null ()) { 00079 copyAndPermuteTimer = 00080 TimeMonitor::getNewCounter ("Tpetra::DistObject::copyAndPermute"); 00081 } 00082 copyAndPermuteTimer_ = copyAndPermuteTimer; 00083 00084 RCP<Time> packAndPrepareTimer = 00085 TimeMonitor::lookupCounter ("Tpetra::DistObject::packAndPrepare"); 00086 if (packAndPrepareTimer.is_null ()) { 00087 packAndPrepareTimer = 00088 TimeMonitor::getNewCounter ("Tpetra::DistObject::packAndPrepare"); 00089 } 00090 packAndPrepareTimer_ = packAndPrepareTimer; 00091 00092 RCP<Time> doPostsAndWaitsTimer = 00093 TimeMonitor::lookupCounter ("Tpetra::DistObject::doPostsAndWaits"); 00094 if (doPostsAndWaitsTimer.is_null ()) { 00095 doPostsAndWaitsTimer = 00096 TimeMonitor::getNewCounter ("Tpetra::DistObject::doPostsAndWaits"); 00097 } 00098 doPostsAndWaitsTimer_ = doPostsAndWaitsTimer; 00099 00100 RCP<Time> unpackAndCombineTimer = 00101 TimeMonitor::lookupCounter ("Tpetra::DistObject::unpackAndCombine"); 00102 if (unpackAndCombineTimer.is_null ()) { 00103 unpackAndCombineTimer = 00104 TimeMonitor::getNewCounter ("Tpetra::DistObject::unpackAndCombine"); 00105 } 00106 unpackAndCombineTimer_ = unpackAndCombineTimer; 00107 #endif // HAVE_TPETRA_TRANSFER_TIMERS 00108 } 00109 00110 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node> 00111 DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>:: 00112 DistObjectKA (const DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>& rhs) 00113 : map_ (rhs.map_) 00114 {} 00115 00116 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node> 00117 DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::~DistObjectKA() 00118 {} 00119 00120 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node> 00121 std::string 00122 DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::description () const 00123 { 00124 using Teuchos::TypeNameTraits; 00125 00126 std::ostringstream os; 00127 os << "Tpetra::DistObject<" 00128 << TypeNameTraits<Packet>::name () 00129 << ", " << TypeNameTraits<LocalOrdinal>::name () 00130 << ", " << TypeNameTraits<GlobalOrdinal>::name () 00131 << ", " << TypeNameTraits<Node>::name () 00132 << ">"; 00133 return os.str (); 00134 } 00135 00136 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node> 00137 void 00138 DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>:: 00139 describe (Teuchos::FancyOStream &out, 00140 const Teuchos::EVerbosityLevel verbLevel) const 00141 { 00142 using Teuchos::rcpFromRef; 00143 using std::endl; 00144 00145 const Teuchos::EVerbosityLevel vl = (verbLevel == Teuchos::VERB_DEFAULT) ? 00146 Teuchos::VERB_LOW : verbLevel; 00147 00148 if (vl != Teuchos::VERB_NONE) { 00149 out << this->description () << endl; 00150 Teuchos::OSTab tab (rcpFromRef (out)); 00151 out << "Export buffer size (in packets): " << exports_.size() << endl 00152 << "Import buffer size (in packets): " << imports_.size() << endl 00153 << "Map over which this object is distributed:" << endl; 00154 map_->describe (out, vl); 00155 } 00156 } 00157 00158 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node> 00159 void 00160 DistObjectKA<Packet, LocalOrdinal, GlobalOrdinal, Node>:: 00161 removeEmptyProcessesInPlace (const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >& newMap) 00162 { 00163 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, 00164 "Tpetra::DistObject::removeEmptyProcessesInPlace: Not implemented"); 00165 } 00166 00167 template<class DistObjectType> 00168 void 00169 removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input, 00170 const Teuchos::RCP<const Map<typename DistObjectType::local_ordinal_type, 00171 typename DistObjectType::global_ordinal_type, 00172 typename DistObjectType::node_type> >& newMap) 00173 { 00174 input->removeEmptyProcessesInPlace (newMap); 00175 if (newMap.is_null ()) { // my process is excluded 00176 input = Teuchos::null; 00177 } 00178 } 00179 00180 template<class DistObjectType> 00181 void 00182 removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input) 00183 { 00184 using Teuchos::RCP; 00185 typedef typename DistObjectType::local_ordinal_type LO; 00186 typedef typename DistObjectType::global_ordinal_type GO; 00187 typedef typename DistObjectType::node_type NT; 00188 typedef Map<LO, GO, NT> map_type; 00189 00190 RCP<const map_type> newMap = input->getMap ()->removeEmptyProcesses (); 00191 removeEmptyProcessesInPlace<DistObjectType> (input, newMap); 00192 } 00193 00194 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node> 00195 void 00196 DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>:: 00197 doImport (const SrcDistObject& source, 00198 const Import<LocalOrdinal,GlobalOrdinal,Node>& importer, 00199 CombineMode CM) 00200 { 00201 TEUCHOS_TEST_FOR_EXCEPTION(*getMap() != *importer.getTargetMap(), 00202 std::invalid_argument, "doImport: The target DistObject's Map is not " 00203 "identical to the Import's target Map."); 00204 #ifdef HAVE_TPETRA_DEBUG 00205 { 00206 typedef DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node> this_type; 00207 const this_type* srcDistObj = dynamic_cast<const this_type*> (&source); 00208 TEUCHOS_TEST_FOR_EXCEPTION( 00209 srcDistObj != NULL && * (srcDistObj->getMap ()) != *importer.getSourceMap(), 00210 std::invalid_argument, "doImport: The source is a DistObject, yet its " 00211 "Map is not identical to the Import's source Map."); 00212 } 00213 #endif // HAVE_TPETRA_DEBUG 00214 size_t numSameIDs = importer.getNumSameIDs (); 00215 00216 typedef Teuchos::ArrayView<const LocalOrdinal> view_type; 00217 const view_type exportLIDs = importer.getExportLIDs(); 00218 const view_type remoteLIDs = importer.getRemoteLIDs(); 00219 const view_type permuteToLIDs = importer.getPermuteToLIDs(); 00220 const view_type permuteFromLIDs = importer.getPermuteFromLIDs(); 00221 this->doTransfer (source, CM, numSameIDs, permuteToLIDs, permuteFromLIDs, 00222 remoteLIDs, exportLIDs, importer.getDistributor (), 00223 DoForward); 00224 } 00225 00226 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node> 00227 void 00228 DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>:: 00229 doExport (const SrcDistObject& source, 00230 const Export<LocalOrdinal,GlobalOrdinal,Node>& exporter, 00231 CombineMode CM) 00232 { 00233 TEUCHOS_TEST_FOR_EXCEPTION( 00234 *getMap() != *exporter.getTargetMap(), std::invalid_argument, 00235 "doExport: The target DistObject's Map is not identical to the Export's " 00236 "target Map."); 00237 #ifdef HAVE_TPETRA_DEBUG 00238 { 00239 typedef DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node> this_type; 00240 const this_type* srcDistObj = dynamic_cast<const this_type*> (&source); 00241 TEUCHOS_TEST_FOR_EXCEPTION( 00242 srcDistObj != NULL && * (srcDistObj->getMap ()) != *exporter.getSourceMap(), 00243 std::invalid_argument, "doExport: The source is a DistObject, yet its " 00244 "Map is not identical to the Export's source Map."); 00245 } 00246 #endif // HAVE_TPETRA_DEBUG 00247 size_t numSameIDs = exporter.getNumSameIDs(); 00248 00249 typedef ArrayView<const LocalOrdinal> view_type; 00250 view_type exportLIDs = exporter.getExportLIDs(); 00251 view_type remoteLIDs = exporter.getRemoteLIDs(); 00252 view_type permuteToLIDs = exporter.getPermuteToLIDs(); 00253 view_type permuteFromLIDs = exporter.getPermuteFromLIDs(); 00254 doTransfer (source, CM, numSameIDs, permuteToLIDs, permuteFromLIDs, remoteLIDs, 00255 exportLIDs, exporter.getDistributor (), DoForward); 00256 } 00257 00258 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node> 00259 void 00260 DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>:: 00261 doImport (const SrcDistObject& source, 00262 const Export<LocalOrdinal,GlobalOrdinal,Node> & exporter, 00263 CombineMode CM) 00264 { 00265 TEUCHOS_TEST_FOR_EXCEPTION( 00266 *getMap() != *exporter.getSourceMap(), std::invalid_argument, 00267 "doImport (reverse mode): The target DistObject's Map is not identical " 00268 "to the Export's source Map."); 00269 #ifdef HAVE_TPETRA_DEBUG 00270 { 00271 typedef DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node> this_type; 00272 const this_type* srcDistObj = dynamic_cast<const this_type*> (&source); 00273 TEUCHOS_TEST_FOR_EXCEPTION( 00274 srcDistObj != NULL && * (srcDistObj->getMap ()) != *exporter.getTargetMap(), 00275 std::invalid_argument, 00276 "doImport (reverse mode): The source is a DistObject, yet its " 00277 "Map is not identical to the Export's target Map."); 00278 } 00279 #endif // HAVE_TPETRA_DEBUG 00280 size_t numSameIDs = exporter.getNumSameIDs(); 00281 00282 typedef ArrayView<const LocalOrdinal> view_type; 00283 view_type exportLIDs = exporter.getRemoteLIDs(); 00284 view_type remoteLIDs = exporter.getExportLIDs(); 00285 view_type permuteToLIDs = exporter.getPermuteFromLIDs(); 00286 view_type permuteFromLIDs = exporter.getPermuteToLIDs(); 00287 doTransfer (source, CM, numSameIDs, permuteToLIDs, permuteFromLIDs, remoteLIDs, 00288 exportLIDs, exporter.getDistributor (), DoReverse); 00289 } 00290 00291 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node> 00292 void 00293 DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>:: 00294 doExport (const SrcDistObject& source, 00295 const Import<LocalOrdinal,GlobalOrdinal,Node> & importer, 00296 CombineMode CM) 00297 { 00298 TEUCHOS_TEST_FOR_EXCEPTION( 00299 *getMap() != *importer.getSourceMap(), std::invalid_argument, 00300 "doExport (reverse mode): The target object's Map " 00301 "is not identical to the Import's source Map."); 00302 #ifdef HAVE_TPETRA_DEBUG 00303 { 00304 typedef DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node> this_type; 00305 const this_type* srcDistObj = dynamic_cast<const this_type*> (&source); 00306 TEUCHOS_TEST_FOR_EXCEPTION( 00307 srcDistObj != NULL && * (srcDistObj->getMap ()) != *importer.getTargetMap(), 00308 std::invalid_argument, 00309 "doExport (reverse mode): The source is a DistObject, yet its " 00310 "Map is not identical to the Import's target Map."); 00311 } 00312 #endif // HAVE_TPETRA_DEBUG 00313 size_t numSameIDs = importer.getNumSameIDs(); 00314 00315 typedef ArrayView<const LocalOrdinal> view_type; 00316 view_type exportLIDs = importer.getRemoteLIDs(); 00317 view_type remoteLIDs = importer.getExportLIDs(); 00318 view_type permuteToLIDs = importer.getPermuteFromLIDs(); 00319 view_type permuteFromLIDs = importer.getPermuteToLIDs(); 00320 doTransfer (source, CM, numSameIDs, permuteToLIDs, permuteFromLIDs, remoteLIDs, 00321 exportLIDs, importer.getDistributor (), DoReverse); 00322 } 00323 00324 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node> 00325 bool 00326 DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::isDistributed() const { 00327 return map_->isDistributed (); 00328 } 00329 00330 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node> 00331 size_t 00332 DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>:: 00333 constantNumberOfPackets () const { 00334 return 0; // default implementation; subclasses may override 00335 } 00336 00337 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node> 00338 void 00339 DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>:: 00340 doTransfer (const SrcDistObject& src, 00341 CombineMode CM, 00342 size_t numSameIDs, 00343 const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs_, 00344 const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs_, 00345 const Teuchos::ArrayView<const LocalOrdinal>& remoteLIDs_, 00346 const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs_, 00347 Distributor &distor, 00348 ReverseOption revOp) 00349 { 00350 using Teuchos::as; 00351 using Kokkos::Compat::getArrayView; 00352 using Kokkos::Compat::getConstArrayView; 00353 using Kokkos::Compat::getKokkosViewDeepCopy; 00354 using Kokkos::Compat::create_const_view; 00355 00356 #ifdef HAVE_TPETRA_TRANSFER_TIMERS 00357 Teuchos::TimeMonitor doXferMon (*doXferTimer_); 00358 #endif // HAVE_TPETRA_TRANSFER_TIMERS 00359 00360 // Convert arguments to Kokkos::View's (involves deep copy to device) 00361 typedef Kokkos::View<const LocalOrdinal*, device_type> lo_const_view_type; 00362 lo_const_view_type permuteToLIDs = 00363 getKokkosViewDeepCopy<device_type> (permuteToLIDs_); 00364 lo_const_view_type permuteFromLIDs = 00365 getKokkosViewDeepCopy<device_type> (permuteFromLIDs_); 00366 lo_const_view_type remoteLIDs = 00367 getKokkosViewDeepCopy<device_type> (remoteLIDs_); 00368 lo_const_view_type exportLIDs = 00369 getKokkosViewDeepCopy<device_type> (exportLIDs_); 00370 00371 TEUCHOS_TEST_FOR_EXCEPTION( 00372 ! checkSizes (src), std::invalid_argument, 00373 "Tpetra::DistObject::doTransfer(): checkSizes() indicates that the " 00374 "destination object is not a legal target for redistribution from the " 00375 "source object. This probably means that they do not have the same " 00376 "dimensions. For example, MultiVectors must have the same number of " 00377 "rows and columns."); 00378 KokkosClassic::ReadWriteOption rwo = KokkosClassic::ReadWrite; 00379 if (CM == INSERT || CM == REPLACE) { 00380 const size_t numIDsToWrite = numSameIDs + 00381 as<size_t> (permuteToLIDs.size ()) + 00382 as<size_t> (remoteLIDs.size ()); 00383 if (numIDsToWrite == this->getMap ()->getNodeNumElements ()) { 00384 // We're overwriting all of our local data in the destination 00385 // object, so a write-only view suffices. 00386 // 00387 // FIXME (mfh 10 Apr 2012) This doesn't make sense for a 00388 // CrsMatrix with a dynamic graph. INSERT mode could mean 00389 // that we're adding new entries to the object, but we don't 00390 // want to get rid of the old ones. 00391 rwo = KokkosClassic::WriteOnly; 00392 } 00393 } 00394 // Tell the source to create a read-only view of its data. On a 00395 // discrete accelerator such as a GPU, this brings EVERYTHING from 00396 // device memory to host memory. 00397 // 00398 // FIXME (mfh 23 Mar 2012) By passing in the list of GIDs (or 00399 // rather, local LIDs to send) and packet counts, createViews() 00400 // could create a "sparse view" that only brings in the necessary 00401 // data from device to host memory. 00402 typedef DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node> this_type; 00403 const this_type* srcDistObj = dynamic_cast<const this_type*> (&src); 00404 if (srcDistObj != NULL) { 00405 srcDistObj->createViews (); 00406 } 00407 00408 // Tell the target to create a view of its data. Depending on 00409 // rwo, this could be a write-only view or a read-and-write view. 00410 // On a discrete accelerator such as a GPU, a write-only view only 00411 // requires a transfer from host to device memory. A 00412 // read-and-write view requires a two-way transfer. This has the 00413 // same problem as createViews(): it transfers EVERYTHING, not 00414 // just the necessary data. 00415 // 00416 // FIXME (mfh 23 Mar 2012) By passing in the list of GIDs (or 00417 // rather, local LIDs into which to receive) and packet counts, 00418 // createViewsNonConst() could create a "sparse view" that only 00419 // transfers the necessary data. 00420 this->createViewsNonConst (rwo); 00421 00422 if (numSameIDs + permuteToLIDs.size()) { 00423 #ifdef HAVE_TPETRA_TRANSFER_TIMERS 00424 Teuchos::TimeMonitor copyAndPermuteMon (*copyAndPermuteTimer_); 00425 #endif // HAVE_TPETRA_TRANSFER_TIMERS 00426 // There is at least one GID to copy or permute. 00427 copyAndPermute (src, numSameIDs, permuteToLIDs, permuteFromLIDs); 00428 } 00429 00430 // The method may return zero even if the implementation actually 00431 // does have a constant number of packets per LID. However, if it 00432 // returns nonzero, we may use this information to avoid 00433 // (re)allocating num{Ex,Im}portPacketsPerLID_. packAndPrepare() 00434 // will set this to its final value. 00435 // 00436 // We only need this if CM != ZERO, but it has to be lifted out of 00437 // that scope because there are multiple tests for CM != ZERO. 00438 size_t constantNumPackets = this->constantNumberOfPackets (); 00439 00440 // We only need to pack communication buffers if the combine mode 00441 // is not ZERO. A "ZERO combine mode" means that the results are 00442 // the same as if we had received all zeros, and added them to the 00443 // existing values. That means we don't need to communicate. 00444 if (CM != ZERO) { 00445 if (constantNumPackets == 0) { 00446 Kokkos::Compat::realloc (numExportPacketsPerLID_, exportLIDs.size ()); 00447 Kokkos::Compat::realloc (numImportPacketsPerLID_, remoteLIDs.size ()); 00448 } 00449 00450 { 00451 #ifdef HAVE_TPETRA_TRANSFER_TIMERS 00452 Teuchos::TimeMonitor packAndPrepareMon (*packAndPrepareTimer_); 00453 #endif // HAVE_TPETRA_TRANSFER_TIMERS 00454 // Ask the source to pack data. Also ask it whether there are a 00455 // constant number of packets per element (constantNumPackets is 00456 // an output argument). If there are, constantNumPackets will 00457 // come back nonzero. Otherwise, the source will fill the 00458 // numExportPacketsPerLID_ array. 00459 packAndPrepare (src, exportLIDs, exports_, numExportPacketsPerLID_, 00460 constantNumPackets, distor); 00461 } 00462 } 00463 00464 // We don't need the source's data anymore, so it can let go of 00465 // its views. On an accelerator device with a separate memory 00466 // space (like a GPU), this frees host memory, since device memory 00467 // has the "master" version of the data. 00468 if (srcDistObj != NULL) { 00469 srcDistObj->releaseViews (); 00470 } 00471 00472 // We only need to send data if the combine mode is not ZERO. 00473 if (CM != ZERO) { 00474 if (constantNumPackets != 0) { 00475 // There are a constant number of packets per element. We 00476 // already know (from the number of "remote" (incoming) 00477 // elements) how many incoming elements we expect, so we can 00478 // resize the buffer accordingly. 00479 const size_t rbufLen = remoteLIDs.size() * constantNumPackets; 00480 if (as<size_t> (imports_.size()) != rbufLen) { 00481 Kokkos::Compat::realloc (imports_, rbufLen); 00482 } 00483 } 00484 00485 // Create mirror views of [import|export]PacketsPerLID 00486 typename Kokkos::View<size_t*,device_type>::HostMirror host_numExportPacketsPerLID = Kokkos::create_mirror_view (numExportPacketsPerLID_); 00487 typename Kokkos::View<size_t*,device_type>::HostMirror host_numImportPacketsPerLID = Kokkos::create_mirror_view (numImportPacketsPerLID_); 00488 00489 // Copy numExportPacketsPerLID to host 00490 Kokkos::deep_copy (host_numExportPacketsPerLID, numExportPacketsPerLID_); 00491 00492 // Do we need to do communication (via doPostsAndWaits)? 00493 bool needCommunication = true; 00494 if (revOp == DoReverse && ! isDistributed ()) { 00495 needCommunication = false; 00496 } 00497 // FIXME (mfh 30 Jun 2013): Checking whether the source object 00498 // is distributed requires a cast to DistObject. If it's not a 00499 // DistObject, then I'm not quite sure what to do. Perhaps it 00500 // would be more appropriate for SrcDistObject to have an 00501 // isDistributed() method. For now, I'll just assume that we 00502 // need to do communication unless the cast succeeds and the 00503 // source is not distributed. 00504 else if (revOp == DoForward && srcDistObj != NULL && 00505 ! srcDistObj->isDistributed ()) { 00506 needCommunication = false; 00507 } 00508 00509 if (needCommunication) { 00510 if (revOp == DoReverse) { 00511 #ifdef HAVE_TPETRA_TRANSFER_TIMERS 00512 Teuchos::TimeMonitor doPostsAndWaitsMon (*doPostsAndWaitsTimer_); 00513 #endif // HAVE_TPETRA_TRANSFER_TIMERS 00514 if (constantNumPackets == 0) { //variable num-packets-per-LID: 00515 distor.doReversePostsAndWaits (create_const_view (host_numExportPacketsPerLID), 00516 1, 00517 host_numImportPacketsPerLID); 00518 size_t totalImportPackets = 0; 00519 for (view_size_type i = 0; i < numImportPacketsPerLID_.size(); ++i) { 00520 totalImportPackets += host_numImportPacketsPerLID[i]; 00521 } 00522 Kokkos::Compat::realloc (imports_, totalImportPackets); 00523 distor.doReversePostsAndWaits (create_const_view (exports_), 00524 getArrayView (host_numExportPacketsPerLID), 00525 imports_, 00526 getArrayView (host_numImportPacketsPerLID)); 00527 } 00528 else { 00529 distor.doReversePostsAndWaits (create_const_view (exports_), 00530 constantNumPackets, 00531 imports_); 00532 } 00533 } 00534 else { // revOp == DoForward 00535 #ifdef HAVE_TPETRA_TRANSFER_TIMERS 00536 Teuchos::TimeMonitor doPostsAndWaitsMon (*doPostsAndWaitsTimer_); 00537 #endif // HAVE_TPETRA_TRANSFER_TIMERS 00538 if (constantNumPackets == 0) { //variable num-packets-per-LID: 00539 distor.doPostsAndWaits (create_const_view (host_numExportPacketsPerLID), 1, 00540 host_numImportPacketsPerLID); 00541 size_t totalImportPackets = 0; 00542 for (view_size_type i = 0; i < numImportPacketsPerLID_.size(); ++i) { 00543 totalImportPackets += host_numImportPacketsPerLID[i]; 00544 } 00545 Kokkos::Compat::realloc (imports_, totalImportPackets); 00546 distor.doPostsAndWaits (create_const_view (exports_), 00547 getArrayView (host_numExportPacketsPerLID), 00548 imports_, 00549 getArrayView (host_numImportPacketsPerLID)); 00550 } 00551 else { 00552 distor.doPostsAndWaits (create_const_view (exports_), 00553 constantNumPackets, 00554 imports_); 00555 } 00556 } 00557 00558 // Copy numImportPacketsPerLID to device 00559 Kokkos::deep_copy (numImportPacketsPerLID_, host_numImportPacketsPerLID); 00560 00561 { 00562 #ifdef HAVE_TPETRA_TRANSFER_TIMERS 00563 Teuchos::TimeMonitor unpackAndCombineMon (*unpackAndCombineTimer_); 00564 #endif // HAVE_TPETRA_TRANSFER_TIMERS 00565 unpackAndCombine (remoteLIDs, imports_, numImportPacketsPerLID_, 00566 constantNumPackets, distor, CM); 00567 } 00568 } 00569 } // if (CM != ZERO) 00570 00571 this->releaseViews (); 00572 } 00573 00574 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node> 00575 void 00576 DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::print (std::ostream &os) const 00577 { 00578 using Teuchos::FancyOStream; 00579 using Teuchos::getFancyOStream; 00580 using Teuchos::RCP; 00581 using Teuchos::rcpFromRef; 00582 using std::endl; 00583 00584 RCP<FancyOStream> out = getFancyOStream (rcpFromRef (os)); 00585 this->describe (*out, Teuchos::VERB_DEFAULT); 00586 } 00587 00588 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node> 00589 void 00590 DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::createViews () const 00591 {} 00592 00593 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node> 00594 void 00595 DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>:: 00596 createViewsNonConst (KokkosClassic::ReadWriteOption /*rwo*/) 00597 {} 00598 00599 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node> 00600 void 00601 DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>:: 00602 releaseViews () const 00603 {} 00604 00605 #define TPETRA_DISTOBJECTKA_INSTANT(SCALAR, LO, GO, NODE) \ 00606 \ 00607 template class DistObjectKA< SCALAR , LO , GO , NODE >; 00608 00609 // The "SLGN" stuff above doesn't work for Packet=char. 00610 #define TPETRA_DISTOBJECTKA_INSTANT_CHAR(LO, GO, NODE) \ 00611 \ 00612 template class DistObjectKA< char , LO , GO , NODE >; 00613 00614 00615 } // namespace Tpetra 00616 00617 #endif /* TPETRA_ENABLE_KOKKOSARRAY_DISTOBJECT */ 00618 00619 #endif /* TPETRA_DISTOBJECT_DEF_HPP */
1.7.6.1