Tpetra Matrix/Vector Services  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines
Tpetra_DistObjectKA_def.hpp
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //          Tpetra: Templated Linear Algebra Services Package
00005 //                 Copyright (2008) Sandia Corporation
00006 //
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ************************************************************************
00040 // @HEADER
00041 
00042 #ifndef TPETRA_DISTOBJECT_DEF_HPP
00043 #define TPETRA_DISTOBJECT_DEF_HPP
00044 
00045 #include "Tpetra_ConfigDefs.hpp"
00046 #include "Tpetra_Map.hpp"
00047 #include "Tpetra_Import.hpp"
00048 #include "Tpetra_Export.hpp"
00049 #include "Tpetra_Distributor.hpp"
00050 
00051 #ifdef DOXYGEN_USE_ONLY
00052 #  include "Tpetra_DistObjectKA_decl.hpp"
00053 #endif // DOXYGEN_USE_ONLY
00054 
00055 #if TPETRA_USE_KOKKOS_DISTOBJECT
00056 
00057 namespace Tpetra {
00058   template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
00059   DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::
00060   DistObjectKA (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map)
00061     : map_ (map)
00062   {
00063 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
00064     using Teuchos::RCP;
00065     using Teuchos::Time;
00066     using Teuchos::TimeMonitor;
00067 
00068     RCP<Time> doXferTimer =
00069       TimeMonitor::lookupCounter ("Tpetra::DistObject::doTransfer");
00070     if (doXferTimer.is_null ()) {
00071       doXferTimer =
00072         TimeMonitor::getNewCounter ("Tpetra::DistObject::doTransfer");
00073     }
00074     doXferTimer_ = doXferTimer;
00075 
00076     RCP<Time> copyAndPermuteTimer =
00077       TimeMonitor::lookupCounter ("Tpetra::DistObject::copyAndPermute");
00078     if (copyAndPermuteTimer.is_null ()) {
00079       copyAndPermuteTimer =
00080         TimeMonitor::getNewCounter ("Tpetra::DistObject::copyAndPermute");
00081     }
00082     copyAndPermuteTimer_ = copyAndPermuteTimer;
00083 
00084     RCP<Time> packAndPrepareTimer =
00085       TimeMonitor::lookupCounter ("Tpetra::DistObject::packAndPrepare");
00086     if (packAndPrepareTimer.is_null ()) {
00087       packAndPrepareTimer =
00088         TimeMonitor::getNewCounter ("Tpetra::DistObject::packAndPrepare");
00089     }
00090     packAndPrepareTimer_ = packAndPrepareTimer;
00091 
00092     RCP<Time> doPostsAndWaitsTimer =
00093       TimeMonitor::lookupCounter ("Tpetra::DistObject::doPostsAndWaits");
00094     if (doPostsAndWaitsTimer.is_null ()) {
00095       doPostsAndWaitsTimer =
00096         TimeMonitor::getNewCounter ("Tpetra::DistObject::doPostsAndWaits");
00097     }
00098     doPostsAndWaitsTimer_ = doPostsAndWaitsTimer;
00099 
00100     RCP<Time> unpackAndCombineTimer =
00101       TimeMonitor::lookupCounter ("Tpetra::DistObject::unpackAndCombine");
00102     if (unpackAndCombineTimer.is_null ()) {
00103       unpackAndCombineTimer =
00104         TimeMonitor::getNewCounter ("Tpetra::DistObject::unpackAndCombine");
00105     }
00106     unpackAndCombineTimer_ = unpackAndCombineTimer;
00107 #endif // HAVE_TPETRA_TRANSFER_TIMERS
00108   }
00109 
00110   template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
00111   DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::
00112   DistObjectKA (const DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>& rhs)
00113     : map_ (rhs.map_)
00114   {}
00115 
00116   template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
00117   DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::~DistObjectKA()
00118   {}
00119 
00120   template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
00121   std::string
00122   DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::description () const
00123   {
00124     using Teuchos::TypeNameTraits;
00125 
00126     std::ostringstream os;
00127     os << "Tpetra::DistObject<"
00128        << TypeNameTraits<Packet>::name ()
00129        << ", " << TypeNameTraits<LocalOrdinal>::name ()
00130        << ", " << TypeNameTraits<GlobalOrdinal>::name ()
00131        << ", " << TypeNameTraits<Node>::name ()
00132        << ">";
00133     return os.str ();
00134   }
00135 
00136   template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
00137   void
00138   DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::
00139   describe (Teuchos::FancyOStream &out,
00140             const Teuchos::EVerbosityLevel verbLevel) const
00141   {
00142     using Teuchos::rcpFromRef;
00143     using std::endl;
00144 
00145     const Teuchos::EVerbosityLevel vl = (verbLevel == Teuchos::VERB_DEFAULT) ?
00146       Teuchos::VERB_LOW : verbLevel;
00147 
00148     if (vl != Teuchos::VERB_NONE) {
00149       out << this->description () << endl;
00150       Teuchos::OSTab tab (rcpFromRef (out));
00151       out << "Export buffer size (in packets): " << exports_.size() << endl
00152           << "Import buffer size (in packets): " << imports_.size() << endl
00153           << "Map over which this object is distributed:" << endl;
00154       map_->describe (out, vl);
00155     }
00156   }
00157 
00158   template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
00159   void
00160   DistObjectKA<Packet, LocalOrdinal, GlobalOrdinal, Node>::
00161   removeEmptyProcessesInPlace (const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >& newMap)
00162   {
00163     TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
00164       "Tpetra::DistObject::removeEmptyProcessesInPlace: Not implemented");
00165   }
00166 
00167   template<class DistObjectType>
00168   void
00169   removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input,
00170                                const Teuchos::RCP<const Map<typename DistObjectType::local_ordinal_type,
00171                                                             typename DistObjectType::global_ordinal_type,
00172                                                             typename DistObjectType::node_type> >& newMap)
00173   {
00174     input->removeEmptyProcessesInPlace (newMap);
00175     if (newMap.is_null ()) { // my process is excluded
00176       input = Teuchos::null;
00177     }
00178   }
00179 
00180   template<class DistObjectType>
00181   void
00182   removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input)
00183   {
00184     using Teuchos::RCP;
00185     typedef typename DistObjectType::local_ordinal_type LO;
00186     typedef typename DistObjectType::global_ordinal_type GO;
00187     typedef typename DistObjectType::node_type NT;
00188     typedef Map<LO, GO, NT> map_type;
00189 
00190     RCP<const map_type> newMap = input->getMap ()->removeEmptyProcesses ();
00191     removeEmptyProcessesInPlace<DistObjectType> (input, newMap);
00192   }
00193 
00194   template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
00195   void
00196   DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::
00197   doImport (const SrcDistObject& source,
00198             const Import<LocalOrdinal,GlobalOrdinal,Node>& importer,
00199             CombineMode CM)
00200   {
00201     TEUCHOS_TEST_FOR_EXCEPTION(*getMap() != *importer.getTargetMap(),
00202       std::invalid_argument, "doImport: The target DistObject's Map is not "
00203       "identical to the Import's target Map.");
00204 #ifdef HAVE_TPETRA_DEBUG
00205     {
00206       typedef DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node> this_type;
00207       const this_type* srcDistObj = dynamic_cast<const this_type*> (&source);
00208       TEUCHOS_TEST_FOR_EXCEPTION(
00209         srcDistObj != NULL && * (srcDistObj->getMap ()) != *importer.getSourceMap(),
00210         std::invalid_argument, "doImport: The source is a DistObject, yet its "
00211         "Map is not identical to the Import's source Map.");
00212     }
00213 #endif // HAVE_TPETRA_DEBUG
00214     size_t numSameIDs = importer.getNumSameIDs ();
00215 
00216     typedef Teuchos::ArrayView<const LocalOrdinal> view_type;
00217     const view_type exportLIDs      = importer.getExportLIDs();
00218     const view_type remoteLIDs      = importer.getRemoteLIDs();
00219     const view_type permuteToLIDs   = importer.getPermuteToLIDs();
00220     const view_type permuteFromLIDs = importer.getPermuteFromLIDs();
00221     this->doTransfer (source, CM, numSameIDs, permuteToLIDs, permuteFromLIDs,
00222                       remoteLIDs, exportLIDs, importer.getDistributor (),
00223                       DoForward);
00224   }
00225 
00226   template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
00227   void
00228   DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::
00229   doExport (const SrcDistObject& source,
00230             const Export<LocalOrdinal,GlobalOrdinal,Node>& exporter,
00231             CombineMode CM)
00232   {
00233     TEUCHOS_TEST_FOR_EXCEPTION(
00234       *getMap() != *exporter.getTargetMap(), std::invalid_argument,
00235       "doExport: The target DistObject's Map is not identical to the Export's "
00236       "target Map.");
00237 #ifdef HAVE_TPETRA_DEBUG
00238     {
00239       typedef DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node> this_type;
00240       const this_type* srcDistObj = dynamic_cast<const this_type*> (&source);
00241       TEUCHOS_TEST_FOR_EXCEPTION(
00242         srcDistObj != NULL && * (srcDistObj->getMap ()) != *exporter.getSourceMap(),
00243         std::invalid_argument, "doExport: The source is a DistObject, yet its "
00244         "Map is not identical to the Export's source Map.");
00245     }
00246 #endif // HAVE_TPETRA_DEBUG
00247     size_t numSameIDs = exporter.getNumSameIDs();
00248 
00249     typedef ArrayView<const LocalOrdinal> view_type;
00250     view_type exportLIDs      = exporter.getExportLIDs();
00251     view_type remoteLIDs      = exporter.getRemoteLIDs();
00252     view_type permuteToLIDs   = exporter.getPermuteToLIDs();
00253     view_type permuteFromLIDs = exporter.getPermuteFromLIDs();
00254     doTransfer (source, CM, numSameIDs, permuteToLIDs, permuteFromLIDs, remoteLIDs,
00255                 exportLIDs, exporter.getDistributor (), DoForward);
00256   }
00257 
00258   template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
00259   void
00260   DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::
00261   doImport (const SrcDistObject& source,
00262             const Export<LocalOrdinal,GlobalOrdinal,Node> & exporter,
00263             CombineMode CM)
00264   {
00265     TEUCHOS_TEST_FOR_EXCEPTION(
00266       *getMap() != *exporter.getSourceMap(), std::invalid_argument,
00267       "doImport (reverse mode): The target DistObject's Map is not identical "
00268       "to the Export's source Map.");
00269 #ifdef HAVE_TPETRA_DEBUG
00270     {
00271       typedef DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node> this_type;
00272       const this_type* srcDistObj = dynamic_cast<const this_type*> (&source);
00273       TEUCHOS_TEST_FOR_EXCEPTION(
00274         srcDistObj != NULL && * (srcDistObj->getMap ()) != *exporter.getTargetMap(),
00275         std::invalid_argument,
00276         "doImport (reverse mode): The source is a DistObject, yet its "
00277         "Map is not identical to the Export's target Map.");
00278     }
00279 #endif // HAVE_TPETRA_DEBUG
00280     size_t numSameIDs = exporter.getNumSameIDs();
00281 
00282     typedef ArrayView<const LocalOrdinal> view_type;
00283     view_type exportLIDs      = exporter.getRemoteLIDs();
00284     view_type remoteLIDs      = exporter.getExportLIDs();
00285     view_type permuteToLIDs   = exporter.getPermuteFromLIDs();
00286     view_type permuteFromLIDs = exporter.getPermuteToLIDs();
00287     doTransfer (source, CM, numSameIDs, permuteToLIDs, permuteFromLIDs, remoteLIDs,
00288                 exportLIDs, exporter.getDistributor (), DoReverse);
00289   }
00290 
00291   template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
00292   void
00293   DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::
00294   doExport (const SrcDistObject& source,
00295             const Import<LocalOrdinal,GlobalOrdinal,Node> & importer,
00296             CombineMode CM)
00297   {
00298     TEUCHOS_TEST_FOR_EXCEPTION(
00299       *getMap() != *importer.getSourceMap(), std::invalid_argument,
00300       "doExport (reverse mode): The target object's Map "
00301       "is not identical to the Import's source Map.");
00302 #ifdef HAVE_TPETRA_DEBUG
00303     {
00304       typedef DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node> this_type;
00305       const this_type* srcDistObj = dynamic_cast<const this_type*> (&source);
00306       TEUCHOS_TEST_FOR_EXCEPTION(
00307         srcDistObj != NULL && * (srcDistObj->getMap ()) != *importer.getTargetMap(),
00308         std::invalid_argument,
00309         "doExport (reverse mode): The source is a DistObject, yet its "
00310         "Map is not identical to the Import's target Map.");
00311     }
00312 #endif // HAVE_TPETRA_DEBUG
00313     size_t numSameIDs = importer.getNumSameIDs();
00314 
00315     typedef ArrayView<const LocalOrdinal> view_type;
00316     view_type exportLIDs      = importer.getRemoteLIDs();
00317     view_type remoteLIDs      = importer.getExportLIDs();
00318     view_type permuteToLIDs   = importer.getPermuteFromLIDs();
00319     view_type permuteFromLIDs = importer.getPermuteToLIDs();
00320     doTransfer (source, CM, numSameIDs, permuteToLIDs, permuteFromLIDs, remoteLIDs,
00321                 exportLIDs, importer.getDistributor (), DoReverse);
00322   }
00323 
00324   template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
00325   bool
00326   DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::isDistributed() const {
00327     return map_->isDistributed ();
00328   }
00329 
00330   template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
00331   size_t
00332   DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::
00333   constantNumberOfPackets () const {
00334     return 0; // default implementation; subclasses may override
00335   }
00336 
/// \brief Common implementation behind every doImport() / doExport() variant.
///
/// Steps, in order: check compatibility with checkSizes(); create views
/// of the source (if it is a DistObjectKA) and of this target; call
/// copyAndPermute() if there is anything to copy or permute; then,
/// unless \c CM is ZERO, call packAndPrepare(), exchange the packed data
/// through \c distor, and call unpackAndCombine() on the received data.
/// The views are released again before returning.
///
/// \param src [in] Source object of the redistribution.
/// \param CM [in] Combine mode.  ZERO skips packing, communication, and
///   unpacking.  For INSERT or REPLACE, the target view is requested
///   write-only when the number of IDs written equals the Map's local
///   element count.
/// \param numSameIDs [in] Forwarded to copyAndPermute(); also counted
///   toward the number of IDs written.
/// \param permuteToLIDs_ [in] Deep-copied into a Kokkos::View and
///   forwarded to copyAndPermute().
/// \param permuteFromLIDs_ [in] Deep-copied into a Kokkos::View and
///   forwarded to copyAndPermute().
/// \param remoteLIDs_ [in] Deep-copied into a Kokkos::View and passed to
///   unpackAndCombine().  Its size determines the receive buffer length
///   when the number of packets per LID is constant.
/// \param exportLIDs_ [in] Deep-copied into a Kokkos::View and passed to
///   packAndPrepare().
/// \param distor [in/out] Distributor that performs the communication.
/// \param revOp [in] DoReverse selects doReversePostsAndWaits();
///   otherwise doPostsAndWaits() is used.
///
/// \throw std::invalid_argument if checkSizes(src) returns false.
///
/// \note Communication and unpackAndCombine() are both skipped when
///   \c revOp is DoReverse and this object is not distributed, or when
///   \c revOp is DoForward and \c src is a DistObjectKA that is not
///   distributed.
00337   template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
00338   void
00339   DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::
00340   doTransfer (const SrcDistObject& src,
00341               CombineMode CM,
00342               size_t numSameIDs,
00343               const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs_,
00344               const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs_,
00345               const Teuchos::ArrayView<const LocalOrdinal>& remoteLIDs_,
00346               const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs_,
00347               Distributor &distor,
00348               ReverseOption revOp)
00349   {
00350     using Teuchos::as;
00351     using Kokkos::Compat::getArrayView;
00352     using Kokkos::Compat::getConstArrayView;
00353     using Kokkos::Compat::getKokkosViewDeepCopy;
00354     using Kokkos::Compat::create_const_view;
00355 
00356 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
00357     Teuchos::TimeMonitor doXferMon (*doXferTimer_);
00358 #endif // HAVE_TPETRA_TRANSFER_TIMERS
00359 
00360     // Convert arguments to Kokkos::View's (involves deep copy to device)
00361     typedef Kokkos::View<const LocalOrdinal*, device_type> lo_const_view_type;
00362     lo_const_view_type permuteToLIDs =
00363       getKokkosViewDeepCopy<device_type> (permuteToLIDs_);
00364     lo_const_view_type permuteFromLIDs =
00365       getKokkosViewDeepCopy<device_type> (permuteFromLIDs_);
00366     lo_const_view_type remoteLIDs =
00367       getKokkosViewDeepCopy<device_type> (remoteLIDs_);
00368     lo_const_view_type exportLIDs =
00369       getKokkosViewDeepCopy<device_type> (exportLIDs_);
00370 
00371     TEUCHOS_TEST_FOR_EXCEPTION(
00372       ! checkSizes (src), std::invalid_argument,
00373       "Tpetra::DistObject::doTransfer(): checkSizes() indicates that the "
00374       "destination object is not a legal target for redistribution from the "
00375       "source object.  This probably means that they do not have the same "
00376       "dimensions.  For example, MultiVectors must have the same number of "
00377       "rows and columns.");
          // Start by asking for a read-and-write view of the target; this is
          // narrowed to write-only below when all local data are overwritten.
00378     KokkosClassic::ReadWriteOption rwo = KokkosClassic::ReadWrite;
00379     if (CM == INSERT || CM == REPLACE) {
00380       const size_t numIDsToWrite = numSameIDs +
00381         as<size_t> (permuteToLIDs.size ()) +
00382         as<size_t> (remoteLIDs.size ());
00383       if (numIDsToWrite == this->getMap ()->getNodeNumElements ()) {
00384         // We're overwriting all of our local data in the destination
00385         // object, so a write-only view suffices.
00386         //
00387         // FIXME (mfh 10 Apr 2012) This doesn't make sense for a
00388         // CrsMatrix with a dynamic graph.  INSERT mode could mean
00389         // that we're adding new entries to the object, but we don't
00390         // want to get rid of the old ones.
00391         rwo = KokkosClassic::WriteOnly;
00392       }
00393     }
00394     // Tell the source to create a read-only view of its data.  On a
00395     // discrete accelerator such as a GPU, this brings EVERYTHING from
00396     // device memory to host memory.
00397     //
00398     // FIXME (mfh 23 Mar 2012) By passing in the list of GIDs (or
00399     // rather, local LIDs to send) and packet counts, createViews()
00400     // could create a "sparse view" that only brings in the necessary
00401     // data from device to host memory.
00402     typedef DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node> this_type;
00403     const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
00404     if (srcDistObj != NULL) {
00405       srcDistObj->createViews ();
00406     }
00407 
00408     // Tell the target to create a view of its data.  Depending on
00409     // rwo, this could be a write-only view or a read-and-write view.
00410     // On a discrete accelerator such as a GPU, a write-only view only
00411     // requires a transfer from host to device memory.  A
00412     // read-and-write view requires a two-way transfer.  This has the
00413     // same problem as createViews(): it transfers EVERYTHING, not
00414     // just the necessary data.
00415     //
00416     // FIXME (mfh 23 Mar 2012) By passing in the list of GIDs (or
00417     // rather, local LIDs into which to receive) and packet counts,
00418     // createViewsNonConst() could create a "sparse view" that only
00419     // transfers the necessary data.
00420     this->createViewsNonConst (rwo);
00421 
00422     if (numSameIDs + permuteToLIDs.size()) {
00423 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
00424       Teuchos::TimeMonitor copyAndPermuteMon (*copyAndPermuteTimer_);
00425 #endif // HAVE_TPETRA_TRANSFER_TIMERS
00426       // There is at least one GID to copy or permute.
00427       copyAndPermute (src, numSameIDs, permuteToLIDs, permuteFromLIDs);
00428     }
00429 
00430     // The method may return zero even if the implementation actually
00431     // does have a constant number of packets per LID.  However, if it
00432     // returns nonzero, we may use this information to avoid
00433     // (re)allocating num{Ex,Im}portPacketsPerLID_.  packAndPrepare()
00434     // will set this to its final value.
00435     //
00436     // We only need this if CM != ZERO, but it has to be lifted out of
00437     // that scope because there are multiple tests for CM != ZERO.
00438     size_t constantNumPackets = this->constantNumberOfPackets ();
00439 
00440     // We only need to pack communication buffers if the combine mode
00441     // is not ZERO. A "ZERO combine mode" means that the results are
00442     // the same as if we had received all zeros, and added them to the
00443     // existing values. That means we don't need to communicate.
00444     if (CM != ZERO) {
00445       if (constantNumPackets == 0) {
00446         Kokkos::Compat::realloc (numExportPacketsPerLID_, exportLIDs.size ());
00447         Kokkos::Compat::realloc (numImportPacketsPerLID_, remoteLIDs.size ());
00448       }
00449 
00450       {
00451 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
00452         Teuchos::TimeMonitor packAndPrepareMon (*packAndPrepareTimer_);
00453 #endif // HAVE_TPETRA_TRANSFER_TIMERS
00454         // Ask the source to pack data.  Also ask it whether there are a
00455         // constant number of packets per element (constantNumPackets is
00456         // an output argument).  If there are, constantNumPackets will
00457         // come back nonzero.  Otherwise, the source will fill the
00458         // numExportPacketsPerLID_ array.
00459         packAndPrepare (src, exportLIDs, exports_, numExportPacketsPerLID_,
00460                         constantNumPackets, distor);
00461       }
00462     }
00463 
00464     // We don't need the source's data anymore, so it can let go of
00465     // its views.  On an accelerator device with a separate memory
00466     // space (like a GPU), this frees host memory, since device memory
00467     // has the "master" version of the data.
00468     if (srcDistObj != NULL) {
00469       srcDistObj->releaseViews ();
00470     }
00471 
00472     // We only need to send data if the combine mode is not ZERO.
00473     if (CM != ZERO) {
00474       if (constantNumPackets != 0) {
00475         // There are a constant number of packets per element.  We
00476         // already know (from the number of "remote" (incoming)
00477         // elements) how many incoming elements we expect, so we can
00478         // resize the buffer accordingly.
00479         const size_t rbufLen = remoteLIDs.size() * constantNumPackets;
00480         if (as<size_t> (imports_.size()) != rbufLen) {
00481           Kokkos::Compat::realloc (imports_, rbufLen);
00482         }
00483       }
00484 
00485       // Create mirror views of [import|export]PacketsPerLID
00486       typename Kokkos::View<size_t*,device_type>::HostMirror host_numExportPacketsPerLID = Kokkos::create_mirror_view (numExportPacketsPerLID_);
00487       typename Kokkos::View<size_t*,device_type>::HostMirror host_numImportPacketsPerLID = Kokkos::create_mirror_view (numImportPacketsPerLID_);
00488 
00489       // Copy numExportPacketsPerLID to host
00490       Kokkos::deep_copy (host_numExportPacketsPerLID, numExportPacketsPerLID_);
00491 
00492       // Do we need to do communication (via doPostsAndWaits)?
00493       bool needCommunication = true;
00494       if (revOp == DoReverse && ! isDistributed ()) {
00495         needCommunication = false;
00496       }
00497       // FIXME (mfh 30 Jun 2013): Checking whether the source object
00498       // is distributed requires a cast to DistObject.  If it's not a
00499       // DistObject, then I'm not quite sure what to do.  Perhaps it
00500       // would be more appropriate for SrcDistObject to have an
00501       // isDistributed() method.  For now, I'll just assume that we
00502       // need to do communication unless the cast succeeds and the
00503       // source is not distributed.
00504       else if (revOp == DoForward && srcDistObj != NULL &&
00505                ! srcDistObj->isDistributed ()) {
00506         needCommunication = false;
00507       }
00508 
00509       if (needCommunication) {
00510         if (revOp == DoReverse) {
00511 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
00512           Teuchos::TimeMonitor doPostsAndWaitsMon (*doPostsAndWaitsTimer_);
00513 #endif // HAVE_TPETRA_TRANSFER_TIMERS
00514           if (constantNumPackets == 0) { //variable num-packets-per-LID:
                  // First exchange: the per-LID packet counts themselves
                  // (one value per LID), so the receive buffer can be sized.
00515             distor.doReversePostsAndWaits (create_const_view (host_numExportPacketsPerLID),
00516                                            1,
00517                                            host_numImportPacketsPerLID);
00518             size_t totalImportPackets = 0;
00519             for (view_size_type i = 0; i < numImportPacketsPerLID_.size(); ++i) {
00520               totalImportPackets += host_numImportPacketsPerLID[i];
00521             }
00522             Kokkos::Compat::realloc (imports_, totalImportPackets);
                  // Second exchange: the packed data, using the per-LID counts.
00523             distor.doReversePostsAndWaits (create_const_view (exports_),
00524                                            getArrayView (host_numExportPacketsPerLID),
00525                                            imports_,
00526                                            getArrayView (host_numImportPacketsPerLID));
00527           }
00528           else {
00529             distor.doReversePostsAndWaits (create_const_view (exports_),
00530                                            constantNumPackets,
00531                                            imports_);
00532           }
00533         }
00534         else { // revOp == DoForward
00535 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
00536           Teuchos::TimeMonitor doPostsAndWaitsMon (*doPostsAndWaitsTimer_);
00537 #endif // HAVE_TPETRA_TRANSFER_TIMERS
00538           if (constantNumPackets == 0) { //variable num-packets-per-LID:
                  // First exchange: the per-LID packet counts themselves
                  // (one value per LID), so the receive buffer can be sized.
00539             distor.doPostsAndWaits (create_const_view (host_numExportPacketsPerLID), 1,
00540                                     host_numImportPacketsPerLID);
00541             size_t totalImportPackets = 0;
00542             for (view_size_type i = 0; i < numImportPacketsPerLID_.size(); ++i) {
00543               totalImportPackets += host_numImportPacketsPerLID[i];
00544             }
00545             Kokkos::Compat::realloc (imports_, totalImportPackets);
                  // Second exchange: the packed data, using the per-LID counts.
00546             distor.doPostsAndWaits (create_const_view (exports_),
00547                                     getArrayView (host_numExportPacketsPerLID),
00548                                     imports_,
00549                                     getArrayView (host_numImportPacketsPerLID));
00550           }
00551           else {
00552             distor.doPostsAndWaits (create_const_view (exports_),
00553                                     constantNumPackets,
00554                                     imports_);
00555           }
00556         }
00557 
00558         // Copy numImportPacketsPerLID to device
00559         Kokkos::deep_copy (numImportPacketsPerLID_, host_numImportPacketsPerLID);
00560 
00561         {
00562 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
00563           Teuchos::TimeMonitor unpackAndCombineMon (*unpackAndCombineTimer_);
00564 #endif // HAVE_TPETRA_TRANSFER_TIMERS
00565           unpackAndCombine (remoteLIDs, imports_, numImportPacketsPerLID_,
00566                             constantNumPackets, distor, CM);
00567         }
00568       }
00569     } // if (CM != ZERO)
00570 
          // Release the target's views requested by createViewsNonConst() above.
00571     this->releaseViews ();
00572   }
00573 
00574   template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
00575   void
00576   DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::print (std::ostream &os) const
00577   {
00578     using Teuchos::FancyOStream;
00579     using Teuchos::getFancyOStream;
00580     using Teuchos::RCP;
00581     using Teuchos::rcpFromRef;
00582     using std::endl;
00583 
00584     RCP<FancyOStream> out = getFancyOStream (rcpFromRef (os));
00585     this->describe (*out, Teuchos::VERB_DEFAULT);
00586   }
00587 
00588   template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
00589   void
00590   DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::createViews () const
00591   {}
00592 
00593   template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
00594   void
00595   DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::
00596   createViewsNonConst (KokkosClassic::ReadWriteOption /*rwo*/)
00597   {}
00598 
00599   template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
00600   void
00601   DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::
00602   releaseViews () const
00603   {}
00604 
// Explicitly instantiate DistObjectKA for one combination of packet,
// local ordinal, global ordinal, and node types.  DistObjectKA is named
// without qualification, so expand this where that name is visible
// (for example, inside namespace Tpetra).
#define TPETRA_DISTOBJECTKA_INSTANT(PACKET, LO, GO, NODE) \
  template class DistObjectKA< PACKET , LO , GO , NODE >;
00608 
// Explicitly instantiate DistObjectKA with Packet = char.  A separate
// macro is needed because the "SLGN" instantiation machinery used with
// the macro above doesn't work for Packet=char.
#define TPETRA_DISTOBJECTKA_INSTANT_CHAR(LO, GO, NODE) \
  template class DistObjectKA< char , LO , GO , NODE >;
00613 
00614 
00615 } // namespace Tpetra
00616 
00617 #endif /* TPETRA_USE_KOKKOS_DISTOBJECT */
00618 
00619 #endif /* TPETRA_DISTOBJECT_DEF_HPP */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines