Tpetra Matrix/Vector Services  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines
Tpetra_KokkosRefactor_Map_def.hpp
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //          Tpetra: Templated Linear Algebra Services Package
00005 //                 Copyright (2008) Sandia Corporation
00006 //
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ************************************************************************
00040 // @HEADER
00041 
00042 #ifndef TPETRA_KOKKOSREFACTOR_MAP_DEF_HPP
00043 #define TPETRA_KOKKOSREFACTOR_MAP_DEF_HPP
00044 
00045 #include <Tpetra_Directory.hpp> // must include for implicit instantiation to work
00046 #include <Tpetra_Util.hpp>
00047 #include <Teuchos_as.hpp>
00048 #include <stdexcept>
00049 
00050 #ifdef DOXYGEN_USE_ONLY
00051 #  include "Tpetra_Map_decl.hpp"
00052 #endif
00053 
00054 namespace Tpetra {
00055 
00056   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00057   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00058   Map () :
00059     comm_ (new Teuchos::SerialComm<int> ()),
00060     node_ (KokkosClassic::Details::getNode<Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > ()),
00061     directory_ (new Directory<LocalOrdinal, GlobalOrdinal, node_type> ())
00062   {}
00063 
00064   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00065   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00066   Map (const global_size_t globalNumIndices,
00067        const GlobalOrdinal indexBase,
00068        const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
00069        const LocalGlobal lOrG,
00070        const Teuchos::RCP<node_type>& node) :
00071     comm_ (comm),
00072     node_ (node),
00073     directory_ (new Directory<LocalOrdinal, GlobalOrdinal, node_type> ())
00074   {
00075     // Start with a host Map implementation, since this will make this
00076     // class' public (host) methods work.  If users want device
00077     // methods, they will call getDeviceView(), which will initialize
00078     // the device Map implementation.
00079     //
00080     // NOTE (mfh 06 Feb 2014) If we're using UVM, we don't really need
00081     // the host and device Maps to be separate.
00082     mapHost_ = host_impl_type (Teuchos::as<GlobalOrdinal> (globalNumIndices),
00083                                indexBase, *comm, lOrG);
00084   }
00085 
00086   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00087   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00088   Map (const global_size_t globalNumIndices,
00089        const size_t myNumIndices,
00090        const GlobalOrdinal indexBase,
00091        const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
00092        const Teuchos::RCP<node_type>& node) :
00093     comm_ (comm),
00094     node_ (node),
00095     directory_ (new Directory<LocalOrdinal, GlobalOrdinal, node_type> ())
00096   {
00097     typedef GlobalOrdinal GO;
00098     const global_size_t GSTI = Teuchos::OrdinalTraits<global_size_t>::invalid ();
00099 
00100     const GO globalNumInds = (globalNumIndices == GSTI) ?
00101       getInvalidGlobalIndex () : Teuchos::as<GO> (globalNumIndices);
00102     const GO myNumInds = (myNumIndices == GSTI) ?
00103       getInvalidLocalIndex () : Teuchos::as<GO> (myNumIndices);
00104 
00105     // Start with a host Map implementation, since this will make this
00106     // class' public (host) methods work.  If users want device
00107     // methods, they will call getDeviceView(), which will initialize
00108     // the device Map implementation.
00109     //
00110     // NOTE (mfh 06 Feb 2014) If we're using UVM, we don't really need
00111     // the host and device Maps to be separate.
00112     mapHost_ = host_impl_type (globalNumInds, myNumInds, indexBase, *comm);
00113 
00114     // Create the Directory on demand in getRemoteIndexList().
00115   }
00116 
00117   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00118   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00119   Map (const global_size_t globalNumIndices,
00120        const Kokkos::View<const GlobalOrdinal*, device_type>& myGlobalIndices,
00121        const GlobalOrdinal indexBase,
00122        const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
00123        const Teuchos::RCP<node_type>& node) :
00124     comm_ (comm),
00125     node_ (node),
00126     directory_ (new Directory<LocalOrdinal, GlobalOrdinal, node_type> ())
00127   {
00128     typedef GlobalOrdinal GO;
00129     const global_size_t GSTI = Teuchos::OrdinalTraits<global_size_t>::invalid ();
00130     const GO globalNumInds = (globalNumIndices == GSTI) ?
00131       getInvalidGlobalIndex () : Teuchos::as<GO> (globalNumIndices);
00132 
00133     // Since we already have device data, start here with a device Map
00134     // implementation.  In order to make this class' host methods work
00135     // and still be const (that is, legal to call in a host parallel
00136     // operation), we can't create the host mirror on demand; we must
00137     // create it here, in advance.
00138     //
00139     // NOTE (mfh 06 Feb 2014) If we're using UVM, we don't really need
00140     // the host and device Maps to be separate.
00141     mapDevice_ = device_impl_type (globalNumInds, myGlobalIndices, indexBase, *comm);
00142     mapHost_.create_copy_view (mapDevice_);
00143 
00144     // Create the Directory on demand in getRemoteIndexList().
00145   }
00146 
00147   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00148   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00149   Map (const global_size_t globalNumIndices,
00150        const Teuchos::ArrayView<const GlobalOrdinal>& myGlobalIndices,
00151        const GlobalOrdinal indexBase,
00152        const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
00153        const Teuchos::RCP<node_type> &node) :
00154     comm_ (comm),
00155     node_ (node),
00156     directory_ (new Directory<LocalOrdinal, GlobalOrdinal, node_type> ())
00157   {
00158     typedef GlobalOrdinal GO;
00159     typedef Kokkos::View<const GlobalOrdinal*, typename device_type::host_mirror_device_type> const_host_view_type;
00160     typedef Kokkos::View<GlobalOrdinal*, typename device_type::host_mirror_device_type> host_view_type;
00161     typedef Kokkos::View<GlobalOrdinal*, device_type> device_view_type;
00162 
00163     // Copy the input GID list from host (we assume that
00164     // Teuchos::ArrayView should only view host memory) to device.
00165     //
00166     // FIXME (mfh 06 Feb, 24 Mar 2014) We could use the CUDA API
00167     // function here that can look at a pointer and tell whether it
00168     // lives on host or device, to tell whether the Teuchos::ArrayView
00169     // is viewing host or device memory.  Regardless, we don't own the
00170     // data and we will need a deep copy anyway, so we might as well
00171     // copy it.
00172     const_host_view_type gidsHost_view (Kokkos::ViewWithoutManaging(),myGlobalIndices.getRawPtr (), myGlobalIndices.size ());
00173     host_view_type gidsHost ("GIDS_Host", myGlobalIndices.size ());
00174     Kokkos::deep_copy (gidsHost, gidsHost_view);
00175     device_view_type gidsDevice ("GIDs", myGlobalIndices.size ());
00176     Kokkos::deep_copy (gidsDevice, gidsHost);
00177 
00178     const global_size_t GSTI = Teuchos::OrdinalTraits<global_size_t>::invalid ();
00179     const GO globalNumInds = (globalNumIndices == GSTI) ?
00180       getInvalidGlobalIndex () : Teuchos::as<GO> (globalNumIndices);
00181 
00182     // Start with a host Map implementation, since this will make this
00183     // class' public (host) methods work.  If users want device
00184     // methods, they will call getDeviceView(), which will initialize
00185     // the device Map implementation.
00186     //
00187     // NOTE (mfh 06 Feb 2014) If we're using UVM, we don't really need
00188     // the host and device Maps to be separate, but we need to create a compatible
00189     // host view of the GIDs using the device ptr.
00190     mapDevice_ = device_impl_type (globalNumInds, gidsDevice , indexBase, *comm);
00191     #ifdef KOKKOS_USE_CUDA_UVM
00192       const GlobalOrdinal* ptr = gidsDevice.ptr_on_device();
00193       mapHost_ = host_impl_type (globalNumInds, const_host_view_type(Kokkos::ViewWithoutManaging(),ptr,myGlobalIndices.size ()), indexBase, *comm);
00194     #else
00195       mapHost_ = host_impl_type (globalNumInds, gidsHost, indexBase, *comm);
00196     #endif
00197     // Create the Directory on demand in getRemoteIndexList().
00198   }
00199 
00200   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00201   global_size_t
00202   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00203   getGlobalNumElements () const {
00204     return mapHost_.getGlobalNumIndices ();
00205   }
00206 
00207   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00208   size_t
00209   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00210   getNodeNumElements () const {
00211     return mapHost_.getMyNumIndices ();
00212   }
00213 
00214   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00215   GlobalOrdinal
00216   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00217   getIndexBase () const {
00218     return mapHost_.getIndexBase ();
00219   }
00220 
00221   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00222   GlobalOrdinal
00223   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00224   getInvalidGlobalIndex () const {
00225     return mapHost_.getInvalidGlobalIndex ();
00226   }
00227 
00228   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00229   LocalOrdinal
00230   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00231   getInvalidLocalIndex () const {
00232     return mapHost_.getInvalidLocalIndex ();
00233   }
00234 
00235   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00236   LocalOrdinal
00237   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00238   getMinLocalIndex () const {
00239     return static_cast<LocalOrdinal> (0);
00240   }
00241 
00242   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00243   LocalOrdinal
00244   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00245   getMaxLocalIndex () const {
00246     return mapHost_.getMaxLocalIndex ();
00247   }
00248 
00249   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00250   GlobalOrdinal
00251   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00252   getMinGlobalIndex () const {
00253     return mapHost_.getMinGlobalIndex ();
00254   }
00255 
00256   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00257   GlobalOrdinal
00258   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00259   getMaxGlobalIndex () const {
00260     return mapHost_.getMaxGlobalIndex ();
00261   }
00262 
00263   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00264   GlobalOrdinal
00265   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00266   getMinAllGlobalIndex () const {
00267     return mapHost_.getMinAllGlobalIndex ();
00268   }
00269 
00270   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00271   GlobalOrdinal
00272   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00273   getMaxAllGlobalIndex () const {
00274     return mapHost_.getMaxAllGlobalIndex ();
00275   }
00276 
00277   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00278   LocalOrdinal
00279   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00280   getLocalElement (const GlobalOrdinal globalIndex) const
00281   {
00282     return mapHost_.getLocalIndex (globalIndex);
00283   }
00284 
00285   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00286   GlobalOrdinal
00287   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00288   getGlobalElement (const LocalOrdinal localIndex) const
00289   {
00290     return mapHost_.getGlobalIndex (localIndex);
00291   }
00292 
00293   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00294   bool
00295   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00296   isNodeLocalElement (const LocalOrdinal localIndex) const {
00297     return mapHost_.isOwnedLocalIndex (localIndex);
00298   }
00299 
00300   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00301   bool
00302   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00303   isNodeGlobalElement (const GlobalOrdinal globalIndex) const {
00304     return mapHost_.isOwnedGlobalIndex (globalIndex);
00305   }
00306 
00307   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00308   bool
00309   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00310   isUniform () const {
00311     return mapHost_.isUniform ();
00312   }
00313 
00314   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00315   bool
00316   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00317   isContiguous () const {
00318     return mapHost_.isContiguous ();
00319   }
00320 
00321   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00322   bool
00323   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00324   isCompatible (const Map<LocalOrdinal, GlobalOrdinal, node_type>& map) const
00325   {
00326     using Teuchos::outArg;
00327     using Teuchos::REDUCE_MIN;
00328     using Teuchos::reduceAll;
00329     //
00330     // Tests that avoid the Boolean all-reduce below by using
00331     // globally consistent quantities.
00332     //
00333     if (this == &map) {
00334       // Pointer equality on one process always implies pointer
00335       // equality on all processes, since Map is immutable.
00336       return true;
00337     }
00338     else if (getComm ()->getSize () != map.getComm ()->getSize ()) {
00339       // The two communicators have different numbers of processes.
00340       // It's not correct to call isCompatible() in that case.  This
00341       // may result in the all-reduce hanging below.
00342       return false;
00343     }
00344     else if (getGlobalNumElements () != map.getGlobalNumElements ()) {
00345       // Two Maps are definitely NOT compatible if they have different
00346       // global numbers of indices.
00347       return false;
00348     }
00349     else if (isContiguous () && isUniform () &&
00350              map.isContiguous () && map.isUniform ()) {
00351       // Contiguous uniform Maps with the same number of processes in
00352       // their communicators, and with the same global numbers of
00353       // indices, are always compatible.
00354       return true;
00355     }
00356 
00357     TEUCHOS_TEST_FOR_EXCEPTION(
00358       getGlobalNumElements () != map.getGlobalNumElements (), std::logic_error,
00359       "Tpetra::Map::isCompatible: There's a bug in this method.  We've already "
00360       "checked that this condition is true above, but it's false here.  "
00361       "Please report this bug to the Tpetra developers.");
00362 
00363     // Do both Maps have the same number of indices on each process?
00364     const int locallyCompat =
00365       (getNodeNumElements () == map.getNodeNumElements ()) ? 1 : 0;
00366 
00367     int globallyCompat = 0;
00368     reduceAll<int, int> (*comm_, REDUCE_MIN, locallyCompat, outArg (globallyCompat));
00369     return (globallyCompat == 1);
00370   }
00371 
00372   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00373   bool
00374   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00375   locallySameAs (const Map<LocalOrdinal, GlobalOrdinal, node_type>& map) const
00376   {
00377     using Teuchos::ArrayView;
00378     typedef GlobalOrdinal GO;
00379     typedef typename ArrayView<const GO>::size_type size_type;
00380 
00381     // If both Maps are contiguous, we can compare their GID ranges
00382     // easily by looking at the min and max GID on this process.
00383     // Otherwise, we'll compare their GID lists.  If only one Map is
00384     // contiguous, then we only have to call getNodeElementList() on
00385     // the noncontiguous Map.  (It's best to avoid calling it on a
00386     // contiguous Map, since it results in unnecessary storage that
00387     // persists for the lifetime of the Map.)
00388 
00389     if (getNodeNumElements () != map.getNodeNumElements ()) {
00390       return false;
00391     }
00392     else if (getMinGlobalIndex () != map.getMinGlobalIndex () ||
00393              getMaxGlobalIndex () != map.getMaxGlobalIndex ()) {
00394       return false;
00395     }
00396     else {
00397       if (isContiguous ()) {
00398         if (map.isContiguous ()) {
00399           return true; // min and max match, so the ranges match.
00400         }
00401         else { // *this is contiguous, but map is not contiguous
00402           TEUCHOS_TEST_FOR_EXCEPTION(
00403             ! this->isContiguous () || map.isContiguous (), std::logic_error,
00404             "Tpetra::Map::locallySameAs: BUG");
00405           ArrayView<const GO> rhsElts = map.getNodeElementList ();
00406           const GO minLhsGid = this->getMinGlobalIndex ();
00407           const size_type numRhsElts = rhsElts.size ();
00408           for (size_type k = 0; k < numRhsElts; ++k) {
00409             const GO curLhsGid = minLhsGid + static_cast<GO> (k);
00410             if (curLhsGid != rhsElts[k]) {
00411               return false; // stop on first mismatch
00412             }
00413           }
00414           return true;
00415         }
00416       }
00417       else if (map.isContiguous ()) { // *this is not contiguous, but map is
00418         TEUCHOS_TEST_FOR_EXCEPTION(
00419           this->isContiguous () || ! map.isContiguous (), std::logic_error,
00420           "Tpetra::Map::locallySameAs: BUG");
00421         ArrayView<const GO> lhsElts = this->getNodeElementList ();
00422         const GO minRhsGid = map.getMinGlobalIndex ();
00423         const size_type numLhsElts = lhsElts.size ();
00424         for (size_type k = 0; k < numLhsElts; ++k) {
00425           const GO curRhsGid = minRhsGid + static_cast<GO> (k);
00426           if (curRhsGid != lhsElts[k]) {
00427             return false; // stop on first mismatch
00428           }
00429         }
00430         return true;
00431       }
00432       else { // neither *this nor map are contiguous
00433         // std::equal requires that the latter range is as large as
00434         // the former.  We know the ranges have equal length, because
00435         // they have the same number of local entries.
00436         ArrayView<const GO> lhsElts =     getNodeElementList ();
00437         ArrayView<const GO> rhsElts = map.getNodeElementList ();
00438         return std::equal (lhsElts.begin (), lhsElts.end (), rhsElts.begin ());
00439       }
00440     }
00441   }
00442 
00443   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00444   bool
00445   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00446   isSameAs (const Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > &map) const
00447   {
00448     using Teuchos::outArg;
00449     using Teuchos::REDUCE_MIN;
00450     using Teuchos::reduceAll;
00451     //
00452     // Tests that avoid the Boolean all-reduce below by using
00453     // globally consistent quantities.
00454     //
00455     if (this == &map) {
00456       // Pointer equality on one process always implies pointer
00457       // equality on all processes, since Map is immutable.
00458       return true;
00459     }
00460     else if (getComm ()->getSize () != map.getComm ()->getSize ()) {
00461       // The two communicators have different numbers of processes.
00462       // It's not correct to call isSameAs() in that case.  This
00463       // may result in the all-reduce hanging below.
00464       return false;
00465     }
00466     else if (getGlobalNumElements () != map.getGlobalNumElements ()) {
00467       // Two Maps are definitely NOT the same if they have different
00468       // global numbers of indices.
00469       return false;
00470     }
00471     else if (getMinAllGlobalIndex () != map.getMinAllGlobalIndex () ||
00472              getMaxAllGlobalIndex () != map.getMaxAllGlobalIndex () ||
00473              getIndexBase () != map.getIndexBase ()) {
00474       // If the global min or max global index doesn't match, or if
00475       // the index base doesn't match, then the Maps aren't the same.
00476       return false;
00477     }
00478     else if (isDistributed () != map.isDistributed ()) {
00479       // One Map is distributed and the other is not, which means that
00480       // the Maps aren't the same.
00481       return false;
00482     }
00483     else if (isContiguous () && isUniform () &&
00484              map.isContiguous () && map.isUniform ()) {
00485       // Contiguous uniform Maps with the same number of processes in
00486       // their communicators, with the same global numbers of indices,
00487       // and with matching index bases and ranges, must be the same.
00488       return true;
00489     }
00490 
00491     // The two communicators must have the same number of processes,
00492     // with process ranks occurring in the same order.  This uses
00493     // MPI_COMM_COMPARE.  The MPI 3.1 standard (Section 6.4) says:
00494     // "Operations that access communicators are local and their
00495     // execution does not require interprocess communication."
00496     // However, just to be sure, I'll put this call after the above
00497     // tests that don't communicate.
00498     if (! Details::congruent (*comm_, * (map.getComm ()))) {
00499       return false;
00500     }
00501 
00502     // If we get this far, we need to check local properties and then
00503     // communicate local sameness across all processes.
00504     const int isSame_lcl = locallySameAs (map) ? 1 : 0;
00505 
00506     // Return true if and only if all processes report local sameness.
00507     int isSame_gbl = 0;
00508     reduceAll<int, int> (*comm_, REDUCE_MIN, isSame_lcl, outArg (isSame_gbl));
00509     return isSame_gbl == 1;
00510   }
00511 
00512   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00513   Teuchos::ArrayView<const GlobalOrdinal>
00514   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00515   getNodeElementList () const
00516   {
00517     typedef GlobalOrdinal GO;
00518     Kokkos::View<const GO*, host_mirror_device_type> myGlobalInds =
00519       mapHost_.getMyGlobalIndices (); // creates it if it doesn't exist
00520 
00521     return Teuchos::ArrayView<const GO> (myGlobalInds.ptr_on_device (),
00522                                          myGlobalInds.dimension_0 ());
00523   }
00524 
00525   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00526   bool
00527   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00528   isDistributed () const {
00529     return mapHost_.isDistributed ();
00530   }
00531 
00532   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00533   std::string
00534   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00535   description () const {
00536     using Teuchos::TypeNameTraits;
00537     std::ostringstream os;
00538 
00539     os << "\"Tpetra::Map\": {"
00540        << "LocalOrdinalType: " << TypeNameTraits<local_ordinal_type>::name ()
00541        << ", GlobalOrdinalType: " << TypeNameTraits<global_ordinal_type>::name ()
00542        << ", DeviceType: " << TypeNameTraits<device_type>::name ();
00543     if (this->getObjectLabel () != "") {
00544       os << ", Label: \"" << this->getObjectLabel () << "\"";
00545     }
00546     os << ", Global number of entries: " << getGlobalNumElements ()
00547        << ", Number of processes: " << getComm ()->getSize ()
00548        << ", Uniform: " << (isUniform () ? "true" : "false")
00549        << ", Contiguous: " << (isContiguous () ? "true" : "false")
00550        << ", Distributed: " << (isDistributed () ? "true" : "false")
00551        << "}";
00552     return os.str ();
00553   }
00554 
00555   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00556   void
00557   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00558   describe (Teuchos::FancyOStream &out,
00559             const Teuchos::EVerbosityLevel verbLevel) const
00560   {
00561     using std::endl;
00562     using std::setw;
00563     using Teuchos::ArrayView;
00564     using Teuchos::as;
00565     using Teuchos::OSTab;
00566     using Teuchos::toString;
00567     using Teuchos::TypeNameTraits;
00568     using Teuchos::VERB_DEFAULT;
00569     using Teuchos::VERB_NONE;
00570     using Teuchos::VERB_LOW;
00571     using Teuchos::VERB_MEDIUM;
00572     using Teuchos::VERB_HIGH;
00573     using Teuchos::VERB_EXTREME;
00574     typedef typename ArrayView<const GlobalOrdinal>::size_type size_type;
00575 
00576     const size_t nME = getNodeNumElements ();
00577     ArrayView<const GlobalOrdinal> myEntries = getNodeElementList ();
00578     const int myRank = comm_->getRank ();
00579     const int numProcs = comm_->getSize ();
00580 
00581     const Teuchos::EVerbosityLevel vl = (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
00582 
00583     // By convention, describe() always begins with a tab before printing.
00584     OSTab tab0 (out);
00585 
00586     if (vl == VERB_NONE) {
00587       // do nothing
00588     }
00589     else if (vl == VERB_LOW) {
00590       if (myRank == 0) {
00591         out << "\"Tpetra::Map\":" << endl;
00592         OSTab tab1 (out);
00593         out << "LocalOrdinalType: " << TypeNameTraits<LocalOrdinal>::name () << endl
00594             << "GlobalOrdinalType: " << TypeNameTraits<GlobalOrdinal>::name () << endl
00595             << "DeviceType: " << TypeNameTraits<device_type>::name () << endl;
00596         if (this->getObjectLabel () != "") {
00597           out << "Label: \"" << this->getObjectLabel () << "\"" << endl;
00598         }
00599         out << "Global number of entries: " << getGlobalNumElements () << endl
00600             << "Minimum global index: " << getMinAllGlobalIndex () << endl
00601             << "Maximum global index: " << getMaxAllGlobalIndex () << endl
00602             << "Index base: " << getIndexBase () << endl
00603             << "Number of processes: " << getComm ()->getSize () << endl
00604             << "Uniform: " << (isUniform () ? "true" : "false") << endl
00605             << "Contiguous: " << (isContiguous () ? "true" : "false") << endl
00606             << "Distributed: " << (isDistributed () ? "true" : "false") << endl;
00607       }
00608     }
00609 
00610     if (vl >= VERB_HIGH) { // HIGH or EXTREME
00611       for (int p = 0; p < numProcs; ++p) {
00612         if (myRank == p) {
00613           out << "Process " << myRank << ":" << endl;
00614           OSTab tab1 (out);
00615           out << "My number of entries: " << nME << endl
00616               << "My minimum global index: " << getMinGlobalIndex () << endl
00617               << "My maximum global index: " << getMaxGlobalIndex () << endl;
00618           if (vl == VERB_EXTREME) {
00619             out << "My global indices: [";
00620             for (size_type k = 0; k < myEntries.size (); ++k) {
00621               out << myEntries[k];
00622               if (k + 1 < myEntries.size ()) {
00623                 out << ", ";
00624               }
00625             }
00626             out << "]" << endl;
00627           }
00628           std::flush (out);
00629         }
00630         // Do a few global ops to give I/O a chance to complete
00631         comm_->barrier ();
00632         comm_->barrier ();
00633         comm_->barrier ();
00634       }
00635     }
00636   }
00637 
00638   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00639   Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > >
00640   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00641   replaceCommWithSubset (const Teuchos::RCP<const Teuchos::Comm<int> >& newComm) const
00642   {
00643     using Teuchos::ArrayView;
00644     using Teuchos::outArg;
00645     using Teuchos::RCP;
00646     using Teuchos::REDUCE_MIN;
00647     using Teuchos::reduceAll;
00648     typedef global_size_t GST;
00649     typedef LocalOrdinal LO;
00650     typedef GlobalOrdinal GO;
00651     typedef Map<LO, GO, node_type> map_type;
00652 
00653     // mfh 26 Mar 2013: The lazy way to do this is simply to recreate
00654     // the Map by calling its ordinary public constructor, using the
00655     // original Map's data.  This only involves O(1) all-reduces over
00656     // the new communicator, which in the common case only includes a
00657     // small number of processes.
00658 
00659     // Create the Map to return.
00660     if (newComm.is_null ()) {
00661       return Teuchos::null; // my process does not participate in the new Map
00662     } else {
00663       // Map requires that the index base equal the global min GID.
00664       // Figuring out the global min GID requires a reduction over all
00665       // processes in the new communicator.  It could be that some (or
00666       // even all) of these processes contain zero entries.  (Recall
00667       // that this method, unlike removeEmptyProcesses(), may remove
00668       // an arbitrary subset of processes.)  We deal with this by
00669       // doing a min over the min GID on each process if the process
00670       // has more than zero entries, or the global max GID, if that
00671       // process has zero entries.  If no processes have any entries,
00672       // then the index base doesn't matter anyway.
00673       const GO myMinGid = (this->getNodeNumElements () == 0) ?
00674         this->getMaxAllGlobalIndex () : this->getMinGlobalIndex ();
00675       GO newIndexBase = this->getInvalidGlobalIndex ();
00676       reduceAll<int, GO> (*newComm, REDUCE_MIN, myMinGid, outArg (newIndexBase));
00677 
00678       // Make Map's constructor compute the global number of indices.
00679       const GST globalNumInds = Teuchos::OrdinalTraits<GST>::invalid ();
00680 
00681       if (mapDevice_.initialized ()) {
00682         Kokkos::View<const GO*, DeviceType> myGIDs =
00683           mapDevice_.getMyGlobalIndices ();
00684         return rcp (new map_type (globalNumInds, myGIDs, newIndexBase,
00685                                   newComm, this->getNode ()));
00686       }
00687       else {
00688         Kokkos::View<const GO*, host_mirror_device_type> myGidsHostView =
00689           mapHost_.getMyGlobalIndices ();
00690         ArrayView<const GO> myGidsArrayView (myGidsHostView.ptr_on_device (),
00691                                              myGidsHostView.dimension_0 ());
00692         return rcp (new map_type (globalNumInds, myGidsArrayView, newIndexBase,
00693                                   newComm, this->getNode ()));
00694       }
00695     }
00696   }
00697 
00698   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00699   Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > >
00700   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00701   removeEmptyProcesses () const
00702   {
00703     using Teuchos::Comm;
00704     using Teuchos::null;
00705     using Teuchos::outArg;
00706     using Teuchos::RCP;
00707     using Teuchos::REDUCE_MIN;
00708     using Teuchos::reduceAll;
00709 
00710     // Create the new communicator.  split() returns a valid
00711     // communicator on all processes.  On processes where color == 0,
00712     // ignore the result.  Passing key == 0 tells MPI to order the
00713     // processes in the new communicator by their rank in the old
00714     // communicator.
00715     const int color = (getNodeNumElements () == 0) ? 0 : 1;
00716     // MPI_Comm_split must be called collectively over the original
00717     // communicator.  We can't just call it on processes with color
00718     // one, even though we will ignore its result on processes with
00719     // color zero.
00720     RCP<const Comm<int> > newComm = comm_->split (color, 0);
00721     if (color == 0) {
00722       newComm = null;
00723     }
00724 
00725     // Create the Map to return.
00726     if (newComm.is_null ()) {
00727       return null; // my process does not participate in the new Map
00728     } else {
00729       // The default constructor that's useful for clone() above is
00730       // also useful here.
00731       RCP<Map> map    = rcp (new Map ());
00732       map->comm_      = newComm;
00733       map->node_      = node_;
00734       map->mapHost_   = mapHost_;
00735       map->mapDevice_ = mapDevice_;
00736 
00737       // Uniformity and contiguity have not changed.  The directory
00738       // has changed, but we've taken care of that above.  However,
00739       // distributed-ness may have changed, since the communicator has
00740       // changed.
00741       //
00742       // If the original Map was NOT distributed, then the new Map
00743       // cannot be distributed.  If the number of processes in the new
00744       // communicator is 1, then the new Map is not distributed.
00745       // Otherwise, we have to check the new Map using an all-reduce
00746       // (over the new communicator).  For example, the original Map
00747       // may have had some processes with zero elements, and all other
00748       // processes with the same number of elements as in the whole
00749       // Map.  That Map is technically distributed, because of the
00750       // processes with zero elements.  Removing those processes would
00751       // make the new Map locally replicated.
00752       if (! isDistributed () || newComm->getSize () == 1) {
00753         map->mapHost_.setDistributed (false);
00754         map->mapDevice_.setDistributed (false);
00755       } else {
00756         const int iOwnAllGids =
00757           (getNodeNumElements () == getGlobalNumElements ()) ? 1 : 0;
00758         int allProcsOwnAllGids = 0;
00759         reduceAll<int, int> (*newComm, REDUCE_MIN, iOwnAllGids,
00760                              outArg (allProcsOwnAllGids));
00761         map->mapHost_.setDistributed (allProcsOwnAllGids != 1);
00762         map->mapDevice_.setDistributed (allProcsOwnAllGids != 1);
00763       }
00764 
00765       // Map's default constructor creates an uninitialized Directory.
00766       // The Directory will be initialized on demand in
00767       // getRemoteIndexList().
00768       //
00769       // FIXME (mfh 26 Mar 2013) It should be possible to "filter" the
00770       // directory more efficiently than just recreating it.  If
00771       // directory recreation proves a bottleneck, we can always
00772       // revisit this.  On the other hand, Directory creation is only
00773       // collective over the new, presumably much smaller
00774       // communicator, so it may not be worth the effort to optimize.
00775 
00776       return map;
00777     }
00778   }
00779 
00780   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00781   void
00782   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00783   setupDirectory () const
00784   {
00785     TEUCHOS_TEST_FOR_EXCEPTION(
00786       directory_.is_null (), std::logic_error, "Tpetra::Map::setupDirectory: "
00787       "The Directory is null.  "
00788       "Please report this bug to the Tpetra developers.");
00789 
00790     // Only create the Directory if it hasn't been created yet.
00791     // This is a collective operation.
00792     if (! directory_->initialized ()) {
00793       directory_->initialize (*this);
00794     }
00795   }
00796 
00797   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00798   LookupStatus
00799   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00800   getRemoteIndexList (const Teuchos::ArrayView<const GlobalOrdinal>& GIDs,
00801                       const Teuchos::ArrayView<int>& PIDs,
00802                       const Teuchos::ArrayView<LocalOrdinal>& LIDs) const
00803   {
00804     TEUCHOS_TEST_FOR_EXCEPTION(
00805       GIDs.size () != PIDs.size (), std::invalid_argument,
00806       "Tpetra::Map (Kokkos refactor)::getRemoteIndexList (3 args): GIDs.size ()"
00807       " = " << GIDs.size () << " != PIDs.size () = " << PIDs.size () << ".");
00808     TEUCHOS_TEST_FOR_EXCEPTION(
00809       GIDs.size () != LIDs.size (), std::invalid_argument,
00810       "Tpetra::Map (Kokkos refactor)::getRemoteIndexList (3 args): GIDs.size ()"
00811       " = " << GIDs.size () << " != LIDs.size () = " << LIDs.size () << ".");
00812 
00813     // Empty Maps (i.e., containing no indices on any processes in the
00814     // Map's communicator) are perfectly valid.  In that case, if the
00815     // input GID list is nonempty, we fill the output arrays with
00816     // invalid values, and return IDNotPresent to notify the caller.
00817     // It's perfectly valid to give getRemoteIndexList GIDs that the
00818     // Map doesn't own.  SubmapImport test 2 needs this functionality.
00819     if (getGlobalNumElements () == 0) {
00820       if (GIDs.size () == 0) {
00821         return AllIDsPresent; // trivially
00822       } else {
00823         for (Teuchos::ArrayView<int>::size_type k = 0; k < PIDs.size (); ++k) {
00824           PIDs[k] = Teuchos::OrdinalTraits<int>::invalid ();
00825         }
00826         for (typename Teuchos::ArrayView<LocalOrdinal>::size_type k = 0;
00827              k < LIDs.size (); ++k) {
00828           LIDs[k] = Teuchos::OrdinalTraits<LocalOrdinal>::invalid ();
00829         }
00830         return IDNotPresent;
00831       }
00832     }
00833 
00834     // getRemoteIndexList must be called collectively, and Directory
00835     // initialization is collective too, so it's OK to initialize the
00836     // Directory on demand.
00837     setupDirectory ();
00838     return directory_->getDirectoryEntries (*this, GIDs, PIDs, LIDs);
00839   }
00840 
00841   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00842   LookupStatus
00843   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00844   getRemoteIndexList (const Teuchos::ArrayView<const GlobalOrdinal>& GIDs,
00845                       const Teuchos::ArrayView<int>& PIDs) const
00846   {
00847     TEUCHOS_TEST_FOR_EXCEPTION(
00848       GIDs.size () != PIDs.size (), std::invalid_argument,
00849       "Tpetra::Map (Kokkos refactor)::getRemoteIndexList (2 args): GIDs.size ()"
00850       " = " << GIDs.size () << " != PIDs.size () = " << PIDs.size () << ".");
00851 
00852     // Empty Maps (i.e., containing no indices on any processes in the
00853     // Map's communicator) are perfectly valid.  In that case, if the
00854     // input GID list is nonempty, we fill the output array with
00855     // invalid values, and return IDNotPresent to notify the caller.
00856     // It's perfectly valid to give getRemoteIndexList GIDs that the
00857     // Map doesn't own.  SubmapImport test 2 needs this functionality.
00858     if (getGlobalNumElements () == 0) {
00859       if (GIDs.size () == 0) {
00860         return AllIDsPresent; // trivially
00861       } else {
00862         // The Map contains no indices, so all output PIDs are invalid.
00863         for (Teuchos::ArrayView<int>::size_type k = 0; k < PIDs.size (); ++k) {
00864           PIDs[k] = Teuchos::OrdinalTraits<int>::invalid ();
00865         }
00866         return IDNotPresent;
00867       }
00868     }
00869 
00870     // getRemoteIndexList must be called collectively, and Directory
00871     // creation is collective too, so it's OK to create the Directory
00872     // on demand.
00873     setupDirectory ();
00874     return directory_->getDirectoryEntries (*this, GIDs, PIDs);
00875   }
00876 
00877   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00878   Teuchos::RCP<const Teuchos::Comm<int> >
00879   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00880   getComm () const {
00881     return comm_;
00882   }
00883 
00884   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00885   Teuchos::RCP<Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >
00886   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00887   getNode () const {
00888     return node_;
00889   }
00890 
00891   template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
00892   bool
00893   Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
00894   isOneToOne () const
00895   {
00896     TEUCHOS_TEST_FOR_EXCEPTION(
00897       getComm ().is_null (), std::logic_error, "Tpetra::Map::isOneToOne: "
00898       "getComm() returns null.  Please report this bug to the Tpetra "
00899       "developers.");
00900 
00901     // This is a collective operation, if it hasn't been called before.
00902     setupDirectory ();
00903     return directory_->isOneToOne (*this);
00904   }
00905 
00906 } // namespace Tpetra
00907 
00908 #endif // TPETRA_KOKKOSREFACTOR_MAP_DEF_HPP
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines