Tpetra Matrix/Vector Services  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines
Tpetra_KokkosRefactor_Details_Map.hpp
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //          Tpetra: Templated Linear Algebra Services Package
00005 //                 Copyright (2008) Sandia Corporation
00006 //
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ************************************************************************
00040 // @HEADER
00041 
00042 #ifndef TPETRA_KOKKOSREFACTOR_DETAILS_MAP_HPP
00043 #define TPETRA_KOKKOSREFACTOR_DETAILS_MAP_HPP
00044 
00045 #include <Teuchos_Describable.hpp>
00046 #include <Kokkos_UnorderedMap.hpp>
00047 
00048 namespace Tpetra {
00049   namespace Details {
00050 
00057     template<class IntType, class DeviceType>
00058     class Iota {
00059     public:
00060       typedef DeviceType device_type;
00061 
00062       Iota (const Kokkos::View<IntType*, DeviceType>& x,
00063             const IntType first) : x_ (x), first_ (first) {}
00064 
00065       KOKKOS_INLINE_FUNCTION void
00066       operator () (const typename DeviceType::size_type i) const {
00067         x_(i) = first_ + static_cast<IntType> (i);
00068       }
00069 
00070     private:
00071       Kokkos::View<IntType*, DeviceType> x_;
00072       const IntType first_;
00073     };
00074 
00076     template<class IntType, class DeviceType>
00077     void
00078     iota (const Kokkos::View<IntType*, DeviceType>& x,
00079           const IntType first)
00080     {
00081       Kokkos::parallel_for (x.dimension_0 (), Iota<IntType, DeviceType> (x, first));
00082     }
00083 
00088     template<class LO, class GO, class DeviceType>
00089     struct MapData {
00090       GO minMyGID; 
00091       GO maxMyGID; 
00092 
00093 
00094 
00095 
00096       LO failedCount;
00111       LO duplicateCount;
00112 
00114       MapData () :
00115         minMyGID (0), maxMyGID (0), failedCount (0), duplicateCount (0)
00116       {}
00117     };
00118 
00125     template<class LO, class GO, class DeviceType>
00126     class GlobalToLocalTableFiller {
00127     public:
00128       typedef DeviceType device_type;
00129       typedef typename DeviceType::size_type size_type;
00130       typedef MapData<LO, GO, DeviceType> value_type;
00131 
00142       GlobalToLocalTableFiller (const Kokkos::UnorderedMap<GO, LO, DeviceType>& glMap,
00143                                 const Kokkos::View<const GO*, DeviceType>& entries,
00144                                 const GO firstContiguousGID,
00145                                 const GO lastContiguousGID) :
00146         glMap_ (glMap),
00147         entries_ (entries),
00148         firstContiguousGID_ (firstContiguousGID),
00149         lastContiguousGID_ (lastContiguousGID)
00150       {}
00151 
00160       KOKKOS_INLINE_FUNCTION void
00161       init (value_type& dst) const
00162       {
00163         dst.minMyGID = firstContiguousGID_;
00164         dst.maxMyGID = lastContiguousGID_;
00165         dst.failedCount = 0;
00166         dst.duplicateCount = 0;
00167       }
00168 
00172       KOKKOS_INLINE_FUNCTION void
00173       join (volatile value_type& dst ,
00174             const volatile value_type& src) const
00175       {
00176         if (src.maxMyGID > dst.maxMyGID) {
00177           dst.maxMyGID = src.maxMyGID;
00178         }
00179         if (src.minMyGID < dst.minMyGID) {
00180           dst.minMyGID = src.minMyGID;
00181         }
00182         dst.failedCount += src.failedCount;
00183         dst.duplicateCount += src.duplicateCount;
00184       }
00185 
00190       KOKKOS_INLINE_FUNCTION void
00191       operator () (const size_type i, value_type& dst) const
00192       {
00193         // [firstContiguousGID_, lastContiguousGID_] as GIDs map to
00194         // [0, lastContiguousGID_ - firstContiguousGID_ + 1] as LIDs.
00195         const LO lid =
00196           static_cast<LO> (lastContiguousGID_ - firstContiguousGID_ + 1) +
00197           static_cast<LO> (i);
00198         const GO gid = entries_(i);
00199 
00200         if (gid > dst.maxMyGID) {
00201           dst.maxMyGID = gid;
00202         }
00203         if (gid < dst.minMyGID) {
00204           dst.minMyGID = gid;
00205         }
00206         // The table should not run out of space, but if it does, the
00207         // caller will try again.
00208         const Kokkos::UnorderedMapInsertResult result = glMap_.insert (gid, lid);
00209 
00210         if (result.failed ()) {
00211           dst.failedCount++;
00212         }
00213         else if (result.existing ()) {
00214           dst.duplicateCount++;
00215         }
00216       }
00217 
00218     private:
00220       Kokkos::UnorderedMap<GO, LO, DeviceType> glMap_;
00222       Kokkos::View<const GO*, DeviceType> entries_;
00224       const GO firstContiguousGID_;
00226       const GO lastContiguousGID_;
00227     };
00228 
00251     template<class LO, class GO, class DeviceType>
00252     MapData<LO, GO, DeviceType>
00253     fillGlobalToLocalTable (Kokkos::UnorderedMap<GO, LO, DeviceType>& glMap,
00254                             const Kokkos::View<const GO*, DeviceType>& entries,
00255                             const GO firstContiguousGID,
00256                             const GO lastContiguousGID)
00257     {
00258       typedef typename DeviceType::size_type size_type;
00259       typedef GlobalToLocalTableFiller<LO, GO, DeviceType> functor_type;
00260 
00261       const size_type numEnt = entries.dimension_0 ();
00262       const size_type mapCap = glMap.capacity ();
00263       if (mapCap < numEnt) {
00264         Teuchos::ArrayView<const GO> lgMapAv (entries.ptr_on_device (), numEnt);
00265         TEUCHOS_TEST_FOR_EXCEPTION(
00266           mapCap < numEnt, std::logic_error, "fillGlobalToLocalTable: Input "
00267           "GID->LID table to fill does not have sufficient capacity for the "
00268           "given list of GIDs.  The table has capacity " << mapCap << ", but the "
00269           "input list of GIDs has " << numEnt << " entr" <<
00270           (numEnt != 1 ? "ies" : "y") << ".");
00271       }
00272 
00273       functor_type filler (glMap, entries, firstContiguousGID, lastContiguousGID);
00274 
00275       MapData<LO, GO, DeviceType> result;
00276       Kokkos::parallel_reduce (numEnt, filler, result);
00277       return result;
00278     }
00279 
00294     template<class LO, class GO, class DeviceType>
00295     class Map {
00296       template<class LO2, class GO2, class D2> friend class Map;
00297     public:
00299 
00300 
00302       typedef LO local_ordinal_type;
00304       typedef GO global_ordinal_type;
00306       typedef DeviceType device_type;
00307 
00309 
00310 
00311 
00313       Map () :
00314         invalidGlobalIndex_ (Teuchos::OrdinalTraits<GO>::invalid ()), // final
00315         invalidLocalIndex_ (Teuchos::OrdinalTraits<LO>::invalid ()), // final
00316         globalNumIndices_ (0),
00317         myNumIndices_ (0),
00318         indexBase_ (0),
00319         firstContiguousGID_ (Teuchos::OrdinalTraits<GO>::invalid ()),
00320         lastContiguousGID_ (Teuchos::OrdinalTraits<GO>::invalid ()),
00321         minMyGID_ (Teuchos::OrdinalTraits<GO>::invalid ()),
00322         maxMyGID_ (Teuchos::OrdinalTraits<GO>::invalid ()),
00323         minAllGID_ (Teuchos::OrdinalTraits<GO>::invalid ()),
00324         maxAllGID_ (Teuchos::OrdinalTraits<GO>::invalid ()),
00325         contiguous_ (false),
00326         distributed_ (true), // could be either false or true
00327         uniform_ (false),
00328         initialized_ (false)
00329       {}
00330 
00332       Map (const GO globalNumIndices,
00333            const GO indexBase,
00334            const Teuchos::Comm<int>& comm,
00335            const LocalGlobal lOrG) :
00336         invalidGlobalIndex_ (Teuchos::OrdinalTraits<GO>::invalid ()), // final
00337         invalidLocalIndex_ (Teuchos::OrdinalTraits<LO>::invalid ()), // final
00338         globalNumIndices_ (globalNumIndices), // final
00339         myNumIndices_ (0), // set below
00340         indexBase_ (indexBase), // final
00341         minMyGID_ (Teuchos::OrdinalTraits<GO>::invalid ()), // set below
00342         maxMyGID_ (Teuchos::OrdinalTraits<GO>::invalid ()), // set below
00343         minAllGID_ (globalNumIndices == 0 ?
00344                     invalidGlobalIndex_ :
00345                     indexBase), // final
00346         maxAllGID_ (globalNumIndices == 0 ?
00347                     invalidGlobalIndex_ :
00348                     indexBase + globalNumIndices - 1), // final
00349         contiguous_ (true), // final
00350         distributed_ (true), // this may be changed below
00351         uniform_ (true), // final
00352         initialized_ (true) // final
00353       {
00354         using Teuchos::as;
00355         using Teuchos::broadcast;
00356         using Teuchos::outArg;
00357         using Teuchos::reduceAll;
00358         using Teuchos::REDUCE_MIN;
00359         using Teuchos::REDUCE_MAX;
00360         using Teuchos::typeName;
00361 
00362 #ifdef HAVE_TPETRA_DEBUG
00363         // In debug mode only, check whether globalNumIndices and
00364         // indexBase are the same over all processes in comm.
00365         {
00366           GO proc0NumGlobalIndices = globalNumIndices;
00367           broadcast<int, GO> (comm, 0, outArg (proc0NumGlobalIndices));
00368           GO minGlobalNumIndices = globalNumIndices;
00369           GO maxGlobalNumIndices = globalNumIndices;
00370           reduceAll<int, GO> (comm, REDUCE_MIN, globalNumIndices, outArg (minGlobalNumIndices));
00371           reduceAll<int, GO> (comm, REDUCE_MAX, globalNumIndices, outArg (maxGlobalNumIndices));
00372           TEUCHOS_TEST_FOR_EXCEPTION(
00373              minGlobalNumIndices != maxGlobalNumIndices || globalNumIndices != minGlobalNumIndices,
00374              std::invalid_argument,
00375              "Tpetra::Map constructor: All processes must provide the same number "
00376              "of global indices.  Process 0 set globalNumIndidces = "
00377              << proc0NumGlobalIndices << ".  The calling process "
00378              << comm.getRank () << " set globalNumIndices = " << globalNumIndices
00379              << ".  The min and max values over all processes are "
00380              << minGlobalNumIndices << " resp. " << maxGlobalNumIndices << ".");
00381 
00382           GO proc0IndexBase = indexBase;
00383           broadcast<int, GO> (comm, 0, outArg (proc0IndexBase));
00384           GO minIndexBase = indexBase;
00385           GO maxIndexBase = indexBase;
00386           reduceAll<int, GO> (comm, REDUCE_MIN, indexBase, outArg (minIndexBase));
00387           reduceAll<int, GO> (comm, REDUCE_MAX, indexBase, outArg (maxIndexBase));
00388           TEUCHOS_TEST_FOR_EXCEPTION(
00389             minIndexBase != maxIndexBase || indexBase != minIndexBase,
00390           std::invalid_argument,
00391           "Tpetra::Map constructor: "
00392           "All processes must provide the same indexBase argument.  "
00393           "Process 0 set indexBase = " << proc0IndexBase << ".  The calling "
00394           "process " << comm.getRank () << " set indexBase = " << indexBase
00395           << ".  The min and max values over all processes are "
00396           << minIndexBase << " resp. " << maxIndexBase << ".");
00397         }
00398 #endif // HAVE_TPETRA_DEBUG
00399 
00400         // Distribute the GIDs (global indices) across the processes in
00401         // the given communicator so that they are
00402         //
00403         //   - Nonoverlapping (only one process owns each GID)
00404         //   - Contiguous (the sequence of GIDs is nondecreasing, and no
00405         //     two adjacent GIDs differ by more than one)
00406         //   - As evenly distributed as possible (the numbers of GIDs on
00407         //     two different processes do not differ by more than one)
00408 
00409         // All processes have the same globalNumIndices, but we still need
00410         // to check that it is valid.  globalNumIndices must be positive
00411         // and not the "invalid" value.
00412         //
00413         // This comparison looks funny, but it avoids compiler warnings
00414         // for comparing unsigned integers (GO might possibly be unsigned).
00415         TEUCHOS_TEST_FOR_EXCEPTION(
00416           (globalNumIndices < 1 && globalNumIndices != 0), std::invalid_argument,
00417           "Tpetra::Map constructor: globalNumIndices (= " << globalNumIndices
00418           << ") must be nonnegative.");
00419 
00420         TEUCHOS_TEST_FOR_EXCEPTION(
00421           globalNumIndices == static_cast<GO> (getInvalidGlobalIndex ()), std::invalid_argument,
00422           "Tpetra::Map constructor: You provided globalNumIndices = Teuchos::"
00423           "OrdinalTraits<GO>::invalid().  This version of the constructor "
00424           "requires a valid value of globalNumIndices.  You probably mistook "
00425           "this constructor for the \"contiguous nonuniform\" constructor, "
00426           "which can compute the global number of indices for you if you set "
00427           "globalNumIndices to that value.");
00428 
00429         if (lOrG == GloballyDistributed) {
00430           if (globalNumIndices == 0) {
00431             myNumIndices_ = 0;
00432             minMyGID_ = getInvalidGlobalIndex ();
00433             maxMyGID_ = getInvalidGlobalIndex ();
00434           }
00435           else { // globalNumIndices != 0, should be > 0
00436             // Compute myNumIndices:
00437             //
00438             // If globalNumIndices == numProcs * B + remainder,
00439             // then Proc r gets B+1 elements if r < remainder,
00440             // and B elements if r >= remainder.
00441             //
00442             // This strategy is valid for any value of globalNumIndices and
00443             // numProcs, including the following border cases:
00444             //   - numProcs == 1
00445             //   - myNumIndices < numProcs
00446             //
00447             // In the former case, remainder == 0 && globalNumIndices ==
00448             // myNumIndices.  In the latter case, remainder ==
00449             // globalNumIndices && myNumIndices is either 0 or 1.
00450             const GO numProcs = static_cast<GO> (comm.getSize ());
00451             const GO myRank = static_cast<GO> (comm.getRank ());
00452             const GO quotient  = globalNumIndices / numProcs;
00453             const GO remainder = globalNumIndices - quotient * numProcs;
00454 
00455             GO startIndex;
00456             if (myRank < remainder) {
00457               myNumIndices_ = 1 + quotient;
00458               // myRank was originally an int, so it should never overflow
00459               // reasonable GO types.
00460               startIndex = static_cast<GO> (myRank) * static_cast<GO> (myNumIndices_);
00461             } else {
00462               myNumIndices_ = static_cast<LO> (quotient);
00463               startIndex = static_cast<GO> (myRank) * static_cast<GO> (myNumIndices_) +
00464                 static_cast<GO> (remainder);
00465             }
00466             minMyGID_ = indexBase + startIndex;
00467             maxMyGID_ = indexBase + startIndex + myNumIndices_ - 1;
00468           }
00469           distributed_ = (comm.getSize () > 1);
00470         }
00471         else {  // lOrG == LocallyReplicated
00472           myNumIndices_ = static_cast<LO> (globalNumIndices);
00473           if (globalNumIndices == 0) {
00474             minMyGID_ = getInvalidGlobalIndex ();
00475             maxMyGID_ = getInvalidGlobalIndex ();
00476           }
00477           else {
00478             minMyGID_ = indexBase;
00479             maxMyGID_ = indexBase + globalNumIndices - 1;
00480           }
00481           distributed_ = false;
00482         }
00483 
00484         firstContiguousGID_ = minMyGID_;
00485         lastContiguousGID_ = maxMyGID_;
00486       }
00487 
00489       Map (const GO globalNumIndices,
00490            const LO myNumIndices,
00491            const GO indexBase,
00492            const Teuchos::Comm<int>& comm) :
00493         invalidGlobalIndex_ (Teuchos::OrdinalTraits<GO>::invalid ()), // final
00494         invalidLocalIndex_ (Teuchos::OrdinalTraits<LO>::invalid ()), // final
00495         globalNumIndices_ (globalNumIndices), // provisional, if invalid()
00496         myNumIndices_ (myNumIndices), // final
00497         indexBase_ (indexBase), // final
00498         firstContiguousGID_ (Teuchos::OrdinalTraits<GO>::invalid ()), // initialize below
00499         lastContiguousGID_ (Teuchos::OrdinalTraits<GO>::invalid ()), // initialize below
00500         minMyGID_ (Teuchos::OrdinalTraits<GO>::invalid ()), // set below
00501         maxMyGID_ (Teuchos::OrdinalTraits<GO>::invalid ()), // set below
00502         minAllGID_ (Teuchos::OrdinalTraits<GO>::invalid ()), // initialize below
00503         maxAllGID_ (Teuchos::OrdinalTraits<GO>::invalid ()), // initialize below
00504         contiguous_ (true),  // final
00505         distributed_ (true), // set below
00506         uniform_ (false),    // final (conservative; we could try to detect this)
00507         initialized_ (true)  // final
00508       {
00509         using Teuchos::broadcast;
00510         using Teuchos::outArg;
00511         using Teuchos::reduceAll;
00512         using Teuchos::REDUCE_MIN;
00513         using Teuchos::REDUCE_MAX;
00514         using Teuchos::REDUCE_SUM;
00515         using Teuchos::scan;
00516 
00517 #ifdef HAVE_TPETRA_DEBUG
00518         // Keep this for later debug checks.
00519         GO debugGlobalSum = 0; // Will be global sum of myNumIndices
00520         reduceAll<int, GO> (comm, REDUCE_SUM, static_cast<GO> (myNumIndices),
00521                             outArg (debugGlobalSum));
00522         // In debug mode only, check whether globalNumIndices and
00523         // indexBase are the same over all processes in comm.
00524         {
00525           GO proc0GlobalNumIndices = globalNumIndices;
00526           broadcast<int, GO> (comm, 0, outArg (proc0GlobalNumIndices));
00527           GO minGlobalNumIndices = globalNumIndices;
00528           GO maxGlobalNumIndices = globalNumIndices;
00529           reduceAll<int, GO> (comm, REDUCE_MIN, globalNumIndices, outArg (minGlobalNumIndices));
00530           reduceAll<int, GO> (comm, REDUCE_MAX, globalNumIndices, outArg (maxGlobalNumIndices));
00531           TEUCHOS_TEST_FOR_EXCEPTION(
00532             minGlobalNumIndices != maxGlobalNumIndices || globalNumIndices != minGlobalNumIndices,
00533             std::invalid_argument,
00534             "Tpetra::Map constructor: All processes must provide the same number "
00535             "of global indices.  This is true even if that argument is Teuchos::"
00536             "OrdinalTraits<global_size_t>::invalid() to signal that the Map should "
00537             "compute the global number of elements.  Process 0 set globalNumIndices"
00538             " = " << proc0GlobalNumIndices << ".  The calling process "
00539             << comm.getRank () << " set globalNumIndices = " << globalNumIndices
00540             << ".  The min and max values over all processes are "
00541             << minGlobalNumIndices << " resp. " << maxGlobalNumIndices << ".");
00542 
00543           GO proc0IndexBase = indexBase;
00544           broadcast<int, GO> (comm, 0, outArg (proc0IndexBase));
00545           GO minIndexBase = indexBase;
00546           GO maxIndexBase = indexBase;
00547           reduceAll<int, GO> (comm, REDUCE_MIN, indexBase, outArg (minIndexBase));
00548           reduceAll<int, GO> (comm, REDUCE_MAX, indexBase, outArg (maxIndexBase));
00549           TEUCHOS_TEST_FOR_EXCEPTION(
00550             minIndexBase != maxIndexBase || indexBase != minIndexBase,
00551             std::invalid_argument,
00552             "Tpetra::Map constructor: "
00553             "All processes must provide the same indexBase argument.  "
00554             "Process 0 set indexBase = " << proc0IndexBase << ".  The calling "
00555             "process " << comm.getRank () << " set indexBase = " << indexBase
00556             << ".  The min and max values over all processes are "
00557             << minIndexBase << " resp. " << maxIndexBase << ".");
00558 
00559           // Make sure that the sum of myNumIndices over all processes
00560           // equals globalNumIndices.
00561           TEUCHOS_TEST_FOR_EXCEPTION(
00562             globalNumIndices != getInvalidGlobalIndex () && debugGlobalSum != globalNumIndices,
00563             std::invalid_argument,
00564             "Tpetra::Map constructor: The sum of myNumIndices over all "
00565             "processes = " << debugGlobalSum << " != globalNumIndices = "
00566             << globalNumIndices << ".  If you would like this constructor to "
00567             "compute globalNumIndices for you, you may set globalNumIndices = "
00568             "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid() on input.");
00569         }
00570 #endif // HAVE_TPETRA_DEBUG
00571 
00572         // Distribute the GIDs (global indices) across the processes in
00573         // the given communicator so that they are
00574         //
00575         //   - Nonoverlapping (only one process owns each GID)
00576         //   - Contiguous (the sequence of GIDs is nondecreasing, and no
00577         //     two adjacent GIDs differ by more than one)
00578         //
00579         // This differs from the first Map constructor (that only takes a
00580         // global number of GIDs) in that the user has specified the
00581         // number of LIDs (local indices), so that the GIDs are not
00582         // (necessarily) evenly distributed over the processes.
00583 
00584         // Compute my local offset.  This is an inclusive scan, so to get
00585         // the final offset, we subtract off the input.
00586         GO scanResult = 0;
00587         scan<int, GO> (comm, REDUCE_SUM, myNumIndices, outArg (scanResult));
00588         const GO myOffset = scanResult - myNumIndices;
00589 
00590         if (globalNumIndices != static_cast<GO> (getInvalidGlobalIndex ())) {
00591           globalNumIndices_ = globalNumIndices; // Use the user's value.
00592         }
00593         else {
00594           // Inclusive scan means that the last process has the final sum.
00595           // Rather than doing a reduceAll to get the sum of
00596           // myNumIndices, we can just have the last process broadcast
00597           // its result.  That saves us a round of log(numProcs) messages.
00598           const int numProcs = comm.getSize ();
00599           GO globalSum = scanResult;
00600           if (numProcs > 1) {
00601             broadcast<int, GO> (comm, numProcs - 1, outArg (globalSum));
00602           }
00603           globalNumIndices_ = globalSum;
00604 
00605 #ifdef HAVE_TPETRA_DEBUG
00606           // No need for an all-reduce here; both come from collectives.
00607           TEUCHOS_TEST_FOR_EXCEPTION(
00608             globalSum != debugGlobalSum, std::logic_error,
00609             "Tpetra::Map constructor (contiguous nonuniform): "
00610             "globalSum = " << globalSum << " != debugGlobalSum = " << debugGlobalSum
00611             << ".  Please report this bug to the Tpetra developers.");
00612 #endif // HAVE_TPETRA_DEBUG
00613         }
00614         // Now globalNumIndices_ is set.
00615 
00616         if (globalNumIndices_ != 0) {
00617           minMyGID_ = indexBase + myOffset;
00618           maxMyGID_ = indexBase + myOffset + myNumIndices - 1;
00619           firstContiguousGID_ = minMyGID_;
00620           lastContiguousGID_ = maxMyGID_;
00621           minAllGID_ = indexBase;
00622           maxAllGID_ = indexBase + globalNumIndices_ - 1;
00623         }
00624 
00625         distributed_ = checkIsDist (comm);
00626       }
00627 
00629       Map (const GO globalNumIndices,
00630            const Kokkos::View<const GO*, DeviceType>& myGlobalIndices,
00631            const GO indexBase,
00632            const Teuchos::Comm<int>& comm) :
00633         invalidGlobalIndex_ (Teuchos::OrdinalTraits<GO>::invalid ()), // final
00634         invalidLocalIndex_ (Teuchos::OrdinalTraits<LO>::invalid ()), // final
00635         globalNumIndices_ (globalNumIndices), // provisional, if invalid()
00636         myNumIndices_ (myGlobalIndices.dimension_0 ()), // final
00637         indexBase_ (indexBase), // final
00638         firstContiguousGID_ (Teuchos::OrdinalTraits<GO>::invalid ()), // initialize below
00639         lastContiguousGID_ (Teuchos::OrdinalTraits<GO>::invalid ()), // initialize below
00640         minMyGID_ (Teuchos::OrdinalTraits<GO>::invalid ()), // initialize below
00641         maxMyGID_ (Teuchos::OrdinalTraits<GO>::invalid ()), // initialize below
00642         minAllGID_ (Teuchos::OrdinalTraits<GO>::invalid ()), // initialize below
00643         maxAllGID_ (Teuchos::OrdinalTraits<GO>::invalid ()), // initialize below
00644         lgMap_ (myGlobalIndices), // final (input assumed correct)
00645         contiguous_ (false), // final (conservative; we could try to detect this)
00646         distributed_ (true), // set below
00647         uniform_ (false),    // final (conservative; we could try to detect this)
00648         initialized_ (true)  // final
00649       {
00650         using Teuchos::outArg;
00651         using Teuchos::reduceAll;
00652 
00653         // FIXME (mfh 20 Feb 2013, 05 Feb 2014) The global reduction
00654         // is redundant, since the directory Map will have to do the
00655         // same thing.  We should actually do the scan and broadcast
00656         // for the directory Map here, and give the computed offsets
00657         // to the directory Map's constructor.
00658         if (globalNumIndices_ == invalidGlobalIndex_) {
00659           // The user wants us to compute the sum.
00660           reduceAll<int, GO> (comm, Teuchos::REDUCE_SUM,
00661                               static_cast<GO> (myNumIndices_),
00662                               outArg (globalNumIndices_));
00663         } // now globalNumIndices_ is final.
00664 
00665         Kokkos::View<const GO*, DeviceType> nonContigEntries;
00666         if (myNumIndices_ > 0) {
00667           // Find contiguous GID range, with the restriction that the
00668           // beginning of the range starts with the first entry.
00669 
00670           // NOTE (mfh 05 Feb 2014) This assumes UVM, in that the
00671           // host here is reading from a device View.
00672           firstContiguousGID_ = lgMap_(0);
00673           //lastContiguousGID_ = firstContiguousGID_ + 1;
00674 
00675           // This is always true, as long as there is at least one
00676           // GID; that is, there is always at least one entry in the
00677           // sequence of contiguous GIDs, namely the first entry.
00678           lastContiguousGID_ = firstContiguousGID_;
00679 
00680           // By choosing LO so that it can count the local number of
00681           // elements, the user asserts that it can fit any values in
00682           // [0, myNumIndices_-1].  Thus, it's OK for i to be LO.
00683           GO i = 1;
00684           for ( ; i < myNumIndices_; ++i) {
00685             const GO curGid = lgMap_(i);
00686             const GO newLastContigGID = lastContiguousGID_ + 1;
00687             if (newLastContigGID != curGid) break;
00688             lastContiguousGID_ = newLastContigGID;
00689           }
00690           //--lastContiguousGID_;
00691 
00692           // [firstContiguousGID, lastContigousGID] is the initial
00693           // sequence of contiguous GIDs.  The sequence always
00694           // contains at least the first GID.
00695 
00696           const size_t numContig =
00697             static_cast<size_t> (lastContiguousGID_ - firstContiguousGID_ + 1);
00698           const size_t numNonContig =
00699             static_cast<size_t> (myNumIndices_) - numContig;
00700 
00701           // Make a view of the given GIDs, _not_ including the
00702           // initial sequence of contiguous GIDs.  The GID -> LID
00703           // table does not store this initial sequence.
00704 
00705           std::pair<size_t, size_t> offsetInfo (numContig, numContig + numNonContig);
00706           nonContigEntries =
00707             Kokkos::subview<Kokkos::View<const GO*, DeviceType> > (lgMap_, offsetInfo);
00708 
00709           TEUCHOS_TEST_FOR_EXCEPTION(
00710             static_cast<size_t> (nonContigEntries.dimension_0 ()) != numNonContig,
00711             std::logic_error, "Tpetra::Details::Map: On Process " << comm.getRank ()
00712             << ", computed offset into noncontiguous entries incorrectly.  "
00713             "nonContigEntries.dimension_0() = " << nonContigEntries.dimension_0 ()
00714             << ", but numNonContig = " << numNonContig << ".");
00715 
00716           TEUCHOS_TEST_FOR_EXCEPTION(
00717             numNonContig != 0 && nonContigEntries(0) != lgMap_(numContig),
00718             std::logic_error, "Tpetra::Details::Map: On Process " << comm.getRank ()
00719             << ", computed view of noncontiguous entries incorrectly.  "
00720             "nonContigEntries(0) = " << nonContigEntries(0) << " != "
00721             "lgMap_(numNonContig = " << numNonContig << ") = " <<
00722             lgMap_(numNonContig) << ".");
00723 
00724           //
00725           // Fill the GID -> LID table, and compute local min and max
00726           // GIDs, both at the same time in a single parallel kernel.
00727           //
00728 
00729           // Allocate space for GID -> LID table.  Leave extra space
00730           // to avoid excessive collisions.
00731           typedef typename DeviceType::size_type size_type;
00732           const size_type tableSize =
00733             static_cast<size_type> (1.25 * static_cast<double> (numNonContig));
00734           Kokkos::UnorderedMap<GO, LO, DeviceType> glMap (tableSize);
00735           MapData<LO, GO, DeviceType> result =
00736             fillGlobalToLocalTable (glMap, nonContigEntries,
00737                                     firstContiguousGID_,
00738                                     lastContiguousGID_);
00739 
00740           // There should be no failed inserts, since we made the
00741           // UnorderedMap more than big enough to hold all the GIDs.
00742           TEUCHOS_TEST_FOR_EXCEPTION(
00743             result.failedCount != 0, std::logic_error, "Tpetra::Details::Map: "
00744             << result.failedCount << " insert(s) into GID->LID table failed "
00745             "out of " << numNonContig << " total GIDs to insert.");
00746 
00747           minMyGID_ = result.minMyGID;
00748           maxMyGID_ = result.maxMyGID;
00749 
00750           // This is a shallow copy, that casts to const.  After
00751           // creating the UnorderedMap, we don't need to modify it, so
00752           // we store it as const.  This improves look-up performance
00753           // on NVIDIA GPUs, since lookups can go through (read-only)
00754           // texture cache.
00755           glMap_ = glMap;
00756         }
00757         else {
00758           // mfh 10 Feb 2014: A common case for users of Map is to
00759           // execute the following loop:
00760           //
00761           // for (GO g = map.getMinGlobalIndex (); g <= map.getMaxGlobalIndex (); ++g) { ... }
00762           //
00763           // Unfortunately, Tpetra chose to make this an inclusive
00764           // range.  This means that if the calling process owns no
00765           // GIDs, we need to make the above loop have zero
00766           // iterations.  We can't do this if both the min and max GID
00767           // on the calling process are the invalid GID.  We can if we
00768           // set the min GID to indexBase+1 and the max GID to
00769           // indexBase.
00770           minMyGID_ = indexBase + 1;
00771           maxMyGID_ = indexBase;
00772 
00773           // This insures tests for GIDs in the range [firstContiguousGID,
00774           // lastContiguousGID] fail for processes with no local elements.
00775           firstContiguousGID_ = indexBase + 1;
00776           lastContiguousGID_ = indexBase;
00777         }
00778 
00779         // mfh 20 Feb 2014: Commenting out the exception test below
00780         // fixes Bug 5401.  In particular, it needs to be OK to supply
00781         // -1 as the min GID, even though Teuchos::OrdinalTraits<GO>
00782         // for signed GO is -1 (which is actually annoying -- the most
00783         // negative integer (e.g., INT_MIN for GO = int) would be
00784         // better).
00785         //
00786         // TEUCHOS_TEST_FOR_EXCEPTION(
00787         //   minMyGID_ == getInvalidGlobalIndex () ||
00788         //   maxMyGID_ == getInvalidGlobalIndex (),
00789         //   std::logic_error, "Tpetra::Details::Map noncontig ctor: The calling "
00790         //   "process " << comm.getRank () << " owns " << myNumIndices_ << " (!= "
00791         //   "0) global indices, but was unable to find the min or max global "
00792         //   "index on this process.  Please report this bug to the Tpetra "
00793         //   "developers.  Tell them that minMyGID_ == " << minMyGID_ << " and "
00794         //   "maxMyGID_ == " << maxMyGID_ << ".");
00795 
00796         if (globalNumIndices_ == 0) {
00797           // mfh 10 Feb 2014: Users might want to execute the
00798           // following loop:
00799           //
00800           // for (GO g = map.getMinAllGlobalIndex (); g <= map.getMaxAllGlobalIndex (); ++g) { ... }
00801           //
00802           // Unfortunately, Tpetra chose to make this an inclusive
00803           // range.  This means that if the Map is empty (has no GIDs
00804           // on any process), we need to make the above loop have zero
00805           // iterations.  We can't do this if both the global min and
00806           // max GIDs are the invalid GID.  We can if we set the
00807           // global min GID to indexBase+1 and the global max GID to
00808           // indexBase.
00809           minAllGID_ = indexBase + 1;
00810           maxAllGID_ = indexBase;
00811           distributed_ = (comm.getSize () > 1);
00812         }
00813         else {
00814           // Compute the min and max of all processes' GIDs.  If
00815           // myNumIndices_ == 0 on this process, that makes things a bit
00816           // tricky.  Since Tpetra requires that indexBase be the min
00817           // GID over all processes, that makes things a bit easier.  If
00818           // a process owns no GIDs, we can therefore set its min and
00819           // max GID to indexBase in the all-reduce.  This will not
00820           // influence the final result.
00821           //
00822           // mfh 10 Feb 2014: If Tpetra decides to change to allow
00823           // indexBase not to be the min GID, that would break the above
00824           // approach.  One of the Tpetra tests already assumes this: it
00825           // uses an index base different than the actual global min
00826           // GID.  In that case, if GO is signed, and a process owns no
00827           // GIDs, we can do the following on that process:
00828           //
00829           //   - Set its min GID (in the all-reduce, not the value of
00830           //     minMyGID_) to std::numeric_limits<GO>::max(), and its
00831           //     max GID to std::numeric_limits<GO>::min().
00832           //   - While we're at it, use the same all-reduce to figure
00833           //     out if the Map is distributed.  "Distributed" means
00834           //     that there is at least one process with a number of
00835           //     local elements less than the number of global elements.
00836           //
00837           // This works if GO is signed because in that case,
00838           // std::numeric_limits<GO>::min() is the biggest negative
00839           // value, which users are unlikely to want to use as a GID.
00840           // If GO is unsigned, however, this won't work.  In that case,
00841           // std::numeric_limits<GO>::min() is zero, which is a typical
00842           // smallest GID.  There are two approaches we could take:
00843           //
00844           //   1. Find the max GID first, in a separate all-reduce.
00845           //      (On processes that own no GIDs, use 0 (the least
00846           //      unsigned integer) as the input of the all-reduce.)
00847           //      Then, on processes that own no GIDs, use that result
00848           //      (the global max GID) in place of
00849           //      std::numeric_limits<GO>::max(), when doing the
00850           //      all-reduce to find the global min GID.
00851           //
00852           //   2. Convert from GO to a signed type of the same size,
00853           //      and use the above approach for signed GO.
00854           //
00855           // Option 1 is better because Option 2 reduces the range of
00856           // allowed GID values by a factor of two.
00857           if (std::numeric_limits<GO>::is_signed) {
00858             // Compute the min and max of all processes' GIDs using a
00859             // single MAX all-reduce, using min(x,y) == -max(-x,-y).
00860             // This only works if x and y are signed.  If the calling
00861             // process owns no GIDs, make its min GID input to the
00862             // all-reduce -std::numeric_limits<GO>::min(), which should
00863             // be no greater than std::numeric_limits<GO>::max().
00864             //
00865             // If each process sets localDist=1 if its number of local
00866             // elements is strictly less than the number of global
00867             // elements, and localDist=0 otherwise, then a MAX
00868             // all-reduce on localDist tells us if the Map is
00869             // distributed (1 if yes, 0 if no).  Thus, we can append
00870             // localDist onto the end of the data and get the global
00871             // result from the all-reduce.
00872 
00873             // Does my process NOT own all the elements?
00874             const GO localDist =
00875               (static_cast<GO> (myNumIndices_) < globalNumIndices_) ? 1 : 0;
00876 
00877             GO minMaxInput[3];
00878             minMaxInput[0] = (myNumIndices_ == 0) ?
00879               -std::numeric_limits<GO>::min () : -minMyGID_;
00880             minMaxInput[1] = (myNumIndices_ == 0) ?
00881               std::numeric_limits<GO>::min () : maxMyGID_;
00882             minMaxInput[2] = localDist;
00883 
00884             GO minMaxOutput[3];
00885             minMaxOutput[0] = 0; // arbitrary initialization
00886             minMaxOutput[1] = 0; // arbitrary initialization
00887             minMaxOutput[2] = 0; // arbitrary initialization
00888             reduceAll<int, GO> (comm, Teuchos::REDUCE_MAX, 3,
00889                                 minMaxInput, minMaxOutput);
00890             minAllGID_ = -minMaxOutput[0];
00891             maxAllGID_ = minMaxOutput[1];
00892             const GO globalDist = minMaxOutput[2];
00893             distributed_ = (comm.getSize () > 1 && globalDist == 1);
00894           }
00895           else { // GO is unsigned
00896             const GO maxInput = (myNumIndices_ == 0) ? 0 : maxMyGID_;
00897             reduceAll<int, GO> (comm, Teuchos::REDUCE_MAX, maxInput,
00898                                 outArg (maxAllGID_));
00899             const GO minInput = (myNumIndices_ == 0) ? maxAllGID_ : minMyGID_;
00900             reduceAll<int, GO> (comm, Teuchos::REDUCE_MIN, minInput,
00901                                 outArg (minAllGID_));
00902 
00903             // FIXME (mfh 10 Feb 2014) We could combine the MIN
00904             // all-reduce above with the MIN all-reduce below.
00905             if (comm.getSize () > 1) {
00906               // The communicator has more than one process, but that
00907               // doesn't necessarily mean the Map is distributed.
00908               int localRep = 0;
00909               if (getGlobalNumIndices () == static_cast<GO> (getMyNumIndices ())) {
00910                 // The number of local elements on this process equals
00911                 // the number of global elements.
00912                 //
00913                 // NOTE (mfh 22 Nov 2011) Does this still work if there
00914                 // were duplicates in the global ID list on input (the
00915                 // third Map constructor), so that the number of local
00916                 // elements (which are not duplicated) on this process
00917                 // could be less than the number of global elements,
00918                 // even if this process owns all the elements?
00919                 localRep = 1;
00920               }
00921               int allLocalRep;
00922               reduceAll<int, int> (comm, Teuchos::REDUCE_MIN, localRep,
00923                                    outArg (allLocalRep));
00924               if (allLocalRep != 1) {
00925                 // At least one process does not own all the elements.
00926                 // This makes the Map a distributed Map.
00927                 distributed_ = true;
00928               }
00929             }
00930             else {
00931               // If the communicator has only one process, then the Map
00932               // is not distributed.
00933               distributed_ = false;
00934             }
00935           } // if GO is signed
00936         }
00937 
00938         // mfh 10 Feb 2014: tpetra/test/Map/Map_UnitTests.cpp,
00939         // indexBaseAndAllMin test uses a different indexBase input
00940         // (0) than the actual min GID (1).  Other tests seem to
00941         // depend on this as well.  That's why I commented out the
00942         // test below.
00943 
00944         // TEUCHOS_TEST_FOR_EXCEPTION(
00945         //   globalNumIndices_ != 0 && minAllGID_ != indexBase_,
00946         //   std::invalid_argument,
00947         //   "Tpetra::Details::Map constructor (noncontiguous): "
00948         //   "The Map has " << globalNumIndices_ << " > 0 global indices, but the "
00949         //   "min global index " << minAllGID_ << " over all process(es) does not "
00950         //   "equal the given indexBase " << indexBase_ << ".");
00951       }
00952 
00955       template<class InDeviceType>
00956       void
00957       create_copy_view (const Map<LO, GO, InDeviceType>& map)
00958       {
00959         if (! map.initialized ()) {
00960           invalidGlobalIndex_ = Teuchos::OrdinalTraits<GO>::invalid ();
00961           invalidLocalIndex_ = Teuchos::OrdinalTraits<LO>::invalid ();
00962           globalNumIndices_ = 0;
00963           myNumIndices_ = 0;
00964           indexBase_ = 0;
00965           firstContiguousGID_ = Teuchos::OrdinalTraits<GO>::invalid ();
00966           lastContiguousGID_ = Teuchos::OrdinalTraits<GO>::invalid ();
00967           minMyGID_ = Teuchos::OrdinalTraits<GO>::invalid ();
00968           maxMyGID_ = Teuchos::OrdinalTraits<GO>::invalid ();
00969           minAllGID_ = Teuchos::OrdinalTraits<GO>::invalid ();
00970           maxAllGID_ = Teuchos::OrdinalTraits<GO>::invalid ();
00971           contiguous_ = false;
00972           distributed_ = true;
00973           uniform_ = false;
00974           initialized_ = false;
00975         }
00976         else {
00977           invalidGlobalIndex_ = map.invalidGlobalIndex_;
00978           invalidLocalIndex_ = map.invalidLocalIndex_;
00979           globalNumIndices_ = map.globalNumIndices_;
00980           myNumIndices_ = map.myNumIndices_;
00981           indexBase_ = map.indexBase_;
00982           firstContiguousGID_ = map.firstContiguousGID_;
00983           lastContiguousGID_ = map.lastContiguousGID_;
00984           minMyGID_ = map.minMyGID_;
00985           maxMyGID_ = map.maxMyGID_;
00986           minAllGID_ = map.minAllGID_;
00987           maxAllGID_ = map.maxAllGID_;
00988 
00989           if (! map.contiguous_) {
00990             if (map.myNumIndices_ != 0 && map.glMap_.size () != 0) {
00991               // The calling process owns one or more GIDs, and some of
00992               // these GIDs are not contiguous.
00993               Kokkos::UnorderedMap<GO, LO, DeviceType> glMap;
00994               glMap.create_copy_view (map.glMap_);
00995               glMap_ = glMap;
00996             }
00997 
00998             // It's OK for this to be dimension 0; that means it hasn't been initialized yet.
00999             Kokkos::View<GO*, DeviceType> lgMap ("LID->GID", map.lgMap_.dimension_0 ());
01000             if (map.lgMap_.dimension_0 () != 0) {
01001               Kokkos::deep_copy (lgMap, map.lgMap_);
01002             }
01003             lgMap_ = lgMap; // shallow copy and cast to const
01004           }
01005 
01006           contiguous_ = map.contiguous_;
01007           distributed_ = map.distributed_;
01008           uniform_ = map.uniform_;
01009           initialized_ = map.initialized_;
01010         }
01011       }
01012 
01020       void setDistributed (const bool distributed) {
01021         distributed_ = distributed;
01022       }
01023 
01034       bool initialized () const {
01035         return initialized_;
01036       }
01037 
01039 
01040 
01041 
01043       KOKKOS_INLINE_FUNCTION GO getGlobalNumIndices () const {
01044         return globalNumIndices_;
01045       }
01046 
01048       KOKKOS_INLINE_FUNCTION LO getMyNumIndices () const {
01049         return myNumIndices_;
01050       }
01051 
01053       KOKKOS_INLINE_FUNCTION GO getIndexBase () const {
01054         return indexBase_;
01055       }
01056 
01064       KOKKOS_INLINE_FUNCTION LO getInvalidLocalIndex () const {
01065         return invalidLocalIndex_;
01066       }
01067 
01069       KOKKOS_INLINE_FUNCTION LO getMinLocalIndex () const {
01070         return static_cast<LO> (0);
01071       }
01072 
01078       KOKKOS_INLINE_FUNCTION LO getMaxLocalIndex () const {
01079         const LO myNumIndices = getMyNumIndices ();
01080         // Local indices are always zero-based.
01081         return (myNumIndices == 0) ? getInvalidLocalIndex () : (myNumIndices - 1);
01082       }
01083 
01091       KOKKOS_INLINE_FUNCTION LO getInvalidGlobalIndex () const {
01092         return invalidGlobalIndex_;
01093       }
01094 
01096       KOKKOS_INLINE_FUNCTION GO getMinGlobalIndex () const {
01097         return minMyGID_;
01098       }
01099 
01101       KOKKOS_INLINE_FUNCTION GO getMaxGlobalIndex () const {
01102         return maxMyGID_;
01103       }
01104 
01106       KOKKOS_INLINE_FUNCTION GO getMinAllGlobalIndex () const {
01107         return minAllGID_;
01108       }
01109 
01111       KOKKOS_INLINE_FUNCTION GO getMaxAllGlobalIndex () const {
01112         return maxAllGID_;
01113       }
01114 
01119       KOKKOS_INLINE_FUNCTION LO getLocalIndex (const GO globalIndex) const {
01120         if (getMyNumIndices() == 0) return getInvalidLocalIndex();
01121         if (isContiguous ()) {
01122           if (globalIndex >= getMinGlobalIndex () &&
01123               globalIndex <= getMaxGlobalIndex ()) {
01124             return static_cast<LO> (globalIndex - getMinGlobalIndex ());
01125           }
01126           else {
01127             return getInvalidLocalIndex ();
01128           }
01129         }
01130         else if (globalIndex >= firstContiguousGID_ &&
01131                  globalIndex <= lastContiguousGID_) {
01132           return static_cast<LO> (globalIndex - firstContiguousGID_);
01133         }
01134         else {
01135           // This also works if this process owns no indices.
01136           // In that case, glMap_ will be empty, so this branch
01137           // will always return the invalid LID.
01138           const typename global_to_local_table_type::size_type i =
01139             glMap_.find (globalIndex);
01140           return glMap_.valid_at (i) ? // if the GID is in the map, ...
01141             glMap_.value_at (i) : // ... return its LID, ...
01142             getInvalidLocalIndex (); // ... else return the invalid LID.
01143         }
01144       }
01145 
01150       KOKKOS_INLINE_FUNCTION GO getGlobalIndex (const LO localIndex) const {
01151         if (getMyNumIndices() == 0) return getInvalidLocalIndex();
01152         if (localIndex < getMinLocalIndex () ||
01153             localIndex > getMaxLocalIndex ()) {
01154           // This process will always take this branch if it owns no indices.
01155           return getInvalidGlobalIndex ();
01156         }
01157         else if (isContiguous ()) {
01158           return getMinGlobalIndex () + localIndex;
01159         }
01160         else {
01161           return lgMap_(localIndex);
01162         }
01163       }
01164 
01166       KOKKOS_INLINE_FUNCTION bool
01167       isOwnedLocalIndex (const LO localIndex) const {
01168         return localIndex >= getMinLocalIndex () &&
01169           localIndex <= getMaxLocalIndex ();
01170       }
01171 
01173       KOKKOS_INLINE_FUNCTION bool
01174       isOwnedGlobalIndex (const GO globalIndex) const {
01175         return this->getLocalIndex (globalIndex) != getInvalidLocalIndex ();
01176       }
01177 
01185       KOKKOS_INLINE_FUNCTION bool isUniform () const {
01186         return uniform_;
01187       }
01188 
01200       KOKKOS_INLINE_FUNCTION bool isContiguous () const {
01201         return contiguous_;
01202       }
01203 
01225       KOKKOS_INLINE_FUNCTION bool isDistributed () const {
01226         return distributed_;
01227       }
01228 
01230 
01231 
01232 
01239       Kokkos::View<const GO*, DeviceType> getMyGlobalIndices () {
01240         const GO myNumIndices = getMyNumIndices ();
01241 
01242         if (myNumIndices != 0 && lgMap_.dimension_0 () == 0) {
01243           Kokkos::View<GO*, DeviceType> lgMap ("LID->GID", myNumIndices);
01244           // fill with [getMinGlobalIndex(), getMinGlobalIndex()+myNumIndices-1].
01245           iota<GO, DeviceType> (lgMap, this->getMinGlobalIndex ());
01246           lgMap_ = lgMap; // shallow copy; cast to const
01247         }
01248         return lgMap_;
01249       }
01250 
01252       std::string description () {
01253         return "\"Tpetra::Details::Map\"";
01254       }
01255 
01257       void
01258       describe (Teuchos::FancyOStream &out,
01259                 const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default)
01260       {
01261         using std::endl;
01262         const Teuchos::EVerbosityLevel vl =
01263           (verbLevel == Teuchos::VERB_DEFAULT) ? Teuchos::VERB_LOW : verbLevel;
01264 
01265         if (vl != Teuchos::VERB_NONE) {
01266           // By convention, describe() always starts with a tab.
01267           Teuchos::OSTab tab0 (out);
01268           out << description () << endl; // TODO (mfh 05 Feb 2014) Print more info.
01269         }
01270       }
01271 
01272     private:
01273       void validateLocally (const int myRank) {
01274         TEUCHOS_TEST_FOR_EXCEPTION(
01275           invalidGlobalIndex_ != Teuchos::OrdinalTraits<GO>::invalid (),
01276           std::logic_error,
01277           "Tpetra::Details::Map: On Process " << myRank << ", "
01278           "invalidGlobalIndex_ was not set.");
01279         TEUCHOS_TEST_FOR_EXCEPTION(
01280           invalidLocalIndex_ != Teuchos::OrdinalTraits<LO>::invalid (),
01281           std::logic_error,
01282           "Tpetra::Details::Map: On Process " << myRank << ", "
01283           "invalidLocalIndex_ was not set.");
01284         TEUCHOS_TEST_FOR_EXCEPTION(
01285           ! contiguous_ && myNumIndices_ != lgMap_.dimension_0 (),
01286           std::logic_error, "Tpetra::Details::Map: On Process " << myRank <<
01287           ", this noncontiguous Map has myNumIndices_ = " << myNumIndices_ <<
01288           " != lgMap_.dimension_0 () = " << lgMap_.dimension_0 () << ".");
01289 
01290         // Test whether the Map contains all the noncontiguous indices
01291         // it claims to contain.
01292         if (! contiguous_ && myNumIndices_ > 0) {
01293           bool bad = false;
01294           Teuchos::Array<GO> badGlobals;
01295           Teuchos::Array<LO> badLocalsIn;
01296           Teuchos::Array<LO> badLocalsOut;
01297 
01298           for (LO localIndexIn = 0; localIndexIn < myNumIndices_; ++localIndexIn) {
01299             const GO globalIndex = this->getGlobalIndex (localIndexIn);
01300             const LO localIndexOut = this->getLocalIndex (globalIndex);
01301 
01302             if (localIndexIn != localIndexOut) {
01303               bad = true;
01304               badGlobals.push_back (globalIndex);
01305               badLocalsIn.push_back (localIndexIn);
01306               badLocalsOut.push_back (localIndexOut);
01307             }
01308           }
01309 
01310           // Get the GID->LID table's keys and values as Teuchos::Array.
01311           typedef typename DeviceType::size_type size_type;
01312           const size_type mapSize = glMap_.size ();
01313           Teuchos::Array<GO> keys;
01314           Teuchos::Array<LO> vals;
01315           keys.reserve (mapSize);
01316           vals.reserve (mapSize);
01317           for (size_type k = 0; k < mapSize; ++k) {
01318             keys.push_back (glMap_.key_at (k));
01319             vals.push_back (glMap_.value_at (k));
01320           }
01321           // Get the LID->GID table as an ArrayView for easy printing.
01322           Teuchos::ArrayView<const GO> lgMapAv (lgMap_.ptr_on_device (),
01323                                                 lgMap_.dimension_0 ());
01324           TEUCHOS_TEST_FOR_EXCEPTION(
01325             bad, std::logic_error, "Tpetra::Details::Map: On Process " <<
01326             myRank << ", this noncontiguous Map's GID->LID lookup is broken.  "
01327             "The following GIDs are in this process' GID list, but attempting "
01328             "to look up their LIDs gives the wrong answer: " << badGlobals () <<
01329             ".  The corresponding LID list should have been " << badLocalsIn ()
01330             << ", but was " << badLocalsOut << " instead.  The Map's GID list "
01331             "is " << lgMapAv << ".  The keys in the GID->LID table are " <<
01332             keys () << " and their values are " << vals () << ".  The GID->LID "
01333             "table reports its size as " << mapSize << " and its capacity as "
01334             << glMap_.capacity () << ".  The initial sequence of contiguous "
01335             "GIDs on this process is [" << firstContiguousGID_ << "," <<
01336             lastContiguousGID_ << "].");
01337         }
01338       }
01339 
01356       bool checkIsDist (const Teuchos::Comm<int>& comm) {
01357         using Teuchos::outArg;
01358         using Teuchos::REDUCE_MIN;
01359         using Teuchos::reduceAll;
01360 
01361         bool global = false;
01362         if (comm.getSize () > 1) {
01363           // The communicator has more than one process, but that
01364           // doesn't necessarily mean the Map is distributed.
01365           int localRep = 0;
01366           if (getGlobalNumIndices () == static_cast<GO> (getMyNumIndices ())) {
01367             // The number of local indices on this process equals the
01368             // number of global indices.
01369             //
01370             // NOTE (mfh 22 Nov 2011) Does this still work if there
01371             // were duplicates in the GID list on input (the third Map
01372             // constructor), so that the number of LIDs (which are not
01373             // duplicated) on this process could be less than the
01374             // number of GIDs, even if this process owns all the IDs?
01375             localRep = 1;
01376           }
01377           int allLocalRep;
01378           reduceAll<int, int> (comm, REDUCE_MIN, localRep, outArg (allLocalRep));
01379           if (allLocalRep != 1) {
01380             // At least one process does not own all the elements.
01381             // This makes the Map a distributed Map.
01382             global = true;
01383           }
01384         }
01385         // If the communicator has only one process, then the Map is
01386         // not distributed.
01387         return global;
01388       }
01389 
01394       GO invalidGlobalIndex_;
01395 
01400       LO invalidLocalIndex_;
01401 
01403       GO globalNumIndices_;
01404 
01406       GO myNumIndices_;
01407 
01409       GO indexBase_;
01410 
01412       GO firstContiguousGID_;
01413 
01415       GO lastContiguousGID_;
01416 
01420       GO minMyGID_;
01421 
01425       GO maxMyGID_;
01426 
01429       GO minAllGID_;
01430 
01433       GO maxAllGID_;
01434 
01441       typedef Kokkos::UnorderedMap<const GO, const LO, DeviceType> global_to_local_table_type;
01442 
01459       global_to_local_table_type glMap_;
01460 
01472       Kokkos::View<const GO*, DeviceType> lgMap_;
01473 
01479       bool contiguous_;
01480 
01483       bool distributed_;
01484 
01490       bool uniform_;
01491 
01495       bool initialized_;
01496     };
01497 
01523     template<class OutMapType, class DeviceMapType, class HostMapType>
01524     struct MapMirrorer {
01525       typedef OutMapType output_map_type;
01526       typedef DeviceMapType device_map_type;
01527       typedef HostMapType host_map_type;
01528 
01530       static output_map_type
01531       mirror (device_map_type& deviceMap, host_map_type& hostMap);
01532     };
01533 
01534     // Partial specialization for when all three Maps have different
01535     // device types.
01536     template<class LO, class GO, class OutDeviceType, class DeviceType, class HostDeviceType>
01537     struct MapMirrorer<Map<LO, GO, OutDeviceType>, Map<LO, GO, DeviceType>, Map<LO, GO, HostDeviceType> > {
01538       typedef Map<LO, GO, OutDeviceType> output_map_type;
01539       typedef Map<LO, GO, DeviceType> device_map_type;
01540       typedef Map<LO, GO, HostDeviceType> host_map_type;
01541 
01542       static output_map_type
01543       mirror (device_map_type& deviceMap, host_map_type& hostMap) {
01544         output_map_type outputMap;
01545 
01546         // Favor the host Map, since if OutDeviceType != DeviceType,
01547         // then OutDeviceType is likely not to be a CUDA device (e.g.,
01548         // DeviceType = Cuda, HostDeviceType = OpenMP, and
01549         // OutDeviceType = Serial).
01550         if (hostMap.initialized ()) {
01551           outputMap.template create_copy_view<HostDeviceType> (hostMap);
01552         } else {
01553           outputMap.template create_copy_view<DeviceType> (deviceMap);
01554         }
01555         return outputMap;
01556       }
01557     };
01558 
01559     // Partial specialization for when the device and host Maps have
01560     // different device types, and the output Map has the same device
01561     // type as the device Map.
01562     template<class LO, class GO, class DeviceType, class HostDeviceType>
01563     struct MapMirrorer<Map<LO, GO, DeviceType>, Map<LO, GO, DeviceType>, Map<LO, GO, HostDeviceType> > {
01564       typedef Map<LO, GO, DeviceType> output_map_type;
01565       typedef Map<LO, GO, DeviceType> device_map_type;
01566       typedef Map<LO, GO, HostDeviceType> host_map_type;
01567 
01568       static output_map_type
01569       mirror (device_map_type& deviceMap, host_map_type& hostMap) {
01570         if (deviceMap.initialized ()) {
01571           return deviceMap;
01572         }
01573         else {
01574           output_map_type outMap;
01575           outMap.template create_copy_view<HostDeviceType> (hostMap);
01576           if (outMap.initialized () && ! deviceMap.initialized ()) {
01577             deviceMap = outMap; // cache the deep copy
01578           }
01579           return outMap;
01580         }
01581       }
01582     };
01583 
01584     // Partial specialization for when the device and host Maps have
01585     // different device types, and the output Map has the same device
01586     // type as the host Map.
01587     template<class LO, class GO, class DeviceType, class HostDeviceType>
01588     struct MapMirrorer<Map<LO, GO, HostDeviceType>, Map<LO, GO, DeviceType>, Map<LO, GO, HostDeviceType> > {
01589       typedef Map<LO, GO, HostDeviceType> output_map_type;
01590       typedef Map<LO, GO, DeviceType> device_map_type;
01591       typedef Map<LO, GO, HostDeviceType> host_map_type;
01592 
01593       static output_map_type
01594       mirror (device_map_type& deviceMap, host_map_type& hostMap) {
01595         if (hostMap.initialized ()) {
01596           return hostMap;
01597         }
01598         else {
01599           output_map_type outMap;
01600           outMap.template create_copy_view<DeviceType> (deviceMap);
01601           if (outMap.initialized ()) {
01602             hostMap = outMap; // cache the deep copy
01603           }
01604           return outMap;
01605         }
01606       }
01607     };
01608 
01609     // Partial specialization for when the device, host, and output
01610     // Maps have the same device types.
01611     template<class LO, class GO, class DeviceType>
01612     struct MapMirrorer<Map<LO, GO, DeviceType>, Map<LO, GO, DeviceType>, Map<LO, GO, DeviceType> > {
01613       typedef Map<LO, GO, DeviceType> output_map_type;
01614       typedef Map<LO, GO, DeviceType> device_map_type;
01615       typedef Map<LO, GO, DeviceType> host_map_type;
01616 
01617       static output_map_type
01618       mirror (device_map_type& deviceMap, const host_map_type& /* hostMap */ ) {
01619         return deviceMap;
01620       }
01621     };
01622 
01623   }
01624 }
01625 
01626 #endif // TPETRA_KOKKOSREFACTOR_DETAILS_MAP_HPP
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines