|
Tpetra Matrix/Vector Services
Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Tpetra: Templated Linear Algebra Services Package 00005 // Copyright (2008) Sandia Corporation 00006 // 00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, 00008 // the U.S. Government retains certain rights in this software. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00038 // 00039 // ************************************************************************ 00040 // @HEADER 00041 00042 #ifndef TPETRA_KOKKOSREFACTOR_DETAILS_MAP_HPP 00043 #define TPETRA_KOKKOSREFACTOR_DETAILS_MAP_HPP 00044 00045 #include <Teuchos_Describable.hpp> 00046 #include <Kokkos_UnorderedMap.hpp> 00047 00048 namespace Tpetra { 00049 namespace Details { 00050 00057 template<class IntType, class DeviceType> 00058 class Iota { 00059 public: 00060 typedef DeviceType device_type; 00061 00062 Iota (const Kokkos::View<IntType*, DeviceType>& x, 00063 const IntType first) : x_ (x), first_ (first) {} 00064 00065 KOKKOS_INLINE_FUNCTION void 00066 operator () (const typename DeviceType::size_type i) const { 00067 x_(i) = first_ + static_cast<IntType> (i); 00068 } 00069 00070 private: 00071 Kokkos::View<IntType*, DeviceType> x_; 00072 const IntType first_; 00073 }; 00074 00076 template<class IntType, class DeviceType> 00077 void 00078 iota (const Kokkos::View<IntType*, DeviceType>& x, 00079 const IntType first) 00080 { 00081 Kokkos::parallel_for (x.dimension_0 (), Iota<IntType, DeviceType> (x, first)); 00082 } 00083 00088 template<class LO, class GO, class DeviceType> 00089 struct MapData { 00090 GO minMyGID; 00091 GO maxMyGID; 00092 00093 00094 00095 00096 LO failedCount; 00111 LO duplicateCount; 00112 00114 MapData () : 00115 minMyGID (0), maxMyGID (0), failedCount (0), duplicateCount (0) 00116 {} 00117 }; 00118 00125 template<class LO, class GO, class DeviceType> 00126 class GlobalToLocalTableFiller { 00127 public: 00128 typedef DeviceType device_type; 00129 typedef typename DeviceType::size_type size_type; 00130 typedef MapData<LO, GO, DeviceType> value_type; 00131 00142 GlobalToLocalTableFiller (const Kokkos::UnorderedMap<GO, LO, DeviceType>& glMap, 00143 const Kokkos::View<const GO*, DeviceType>& entries, 00144 const GO firstContiguousGID, 00145 const GO lastContiguousGID) : 00146 glMap_ (glMap), 00147 entries_ (entries), 00148 firstContiguousGID_ (firstContiguousGID), 00149 lastContiguousGID_ (lastContiguousGID) 00150 {} 00151 00160 KOKKOS_INLINE_FUNCTION void 00161 init (value_type& dst) const 00162 { 00163 dst.minMyGID = firstContiguousGID_; 00164 dst.maxMyGID = lastContiguousGID_; 00165 dst.failedCount = 0; 00166 dst.duplicateCount = 0; 00167 } 00168 00172 KOKKOS_INLINE_FUNCTION void 00173 join (volatile value_type& dst , 00174 const volatile value_type& src) const 00175 { 00176 if (src.maxMyGID > dst.maxMyGID) { 00177 dst.maxMyGID = src.maxMyGID; 00178 } 00179 if (src.minMyGID < dst.minMyGID) { 00180 dst.minMyGID = src.minMyGID; 00181 } 00182 dst.failedCount += src.failedCount; 00183 dst.duplicateCount += src.duplicateCount; 00184 } 00185 00190 KOKKOS_INLINE_FUNCTION void 00191 operator () (const size_type i, value_type& dst) const 00192 { 00193 // [firstContiguousGID_, lastContiguousGID_] as GIDs map to 00194 // [0, lastContiguousGID_ - firstContiguousGID_ + 1] as LIDs. 00195 const LO lid = 00196 static_cast<LO> (lastContiguousGID_ - firstContiguousGID_ + 1) + 00197 static_cast<LO> (i); 00198 const GO gid = entries_(i); 00199 00200 if (gid > dst.maxMyGID) { 00201 dst.maxMyGID = gid; 00202 } 00203 if (gid < dst.minMyGID) { 00204 dst.minMyGID = gid; 00205 } 00206 // The table should not run out of space, but if it does, the 00207 // caller will try again. 00208 const Kokkos::UnorderedMapInsertResult result = glMap_.insert (gid, lid); 00209 00210 if (result.failed ()) { 00211 dst.failedCount++; 00212 } 00213 else if (result.existing ()) { 00214 dst.duplicateCount++; 00215 } 00216 } 00217 00218 private: 00220 Kokkos::UnorderedMap<GO, LO, DeviceType> glMap_; 00222 Kokkos::View<const GO*, DeviceType> entries_; 00224 const GO firstContiguousGID_; 00226 const GO lastContiguousGID_; 00227 }; 00228 00251 template<class LO, class GO, class DeviceType> 00252 MapData<LO, GO, DeviceType> 00253 fillGlobalToLocalTable (Kokkos::UnorderedMap<GO, LO, DeviceType>& glMap, 00254 const Kokkos::View<const GO*, DeviceType>& entries, 00255 const GO firstContiguousGID, 00256 const GO lastContiguousGID) 00257 { 00258 typedef typename DeviceType::size_type size_type; 00259 typedef GlobalToLocalTableFiller<LO, GO, DeviceType> functor_type; 00260 00261 const size_type numEnt = entries.dimension_0 (); 00262 const size_type mapCap = glMap.capacity (); 00263 if (mapCap < numEnt) { 00264 Teuchos::ArrayView<const GO> lgMapAv (entries.ptr_on_device (), numEnt); 00265 TEUCHOS_TEST_FOR_EXCEPTION( 00266 mapCap < numEnt, std::logic_error, "fillGlobalToLocalTable: Input " 00267 "GID->LID table to fill does not have sufficient capacity for the " 00268 "given list of GIDs. The table has capacity " << mapCap << ", but the " 00269 "input list of GIDs has " << numEnt << " entr" << 00270 (numEnt != 1 ? "ies" : "y") << "."); 00271 } 00272 00273 functor_type filler (glMap, entries, firstContiguousGID, lastContiguousGID); 00274 00275 MapData<LO, GO, DeviceType> result; 00276 Kokkos::parallel_reduce (numEnt, filler, result); 00277 return result; 00278 } 00279 00294 template<class LO, class GO, class DeviceType> 00295 class Map { 00296 template<class LO2, class GO2, class D2> friend class Map; 00297 public: 00299 00300 00302 typedef LO local_ordinal_type; 00304 typedef GO global_ordinal_type; 00306 typedef DeviceType device_type; 00307 00309 00310 00311 00313 Map () : 00314 invalidGlobalIndex_ (Teuchos::OrdinalTraits<GO>::invalid ()), // final 00315 invalidLocalIndex_ (Teuchos::OrdinalTraits<LO>::invalid ()), // final 00316 globalNumIndices_ (0), 00317 myNumIndices_ (0), 00318 indexBase_ (0), 00319 firstContiguousGID_ (Teuchos::OrdinalTraits<GO>::invalid ()), 00320 lastContiguousGID_ (Teuchos::OrdinalTraits<GO>::invalid ()), 00321 minMyGID_ (Teuchos::OrdinalTraits<GO>::invalid ()), 00322 maxMyGID_ (Teuchos::OrdinalTraits<GO>::invalid ()), 00323 minAllGID_ (Teuchos::OrdinalTraits<GO>::invalid ()), 00324 maxAllGID_ (Teuchos::OrdinalTraits<GO>::invalid ()), 00325 contiguous_ (false), 00326 distributed_ (true), // could be either false or true 00327 uniform_ (false), 00328 initialized_ (false) 00329 {} 00330 00332 Map (const GO globalNumIndices, 00333 const GO indexBase, 00334 const Teuchos::Comm<int>& comm, 00335 const LocalGlobal lOrG) : 00336 invalidGlobalIndex_ (Teuchos::OrdinalTraits<GO>::invalid ()), // final 00337 invalidLocalIndex_ (Teuchos::OrdinalTraits<LO>::invalid ()), // final 00338 globalNumIndices_ (globalNumIndices), // final 00339 myNumIndices_ (0), // set below 00340 indexBase_ (indexBase), // final 00341 minMyGID_ (Teuchos::OrdinalTraits<GO>::invalid ()), // set below 00342 maxMyGID_ (Teuchos::OrdinalTraits<GO>::invalid ()), // set below 00343 minAllGID_ (globalNumIndices == 0 ? 00344 invalidGlobalIndex_ : 00345 indexBase), // final 00346 maxAllGID_ (globalNumIndices == 0 ? 00347 invalidGlobalIndex_ : 00348 indexBase + globalNumIndices - 1), // final 00349 contiguous_ (true), // final 00350 distributed_ (true), // this may be changed below 00351 uniform_ (true), // final 00352 initialized_ (true) // final 00353 { 00354 using Teuchos::as; 00355 using Teuchos::broadcast; 00356 using Teuchos::outArg; 00357 using Teuchos::reduceAll; 00358 using Teuchos::REDUCE_MIN; 00359 using Teuchos::REDUCE_MAX; 00360 using Teuchos::typeName; 00361 00362 #ifdef HAVE_TPETRA_DEBUG 00363 // In debug mode only, check whether globalNumIndices and 00364 // indexBase are the same over all processes in comm. 00365 { 00366 GO proc0NumGlobalIndices = globalNumIndices; 00367 broadcast<int, GO> (comm, 0, outArg (proc0NumGlobalIndices)); 00368 GO minGlobalNumIndices = globalNumIndices; 00369 GO maxGlobalNumIndices = globalNumIndices; 00370 reduceAll<int, GO> (comm, REDUCE_MIN, globalNumIndices, outArg (minGlobalNumIndices)); 00371 reduceAll<int, GO> (comm, REDUCE_MAX, globalNumIndices, outArg (maxGlobalNumIndices)); 00372 TEUCHOS_TEST_FOR_EXCEPTION( 00373 minGlobalNumIndices != maxGlobalNumIndices || globalNumIndices != minGlobalNumIndices, 00374 std::invalid_argument, 00375 "Tpetra::Map constructor: All processes must provide the same number " 00376 "of global indices. Process 0 set globalNumIndidces = " 00377 << proc0NumGlobalIndices << ". The calling process " 00378 << comm.getRank () << " set globalNumIndices = " << globalNumIndices 00379 << ". The min and max values over all processes are " 00380 << minGlobalNumIndices << " resp. " << maxGlobalNumIndices << "."); 00381 00382 GO proc0IndexBase = indexBase; 00383 broadcast<int, GO> (comm, 0, outArg (proc0IndexBase)); 00384 GO minIndexBase = indexBase; 00385 GO maxIndexBase = indexBase; 00386 reduceAll<int, GO> (comm, REDUCE_MIN, indexBase, outArg (minIndexBase)); 00387 reduceAll<int, GO> (comm, REDUCE_MAX, indexBase, outArg (maxIndexBase)); 00388 TEUCHOS_TEST_FOR_EXCEPTION( 00389 minIndexBase != maxIndexBase || indexBase != minIndexBase, 00390 std::invalid_argument, 00391 "Tpetra::Map constructor: " 00392 "All processes must provide the same indexBase argument. " 00393 "Process 0 set indexBase = " << proc0IndexBase << ". The calling " 00394 "process " << comm.getRank () << " set indexBase = " << indexBase 00395 << ". The min and max values over all processes are " 00396 << minIndexBase << " resp. " << maxIndexBase << "."); 00397 } 00398 #endif // HAVE_TPETRA_DEBUG 00399 00400 // Distribute the GIDs (global indices) across the processes in 00401 // the given communicator so that they are 00402 // 00403 // - Nonoverlapping (only one process owns each GID) 00404 // - Contiguous (the sequence of GIDs is nondecreasing, and no 00405 // two adjacent GIDs differ by more than one) 00406 // - As evenly distributed as possible (the numbers of GIDs on 00407 // two different processes do not differ by more than one) 00408 00409 // All processes have the same globalNumIndices, but we still need 00410 // to check that it is valid. globalNumIndices must be positive 00411 // and not the "invalid" value. 00412 // 00413 // This comparison looks funny, but it avoids compiler warnings 00414 // for comparing unsigned integers (GO might possibly be unsigned). 00415 TEUCHOS_TEST_FOR_EXCEPTION( 00416 (globalNumIndices < 1 && globalNumIndices != 0), std::invalid_argument, 00417 "Tpetra::Map constructor: globalNumIndices (= " << globalNumIndices 00418 << ") must be nonnegative."); 00419 00420 TEUCHOS_TEST_FOR_EXCEPTION( 00421 globalNumIndices == static_cast<GO> (getInvalidGlobalIndex ()), std::invalid_argument, 00422 "Tpetra::Map constructor: You provided globalNumIndices = Teuchos::" 00423 "OrdinalTraits<GO>::invalid(). This version of the constructor " 00424 "requires a valid value of globalNumIndices. You probably mistook " 00425 "this constructor for the \"contiguous nonuniform\" constructor, " 00426 "which can compute the global number of indices for you if you set " 00427 "globalNumIndices to that value."); 00428 00429 if (lOrG == GloballyDistributed) { 00430 if (globalNumIndices == 0) { 00431 myNumIndices_ = 0; 00432 minMyGID_ = getInvalidGlobalIndex (); 00433 maxMyGID_ = getInvalidGlobalIndex (); 00434 } 00435 else { // globalNumIndices != 0, should be > 0 00436 // Compute myNumIndices: 00437 // 00438 // If globalNumIndices == numProcs * B + remainder, 00439 // then Proc r gets B+1 elements if r < remainder, 00440 // and B elements if r >= remainder. 00441 // 00442 // This strategy is valid for any value of globalNumIndices and 00443 // numProcs, including the following border cases: 00444 // - numProcs == 1 00445 // - myNumIndices < numProcs 00446 // 00447 // In the former case, remainder == 0 && globalNumIndices == 00448 // myNumIndices. In the latter case, remainder == 00449 // globalNumIndices && myNumIndices is either 0 or 1. 00450 const GO numProcs = static_cast<GO> (comm.getSize ()); 00451 const GO myRank = static_cast<GO> (comm.getRank ()); 00452 const GO quotient = globalNumIndices / numProcs; 00453 const GO remainder = globalNumIndices - quotient * numProcs; 00454 00455 GO startIndex; 00456 if (myRank < remainder) { 00457 myNumIndices_ = 1 + quotient; 00458 // myRank was originally an int, so it should never overflow 00459 // reasonable GO types. 00460 startIndex = static_cast<GO> (myRank) * static_cast<GO> (myNumIndices_); 00461 } else { 00462 myNumIndices_ = static_cast<LO> (quotient); 00463 startIndex = static_cast<GO> (myRank) * static_cast<GO> (myNumIndices_) + 00464 static_cast<GO> (remainder); 00465 } 00466 minMyGID_ = indexBase + startIndex; 00467 maxMyGID_ = indexBase + startIndex + myNumIndices_ - 1; 00468 } 00469 distributed_ = (comm.getSize () > 1); 00470 } 00471 else { // lOrG == LocallyReplicated 00472 myNumIndices_ = static_cast<LO> (globalNumIndices); 00473 if (globalNumIndices == 0) { 00474 minMyGID_ = getInvalidGlobalIndex (); 00475 maxMyGID_ = getInvalidGlobalIndex (); 00476 } 00477 else { 00478 minMyGID_ = indexBase; 00479 maxMyGID_ = indexBase + globalNumIndices - 1; 00480 } 00481 distributed_ = false; 00482 } 00483 00484 firstContiguousGID_ = minMyGID_; 00485 lastContiguousGID_ = maxMyGID_; 00486 } 00487 00489 Map (const GO globalNumIndices, 00490 const LO myNumIndices, 00491 const GO indexBase, 00492 const Teuchos::Comm<int>& comm) : 00493 invalidGlobalIndex_ (Teuchos::OrdinalTraits<GO>::invalid ()), // final 00494 invalidLocalIndex_ (Teuchos::OrdinalTraits<LO>::invalid ()), // final 00495 globalNumIndices_ (globalNumIndices), // provisional, if invalid() 00496 myNumIndices_ (myNumIndices), // final 00497 indexBase_ (indexBase), // final 00498 firstContiguousGID_ (Teuchos::OrdinalTraits<GO>::invalid ()), // initialize below 00499 lastContiguousGID_ (Teuchos::OrdinalTraits<GO>::invalid ()), // initialize below 00500 minMyGID_ (Teuchos::OrdinalTraits<GO>::invalid ()), // set below 00501 maxMyGID_ (Teuchos::OrdinalTraits<GO>::invalid ()), // set below 00502 minAllGID_ (Teuchos::OrdinalTraits<GO>::invalid ()), // initialize below 00503 maxAllGID_ (Teuchos::OrdinalTraits<GO>::invalid ()), // initialize below 00504 contiguous_ (true), // final 00505 distributed_ (true), // set below 00506 uniform_ (false), // final (conservative; we could try to detect this) 00507 initialized_ (true) // final 00508 { 00509 using Teuchos::broadcast; 00510 using Teuchos::outArg; 00511 using Teuchos::reduceAll; 00512 using Teuchos::REDUCE_MIN; 00513 using Teuchos::REDUCE_MAX; 00514 using Teuchos::REDUCE_SUM; 00515 using Teuchos::scan; 00516 00517 #ifdef HAVE_TPETRA_DEBUG 00518 // Keep this for later debug checks. 00519 GO debugGlobalSum = 0; // Will be global sum of myNumIndices 00520 reduceAll<int, GO> (comm, REDUCE_SUM, static_cast<GO> (myNumIndices), 00521 outArg (debugGlobalSum)); 00522 // In debug mode only, check whether globalNumIndices and 00523 // indexBase are the same over all processes in comm. 00524 { 00525 GO proc0GlobalNumIndices = globalNumIndices; 00526 broadcast<int, GO> (comm, 0, outArg (proc0GlobalNumIndices)); 00527 GO minGlobalNumIndices = globalNumIndices; 00528 GO maxGlobalNumIndices = globalNumIndices; 00529 reduceAll<int, GO> (comm, REDUCE_MIN, globalNumIndices, outArg (minGlobalNumIndices)); 00530 reduceAll<int, GO> (comm, REDUCE_MAX, globalNumIndices, outArg (maxGlobalNumIndices)); 00531 TEUCHOS_TEST_FOR_EXCEPTION( 00532 minGlobalNumIndices != maxGlobalNumIndices || globalNumIndices != minGlobalNumIndices, 00533 std::invalid_argument, 00534 "Tpetra::Map constructor: All processes must provide the same number " 00535 "of global indices. This is true even if that argument is Teuchos::" 00536 "OrdinalTraits<global_size_t>::invalid() to signal that the Map should " 00537 "compute the global number of elements. Process 0 set globalNumIndices" 00538 " = " << proc0GlobalNumIndices << ". The calling process " 00539 << comm.getRank () << " set globalNumIndices = " << globalNumIndices 00540 << ". The min and max values over all processes are " 00541 << minGlobalNumIndices << " resp. " << maxGlobalNumIndices << "."); 00542 00543 GO proc0IndexBase = indexBase; 00544 broadcast<int, GO> (comm, 0, outArg (proc0IndexBase)); 00545 GO minIndexBase = indexBase; 00546 GO maxIndexBase = indexBase; 00547 reduceAll<int, GO> (comm, REDUCE_MIN, indexBase, outArg (minIndexBase)); 00548 reduceAll<int, GO> (comm, REDUCE_MAX, indexBase, outArg (maxIndexBase)); 00549 TEUCHOS_TEST_FOR_EXCEPTION( 00550 minIndexBase != maxIndexBase || indexBase != minIndexBase, 00551 std::invalid_argument, 00552 "Tpetra::Map constructor: " 00553 "All processes must provide the same indexBase argument. " 00554 "Process 0 set indexBase = " << proc0IndexBase << ". The calling " 00555 "process " << comm.getRank () << " set indexBase = " << indexBase 00556 << ". The min and max values over all processes are " 00557 << minIndexBase << " resp. " << maxIndexBase << "."); 00558 00559 // Make sure that the sum of myNumIndices over all processes 00560 // equals globalNumIndices. 00561 TEUCHOS_TEST_FOR_EXCEPTION( 00562 globalNumIndices != getInvalidGlobalIndex () && debugGlobalSum != globalNumIndices, 00563 std::invalid_argument, 00564 "Tpetra::Map constructor: The sum of myNumIndices over all " 00565 "processes = " << debugGlobalSum << " != globalNumIndices = " 00566 << globalNumIndices << ". If you would like this constructor to " 00567 "compute globalNumIndices for you, you may set globalNumIndices = " 00568 "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid() on input."); 00569 } 00570 #endif // HAVE_TPETRA_DEBUG 00571 00572 // Distribute the GIDs (global indices) across the processes in 00573 // the given communicator so that they are 00574 // 00575 // - Nonoverlapping (only one process owns each GID) 00576 // - Contiguous (the sequence of GIDs is nondecreasing, and no 00577 // two adjacent GIDs differ by more than one) 00578 // 00579 // This differs from the first Map constructor (that only takes a 00580 // global number of GIDs) in that the user has specified the 00581 // number of LIDs (local indices), so that the GIDs are not 00582 // (necessarily) evenly distributed over the processes. 00583 00584 // Compute my local offset. This is an inclusive scan, so to get 00585 // the final offset, we subtract off the input. 00586 GO scanResult = 0; 00587 scan<int, GO> (comm, REDUCE_SUM, myNumIndices, outArg (scanResult)); 00588 const GO myOffset = scanResult - myNumIndices; 00589 00590 if (globalNumIndices != static_cast<GO> (getInvalidGlobalIndex ())) { 00591 globalNumIndices_ = globalNumIndices; // Use the user's value. 00592 } 00593 else { 00594 // Inclusive scan means that the last process has the final sum. 00595 // Rather than doing a reduceAll to get the sum of 00596 // myNumIndices, we can just have the last process broadcast 00597 // its result. That saves us a round of log(numProcs) messages. 00598 const int numProcs = comm.getSize (); 00599 GO globalSum = scanResult; 00600 if (numProcs > 1) { 00601 broadcast<int, GO> (comm, numProcs - 1, outArg (globalSum)); 00602 } 00603 globalNumIndices_ = globalSum; 00604 00605 #ifdef HAVE_TPETRA_DEBUG 00606 // No need for an all-reduce here; both come from collectives. 00607 TEUCHOS_TEST_FOR_EXCEPTION( 00608 globalSum != debugGlobalSum, std::logic_error, 00609 "Tpetra::Map constructor (contiguous nonuniform): " 00610 "globalSum = " << globalSum << " != debugGlobalSum = " << debugGlobalSum 00611 << ". Please report this bug to the Tpetra developers."); 00612 #endif // HAVE_TPETRA_DEBUG 00613 } 00614 // Now globalNumIndices_ is set. 00615 00616 if (globalNumIndices_ != 0) { 00617 minMyGID_ = indexBase + myOffset; 00618 maxMyGID_ = indexBase + myOffset + myNumIndices - 1; 00619 firstContiguousGID_ = minMyGID_; 00620 lastContiguousGID_ = maxMyGID_; 00621 minAllGID_ = indexBase; 00622 maxAllGID_ = indexBase + globalNumIndices_ - 1; 00623 } 00624 00625 distributed_ = checkIsDist (comm); 00626 } 00627 00629 Map (const GO globalNumIndices, 00630 const Kokkos::View<const GO*, DeviceType>& myGlobalIndices, 00631 const GO indexBase, 00632 const Teuchos::Comm<int>& comm) : 00633 invalidGlobalIndex_ (Teuchos::OrdinalTraits<GO>::invalid ()), // final 00634 invalidLocalIndex_ (Teuchos::OrdinalTraits<LO>::invalid ()), // final 00635 globalNumIndices_ (globalNumIndices), // provisional, if invalid() 00636 myNumIndices_ (myGlobalIndices.dimension_0 ()), // final 00637 indexBase_ (indexBase), // final 00638 firstContiguousGID_ (Teuchos::OrdinalTraits<GO>::invalid ()), // initialize below 00639 lastContiguousGID_ (Teuchos::OrdinalTraits<GO>::invalid ()), // initialize below 00640 minMyGID_ (Teuchos::OrdinalTraits<GO>::invalid ()), // initialize below 00641 maxMyGID_ (Teuchos::OrdinalTraits<GO>::invalid ()), // initialize below 00642 minAllGID_ (Teuchos::OrdinalTraits<GO>::invalid ()), // initialize below 00643 maxAllGID_ (Teuchos::OrdinalTraits<GO>::invalid ()), // initialize below 00644 lgMap_ (myGlobalIndices), // final (input assumed correct) 00645 contiguous_ (false), // final (conservative; we could try to detect this) 00646 distributed_ (true), // set below 00647 uniform_ (false), // final (conservative; we could try to detect this) 00648 initialized_ (true) // final 00649 { 00650 using Teuchos::outArg; 00651 using Teuchos::reduceAll; 00652 00653 // FIXME (mfh 20 Feb 2013, 05 Feb 2014) The global reduction 00654 // is redundant, since the directory Map will have to do the 00655 // same thing. We should actually do the scan and broadcast 00656 // for the directory Map here, and give the computed offsets 00657 // to the directory Map's constructor. 00658 if (globalNumIndices_ == invalidGlobalIndex_) { 00659 // The user wants us to compute the sum. 00660 reduceAll<int, GO> (comm, Teuchos::REDUCE_SUM, 00661 static_cast<GO> (myNumIndices_), 00662 outArg (globalNumIndices_)); 00663 } // now globalNumIndices_ is final. 00664 00665 Kokkos::View<const GO*, DeviceType> nonContigEntries; 00666 if (myNumIndices_ > 0) { 00667 // Find contiguous GID range, with the restriction that the 00668 // beginning of the range starts with the first entry. 00669 00670 // NOTE (mfh 05 Feb 2014) This assumes UVM, in that the 00671 // host here is reading from a device View. 00672 firstContiguousGID_ = lgMap_(0); 00673 //lastContiguousGID_ = firstContiguousGID_ + 1; 00674 00675 // This is always true, as long as there is at least one 00676 // GID; that is, there is always at least one entry in the 00677 // sequence of contiguous GIDs, namely the first entry. 00678 lastContiguousGID_ = firstContiguousGID_; 00679 00680 // By choosing LO so that it can count the local number of 00681 // elements, the user asserts that it can fit any values in 00682 // [0, myNumIndices_-1]. Thus, it's OK for i to be LO. 00683 GO i = 1; 00684 for ( ; i < myNumIndices_; ++i) { 00685 const GO curGid = lgMap_(i); 00686 const GO newLastContigGID = lastContiguousGID_ + 1; 00687 if (newLastContigGID != curGid) break; 00688 lastContiguousGID_ = newLastContigGID; 00689 } 00690 //--lastContiguousGID_; 00691 00692 // [firstContiguousGID, lastContigousGID] is the initial 00693 // sequence of contiguous GIDs. The sequence always 00694 // contains at least the first GID. 00695 00696 const size_t numContig = 00697 static_cast<size_t> (lastContiguousGID_ - firstContiguousGID_ + 1); 00698 const size_t numNonContig = 00699 static_cast<size_t> (myNumIndices_) - numContig; 00700 00701 // Make a view of the given GIDs, _not_ including the 00702 // initial sequence of contiguous GIDs. The GID -> LID 00703 // table does not store this initial sequence. 00704 00705 std::pair<size_t, size_t> offsetInfo (numContig, numContig + numNonContig); 00706 nonContigEntries = 00707 Kokkos::subview<Kokkos::View<const GO*, DeviceType> > (lgMap_, offsetInfo); 00708 00709 TEUCHOS_TEST_FOR_EXCEPTION( 00710 static_cast<size_t> (nonContigEntries.dimension_0 ()) != numNonContig, 00711 std::logic_error, "Tpetra::Details::Map: On Process " << comm.getRank () 00712 << ", computed offset into noncontiguous entries incorrectly. " 00713 "nonContigEntries.dimension_0() = " << nonContigEntries.dimension_0 () 00714 << ", but numNonContig = " << numNonContig << "."); 00715 00716 TEUCHOS_TEST_FOR_EXCEPTION( 00717 numNonContig != 0 && nonContigEntries(0) != lgMap_(numContig), 00718 std::logic_error, "Tpetra::Details::Map: On Process " << comm.getRank () 00719 << ", computed view of noncontiguous entries incorrectly. " 00720 "nonContigEntries(0) = " << nonContigEntries(0) << " != " 00721 "lgMap_(numNonContig = " << numNonContig << ") = " << 00722 lgMap_(numNonContig) << "."); 00723 00724 // 00725 // Fill the GID -> LID table, and compute local min and max 00726 // GIDs, both at the same time in a single parallel kernel. 00727 // 00728 00729 // Allocate space for GID -> LID table. Leave extra space 00730 // to avoid excessive collisions. 00731 typedef typename DeviceType::size_type size_type; 00732 const size_type tableSize = 00733 static_cast<size_type> (1.25 * static_cast<double> (numNonContig)); 00734 Kokkos::UnorderedMap<GO, LO, DeviceType> glMap (tableSize); 00735 MapData<LO, GO, DeviceType> result = 00736 fillGlobalToLocalTable (glMap, nonContigEntries, 00737 firstContiguousGID_, 00738 lastContiguousGID_); 00739 00740 // There should be no failed inserts, since we made the 00741 // UnorderedMap more than big enough to hold all the GIDs. 00742 TEUCHOS_TEST_FOR_EXCEPTION( 00743 result.failedCount != 0, std::logic_error, "Tpetra::Details::Map: " 00744 << result.failedCount << " insert(s) into GID->LID table failed " 00745 "out of " << numNonContig << " total GIDs to insert."); 00746 00747 minMyGID_ = result.minMyGID; 00748 maxMyGID_ = result.maxMyGID; 00749 00750 // This is a shallow copy, that casts to const. After 00751 // creating the UnorderedMap, we don't need to modify it, so 00752 // we store it as const. This improves look-up performance 00753 // on NVIDIA GPUs, since lookups can go through (read-only) 00754 // texture cache. 00755 glMap_ = glMap; 00756 } 00757 else { 00758 // mfh 10 Feb 2014: A common case for users of Map is to 00759 // execute the following loop: 00760 // 00761 // for (GO g = map.getMinGlobalIndex (); g <= map.getMaxGlobalIndex (); ++g) { ... } 00762 // 00763 // Unfortunately, Tpetra chose to make this an inclusive 00764 // range. This means that if the calling process owns no 00765 // GIDs, we need to make the above loop have zero 00766 // iterations. We can't do this if both the min and max GID 00767 // on the calling process are the invalid GID. We can if we 00768 // set the min GID to indexBase+1 and the max GID to 00769 // indexBase. 00770 minMyGID_ = indexBase + 1; 00771 maxMyGID_ = indexBase; 00772 00773 // This insures tests for GIDs in the range [firstContiguousGID, 00774 // lastContiguousGID] fail for processes with no local elements. 00775 firstContiguousGID_ = indexBase + 1; 00776 lastContiguousGID_ = indexBase; 00777 } 00778 00779 // mfh 20 Feb 2014: Commenting out the exception test below 00780 // fixes Bug 5401. In particular, it needs to be OK to supply 00781 // -1 as the min GID, even though Teuchos::OrdinalTraits<GO> 00782 // for signed GO is -1 (which is actually annoying -- the most 00783 // negative integer (e.g., INT_MIN for GO = int) would be 00784 // better). 00785 // 00786 // TEUCHOS_TEST_FOR_EXCEPTION( 00787 // minMyGID_ == getInvalidGlobalIndex () || 00788 // maxMyGID_ == getInvalidGlobalIndex (), 00789 // std::logic_error, "Tpetra::Details::Map noncontig ctor: The calling " 00790 // "process " << comm.getRank () << " owns " << myNumIndices_ << " (!= " 00791 // "0) global indices, but was unable to find the min or max global " 00792 // "index on this process. Please report this bug to the Tpetra " 00793 // "developers. Tell them that minMyGID_ == " << minMyGID_ << " and " 00794 // "maxMyGID_ == " << maxMyGID_ << "."); 00795 00796 if (globalNumIndices_ == 0) { 00797 // mfh 10 Feb 2014: Users might want to execute the 00798 // following loop: 00799 // 00800 // for (GO g = map.getMinAllGlobalIndex (); g <= map.getMaxAllGlobalIndex (); ++g) { ... } 00801 // 00802 // Unfortunately, Tpetra chose to make this an inclusive 00803 // range. This means that if the Map is empty (has no GIDs 00804 // on any process), we need to make the above loop have zero 00805 // iterations. We can't do this if both the global min and 00806 // max GIDs are the invalid GID. We can if we set the 00807 // global min GID to indexBase+1 and the global max GID to 00808 // indexBase. 00809 minAllGID_ = indexBase + 1; 00810 maxAllGID_ = indexBase; 00811 distributed_ = (comm.getSize () > 1); 00812 } 00813 else { 00814 // Compute the min and max of all processes' GIDs. If 00815 // myNumIndices_ == 0 on this process, that makes things a bit 00816 // tricky. Since Tpetra requires that indexBase be the min 00817 // GID over all processes, that makes things a bit easier. If 00818 // a process owns no GIDs, we can therefore set its min and 00819 // max GID to indexBase in the all-reduce. This will not 00820 // influence the final result. 00821 // 00822 // mfh 10 Feb 2014: If Tpetra decides to change to allow 00823 // indexBase not to be the min GID, that would break the above 00824 // approach. One of the Tpetra tests already assumes this: it 00825 // uses an index base different than the actual global min 00826 // GID. In that case, if GO is signed, and a process owns no 00827 // GIDs, we can do the following on that process: 00828 // 00829 // - Set its min GID (in the all-reduce, not the value of 00830 // minMyGID_) to std::numeric_limits<GO>::max(), and its 00831 // max GID to std::numeric_limits<GO>::min(). 00832 // - While we're at it, use the same all-reduce to figure 00833 // out if the Map is distributed. "Distributed" means 00834 // that there is at least one process with a number of 00835 // local elements less than the number of global elements. 00836 // 00837 // This works if GO is signed because in that case, 00838 // std::numeric_limits<GO>::min() is the biggest negative 00839 // value, which users are unlikely to want to use as a GID. 00840 // If GO is unsigned, however, this won't work. In that case, 00841 // std::numeric_limits<GO>::min() is zero, which is a typical 00842 // smallest GID. There are two approaches we could take: 00843 // 00844 // 1. Find the max GID first, in a separate all-reduce. 00845 // (On processes that own no GIDs, use 0 (the least 00846 // unsigned integer) as the input of the all-reduce.) 00847 // Then, on processes that own no GIDs, use that result 00848 // (the global max GID) in place of 00849 // std::numeric_limits<GO>::max(), when doing the 00850 // all-reduce to find the global min GID. 00851 // 00852 // 2. Convert from GO to a signed type of the same size, 00853 // and use the above approach for signed GO. 00854 // 00855 // Option 1 is better because Option 2 reduces the range of 00856 // allowed GID values by a factor of two. 00857 if (std::numeric_limits<GO>::is_signed) { 00858 // Compute the min and max of all processes' GIDs using a 00859 // single MAX all-reduce, using min(x,y) == -max(-x,-y). 00860 // This only works if x and y are signed. If the calling 00861 // process owns no GIDs, make its min GID input to the 00862 // all-reduce -std::numeric_limits<GO>::min(), which should 00863 // be no greater than std::numeric_limits<GO>::max(). 00864 // 00865 // If each process sets localDist=1 if its number of local 00866 // elements is strictly less than the number of global 00867 // elements, and localDist=0 otherwise, then a MAX 00868 // all-reduce on localDist tells us if the Map is 00869 // distributed (1 if yes, 0 if no). Thus, we can append 00870 // localDist onto the end of the data and get the global 00871 // result from the all-reduce. 00872 00873 // Does my process NOT own all the elements? 00874 const GO localDist = 00875 (static_cast<GO> (myNumIndices_) < globalNumIndices_) ? 1 : 0; 00876 00877 GO minMaxInput[3]; 00878 minMaxInput[0] = (myNumIndices_ == 0) ? 00879 -std::numeric_limits<GO>::min () : -minMyGID_; 00880 minMaxInput[1] = (myNumIndices_ == 0) ? 00881 std::numeric_limits<GO>::min () : maxMyGID_; 00882 minMaxInput[2] = localDist; 00883 00884 GO minMaxOutput[3]; 00885 minMaxOutput[0] = 0; // arbitrary initialization 00886 minMaxOutput[1] = 0; // arbitrary initialization 00887 minMaxOutput[2] = 0; // arbitrary initialization 00888 reduceAll<int, GO> (comm, Teuchos::REDUCE_MAX, 3, 00889 minMaxInput, minMaxOutput); 00890 minAllGID_ = -minMaxOutput[0]; 00891 maxAllGID_ = minMaxOutput[1]; 00892 const GO globalDist = minMaxOutput[2]; 00893 distributed_ = (comm.getSize () > 1 && globalDist == 1); 00894 } 00895 else { // GO is unsigned 00896 const GO maxInput = (myNumIndices_ == 0) ? 0 : maxMyGID_; 00897 reduceAll<int, GO> (comm, Teuchos::REDUCE_MAX, maxInput, 00898 outArg (maxAllGID_)); 00899 const GO minInput = (myNumIndices_ == 0) ? maxAllGID_ : minMyGID_; 00900 reduceAll<int, GO> (comm, Teuchos::REDUCE_MIN, minInput, 00901 outArg (minAllGID_)); 00902 00903 // FIXME (mfh 10 Feb 2014) We could combine the MIN 00904 // all-reduce above with the MIN all-reduce below. 00905 if (comm.getSize () > 1) { 00906 // The communicator has more than one process, but that 00907 // doesn't necessarily mean the Map is distributed. 00908 int localRep = 0; 00909 if (getGlobalNumIndices () == static_cast<GO> (getMyNumIndices ())) { 00910 // The number of local elements on this process equals 00911 // the number of global elements. 00912 // 00913 // NOTE (mfh 22 Nov 2011) Does this still work if there 00914 // were duplicates in the global ID list on input (the 00915 // third Map constructor), so that the number of local 00916 // elements (which are not duplicated) on this process 00917 // could be less than the number of global elements, 00918 // even if this process owns all the elements? 00919 localRep = 1; 00920 } 00921 int allLocalRep; 00922 reduceAll<int, int> (comm, Teuchos::REDUCE_MIN, localRep, 00923 outArg (allLocalRep)); 00924 if (allLocalRep != 1) { 00925 // At least one process does not own all the elements. 00926 // This makes the Map a distributed Map. 00927 distributed_ = true; 00928 } 00929 } 00930 else { 00931 // If the communicator has only one process, then the Map 00932 // is not distributed. 00933 distributed_ = false; 00934 } 00935 } // if GO is signed 00936 } 00937 00938 // mfh 10 Feb 2014: tpetra/test/Map/Map_UnitTests.cpp, 00939 // indexBaseAndAllMin test uses a different indexBase input 00940 // (0) than the actual min GID (1). Other tests seem to 00941 // depend on this as well. That's why I commented out the 00942 // test below. 00943 00944 // TEUCHOS_TEST_FOR_EXCEPTION( 00945 // globalNumIndices_ != 0 && minAllGID_ != indexBase_, 00946 // std::invalid_argument, 00947 // "Tpetra::Details::Map constructor (noncontiguous): " 00948 // "The Map has " << globalNumIndices_ << " > 0 global indices, but the " 00949 // "min global index " << minAllGID_ << " over all process(es) does not " 00950 // "equal the given indexBase " << indexBase_ << "."); 00951 } 00952 00955 template<class InDeviceType> 00956 void 00957 create_copy_view (const Map<LO, GO, InDeviceType>& map) 00958 { 00959 if (! map.initialized ()) { 00960 invalidGlobalIndex_ = Teuchos::OrdinalTraits<GO>::invalid (); 00961 invalidLocalIndex_ = Teuchos::OrdinalTraits<LO>::invalid (); 00962 globalNumIndices_ = 0; 00963 myNumIndices_ = 0; 00964 indexBase_ = 0; 00965 firstContiguousGID_ = Teuchos::OrdinalTraits<GO>::invalid (); 00966 lastContiguousGID_ = Teuchos::OrdinalTraits<GO>::invalid (); 00967 minMyGID_ = Teuchos::OrdinalTraits<GO>::invalid (); 00968 maxMyGID_ = Teuchos::OrdinalTraits<GO>::invalid (); 00969 minAllGID_ = Teuchos::OrdinalTraits<GO>::invalid (); 00970 maxAllGID_ = Teuchos::OrdinalTraits<GO>::invalid (); 00971 contiguous_ = false; 00972 distributed_ = true; 00973 uniform_ = false; 00974 initialized_ = false; 00975 } 00976 else { 00977 invalidGlobalIndex_ = map.invalidGlobalIndex_; 00978 invalidLocalIndex_ = map.invalidLocalIndex_; 00979 globalNumIndices_ = map.globalNumIndices_; 00980 myNumIndices_ = map.myNumIndices_; 00981 indexBase_ = map.indexBase_; 00982 firstContiguousGID_ = map.firstContiguousGID_; 00983 lastContiguousGID_ = map.lastContiguousGID_; 00984 minMyGID_ = map.minMyGID_; 00985 maxMyGID_ = map.maxMyGID_; 00986 minAllGID_ = map.minAllGID_; 00987 maxAllGID_ = map.maxAllGID_; 00988 00989 if (! map.contiguous_) { 00990 if (map.myNumIndices_ != 0 && map.glMap_.size () != 0) { 00991 // The calling process owns one or more GIDs, and some of 00992 // these GIDs are not contiguous. 00993 Kokkos::UnorderedMap<GO, LO, DeviceType> glMap; 00994 glMap.create_copy_view (map.glMap_); 00995 glMap_ = glMap; 00996 } 00997 00998 // It's OK for this to be dimension 0; that means it hasn't been initialized yet. 00999 Kokkos::View<GO*, DeviceType> lgMap ("LID->GID", map.lgMap_.dimension_0 ()); 01000 if (map.lgMap_.dimension_0 () != 0) { 01001 Kokkos::deep_copy (lgMap, map.lgMap_); 01002 } 01003 lgMap_ = lgMap; // shallow copy and cast to const 01004 } 01005 01006 contiguous_ = map.contiguous_; 01007 distributed_ = map.distributed_; 01008 uniform_ = map.uniform_; 01009 initialized_ = map.initialized_; 01010 } 01011 } 01012 01020 void setDistributed (const bool distributed) { 01021 distributed_ = distributed; 01022 } 01023 01034 bool initialized () const { 01035 return initialized_; 01036 } 01037 01039 01040 01041 01043 KOKKOS_INLINE_FUNCTION GO getGlobalNumIndices () const { 01044 return globalNumIndices_; 01045 } 01046 01048 KOKKOS_INLINE_FUNCTION LO getMyNumIndices () const { 01049 return myNumIndices_; 01050 } 01051 01053 KOKKOS_INLINE_FUNCTION GO getIndexBase () const { 01054 return indexBase_; 01055 } 01056 01064 KOKKOS_INLINE_FUNCTION LO getInvalidLocalIndex () const { 01065 return invalidLocalIndex_; 01066 } 01067 01069 KOKKOS_INLINE_FUNCTION LO getMinLocalIndex () const { 01070 return static_cast<LO> (0); 01071 } 01072 01078 KOKKOS_INLINE_FUNCTION LO getMaxLocalIndex () const { 01079 const LO myNumIndices = getMyNumIndices (); 01080 // Local indices are always zero-based. 01081 return (myNumIndices == 0) ? getInvalidLocalIndex () : (myNumIndices - 1); 01082 } 01083 01091 KOKKOS_INLINE_FUNCTION LO getInvalidGlobalIndex () const { 01092 return invalidGlobalIndex_; 01093 } 01094 01096 KOKKOS_INLINE_FUNCTION GO getMinGlobalIndex () const { 01097 return minMyGID_; 01098 } 01099 01101 KOKKOS_INLINE_FUNCTION GO getMaxGlobalIndex () const { 01102 return maxMyGID_; 01103 } 01104 01106 KOKKOS_INLINE_FUNCTION GO getMinAllGlobalIndex () const { 01107 return minAllGID_; 01108 } 01109 01111 KOKKOS_INLINE_FUNCTION GO getMaxAllGlobalIndex () const { 01112 return maxAllGID_; 01113 } 01114 01119 KOKKOS_INLINE_FUNCTION LO getLocalIndex (const GO globalIndex) const { 01120 if (getMyNumIndices() == 0) return getInvalidLocalIndex(); 01121 if (isContiguous ()) { 01122 if (globalIndex >= getMinGlobalIndex () && 01123 globalIndex <= getMaxGlobalIndex ()) { 01124 return static_cast<LO> (globalIndex - getMinGlobalIndex ()); 01125 } 01126 else { 01127 return getInvalidLocalIndex (); 01128 } 01129 } 01130 else if (globalIndex >= firstContiguousGID_ && 01131 globalIndex <= lastContiguousGID_) { 01132 return static_cast<LO> (globalIndex - firstContiguousGID_); 01133 } 01134 else { 01135 // This also works if this process owns no indices. 01136 // In that case, glMap_ will be empty, so this branch 01137 // will always return the invalid LID. 01138 const typename global_to_local_table_type::size_type i = 01139 glMap_.find (globalIndex); 01140 return glMap_.valid_at (i) ? // if the GID is in the map, ... 01141 glMap_.value_at (i) : // ... return its LID, ... 01142 getInvalidLocalIndex (); // ... else return the invalid LID. 01143 } 01144 } 01145 01150 KOKKOS_INLINE_FUNCTION GO getGlobalIndex (const LO localIndex) const { 01151 if (getMyNumIndices() == 0) return getInvalidLocalIndex(); 01152 if (localIndex < getMinLocalIndex () || 01153 localIndex > getMaxLocalIndex ()) { 01154 // This process will always take this branch if it owns no indices. 01155 return getInvalidGlobalIndex (); 01156 } 01157 else if (isContiguous ()) { 01158 return getMinGlobalIndex () + localIndex; 01159 } 01160 else { 01161 return lgMap_(localIndex); 01162 } 01163 } 01164 01166 KOKKOS_INLINE_FUNCTION bool 01167 isOwnedLocalIndex (const LO localIndex) const { 01168 return localIndex >= getMinLocalIndex () && 01169 localIndex <= getMaxLocalIndex (); 01170 } 01171 01173 KOKKOS_INLINE_FUNCTION bool 01174 isOwnedGlobalIndex (const GO globalIndex) const { 01175 return this->getLocalIndex (globalIndex) != getInvalidLocalIndex (); 01176 } 01177 01185 KOKKOS_INLINE_FUNCTION bool isUniform () const { 01186 return uniform_; 01187 } 01188 01200 KOKKOS_INLINE_FUNCTION bool isContiguous () const { 01201 return contiguous_; 01202 } 01203 01225 KOKKOS_INLINE_FUNCTION bool isDistributed () const { 01226 return distributed_; 01227 } 01228 01230 01231 01232 01239 Kokkos::View<const GO*, DeviceType> getMyGlobalIndices () { 01240 const GO myNumIndices = getMyNumIndices (); 01241 01242 if (myNumIndices != 0 && lgMap_.dimension_0 () == 0) { 01243 Kokkos::View<GO*, DeviceType> lgMap ("LID->GID", myNumIndices); 01244 // fill with [getMinGlobalIndex(), getMinGlobalIndex()+myNumIndices-1]. 01245 iota<GO, DeviceType> (lgMap, this->getMinGlobalIndex ()); 01246 lgMap_ = lgMap; // shallow copy; cast to const 01247 } 01248 return lgMap_; 01249 } 01250 01252 std::string description () { 01253 return "\"Tpetra::Details::Map\""; 01254 } 01255 01257 void 01258 describe (Teuchos::FancyOStream &out, 01259 const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) 01260 { 01261 using std::endl; 01262 const Teuchos::EVerbosityLevel vl = 01263 (verbLevel == Teuchos::VERB_DEFAULT) ? Teuchos::VERB_LOW : verbLevel; 01264 01265 if (vl != Teuchos::VERB_NONE) { 01266 // By convention, describe() always starts with a tab. 01267 Teuchos::OSTab tab0 (out); 01268 out << description () << endl; // TODO (mfh 05 Feb 2014) Print more info. 01269 } 01270 } 01271 01272 private: 01273 void validateLocally (const int myRank) { 01274 TEUCHOS_TEST_FOR_EXCEPTION( 01275 invalidGlobalIndex_ != Teuchos::OrdinalTraits<GO>::invalid (), 01276 std::logic_error, 01277 "Tpetra::Details::Map: On Process " << myRank << ", " 01278 "invalidGlobalIndex_ was not set."); 01279 TEUCHOS_TEST_FOR_EXCEPTION( 01280 invalidLocalIndex_ != Teuchos::OrdinalTraits<LO>::invalid (), 01281 std::logic_error, 01282 "Tpetra::Details::Map: On Process " << myRank << ", " 01283 "invalidLocalIndex_ was not set."); 01284 TEUCHOS_TEST_FOR_EXCEPTION( 01285 ! contiguous_ && myNumIndices_ != lgMap_.dimension_0 (), 01286 std::logic_error, "Tpetra::Details::Map: On Process " << myRank << 01287 ", this noncontiguous Map has myNumIndices_ = " << myNumIndices_ << 01288 " != lgMap_.dimension_0 () = " << lgMap_.dimension_0 () << "."); 01289 01290 // Test whether the Map contains all the noncontiguous indices 01291 // it claims to contain. 01292 if (! contiguous_ && myNumIndices_ > 0) { 01293 bool bad = false; 01294 Teuchos::Array<GO> badGlobals; 01295 Teuchos::Array<LO> badLocalsIn; 01296 Teuchos::Array<LO> badLocalsOut; 01297 01298 for (LO localIndexIn = 0; localIndexIn < myNumIndices_; ++localIndexIn) { 01299 const GO globalIndex = this->getGlobalIndex (localIndexIn); 01300 const LO localIndexOut = this->getLocalIndex (globalIndex); 01301 01302 if (localIndexIn != localIndexOut) { 01303 bad = true; 01304 badGlobals.push_back (globalIndex); 01305 badLocalsIn.push_back (localIndexIn); 01306 badLocalsOut.push_back (localIndexOut); 01307 } 01308 } 01309 01310 // Get the GID->LID table's keys and values as Teuchos::Array. 01311 typedef typename DeviceType::size_type size_type; 01312 const size_type mapSize = glMap_.size (); 01313 Teuchos::Array<GO> keys; 01314 Teuchos::Array<LO> vals; 01315 keys.reserve (mapSize); 01316 vals.reserve (mapSize); 01317 for (size_type k = 0; k < mapSize; ++k) { 01318 keys.push_back (glMap_.key_at (k)); 01319 vals.push_back (glMap_.value_at (k)); 01320 } 01321 // Get the LID->GID table as an ArrayView for easy printing. 01322 Teuchos::ArrayView<const GO> lgMapAv (lgMap_.ptr_on_device (), 01323 lgMap_.dimension_0 ()); 01324 TEUCHOS_TEST_FOR_EXCEPTION( 01325 bad, std::logic_error, "Tpetra::Details::Map: On Process " << 01326 myRank << ", this noncontiguous Map's GID->LID lookup is broken. " 01327 "The following GIDs are in this process' GID list, but attempting " 01328 "to look up their LIDs gives the wrong answer: " << badGlobals () << 01329 ". The corresponding LID list should have been " << badLocalsIn () 01330 << ", but was " << badLocalsOut << " instead. The Map's GID list " 01331 "is " << lgMapAv << ". The keys in the GID->LID table are " << 01332 keys () << " and their values are " << vals () << ". The GID->LID " 01333 "table reports its size as " << mapSize << " and its capacity as " 01334 << glMap_.capacity () << ". The initial sequence of contiguous " 01335 "GIDs on this process is [" << firstContiguousGID_ << "," << 01336 lastContiguousGID_ << "]."); 01337 } 01338 } 01339 01356 bool checkIsDist (const Teuchos::Comm<int>& comm) { 01357 using Teuchos::outArg; 01358 using Teuchos::REDUCE_MIN; 01359 using Teuchos::reduceAll; 01360 01361 bool global = false; 01362 if (comm.getSize () > 1) { 01363 // The communicator has more than one process, but that 01364 // doesn't necessarily mean the Map is distributed. 01365 int localRep = 0; 01366 if (getGlobalNumIndices () == static_cast<GO> (getMyNumIndices ())) { 01367 // The number of local indices on this process equals the 01368 // number of global indices. 01369 // 01370 // NOTE (mfh 22 Nov 2011) Does this still work if there 01371 // were duplicates in the GID list on input (the third Map 01372 // constructor), so that the number of LIDs (which are not 01373 // duplicated) on this process could be less than the 01374 // number of GIDs, even if this process owns all the IDs? 01375 localRep = 1; 01376 } 01377 int allLocalRep; 01378 reduceAll<int, int> (comm, REDUCE_MIN, localRep, outArg (allLocalRep)); 01379 if (allLocalRep != 1) { 01380 // At least one process does not own all the elements. 01381 // This makes the Map a distributed Map. 01382 global = true; 01383 } 01384 } 01385 // If the communicator has only one process, then the Map is 01386 // not distributed. 01387 return global; 01388 } 01389 01394 GO invalidGlobalIndex_; 01395 01400 LO invalidLocalIndex_; 01401 01403 GO globalNumIndices_; 01404 01406 GO myNumIndices_; 01407 01409 GO indexBase_; 01410 01412 GO firstContiguousGID_; 01413 01415 GO lastContiguousGID_; 01416 01420 GO minMyGID_; 01421 01425 GO maxMyGID_; 01426 01429 GO minAllGID_; 01430 01433 GO maxAllGID_; 01434 01441 typedef Kokkos::UnorderedMap<const GO, const LO, DeviceType> global_to_local_table_type; 01442 01459 global_to_local_table_type glMap_; 01460 01472 Kokkos::View<const GO*, DeviceType> lgMap_; 01473 01479 bool contiguous_; 01480 01483 bool distributed_; 01484 01490 bool uniform_; 01491 01495 bool initialized_; 01496 }; 01497 01523 template<class OutMapType, class DeviceMapType, class HostMapType> 01524 struct MapMirrorer { 01525 typedef OutMapType output_map_type; 01526 typedef DeviceMapType device_map_type; 01527 typedef HostMapType host_map_type; 01528 01530 static output_map_type 01531 mirror (device_map_type& deviceMap, host_map_type& hostMap); 01532 }; 01533 01534 // Partial specialization for when all three Maps have different 01535 // device types. 01536 template<class LO, class GO, class OutDeviceType, class DeviceType, class HostDeviceType> 01537 struct MapMirrorer<Map<LO, GO, OutDeviceType>, Map<LO, GO, DeviceType>, Map<LO, GO, HostDeviceType> > { 01538 typedef Map<LO, GO, OutDeviceType> output_map_type; 01539 typedef Map<LO, GO, DeviceType> device_map_type; 01540 typedef Map<LO, GO, HostDeviceType> host_map_type; 01541 01542 static output_map_type 01543 mirror (device_map_type& deviceMap, host_map_type& hostMap) { 01544 output_map_type outputMap; 01545 01546 // Favor the host Map, since if OutDeviceType != DeviceType, 01547 // then OutDeviceType is likely not to be a CUDA device (e.g., 01548 // DeviceType = Cuda, HostDeviceType = OpenMP, and 01549 // OutDeviceType = Serial). 01550 if (hostMap.initialized ()) { 01551 outputMap.template create_copy_view<HostDeviceType> (hostMap); 01552 } else { 01553 outputMap.template create_copy_view<DeviceType> (deviceMap); 01554 } 01555 return outputMap; 01556 } 01557 }; 01558 01559 // Partial specialization for when the device and host Maps have 01560 // different device types, and the output Map has the same device 01561 // type as the device Map. 01562 template<class LO, class GO, class DeviceType, class HostDeviceType> 01563 struct MapMirrorer<Map<LO, GO, DeviceType>, Map<LO, GO, DeviceType>, Map<LO, GO, HostDeviceType> > { 01564 typedef Map<LO, GO, DeviceType> output_map_type; 01565 typedef Map<LO, GO, DeviceType> device_map_type; 01566 typedef Map<LO, GO, HostDeviceType> host_map_type; 01567 01568 static output_map_type 01569 mirror (device_map_type& deviceMap, host_map_type& hostMap) { 01570 if (deviceMap.initialized ()) { 01571 return deviceMap; 01572 } 01573 else { 01574 output_map_type outMap; 01575 outMap.template create_copy_view<HostDeviceType> (hostMap); 01576 if (outMap.initialized () && ! deviceMap.initialized ()) { 01577 deviceMap = outMap; // cache the deep copy 01578 } 01579 return outMap; 01580 } 01581 } 01582 }; 01583 01584 // Partial specialization for when the device and host Maps have 01585 // different device types, and the output Map has the same device 01586 // type as the host Map. 01587 template<class LO, class GO, class DeviceType, class HostDeviceType> 01588 struct MapMirrorer<Map<LO, GO, HostDeviceType>, Map<LO, GO, DeviceType>, Map<LO, GO, HostDeviceType> > { 01589 typedef Map<LO, GO, HostDeviceType> output_map_type; 01590 typedef Map<LO, GO, DeviceType> device_map_type; 01591 typedef Map<LO, GO, HostDeviceType> host_map_type; 01592 01593 static output_map_type 01594 mirror (device_map_type& deviceMap, host_map_type& hostMap) { 01595 if (hostMap.initialized ()) { 01596 return hostMap; 01597 } 01598 else { 01599 output_map_type outMap; 01600 outMap.template create_copy_view<DeviceType> (deviceMap); 01601 if (outMap.initialized ()) { 01602 hostMap = outMap; // cache the deep copy 01603 } 01604 return outMap; 01605 } 01606 } 01607 }; 01608 01609 // Partial specialization for when the device, host, and output 01610 // Maps have the same device types. 01611 template<class LO, class GO, class DeviceType> 01612 struct MapMirrorer<Map<LO, GO, DeviceType>, Map<LO, GO, DeviceType>, Map<LO, GO, DeviceType> > { 01613 typedef Map<LO, GO, DeviceType> output_map_type; 01614 typedef Map<LO, GO, DeviceType> device_map_type; 01615 typedef Map<LO, GO, DeviceType> host_map_type; 01616 01617 static output_map_type 01618 mirror (device_map_type& deviceMap, const host_map_type& /* hostMap */ ) { 01619 return deviceMap; 01620 } 01621 }; 01622 01623 } 01624 } 01625 01626 #endif // TPETRA_KOKKOSREFACTOR_DETAILS_MAP_HPP
1.7.6.1