|
Tpetra Matrix/Vector Services
Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Tpetra: Templated Linear Algebra Services Package 00005 // Copyright (2008) Sandia Corporation 00006 // 00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, 00008 // the U.S. Government retains certain rights in this software. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00038 // 00039 // ************************************************************************ 00040 // @HEADER 00041 00042 #ifndef TPETRA_KOKKOSREFACTOR_MAP_DEF_HPP 00043 #define TPETRA_KOKKOSREFACTOR_MAP_DEF_HPP 00044 00045 #include <Tpetra_Directory.hpp> // must include for implicit instantiation to work 00046 #include <Tpetra_Util.hpp> 00047 #include <Teuchos_as.hpp> 00048 #include <stdexcept> 00049 00050 #ifdef DOXYGEN_USE_ONLY 00051 # include "Tpetra_Map_decl.hpp" 00052 #endif 00053 00054 namespace Tpetra { 00055 00056 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00057 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00058 Map () : 00059 comm_ (new Teuchos::SerialComm<int> ()), 00060 node_ (KokkosClassic::Details::getNode<Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > ()), 00061 directory_ (new Directory<LocalOrdinal, GlobalOrdinal, node_type> ()) 00062 {} 00063 00064 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00065 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00066 Map (const global_size_t globalNumIndices, 00067 const GlobalOrdinal indexBase, 00068 const Teuchos::RCP<const Teuchos::Comm<int> >& comm, 00069 const LocalGlobal lOrG, 00070 const Teuchos::RCP<node_type>& node) : 00071 comm_ (comm), 00072 node_ (node), 00073 directory_ (new Directory<LocalOrdinal, GlobalOrdinal, node_type> ()) 00074 { 00075 // Start with a host Map implementation, since this will make this 00076 // class' public (host) methods work. If users want device 00077 // methods, they will call getDeviceView(), which will initialize 00078 // the device Map implementation. 00079 // 00080 // NOTE (mfh 06 Feb 2014) If we're using UVM, we don't really need 00081 // the host and device Maps to be separate. 00082 mapHost_ = host_impl_type (Teuchos::as<GlobalOrdinal> (globalNumIndices), 00083 indexBase, *comm, lOrG); 00084 } 00085 00086 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00087 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00088 Map (const global_size_t globalNumIndices, 00089 const size_t myNumIndices, 00090 const GlobalOrdinal indexBase, 00091 const Teuchos::RCP<const Teuchos::Comm<int> >& comm, 00092 const Teuchos::RCP<node_type>& node) : 00093 comm_ (comm), 00094 node_ (node), 00095 directory_ (new Directory<LocalOrdinal, GlobalOrdinal, node_type> ()) 00096 { 00097 typedef GlobalOrdinal GO; 00098 const global_size_t GSTI = Teuchos::OrdinalTraits<global_size_t>::invalid (); 00099 00100 const GO globalNumInds = (globalNumIndices == GSTI) ? 00101 getInvalidGlobalIndex () : Teuchos::as<GO> (globalNumIndices); 00102 const GO myNumInds = (myNumIndices == GSTI) ? 00103 getInvalidLocalIndex () : Teuchos::as<GO> (myNumIndices); 00104 00105 // Start with a host Map implementation, since this will make this 00106 // class' public (host) methods work. If users want device 00107 // methods, they will call getDeviceView(), which will initialize 00108 // the device Map implementation. 00109 // 00110 // NOTE (mfh 06 Feb 2014) If we're using UVM, we don't really need 00111 // the host and device Maps to be separate. 00112 mapHost_ = host_impl_type (globalNumInds, myNumInds, indexBase, *comm); 00113 00114 // Create the Directory on demand in getRemoteIndexList(). 00115 } 00116 00117 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00118 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00119 Map (const global_size_t globalNumIndices, 00120 const Kokkos::View<const GlobalOrdinal*, device_type>& myGlobalIndices, 00121 const GlobalOrdinal indexBase, 00122 const Teuchos::RCP<const Teuchos::Comm<int> >& comm, 00123 const Teuchos::RCP<node_type>& node) : 00124 comm_ (comm), 00125 node_ (node), 00126 directory_ (new Directory<LocalOrdinal, GlobalOrdinal, node_type> ()) 00127 { 00128 typedef GlobalOrdinal GO; 00129 const global_size_t GSTI = Teuchos::OrdinalTraits<global_size_t>::invalid (); 00130 const GO globalNumInds = (globalNumIndices == GSTI) ? 00131 getInvalidGlobalIndex () : Teuchos::as<GO> (globalNumIndices); 00132 00133 // Since we already have device data, start here with a device Map 00134 // implementation. In order to make this class' host methods work 00135 // and still be const (that is, legal to call in a host parallel 00136 // operation), we can't create the host mirror on demand; we must 00137 // create it here, in advance. 00138 // 00139 // NOTE (mfh 06 Feb 2014) If we're using UVM, we don't really need 00140 // the host and device Maps to be separate. 00141 mapDevice_ = device_impl_type (globalNumInds, myGlobalIndices, indexBase, *comm); 00142 mapHost_.create_copy_view (mapDevice_); 00143 00144 // Create the Directory on demand in getRemoteIndexList(). 00145 } 00146 00147 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00148 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00149 Map (const global_size_t globalNumIndices, 00150 const Teuchos::ArrayView<const GlobalOrdinal>& myGlobalIndices, 00151 const GlobalOrdinal indexBase, 00152 const Teuchos::RCP<const Teuchos::Comm<int> >& comm, 00153 const Teuchos::RCP<node_type> &node) : 00154 comm_ (comm), 00155 node_ (node), 00156 directory_ (new Directory<LocalOrdinal, GlobalOrdinal, node_type> ()) 00157 { 00158 typedef GlobalOrdinal GO; 00159 typedef Kokkos::View<const GlobalOrdinal*, typename device_type::host_mirror_device_type> const_host_view_type; 00160 typedef Kokkos::View<GlobalOrdinal*, typename device_type::host_mirror_device_type> host_view_type; 00161 typedef Kokkos::View<GlobalOrdinal*, device_type> device_view_type; 00162 00163 // Copy the input GID list from host (we assume that 00164 // Teuchos::ArrayView should only view host memory) to device. 00165 // 00166 // FIXME (mfh 06 Feb, 24 Mar 2014) We could use the CUDA API 00167 // function here that can look at a pointer and tell whether it 00168 // lives on host or device, to tell whether the Teuchos::ArrayView 00169 // is viewing host or device memory. Regardless, we don't own the 00170 // data and we will need a deep copy anyway, so we might as well 00171 // copy it. 00172 const_host_view_type gidsHost_view (Kokkos::ViewWithoutManaging(),myGlobalIndices.getRawPtr (), myGlobalIndices.size ()); 00173 host_view_type gidsHost ("GIDS_Host", myGlobalIndices.size ()); 00174 Kokkos::deep_copy (gidsHost, gidsHost_view); 00175 device_view_type gidsDevice ("GIDs", myGlobalIndices.size ()); 00176 Kokkos::deep_copy (gidsDevice, gidsHost); 00177 00178 const global_size_t GSTI = Teuchos::OrdinalTraits<global_size_t>::invalid (); 00179 const GO globalNumInds = (globalNumIndices == GSTI) ? 00180 getInvalidGlobalIndex () : Teuchos::as<GO> (globalNumIndices); 00181 00182 // Start with a host Map implementation, since this will make this 00183 // class' public (host) methods work. If users want device 00184 // methods, they will call getDeviceView(), which will initialize 00185 // the device Map implementation. 00186 // 00187 // NOTE (mfh 06 Feb 2014) If we're using UVM, we don't really need 00188 // the host and device Maps to be separate, but we need to create a compatible 00189 // host view of the GIDs using the device ptr. 00190 mapDevice_ = device_impl_type (globalNumInds, gidsDevice , indexBase, *comm); 00191 #ifdef KOKKOS_USE_CUDA_UVM 00192 const GlobalOrdinal* ptr = gidsDevice.ptr_on_device(); 00193 mapHost_ = host_impl_type (globalNumInds, const_host_view_type(Kokkos::ViewWithoutManaging(),ptr,myGlobalIndices.size ()), indexBase, *comm); 00194 #else 00195 mapHost_ = host_impl_type (globalNumInds, gidsHost, indexBase, *comm); 00196 #endif 00197 // Create the Directory on demand in getRemoteIndexList(). 00198 } 00199 00200 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00201 global_size_t 00202 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00203 getGlobalNumElements () const { 00204 return mapHost_.getGlobalNumIndices (); 00205 } 00206 00207 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00208 size_t 00209 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00210 getNodeNumElements () const { 00211 return mapHost_.getMyNumIndices (); 00212 } 00213 00214 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00215 GlobalOrdinal 00216 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00217 getIndexBase () const { 00218 return mapHost_.getIndexBase (); 00219 } 00220 00221 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00222 GlobalOrdinal 00223 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00224 getInvalidGlobalIndex () const { 00225 return mapHost_.getInvalidGlobalIndex (); 00226 } 00227 00228 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00229 LocalOrdinal 00230 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00231 getInvalidLocalIndex () const { 00232 return mapHost_.getInvalidLocalIndex (); 00233 } 00234 00235 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00236 LocalOrdinal 00237 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00238 getMinLocalIndex () const { 00239 return static_cast<LocalOrdinal> (0); 00240 } 00241 00242 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00243 LocalOrdinal 00244 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00245 getMaxLocalIndex () const { 00246 return mapHost_.getMaxLocalIndex (); 00247 } 00248 00249 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00250 GlobalOrdinal 00251 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00252 getMinGlobalIndex () const { 00253 return mapHost_.getMinGlobalIndex (); 00254 } 00255 00256 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00257 GlobalOrdinal 00258 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00259 getMaxGlobalIndex () const { 00260 return mapHost_.getMaxGlobalIndex (); 00261 } 00262 00263 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00264 GlobalOrdinal 00265 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00266 getMinAllGlobalIndex () const { 00267 return mapHost_.getMinAllGlobalIndex (); 00268 } 00269 00270 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00271 GlobalOrdinal 00272 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00273 getMaxAllGlobalIndex () const { 00274 return mapHost_.getMaxAllGlobalIndex (); 00275 } 00276 00277 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00278 LocalOrdinal 00279 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00280 getLocalElement (const GlobalOrdinal globalIndex) const 00281 { 00282 return mapHost_.getLocalIndex (globalIndex); 00283 } 00284 00285 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00286 GlobalOrdinal 00287 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00288 getGlobalElement (const LocalOrdinal localIndex) const 00289 { 00290 return mapHost_.getGlobalIndex (localIndex); 00291 } 00292 00293 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00294 bool 00295 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00296 isNodeLocalElement (const LocalOrdinal localIndex) const { 00297 return mapHost_.isOwnedLocalIndex (localIndex); 00298 } 00299 00300 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00301 bool 00302 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00303 isNodeGlobalElement (const GlobalOrdinal globalIndex) const { 00304 return mapHost_.isOwnedGlobalIndex (globalIndex); 00305 } 00306 00307 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00308 bool 00309 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00310 isUniform () const { 00311 return mapHost_.isUniform (); 00312 } 00313 00314 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00315 bool 00316 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00317 isContiguous () const { 00318 return mapHost_.isContiguous (); 00319 } 00320 00321 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00322 bool 00323 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00324 isCompatible (const Map<LocalOrdinal, GlobalOrdinal, node_type>& map) const 00325 { 00326 using Teuchos::outArg; 00327 using Teuchos::REDUCE_MIN; 00328 using Teuchos::reduceAll; 00329 // 00330 // Tests that avoid the Boolean all-reduce below by using 00331 // globally consistent quantities. 00332 // 00333 if (this == &map) { 00334 // Pointer equality on one process always implies pointer 00335 // equality on all processes, since Map is immutable. 00336 return true; 00337 } 00338 else if (getComm ()->getSize () != map.getComm ()->getSize ()) { 00339 // The two communicators have different numbers of processes. 00340 // It's not correct to call isCompatible() in that case. This 00341 // may result in the all-reduce hanging below. 00342 return false; 00343 } 00344 else if (getGlobalNumElements () != map.getGlobalNumElements ()) { 00345 // Two Maps are definitely NOT compatible if they have different 00346 // global numbers of indices. 00347 return false; 00348 } 00349 else if (isContiguous () && isUniform () && 00350 map.isContiguous () && map.isUniform ()) { 00351 // Contiguous uniform Maps with the same number of processes in 00352 // their communicators, and with the same global numbers of 00353 // indices, are always compatible. 00354 return true; 00355 } 00356 00357 TEUCHOS_TEST_FOR_EXCEPTION( 00358 getGlobalNumElements () != map.getGlobalNumElements (), std::logic_error, 00359 "Tpetra::Map::isCompatible: There's a bug in this method. We've already " 00360 "checked that this condition is true above, but it's false here. " 00361 "Please report this bug to the Tpetra developers."); 00362 00363 // Do both Maps have the same number of indices on each process? 00364 const int locallyCompat = 00365 (getNodeNumElements () == map.getNodeNumElements ()) ? 1 : 0; 00366 00367 int globallyCompat = 0; 00368 reduceAll<int, int> (*comm_, REDUCE_MIN, locallyCompat, outArg (globallyCompat)); 00369 return (globallyCompat == 1); 00370 } 00371 00372 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00373 bool 00374 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00375 locallySameAs (const Map<LocalOrdinal, GlobalOrdinal, node_type>& map) const 00376 { 00377 using Teuchos::ArrayView; 00378 typedef GlobalOrdinal GO; 00379 typedef typename ArrayView<const GO>::size_type size_type; 00380 00381 // If both Maps are contiguous, we can compare their GID ranges 00382 // easily by looking at the min and max GID on this process. 00383 // Otherwise, we'll compare their GID lists. If only one Map is 00384 // contiguous, then we only have to call getNodeElementList() on 00385 // the noncontiguous Map. (It's best to avoid calling it on a 00386 // contiguous Map, since it results in unnecessary storage that 00387 // persists for the lifetime of the Map.) 00388 00389 if (getNodeNumElements () != map.getNodeNumElements ()) { 00390 return false; 00391 } 00392 else if (getMinGlobalIndex () != map.getMinGlobalIndex () || 00393 getMaxGlobalIndex () != map.getMaxGlobalIndex ()) { 00394 return false; 00395 } 00396 else { 00397 if (isContiguous ()) { 00398 if (map.isContiguous ()) { 00399 return true; // min and max match, so the ranges match. 00400 } 00401 else { // *this is contiguous, but map is not contiguous 00402 TEUCHOS_TEST_FOR_EXCEPTION( 00403 ! this->isContiguous () || map.isContiguous (), std::logic_error, 00404 "Tpetra::Map::locallySameAs: BUG"); 00405 ArrayView<const GO> rhsElts = map.getNodeElementList (); 00406 const GO minLhsGid = this->getMinGlobalIndex (); 00407 const size_type numRhsElts = rhsElts.size (); 00408 for (size_type k = 0; k < numRhsElts; ++k) { 00409 const GO curLhsGid = minLhsGid + static_cast<GO> (k); 00410 if (curLhsGid != rhsElts[k]) { 00411 return false; // stop on first mismatch 00412 } 00413 } 00414 return true; 00415 } 00416 } 00417 else if (map.isContiguous ()) { // *this is not contiguous, but map is 00418 TEUCHOS_TEST_FOR_EXCEPTION( 00419 this->isContiguous () || ! map.isContiguous (), std::logic_error, 00420 "Tpetra::Map::locallySameAs: BUG"); 00421 ArrayView<const GO> lhsElts = this->getNodeElementList (); 00422 const GO minRhsGid = map.getMinGlobalIndex (); 00423 const size_type numLhsElts = lhsElts.size (); 00424 for (size_type k = 0; k < numLhsElts; ++k) { 00425 const GO curRhsGid = minRhsGid + static_cast<GO> (k); 00426 if (curRhsGid != lhsElts[k]) { 00427 return false; // stop on first mismatch 00428 } 00429 } 00430 return true; 00431 } 00432 else { // neither *this nor map are contiguous 00433 // std::equal requires that the latter range is as large as 00434 // the former. We know the ranges have equal length, because 00435 // they have the same number of local entries. 00436 ArrayView<const GO> lhsElts = getNodeElementList (); 00437 ArrayView<const GO> rhsElts = map.getNodeElementList (); 00438 return std::equal (lhsElts.begin (), lhsElts.end (), rhsElts.begin ()); 00439 } 00440 } 00441 } 00442 00443 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00444 bool 00445 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00446 isSameAs (const Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > &map) const 00447 { 00448 using Teuchos::outArg; 00449 using Teuchos::REDUCE_MIN; 00450 using Teuchos::reduceAll; 00451 // 00452 // Tests that avoid the Boolean all-reduce below by using 00453 // globally consistent quantities. 00454 // 00455 if (this == &map) { 00456 // Pointer equality on one process always implies pointer 00457 // equality on all processes, since Map is immutable. 00458 return true; 00459 } 00460 else if (getComm ()->getSize () != map.getComm ()->getSize ()) { 00461 // The two communicators have different numbers of processes. 00462 // It's not correct to call isSameAs() in that case. This 00463 // may result in the all-reduce hanging below. 00464 return false; 00465 } 00466 else if (getGlobalNumElements () != map.getGlobalNumElements ()) { 00467 // Two Maps are definitely NOT the same if they have different 00468 // global numbers of indices. 00469 return false; 00470 } 00471 else if (getMinAllGlobalIndex () != map.getMinAllGlobalIndex () || 00472 getMaxAllGlobalIndex () != map.getMaxAllGlobalIndex () || 00473 getIndexBase () != map.getIndexBase ()) { 00474 // If the global min or max global index doesn't match, or if 00475 // the index base doesn't match, then the Maps aren't the same. 00476 return false; 00477 } 00478 else if (isDistributed () != map.isDistributed ()) { 00479 // One Map is distributed and the other is not, which means that 00480 // the Maps aren't the same. 00481 return false; 00482 } 00483 else if (isContiguous () && isUniform () && 00484 map.isContiguous () && map.isUniform ()) { 00485 // Contiguous uniform Maps with the same number of processes in 00486 // their communicators, with the same global numbers of indices, 00487 // and with matching index bases and ranges, must be the same. 00488 return true; 00489 } 00490 00491 // The two communicators must have the same number of processes, 00492 // with process ranks occurring in the same order. This uses 00493 // MPI_COMM_COMPARE. The MPI 3.1 standard (Section 6.4) says: 00494 // "Operations that access communicators are local and their 00495 // execution does not require interprocess communication." 00496 // However, just to be sure, I'll put this call after the above 00497 // tests that don't communicate. 00498 if (! Details::congruent (*comm_, * (map.getComm ()))) { 00499 return false; 00500 } 00501 00502 // If we get this far, we need to check local properties and then 00503 // communicate local sameness across all processes. 00504 const int isSame_lcl = locallySameAs (map) ? 1 : 0; 00505 00506 // Return true if and only if all processes report local sameness. 00507 int isSame_gbl = 0; 00508 reduceAll<int, int> (*comm_, REDUCE_MIN, isSame_lcl, outArg (isSame_gbl)); 00509 return isSame_gbl == 1; 00510 } 00511 00512 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00513 Teuchos::ArrayView<const GlobalOrdinal> 00514 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00515 getNodeElementList () const 00516 { 00517 typedef GlobalOrdinal GO; 00518 Kokkos::View<const GO*, host_mirror_device_type> myGlobalInds = 00519 mapHost_.getMyGlobalIndices (); // creates it if it doesn't exist 00520 00521 return Teuchos::ArrayView<const GO> (myGlobalInds.ptr_on_device (), 00522 myGlobalInds.dimension_0 ()); 00523 } 00524 00525 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00526 bool 00527 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00528 isDistributed () const { 00529 return mapHost_.isDistributed (); 00530 } 00531 00532 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00533 std::string 00534 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00535 description () const { 00536 using Teuchos::TypeNameTraits; 00537 std::ostringstream os; 00538 00539 os << "\"Tpetra::Map\": {" 00540 << "LocalOrdinalType: " << TypeNameTraits<local_ordinal_type>::name () 00541 << ", GlobalOrdinalType: " << TypeNameTraits<global_ordinal_type>::name () 00542 << ", DeviceType: " << TypeNameTraits<device_type>::name (); 00543 if (this->getObjectLabel () != "") { 00544 os << ", Label: \"" << this->getObjectLabel () << "\""; 00545 } 00546 os << ", Global number of entries: " << getGlobalNumElements () 00547 << ", Number of processes: " << getComm ()->getSize () 00548 << ", Uniform: " << (isUniform () ? "true" : "false") 00549 << ", Contiguous: " << (isContiguous () ? "true" : "false") 00550 << ", Distributed: " << (isDistributed () ? "true" : "false") 00551 << "}"; 00552 return os.str (); 00553 } 00554 00555 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00556 void 00557 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00558 describe (Teuchos::FancyOStream &out, 00559 const Teuchos::EVerbosityLevel verbLevel) const 00560 { 00561 using std::endl; 00562 using std::setw; 00563 using Teuchos::ArrayView; 00564 using Teuchos::as; 00565 using Teuchos::OSTab; 00566 using Teuchos::toString; 00567 using Teuchos::TypeNameTraits; 00568 using Teuchos::VERB_DEFAULT; 00569 using Teuchos::VERB_NONE; 00570 using Teuchos::VERB_LOW; 00571 using Teuchos::VERB_MEDIUM; 00572 using Teuchos::VERB_HIGH; 00573 using Teuchos::VERB_EXTREME; 00574 typedef typename ArrayView<const GlobalOrdinal>::size_type size_type; 00575 00576 const size_t nME = getNodeNumElements (); 00577 ArrayView<const GlobalOrdinal> myEntries = getNodeElementList (); 00578 const int myRank = comm_->getRank (); 00579 const int numProcs = comm_->getSize (); 00580 00581 const Teuchos::EVerbosityLevel vl = (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel; 00582 00583 // By convention, describe() always begins with a tab before printing. 00584 OSTab tab0 (out); 00585 00586 if (vl == VERB_NONE) { 00587 // do nothing 00588 } 00589 else if (vl == VERB_LOW) { 00590 if (myRank == 0) { 00591 out << "\"Tpetra::Map\":" << endl; 00592 OSTab tab1 (out); 00593 out << "LocalOrdinalType: " << TypeNameTraits<LocalOrdinal>::name () << endl 00594 << "GlobalOrdinalType: " << TypeNameTraits<GlobalOrdinal>::name () << endl 00595 << "DeviceType: " << TypeNameTraits<device_type>::name () << endl; 00596 if (this->getObjectLabel () != "") { 00597 out << "Label: \"" << this->getObjectLabel () << "\"" << endl; 00598 } 00599 out << "Global number of entries: " << getGlobalNumElements () << endl 00600 << "Minimum global index: " << getMinAllGlobalIndex () << endl 00601 << "Maximum global index: " << getMaxAllGlobalIndex () << endl 00602 << "Index base: " << getIndexBase () << endl 00603 << "Number of processes: " << getComm ()->getSize () << endl 00604 << "Uniform: " << (isUniform () ? "true" : "false") << endl 00605 << "Contiguous: " << (isContiguous () ? "true" : "false") << endl 00606 << "Distributed: " << (isDistributed () ? "true" : "false") << endl; 00607 } 00608 } 00609 00610 if (vl >= VERB_HIGH) { // HIGH or EXTREME 00611 for (int p = 0; p < numProcs; ++p) { 00612 if (myRank == p) { 00613 out << "Process " << myRank << ":" << endl; 00614 OSTab tab1 (out); 00615 out << "My number of entries: " << nME << endl 00616 << "My minimum global index: " << getMinGlobalIndex () << endl 00617 << "My maximum global index: " << getMaxGlobalIndex () << endl; 00618 if (vl == VERB_EXTREME) { 00619 out << "My global indices: ["; 00620 for (size_type k = 0; k < myEntries.size (); ++k) { 00621 out << myEntries[k]; 00622 if (k + 1 < myEntries.size ()) { 00623 out << ", "; 00624 } 00625 } 00626 out << "]" << endl; 00627 } 00628 std::flush (out); 00629 } 00630 // Do a few global ops to give I/O a chance to complete 00631 comm_->barrier (); 00632 comm_->barrier (); 00633 comm_->barrier (); 00634 } 00635 } 00636 } 00637 00638 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00639 Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > > 00640 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00641 replaceCommWithSubset (const Teuchos::RCP<const Teuchos::Comm<int> >& newComm) const 00642 { 00643 using Teuchos::ArrayView; 00644 using Teuchos::outArg; 00645 using Teuchos::RCP; 00646 using Teuchos::REDUCE_MIN; 00647 using Teuchos::reduceAll; 00648 typedef global_size_t GST; 00649 typedef LocalOrdinal LO; 00650 typedef GlobalOrdinal GO; 00651 typedef Map<LO, GO, node_type> map_type; 00652 00653 // mfh 26 Mar 2013: The lazy way to do this is simply to recreate 00654 // the Map by calling its ordinary public constructor, using the 00655 // original Map's data. This only involves O(1) all-reduces over 00656 // the new communicator, which in the common case only includes a 00657 // small number of processes. 00658 00659 // Create the Map to return. 00660 if (newComm.is_null ()) { 00661 return Teuchos::null; // my process does not participate in the new Map 00662 } else { 00663 // Map requires that the index base equal the global min GID. 00664 // Figuring out the global min GID requires a reduction over all 00665 // processes in the new communicator. It could be that some (or 00666 // even all) of these processes contain zero entries. (Recall 00667 // that this method, unlike removeEmptyProcesses(), may remove 00668 // an arbitrary subset of processes.) We deal with this by 00669 // doing a min over the min GID on each process if the process 00670 // has more than zero entries, or the global max GID, if that 00671 // process has zero entries. If no processes have any entries, 00672 // then the index base doesn't matter anyway. 00673 const GO myMinGid = (this->getNodeNumElements () == 0) ? 00674 this->getMaxAllGlobalIndex () : this->getMinGlobalIndex (); 00675 GO newIndexBase = this->getInvalidGlobalIndex (); 00676 reduceAll<int, GO> (*newComm, REDUCE_MIN, myMinGid, outArg (newIndexBase)); 00677 00678 // Make Map's constructor compute the global number of indices. 00679 const GST globalNumInds = Teuchos::OrdinalTraits<GST>::invalid (); 00680 00681 if (mapDevice_.initialized ()) { 00682 Kokkos::View<const GO*, DeviceType> myGIDs = 00683 mapDevice_.getMyGlobalIndices (); 00684 return rcp (new map_type (globalNumInds, myGIDs, newIndexBase, 00685 newComm, this->getNode ())); 00686 } 00687 else { 00688 Kokkos::View<const GO*, host_mirror_device_type> myGidsHostView = 00689 mapHost_.getMyGlobalIndices (); 00690 ArrayView<const GO> myGidsArrayView (myGidsHostView.ptr_on_device (), 00691 myGidsHostView.dimension_0 ()); 00692 return rcp (new map_type (globalNumInds, myGidsArrayView, newIndexBase, 00693 newComm, this->getNode ())); 00694 } 00695 } 00696 } 00697 00698 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00699 Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > > 00700 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00701 removeEmptyProcesses () const 00702 { 00703 using Teuchos::Comm; 00704 using Teuchos::null; 00705 using Teuchos::outArg; 00706 using Teuchos::RCP; 00707 using Teuchos::REDUCE_MIN; 00708 using Teuchos::reduceAll; 00709 00710 // Create the new communicator. split() returns a valid 00711 // communicator on all processes. On processes where color == 0, 00712 // ignore the result. Passing key == 0 tells MPI to order the 00713 // processes in the new communicator by their rank in the old 00714 // communicator. 00715 const int color = (getNodeNumElements () == 0) ? 0 : 1; 00716 // MPI_Comm_split must be called collectively over the original 00717 // communicator. We can't just call it on processes with color 00718 // one, even though we will ignore its result on processes with 00719 // color zero. 00720 RCP<const Comm<int> > newComm = comm_->split (color, 0); 00721 if (color == 0) { 00722 newComm = null; 00723 } 00724 00725 // Create the Map to return. 00726 if (newComm.is_null ()) { 00727 return null; // my process does not participate in the new Map 00728 } else { 00729 // The default constructor that's useful for clone() above is 00730 // also useful here. 00731 RCP<Map> map = rcp (new Map ()); 00732 map->comm_ = newComm; 00733 map->node_ = node_; 00734 map->mapHost_ = mapHost_; 00735 map->mapDevice_ = mapDevice_; 00736 00737 // Uniformity and contiguity have not changed. The directory 00738 // has changed, but we've taken care of that above. However, 00739 // distributed-ness may have changed, since the communicator has 00740 // changed. 00741 // 00742 // If the original Map was NOT distributed, then the new Map 00743 // cannot be distributed. If the number of processes in the new 00744 // communicator is 1, then the new Map is not distributed. 00745 // Otherwise, we have to check the new Map using an all-reduce 00746 // (over the new communicator). For example, the original Map 00747 // may have had some processes with zero elements, and all other 00748 // processes with the same number of elements as in the whole 00749 // Map. That Map is technically distributed, because of the 00750 // processes with zero elements. Removing those processes would 00751 // make the new Map locally replicated. 00752 if (! isDistributed () || newComm->getSize () == 1) { 00753 map->mapHost_.setDistributed (false); 00754 map->mapDevice_.setDistributed (false); 00755 } else { 00756 const int iOwnAllGids = 00757 (getNodeNumElements () == getGlobalNumElements ()) ? 1 : 0; 00758 int allProcsOwnAllGids = 0; 00759 reduceAll<int, int> (*newComm, REDUCE_MIN, iOwnAllGids, 00760 outArg (allProcsOwnAllGids)); 00761 map->mapHost_.setDistributed (allProcsOwnAllGids != 1); 00762 map->mapDevice_.setDistributed (allProcsOwnAllGids != 1); 00763 } 00764 00765 // Map's default constructor creates an uninitialized Directory. 00766 // The Directory will be initialized on demand in 00767 // getRemoteIndexList(). 00768 // 00769 // FIXME (mfh 26 Mar 2013) It should be possible to "filter" the 00770 // directory more efficiently than just recreating it. If 00771 // directory recreation proves a bottleneck, we can always 00772 // revisit this. On the other hand, Directory creation is only 00773 // collective over the new, presumably much smaller 00774 // communicator, so it may not be worth the effort to optimize. 00775 00776 return map; 00777 } 00778 } 00779 00780 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00781 void 00782 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00783 setupDirectory () const 00784 { 00785 TEUCHOS_TEST_FOR_EXCEPTION( 00786 directory_.is_null (), std::logic_error, "Tpetra::Map::setupDirectory: " 00787 "The Directory is null. " 00788 "Please report this bug to the Tpetra developers."); 00789 00790 // Only create the Directory if it hasn't been created yet. 00791 // This is a collective operation. 00792 if (! directory_->initialized ()) { 00793 directory_->initialize (*this); 00794 } 00795 } 00796 00797 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00798 LookupStatus 00799 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00800 getRemoteIndexList (const Teuchos::ArrayView<const GlobalOrdinal>& GIDs, 00801 const Teuchos::ArrayView<int>& PIDs, 00802 const Teuchos::ArrayView<LocalOrdinal>& LIDs) const 00803 { 00804 TEUCHOS_TEST_FOR_EXCEPTION( 00805 GIDs.size () != PIDs.size (), std::invalid_argument, 00806 "Tpetra::Map (Kokkos refactor)::getRemoteIndexList (3 args): GIDs.size ()" 00807 " = " << GIDs.size () << " != PIDs.size () = " << PIDs.size () << "."); 00808 TEUCHOS_TEST_FOR_EXCEPTION( 00809 GIDs.size () != LIDs.size (), std::invalid_argument, 00810 "Tpetra::Map (Kokkos refactor)::getRemoteIndexList (3 args): GIDs.size ()" 00811 " = " << GIDs.size () << " != LIDs.size () = " << LIDs.size () << "."); 00812 00813 // Empty Maps (i.e., containing no indices on any processes in the 00814 // Map's communicator) are perfectly valid. In that case, if the 00815 // input GID list is nonempty, we fill the output arrays with 00816 // invalid values, and return IDNotPresent to notify the caller. 00817 // It's perfectly valid to give getRemoteIndexList GIDs that the 00818 // Map doesn't own. SubmapImport test 2 needs this functionality. 00819 if (getGlobalNumElements () == 0) { 00820 if (GIDs.size () == 0) { 00821 return AllIDsPresent; // trivially 00822 } else { 00823 for (Teuchos::ArrayView<int>::size_type k = 0; k < PIDs.size (); ++k) { 00824 PIDs[k] = Teuchos::OrdinalTraits<int>::invalid (); 00825 } 00826 for (typename Teuchos::ArrayView<LocalOrdinal>::size_type k = 0; 00827 k < LIDs.size (); ++k) { 00828 LIDs[k] = Teuchos::OrdinalTraits<LocalOrdinal>::invalid (); 00829 } 00830 return IDNotPresent; 00831 } 00832 } 00833 00834 // getRemoteIndexList must be called collectively, and Directory 00835 // initialization is collective too, so it's OK to initialize the 00836 // Directory on demand. 00837 setupDirectory (); 00838 return directory_->getDirectoryEntries (*this, GIDs, PIDs, LIDs); 00839 } 00840 00841 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00842 LookupStatus 00843 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00844 getRemoteIndexList (const Teuchos::ArrayView<const GlobalOrdinal>& GIDs, 00845 const Teuchos::ArrayView<int>& PIDs) const 00846 { 00847 TEUCHOS_TEST_FOR_EXCEPTION( 00848 GIDs.size () != PIDs.size (), std::invalid_argument, 00849 "Tpetra::Map (Kokkos refactor)::getRemoteIndexList (2 args): GIDs.size ()" 00850 " = " << GIDs.size () << " != PIDs.size () = " << PIDs.size () << "."); 00851 00852 // Empty Maps (i.e., containing no indices on any processes in the 00853 // Map's communicator) are perfectly valid. In that case, if the 00854 // input GID list is nonempty, we fill the output array with 00855 // invalid values, and return IDNotPresent to notify the caller. 00856 // It's perfectly valid to give getRemoteIndexList GIDs that the 00857 // Map doesn't own. SubmapImport test 2 needs this functionality. 00858 if (getGlobalNumElements () == 0) { 00859 if (GIDs.size () == 0) { 00860 return AllIDsPresent; // trivially 00861 } else { 00862 // The Map contains no indices, so all output PIDs are invalid. 00863 for (Teuchos::ArrayView<int>::size_type k = 0; k < PIDs.size (); ++k) { 00864 PIDs[k] = Teuchos::OrdinalTraits<int>::invalid (); 00865 } 00866 return IDNotPresent; 00867 } 00868 } 00869 00870 // getRemoteIndexList must be called collectively, and Directory 00871 // creation is collective too, so it's OK to create the Directory 00872 // on demand. 00873 setupDirectory (); 00874 return directory_->getDirectoryEntries (*this, GIDs, PIDs); 00875 } 00876 00877 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00878 Teuchos::RCP<const Teuchos::Comm<int> > 00879 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00880 getComm () const { 00881 return comm_; 00882 } 00883 00884 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00885 Teuchos::RCP<Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > 00886 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00887 getNode () const { 00888 return node_; 00889 } 00890 00891 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00892 bool 00893 Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00894 isOneToOne () const 00895 { 00896 TEUCHOS_TEST_FOR_EXCEPTION( 00897 getComm ().is_null (), std::logic_error, "Tpetra::Map::isOneToOne: " 00898 "getComm() returns null. Please report this bug to the Tpetra " 00899 "developers."); 00900 00901 // This is a collective operation, if it hasn't been called before. 00902 setupDirectory (); 00903 return directory_->isOneToOne (*this); 00904 } 00905 00906 } // namespace Tpetra 00907 00908 #endif // TPETRA_KOKKOSREFACTOR_MAP_DEF_HPP
1.7.6.1