|
Tpetra Matrix/Vector Services
Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Tpetra: Templated Linear Algebra Services Package 00005 // Copyright (2008) Sandia Corporation 00006 // 00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, 00008 // the U.S. Government retains certain rights in this software. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00038 // 00039 // ************************************************************************ 00040 // @HEADER 00041 00042 #ifndef TPETRA_KOKKOSREFACTOR_CRSGRAPH_DEF_HPP 00043 #define TPETRA_KOKKOSREFACTOR_CRSGRAPH_DEF_HPP 00044 00045 #ifdef DOXYGEN_USE_ONLY 00046 # include "Tpetra_CrsGraph_decl.hpp" 00047 #endif 00048 #include <KokkosCompat_ClassicNodeAPI_Wrapper.hpp> 00049 00050 namespace Tpetra { 00051 00052 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00053 CrsGraph< 00054 LocalOrdinal, GlobalOrdinal, 00055 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00056 CrsGraph (const Teuchos::RCP<const map_type>& rowMap, 00057 size_t maxNumEntriesPerRow, 00058 ProfileType pftype, 00059 const Teuchos::RCP<Teuchos::ParameterList>& params) : 00060 dist_object_type (rowMap) 00061 , rowMap_ (rowMap) 00062 , nodeNumEntries_ (0) 00063 , nodeNumAllocated_ (Teuchos::OrdinalTraits<size_t>::invalid ()) 00064 , pftype_ (pftype) 00065 , numAllocForAllRows_ (maxNumEntriesPerRow) 00066 , storageStatus_ (pftype == StaticProfile ? 00067 Details::STORAGE_1D_UNPACKED : 00068 Details::STORAGE_2D) 00069 , indicesAreAllocated_ (false) 00070 , indicesAreLocal_ (false) 00071 , indicesAreGlobal_ (false) 00072 , fillComplete_ (false) 00073 , indicesAreSorted_ (true) 00074 , noRedundancies_ (true) 00075 , haveLocalConstants_ (false) 00076 , haveGlobalConstants_ (false) 00077 , sortGhostsAssociatedWithEachProcessor_ (true) 00078 { 00079 const char tfecfFuncName[] = "CrsGraph(rowMap,maxNumEntriesPerRow," 00080 "pftype,params): "; 00081 staticAssertions (); 00082 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00083 maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (), 00084 std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be " 00085 "a valid size_t value, which in this case means it must not be " 00086 "Teuchos::OrdinalTraits<size_t>::invalid()."); 00087 resumeFill (params); 00088 checkInternalState (); 00089 } 00090 00091 00092 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00093 CrsGraph< 00094 LocalOrdinal, GlobalOrdinal, 00095 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00096 CrsGraph (const Teuchos::RCP<const map_type>& rowMap, 00097 const Teuchos::RCP<const map_type>& colMap, 00098 const size_t maxNumEntriesPerRow, 00099 const ProfileType pftype, 00100 const Teuchos::RCP<Teuchos::ParameterList>& params) : 00101 dist_object_type (rowMap) 00102 , rowMap_ (rowMap) 00103 , colMap_ (colMap) 00104 , nodeNumEntries_ (0) 00105 , nodeNumAllocated_ (Teuchos::OrdinalTraits<size_t>::invalid ()) 00106 , pftype_ (pftype) 00107 , numAllocForAllRows_ (maxNumEntriesPerRow) 00108 , storageStatus_ (pftype == StaticProfile ? 00109 Details::STORAGE_1D_UNPACKED : 00110 Details::STORAGE_2D) 00111 , indicesAreAllocated_ (false) 00112 , indicesAreLocal_ (false) 00113 , indicesAreGlobal_ (false) 00114 , fillComplete_ (false) 00115 , indicesAreSorted_ (true) 00116 , noRedundancies_ (true) 00117 , haveLocalConstants_ (false) 00118 , haveGlobalConstants_ (false) 00119 , sortGhostsAssociatedWithEachProcessor_ (true) 00120 { 00121 const char tfecfFuncName[] = "CrsGraph(rowMap,colMap,maxNumEntriesPerRow," 00122 "pftype,params): "; 00123 staticAssertions (); 00124 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00125 maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (), 00126 std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be " 00127 "a valid size_t value, which in this case means it must not be " 00128 "Teuchos::OrdinalTraits<size_t>::invalid()."); 00129 resumeFill (params); 00130 checkInternalState (); 00131 } 00132 00133 00134 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00135 CrsGraph< 00136 LocalOrdinal, GlobalOrdinal, 00137 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00138 CrsGraph (const Teuchos::RCP<const map_type>& rowMap, 00139 const Teuchos::ArrayRCP<const size_t>& numEntPerRow, 00140 const ProfileType pftype, 00141 const Teuchos::RCP<Teuchos::ParameterList>& params) : 00142 dist_object_type (rowMap) 00143 , rowMap_ (rowMap) 00144 , nodeNumEntries_ (0) 00145 , nodeNumAllocated_ (Teuchos::OrdinalTraits<size_t>::invalid ()) 00146 , pftype_ (pftype) 00147 , numAllocForAllRows_ (0) 00148 , storageStatus_ (pftype == StaticProfile ? 00149 Details::STORAGE_1D_UNPACKED : 00150 Details::STORAGE_2D) 00151 , indicesAreAllocated_ (false) 00152 , indicesAreLocal_ (false) 00153 , indicesAreGlobal_ (false) 00154 , fillComplete_ (false) 00155 , indicesAreSorted_ (true) 00156 , noRedundancies_ (true) 00157 , haveLocalConstants_ (false) 00158 , haveGlobalConstants_ (false) 00159 , sortGhostsAssociatedWithEachProcessor_ (true) 00160 { 00161 const char tfecfFuncName[] = "CrsGraph(rowMap,numEntPerRow,pftype,params): "; 00162 staticAssertions (); 00163 00164 const size_t lclNumRows = rowMap.is_null () ? 00165 static_cast<size_t> (0) : rowMap->getNodeNumElements (); 00166 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00167 static_cast<size_t> (numEntPerRow.size ()) != lclNumRows, 00168 std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size () 00169 << " != the local number of rows " << lclNumRows << " as specified by " 00170 "the input row Map."); 00171 00172 #ifdef HAVE_TPETRA_DEBUG 00173 for (size_t r = 0; r < lclNumRows; ++r) { 00174 const size_t curRowCount = numEntPerRow[r]; 00175 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00176 curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (), 00177 std::invalid_argument, "numEntPerRow(" << r << ") specifies an invalid " 00178 "number of entries (Teuchos::OrdinalTraits<size_t>::invalid())."); 00179 } 00180 #endif // HAVE_TPETRA_DEBUG 00181 00182 // Allocate k_numAllocPerRow_ (upper bound on number of entries 00183 // per row). We get a host view of the data, as an ArrayRCP. 00184 // Create a nonowning Kokkos::View of it, copy into 00185 // k_numAllocPerRow, and sync. Then assign to k_numAllocPerRow_ 00186 // (which is a const view, so we can't copy into it directly). 00187 typedef Kokkos::DualView<size_t*, Kokkos::LayoutLeft, device_type> 00188 dual_view_type; 00189 typedef typename device_type::host_mirror_device_type host_type; 00190 typedef Kokkos::View<const size_t*, Kokkos::LayoutLeft, host_type, 00191 Kokkos::MemoryUnmanaged> in_view_type; 00192 in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows); 00193 dual_view_type k_numAllocPerRow ("Tpetra::CrsGraph::numAllocPerRow", 00194 lclNumRows); 00195 k_numAllocPerRow.template modify<host_type> (); 00196 Kokkos::deep_copy (k_numAllocPerRow.h_view, numAllocPerRowIn); 00197 k_numAllocPerRow.template sync<device_type> (); 00198 k_numAllocPerRow_ = k_numAllocPerRow; 00199 numAllocPerRow_ = Kokkos::Compat::persistingView (k_numAllocPerRow_.h_view); 00200 00201 resumeFill (params); 00202 checkInternalState (); 00203 } 00204 00205 00206 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00207 CrsGraph< 00208 LocalOrdinal, GlobalOrdinal, 00209 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00210 CrsGraph (const Teuchos::RCP<const map_type>& rowMap, 00211 const Kokkos::DualView<const size_t*, device_type>& numEntPerRow, 00212 const ProfileType pftype, 00213 const Teuchos::RCP<Teuchos::ParameterList>& params) : 00214 dist_object_type (rowMap) 00215 , rowMap_ (rowMap) 00216 , nodeNumEntries_ (0) 00217 , nodeNumAllocated_ (Teuchos::OrdinalTraits<size_t>::invalid ()) 00218 , pftype_ (pftype) 00219 , k_numAllocPerRow_ (numEntPerRow) 00220 , numAllocPerRow_ (Kokkos::Compat::persistingView (numEntPerRow.h_view)) 00221 , numAllocForAllRows_ (0) 00222 , storageStatus_ (pftype == StaticProfile ? 00223 Details::STORAGE_1D_UNPACKED : 00224 Details::STORAGE_2D) 00225 , indicesAreAllocated_ (false) 00226 , indicesAreLocal_ (false) 00227 , indicesAreGlobal_ (false) 00228 , fillComplete_ (false) 00229 , indicesAreSorted_ (true) 00230 , noRedundancies_ (true) 00231 , haveLocalConstants_ (false) 00232 , haveGlobalConstants_ (false) 00233 , sortGhostsAssociatedWithEachProcessor_ (true) 00234 { 00235 const char tfecfFuncName[] = "CrsGraph(rowMap,numEntPerRow,pftype,params): "; 00236 staticAssertions (); 00237 00238 const size_t lclNumRows = rowMap.is_null () ? 00239 static_cast<size_t> (0) : rowMap->getNodeNumElements (); 00240 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00241 static_cast<size_t> (numEntPerRow.dimension_0 ()) != lclNumRows, 00242 std::invalid_argument, "numEntPerRow has length " << 00243 numEntPerRow.dimension_0 () << " != the local number of rows " << 00244 lclNumRows << " as specified by " "the input row Map."); 00245 00246 #ifdef HAVE_TPETRA_DEBUG 00247 for (size_t r = 0; r < lclNumRows; ++r) { 00248 const size_t curRowCount = numEntPerRow.h_view(r); 00249 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00250 curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (), 00251 std::invalid_argument, "numEntPerRow(" << r << ") specifies an invalid " 00252 "number of entries (Teuchos::OrdinalTraits<size_t>::invalid())."); 00253 } 00254 #endif // HAVE_TPETRA_DEBUG 00255 00256 resumeFill (params); 00257 checkInternalState (); 00258 } 00259 00260 00261 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00262 CrsGraph< 00263 LocalOrdinal, GlobalOrdinal, 00264 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00265 CrsGraph (const Teuchos::RCP<const map_type>& rowMap, 00266 const Teuchos::RCP<const map_type>& colMap, 00267 const Kokkos::DualView<const size_t*, device_type>& numEntPerRow, 00268 const ProfileType pftype, 00269 const Teuchos::RCP<Teuchos::ParameterList>& params) : 00270 dist_object_type (rowMap) 00271 , rowMap_ (rowMap) 00272 , colMap_ (colMap) 00273 , nodeNumEntries_ (0) 00274 , nodeNumAllocated_ (Teuchos::OrdinalTraits<size_t>::invalid ()) 00275 , pftype_ (pftype) 00276 , k_numAllocPerRow_ (numEntPerRow) 00277 , numAllocPerRow_ (Kokkos::Compat::persistingView (numEntPerRow.h_view)) 00278 , numAllocForAllRows_ (0) 00279 , storageStatus_ (pftype == StaticProfile ? 00280 Details::STORAGE_1D_UNPACKED : 00281 Details::STORAGE_2D) 00282 , indicesAreAllocated_ (false) 00283 , indicesAreLocal_ (false) 00284 , indicesAreGlobal_ (false) 00285 , fillComplete_ (false) 00286 , indicesAreSorted_ (true) 00287 , noRedundancies_ (true) 00288 , haveLocalConstants_ (false) 00289 , haveGlobalConstants_ (false) 00290 , sortGhostsAssociatedWithEachProcessor_ (true) 00291 { 00292 const char tfecfFuncName[] = "CrsGraph(rowMap,colMap,numEntPerRow,pftype,params): "; 00293 staticAssertions (); 00294 00295 const size_t lclNumRows = rowMap.is_null () ? 00296 static_cast<size_t> (0) : rowMap->getNodeNumElements (); 00297 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00298 static_cast<size_t> (numEntPerRow.dimension_0 ()) != lclNumRows, 00299 std::invalid_argument, "numEntPerRow has length " << 00300 numEntPerRow.dimension_0 () << " != the local number of rows " << 00301 lclNumRows << " as specified by " "the input row Map."); 00302 00303 #ifdef HAVE_TPETRA_DEBUG 00304 for (size_t r = 0; r < lclNumRows; ++r) { 00305 const size_t curRowCount = numEntPerRow.h_view(r); 00306 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00307 curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (), 00308 std::invalid_argument, "numEntPerRow(" << r << ") specifies an invalid " 00309 "number of entries (Teuchos::OrdinalTraits<size_t>::invalid())."); 00310 } 00311 #endif // HAVE_TPETRA_DEBUG 00312 00313 resumeFill (params); 00314 checkInternalState (); 00315 } 00316 00317 00318 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00319 CrsGraph< 00320 LocalOrdinal, GlobalOrdinal, 00321 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00322 CrsGraph (const Teuchos::RCP<const map_type>& rowMap, 00323 const Teuchos::RCP<const map_type>& colMap, 00324 const Teuchos::ArrayRCP<const size_t>& numEntPerRow, 00325 ProfileType pftype, 00326 const Teuchos::RCP<Teuchos::ParameterList>& params) : 00327 dist_object_type (rowMap) 00328 , rowMap_ (rowMap) 00329 , colMap_ (colMap) 00330 , nodeNumEntries_ (0) 00331 , nodeNumAllocated_ (Teuchos::OrdinalTraits<size_t>::invalid ()) 00332 , pftype_ (pftype) 00333 , numAllocForAllRows_ (0) 00334 , storageStatus_ (pftype == StaticProfile ? 00335 Details::STORAGE_1D_UNPACKED : 00336 Details::STORAGE_2D) 00337 , indicesAreAllocated_ (false) 00338 , indicesAreLocal_ (false) 00339 , indicesAreGlobal_ (false) 00340 , fillComplete_ (false) 00341 , indicesAreSorted_ (true) 00342 , noRedundancies_ (true) 00343 , haveLocalConstants_ (false) 00344 , haveGlobalConstants_ (false) 00345 , sortGhostsAssociatedWithEachProcessor_ (true) 00346 { 00347 const char tfecfFuncName[] = "CrsGraph(rowMap,colMap,numEntPerRow,pftype," 00348 "params): "; 00349 staticAssertions (); 00350 00351 const size_t lclNumRows = rowMap.is_null () ? 00352 static_cast<size_t> (0) : rowMap->getNodeNumElements (); 00353 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00354 static_cast<size_t> (numEntPerRow.size ()) != lclNumRows, 00355 std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size () 00356 << " != the local number of rows " << lclNumRows << " as specified by " 00357 "the input row Map."); 00358 00359 #ifdef HAVE_TPETRA_DEBUG 00360 for (size_t r = 0; r < lclNumRows; ++r) { 00361 const size_t curRowCount = numEntPerRow[r]; 00362 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00363 curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (), 00364 std::invalid_argument, "numEntPerRow(" << r << ") specifies an invalid " 00365 "number of entries (Teuchos::OrdinalTraits<size_t>::invalid())."); 00366 } 00367 #endif // HAVE_TPETRA_DEBUG 00368 00369 // Allocate k_numAllocPerRow_ (upper bound on number of entries 00370 // per row). We get a host view of the data, as an ArrayRCP. 00371 // Create a nonowning Kokkos::View of it, copy into 00372 // k_numAllocPerRow, and sync. Then assign to k_numAllocPerRow_ 00373 // (which is a const view, so we can't copy into it directly). 00374 typedef Kokkos::DualView<size_t*, Kokkos::LayoutLeft, device_type> 00375 dual_view_type; 00376 typedef typename device_type::host_mirror_device_type host_type; 00377 typedef Kokkos::View<const size_t*, Kokkos::LayoutLeft, host_type, 00378 Kokkos::MemoryUnmanaged> in_view_type; 00379 in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows); 00380 dual_view_type k_numAllocPerRow ("Tpetra::CrsGraph::numAllocPerRow", 00381 lclNumRows); 00382 k_numAllocPerRow.template modify<host_type> (); 00383 Kokkos::deep_copy (k_numAllocPerRow.h_view, numAllocPerRowIn); 00384 k_numAllocPerRow.template sync<device_type> (); 00385 k_numAllocPerRow_ = k_numAllocPerRow; 00386 numAllocPerRow_ = Kokkos::Compat::persistingView (k_numAllocPerRow_.h_view); 00387 00388 resumeFill (params); 00389 checkInternalState (); 00390 } 00391 00392 00393 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00394 CrsGraph< 00395 LocalOrdinal, GlobalOrdinal, 00396 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00397 CrsGraph (const Teuchos::RCP<const map_type>& rowMap, 00398 const Teuchos::RCP<const map_type>& colMap, 00399 const t_RowPtrs& rowPointers, 00400 const t_LocalOrdinal_1D& columnIndices, 00401 const Teuchos::RCP<Teuchos::ParameterList>& params) : 00402 dist_object_type (rowMap) 00403 , rowMap_(rowMap) 00404 , colMap_(colMap) 00405 , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ()) 00406 , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ()) 00407 , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ()) 00408 , nodeNumEntries_(0) 00409 , nodeNumAllocated_(OrdinalTraits<size_t>::invalid()) 00410 , pftype_(StaticProfile) 00411 , numAllocForAllRows_(0) 00412 , storageStatus_ (Details::STORAGE_1D_PACKED) 00413 , indicesAreAllocated_(true) 00414 , indicesAreLocal_(true) 00415 , indicesAreGlobal_(false) 00416 , fillComplete_(false) 00417 , indicesAreSorted_(true) 00418 , noRedundancies_(true) 00419 , haveLocalConstants_ (false) 00420 , haveGlobalConstants_ (false) 00421 , sortGhostsAssociatedWithEachProcessor_(true) 00422 { 00423 staticAssertions (); 00424 setAllIndices (rowPointers, columnIndices); 00425 checkInternalState (); 00426 } 00427 00428 00429 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00430 CrsGraph< 00431 LocalOrdinal, GlobalOrdinal, 00432 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00433 CrsGraph (const Teuchos::RCP<const map_type>& rowMap, 00434 const Teuchos::RCP<const map_type>& colMap, 00435 const Teuchos::ArrayRCP<size_t>& rowPointers, 00436 const Teuchos::ArrayRCP<LocalOrdinal> & columnIndices, 00437 const Teuchos::RCP<Teuchos::ParameterList>& params) : 00438 dist_object_type (rowMap) 00439 , rowMap_ (rowMap) 00440 , colMap_ (colMap) 00441 , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ()) 00442 , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ()) 00443 , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ()) 00444 , nodeNumEntries_ (0) 00445 , nodeNumAllocated_ (Teuchos::OrdinalTraits<size_t>::invalid ()) 00446 , pftype_ (StaticProfile) 00447 , numAllocForAllRows_ (0) 00448 , storageStatus_ (Details::STORAGE_1D_PACKED) 00449 , indicesAreAllocated_ (true) 00450 , indicesAreLocal_ (true) 00451 , indicesAreGlobal_ (false) 00452 , fillComplete_ (false) 00453 , indicesAreSorted_ (true) 00454 , noRedundancies_ (true) 00455 , haveLocalConstants_ (false) 00456 , haveGlobalConstants_ (false) 00457 , sortGhostsAssociatedWithEachProcessor_ (true) 00458 { 00459 staticAssertions (); 00460 setAllIndices (rowPointers, columnIndices); 00461 checkInternalState (); 00462 } 00463 00464 00465 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00466 CrsGraph< 00467 LocalOrdinal, GlobalOrdinal, 00468 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00469 CrsGraph (const Teuchos::RCP<const map_type>& rowMap, 00470 const Teuchos::RCP<const map_type>& colMap, 00471 const LocalStaticCrsGraphType& k_local_graph_, 00472 const Teuchos::RCP<Teuchos::ParameterList>& params) 00473 : DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap) 00474 , rowMap_ (rowMap) 00475 , colMap_ (colMap) 00476 , k_lclGraph_ (k_local_graph_) 00477 , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ()) 00478 , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ()) 00479 , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ()) 00480 , nodeNumEntries_ (0) // FIXME (mfh 17 Mar 2014) should get from k_lclGraph_ right now 00481 , nodeNumAllocated_ (Teuchos::OrdinalTraits<size_t>::invalid ()) 00482 , pftype_ (StaticProfile) 00483 , numAllocForAllRows_ (0) 00484 , storageStatus_ (Details::STORAGE_1D_PACKED) 00485 , indicesAreAllocated_ (true) 00486 , indicesAreLocal_ (true) 00487 , indicesAreGlobal_ (false) 00488 , fillComplete_ (false) 00489 , indicesAreSorted_ (true) 00490 , noRedundancies_ (true) 00491 , haveLocalConstants_ (false) 00492 , haveGlobalConstants_ (false) 00493 , sortGhostsAssociatedWithEachProcessor_(true) 00494 { 00495 using Teuchos::arcp; 00496 using Teuchos::ArrayRCP; 00497 using Teuchos::as; 00498 using Teuchos::ParameterList; 00499 using Teuchos::parameterList; 00500 using Teuchos::rcp; 00501 typedef GlobalOrdinal GO; 00502 typedef LocalOrdinal LO; 00503 00504 staticAssertions(); 00505 const char tfecfFuncName[] = "CrsGraph(Map,Map,Kokkos::LocalStaticCrsGraph)"; 00506 00507 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00508 colMap.is_null (), std::runtime_error, 00509 ": The input column Map must be nonnull."); 00510 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00511 k_local_graph_.numRows () != rowMap->getNodeNumElements (), 00512 std::runtime_error, 00513 ": The input row Map and the input local graph need to have the same " 00514 "number of rows. The row Map claims " << rowMap->getNodeNumElements () 00515 << " row(s), but the local graph claims " << k_local_graph_.numRows () 00516 << " row(s)."); 00517 // NOTE (mfh 17 Mar 2014) getNodeNumRows() returns 00518 // rowMap_->getNodeNumElements(), but it doesn't have to. 00519 // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00520 // k_local_graph_.numRows () != getNodeNumRows (), std::runtime_error, 00521 // ": The input row Map and the input local graph need to have the same " 00522 // "number of rows. The row Map claims " << getNodeNumRows () << " row(s), " 00523 // "but the local graph claims " << k_local_graph_.numRows () << " row(s)."); 00524 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00525 k_lclInds1D_.dimension_0 () != 0 || k_gblInds1D_.dimension_0 () != 0, std::logic_error, 00526 ": cannot have 1D data structures allocated."); 00527 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00528 ! lclInds2D_.is_null () || ! gblInds2D_.is_null (), std::logic_error, 00529 ": cannot have 2D data structures allocated."); 00530 00531 nodeNumAllocated_ = k_local_graph_.row_map (getNodeNumRows ()); 00532 nodeNumEntries_ = k_local_graph_.row_map (getNodeNumRows ()); 00533 00534 // NOTE (mfh 17 Mar 2014) We also need a version of this CrsGraph 00535 // constructor that takes a domain and range Map, as well as a row 00536 // and column Map. In that case, we must pass the domain and 00537 // range Map into the following method. 00538 setDomainRangeMaps (rowMap_, rowMap_); 00539 makeImportExport (); 00540 00541 RCP<ParameterList> lclparams; 00542 if (params.is_null ()) { 00543 lclparams = parameterList (); 00544 } else { 00545 lclparams = sublist (params, "Local Graph"); 00546 } 00547 // FIXME (mfh 28 Aug 2014) "Local Graph" sublist not used. 00548 00549 k_lclInds1D_ = k_lclGraph_.entries; 00550 k_rowPtrs_ = k_lclGraph_.row_map; 00551 00552 typename LocalStaticCrsGraphType::row_map_type d_ptrs = k_lclGraph_.row_map; 00553 typename LocalStaticCrsGraphType::entries_type d_inds = k_lclGraph_.entries; 00554 00555 // Reset local properties 00556 upperTriangular_ = true; 00557 lowerTriangular_ = true; 00558 nodeMaxNumRowEntries_ = 0; 00559 nodeNumDiags_ = 0; 00560 00561 // Compute triangular properties 00562 const size_t numLocalRows = getNodeNumRows (); 00563 for (size_t localRow = 0; localRow < numLocalRows; ++localRow) { 00564 const GO globalRow = rowMap_->getGlobalElement (localRow); 00565 const LO rlcid = colMap_->getLocalElement (globalRow); 00566 00567 // It's entirely possible that the local matrix has no entries 00568 // in the column corresponding to the current row. In that 00569 // case, the column Map may not necessarily contain that GID. 00570 // This is why we check whether rlcid is "invalid" (which means 00571 // that globalRow is not a GID in the column Map). 00572 if (rlcid != Teuchos::OrdinalTraits<LO>::invalid ()) { 00573 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00574 rlcid + 1 >= static_cast<LO> (d_ptrs.dimension_0 ()), 00575 std::runtime_error, ": The given row Map and/or column Map is/are " 00576 "not compatible with the provided local graph."); 00577 if (d_ptrs(rlcid) != d_ptrs(rlcid + 1)) { 00578 const size_t smallestCol = 00579 static_cast<size_t> (d_inds(d_ptrs(rlcid))); 00580 const size_t largestCol = 00581 static_cast<size_t> (d_inds(d_ptrs(rlcid + 1)-1)); 00582 if (smallestCol < localRow) { 00583 upperTriangular_ = false; 00584 } 00585 if (localRow < largestCol) { 00586 lowerTriangular_ = false; 00587 } 00588 for (size_t i = d_ptrs(rlcid); i < d_ptrs(rlcid + 1); ++i) { 00589 if (d_inds(i) == rlcid) { 00590 ++nodeNumDiags_; 00591 } 00592 } 00593 } 00594 nodeMaxNumRowEntries_ = 00595 std::max (d_ptrs(rlcid + 1) - d_ptrs(rlcid), nodeMaxNumRowEntries_); 00596 } 00597 } 00598 00599 haveLocalConstants_ = true; 00600 computeGlobalConstants (); 00601 00602 fillComplete_ = true; 00603 checkInternalState (); 00604 } 00605 00606 00607 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00608 CrsGraph< 00609 LocalOrdinal, GlobalOrdinal, 00610 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00611 ~CrsGraph () 00612 {} 00613 00614 00615 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00616 Teuchos::RCP<const Teuchos::ParameterList> 00617 CrsGraph< 00618 LocalOrdinal, GlobalOrdinal, 00619 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00620 getValidParameters () const 00621 { 00622 using Teuchos::RCP; 00623 using Teuchos::ParameterList; 00624 using Teuchos::parameterList; 00625 00626 RCP<ParameterList> params = parameterList ("Tpetra::CrsGraph"); 00627 00628 // Make a sublist for the Import. 00629 RCP<ParameterList> importSublist = parameterList ("Import"); 00630 00631 // FIXME (mfh 02 Apr 2012) We should really have the Import and 00632 // Export objects fill in these lists. However, we don't want to 00633 // create an Import or Export unless we need them. For now, we 00634 // know that the Import and Export just pass the list directly to 00635 // their Distributor, so we can create a Distributor here 00636 // (Distributor's constructor is a lightweight operation) and have 00637 // it fill in the list. 00638 00639 // Fill in Distributor default parameters by creating a 00640 // Distributor and asking it to do the work. 00641 Distributor distributor (rowMap_->getComm (), importSublist); 00642 params->set ("Import", *importSublist, "How the Import performs communication."); 00643 00644 // Make a sublist for the Export. For now, it's a clone of the 00645 // Import sublist. It's not a shallow copy, though, since we 00646 // might like the Import to do communication differently than the 00647 // Export. 00648 params->set ("Export", *importSublist, "How the Export performs communication."); 00649 00650 return params; 00651 } 00652 00653 00654 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00655 void 00656 CrsGraph< 00657 LocalOrdinal, GlobalOrdinal, 00658 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00659 setParameterList (const Teuchos::RCP<Teuchos::ParameterList>& params) 00660 { 00661 Teuchos::RCP<const Teuchos::ParameterList> validParams = 00662 getValidParameters (); 00663 params->validateParametersAndSetDefaults (*validParams); 00664 this->setMyParamList (params); 00665 } 00666 00667 00668 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00669 global_size_t 00670 CrsGraph< 00671 LocalOrdinal, GlobalOrdinal, 00672 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00673 getGlobalNumRows () const 00674 { 00675 return rowMap_->getGlobalNumElements (); 00676 } 00677 00678 00679 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00680 global_size_t 00681 CrsGraph< 00682 LocalOrdinal, GlobalOrdinal, 00683 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00684 getGlobalNumCols () const 00685 { 00686 const char tfecfFuncName[] = "getGlobalNumCols: "; 00687 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00688 ! isFillComplete () || getDomainMap ().is_null (), std::runtime_error, 00689 "The graph does not have a domain Map. You may not call this method in " 00690 "that case."); 00691 return getDomainMap ()->getGlobalNumElements (); 00692 } 00693 00694 00695 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00696 size_t 00697 CrsGraph< 00698 LocalOrdinal, GlobalOrdinal, 00699 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00700 getNodeNumRows () const 00701 { 00702 return rowMap_.is_null () ? static_cast<size_t> (0) : 00703 rowMap_->getNodeNumElements (); 00704 } 00705 00706 00707 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00708 size_t 00709 CrsGraph< 00710 LocalOrdinal, GlobalOrdinal, 00711 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00712 getNodeNumCols () const 00713 { 00714 const char tfecfFuncName[] = "getNodeNumCols: "; 00715 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00716 ! hasColMap (), std::runtime_error, 00717 "The graph does not have a column Map. You may not call this method " 00718 "unless the graph has a column Map. This requires either that a custom " 00719 "column Map was given to the constructor, or that fillComplete() has " 00720 "been called."); 00721 return colMap_.is_null () ? static_cast<size_t> (0) : 00722 colMap_->getNodeNumElements (); 00723 } 00724 00725 00726 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00727 size_t 00728 CrsGraph< 00729 LocalOrdinal, GlobalOrdinal, 00730 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00731 getNodeNumDiags () const 00732 { 00733 return nodeNumDiags_; 00734 } 00735 00736 00737 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00738 global_size_t 00739 CrsGraph< 00740 LocalOrdinal, GlobalOrdinal, 00741 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00742 getGlobalNumDiags () const 00743 { 00744 return globalNumDiags_; 00745 } 00746 00747 00748 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00749 Teuchos::RCP<Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > 00750 CrsGraph< 00751 LocalOrdinal, GlobalOrdinal, 00752 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::getNode () const 00753 { 00754 return rowMap_.is_null () ? Teuchos::null : rowMap_->getNode (); 00755 } 00756 00757 00758 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00759 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, 00760 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > > 00761 CrsGraph< 00762 LocalOrdinal, GlobalOrdinal, 00763 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00764 getRowMap () const 00765 { 00766 return rowMap_; 00767 } 00768 00769 00770 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00771 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, 00772 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > > 00773 CrsGraph< 00774 LocalOrdinal, GlobalOrdinal, 00775 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00776 getColMap () const 00777 { 00778 return colMap_; 00779 } 00780 00781 00782 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00783 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, 00784 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > > 00785 CrsGraph< 00786 LocalOrdinal, GlobalOrdinal, 00787 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00788 getDomainMap () const 00789 { 00790 return domainMap_; 00791 } 00792 00793 00794 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00795 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, 00796 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > > 00797 CrsGraph< 00798 LocalOrdinal, GlobalOrdinal, 00799 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00800 getRangeMap () const 00801 { 00802 return rangeMap_; 00803 } 00804 00805 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00806 Teuchos::RCP<const Import< 00807 LocalOrdinal, GlobalOrdinal, 00808 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > > 00809 CrsGraph< 00810 LocalOrdinal, GlobalOrdinal, 00811 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00812 getImporter () const 00813 { 00814 return importer_; 00815 } 00816 00817 00818 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00819 Teuchos::RCP<const Export< 00820 LocalOrdinal, GlobalOrdinal, 00821 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > > 00822 CrsGraph< 00823 LocalOrdinal, GlobalOrdinal, 00824 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00825 getExporter () const 00826 { 00827 return exporter_; 00828 } 00829 00830 00831 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00832 bool 00833 CrsGraph< 00834 LocalOrdinal, GlobalOrdinal, 00835 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00836 hasColMap () const 00837 { 00838 return ! colMap_.is_null (); 00839 } 00840 00841 00842 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00843 bool 00844 CrsGraph< 00845 LocalOrdinal, GlobalOrdinal, 00846 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00847 isStorageOptimized () const 00848 { 00849 // FIXME (mfh 07 Aug 2014) Why wouldn't storage be optimized if 00850 // getNodeNumRows() is zero? 00851 00852 const bool isOpt = indicesAreAllocated_ && 00853 k_numRowEntries_.dimension_0 () == 0 && 00854 getNodeNumRows () > 0; 00855 00856 #ifdef HAVE_TPETRA_DEBUG 00857 const char tfecfFuncName[] = "isStorageOptimized"; 00858 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00859 isOpt && getProfileType () == DynamicProfile, std::logic_error, 00860 ": The matrix claims to have optimized storage, but getProfileType() " 00861 "returns DynamicProfile. This should never happen. Please report this " 00862 "bug to the Tpetra developers."); 00863 #endif // HAVE_TPETRA_DEBUG 00864 00865 return isOpt; 00866 } 00867 00868 00869 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00870 ProfileType 00871 CrsGraph< 00872 LocalOrdinal, GlobalOrdinal, 00873 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00874 getProfileType () const 00875 { 00876 return pftype_; 00877 } 00878 00879 00880 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00881 global_size_t 00882 CrsGraph< 00883 LocalOrdinal, GlobalOrdinal, 00884 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00885 getGlobalNumEntries () const 00886 { 00887 return globalNumEntries_; 00888 } 00889 00890 00891 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00892 size_t 00893 CrsGraph< 00894 LocalOrdinal, GlobalOrdinal, 00895 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00896 getNodeNumEntries () const 00897 { 00898 return nodeNumEntries_; 00899 } 00900 00901 00902 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00903 global_size_t 00904 CrsGraph< 00905 LocalOrdinal, GlobalOrdinal, 00906 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00907 getGlobalMaxNumRowEntries () const 00908 { 00909 return globalMaxNumRowEntries_; 00910 } 00911 00912 00913 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00914 size_t 00915 CrsGraph< 00916 LocalOrdinal, GlobalOrdinal, 00917 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00918 getNodeMaxNumRowEntries () const 00919 { 00920 return nodeMaxNumRowEntries_; 00921 } 00922 00923 00924 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00925 bool 00926 CrsGraph< 00927 LocalOrdinal, GlobalOrdinal, 00928 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00929 isFillComplete () const 00930 { 00931 return fillComplete_; 00932 } 00933 00934 00935 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00936 bool 00937 CrsGraph< 00938 LocalOrdinal, GlobalOrdinal, 00939 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00940 isFillActive () const 00941 { 00942 return ! fillComplete_; 00943 } 00944 00945 00946 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00947 bool 00948 CrsGraph< 00949 LocalOrdinal, GlobalOrdinal, 00950 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00951 isUpperTriangular () const 00952 { 00953 return upperTriangular_; 00954 } 00955 00956 00957 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00958 bool 00959 CrsGraph< 00960 LocalOrdinal, GlobalOrdinal, 00961 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00962 isLowerTriangular () const 00963 { 00964 return lowerTriangular_; 00965 } 00966 00967 00968 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00969 bool 00970 CrsGraph< 00971 LocalOrdinal, GlobalOrdinal, 00972 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00973 isLocallyIndexed () const 00974 { 00975 return indicesAreLocal_; 00976 } 00977 00978 00979 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00980 bool 00981 CrsGraph< 00982 LocalOrdinal, GlobalOrdinal, 00983 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00984 isGloballyIndexed () const 00985 { 00986 return indicesAreGlobal_; 00987 } 00988 00989 00990 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00991 size_t 00992 CrsGraph< 00993 LocalOrdinal, GlobalOrdinal, 00994 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00995 getNodeAllocationSize () const 00996 { 00997 return nodeNumAllocated_; 00998 } 00999 01000 01001 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01002 Teuchos::RCP<const Teuchos::Comm<int> > 01003 CrsGraph< 01004 LocalOrdinal, GlobalOrdinal, 01005 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01006 getComm () const 01007 { 01008 return rowMap_.is_null () ? Teuchos::null : rowMap_->getComm (); 01009 } 01010 01011 01012 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01013 GlobalOrdinal 01014 CrsGraph< 01015 LocalOrdinal, GlobalOrdinal, 01016 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01017 getIndexBase () const 01018 { 01019 return rowMap_->getIndexBase (); 01020 } 01021 01022 01023 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01024 bool 01025 CrsGraph< 01026 LocalOrdinal, GlobalOrdinal, 01027 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01028 indicesAreAllocated () const 01029 { 01030 return indicesAreAllocated_; 01031 } 01032 01033 01034 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01035 bool 01036 CrsGraph< 01037 LocalOrdinal, GlobalOrdinal, 01038 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01039 isSorted () const 01040 { 01041 return indicesAreSorted_; 01042 } 01043 01044 01045 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01046 bool 01047 CrsGraph< 01048 LocalOrdinal, GlobalOrdinal, 01049 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01050 isMerged () const 01051 { 01052 return noRedundancies_; 01053 } 01054 01055 01056 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01057 void 01058 CrsGraph< 01059 LocalOrdinal, GlobalOrdinal, 01060 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01061 setLocallyModified () 01062 { 01063 // FIXME (mfh 07 May 2013) How do we know that the change 01064 // introduced a redundancy, or even that it invalidated the sorted 01065 // order of indices? CrsGraph has always made this conservative 01066 // guess. It could be a bit costly to check at insertion time, 01067 // though. 01068 indicesAreSorted_ = false; 01069 noRedundancies_ = false; 01070 01071 // We've modified the graph, so we'll have to recompute local 01072 // constants like the number of diagonal entries on this process. 01073 haveLocalConstants_ = false; 01074 } 01075 01076 01077 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01078 void 01079 CrsGraph< 01080 LocalOrdinal, GlobalOrdinal, 01081 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01082 allocateIndices (const ELocalGlobal lg) 01083 { 01084 using Teuchos::arcp; 01085 using Teuchos::Array; 01086 using Teuchos::ArrayRCP; 01087 typedef Teuchos::ArrayRCP<size_t>::size_type size_type; 01088 const char tfecfFuncName[] = "allocateIndices: "; 01089 01090 // This is a protected function, only callable by us. If it was 01091 // called incorrectly, it is our fault. That's why the tests 01092 // below throw std::logic_error instead of std::invalid_argument. 01093 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 01094 isLocallyIndexed () && lg == GlobalIndices, std::logic_error, 01095 "The graph is locally indexed, but Tpetra code is calling this method " 01096 "with lg=GlobalIndices. Please report this bug to the Tpetra developers."); 01097 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 01098 isGloballyIndexed () && lg == LocalIndices, std::logic_error, 01099 "The graph is globally indexed, but Tpetra code is calling this method " 01100 "with lg=LocalIndices. Please report this bug to the Tpetra developers."); 01101 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 01102 indicesAreAllocated (), std::logic_error, "The graph's indices are " 01103 "already allocated, but Tpetra code is calling allocateIndices) again. " 01104 "Please report this bug to the Tpetra developers."); 01105 01106 const size_t numRows = getNodeNumRows (); 01107 01108 if (getProfileType () == StaticProfile) { 01109 // 01110 // STATIC ALLOCATION PROFILE 01111 // 01112 t_RowPtrsNC k_rowPtrs ("Tpetra::CrsGraph::ptr", numRows + 1); 01113 01114 if (k_numAllocPerRow_.dimension_0 () != 0) { 01115 // It's OK to throw std::invalid_argument here, because we 01116 // haven't incurred any side effects yet. Throwing that 01117 // exception (and not, say, std::logic_error) implies that the 01118 // instance can recover. 01119 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 01120 k_numAllocPerRow_.dimension_0 () != numRows, std::invalid_argument, 01121 "k_numAllocPerRow_ is allocated (has length != 0), but its length = " 01122 << k_numAllocPerRow_.dimension_0 () << " != numRows = " << numRows 01123 << "."); 01124 // FIXME hack until we get parallel_scan in kokkos 01125 // 01126 // FIXME (mfh 11 Aug 2014) This assumes UVM, since k_rowPtrs_ 01127 // is currently a device View. Should instead use a DualView. 01128 typename Kokkos::DualView<const size_t*, device_type>::t_host h_numAllocPerRow = 01129 k_numAllocPerRow_.h_view; // use a host view for now, since we compute on host 01130 bool anyInvalidAllocSizes = false; 01131 for (size_t i = 0; i < numRows; ++i) { 01132 size_t allocSize = h_numAllocPerRow(i); 01133 if (allocSize == Teuchos::OrdinalTraits<size_t>::invalid ()) { 01134 anyInvalidAllocSizes = true; 01135 allocSize = 0; 01136 } 01137 k_rowPtrs(i+1) = k_rowPtrs(i) + allocSize; 01138 } 01139 // It's OK to throw std::invalid_argument here, because we 01140 // haven't incurred any side effects yet. Throwing that 01141 // exception (and not, say, std::logic_error) implies that the 01142 // instance can recover. 01143 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 01144 anyInvalidAllocSizes, std::invalid_argument, "The input array of " 01145 "allocation sizes per row had at least one invalid (== " 01146 "Teuchos::OrdinalTraits<size_t>::invalid()) entry."); 01147 } 01148 else { 01149 // It's OK to throw std::invalid_argument here, because we 01150 // haven't incurred any side effects yet. Throwing that 01151 // exception (and not, say, std::logic_error) implies that the 01152 // instance can recover. 01153 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 01154 numAllocForAllRows_ == Teuchos::OrdinalTraits<size_t>::invalid (), 01155 std::invalid_argument, "numAllocForAllRows_ has an invalid value, " 01156 "namely Teuchos::OrdinalTraits<size_t>::invalid() = " << 01157 Teuchos::OrdinalTraits<size_t>::invalid () << "."); 01158 // FIXME hack until we get parallel_scan in kokkos 01159 // 01160 // FIXME (mfh 11 Aug 2014) This assumes UVM, since k_rowPtrs_ 01161 // is currently a device View. Should instead use a DualView. 01162 for (size_t i = 0; i < numRows; ++i) { 01163 k_rowPtrs(i+1) = k_rowPtrs(i) + numAllocForAllRows_; 01164 } 01165 } 01166 01167 // "Commit" the resulting row offsets. 01168 k_rowPtrs_ = k_rowPtrs; 01169 01170 // FIXME (mfh 05,11 Aug 2014) This assumes UVM, since k_rowPtrs_ 01171 // is currently a device View. Should instead use a DualView. 01172 const size_type numInds = static_cast<size_type> (k_rowPtrs_(numRows)); 01173 if (lg == LocalIndices) { 01174 k_lclInds1D_ = t_LocalOrdinal_1D ("Tpetra::CrsGraph::ind", numInds); 01175 } 01176 else { 01177 k_gblInds1D_ = t_GlobalOrdinal_1D ("Tpetra::CrsGraph::ind", numInds); 01178 gblInds1D_ = Kokkos::Compat::persistingView (k_gblInds1D_); 01179 } 01180 nodeNumAllocated_ = numInds; 01181 storageStatus_ = Details::STORAGE_1D_UNPACKED; 01182 } 01183 else { 01184 // 01185 // DYNAMIC ALLOCATION PROFILE 01186 // 01187 01188 // Use the host view of k_numAllocPerRow_, since we have to 01189 // allocate 2-D storage on the host. 01190 typename Kokkos::DualView<const size_t*, device_type>::t_host h_numAllocPerRow = 01191 k_numAllocPerRow_.h_view; 01192 const bool useNumAllocPerRow = (k_numAllocPerRow_.dimension_0 () != 0); 01193 01194 if (lg == LocalIndices) { 01195 lclInds2D_ = arcp<Array<LocalOrdinal> > (numRows); 01196 nodeNumAllocated_ = 0; 01197 for (size_t i = 0; i < numRows; ++i) { 01198 const size_t howMany = useNumAllocPerRow ? 01199 h_numAllocPerRow(i) : numAllocForAllRows_; 01200 nodeNumAllocated_ += howMany; 01201 if (howMany > 0) { 01202 lclInds2D_[i].resize (howMany); 01203 } 01204 } 01205 } 01206 else { // allocate global indices 01207 gblInds2D_ = arcp<Array<GlobalOrdinal> > (numRows); 01208 nodeNumAllocated_ = 0; 01209 for (size_t i = 0; i < numRows; ++i) { 01210 const size_t howMany = useNumAllocPerRow ? 01211 h_numAllocPerRow(i) : numAllocForAllRows_; 01212 nodeNumAllocated_ += howMany; 01213 if (howMany > 0) { 01214 gblInds2D_[i].resize (howMany); 01215 } 01216 } 01217 } 01218 storageStatus_ = Details::STORAGE_2D; 01219 } 01220 01221 indicesAreLocal_ = (lg == LocalIndices); 01222 indicesAreGlobal_ = (lg == GlobalIndices); 01223 01224 if (numRows > 0) { 01225 k_numRowEntries_ = 01226 t_numRowEntries_ ("Tpetra::CrsGraph::numRowEntries", numRows); 01227 // Legacy Kokkos classic array views the above DualView's host data. 01228 numRowEntries_ = Kokkos::Compat::persistingView (k_numRowEntries_.h_view); 01229 01230 // Fill with zeros on the host. k_numRowEntries_ is a DualView. 01231 // 01232 // TODO (mfh 05 Aug 2014) Write a device kernel for this. 01233 typedef typename device_type::host_mirror_device_type 01234 host_mirror_device_type; 01235 k_numRowEntries_.template modify<host_mirror_device_type> (); 01236 size_t* const hostPtr = k_numRowEntries_.h_view.ptr_on_device (); 01237 std::fill (hostPtr, hostPtr + numRows, static_cast<size_t> (0)); 01238 k_numRowEntries_.template sync<device_type> (); 01239 } 01240 01241 // done with these 01242 numAllocForAllRows_ = 0; 01243 k_numAllocPerRow_ = Kokkos::DualView<const size_t*, device_type> (); 01244 numAllocPerRow_ = Teuchos::null; 01245 indicesAreAllocated_ = true; 01246 checkInternalState (); 01247 } 01248 01249 01250 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01251 template <class T> 01252 Teuchos::ArrayRCP<Teuchos::Array<T> > 01253 CrsGraph< 01254 LocalOrdinal, GlobalOrdinal, 01255 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01256 allocateValues2D () const 01257 { 01258 using Teuchos::arcp; 01259 using Teuchos::Array; 01260 using Teuchos::ArrayRCP; 01261 const char tfecfFuncName[] = "allocateValues2D: "; 01262 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 01263 ! indicesAreAllocated (), std::runtime_error, 01264 "Graph indices must be allocated before values."); 01265 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 01266 getProfileType () != DynamicProfile, std::runtime_error, 01267 "Graph indices must be allocated in a dynamic profile."); 01268 01269 ArrayRCP<Array<T> > values2D; 01270 values2D = arcp<Array<T> > (getNodeNumRows ()); 01271 if (lclInds2D_ != null) { 01272 const size_t numRows = lclInds2D_.size (); 01273 for (size_t r = 0; r < numRows; ++r) { 01274 values2D[r].resize (lclInds2D_[r].size ()); 01275 } 01276 } 01277 else if (gblInds2D_ != null) { 01278 const size_t numRows = gblInds2D_.size (); 01279 for (size_t r = 0; r < numRows; ++r) { 01280 values2D[r].resize (gblInds2D_[r].size ()); 01281 } 01282 } 01283 return values2D; 01284 } 01285 01286 01287 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01288 Teuchos::ArrayView<const LocalOrdinal> 01289 CrsGraph< 01290 LocalOrdinal, GlobalOrdinal, 01291 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01292 getLocalView (const RowInfo rowinfo) const 01293 { 01294 using Kokkos::subview; 01295 using Kokkos::View; 01296 typedef LocalOrdinal LO; 01297 typedef View<const LO*, device_type, Kokkos::MemoryUnmanaged> row_view_type; 01298 01299 if (rowinfo.allocSize == 0) { 01300 return Teuchos::ArrayView<const LO> (); 01301 } 01302 else { // nothing in the row to view 01303 if (k_lclInds1D_.dimension_0 () != 0) { // 1-D storage 01304 const size_t start = rowinfo.offset1D; 01305 const size_t len = rowinfo.allocSize; 01306 const std::pair<size_t, size_t> rng (start, start + len); 01307 row_view_type rowView = subview<row_view_type> (k_lclInds1D_, rng); 01308 return Teuchos::ArrayView<const LO> (rowView.ptr_on_device (), len, 01309 Teuchos::RCP_DISABLE_NODE_LOOKUP); 01310 } 01311 else if (! lclInds2D_[rowinfo.localRow].empty ()) { // 2-D storage 01312 return lclInds2D_[rowinfo.localRow] (); 01313 } 01314 else { 01315 return Teuchos::ArrayView<const LO> (); // nothing in the row to view 01316 } 01317 } 01318 } 01319 01320 01321 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01322 Teuchos::ArrayView<LocalOrdinal> 01323 CrsGraph< 01324 LocalOrdinal, GlobalOrdinal, 01325 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01326 getLocalViewNonConst (const RowInfo rowinfo) 01327 { 01328 using Kokkos::subview; 01329 using Kokkos::View; 01330 typedef LocalOrdinal LO; 01331 typedef View<LO*, device_type, Kokkos::MemoryUnmanaged> row_view_type; 01332 01333 if (rowinfo.allocSize == 0) { // nothing in the row to view 01334 return Teuchos::ArrayView<LO> (); 01335 } 01336 else { 01337 if (k_lclInds1D_.dimension_0 () != 0) { // 1-D storage 01338 const size_t start = rowinfo.offset1D; 01339 const size_t len = rowinfo.allocSize; 01340 const std::pair<size_t, size_t> rng (start, start + len); 01341 row_view_type rowView = subview<row_view_type> (k_lclInds1D_, rng); 01342 return Teuchos::ArrayView<LO> (rowView.ptr_on_device (), len, 01343 Teuchos::RCP_DISABLE_NODE_LOOKUP); 01344 } 01345 else if (! lclInds2D_[rowinfo.localRow].empty ()) { // 2-D storage 01346 return lclInds2D_[rowinfo.localRow] (); 01347 } 01348 else { 01349 return Teuchos::ArrayView<LO> (); // nothing in the row to view 01350 } 01351 } 01352 } 01353 01354 01355 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01356 Teuchos::ArrayView<const GlobalOrdinal> 01357 CrsGraph< 01358 LocalOrdinal, GlobalOrdinal, 01359 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01360 getGlobalView (const RowInfo rowinfo) const 01361 { 01362 Teuchos::ArrayView<const GlobalOrdinal> view; 01363 if (rowinfo.allocSize > 0) { 01364 if (gblInds1D_ != null) { 01365 view = gblInds1D_ (rowinfo.offset1D, rowinfo.allocSize); 01366 } 01367 else if (! gblInds2D_[rowinfo.localRow].empty()) { 01368 view = gblInds2D_[rowinfo.localRow] (); 01369 } 01370 } 01371 return view; 01372 } 01373 01374 01375 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01376 Teuchos::ArrayView<GlobalOrdinal> 01377 CrsGraph< 01378 LocalOrdinal, GlobalOrdinal, 01379 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01380 getGlobalViewNonConst (const RowInfo rowinfo) 01381 { 01382 Teuchos::ArrayView<GlobalOrdinal> view; 01383 if (rowinfo.allocSize > 0) { 01384 if (gblInds1D_ != null) { 01385 view = gblInds1D_ (rowinfo.offset1D, rowinfo.allocSize); 01386 } 01387 else if (!gblInds2D_[rowinfo.localRow].empty()) { 01388 view = gblInds2D_[rowinfo.localRow] (); 01389 } 01390 } 01391 return view; 01392 } 01393 01394 01395 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01396 RowInfo 01397 CrsGraph< 01398 LocalOrdinal, GlobalOrdinal, 01399 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01400 getRowInfo (const size_t myRow) const 01401 { 01402 #ifdef HAVE_TPETRA_DEBUG 01403 const char tfecfFuncName[] = "getRowInfo"; 01404 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 01405 ! rowMap_->isNodeLocalElement (myRow), std::logic_error, 01406 ": The given (local) row index myRow = " << myRow 01407 << " does not belong to the graph's row Map. " 01408 "This probably indicates a bug in Tpetra::CrsGraph or Tpetra::CrsMatrix. " 01409 "Please report this bug to the Tpetra developers."); 01410 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 01411 ! hasRowInfo (), std::logic_error, 01412 ": Late catch! Graph does not have row info anymore. " 01413 "Error should have been caught earlier. " 01414 "Please report this bug to the Tpetra developers."); 01415 #endif // HAVE_TPETRA_DEBUG 01416 01417 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid (); 01418 RowInfo ret; 01419 ret.localRow = myRow; 01420 if (nodeNumAllocated_ != 0 && nodeNumAllocated_ != STINV) { 01421 // graph data structures have the info that we need 01422 // 01423 // if static graph, offsets tell us the allocation size 01424 if (getProfileType() == StaticProfile) { 01425 ret.offset1D = k_rowPtrs_(myRow); 01426 ret.allocSize = k_rowPtrs_(myRow+1) - k_rowPtrs_(myRow); 01427 if (k_numRowEntries_.dimension_0 () == 0) { 01428 ret.numEntries = ret.allocSize; 01429 } else { 01430 ret.numEntries = k_numRowEntries_.h_view(myRow); 01431 } 01432 } 01433 else { 01434 ret.offset1D = STINV; 01435 if (isLocallyIndexed ()) { 01436 ret.allocSize = lclInds2D_[myRow].size (); 01437 } 01438 else { 01439 ret.allocSize = gblInds2D_[myRow].size (); 01440 } 01441 ret.numEntries = k_numRowEntries_.h_view(myRow); 01442 } 01443 } 01444 else if (nodeNumAllocated_ == 0) { 01445 // have performed allocation, but the graph has no allocation or entries 01446 ret.allocSize = 0; 01447 ret.numEntries = 0; 01448 ret.offset1D = STINV; 01449 } 01450 else if (! indicesAreAllocated ()) { 01451 // haven't performed allocation yet; probably won't hit this code 01452 // 01453 // FIXME (mfh 07 Aug 2014) We want graph's constructors to 01454 // allocate, rather than doing lazy allocation at first insert. 01455 // This will make k_numAllocPerRow_ obsolete. 01456 const bool useNumAllocPerRow = (k_numAllocPerRow_.dimension_0 () != 0); 01457 if (useNumAllocPerRow) { 01458 ret.allocSize = k_numAllocPerRow_.h_view(myRow); 01459 } else { 01460 ret.allocSize = numAllocForAllRows_; 01461 } 01462 ret.numEntries = 0; 01463 ret.offset1D = STINV; 01464 } 01465 else { 01466 // don't know how we ended up here... 01467 TEUCHOS_TEST_FOR_EXCEPT(true); 01468 } 01469 return ret; 01470 } 01471 01472 01473 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01474 void 01475 CrsGraph< 01476 LocalOrdinal, GlobalOrdinal, 01477 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01478 staticAssertions () const 01479 { 01480 using Teuchos::OrdinalTraits; 01481 typedef LocalOrdinal LO; 01482 typedef GlobalOrdinal GO; 01483 typedef global_size_t GST; 01484 01485 // Assumption: sizeof(GlobalOrdinal) >= sizeof(LocalOrdinal): 01486 // This is so that we can store local indices in the memory 01487 // formerly occupied by global indices. 01488 Teuchos::CompileTimeAssert< sizeof(GO) < sizeof(LO) > cta_size1; 01489 (void) cta_size1; 01490 01491 // Assumption: max(GlobalOrdinal) >= max(LocalOrdinal) and 01492 // max(size_t) >= max(LocalOrdinal) 01493 // This is so that we can represent any LocalOrdinal as a 01494 // size_t, and any LocalOrdinal as a GlobalOrdinal 01495 Teuchos::CompileTimeAssert< sizeof(GST) < sizeof(size_t) > cta_size2; 01496 (void) cta_size2; 01497 01498 // can't call max() with CompileTimeAssert, because it isn't a 01499 // constant expression; will need to make this a runtime check 01500 const char msg[] = "Tpetra::CrsGraph: Object cannot be created with the " 01501 "given template arguments: size assumptions are not valid."; 01502 TEUCHOS_TEST_FOR_EXCEPTION( 01503 static_cast<size_t> (OrdinalTraits<LO>::max ()) > OrdinalTraits<size_t>::max (), 01504 std::runtime_error, msg); 01505 TEUCHOS_TEST_FOR_EXCEPTION( 01506 static_cast<GST> (OrdinalTraits<LO>::max ()) > static_cast<GST> (OrdinalTraits<GO>::max ()), 01507 std::runtime_error, msg); 01508 TEUCHOS_TEST_FOR_EXCEPTION( 01509 static_cast<size_t> (OrdinalTraits<GO>::max ()) > OrdinalTraits<GST>::max(), 01510 std::runtime_error, msg); 01511 TEUCHOS_TEST_FOR_EXCEPTION( 01512 OrdinalTraits<size_t>::max () > OrdinalTraits<GST>::max (), 01513 std::runtime_error, msg); 01514 } 01515 01516 01517 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01518 size_t 01519 CrsGraph< 01520 LocalOrdinal, GlobalOrdinal, 01521 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01522 insertIndices (const RowInfo& rowinfo, 01523 const SLocalGlobalViews &newInds, 01524 const ELocalGlobal lg, 01525 const ELocalGlobal I) 01526 { 01527 using Teuchos::ArrayView; 01528 #ifdef HAVE_TPETRA_DEBUG 01529 TEUCHOS_TEST_FOR_EXCEPTION( 01530 lg != GlobalIndices && lg != LocalIndices, std::invalid_argument, 01531 "Tpetra::CrsGraph::insertIndices: lg must be either GlobalIndices or " 01532 "LocalIndices."); 01533 #endif // HAVE_TPETRA_DEBUG 01534 size_t numNewInds = 0; 01535 if (lg == GlobalIndices) { // input indices are global 01536 ArrayView<const GlobalOrdinal> new_ginds = newInds.ginds; 01537 numNewInds = new_ginds.size(); 01538 if (I == GlobalIndices) { // store global indices 01539 ArrayView<GlobalOrdinal> gind_view = getGlobalViewNonConst(rowinfo); 01540 std::copy(new_ginds.begin(), new_ginds.end(), gind_view.begin()+rowinfo.numEntries); 01541 } 01542 else if (I == LocalIndices) { // store local indices 01543 ArrayView<LocalOrdinal> lind_view = getLocalViewNonConst(rowinfo); 01544 typename ArrayView<const GlobalOrdinal>::iterator in = new_ginds.begin(); 01545 const typename ArrayView<const GlobalOrdinal>::iterator stop = new_ginds.end(); 01546 typename ArrayView<LocalOrdinal>::iterator out = lind_view.begin()+rowinfo.numEntries; 01547 while (in != stop) { 01548 *out++ = colMap_->getLocalElement (*in++); 01549 } 01550 } 01551 } 01552 else if (lg == LocalIndices) { // input indices are local 01553 ArrayView<const LocalOrdinal> new_linds = newInds.linds; 01554 numNewInds = new_linds.size(); 01555 if (I == LocalIndices) { // store local indices 01556 ArrayView<LocalOrdinal> lind_view = getLocalViewNonConst(rowinfo); 01557 std::copy(new_linds.begin(), new_linds.end(), lind_view.begin()+rowinfo.numEntries); 01558 } 01559 else if (I == GlobalIndices) { 01560 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "Tpetra::CrsGraph::" 01561 "insertIndices: the case where the input indices are local and the " 01562 "indices to write are global (lg=LocalIndices, I=GlobalIndices) is " 01563 "not implemented, because it does not make sense." << std::endl << 01564 "If you have correct local column indices, that means the graph has " 01565 "a column Map. In that case, you should be storing local indices."); 01566 } 01567 } 01568 01569 // FIXME (mfh 07 Aug 2014) We should just sync at fillComplete, 01570 // but for now, for correctness, do the modify-sync cycle here. 01571 typedef typename device_type::host_mirror_device_type 01572 host_mirror_device_type; 01573 k_numRowEntries_.template modify<host_mirror_device_type> (); 01574 k_numRowEntries_.h_view(rowinfo.localRow) += numNewInds; 01575 k_numRowEntries_.template sync<device_type> (); 01576 01577 nodeNumEntries_ += numNewInds; 01578 setLocallyModified (); 01579 return numNewInds; 01580 } 01581 01582 01583 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01584 void 01585 CrsGraph< 01586 LocalOrdinal, GlobalOrdinal, 01587 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01588 insertGlobalIndicesImpl (const LocalOrdinal myRow, 01589 const Teuchos::ArrayView<const GlobalOrdinal>& indices) 01590 { 01591 const char tfecfFuncName[] = "insertGlobalIndicesImpl"; 01592 01593 RowInfo rowInfo = getRowInfo(myRow); 01594 const size_t numNewInds = indices.size(); 01595 const size_t newNumEntries = rowInfo.numEntries + numNewInds; 01596 if (newNumEntries > rowInfo.allocSize) { 01597 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 01598 getProfileType() == StaticProfile, std::runtime_error, 01599 ": new indices exceed statically allocated graph structure."); 01600 01601 // update allocation, doubling size to reduce # reallocations 01602 size_t newAllocSize = 2*rowInfo.allocSize; 01603 if (newAllocSize < newNumEntries) { 01604 newAllocSize = newNumEntries; 01605 } 01606 gblInds2D_[myRow].resize(newAllocSize); 01607 nodeNumAllocated_ += (newAllocSize - rowInfo.allocSize); 01608 } 01609 01610 // Copy new indices at end of global index array 01611 if (gblInds1D_ != null) 01612 std::copy(indices.begin(), indices.end(), 01613 gblInds1D_.begin()+rowInfo.offset1D+rowInfo.numEntries); 01614 else 01615 std::copy(indices.begin(), indices.end(), 01616 gblInds2D_[myRow].begin()+rowInfo.numEntries); 01617 01618 // FIXME (mfh 07 Aug 2014) We should just sync at fillComplete, 01619 // but for now, for correctness, do the modify-sync cycle here. 01620 typedef typename device_type::host_mirror_device_type 01621 host_mirror_device_type; 01622 k_numRowEntries_.template modify<host_mirror_device_type> (); 01623 k_numRowEntries_.h_view(myRow) += numNewInds; 01624 k_numRowEntries_.template sync<device_type> (); 01625 01626 nodeNumEntries_ += numNewInds; 01627 setLocallyModified (); 01628 01629 #ifdef HAVE_TPETRA_DEBUG 01630 { 01631 const size_t chkNewNumEntries = getNumEntriesInLocalRow (myRow); 01632 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 01633 chkNewNumEntries != newNumEntries, std::logic_error, 01634 ": Internal logic error. Please contact Tpetra team."); 01635 } 01636 #endif 01637 } 01638 01639 01640 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01641 void 01642 CrsGraph< 01643 LocalOrdinal, GlobalOrdinal, 01644 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01645 insertLocalIndicesImpl (const LocalOrdinal myRow, 01646 const Teuchos::ArrayView<const LocalOrdinal>& indices) 01647 { 01648 using Kokkos::MemoryUnmanaged; 01649 using Kokkos::subview; 01650 using Kokkos::View; 01651 typedef LocalOrdinal LO; 01652 const char* tfecfFuncName ("insertLocallIndicesImpl"); 01653 01654 RowInfo rowInfo = getRowInfo(myRow); 01655 const size_t numNewInds = indices.size(); 01656 const size_t newNumEntries = rowInfo.numEntries + numNewInds; 01657 if (newNumEntries > rowInfo.allocSize) { 01658 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 01659 getProfileType() == StaticProfile, std::runtime_error, 01660 ": new indices exceed statically allocated graph structure."); 01661 01662 // update allocation, doubling size to reduce number of reallocations 01663 size_t newAllocSize = 2*rowInfo.allocSize; 01664 if (newAllocSize < newNumEntries) 01665 newAllocSize = newNumEntries; 01666 lclInds2D_[myRow].resize(newAllocSize); 01667 nodeNumAllocated_ += (newAllocSize - rowInfo.allocSize); 01668 } 01669 01670 // Store the new indices at the end of row myRow. 01671 if (k_lclInds1D_.dimension_0 () != 0) { 01672 typedef View<const LO*, device_type, MemoryUnmanaged> input_view_type; 01673 typedef View<LO*, device_type> row_view_type; 01674 01675 input_view_type inputInds (indices.getRawPtr (), indices.size ()); 01676 const size_t start = rowInfo.offset1D + rowInfo.numEntries; // end of row 01677 const std::pair<size_t, size_t> rng (start, start + newNumEntries); 01678 row_view_type myInds = subview<row_view_type> (k_lclInds1D_, rng); 01679 Kokkos::deep_copy (myInds, inputInds); 01680 } 01681 else { 01682 std::copy (indices.begin (), indices.end (), 01683 lclInds2D_[myRow].begin () + rowInfo.numEntries); 01684 } 01685 01686 // FIXME (mfh 07 Aug 2014) We should just sync at fillComplete, 01687 // but for now, for correctness, do the modify-sync cycle here. 01688 typedef typename device_type::host_mirror_device_type 01689 host_mirror_device_type; 01690 k_numRowEntries_.template modify<host_mirror_device_type> (); 01691 k_numRowEntries_.h_view(myRow) += numNewInds; 01692 k_numRowEntries_.template sync<device_type> (); 01693 01694 nodeNumEntries_ += numNewInds; 01695 setLocallyModified (); 01696 #ifdef HAVE_TPETRA_DEBUG 01697 { 01698 const size_t chkNewNumEntries = getNumEntriesInLocalRow (myRow); 01699 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 01700 chkNewNumEntries != newNumEntries, std::logic_error, 01701 ": Internal logic error. Please contact Tpetra team."); 01702 } 01703 #endif 01704 } 01705 01706 01707 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01708 template <class Scalar> 01709 void 01710 CrsGraph< 01711 LocalOrdinal, GlobalOrdinal, 01712 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01713 insertIndicesAndValues (const RowInfo& rowInfo, 01714 const SLocalGlobalViews& newInds, 01715 const Teuchos::ArrayView<Scalar>& oldRowVals, 01716 const Teuchos::ArrayView<const Scalar>& newRowVals, 01717 const ELocalGlobal lg, 01718 const ELocalGlobal I) 01719 { 01720 #ifdef HAVE_TPETRA_DEBUG 01721 size_t numNewInds = 0; 01722 try { 01723 numNewInds = insertIndices (rowInfo, newInds, lg, I); 01724 } catch (std::exception& e) { 01725 TEUCHOS_TEST_FOR_EXCEPTION( 01726 true, std::runtime_error, "Tpetra::CrsGraph::insertIndicesAndValues: " 01727 "insertIndices threw an exception: " << e.what ()); 01728 } 01729 TEUCHOS_TEST_FOR_EXCEPTION( 01730 numNewInds > oldRowVals.size (), std::runtime_error, 01731 "Tpetra::CrsGraph::insertIndicesAndValues: numNewInds (" << numNewInds 01732 << ") > oldRowVals.size() (" << oldRowVals.size () << "."); 01733 #else 01734 const size_t numNewInds = insertIndices (rowInfo, newInds, lg, I); 01735 #endif // HAVE_TPETRA_DEBUG 01736 01737 typedef typename Teuchos::ArrayView<Scalar>::size_type size_type; 01738 01739 #ifdef HAVE_TPETRA_DEBUG 01740 TEUCHOS_TEST_FOR_EXCEPTION( 01741 rowInfo.numEntries + numNewInds > oldRowVals.size (), std::runtime_error, 01742 "Tpetra::CrsGraph::insertIndicesAndValues: rowInfo.numEntries (" << 01743 rowInfo.numEntries << ") + numNewInds (" << numNewInds << 01744 ") > oldRowVals.size() (" << oldRowVals.size () << ")."); 01745 TEUCHOS_TEST_FOR_EXCEPTION( 01746 static_cast<size_type> (numNewInds) > newRowVals.size (), 01747 std::runtime_error, "Tpetra::CrsGraph::insertIndicesAndValues: " 01748 "numNewInds (" << numNewInds << ") > newRowVals.size() (" 01749 << newRowVals.size () << ")."); 01750 #endif // HAVE_TPETRA_DEBUG 01751 01752 size_type oldInd = static_cast<size_type> (rowInfo.numEntries); 01753 #ifdef HAVE_TPETRA_DEBUG 01754 try { 01755 #endif // HAVE_TPETRA_DEBUG 01756 for (size_type newInd = 0; newInd < static_cast<size_type> (numNewInds); 01757 ++newInd, ++oldInd) { 01758 oldRowVals[oldInd] = newRowVals[newInd]; 01759 } 01760 #ifdef HAVE_TPETRA_DEBUG 01761 } 01762 catch (std::exception& e) { 01763 TEUCHOS_TEST_FOR_EXCEPTION( 01764 true, std::runtime_error, "Tpetra::CrsGraph::insertIndicesAndValues: " 01765 "for loop for copying values threw an exception: " << e.what ()); 01766 } 01767 #endif // HAVE_TPETRA_DEBUG 01768 } 01769 01770 01771 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01772 void 01773 CrsGraph< 01774 LocalOrdinal, GlobalOrdinal, 01775 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01776 sortRowIndices (const RowInfo rowinfo) 01777 { 01778 if (rowinfo.numEntries > 0) { 01779 Teuchos::ArrayView<LocalOrdinal> inds_view = 01780 this->getLocalViewNonConst (rowinfo); 01781 std::sort (inds_view.begin (), inds_view.begin () + rowinfo.numEntries); 01782 } 01783 } 01784 01785 01786 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01787 template <class Scalar> 01788 void 01789 CrsGraph< 01790 LocalOrdinal, GlobalOrdinal, 01791 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01792 sortRowIndicesAndValues (const RowInfo rowinfo, 01793 const Teuchos::ArrayView<Scalar>& values) 01794 { 01795 if (rowinfo.numEntries > 0) { 01796 Teuchos::ArrayView<LocalOrdinal> inds_view = 01797 this->getLocalViewNonConst (rowinfo); 01798 sort2 (inds_view.begin (), inds_view.begin () + rowinfo.numEntries, 01799 values.begin ()); 01800 } 01801 } 01802 01803 01804 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01805 void 01806 CrsGraph< 01807 LocalOrdinal, GlobalOrdinal, 01808 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01809 mergeRowIndices (RowInfo rowinfo) 01810 { 01811 using Teuchos::ArrayView; 01812 const char tfecfFuncName[] = "mergeRowIndices: "; 01813 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 01814 isStorageOptimized (), std::logic_error, "The graph is already storage " 01815 "optimized, so we shouldn't be merging any indices. " 01816 "Please report this bug to the Tpetra developers."); 01817 01818 ArrayView<LocalOrdinal> inds_view = this->getLocalViewNonConst (rowinfo); 01819 typename ArrayView<LocalOrdinal>::iterator beg, end, newend; 01820 beg = inds_view.begin(); 01821 end = inds_view.begin() + rowinfo.numEntries; 01822 newend = std::unique(beg,end); 01823 const size_t mergedEntries = newend - beg; 01824 #ifdef HAVE_TPETRA_DEBUG 01825 // merge should not have eliminated any entries; if so, the 01826 // assignment below will destroy the packed structure 01827 TEUCHOS_TEST_FOR_EXCEPT( isStorageOptimized () && mergedEntries != rowinfo.numEntries ); 01828 #endif // HAVE_TPETRA_DEBUG 01829 01830 // FIXME (mfh 07 Aug 2014) We should just sync at fillComplete, 01831 // but for now, for correctness, do the modify-sync cycle here. 01832 typedef typename device_type::host_mirror_device_type 01833 host_mirror_device_type; 01834 k_numRowEntries_.template modify<host_mirror_device_type> (); 01835 k_numRowEntries_.h_view(rowinfo.localRow) = mergedEntries; 01836 k_numRowEntries_.template sync<device_type> (); 01837 01838 nodeNumEntries_ -= (rowinfo.numEntries - mergedEntries); 01839 } 01840 01841 01842 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01843 template<class Scalar> 01844 void 01845 CrsGraph< 01846 LocalOrdinal, GlobalOrdinal, 01847 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01848 mergeRowIndicesAndValues (RowInfo rowinfo, 01849 const Teuchos::ArrayView<Scalar>& rowValues) 01850 { 01851 using Teuchos::ArrayView; 01852 const char tfecfFuncName[] = "mergeRowIndicesAndValues: "; 01853 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 01854 isStorageOptimized(), std::logic_error, "It is invalid to call this " 01855 "method if the graph's storage has already been optimized. Please " 01856 "report this bug to the Tpetra developers."); 01857 01858 typedef typename ArrayView<Scalar>::iterator Iter; 01859 Iter rowValueIter = rowValues.begin (); 01860 ArrayView<LocalOrdinal> inds_view = getLocalViewNonConst (rowinfo); 01861 typename ArrayView<LocalOrdinal>::iterator beg, end, newend; 01862 01863 // beg,end define a half-exclusive interval over which to iterate. 01864 beg = inds_view.begin(); 01865 end = inds_view.begin() + rowinfo.numEntries; 01866 newend = beg; 01867 if (beg != end) { 01868 typename ArrayView<LocalOrdinal>::iterator cur = beg + 1; 01869 Iter vcur = rowValueIter + 1; 01870 Iter vend = rowValueIter; 01871 cur = beg+1; 01872 while (cur != end) { 01873 if (*cur != *newend) { 01874 // new entry; save it 01875 ++newend; 01876 ++vend; 01877 (*newend) = (*cur); 01878 (*vend) = (*vcur); 01879 } 01880 else { 01881 // old entry; merge it 01882 //(*vend) = f (*vend, *vcur); 01883 (*vend) += *vcur; 01884 } 01885 ++cur; 01886 ++vcur; 01887 } 01888 ++newend; // one past the last entry, per typical [beg,end) semantics 01889 } 01890 const size_t mergedEntries = newend - beg; 01891 #ifdef HAVE_TPETRA_DEBUG 01892 // merge should not have eliminated any entries; if so, the 01893 // assignment below will destroy the packed structure 01894 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 01895 isStorageOptimized() && mergedEntries != rowinfo.numEntries, 01896 std::logic_error, 01897 ": Merge was incorrect; it eliminated entries from the graph. " 01898 << std::endl << "Please report this bug to the Tpetra developers."); 01899 #endif // HAVE_TPETRA_DEBUG 01900 01901 // FIXME (mfh 07 Aug 2014) We should just sync at fillComplete, 01902 // but for now, for correctness, do the modify-sync cycle here. 01903 typedef typename device_type::host_mirror_device_type 01904 host_mirror_device_type; 01905 k_numRowEntries_.template modify<host_mirror_device_type> (); 01906 k_numRowEntries_.h_view(rowinfo.localRow) = mergedEntries; 01907 k_numRowEntries_.template sync<device_type> (); 01908 01909 nodeNumEntries_ -= (rowinfo.numEntries - mergedEntries); 01910 } 01911 01912 01913 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01914 void 01915 CrsGraph< 01916 LocalOrdinal, GlobalOrdinal, 01917 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01918 setDomainRangeMaps (const Teuchos::RCP<const map_type>& domainMap, 01919 const Teuchos::RCP<const map_type>& rangeMap) 01920 { 01921 // simple pointer comparison for equality 01922 if (domainMap_ != domainMap) { 01923 domainMap_ = domainMap; 01924 importer_ = null; 01925 } 01926 if (rangeMap_ != rangeMap) { 01927 rangeMap_ = rangeMap; 01928 exporter_ = null; 01929 } 01930 } 01931 01932 01933 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01934 size_t 01935 CrsGraph< 01936 LocalOrdinal, GlobalOrdinal, 01937 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01938 findLocalIndex (RowInfo rowinfo, LocalOrdinal ind, size_t hint) const 01939 { 01940 using Teuchos::ArrayView; 01941 ArrayView<const LocalOrdinal> colInds = this->getLocalView (rowinfo); 01942 return this->findLocalIndex (rowinfo, ind, colInds, hint); 01943 } 01944 01945 01946 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01947 size_t 01948 CrsGraph< 01949 LocalOrdinal, GlobalOrdinal, 01950 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01951 findLocalIndex (RowInfo rowinfo, 01952 LocalOrdinal ind, 01953 Teuchos::ArrayView<const LocalOrdinal> colInds, 01954 size_t hint) const 01955 { 01956 typedef typename Teuchos::ArrayView<const LocalOrdinal>::iterator IT; 01957 01958 // If the hint was correct, then the hint is the offset to return. 01959 if (hint < rowinfo.numEntries && colInds[hint] == ind) { 01960 return hint; 01961 } 01962 01963 // The hint was wrong, so we must search for the given column 01964 // index in the column indices for the given row. How we do the 01965 // search depends on whether the graph's column indices are 01966 // sorted. 01967 IT beg = colInds.begin (); 01968 IT end = beg + rowinfo.numEntries; 01969 IT ptr = beg + rowinfo.numEntries; // "null" 01970 bool found = true; 01971 01972 if (isSorted ()) { 01973 std::pair<IT,IT> p = std::equal_range (beg, end, ind); // binary search 01974 if (p.first == p.second) { 01975 found = false; 01976 } else { 01977 ptr = p.first; 01978 } 01979 } 01980 else { 01981 ptr = std::find (beg, end, ind); // direct search 01982 if (ptr == end) { 01983 found = false; 01984 } 01985 } 01986 01987 if (found) { 01988 return static_cast<size_t> (ptr - beg); 01989 } 01990 else { 01991 return Teuchos::OrdinalTraits<size_t>::invalid (); 01992 } 01993 } 01994 01995 01996 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01997 size_t 01998 CrsGraph< 01999 LocalOrdinal, GlobalOrdinal, 02000 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02001 findGlobalIndex (RowInfo rowinfo, GlobalOrdinal ind, size_t hint) const 02002 { 02003 using Teuchos::ArrayView; 02004 typedef typename ArrayView<const GlobalOrdinal>::iterator IT; 02005 02006 // Don't let an invalid global column index through. 02007 if (ind == Teuchos::OrdinalTraits<GlobalOrdinal>::invalid ()) { 02008 return Teuchos::OrdinalTraits<size_t>::invalid (); 02009 } 02010 02011 ArrayView<const GlobalOrdinal> indices = getGlobalView (rowinfo); 02012 02013 // We don't actually require that the hint be a valid index. 02014 // If it is not in range, we just ignore it. 02015 if (hint < rowinfo.numEntries && indices[hint] == ind) { 02016 return hint; 02017 } 02018 02019 IT beg = indices.begin (); 02020 IT end = indices.begin () + rowinfo.numEntries; // not indices.end() 02021 if (isSorted ()) { // use binary search 02022 const std::pair<IT,IT> p = std::equal_range (beg, end, ind); 02023 if (p.first == p.second) { // range of matching entries is empty 02024 return Teuchos::OrdinalTraits<size_t>::invalid (); 02025 } else { 02026 return p.first - beg; 02027 } 02028 } 02029 else { // not sorted; must use linear search 02030 const IT loc = std::find (beg, end, ind); 02031 if (loc == end) { 02032 return Teuchos::OrdinalTraits<size_t>::invalid (); 02033 } else { 02034 return loc - beg; 02035 } 02036 } 02037 } 02038 02039 02040 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02041 void 02042 CrsGraph< 02043 LocalOrdinal, GlobalOrdinal, 02044 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02045 clearGlobalConstants () 02046 { 02047 globalNumEntries_ = Teuchos::OrdinalTraits<global_size_t>::invalid (); 02048 globalNumDiags_ = Teuchos::OrdinalTraits<global_size_t>::invalid (); 02049 globalMaxNumRowEntries_ = Teuchos::OrdinalTraits<global_size_t>::invalid (); 02050 haveGlobalConstants_ = false; 02051 } 02052 02053 02054 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02055 void 02056 CrsGraph< 02057 LocalOrdinal, GlobalOrdinal, 02058 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02059 checkInternalState () const 02060 { 02061 #ifdef HAVE_TPETRA_DEBUG 02062 const global_size_t GSTI = Teuchos::OrdinalTraits<global_size_t>::invalid (); 02063 const size_t STI = Teuchos::OrdinalTraits<size_t>::invalid (); 02064 const char err[] = "Tpetra::CrsGraph::checkInternalState: Likely internal " 02065 "logic error. Please contact Tpetra team."; 02066 // check the internal state of this data structure 02067 // this is called by numerous state-changing methods, in a debug build, to ensure that the object 02068 // always remains in a valid state 02069 // the graph should have been allocated with a row map 02070 TEUCHOS_TEST_FOR_EXCEPTION( rowMap_ == null, std::logic_error, err ); 02071 // am either complete or active 02072 TEUCHOS_TEST_FOR_EXCEPTION( isFillActive() == isFillComplete(), std::logic_error, err ); 02073 // if the graph has been fill completed, then all maps should be present 02074 TEUCHOS_TEST_FOR_EXCEPTION( isFillComplete() == true && (colMap_ == null || rangeMap_ == null || domainMap_ == null), std::logic_error, err ); 02075 // if storage has been optimized, then indices should have been allocated (even if trivially so) 02076 TEUCHOS_TEST_FOR_EXCEPTION( isStorageOptimized() == true && indicesAreAllocated() == false, std::logic_error, err ); 02077 // if storage has been optimized, then number of allocated is now the number of entries 02078 TEUCHOS_TEST_FOR_EXCEPTION( isStorageOptimized() == true && getNodeAllocationSize() != getNodeNumEntries(), std::logic_error, err ); 02079 // if graph doesn't have the global constants, then they should all be marked as invalid 02080 TEUCHOS_TEST_FOR_EXCEPTION( haveGlobalConstants_ == false && ( globalNumEntries_ != GSTI || globalNumDiags_ != GSTI || globalMaxNumRowEntries_ != GSTI ), std::logic_error, err ); 02081 // if the graph has global cosntants, then they should be valid. 02082 TEUCHOS_TEST_FOR_EXCEPTION( haveGlobalConstants_ == true && ( globalNumEntries_ == GSTI || globalNumDiags_ == GSTI || globalMaxNumRowEntries_ == GSTI ), std::logic_error, err ); 02083 TEUCHOS_TEST_FOR_EXCEPTION( haveGlobalConstants_ == true && ( globalNumEntries_ < nodeNumEntries_ || globalNumDiags_ < nodeNumDiags_ || globalMaxNumRowEntries_ < nodeMaxNumRowEntries_ ), 02084 std::logic_error, err ); 02085 // if indices are allocated, then the allocation specifications should have been released 02086 TEUCHOS_TEST_FOR_EXCEPTION( 02087 indicesAreAllocated () && (numAllocForAllRows_ != 0 || 02088 k_numAllocPerRow_.dimension_0 () != 0), 02089 std::logic_error, err ); 02090 // if indices are not allocated, then information dictating allocation quantities should be present 02091 TEUCHOS_TEST_FOR_EXCEPTION( 02092 ! indicesAreAllocated () && (nodeNumAllocated_ != STI || 02093 nodeNumEntries_ != 0), 02094 std::logic_error, err ); 02095 // if storage is optimized, then profile should be static 02096 TEUCHOS_TEST_FOR_EXCEPTION( isStorageOptimized() && pftype_ != StaticProfile, std::logic_error, err ); 02097 02098 // If k_rowPtrs_ exists (has nonzero size), it must have N+1 rows, 02099 // and k_rowPtrs_(N) must equal k_gblInds1D_.dimension_0() (if 02100 // globally indexed) or k_lclInds1D_.dimension_0() (if locally 02101 // indexed). 02102 02103 TEUCHOS_TEST_FOR_EXCEPTION( 02104 isGloballyIndexed () && k_rowPtrs_.dimension_0 () != 0 && 02105 (static_cast<size_t> (k_rowPtrs_.dimension_0 ()) != getNodeNumRows () + 1 || 02106 k_rowPtrs_(getNodeNumRows ()) != static_cast<size_t> (k_gblInds1D_.dimension_0 ())), 02107 std::logic_error, err ); 02108 02109 TEUCHOS_TEST_FOR_EXCEPTION( 02110 isLocallyIndexed () && k_rowPtrs_.dimension_0 () != 0 && 02111 (static_cast<size_t> (k_rowPtrs_.dimension_0 ()) != getNodeNumRows () + 1 || 02112 k_rowPtrs_(getNodeNumRows ()) != static_cast<size_t> (k_lclInds1D_.dimension_0 ())), 02113 std::logic_error, err ); 02114 02115 // If profile is dynamic and indices are allocated, then 2-D 02116 // allocations of column index storage (either local or global) 02117 // must be present. 02118 TEUCHOS_TEST_FOR_EXCEPTION( 02119 pftype_ == DynamicProfile && 02120 indicesAreAllocated () && 02121 getNodeNumRows () > 0 && 02122 lclInds2D_.is_null () && gblInds2D_.is_null (), 02123 std::logic_error, err ); 02124 02125 // If profile is dynamic and the calling process owns nonzero 02126 // rows, then k_numRowEntries_ and 2-D storage of column indices 02127 // (whether local or global) must be present. 02128 TEUCHOS_TEST_FOR_EXCEPTION( 02129 pftype_ == DynamicProfile && 02130 indicesAreAllocated () && 02131 getNodeNumRows () > 0 && 02132 (k_numRowEntries_.dimension_0 () == 0 || (lclInds2D_.is_null () && gblInds2D_.is_null ())), 02133 std::logic_error, err ); 02134 02135 // if profile is dynamic, then 1D allocations should not be present 02136 TEUCHOS_TEST_FOR_EXCEPTION( 02137 pftype_ == DynamicProfile && 02138 (k_lclInds1D_.dimension_0 () != 0 || k_gblInds1D_.dimension_0 () != 0), 02139 std::logic_error, err ); 02140 // if profile is dynamic, then row offsets should not be present 02141 TEUCHOS_TEST_FOR_EXCEPTION( 02142 pftype_ == DynamicProfile && k_rowPtrs_.dimension_0 () != 0, 02143 std::logic_error, err ); 02144 // if profile is static and we have allocated non-trivially, then 02145 // 1D allocations should be present 02146 TEUCHOS_TEST_FOR_EXCEPTION( 02147 pftype_ == StaticProfile && indicesAreAllocated () && 02148 getNodeAllocationSize () > 0 && k_lclInds1D_.dimension_0 () == 0 && 02149 k_gblInds1D_.dimension_0 () == 0, 02150 std::logic_error, err); 02151 // if profile is static, then 2D allocations should not be present 02152 TEUCHOS_TEST_FOR_EXCEPTION( 02153 pftype_ == StaticProfile && (lclInds2D_ != null || gblInds2D_ != null), 02154 std::logic_error, err ); 02155 02156 // if indices are not allocated, then none of the buffers should be. 02157 TEUCHOS_TEST_FOR_EXCEPTION( 02158 ! indicesAreAllocated () && 02159 ((k_rowPtrs_.dimension_0 () != 0 || k_numRowEntries_.dimension_0 () != 0) || 02160 k_lclInds1D_.dimension_0 () != 0 || lclInds2D_ != null || 02161 k_gblInds1D_.dimension_0 () != 0 || gblInds2D_ != null), 02162 std::logic_error, err ); 02163 02164 // indices may be local or global only if they are allocated 02165 // (numAllocated is redundant; could simply be indicesAreLocal_ || 02166 // indicesAreGlobal_) 02167 TEUCHOS_TEST_FOR_EXCEPTION( (indicesAreLocal_ || indicesAreGlobal_) && ! indicesAreAllocated_, std::logic_error, err ); 02168 // indices may be local or global, but not both 02169 TEUCHOS_TEST_FOR_EXCEPTION( indicesAreLocal_ == true && indicesAreGlobal_ == true, std::logic_error, err ); 02170 // if indices are local, then global allocations should not be present 02171 TEUCHOS_TEST_FOR_EXCEPTION( 02172 indicesAreLocal_ && (k_gblInds1D_.dimension_0 () != 0 || gblInds2D_ != null), 02173 std::logic_error, err ); 02174 // if indices are global, then local allocations should not be present 02175 TEUCHOS_TEST_FOR_EXCEPTION( 02176 indicesAreGlobal_ && (k_lclInds1D_.dimension_0 () != 0 || lclInds2D_ != null), 02177 std::logic_error, err ); 02178 // if indices are local, then local allocations should be present 02179 TEUCHOS_TEST_FOR_EXCEPTION( 02180 indicesAreLocal_ && getNodeAllocationSize () > 0 && 02181 k_lclInds1D_.dimension_0 () == 0 && getNodeNumRows () > 0 && 02182 lclInds2D_.is_null (), 02183 std::logic_error, err); 02184 // if indices are global, then global allocations should be present 02185 TEUCHOS_TEST_FOR_EXCEPTION( 02186 indicesAreGlobal_ && getNodeAllocationSize () > 0 && 02187 k_gblInds1D_.dimension_0 () == 0 && getNodeNumRows () > 0 && 02188 gblInds2D_.is_null (), 02189 std::logic_error, err); 02190 // if indices are allocated, then we should have recorded how many were allocated 02191 TEUCHOS_TEST_FOR_EXCEPTION( indicesAreAllocated() == true && nodeNumAllocated_ == STI, std::logic_error, err ); 02192 // if indices are not allocated, then the allocation size should be marked invalid 02193 TEUCHOS_TEST_FOR_EXCEPTION( indicesAreAllocated() == false && nodeNumAllocated_ != STI, std::logic_error, err ); 02194 // check the actual allocations 02195 if (indicesAreAllocated()) { 02196 size_t actualNumAllocated = 0; 02197 if (pftype_ == DynamicProfile) { 02198 if (isGloballyIndexed() && gblInds2D_ != null) { 02199 for (size_t r = 0; r < getNodeNumRows(); ++r) { 02200 actualNumAllocated += gblInds2D_[r].size(); 02201 } 02202 } 02203 else if (isLocallyIndexed() && lclInds2D_ != null) { 02204 for (size_t r = 0; r < getNodeNumRows(); ++r) { 02205 actualNumAllocated += lclInds2D_[r].size(); 02206 } 02207 } 02208 TEUCHOS_TEST_FOR_EXCEPTION(actualNumAllocated != nodeNumAllocated_, std::logic_error, err ); 02209 } 02210 else if (k_rowPtrs_.dimension_0 () != 0) { // pftype_ == StaticProfile 02211 TEUCHOS_TEST_FOR_EXCEPTION( 02212 static_cast<size_t> (k_rowPtrs_.dimension_0 ()) != getNodeNumRows () + 1, 02213 std::logic_error, err); 02214 02215 actualNumAllocated = k_rowPtrs_(getNodeNumRows ()); 02216 TEUCHOS_TEST_FOR_EXCEPTION( 02217 isLocallyIndexed () && 02218 static_cast<size_t> (k_lclInds1D_.dimension_0 ()) != actualNumAllocated, 02219 std::logic_error, err ); 02220 TEUCHOS_TEST_FOR_EXCEPTION( 02221 isGloballyIndexed () && 02222 static_cast<size_t> (k_gblInds1D_.dimension_0 ()) != actualNumAllocated, 02223 std::logic_error, err ); 02224 TEUCHOS_TEST_FOR_EXCEPTION(actualNumAllocated != nodeNumAllocated_, std::logic_error, err ); 02225 } 02226 } 02227 #endif // HAVE_TPETRA_DEBUG 02228 } 02229 02230 02231 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02232 size_t 02233 CrsGraph< 02234 LocalOrdinal, GlobalOrdinal, 02235 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02236 getNumEntriesInGlobalRow (GlobalOrdinal globalRow) const 02237 { 02238 using Teuchos::OrdinalTraits; 02239 const LocalOrdinal lrow = rowMap_->getLocalElement (globalRow); 02240 if (hasRowInfo () && lrow != OrdinalTraits<LocalOrdinal>::invalid ()) { 02241 const RowInfo rowinfo = getRowInfo (lrow); 02242 return rowinfo.numEntries; 02243 } else { 02244 return OrdinalTraits<size_t>::invalid (); 02245 } 02246 } 02247 02248 02249 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02250 size_t 02251 CrsGraph< 02252 LocalOrdinal, GlobalOrdinal, 02253 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02254 getNumEntriesInLocalRow (LocalOrdinal localRow) const 02255 { 02256 if (hasRowInfo () && rowMap_->isNodeLocalElement (localRow)) { 02257 const RowInfo rowinfo = getRowInfo (localRow); 02258 return rowinfo.numEntries; 02259 } else { 02260 return Teuchos::OrdinalTraits<size_t>::invalid (); 02261 } 02262 } 02263 02264 02265 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02266 size_t 02267 CrsGraph< 02268 LocalOrdinal, GlobalOrdinal, 02269 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02270 getNumAllocatedEntriesInGlobalRow (GlobalOrdinal globalRow) const 02271 { 02272 const LocalOrdinal lrow = rowMap_->getLocalElement (globalRow); 02273 if (hasRowInfo () && lrow != Teuchos::OrdinalTraits<LocalOrdinal>::invalid ()) { 02274 const RowInfo rowinfo = getRowInfo (lrow); 02275 return rowinfo.allocSize; 02276 } else { 02277 return Teuchos::OrdinalTraits<size_t>::invalid (); 02278 } 02279 } 02280 02281 02282 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02283 size_t 02284 CrsGraph< 02285 LocalOrdinal, GlobalOrdinal, 02286 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02287 getNumAllocatedEntriesInLocalRow (LocalOrdinal localRow) const 02288 { 02289 if (hasRowInfo () && rowMap_->isNodeLocalElement (localRow)) { 02290 const RowInfo rowinfo = getRowInfo (localRow); 02291 return rowinfo.allocSize; 02292 } else { 02293 return Teuchos::OrdinalTraits<size_t>::invalid(); 02294 } 02295 } 02296 02297 02298 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02299 Teuchos::ArrayRCP<const size_t> 02300 CrsGraph< 02301 LocalOrdinal, GlobalOrdinal, 02302 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02303 getNodeRowPtrs () const 02304 { 02305 return Kokkos::Compat::persistingView (k_rowPtrs_); 02306 } 02307 02308 02309 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02310 Teuchos::ArrayRCP<const LocalOrdinal> 02311 CrsGraph< 02312 LocalOrdinal, GlobalOrdinal, 02313 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02314 getNodePackedIndices () const 02315 { 02316 return Kokkos::Compat::persistingView (k_lclInds1D_); 02317 } 02318 02319 02320 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02321 void 02322 CrsGraph< 02323 LocalOrdinal, GlobalOrdinal, 02324 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02325 getLocalRowCopy (LocalOrdinal localRow, 02326 const Teuchos::ArrayView<LocalOrdinal>&indices, 02327 size_t& numEntries) const 02328 { 02329 using Teuchos::ArrayView; 02330 typedef LocalOrdinal LO; 02331 typedef GlobalOrdinal GO; 02332 02333 TEUCHOS_TEST_FOR_EXCEPTION( 02334 isGloballyIndexed () && ! hasColMap (), std::runtime_error, 02335 "Tpetra::CrsGraph::getLocalRowCopy: The graph is globally indexed and " 02336 "does not have a column Map yet. That means we don't have local indices " 02337 "for columns yet, so it doesn't make sense to call this method. If the " 02338 "graph doesn't have a column Map yet, you should call fillComplete on " 02339 "it first."); 02340 TEUCHOS_TEST_FOR_EXCEPTION( 02341 ! hasRowInfo(), std::runtime_error, 02342 "Tpetra::CrsGraph::getLocalRowCopy: graph row information was deleted " 02343 "at fillComplete()."); 02344 02345 if (! getRowMap ()->isNodeLocalElement (localRow)) { 02346 numEntries = 0; 02347 return; 02348 } 02349 02350 const RowInfo rowinfo = getRowInfo (localRow); 02351 const size_t theNumEntries = rowinfo.numEntries; 02352 02353 TEUCHOS_TEST_FOR_EXCEPTION( 02354 static_cast<size_t> (indices.size ()) < theNumEntries, 02355 std::runtime_error, 02356 "Tpetra::CrsGraph::getLocalRowCopy: The given row " << localRow << " has " 02357 << theNumEntries << " entries, but indices.size() = " << indices.size () 02358 << ", which does not suffice to store the row's indices."); 02359 02360 numEntries = theNumEntries; 02361 02362 if (isLocallyIndexed ()) { 02363 ArrayView<const LO> lview = getLocalView (rowinfo); 02364 std::copy (lview.begin (), lview.begin () + numEntries, indices.begin ()); 02365 } 02366 else if (isGloballyIndexed ()) { 02367 ArrayView<const GO> gview = getGlobalView (rowinfo); 02368 const map_type& colMap = * (this->getColMap ()); 02369 for (size_t j = 0; j < numEntries; ++j) { 02370 indices[j] = colMap.getLocalElement (gview[j]); 02371 } 02372 } 02373 else { 02374 // If the graph on the calling process is neither locally nor 02375 // globally indexed, that means it owns no column indices. 02376 // 02377 // FIXME (mfh 21 Oct 2013) It's not entirely clear to me whether 02378 // we can reach this branch, given the checks above. However, 02379 // if that is the case, it should still be correct to call this 02380 // function if the calling process owns no column indices. 02381 numEntries = 0; 02382 } 02383 } 02384 02385 02386 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02387 void 02388 CrsGraph< 02389 LocalOrdinal, GlobalOrdinal, 02390 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02391 getGlobalRowCopy (GlobalOrdinal globalRow, 02392 const Teuchos::ArrayView<GlobalOrdinal>& indices, 02393 size_t& NumIndices) const 02394 { 02395 using Teuchos::ArrayView; 02396 const char tfecfFuncName[] = "getGlobalRowCopy: "; 02397 // we either currently store global indices, or we have a column 02398 // map with which to transcribe our local indices for the user 02399 const LocalOrdinal lrow = rowMap_->getLocalElement (globalRow); 02400 02401 // FIXME (mfh 22 Aug 2014) Instead of throwing an exception, 02402 // should just set NumIndices=0 and return. In that case, the 02403 // calling process owns no entries in that row, so the right thing 02404 // to do is to return an empty copy. 02405 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02406 lrow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid (), 02407 std::runtime_error, 02408 "GlobalRow (== " << globalRow << ") does not belong to this process."); 02409 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02410 ! hasRowInfo (), std::runtime_error, 02411 "Graph row information was deleted at fillComplete()."); 02412 const RowInfo rowinfo = this->getRowInfo (static_cast<size_t> (lrow)); 02413 NumIndices = rowinfo.numEntries; 02414 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02415 static_cast<size_t> (indices.size ()) < NumIndices, std::runtime_error, 02416 "Specified storage (size==" << indices.size () << ") does not suffice " 02417 "to hold all entries for this row (NumIndices == " << NumIndices << ")."); 02418 if (isLocallyIndexed ()) { 02419 ArrayView<const LocalOrdinal> lview = this->getLocalView (rowinfo); 02420 for (size_t j = 0; j < NumIndices; ++j) { 02421 indices[j] = colMap_->getGlobalElement (lview[j]); 02422 } 02423 } 02424 else if (isGloballyIndexed ()) { 02425 ArrayView<const GlobalOrdinal> gview = this->getGlobalView (rowinfo); 02426 std::copy (gview.begin (), gview.begin () + NumIndices, indices.begin ()); 02427 } 02428 } 02429 02430 02431 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02432 void 02433 CrsGraph< 02434 LocalOrdinal, GlobalOrdinal, 02435 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02436 getLocalRowView (LocalOrdinal localRow, 02437 Teuchos::ArrayView<const LocalOrdinal>& indices) const 02438 { 02439 const char tfecfFuncName[] = "getLocalRowView: "; 02440 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02441 isGloballyIndexed (), std::runtime_error, "The graph's indices are " 02442 "currently stored as global indices, so we cannot return a view with " 02443 "local column indices, whether or not the graph has a column Map. If " 02444 "the graph _does_ have a column Map, use getLocalRowCopy() instead."); 02445 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02446 ! hasRowInfo (), std::runtime_error, "Graph row information was " 02447 "deleted at fillComplete()."); 02448 indices = Teuchos::null; 02449 if (rowMap_->isNodeLocalElement (localRow)) { 02450 const RowInfo rowinfo = this->getRowInfo (localRow); 02451 if (rowinfo.numEntries > 0) { 02452 indices = this->getLocalView (rowinfo); 02453 // getLocalView returns a view of the _entire_ row, including 02454 // any extra space at the end (which 1-D unpacked storage 02455 // might have, for example). That's why we have to take a 02456 // subview of the returned view. 02457 indices = indices (0, rowinfo.numEntries); 02458 } 02459 } 02460 #ifdef HAVE_TPETRA_DEBUG 02461 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02462 static_cast<size_t> (indices.size ()) != this->getNumEntriesInLocalRow (localRow), 02463 std::logic_error, ": Violated stated post-conditions. Please contact Tpetra team."); 02464 #endif // HAVE_TPETRA_DEBUG 02465 } 02466 02467 02468 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02469 void 02470 CrsGraph< 02471 LocalOrdinal, GlobalOrdinal, 02472 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02473 getGlobalRowView (GlobalOrdinal globalRow, 02474 Teuchos::ArrayView<const GlobalOrdinal>& indices) const 02475 { 02476 const char tfecfFuncName[] = "getGlobalRowView: "; 02477 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02478 isLocallyIndexed (), std::runtime_error, "The graph's indices are " 02479 "currently stored as local indices, so we cannot return a view with " 02480 "global column indices. Use getGlobalRowCopy() instead."); 02481 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02482 ! hasRowInfo (), std::runtime_error, 02483 "Graph row information was deleted at fillComplete()."); 02484 02485 // isNodeGlobalElement() requires a global to local lookup anyway, 02486 // and getLocalElement() returns invalid() if the element wasn't found. 02487 const LocalOrdinal localRow = rowMap_->getLocalElement (globalRow); 02488 indices = Teuchos::null; 02489 if (localRow != Teuchos::OrdinalTraits<LocalOrdinal>::invalid ()) { 02490 const RowInfo rowInfo = getRowInfo (static_cast<size_t> (localRow)); 02491 if (rowInfo.numEntries > 0) { 02492 indices = (this->getGlobalView (rowInfo)) (0, rowInfo.numEntries); 02493 } 02494 } 02495 #ifdef HAVE_TPETRA_DEBUG 02496 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02497 static_cast<size_t> (indices.size ()) != this->getNumEntriesInGlobalRow (globalRow), 02498 std::logic_error, 02499 "Violated stated postconditions: indices.size() = " << indices.size () 02500 << " != getNumEntriesInGlobalRow(globalRow=" << globalRow 02501 << ") = " << this->getNumEntriesInGlobalRow (globalRow) 02502 << ". Please report this bug to the Tpetra developers."); 02503 #endif // HAVE_TPETRA_DEBUG 02504 } 02505 02506 02507 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02508 void 02509 CrsGraph< 02510 LocalOrdinal, GlobalOrdinal, 02511 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02512 insertLocalIndices (const LocalOrdinal localRow, 02513 const Teuchos::ArrayView<const LocalOrdinal>& indices) 02514 { 02515 const char tfecfFuncName[] = "insertLocalIndices"; 02516 02517 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02518 ! isFillActive (), std::runtime_error, 02519 ": requires that fill is active."); 02520 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02521 isGloballyIndexed (), std::runtime_error, 02522 ": graph indices are global; use insertGlobalIndices()."); 02523 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02524 ! hasColMap (), std::runtime_error, 02525 ": cannot insert local indices without a column map."); 02526 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02527 ! rowMap_->isNodeLocalElement (localRow), std::runtime_error, 02528 ": row does not belong to this node."); 02529 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02530 ! hasRowInfo (), std::runtime_error, 02531 ": graph row information was deleted at fillComplete()."); 02532 if (! indicesAreAllocated ()) { 02533 allocateIndices (LocalIndices); 02534 } 02535 02536 #ifdef HAVE_TPETRA_DEBUG 02537 // In a debug build, if the graph has a column Map, test whether 02538 // any of the given column indices are not in the column Map. 02539 // Keep track of the invalid column indices so we can tell the 02540 // user about them. 02541 if (hasColMap ()) { 02542 using Teuchos::Array; 02543 using Teuchos::toString; 02544 using std::endl; 02545 typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type size_type; 02546 02547 const map_type& colMap = * (getColMap ()); 02548 Array<LocalOrdinal> badColInds; 02549 bool allInColMap = true; 02550 for (size_type k = 0; k < indices.size (); ++k) { 02551 if (! colMap.isNodeLocalElement (indices[k])) { 02552 allInColMap = false; 02553 badColInds.push_back (indices[k]); 02554 } 02555 } 02556 if (! allInColMap) { 02557 std::ostringstream os; 02558 os << "Tpetra::CrsMatrix::insertLocalIndices: You attempted to insert " 02559 "entries in owned row " << localRow << ", at the following column " 02560 "indices: " << toString (indices) << "." << endl; 02561 os << "Of those, the following indices are not in the column Map on " 02562 "this process: " << toString (badColInds) << "." << endl << "Since " 02563 "the graph has a column Map already, it is invalid to insert entries " 02564 "at those locations."; 02565 TEUCHOS_TEST_FOR_EXCEPTION(! allInColMap, std::invalid_argument, os.str ()); 02566 } 02567 } 02568 #endif // HAVE_TPETRA_DEBUG 02569 02570 insertLocalIndicesImpl (localRow, indices); 02571 02572 #ifdef HAVE_TPETRA_DEBUG 02573 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02574 indicesAreAllocated() == false || isLocallyIndexed() == false, 02575 std::logic_error, 02576 ": Violated stated post-conditions. Please contact Tpetra team."); 02577 #endif 02578 } 02579 02580 02581 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02582 void 02583 CrsGraph< 02584 LocalOrdinal, GlobalOrdinal, 02585 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02586 insertLocalIndicesFiltered (const LocalOrdinal localRow, 02587 const Teuchos::ArrayView<const LocalOrdinal>& indices) 02588 { 02589 typedef LocalOrdinal LO; 02590 const char tfecfFuncName[] = "insertLocalIndicesFiltered"; 02591 02592 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02593 isFillActive() == false, std::runtime_error, 02594 ": requires that fill is active."); 02595 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02596 isGloballyIndexed() == true, std::runtime_error, 02597 ": graph indices are global; use insertGlobalIndices()."); 02598 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02599 hasColMap() == false, std::runtime_error, 02600 ": cannot insert local indices without a column map."); 02601 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02602 rowMap_->isNodeLocalElement(localRow) == false, std::runtime_error, 02603 ": row does not belong to this node."); 02604 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02605 ! hasRowInfo (), std::runtime_error, 02606 ": graph row information was deleted at fillComplete()."); 02607 if (! indicesAreAllocated ()) { 02608 allocateIndices (LocalIndices); 02609 } 02610 02611 // If we have a column map, use it to filter the entries. 02612 if (hasColMap ()) { 02613 Teuchos::Array<LO> filtered_indices (indices); 02614 SLocalGlobalViews inds_view; 02615 SLocalGlobalNCViews inds_ncview; 02616 inds_ncview.linds = filtered_indices(); 02617 const size_t numFilteredEntries = 02618 filterIndices<LocalIndices>(inds_ncview); 02619 inds_view.linds = filtered_indices (0, numFilteredEntries); 02620 insertLocalIndicesImpl(localRow, inds_view.linds); 02621 } 02622 else { 02623 insertLocalIndicesImpl(localRow, indices); 02624 } 02625 #ifdef HAVE_TPETRA_DEBUG 02626 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02627 indicesAreAllocated() == false || isLocallyIndexed() == false, 02628 std::logic_error, 02629 ": Violated stated post-conditions. Please contact Tpetra team."); 02630 #endif 02631 } 02632 02633 02634 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02635 void 02636 CrsGraph< 02637 LocalOrdinal, GlobalOrdinal, 02638 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02639 insertGlobalIndices (const GlobalOrdinal grow, 02640 const Teuchos::ArrayView<const GlobalOrdinal>& indices) 02641 { 02642 using Teuchos::ArrayView; 02643 typedef LocalOrdinal LO; 02644 typedef GlobalOrdinal GO; 02645 typedef typename ArrayView<const GO>::size_type size_type; 02646 const char tfecfFuncName[] = "insertGlobalIndices"; 02647 02648 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02649 isLocallyIndexed() == true, std::runtime_error, 02650 ": graph indices are local; use insertLocalIndices()."); 02651 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02652 ! hasRowInfo (), std::runtime_error, 02653 ": graph row information was deleted at fillComplete()."); 02654 // This can't really be satisfied for now, because if we are 02655 // fillComplete(), then we are local. In the future, this may 02656 // change. However, the rule that modification require active 02657 // fill will not change. 02658 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02659 isFillActive() == false, std::runtime_error, 02660 ": You are not allowed to call this method if fill is not active. " 02661 "If fillComplete has been called, you must first call resumeFill " 02662 "before you may insert indices."); 02663 if (! indicesAreAllocated ()) { 02664 allocateIndices (GlobalIndices); 02665 } 02666 const LO myRow = rowMap_->getLocalElement (grow); 02667 if (myRow != Teuchos::OrdinalTraits<LO>::invalid ()) { 02668 #ifdef HAVE_TPETRA_DEBUG 02669 if (hasColMap ()) { 02670 using std::endl; 02671 const map_type& colMap = * (getColMap ()); 02672 // In a debug build, keep track of the nonowned ("bad") column 02673 // indices, so that we can display them in the exception 02674 // message. In a release build, just ditch the loop early if 02675 // we encounter a nonowned column index. 02676 Array<GO> badColInds; 02677 bool allInColMap = true; 02678 for (size_type k = 0; k < indices.size (); ++k) { 02679 if (! colMap.isNodeGlobalElement (indices[k])) { 02680 allInColMap = false; 02681 badColInds.push_back (indices[k]); 02682 } 02683 } 02684 if (! allInColMap) { 02685 std::ostringstream os; 02686 os << "Tpetra::CrsGraph::insertGlobalIndices: You attempted to insert " 02687 "entries in owned row " << grow << ", at the following column " 02688 "indices: " << toString (indices) << "." << endl; 02689 os << "Of those, the following indices are not in the column Map on " 02690 "this process: " << toString (badColInds) << "." << endl << "Since " 02691 "the matrix has a column Map already, it is invalid to insert " 02692 "entries at those locations."; 02693 TEUCHOS_TEST_FOR_EXCEPTION(! allInColMap, std::invalid_argument, os.str ()); 02694 } 02695 } 02696 #endif // HAVE_TPETRA_DEBUG 02697 insertGlobalIndicesImpl (myRow, indices); 02698 } 02699 else { // a nonlocal row 02700 const size_type numIndices = indices.size (); 02701 // This creates the Array if it doesn't exist yet. std::map's 02702 // operator[] does a lookup each time, so it's better to pull 02703 // nonlocals_[grow] out of the loop. 02704 std::vector<GO>& nonlocalRow = nonlocals_[grow]; 02705 for (size_type k = 0; k < numIndices; ++k) { 02706 nonlocalRow.push_back (indices[k]); 02707 } 02708 } 02709 #ifdef HAVE_TPETRA_DEBUG 02710 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02711 indicesAreAllocated() == false || isGloballyIndexed() == false, 02712 std::logic_error, 02713 ": Violated stated post-conditions. Please contact Tpetra team."); 02714 #endif 02715 } 02716 02717 02718 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02719 void 02720 CrsGraph< 02721 LocalOrdinal, GlobalOrdinal, 02722 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02723 insertGlobalIndicesFiltered (const GlobalOrdinal grow, 02724 const Teuchos::ArrayView<const GlobalOrdinal>& indices) 02725 { 02726 using Teuchos::Array; 02727 using Teuchos::ArrayView; 02728 typedef LocalOrdinal LO; 02729 typedef GlobalOrdinal GO; 02730 const char tfecfFuncName[] = "insertGlobalIndicesFiltered"; 02731 02732 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02733 isLocallyIndexed() == true, std::runtime_error, 02734 ": graph indices are local; use insertLocalIndices()."); 02735 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02736 ! hasRowInfo (), std::runtime_error, 02737 ": graph row information was deleted at fillComplete()."); 02738 // This can't really be satisfied for now, because if we are 02739 // fillComplete(), then we are local. In the future, this may 02740 // change. However, the rule that modification require active 02741 // fill will not change. 02742 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02743 isFillActive() == false, std::runtime_error, 02744 ": You are not allowed to call this method if fill is not active. " 02745 "If fillComplete has been called, you must first call resumeFill " 02746 "before you may insert indices."); 02747 if (! indicesAreAllocated ()) { 02748 allocateIndices (GlobalIndices); 02749 } 02750 const LO myRow = rowMap_->getLocalElement (grow); 02751 if (myRow != Teuchos::OrdinalTraits<LO>::invalid ()) { 02752 // If we have a column map, use it to filter the entries. 02753 if (hasColMap ()) { 02754 Array<GO> filtered_indices(indices); 02755 SLocalGlobalViews inds_view; 02756 SLocalGlobalNCViews inds_ncview; 02757 inds_ncview.ginds = filtered_indices(); 02758 const size_t numFilteredEntries = 02759 filterIndices<GlobalIndices> (inds_ncview); 02760 inds_view.ginds = filtered_indices (0, numFilteredEntries); 02761 insertGlobalIndicesImpl(myRow, inds_view.ginds); 02762 } 02763 else { 02764 insertGlobalIndicesImpl(myRow, indices); 02765 } 02766 } 02767 else { // a nonlocal row 02768 typedef typename ArrayView<const GO>::size_type size_type; 02769 const size_type numIndices = indices.size (); 02770 for (size_type k = 0; k < numIndices; ++k) { 02771 nonlocals_[grow].push_back (indices[k]); 02772 } 02773 } 02774 #ifdef HAVE_TPETRA_DEBUG 02775 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02776 indicesAreAllocated() == false || isGloballyIndexed() == false, 02777 std::logic_error, 02778 ": Violated stated post-conditions. Please contact Tpetra team."); 02779 #endif 02780 } 02781 02782 02783 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02784 void 02785 CrsGraph< 02786 LocalOrdinal, GlobalOrdinal, 02787 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02788 removeLocalIndices (LocalOrdinal lrow) 02789 { 02790 const char tfecfFuncName[] = "removeLocalIndices: "; 02791 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02792 ! isFillActive (), std::runtime_error, "requires that fill is active."); 02793 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02794 isStorageOptimized (), std::runtime_error, 02795 "cannot remove indices after optimizeStorage() has been called."); 02796 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02797 isGloballyIndexed (), std::runtime_error, "graph indices are global."); 02798 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02799 ! rowMap_->isNodeLocalElement (lrow), std::runtime_error, 02800 "Local row " << lrow << " is not in the row Map on the calling process."); 02801 if (! indicesAreAllocated ()) { 02802 allocateIndices (LocalIndices); 02803 } 02804 02805 // FIXME (mfh 13 Aug 2014) What if they haven't been cleared on 02806 // all processes? 02807 clearGlobalConstants (); 02808 02809 if (k_numRowEntries_.dimension_0 () != 0) { 02810 const size_t oldNumEntries = k_numRowEntries_.h_view (lrow); 02811 nodeNumEntries_ -= oldNumEntries; 02812 02813 // FIXME (mfh 07 Aug 2014) We should just sync at fillComplete, 02814 // but for now, for correctness, do the modify-sync cycle here. 02815 typedef typename device_type::host_mirror_device_type 02816 host_mirror_device_type; 02817 k_numRowEntries_.template modify<host_mirror_device_type> (); 02818 k_numRowEntries_.h_view(lrow) = 0; 02819 k_numRowEntries_.template sync<device_type> (); 02820 } 02821 #ifdef HAVE_TPETRA_DEBUG 02822 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02823 getNumEntriesInLocalRow (lrow) != 0 || 02824 ! indicesAreAllocated () || 02825 ! isLocallyIndexed (), std::logic_error, 02826 ": Violated stated post-conditions. Please contact Tpetra team."); 02827 #endif // HAVE_TPETRA_DEBUG 02828 } 02829 02830 02831 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02832 void 02833 CrsGraph< 02834 LocalOrdinal, GlobalOrdinal, 02835 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02836 setAllIndices (const t_RowPtrs& rowPointers, 02837 const t_LocalOrdinal_1D& columnIndices) 02838 { 02839 const char tfecfFuncName[] = "setAllIndices: "; 02840 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02841 ! hasColMap () || getColMap ().is_null (), std::runtime_error, 02842 "The graph must have a column Map before you may call this method."); 02843 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02844 static_cast<size_t> (rowPointers.size ()) != this->getNodeNumRows () + 1, 02845 std::runtime_error, "rowPointers.size() = " << rowPointers.size () << 02846 " != this->getNodeNumRows()+1 = " << (this->getNodeNumRows () + 1) << 02847 "."); 02848 02849 // FIXME (mfh 07 Aug 2014) We need to relax this restriction, 02850 // since the future model will be allocation at construction, not 02851 // lazy allocation on first insert. 02852 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02853 k_lclInds1D_.dimension_0 () != 0 || k_gblInds1D_.dimension_0 () != 0, 02854 std::runtime_error, "You may not call this method if 1-D data structures " 02855 "are already allocated."); 02856 02857 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02858 lclInds2D_ != Teuchos::null || gblInds2D_ != Teuchos::null, 02859 std::runtime_error, "You may not call this method if 2-D data structures " 02860 "are already allocated."); 02861 02862 const size_t localNumEntries = rowPointers(getNodeNumRows ()); 02863 02864 indicesAreAllocated_ = true; 02865 indicesAreLocal_ = true; 02866 pftype_ = StaticProfile; // if the profile wasn't static before, it sure is now. 02867 k_lclInds1D_ = columnIndices; 02868 k_rowPtrs_ = rowPointers; 02869 nodeNumAllocated_ = localNumEntries; 02870 nodeNumEntries_ = localNumEntries; 02871 02872 // These normally get cleared out at the end of allocateIndices. 02873 // It makes sense to clear them out here, because at the end of 02874 // this method, the graph is allocated on the calling process. 02875 numAllocForAllRows_ = 0; 02876 k_numAllocPerRow_ = Kokkos::DualView<const size_t*, device_type> (); 02877 numAllocPerRow_ = Teuchos::null; 02878 02879 checkInternalState (); 02880 } 02881 02882 02883 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02884 void 02885 CrsGraph< 02886 LocalOrdinal, GlobalOrdinal, 02887 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02888 setAllIndices (const Teuchos::ArrayRCP<size_t>& rowPointers, 02889 const Teuchos::ArrayRCP<LocalOrdinal>& columnIndices) 02890 { 02891 Kokkos::View<size_t*, device_type> k_ptr = 02892 Kokkos::Compat::getKokkosViewDeepCopy<DeviceType> (rowPointers ()); 02893 Kokkos::View<LocalOrdinal*, device_type> k_ind = 02894 Kokkos::Compat::getKokkosViewDeepCopy<DeviceType> (columnIndices ()); 02895 02896 setAllIndices (k_ptr, k_ind); 02897 } 02898 02899 02900 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02901 void 02902 CrsGraph< 02903 LocalOrdinal, GlobalOrdinal, 02904 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02905 getNumEntriesPerLocalRowUpperBound (Teuchos::ArrayRCP<const size_t>& boundPerLocalRow, 02906 size_t& boundForAllLocalRows, 02907 bool& boundSameForAllLocalRows) const 02908 { 02909 // The three output arguments. We assign them to the actual 02910 // output arguments at the end, in order to implement 02911 // transactional semantics. 02912 Teuchos::ArrayRCP<const size_t> numEntriesPerRow; 02913 size_t numEntriesForAll = 0; 02914 bool allRowsSame = true; 02915 02916 const ptrdiff_t numRows = static_cast<ptrdiff_t> (this->getNodeNumRows ()); 02917 02918 if (! this->indicesAreAllocated ()) { 02919 if (k_numAllocPerRow_.dimension_0 () != 0) { 02920 numEntriesPerRow = Kokkos::Compat::persistingView (k_numAllocPerRow_.h_view); 02921 allRowsSame = false; // conservatively; we don't check the array 02922 } 02923 else { 02924 numEntriesForAll = numAllocForAllRows_; 02925 allRowsSame = true; 02926 } 02927 } 02928 else if (k_numRowEntries_.dimension_0 () != 0) { 02929 numEntriesPerRow = Kokkos::Compat::persistingView (k_numRowEntries_.h_view); 02930 allRowsSame = false; // conservatively; we don't check the array 02931 } 02932 else if (this->nodeNumAllocated_ == 0) { 02933 numEntriesForAll = 0; 02934 allRowsSame = true; 02935 } 02936 else { 02937 // left with the case that we have optimized storage. in this 02938 // case, we have to construct a list of row sizes. 02939 TEUCHOS_TEST_FOR_EXCEPTION( 02940 this->getProfileType () != StaticProfile, std::logic_error, 02941 "Tpetra::CrsGraph::getNumEntriesPerRowUpperBound: " 02942 "The graph is not StaticProfile, but storage appears to be optimized. " 02943 "Please report this bug to the Tpetra developers."); 02944 TEUCHOS_TEST_FOR_EXCEPTION( 02945 numRows != 0 && k_rowPtrs_.dimension_0 () == 0, std::logic_error, 02946 "Tpetra::CrsGraph::getNumEntriesPerRowUpperBound: " 02947 "The graph has " << numRows << " (> 0) row" << (numRows != 1 ? "s" : "") 02948 << " on the calling process, but the k_rowPtrs_ array has zero entries. " 02949 "Please report this bug to the Tpetra developers."); 02950 02951 Teuchos::ArrayRCP<size_t> numEnt; 02952 if (numRows != 0) { 02953 numEnt = Teuchos::arcp<size_t> (numRows); 02954 } 02955 02956 // We have to iterate through the row offsets anyway, so we 02957 // might as well check whether all rows' bounds are the same. 02958 bool allRowsReallySame = false; 02959 for (ptrdiff_t i = 0; i < numRows; ++i) { 02960 numEnt[i] = k_rowPtrs_(i+1) - k_rowPtrs_(i); 02961 if (i != 0 && numEnt[i] != numEnt[i-1]) { 02962 allRowsReallySame = false; 02963 } 02964 } 02965 if (allRowsReallySame) { 02966 if (numRows == 0) { 02967 numEntriesForAll = 0; 02968 } else { 02969 numEntriesForAll = numEnt[1] - numEnt[0]; 02970 } 02971 allRowsSame = true; 02972 } 02973 else { 02974 numEntriesPerRow = numEnt; // Teuchos::arcp_const_cast<const size_t> (numEnt); 02975 allRowsSame = false; // conservatively; we don't check the array 02976 } 02977 } 02978 02979 TEUCHOS_TEST_FOR_EXCEPTION( 02980 numEntriesForAll != 0 && numEntriesPerRow.size () != 0, std::logic_error, 02981 "Tpetra::CrsGraph::getNumEntriesPerLocalRowUpperBound: " 02982 "numEntriesForAll and numEntriesPerRow are not consistent. The former " 02983 "is nonzero (" << numEntriesForAll << "), but the latter has nonzero " 02984 "size " << numEntriesPerRow.size () << ". " 02985 "Please report this bug to the Tpetra developers."); 02986 TEUCHOS_TEST_FOR_EXCEPTION( 02987 numEntriesForAll != 0 && ! allRowsSame, std::logic_error, 02988 "Tpetra::CrsGraph::getNumEntriesPerLocalRowUpperBound: " 02989 "numEntriesForAll and allRowsSame are not consistent. The former " 02990 "is nonzero (" << numEntriesForAll << "), but the latter is false. " 02991 "Please report this bug to the Tpetra developers."); 02992 TEUCHOS_TEST_FOR_EXCEPTION( 02993 numEntriesPerRow.size () != 0 && allRowsSame, std::logic_error, 02994 "Tpetra::CrsGraph::getNumEntriesPerLocalRowUpperBound: " 02995 "numEntriesPerRow and allRowsSame are not consistent. The former has " 02996 "nonzero length " << numEntriesForAll << ", but the latter is true. " 02997 "Please report this bug to the Tpetra developers."); 02998 02999 boundPerLocalRow = numEntriesPerRow; 03000 boundForAllLocalRows = numEntriesForAll; 03001 boundSameForAllLocalRows = allRowsSame; 03002 } 03003 03004 03005 // TODO: in the future, globalAssemble() should use import/export functionality 03006 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03007 void 03008 CrsGraph< 03009 LocalOrdinal, GlobalOrdinal, 03010 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03011 globalAssemble () 03012 { 03013 using Teuchos::Array; 03014 using Teuchos::as; 03015 using Teuchos::Comm; 03016 using Teuchos::gatherAll; 03017 using Teuchos::ireceive; 03018 using Teuchos::isend; 03019 using Teuchos::outArg; 03020 using Teuchos::REDUCE_MAX; 03021 using Teuchos::reduceAll; 03022 using Teuchos::toString; 03023 using Teuchos::waitAll; 03024 using std::endl; 03025 using std::make_pair; 03026 using std::pair; 03027 typedef GlobalOrdinal GO; 03028 typedef typename std::map<GO, std::vector<GO> >::const_iterator NLITER; 03029 typedef typename Array<GO>::size_type size_type; 03030 03031 const char tfecfFuncName[] = "globalAssemble"; // for exception macro 03032 RCP<const Comm<int> > comm = getComm(); 03033 03034 const int numImages = comm->getSize(); 03035 const int myImageID = comm->getRank(); 03036 #ifdef HAVE_TPETRA_DEBUG 03037 Teuchos::barrier (*comm); 03038 #endif // HAVE_TPETRA_DEBUG 03039 03040 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( ! isFillActive(), std::runtime_error, 03041 ": requires that fill is active."); 03042 // Determine if any nodes have global entries to share 03043 { 03044 size_t MyNonlocals = nonlocals_.size(), MaxGlobalNonlocals; 03045 reduceAll (*comm, REDUCE_MAX, MyNonlocals, outArg (MaxGlobalNonlocals)); 03046 if (MaxGlobalNonlocals == 0) { 03047 return; // no entries to share 03048 } 03049 } 03050 03051 // compute a list of NLRs from nonlocals_ and use it to compute: 03052 // IdsAndRows: a vector of (id,row) pairs 03053 // NLR2Id: a map from NLR to the Id that owns it 03054 // globalNeighbors: a global graph of connectivity between images: 03055 // globalNeighbors(i,j) indicates that j sends to i 03056 // sendIDs: a list of all images I send to 03057 // recvIDs: a list of all images I receive from (constructed later) 03058 Array<pair<int, GO> > IdsAndRows; 03059 std::map<GO, int> NLR2Id; 03060 Teuchos::SerialDenseMatrix<int, char> globalNeighbors; 03061 Array<int> sendIDs, recvIDs; 03062 { 03063 // nonlocals_ contains the entries we are holding for all 03064 // nonowned rows. Compute list of rows for which we have data. 03065 Array<GO> NLRs; 03066 std::set<GO> setOfRows; 03067 for (NLITER iter = nonlocals_.begin (); iter != nonlocals_.end (); ++iter) { 03068 setOfRows.insert (iter->first); 03069 } 03070 // copy the elements in the set into an Array 03071 NLRs.resize (setOfRows.size ()); 03072 std::copy (setOfRows.begin (), setOfRows.end (), NLRs.begin ()); 03073 03074 // get a list of ImageIDs for the non-local rows (NLRs) 03075 Array<int> NLRIds(NLRs.size()); 03076 { 03077 const LookupStatus stat = 03078 rowMap_->getRemoteIndexList (NLRs (), NLRIds ()); 03079 int lclerror = ( stat == IDNotPresent ? 1 : 0 ); 03080 int gblerror; 03081 reduceAll<int, int> (*comm, REDUCE_MAX, lclerror, outArg (gblerror)); 03082 if (gblerror != 0) { 03083 const int myRank = comm->getRank (); 03084 std::ostringstream os; 03085 os << "On one or more processes in the communicator, " 03086 << "there were insertions into rows of the graph that do not " 03087 << "exist in the row Map on any process in the communicator." 03088 << endl << "This process " << myRank << " is " 03089 << (lclerror == 0 ? "not " : "") << "one of those offending " 03090 << "processes." << endl; 03091 if (lclerror != 0) { 03092 // If NLRIds[k] is -1, then NLRs[k] is a row index not in 03093 // the row Map. Collect this list of invalid row indices 03094 // for display in the exception message. 03095 Array<GO> invalidNonlocalRows; 03096 for (size_type k = 0; k < NLRs.size (); ++k) { 03097 if (NLRIds[k] == -1) { 03098 invalidNonlocalRows.push_back (NLRs[k]); 03099 } 03100 } 03101 const size_type numInvalid = invalidNonlocalRows.size (); 03102 os << "On this process, " << numInvalid << " nonlocal row" 03103 << (numInvalid != 1 ? "s " : " ") << " were inserted that are " 03104 << "not in the row Map on any process." << endl; 03105 // Don't print _too_ many nonlocal rows. 03106 if (numInvalid <= 100) { 03107 os << "Offending row indices: " 03108 << toString (invalidNonlocalRows ()) << endl; 03109 } 03110 } 03111 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 03112 gblerror != 0, std::runtime_error, 03113 ": nonlocal entries correspond to invalid rows." 03114 << endl << os.str ()); 03115 } 03116 } 03117 03118 // build up a list of neighbors, as well as a map between NLRs and Ids 03119 // localNeighbors[i] != 0 iff I have data to send to image i 03120 // put NLRs,Ids into an array of pairs 03121 IdsAndRows.reserve(NLRs.size()); 03122 Array<char> localNeighbors(numImages, 0); 03123 typename Array<GO>::const_iterator nlr; 03124 typename Array<int>::const_iterator id; 03125 for (nlr = NLRs.begin(), id = NLRIds.begin(); 03126 nlr != NLRs.end(); ++nlr, ++id) { 03127 NLR2Id[*nlr] = *id; 03128 localNeighbors[*id] = 1; 03129 // IdsAndRows.push_back(make_pair<int,GlobalOrdinal>(*id,*nlr)); 03130 IdsAndRows.push_back(make_pair(*id,*nlr)); 03131 } 03132 for (int j=0; j<numImages; ++j) { 03133 if (localNeighbors[j]) { 03134 sendIDs.push_back(j); 03135 } 03136 } 03137 // sort IdsAndRows, by Ids first, then rows 03138 std::sort(IdsAndRows.begin(),IdsAndRows.end()); 03139 // gather from other nodes to form the full graph 03140 globalNeighbors.shapeUninitialized(numImages,numImages); 03141 gatherAll (*getComm(), numImages, localNeighbors.getRawPtr(), 03142 numImages * numImages, globalNeighbors.values()); 03143 // globalNeighbors at this point contains (on all images) the 03144 // connectivity between the images. 03145 // globalNeighbors(i,j) != 0 means that j sends to i/that i receives from j 03146 } 03147 03149 // FIGURE OUT WHO IS SENDING TO WHOM AND HOW MUCH 03150 // DO THIS IN THE PROCESS OF PACKING ALL OUTGOING DATA ACCORDING TO DESTINATION ID 03152 03153 // loop over all columns to know from which images I can expect to receive something 03154 for (int j=0; j<numImages; ++j) { 03155 if (globalNeighbors(myImageID,j)) { 03156 recvIDs.push_back(j); 03157 } 03158 } 03159 const size_t numRecvs = recvIDs.size(); 03160 03161 // we know how many we're sending to already 03162 // form a contiguous list of all data to be sent 03163 // track the number of entries for each ID 03164 Array<pair<GO, GO> > IJSendBuffer; 03165 Array<size_t> sendSizes(sendIDs.size(), 0); 03166 size_t numSends = 0; 03167 for (typename Array<pair<int, GO> >::const_iterator IdAndRow = IdsAndRows.begin(); 03168 IdAndRow != IdsAndRows.end(); ++IdAndRow) { 03169 int id = IdAndRow->first; 03170 GO row = IdAndRow->second; 03171 // have we advanced to a new send? 03172 if (sendIDs[numSends] != id) { 03173 numSends++; 03174 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(sendIDs[numSends] != id, 03175 std::logic_error, ": internal logic error. Contact Tpetra team."); 03176 } 03177 // copy data for row into contiguous storage 03178 for (typename std::vector<GO>::const_iterator j = nonlocals_[row].begin (); 03179 j != nonlocals_[row].end (); ++j) { 03180 IJSendBuffer.push_back (pair<GlobalOrdinal, GlobalOrdinal> (row, *j)); 03181 sendSizes[numSends]++; 03182 } 03183 } 03184 if (IdsAndRows.size() > 0) { 03185 numSends++; // one last increment, to make it a count instead of an index 03186 } 03187 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 03188 as<typename Array<int>::size_type> (numSends) != sendIDs.size (), 03189 std::logic_error, ": internal logic error. Contact Tpetra team."); 03190 03191 // don't need this data anymore 03192 nonlocals_.clear(); 03193 03195 // TRANSMIT SIZE INFO BETWEEN SENDERS AND RECEIVERS 03197 03198 // Array of pending nonblocking communication requests. It's OK 03199 // to mix nonblocking send and receive requests in the same 03200 // waitAll() call. 03201 Array<RCP<Teuchos::CommRequest<int> > > requests; 03202 03203 // perform non-blocking sends: send sizes to our recipients 03204 for (size_t s = 0; s < numSends ; ++s) { 03205 // We're using a nonowning RCP because all communication 03206 // will be local to this method and the scope of our data 03207 requests.push_back (isend<int, size_t> (*comm, 03208 rcp (&sendSizes[s], false), 03209 sendIDs[s])); 03210 } 03211 // perform non-blocking receives: receive sizes from our senders 03212 Array<size_t> recvSizes (numRecvs); 03213 for (size_t r = 0; r < numRecvs; ++r) { 03214 // We're using a nonowning RCP because all communication 03215 // will be local to this method and the scope of our data 03216 requests.push_back (ireceive (*comm, rcp (&recvSizes[r], false), recvIDs[r])); 03217 } 03218 // Wait on all the nonblocking sends and receives. 03219 if (! requests.empty()) { 03220 waitAll (*comm, requests()); 03221 } 03222 #ifdef HAVE_TPETRA_DEBUG 03223 Teuchos::barrier (*comm); 03224 #endif // HAVE_TPETRA_DEBUG 03225 03226 // This doesn't necessarily deallocate the array. 03227 requests.resize (0); 03228 03230 // NOW SEND/RECEIVE ALL ROW DATA 03232 // from the size info, build the ArrayViews into IJSendBuffer 03233 Array<ArrayView<pair<GO,GO> > > sendBuffers(numSends,null); 03234 { 03235 size_t cur = 0; 03236 for (size_t s=0; s<numSends; ++s) { 03237 sendBuffers[s] = IJSendBuffer(cur,sendSizes[s]); 03238 cur += sendSizes[s]; 03239 } 03240 } 03241 // perform non-blocking sends 03242 for (size_t s=0; s < numSends ; ++s) 03243 { 03244 // We're using a nonowning RCP because all communication 03245 // will be local to this method and the scope of our data 03246 ArrayRCP<pair<GO,GO> > tmpSendBuf = 03247 arcp (sendBuffers[s].getRawPtr(), 0, sendBuffers[s].size(), false); 03248 requests.push_back (isend<int, pair<GO,GO> > (*comm, tmpSendBuf, sendIDs[s])); 03249 } 03250 // calculate amount of storage needed for receives 03251 // setup pointers for the receives as well 03252 size_t totalRecvSize = std::accumulate (recvSizes.begin(), recvSizes.end(), 0); 03253 Array<pair<GO,GO> > IJRecvBuffer (totalRecvSize); 03254 // from the size info, build the ArrayViews into IJRecvBuffer 03255 Array<ArrayView<pair<GO,GO> > > recvBuffers (numRecvs, null); 03256 { 03257 size_t cur = 0; 03258 for (size_t r=0; r<numRecvs; ++r) { 03259 recvBuffers[r] = IJRecvBuffer(cur,recvSizes[r]); 03260 cur += recvSizes[r]; 03261 } 03262 } 03263 // perform non-blocking recvs 03264 for (size_t r = 0; r < numRecvs; ++r) { 03265 // We're using a nonowning RCP because all communication 03266 // will be local to this method and the scope of our data 03267 ArrayRCP<pair<GO,GO> > tmpRecvBuf = 03268 arcp (recvBuffers[r].getRawPtr(), 0, recvBuffers[r].size(), false); 03269 requests.push_back (ireceive (*comm, tmpRecvBuf, recvIDs[r])); 03270 } 03271 // perform waits 03272 if (! requests.empty()) { 03273 waitAll (*comm, requests()); 03274 } 03275 #ifdef HAVE_TPETRA_DEBUG 03276 Teuchos::barrier (*comm); 03277 #endif // HAVE_TPETRA_DEBUG 03278 03280 // NOW PROCESS THE RECEIVED ROW DATA 03282 // TODO: instead of adding one entry at a time, add one row at a time. 03283 // this requires resorting; they arrived sorted by sending node, 03284 // so that entries could be non-contiguous if we received 03285 // multiple entries for a particular row from different processors. 03286 // it also requires restoring the data, which may make it not worth the trouble. 03287 for (typename Array<pair<GO,GO> >::const_iterator ij = IJRecvBuffer.begin(); 03288 ij != IJRecvBuffer.end(); ++ij) 03289 { 03290 insertGlobalIndicesFiltered (ij->first, tuple<GO> (ij->second)); 03291 } 03292 checkInternalState(); 03293 } 03294 03295 03296 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03297 void 03298 CrsGraph< 03299 LocalOrdinal, GlobalOrdinal, 03300 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03301 resumeFill (const Teuchos::RCP<Teuchos::ParameterList>& params) 03302 { 03303 const char tfecfFuncName[] = "resumeFill"; 03304 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! hasRowInfo(), std::runtime_error, 03305 ": Sorry, you cannot resume fill of the CrsGraph, since the graph's row " 03306 "information was deleted in fillComplete()."); 03307 03308 #ifdef HAVE_TPETRA_DEBUG 03309 Teuchos::barrier( *rowMap_->getComm() ); 03310 #endif // HAVE_TPETRA_DEBUG 03311 clearGlobalConstants(); 03312 if (params != null) this->setParameterList (params); 03313 lowerTriangular_ = false; 03314 upperTriangular_ = false; 03315 // either still sorted/merged or initially sorted/merged 03316 indicesAreSorted_ = true; 03317 noRedundancies_ = true; 03318 fillComplete_ = false; 03319 #ifdef HAVE_TPETRA_DEBUG 03320 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 03321 ! isFillActive() || isFillComplete(), std::logic_error, 03322 "::resumeFill(): At end of method, either fill is not active or fill is " 03323 "complete. This violates stated post-conditions. Please report this bug " 03324 "to the Tpetra developers."); 03325 #endif // HAVE_TPETRA_DEBUG 03326 } 03327 03328 03329 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03330 void 03331 CrsGraph< 03332 LocalOrdinal, GlobalOrdinal, 03333 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03334 fillComplete (const Teuchos::RCP<Teuchos::ParameterList>& params) 03335 { 03336 // If the graph already has domain and range Maps, don't clobber 03337 // them. If it doesn't, use the current row Map for both the 03338 // domain and range Maps. 03339 // 03340 // NOTE (mfh 28 Sep 2014): If the graph was constructed without a 03341 // column Map, and column indices are inserted which are not in 03342 // the row Map on any process, this will cause troubles. However, 03343 // that is not a common case for most applications that we 03344 // encounter, and checking for it might require more 03345 // communication. 03346 Teuchos::RCP<const map_type> domMap = this->getDomainMap (); 03347 if (domMap.is_null ()) { 03348 domMap = this->getRowMap (); 03349 } 03350 Teuchos::RCP<const map_type> ranMap = this->getRangeMap (); 03351 if (ranMap.is_null ()) { 03352 ranMap = this->getRowMap (); 03353 } 03354 this->fillComplete (domMap, ranMap, params); 03355 } 03356 03357 03358 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03359 void 03360 CrsGraph< 03361 LocalOrdinal, GlobalOrdinal, 03362 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03363 fillComplete (const Teuchos::RCP<const map_type>& domainMap, 03364 const Teuchos::RCP<const map_type>& rangeMap, 03365 const Teuchos::RCP<Teuchos::ParameterList>& params) 03366 { 03367 const char tfecfFuncName[] = "fillComplete"; 03368 03369 #ifdef HAVE_TPETRA_DEBUG 03370 rowMap_->getComm ()->barrier (); 03371 #endif // HAVE_TPETRA_DEBUG 03372 03373 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( ! isFillActive() || isFillComplete(), 03374 std::runtime_error, ": Graph fill state must be active (isFillActive() " 03375 "must be true) before calling fillComplete()."); 03376 03377 const int numProcs = getComm ()->getSize (); 03378 03379 // 03380 // Read and set parameters 03381 // 03382 03383 // Does the caller want to sort remote GIDs (within those owned by 03384 // the same process) in makeColMap()? 03385 if (! params.is_null ()) { 03386 if (params->isParameter ("sort column map ghost gids")) { 03387 sortGhostsAssociatedWithEachProcessor_ = 03388 params->get<bool> ("sort column map ghost gids", 03389 sortGhostsAssociatedWithEachProcessor_); 03390 } 03391 else if (params->isParameter ("Sort column Map ghost GIDs")) { 03392 sortGhostsAssociatedWithEachProcessor_ = 03393 params->get<bool> ("Sort column Map ghost GIDs", 03394 sortGhostsAssociatedWithEachProcessor_); 03395 } 03396 } 03397 03398 // If true, the caller promises that no process did nonlocal 03399 // changes since the last call to fillComplete. 03400 bool assertNoNonlocalInserts = false; 03401 if (! params.is_null ()) { 03402 assertNoNonlocalInserts = 03403 params->get<bool> ("No Nonlocal Changes", assertNoNonlocalInserts); 03404 } 03405 03406 // 03407 // Allocate indices, if they haven't already been allocated 03408 // 03409 if (! indicesAreAllocated ()) { 03410 if (hasColMap ()) { 03411 // We have a column Map, so use local indices. 03412 allocateIndices (LocalIndices); 03413 } else { 03414 // We don't have a column Map, so use global indices. 03415 allocateIndices (GlobalIndices); 03416 } 03417 } 03418 03419 // 03420 // Do global assembly, if requested and if the communicator 03421 // contains more than one process. 03422 // 03423 const bool mayNeedGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1; 03424 if (mayNeedGlobalAssemble) { 03425 // This first checks if we need to do global assembly. 03426 // The check costs a single all-reduce. 03427 globalAssemble (); 03428 } 03429 else { 03430 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 03431 numProcs > 1 && nonlocals_.size() > 0, std::runtime_error, 03432 ":" << std::endl << "The graph's communicator contains only one " 03433 "process, but there are nonlocal entries. " << std::endl << 03434 "This probably means that invalid entries were added to the graph."); 03435 } 03436 03437 // Set domain and range Map. This may clear the Import / Export 03438 // objects if the new Maps differ from any old ones. 03439 setDomainRangeMaps (domainMap, rangeMap); 03440 03441 // If the graph does not already have a column Map (either from 03442 // the user constructor calling the version of the constructor 03443 // that takes a column Map, or from a previous fillComplete call), 03444 // then create it. 03445 if (! hasColMap ()) { 03446 makeColMap (); 03447 } 03448 03449 // Make indices local, if they aren't already. 03450 // The method doesn't do any work if the indices are already local. 03451 makeIndicesLocal (); 03452 03453 if (! isSorted ()) { 03454 // If this process has no indices, then CrsGraph considers it 03455 // already trivially sorted. Thus, this method need not be 03456 // called on all processes in the row Map's communicator. 03457 sortAllIndices (); 03458 } 03459 03460 if (! isMerged()) { 03461 mergeAllIndices (); 03462 } 03463 makeImportExport (); // Make Import and Export objects, if necessary 03464 computeGlobalConstants (); 03465 fillLocalGraph (params); 03466 fillComplete_ = true; 03467 03468 #ifdef HAVE_TPETRA_DEBUG 03469 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 03470 isFillActive() == true || isFillComplete() == false, std::logic_error, 03471 ": Violated stated post-conditions. Please contact Tpetra team."); 03472 #endif 03473 03474 checkInternalState (); 03475 } 03476 03477 03478 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03479 void 03480 CrsGraph< 03481 LocalOrdinal, GlobalOrdinal, 03482 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03483 expertStaticFillComplete (const Teuchos::RCP<const map_type>& domainMap, 03484 const Teuchos::RCP<const map_type>& rangeMap, 03485 const Teuchos::RCP<const import_type>& importer, 03486 const Teuchos::RCP<const export_type>& exporter, 03487 const Teuchos::RCP<Teuchos::ParameterList>& params) 03488 { 03489 const char tfecfFuncName[] = "expertStaticFillComplete: "; 03490 03491 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 03492 domainMap.is_null () || rangeMap.is_null (), 03493 std::runtime_error, "The input domain Map and range Map must be nonnull."); 03494 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 03495 pftype_ != StaticProfile, std::runtime_error, "You may not call this " 03496 "method unless the graph is StaticProfile."); 03497 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 03498 isFillComplete () || ! hasColMap (), std::runtime_error, "You may not " 03499 "call this method unless the graph has a column Map."); 03500 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 03501 getNodeNumRows () > 0 && k_rowPtrs_.dimension_0 () == 0, 03502 std::runtime_error, "The calling process has getNodeNumRows() = " 03503 << getNodeNumRows () << " > 0 rows, but the row offsets array has not " 03504 "been set."); 03505 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 03506 static_cast<size_t> (k_rowPtrs_.dimension_0 ()) != getNodeNumRows () + 1, 03507 std::runtime_error, "The row offsets array has length " << 03508 k_rowPtrs_.dimension_0 () << " != getNodeNumRows()+1 = " << 03509 (getNodeNumRows () + 1) << "."); 03510 03511 // Note: We don't need to do the following things which are normally done in fillComplete: 03512 // allocateIndices, globalAssemble, makeColMap, makeIndicesLocal, sortAllIndices, mergeAllIndices 03513 03514 // Note: Need to do this so computeGlobalConstants & fillLocalGraph work 03515 // 03516 // The first assignment is always true if the graph has 1-D 03517 // storage (StaticProfile). The second assignment is only true if 03518 // storage is packed. 03519 nodeNumAllocated_ = k_rowPtrs_(getNodeNumRows ()); 03520 nodeNumEntries_ = nodeNumAllocated_; 03521 03522 // Constants from allocateIndices 03523 // 03524 // mfh 08 Aug 2014: numAllocForAllRows_ and k_numAllocPerRow_ go 03525 // away once the graph is allocated. expertStaticFillComplete 03526 // either presumes that the graph is allocated, or "allocates" it. 03527 // 03528 // FIXME (mfh 08 Aug 2014) The goal for the Kokkos refactor 03529 // version of CrsGraph is to allocate in the constructor, not 03530 // lazily on first insert. That will make both 03531 // numAllocForAllRows_ and k_numAllocPerRow_ obsolete. 03532 numAllocForAllRows_ = 0; 03533 k_numAllocPerRow_ = Kokkos::DualView<const size_t*, device_type> (); 03534 numAllocPerRow_ = Teuchos::null; 03535 indicesAreAllocated_ = true; 03536 03537 // Constants from makeIndicesLocal 03538 // 03539 // The graph has a column Map, so its indices had better be local. 03540 indicesAreLocal_ = true; 03541 indicesAreGlobal_ = false; 03542 03543 // set domain/range map: may clear the import/export objects 03544 setDomainRangeMaps (domainMap, rangeMap); 03545 03546 // Presume the user sorted and merged the arrays first 03547 indicesAreSorted_ = true; 03548 noRedundancies_ = true; 03549 03550 // makeImportExport won't create a new importer/exporter if I set one here first. 03551 importer_ = Teuchos::null; 03552 exporter_ = Teuchos::null; 03553 if (importer != Teuchos::null) { 03554 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 03555 ! importer->getSourceMap ()->isSameAs (*getDomainMap ()) || 03556 ! importer->getTargetMap ()->isSameAs (*getColMap ()), 03557 std::invalid_argument,": importer does not match matrix maps."); 03558 importer_ = importer; 03559 03560 } 03561 if (exporter != Teuchos::null) { 03562 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 03563 ! exporter->getSourceMap ()->isSameAs (*getRowMap ()) || 03564 ! exporter->getTargetMap ()->isSameAs (*getRangeMap ()), 03565 std::invalid_argument,": exporter does not match matrix maps."); 03566 exporter_ = exporter; 03567 } 03568 makeImportExport (); 03569 03570 // Compute the constants 03571 computeGlobalConstants (); 03572 03573 // Since we have a StaticProfile, fillLocalGraph will do the right thing... 03574 fillLocalGraph (params); 03575 fillComplete_ = true; 03576 03577 checkInternalState (); 03578 } 03579 03580 03581 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03582 void 03583 CrsGraph< 03584 LocalOrdinal, GlobalOrdinal, 03585 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03586 fillLocalGraph (const Teuchos::RCP<Teuchos::ParameterList>& params) 03587 { 03588 using Kokkos::create_mirror_view; 03589 using Teuchos::ArrayRCP; 03590 using Teuchos::null; 03591 using Teuchos::RCP; 03592 using Teuchos::rcp; 03593 typedef ArrayRCP<size_t>::size_type size_type; 03594 typedef t_numRowEntries_ row_entries_type; 03595 typedef t_RowPtrsNC row_offsets_type; 03596 typedef t_LocalOrdinal_1D lclinds_1d_type; 03597 03598 const size_t lclNumRows = this->getNodeNumRows (); 03599 03600 // This method's goal is to fill in the two arrays (compressed 03601 // sparse row format) that define the sparse graph's structure. 03602 // 03603 // Use t_RowPtrs and not LocalStaticCrsGraphType::row_map_type for 03604 // k_ptrs, because the latter is const and we need to modify 03605 // k_ptrs here. 03606 row_offsets_type k_ptrs; 03607 t_RowPtrs k_ptrs_const; 03608 lclinds_1d_type k_inds; 03609 03610 // The number of entries in each locally owned row. This is a 03611 // DualView. 2-D storage lives on host and is currently not 03612 // thread-safe for parallel kernels even on host, so we have to 03613 // work sequentially with host storage in that case. 03614 row_entries_type k_numRowEnt = k_numRowEntries_; 03615 typename row_entries_type::t_host h_numRowEnt = k_numRowEnt.h_view; 03616 03617 bool requestOptimizedStorage = true; 03618 if (! params.is_null () && ! params->get ("Optimize Storage", true)) { 03619 requestOptimizedStorage = false; 03620 } 03621 if (getProfileType () == DynamicProfile) { 03622 // Pack 2-D storage (DynamicProfile) into 1-D packed storage. 03623 // 03624 // DynamicProfile means that the graph's column indices are 03625 // currently stored in a 2-D "unpacked" format, in the 03626 // arrays-of-arrays lclInds2D_. We allocate 1-D storage 03627 // (k_inds) and then copy from 2-D storage (lclInds2D_) into 1-D 03628 // storage (k_inds). 03629 TEUCHOS_TEST_FOR_EXCEPTION( 03630 static_cast<size_t> (k_numRowEnt.dimension_0 ()) != lclNumRows, 03631 std::logic_error, "Tpetra::CrsGraph::fillLocalGraph (called from " 03632 "fillComplete or expertStaticFillComplete): For the DynamicProfile " 03633 "branch, k_numRowEnt has the wrong length. " 03634 "k_numRowEnt.dimension_0() = " << k_numRowEnt.dimension_0 () 03635 << " != getNodeNumRows() = " << lclNumRows << ""); 03636 03637 // Pack the row offsets into k_ptrs, by doing a sum-scan of 03638 // the array of valid entry counts per row (h_numRowEnt). 03639 // 03640 // Total number of entries in the matrix on the calling 03641 // process. We will compute this in the loop below. It's 03642 // cheap to compute and useful as a sanity check. 03643 size_t lclTotalNumEntries = 0; 03644 // This will be a host view of packed row offsets. 03645 typename row_offsets_type::HostMirror h_ptrs; 03646 { 03647 // Allocate the packed row offsets array. 03648 k_ptrs = row_offsets_type ("Tpetra::CrsGraph::ptr", lclNumRows+1); 03649 k_ptrs_const = k_ptrs; 03650 // 03651 // FIXME hack until we get parallel_scan in kokkos 03652 // 03653 h_ptrs = create_mirror_view (k_ptrs); 03654 h_ptrs(0) = 0; 03655 for (size_type i = 0; i < static_cast<size_type> (lclNumRows); ++i) { 03656 const size_t numEnt = h_numRowEnt(i); 03657 lclTotalNumEntries += numEnt; 03658 h_ptrs(i+1) = h_ptrs(i) + numEnt; 03659 } 03660 Kokkos::deep_copy (k_ptrs, h_ptrs); 03661 } 03662 03663 TEUCHOS_TEST_FOR_EXCEPTION( 03664 static_cast<size_t> (k_ptrs.dimension_0 ()) != lclNumRows + 1, 03665 std::logic_error, "Tpetra::CrsGraph::fillLocalGraph: In DynamicProfile " 03666 "branch, after packing k_ptrs, k_ptrs.dimension_0() = " 03667 << k_ptrs.dimension_0 () << " != (lclNumRows+1) = " 03668 << (lclNumRows+1) << "."); 03669 TEUCHOS_TEST_FOR_EXCEPTION( 03670 static_cast<size_t> (h_ptrs.dimension_0 ()) != lclNumRows + 1, 03671 std::logic_error, "Tpetra::CrsGraph::fillLocalGraph: In DynamicProfile " 03672 "branch, after packing h_ptrs, h_ptrs.dimension_0() = " 03673 << h_ptrs.dimension_0 () << " != (lclNumRows+1) = " 03674 << (lclNumRows+1) << "."); 03675 // FIXME (mfh 08 Aug 2014) This assumes UVM. 03676 TEUCHOS_TEST_FOR_EXCEPTION( 03677 k_ptrs(lclNumRows) != lclTotalNumEntries, std::logic_error, 03678 "Tpetra::CrsGraph::fillLocalGraph: In DynamicProfile branch, after " 03679 "packing k_ptrs, k_ptrs(lclNumRows = " << lclNumRows << ") = " << 03680 k_ptrs(lclNumRows) << " != total number of entries on the calling " 03681 "process = " << lclTotalNumEntries << "."); 03682 03683 // Allocate the array of packed column indices. 03684 k_inds = lclinds_1d_type ("Tpetra::CrsGraph::ind", lclTotalNumEntries); 03685 // We need a host view of the above, since 2-D storage lives on host. 03686 typename lclinds_1d_type::HostMirror h_inds = create_mirror_view (k_inds); 03687 // Pack the column indices. 03688 for (size_t row = 0; row < lclNumRows; ++row) { 03689 const size_t numEnt = h_numRowEnt(row); 03690 std::copy (lclInds2D_[row].begin (), 03691 lclInds2D_[row].begin () + numEnt, 03692 h_inds.ptr_on_device () + h_ptrs(row)); 03693 } 03694 Kokkos::deep_copy (k_inds, h_inds); 03695 03696 // Sanity check of packed row offsets. 03697 if (k_ptrs.dimension_0 () != 0) { 03698 const size_t numOffsets = static_cast<size_t> (k_ptrs.dimension_0 ()); 03699 TEUCHOS_TEST_FOR_EXCEPTION( 03700 static_cast<size_t> (k_ptrs(numOffsets-1)) != k_inds.dimension_0 (), 03701 std::logic_error, "Tpetra::CrsGraph::fillLocalGraph: " 03702 "In DynamicProfile branch, after packing, k_ptrs(" << (numOffsets-1) 03703 << ") = " << k_ptrs(numOffsets-1) << " != k_inds.dimension_0() = " 03704 << k_inds.dimension_0 () << "."); 03705 } 03706 } 03707 else if (getProfileType () == StaticProfile) { 03708 // StaticProfile means that the graph's column indices are 03709 // currently stored in a 1-D format, with row offsets in 03710 // k_rowPtrs_ and local column indices in k_lclInds1D_. 03711 03712 // StaticProfile also means that the graph's array of row 03713 // offsets must already be allocated. 03714 TEUCHOS_TEST_FOR_EXCEPTION( 03715 k_rowPtrs_.dimension_0 () == 0, std::logic_error, 03716 "k_rowPtrs_ has size zero, but shouldn't"); 03717 TEUCHOS_TEST_FOR_EXCEPTION( 03718 k_rowPtrs_.dimension_0 () != lclNumRows + 1, std::logic_error, 03719 "Tpetra::CrsGraph::fillLocalGraph: k_rowPtrs_ has size " 03720 << k_rowPtrs_.dimension_0 () << " != (lclNumRows + 1) = " 03721 << (lclNumRows + 1) << ".") 03722 { 03723 const size_t numOffsets = k_rowPtrs_.dimension_0 (); 03724 // FIXME (mfh 08 Aug 2014) This relies on UVM. 03725 TEUCHOS_TEST_FOR_EXCEPTION( 03726 numOffsets != 0 && 03727 k_lclInds1D_.dimension_0 () != k_rowPtrs_(numOffsets-1), 03728 std::logic_error, "Tpetra::CrsGraph::fillLocalGraph: " 03729 "numOffsets = " << numOffsets << " != 0 and " 03730 "k_lclInds1D_.dimension_0() = " << k_lclInds1D_.dimension_0 () 03731 << " != k_rowPtrs_(" << numOffsets << ") = " 03732 << k_rowPtrs_(numOffsets-1) << "."); 03733 } 03734 03735 if (nodeNumEntries_ != nodeNumAllocated_) { 03736 // The graph's current 1-D storage is "unpacked." This means 03737 // the row offsets may differ from what the final row offsets 03738 // should be. This could happen, for example, if the user 03739 // specified StaticProfile in the constructor and set an upper 03740 // bound on the number of entries in each row, but didn't fill 03741 // all those entries. 03742 TEUCHOS_TEST_FOR_EXCEPTION( 03743 static_cast<size_t> (k_numRowEnt.dimension_0 ()) != lclNumRows, 03744 std::logic_error, "Tpetra::CrsGraph::fillLocalGraph (called from " 03745 "fillComplete or expertStaticFillComplete): In StaticProfile " 03746 "unpacked branch, k_numRowEnt has the wrong length. " 03747 "k_numRowEnt.dimension_0() = " << k_numRowEnt.dimension_0 () 03748 << " != getNodeNumRows() = " << lclNumRows << ""); 03749 03750 if (k_rowPtrs_.dimension_0 () != 0) { 03751 const size_t numOffsets = 03752 static_cast<size_t> (k_rowPtrs_.dimension_0 ()); 03753 TEUCHOS_TEST_FOR_EXCEPTION( 03754 k_rowPtrs_(numOffsets-1) != static_cast<size_t> (k_lclInds1D_.dimension_0 ()), 03755 std::logic_error, "Tpetra::CrsGraph::fillLocalGraph: " 03756 "In StaticProfile branch, before allocating or packing, " 03757 "k_rowPtrs_(" << (numOffsets-1) << ") = " 03758 << k_rowPtrs_(numOffsets-1) << " != k_lclInds1D_.dimension_0() = " 03759 << k_lclInds1D_.dimension_0 () << "."); 03760 } 03761 03762 // Pack the row offsets into k_ptrs, by doing a sum-scan of 03763 // the array of valid entry counts per row (h_numRowEnt). 03764 03765 // Total number of entries in the matrix on the calling 03766 // process. We will compute this in the loop below. It's 03767 // cheap to compute and useful as a sanity check. 03768 size_t lclTotalNumEntries = 0; 03769 { 03770 // Allocate the packed row offsets array. 03771 k_ptrs = row_offsets_type ("Tpetra::CrsGraph::ptr", lclNumRows + 1); 03772 k_ptrs_const = k_ptrs; 03773 // 03774 // FIXME hack until we get parallel_scan in kokkos 03775 // 03776 // Unlike in the 2-D storage case above, we don't need the 03777 // host view of the packed row offsets array after packing 03778 // the row offsets. 03779 typename row_offsets_type::HostMirror h_k_ptrs = 03780 create_mirror_view (k_ptrs); 03781 h_k_ptrs(0) = 0; 03782 for (size_t i = 0; i < lclNumRows; ++i) { 03783 const size_t numEnt = h_numRowEnt(i); 03784 lclTotalNumEntries += numEnt; 03785 h_k_ptrs(i+1) = h_k_ptrs(i) + numEnt; 03786 } 03787 Kokkos::deep_copy (k_ptrs, h_k_ptrs); 03788 } 03789 03790 TEUCHOS_TEST_FOR_EXCEPTION( 03791 static_cast<size_t> (k_ptrs.dimension_0 ()) != lclNumRows + 1, 03792 std::logic_error, "Tpetra::CrsGraph::fillLocalGraph: In " 03793 "StaticProfile unpacked branch, after allocating k_ptrs, " 03794 "k_ptrs.dimension_0() = " << k_ptrs.dimension_0 () << " != " 03795 "lclNumRows+1 = " << (lclNumRows+1) << "."); 03796 // FIXME (mfh 08 Aug 2014) This assumes UVM. 03797 TEUCHOS_TEST_FOR_EXCEPTION( 03798 k_ptrs(lclNumRows) != lclTotalNumEntries, std::logic_error, 03799 "Tpetra::CrsGraph::fillLocalGraph: In StaticProfile unpacked " 03800 "branch, after filling k_ptrs, k_ptrs(lclNumRows=" << lclNumRows 03801 << ") = " << k_ptrs(lclNumRows) << " != total number of entries on " 03802 "the calling process = " << lclTotalNumEntries << "."); 03803 03804 // Allocate the array of packed column indices. 03805 k_inds = lclinds_1d_type ("Tpetra::CrsGraph::ind", lclTotalNumEntries); 03806 03807 // k_rowPtrs_ and k_lclInds1D_ are currently unpacked. Pack 03808 // them, using the packed row offsets array k_ptrs that we 03809 // created above. 03810 // 03811 // FIXME (mfh 08 Aug 2014) If "Optimize Storage" is false (in 03812 // CrsMatrix?), we need to keep around the unpacked row 03813 // offsets and column indices. 03814 03815 // Pack the column indices from unpacked k_lclInds1D_ into 03816 // packed k_inds. We will replace k_lclInds1D_ below. 03817 typedef pack_functor<t_LocalOrdinal_1D, t_RowPtrs> inds_packer_type; 03818 inds_packer_type f (k_inds, k_lclInds1D_, k_ptrs, k_rowPtrs_); 03819 Kokkos::parallel_for (lclNumRows, f); 03820 03821 TEUCHOS_TEST_FOR_EXCEPTION( 03822 k_ptrs.dimension_0 () == 0, std::logic_error, "Tpetra::CrsGraph::" 03823 "fillLocalGraph: In StaticProfile \"Optimize Storage\" = true branch," 03824 " after packing, k_ptrs.dimension_0() = 0. This probably means that " 03825 "k_rowPtrs_ was never allocated."); 03826 if (k_ptrs.dimension_0 () != 0) { 03827 const size_t numOffsets = static_cast<size_t> (k_ptrs.dimension_0 ()); 03828 TEUCHOS_TEST_FOR_EXCEPTION( 03829 static_cast<size_t> (k_ptrs(numOffsets - 1)) != k_inds.dimension_0 (), 03830 std::logic_error, "Tpetra::CrsGraph::fillLocalGraph: " 03831 "In StaticProfile \"Optimize Storage\"=true branch, after packing, " 03832 "k_ptrs(" << (numOffsets-1) << ") = " << k_ptrs(numOffsets-1) << 03833 " != k_inds.dimension_0() = " << k_inds.dimension_0 () << "."); 03834 } 03835 } 03836 else { // We don't have to pack, so just set the pointers. 03837 k_ptrs_const = k_rowPtrs_; 03838 k_inds = k_lclInds1D_; 03839 03840 TEUCHOS_TEST_FOR_EXCEPTION( 03841 k_ptrs_const.dimension_0 () == 0, std::logic_error, "Tpetra::CrsGraph::" 03842 "fillLocalGraph: In StaticProfile \"Optimize Storage\" = " 03843 "false branch, k_ptrs_const.dimension_0() = 0. This probably means that " 03844 "k_rowPtrs_ was never allocated."); 03845 if (k_ptrs_const.dimension_0 () != 0) { 03846 const size_t numOffsets = static_cast<size_t> (k_ptrs_const.dimension_0 ()); 03847 TEUCHOS_TEST_FOR_EXCEPTION( 03848 static_cast<size_t> (k_ptrs_const(numOffsets - 1)) != k_inds.dimension_0 (), 03849 std::logic_error, "Tpetra::CrsGraph::fillLocalGraph: " 03850 "In StaticProfile \"Optimize Storage\" = false branch, " 03851 "k_ptrs_const(" << (numOffsets-1) << ") = " << k_ptrs_const(numOffsets - 1) 03852 << " != k_inds.dimension_0() = " << k_inds.dimension_0 () << "."); 03853 } 03854 } 03855 } 03856 03857 // Extra sanity checks. 03858 TEUCHOS_TEST_FOR_EXCEPTION( 03859 static_cast<size_t> (k_ptrs_const.dimension_0 ()) != lclNumRows + 1, 03860 std::logic_error, "Tpetra::CrsGraph::fillLocalGraph: After packing, " 03861 "k_ptrs_const.dimension_0() = " << k_ptrs_const.dimension_0 () 03862 << " != lclNumRows+1 = " << (lclNumRows+1) << "."); 03863 if (k_ptrs_const.dimension_0 () != 0) { 03864 const size_t numOffsets = static_cast<size_t> (k_ptrs_const.dimension_0 ()); 03865 TEUCHOS_TEST_FOR_EXCEPTION( 03866 static_cast<size_t> (k_ptrs_const(numOffsets - 1)) != k_inds.dimension_0 (), 03867 std::logic_error, "Tpetra::CrsGraph::fillLocalGraph: After packing, " 03868 "k_ptrs_const(" << (numOffsets-1) << ") = " << k_ptrs_const(numOffsets-1) 03869 << " != k_inds.dimension_0() = " << k_inds.dimension_0 () << "."); 03870 } 03871 03872 if (requestOptimizedStorage) { 03873 // With optimized storage, we don't need to store the 2-D column 03874 // indices array-of-arrays, or the array of row entry counts. 03875 03876 // Free graph data structures that are only needed for 2-D or 03877 // unpacked 1-D storage. 03878 lclInds2D_ = null; 03879 k_numRowEntries_ = row_entries_type (); 03880 numRowEntries_ = null; // legacy KokkosClassic view of above 03881 03882 // Keep the new 1-D packed allocations. 03883 k_rowPtrs_ = k_ptrs_const; 03884 k_lclInds1D_ = k_inds; 03885 03886 // Storage is packed now, so the number of allocated entries is 03887 // the same as the actual number of entries. 03888 nodeNumAllocated_ = nodeNumEntries_; 03889 // The graph is definitely StaticProfile now, whether or not it 03890 // was before. 03891 pftype_ = StaticProfile; 03892 } 03893 03894 // FIXME (mfh 28 Aug 2014) "Local Graph" sublist no longer used. 03895 03896 // Build the local graph. 03897 k_lclGraph_ = LocalStaticCrsGraphType (k_inds, k_ptrs_const); 03898 03899 // TODO (mfh 13 Mar 2014) getNodeNumDiags(), isUpperTriangular(), 03900 // and isLowerTriangular() depend on computeGlobalConstants(), in 03901 // particular the part where it looks at the local matrix. You 03902 // have to use global indices to determine which entries are 03903 // diagonal, or above or below the diagonal. However, lower or 03904 // upper triangularness is a local property. 03905 } 03906 03907 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03908 void 03909 CrsGraph< 03910 LocalOrdinal, GlobalOrdinal, 03911 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03912 replaceColMap (const Teuchos::RCP<const map_type>& newColMap) 03913 { 03914 // NOTE: This safety check matches the code, but not the documentation of Crsgraph 03915 // 03916 // FIXME (mfh 18 Aug 2014) This will break if the calling process 03917 // has no entries, because in that case, currently it is neither 03918 // locally nor globally indexed. This will change once we get rid 03919 // of lazy allocation (so that the constructor allocates indices 03920 // and therefore commits to local vs. global). 03921 const char tfecfFuncName[] = "replaceColMap: "; 03922 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 03923 isLocallyIndexed () || isGloballyIndexed (), std::runtime_error, 03924 "Requires matching maps and non-static graph."); 03925 colMap_ = newColMap; 03926 } 03927 03928 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03929 void 03930 CrsGraph< 03931 LocalOrdinal, GlobalOrdinal, 03932 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03933 reindexColumns (const Teuchos::RCP<const map_type>& newColMap, 03934 const Teuchos::RCP<const import_type>& newImport, 03935 const bool sortIndicesInEachRow) 03936 { 03937 using Teuchos::REDUCE_MIN; 03938 using Teuchos::reduceAll; 03939 using Teuchos::RCP; 03940 typedef GlobalOrdinal GO; 03941 typedef LocalOrdinal LO; 03942 const char tfecfFuncName[] = "reindexColumns: "; 03943 03944 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 03945 isFillComplete (), std::runtime_error, "The graph is fill complete " 03946 "(isFillComplete() returns true). You must call resumeFill() before " 03947 "you may call this method."); 03948 03949 // mfh 19 Aug 2014: This method does NOT redistribute data; it 03950 // doesn't claim to do the work of an Import or Export. This 03951 // means that for all processes, the calling process MUST own all 03952 // column indices, in both the old column Map (if it exists) and 03953 // the new column Map. We check this via an all-reduce. 03954 // 03955 // Some processes may be globally indexed, others may be locally 03956 // indexed, and others (that have no graph entries) may be 03957 // neither. This method will NOT change the graph's current 03958 // state. If it's locally indexed, it will stay that way, and 03959 // vice versa. It would easy to add an option to convert indices 03960 // from global to local, so as to save a global-to-local 03961 // conversion pass. However, we don't do this here. The intended 03962 // typical use case is that the graph already has a column Map and 03963 // is locally indexed, and this is the case for which we optimize. 03964 03965 const size_t lclNumRows = getNodeNumRows (); 03966 03967 // Attempt to convert indices to the new column Map's version of 03968 // local. This will fail if on the calling process, the graph has 03969 // indices that are not on that process in the new column Map. 03970 // After the local conversion attempt, we will do an all-reduce to 03971 // see if any processes failed. 03972 03973 // If this is false, then either the graph contains a column index 03974 // which is invalid in the CURRENT column Map, or the graph is 03975 // locally indexed but currently has no column Map. In either 03976 // case, there is no way to convert the current local indices into 03977 // global indices, so that we can convert them into the new column 03978 // Map's local indices. It's possible for this to be true on some 03979 // processes but not others, due to replaceColMap. 03980 bool allCurColIndsValid = true; 03981 // On the calling process, are all valid current column indices 03982 // also in the new column Map on the calling process? In other 03983 // words, does local reindexing suffice, or should the user have 03984 // done an Import or Export instead? 03985 bool localSuffices = true; 03986 03987 // Final arrays for the local indices. We will allocate exactly 03988 // one of these ONLY if the graph is locally indexed on the 03989 // calling process, and ONLY if the graph has one or more entries 03990 // (is not empty) on the calling process. In that case, we 03991 // allocate the first (1-D storage) if the graph has a static 03992 // profile, else we allocate the second (2-D storage). 03993 t_LocalOrdinal_1D newLclInds1D; 03994 Teuchos::ArrayRCP<Teuchos::Array<LO> > newLclInds2D; 03995 03996 // If indices aren't allocated, that means the calling process 03997 // owns no entries in the graph. Thus, there is nothing to 03998 // convert, and it trivially succeeds locally. 03999 if (indicesAreAllocated ()) { 04000 if (isLocallyIndexed ()) { 04001 if (hasColMap ()) { // locally indexed, and currently has a column Map 04002 const map_type& oldColMap = * (getColMap ()); 04003 if (pftype_ == StaticProfile) { 04004 // Allocate storage for the new local indices. 04005 RCP<node_type> node = getRowMap ()->getNode (); 04006 newLclInds1D = t_LocalOrdinal_1D ("Tpetra::CrsGraph::ind", 04007 nodeNumAllocated_); 04008 // Attempt to convert the new indices locally. 04009 for (size_t lclRow = 0; lclRow < lclNumRows; ++lclRow) { 04010 const RowInfo rowInfo = getRowInfo (lclRow); 04011 const size_t beg = rowInfo.offset1D; 04012 const size_t end = beg + rowInfo.numEntries; 04013 for (size_t k = beg; k < end; ++k) { 04014 // FIXME (mfh 21 Aug 2014) This assumes UVM. Should 04015 // use a DualView instead. 04016 const LO oldLclCol = k_lclInds1D_(k); 04017 if (oldLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) { 04018 allCurColIndsValid = false; 04019 break; // Stop at the first invalid index 04020 } 04021 const GO gblCol = oldColMap.getGlobalElement (oldLclCol); 04022 04023 // The above conversion MUST succeed. Otherwise, the 04024 // current local index is invalid, which means that 04025 // the graph was constructed incorrectly. 04026 if (gblCol == Teuchos::OrdinalTraits<GO>::invalid ()) { 04027 allCurColIndsValid = false; 04028 break; // Stop at the first invalid index 04029 } 04030 else { 04031 const LO newLclCol = newColMap->getLocalElement (gblCol); 04032 if (newLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) { 04033 localSuffices = false; 04034 break; // Stop at the first invalid index 04035 } 04036 // FIXME (mfh 21 Aug 2014) This assumes UVM. Should 04037 // use a DualView instead. 04038 newLclInds1D(k) = newLclCol; 04039 } 04040 } // for each entry in the current row 04041 } // for each locally owned row 04042 } 04043 else { // pftype_ == DynamicProfile 04044 // Allocate storage for the new local indices. We only 04045 // allocate the outer array here; we will allocate the 04046 // inner arrays below. 04047 newLclInds2D = Teuchos::arcp<Teuchos::Array<LO> > (lclNumRows); 04048 04049 // Attempt to convert the new indices locally. 04050 for (size_t lclRow = 0; lclRow < lclNumRows; ++lclRow) { 04051 const RowInfo rowInfo = getRowInfo (lclRow); 04052 newLclInds2D.resize (rowInfo.allocSize); 04053 04054 Teuchos::ArrayView<const LO> oldLclRowView = getLocalView (rowInfo); 04055 Teuchos::ArrayView<LO> newLclRowView = (newLclInds2D[lclRow]) (); 04056 04057 for (size_t k = 0; k < rowInfo.numEntries; ++k) { 04058 const LO oldLclCol = oldLclRowView[k]; 04059 if (oldLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) { 04060 allCurColIndsValid = false; 04061 break; // Stop at the first invalid index 04062 } 04063 const GO gblCol = oldColMap.getGlobalElement (oldLclCol); 04064 04065 // The above conversion MUST succeed. Otherwise, the 04066 // local index is invalid and the graph is wrong. 04067 if (gblCol == Teuchos::OrdinalTraits<GO>::invalid ()) { 04068 allCurColIndsValid = false; 04069 break; // Stop at the first invalid index 04070 } 04071 else { 04072 const LO newLclCol = newColMap->getLocalElement (gblCol); 04073 if (newLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) { 04074 localSuffices = false; 04075 break; // Stop at the first invalid index. 04076 } 04077 newLclRowView[k] = newLclCol; 04078 } 04079 } // for each entry in the current row 04080 } // for each locally owned row 04081 } // pftype_ 04082 } 04083 else { // locally indexed, but no column Map 04084 // This case is only possible if replaceColMap() was called 04085 // with a null argument on the calling process. It's 04086 // possible, but it means that this method can't possibly 04087 // succeed, since we have no way of knowing how to convert 04088 // the current local indices to global indices. 04089 allCurColIndsValid = false; 04090 } 04091 } 04092 else { // globally indexed 04093 // If the graph is globally indexed, we don't need to save 04094 // local indices, but we _do_ need to know whether the current 04095 // global indices are valid in the new column Map. We may 04096 // need to do a getRemoteIndexList call to find this out. 04097 // 04098 // In this case, it doesn't matter whether the graph currently 04099 // has a column Map. We don't need the old column Map to 04100 // convert from global indices to the _new_ column Map's local 04101 // indices. Furthermore, we can use the same code, whether 04102 // the graph is static or dynamic profile. 04103 04104 // Test whether the current global indices are in the new 04105 // column Map on the calling process. 04106 for (size_t lclRow = 0; lclRow < lclNumRows; ++lclRow) { 04107 const RowInfo rowInfo = getRowInfo (lclRow); 04108 Teuchos::ArrayView<const GO> oldGblRowView = getGlobalView (rowInfo); 04109 for (size_t k = 0; k < rowInfo.numEntries; ++k) { 04110 const GO gblCol = oldGblRowView[k]; 04111 if (! newColMap->isNodeGlobalElement (gblCol)) { 04112 localSuffices = false; 04113 break; // Stop at the first invalid index 04114 } 04115 } // for each entry in the current row 04116 } // for each locally owned row 04117 } // locally or globally indexed 04118 } // whether indices are allocated 04119 04120 // Do an all-reduce to check both possible error conditions. 04121 int lclSuccess[2]; 04122 lclSuccess[0] = allCurColIndsValid ? 1 : 0; 04123 lclSuccess[1] = localSuffices ? 1 : 0; 04124 int gblSuccess[2]; 04125 gblSuccess[0] = 0; 04126 gblSuccess[1] = 0; 04127 RCP<const Teuchos::Comm<int> > comm = 04128 getRowMap ().is_null () ? Teuchos::null : getRowMap ()->getComm (); 04129 if (! comm.is_null ()) { 04130 reduceAll<int, int> (*comm, REDUCE_MIN, 2, lclSuccess, gblSuccess); 04131 } 04132 04133 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 04134 gblSuccess[0] == 0, std::runtime_error, "It is not possible to continue." 04135 " The most likely reason is that the graph is locally indexed, but the " 04136 "column Map is missing (null) on some processes, due to a previous call " 04137 "to replaceColMap()."); 04138 04139 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 04140 gblSuccess[1] == 0, std::runtime_error, "On some process, the graph " 04141 "contains column indices that are in the old column Map, but not in the " 04142 "new column Map (on that process). This method does NOT redistribute " 04143 "data; it does not claim to do the work of an Import or Export operation." 04144 " This means that for all processess, the calling process MUST own all " 04145 "column indices, in both the old column Map and the new column Map. In " 04146 "this case, you will need to do an Import or Export operation to " 04147 "redistribute data."); 04148 04149 // Commit the results. 04150 if (isLocallyIndexed ()) { 04151 if (pftype_ == StaticProfile) { 04152 k_lclInds1D_ = newLclInds1D; 04153 } else { // dynamic profile 04154 lclInds2D_ = newLclInds2D; 04155 } 04156 // We've reindexed, so we don't know if the indices are sorted. 04157 // 04158 // FIXME (mfh 17 Sep 2014) It could make sense to check this, 04159 // since we're already going through all the indices above. We 04160 // could also sort each row in place; that way, we would only 04161 // have to make one pass over the rows. 04162 indicesAreSorted_ = false; 04163 if (sortIndicesInEachRow) { 04164 // NOTE (mfh 17 Sep 2014) The graph must be locally indexed in 04165 // order to call this method. 04166 // 04167 // FIXME (mfh 17 Sep 2014) This violates the strong exception 04168 // guarantee. It would be better to sort the new index arrays 04169 // before committing them. 04170 sortAllIndices (); 04171 } 04172 } 04173 colMap_ = newColMap; 04174 04175 if (newImport.is_null ()) { 04176 // FIXME (mfh 19 Aug 2014) Should use the above all-reduce to 04177 // check whether the input Import is null on any process. 04178 // 04179 // If the domain Map hasn't been set yet, we can't compute a new 04180 // Import object. Leave it what it is; it should be null, but 04181 // it doesn't matter. If the domain Map _has_ been set, then 04182 // compute a new Import object if necessary. 04183 if (! domainMap_.is_null ()) { 04184 if (! domainMap_->isSameAs (* newColMap)) { 04185 importer_ = Teuchos::rcp (new import_type (domainMap_, newColMap)); 04186 } else { 04187 importer_ = Teuchos::null; // don't need an Import 04188 } 04189 } 04190 } else { 04191 // The caller gave us an Import object. Assume that it's valid. 04192 importer_ = newImport; 04193 } 04194 } 04195 04196 04197 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 04198 void 04199 CrsGraph< 04200 LocalOrdinal, GlobalOrdinal, 04201 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 04202 replaceDomainMapAndImporter (const Teuchos::RCP<const map_type>& newDomainMap, 04203 const Teuchos::RCP<const import_type>& newImporter) 04204 { 04205 const char prefix[] = "Tpetra::CrsGraph::replaceDomainMapAndImporter: "; 04206 TEUCHOS_TEST_FOR_EXCEPTION( 04207 colMap_.is_null (), std::invalid_argument, prefix << "You may not call " 04208 "this method unless the graph already has a column Map."); 04209 TEUCHOS_TEST_FOR_EXCEPTION( 04210 newDomainMap.is_null (), std::invalid_argument, 04211 prefix << "The new domain Map must be nonnull."); 04212 04213 #ifdef HAVE_TPETRA_DEBUG 04214 if (newImporter.is_null ()) { 04215 // It's not a good idea to put expensive operations in a macro 04216 // clause, even if they are side effect - free, because macros 04217 // don't promise that they won't evaluate their arguments more 04218 // than once. It's polite for them to do so, but not required. 04219 const bool colSameAsDom = colMap_->isSameAs (*newDomainMap); 04220 TEUCHOS_TEST_FOR_EXCEPTION( 04221 colSameAsDom, std::invalid_argument, "If the new Import is null, " 04222 "then the new domain Map must be the same as the current column Map."); 04223 } 04224 else { 04225 const bool colSameAsTgt = 04226 colMap_->isSameAs (* (newImporter->getTargetMap ())); 04227 const bool newDomSameAsSrc = 04228 newDomainMap->isSameAs (* (newImporter->getSourceMap ())); 04229 TEUCHOS_TEST_FOR_EXCEPTION( 04230 ! colSameAsTgt || ! newDomSameAsSrc, std::invalid_argument, "If the " 04231 "new Import is nonnull, then the current column Map must be the same " 04232 "as the new Import's target Map, and the new domain Map must be the " 04233 "same as the new Import's source Map."); 04234 } 04235 #endif // HAVE_TPETRA_DEBUG 04236 04237 domainMap_ = newDomainMap; 04238 importer_ = Teuchos::rcp_const_cast<import_type> (newImporter); 04239 } 04240 04241 04242 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 04243 typename CrsGraph< 04244 LocalOrdinal, GlobalOrdinal, 04245 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::LocalStaticCrsGraphType 04246 CrsGraph< 04247 LocalOrdinal, GlobalOrdinal, 04248 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 04249 getLocalGraph_Kokkos () const 04250 { 04251 return k_lclGraph_; 04252 } 04253 04254 04255 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 04256 void 04257 CrsGraph< 04258 LocalOrdinal, 04259 GlobalOrdinal, 04260 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 04261 computeGlobalConstants () 04262 { 04263 using Teuchos::as; 04264 using Teuchos::outArg; 04265 using Teuchos::reduceAll; 04266 typedef LocalOrdinal LO; 04267 typedef GlobalOrdinal GO; 04268 typedef global_size_t GST; 04269 04270 #ifdef HAVE_TPETRA_DEBUG 04271 TEUCHOS_TEST_FOR_EXCEPTION(! hasColMap(), std::logic_error, "Tpetra::" 04272 "CrsGraph::computeGlobalConstants: At this point, the graph should have " 04273 "a column Map, but it does not. Please report this bug to the Tpetra " 04274 "developers."); 04275 #endif // HAVE_TPETRA_DEBUG 04276 04277 // If necessary, (re)compute the local constants: nodeNumDiags_, 04278 // lowerTriangular_, upperTriangular_, and nodeMaxNumRowEntries_. 04279 if (! haveLocalConstants_) { 04280 // We have actually already computed nodeNumEntries_. 04281 // nodeNumEntries_ gets updated by methods that insert or remove 04282 // indices (including setAllIndices and 04283 // expertStaticFillComplete). Before fillComplete, its count 04284 // may include duplicate column indices in the same row. 04285 // However, mergeRowIndices and mergeRowIndicesAndValues both 04286 // subtract off merged indices in each row from the total count. 04287 // Thus, nodeNumEntries_ _should_ be accurate at this point, 04288 // meaning that we don't have to re-count it here. 04289 04290 // Reset local properties 04291 upperTriangular_ = true; 04292 lowerTriangular_ = true; 04293 nodeMaxNumRowEntries_ = 0; 04294 nodeNumDiags_ = 0; 04295 04296 // At this point, we know that we have both a row Map and a column Map. 04297 const map_type& rowMap = *rowMap_; 04298 const map_type& colMap = *colMap_; 04299 04300 // Go through all the entries of the graph. Count the number of 04301 // diagonal elements we encounter, and figure out whether the 04302 // graph is lower or upper triangular. Diagonal elements are 04303 // determined using global indices, with respect to the whole 04304 // graph. However, lower or upper triangularity is a local 04305 // property, and is determined using local indices. 04306 // 04307 // At this point, indices have already been sorted in each row. 04308 // That makes finding out whether the graph is lower / upper 04309 // triangular easier. 04310 if (indicesAreAllocated () && nodeNumAllocated_ > 0) { 04311 const size_t numLocalRows = getNodeNumRows (); 04312 for (size_t localRow = 0; localRow < numLocalRows; ++localRow) { 04313 const GO globalRow = rowMap.getGlobalElement (localRow); 04314 // Find the local (column) index for the diagonal entry. 04315 // This process might not necessarily own _any_ entries in 04316 // the current row. If it doesn't, skip this row. It won't 04317 // affect any of the attributes (nodeNumDiagons_, 04318 // upperTriangular_, lowerTriangular_, or 04319 // nodeMaxNumRowEntries_) which this loop sets. 04320 const LO rlcid = colMap.getLocalElement (globalRow); 04321 // This process owns one or more entries in the current row. 04322 RowInfo rowInfo = getRowInfo (localRow); 04323 ArrayView<const LO> rview = getLocalView (rowInfo); 04324 typename ArrayView<const LO>::iterator beg, end, cur; 04325 beg = rview.begin(); 04326 end = beg + rowInfo.numEntries; 04327 if (beg != end) { 04328 for (cur = beg; cur != end; ++cur) { 04329 // is this the diagonal? 04330 if (rlcid == *cur) ++nodeNumDiags_; 04331 } 04332 // Local column indices are sorted in each row. That means 04333 // the smallest column index in this row (on this process) 04334 // is *beg, and the largest column index in this row (on 04335 // this process) is *(end - 1). We know that end - 1 is 04336 // valid because beg != end. 04337 const size_t smallestCol = static_cast<size_t> (*beg); 04338 const size_t largestCol = static_cast<size_t> (*(end - 1)); 04339 04340 if (smallestCol < localRow) { 04341 upperTriangular_ = false; 04342 } 04343 if (localRow < largestCol) { 04344 lowerTriangular_ = false; 04345 } 04346 } 04347 // Update the max number of entries over all rows. 04348 nodeMaxNumRowEntries_ = std::max (nodeMaxNumRowEntries_, rowInfo.numEntries); 04349 } 04350 } 04351 haveLocalConstants_ = true; 04352 } // if my process doesn't have local constants 04353 04354 // Compute global constants from local constants. Processes that 04355 // already have local constants still participate in the 04356 // all-reduces, using their previously computed values. 04357 if (haveGlobalConstants_ == false) { 04358 // Promote all the nodeNum* and nodeMaxNum* quantities from 04359 // size_t to global_size_t, when doing the all-reduces for 04360 // globalNum* / globalMaxNum* results. 04361 // 04362 // FIXME (mfh 07 May 2013) Unfortunately, we either have to do 04363 // this in two all-reduces (one for the sum and the other for 04364 // the max), or use a custom MPI_Op that combines the sum and 04365 // the max. The latter might even be slower than two 04366 // all-reduces on modern network hardware. It would also be a 04367 // good idea to use nonblocking all-reduces (MPI 3), so that we 04368 // don't have to wait around for the first one to finish before 04369 // starting the second one. 04370 GST lcl[2], gbl[2]; 04371 lcl[0] = static_cast<GST> (nodeNumEntries_); 04372 lcl[1] = static_cast<GST> (nodeNumDiags_); 04373 reduceAll<int,GST> (*getComm (), Teuchos::REDUCE_SUM, 04374 2, lcl, gbl); 04375 globalNumEntries_ = gbl[0]; 04376 globalNumDiags_ = gbl[1]; 04377 reduceAll<int,GST> (*getComm (), Teuchos::REDUCE_MAX, 04378 static_cast<GST> (nodeMaxNumRowEntries_), 04379 outArg (globalMaxNumRowEntries_)); 04380 haveGlobalConstants_ = true; 04381 } 04382 } 04383 04384 04385 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 04386 void 04387 CrsGraph< 04388 LocalOrdinal, GlobalOrdinal, 04389 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 04390 makeIndicesLocal () 04391 { 04392 using Teuchos::arcp; 04393 using Teuchos::Array; 04394 typedef LocalOrdinal LO; 04395 typedef GlobalOrdinal GO; 04396 const char tfecfFuncName[] = "makeIndicesLocal"; 04397 04398 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 04399 ! this->hasColMap (), std::logic_error, ": The graph does not have a " 04400 "column Map yet. This method should never be called in that case. " 04401 "Please report this bug to the Tpetra developers."); 04402 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 04403 this->getColMap ().is_null (), std::logic_error, ": The graph claims " 04404 "that it has a column Map, because hasColMap() returns true. However, " 04405 "the result of getColMap() is null. This should never happen. Please " 04406 "report this bug to the Tpetra developers."); 04407 04408 const size_t lclNumRows = this->getNodeNumRows (); 04409 const map_type& colMap = * (this->getColMap ()); 04410 04411 if (isGloballyIndexed () && lclNumRows > 0) { 04412 typename t_numRowEntries_::t_host h_numRowEnt = k_numRowEntries_.h_view; 04413 04414 // allocate data for local indices 04415 if (getProfileType () == StaticProfile) { 04416 // If GO and LO are the same size, we can reuse the existing 04417 // array of 1-D index storage to convert column indices from 04418 // GO to LO. Otherwise, we'll just allocate a new buffer. 04419 if (nodeNumAllocated_ && Kokkos::Impl::is_same<LO,GO>::value) { 04420 k_lclInds1D_ = Kokkos::Impl::if_c<Kokkos::Impl::is_same<LO,GO>::value, 04421 t_GlobalOrdinal_1D, 04422 t_LocalOrdinal_1D >::select (k_gblInds1D_, k_lclInds1D_); 04423 } 04424 else { 04425 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 04426 k_rowPtrs_.dimension_0 () == 0, std::logic_error, ": This should " 04427 "never happen at this point. Please report this bug to the Tpetra " 04428 "developers."); 04429 const size_t numEnt = k_rowPtrs_[lclNumRows]; 04430 04431 k_lclInds1D_ = t_LocalOrdinal_1D ("Tpetra::CrsGraph::lclind", numEnt); 04432 } 04433 04434 for (size_t r = 0; r < lclNumRows; ++r) { 04435 const size_t offset = k_rowPtrs_(r); 04436 const size_t numentry = h_numRowEnt(r); 04437 for (size_t j = 0; j < numentry; ++j) { 04438 const GO gid = k_gblInds1D_(offset + j); 04439 const LO lid = colMap.getLocalElement (gid); 04440 k_lclInds1D_(offset + j) = lid; 04441 #ifdef HAVE_TPETRA_DEBUG 04442 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 04443 k_lclInds1D_(offset + j) == Teuchos::OrdinalTraits<LO>::invalid(), 04444 std::logic_error, 04445 ": In local row r=" << r << ", global column " << gid << " is " 04446 "not in the column Map. This should never happen. Please " 04447 "report this bug to the Tpetra developers."); 04448 #endif // HAVE_TPETRA_DEBUG 04449 } 04450 } 04451 // We've converted column indices from global to local, so we 04452 // can deallocate the global column indices (which we know are 04453 // in 1-D storage, because the graph has static profile). 04454 k_gblInds1D_ = t_GlobalOrdinal_1D (); 04455 gblInds1D_ = Teuchos::null; 04456 } 04457 else { // the graph has dynamic profile (2-D index storage) 04458 lclInds2D_ = arcp<Array<LO> > (lclNumRows); 04459 for (size_t r = 0; r < lclNumRows; ++r) { 04460 if (! gblInds2D_[r].empty ()) { 04461 const GO* const ginds = gblInds2D_[r].getRawPtr (); 04462 const size_t rna = gblInds2D_[r].size (); 04463 const size_t numentry = h_numRowEnt(r); 04464 lclInds2D_[r].resize (rna); 04465 LO* const linds = lclInds2D_[r].getRawPtr (); 04466 for (size_t j = 0; j < numentry; ++j) { 04467 const GO gid = ginds[j]; 04468 const LO lid = colMap.getLocalElement (gid); 04469 linds[j] = lid; 04470 #ifdef HAVE_TPETRA_DEBUG 04471 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 04472 linds[j] == Teuchos::OrdinalTraits<LO>::invalid(), 04473 std::logic_error, 04474 ": Global column ginds[j=" << j << "]=" << ginds[j] 04475 << " of local row r=" << r << " is not in the column Map. " 04476 "This should never happen. Please report this bug to the " 04477 "Tpetra developers."); 04478 #endif // HAVE_TPETRA_DEBUG 04479 } 04480 } 04481 } 04482 gblInds2D_ = Teuchos::null; 04483 } 04484 } // globallyIndexed() && lclNumRows > 0 04485 04486 k_lclGraph_ = LocalStaticCrsGraphType (k_lclInds1D_, k_rowPtrs_); 04487 indicesAreLocal_ = true; 04488 indicesAreGlobal_ = false; 04489 checkInternalState (); 04490 } 04491 04492 04493 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 04494 void 04495 CrsGraph< 04496 LocalOrdinal, GlobalOrdinal, 04497 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 04498 sortAllIndices () 04499 { 04500 // this should be called only after makeIndicesLocal() 04501 TEUCHOS_TEST_FOR_EXCEPT( isGloballyIndexed () ); 04502 if (isSorted () == false) { 04503 // FIXME (mfh 06 Mar 2014) This would be a good place for a 04504 // thread-parallel kernel. 04505 for (size_t row = 0; row < getNodeNumRows (); ++row) { 04506 RowInfo rowInfo = getRowInfo (row); 04507 sortRowIndices (rowInfo); 04508 } 04509 } 04510 indicesAreSorted_ = true; // we just sorted every row 04511 } 04512 04513 04514 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 04515 void 04516 CrsGraph< 04517 LocalOrdinal, GlobalOrdinal, 04518 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 04519 makeColMap () 04520 { 04521 using Teuchos::Array; 04522 using Teuchos::ArrayView; 04523 using Teuchos::rcp; 04524 using Teuchos::REDUCE_MAX; 04525 using Teuchos::reduceAll; 04526 using std::endl; 04527 typedef LocalOrdinal LO; 04528 typedef GlobalOrdinal GO; 04529 const char tfecfFuncName[] = "makeColMap"; 04530 04531 if (hasColMap ()) { // The graph already has a column Map. 04532 // FIXME (mfh 26 Feb 2013): This currently prevents structure 04533 // changes that affect the column Map. 04534 return; 04535 } 04536 04537 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 04538 isLocallyIndexed (), std::runtime_error, 04539 ": The graph is locally indexed. Calling makeColMap() to make the " 04540 "column Map requires that the graph be globally indexed."); 04541 04542 // After the calling process is done going through all of the rows 04543 // it owns, myColumns will contain the list of indices owned by 04544 // this process in the column Map. 04545 Array<GO> myColumns; 04546 04547 // If we reach this point, we don't have a column Map yet, so the 04548 // graph can't be locally indexed. Thus, isGloballyIndexed() == 04549 // false means that the graph is empty on this process, so 04550 // myColumns will be left empty. 04551 if (isGloballyIndexed ()) { 04552 // Go through all the rows, finding the populated column indices. 04553 // 04554 // Our final list of indices for the column Map constructor will 04555 // have the following properties (all of which are with respect 04556 // to the calling process): 04557 // 04558 // 1. Indices in the domain Map go first. 04559 // 2. Indices not in the domain Map follow, ordered first 04560 // contiguously by their owning process rank (in the domain 04561 // Map), then in increasing order within that. 04562 // 3. No duplicate indices. 04563 // 04564 // This imitates the ordering used by Aztec(OO) and Epetra. 04565 // Storing indices owned by the same process (in the domain Map) 04566 // contiguously permits the use of contiguous send and receive 04567 // buffers. 04568 // 04569 // We begin by partitioning the column indices into "local" GIDs 04570 // (owned by the domain Map) and "remote" GIDs (not owned by the 04571 // domain Map). We use the same order for local GIDs as the 04572 // domain Map, so we track them in place in their array. We use 04573 // an std::set (RemoteGIDSet) to keep track of remote GIDs, so 04574 // that we don't have to merge duplicates later. 04575 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid (); 04576 size_t numLocalColGIDs = 0, numRemoteColGIDs = 0; 04577 04578 // GIDisLocal[lid] == 0 if and only if local index lid in the 04579 // domain Map is remote (not local). 04580 Array<char> GIDisLocal (domainMap_->getNodeNumElements (), 0); 04581 std::set<GO> RemoteGIDSet; 04582 // This preserves the not-sorted Epetra order of GIDs. 04583 std::vector<GO> RemoteGIDUnorderedVector; 04584 const size_t myNumRows = getNodeNumRows (); 04585 for (size_t r = 0; r < myNumRows; ++r) { 04586 RowInfo rowinfo = getRowInfo (r); 04587 if (rowinfo.numEntries > 0) { 04588 // NOTE (mfh 02 Sep 2014) getGlobalView() returns a view of 04589 // all the space in the row, not just the occupied entries. 04590 // (This matters for the case of unpacked 1-D storage. We 04591 // might not have packed it yet.) That's why we need to 04592 // take a subview. 04593 ArrayView<const GO> rowGids = getGlobalView (rowinfo); 04594 rowGids = rowGids (0, rowinfo.numEntries); 04595 04596 for (size_t k = 0; k < rowinfo.numEntries; ++k) { 04597 const GO gid = rowGids[k]; 04598 const LO lid = domainMap_->getLocalElement (gid); 04599 if (lid != LINV) { 04600 const char alreadyFound = GIDisLocal[lid]; 04601 if (alreadyFound == 0) { 04602 GIDisLocal[lid] = static_cast<char> (1); 04603 ++numLocalColGIDs; 04604 } 04605 } 04606 else { 04607 const bool notAlreadyFound = RemoteGIDSet.insert (gid).second; 04608 if (notAlreadyFound) { // gid did not exist in the set before 04609 if (! sortGhostsAssociatedWithEachProcessor_) { 04610 // The user doesn't want to sort remote GIDs (for 04611 // each remote process); they want us to keep remote 04612 // GIDs in their original order. We do this by 04613 // stuffing each remote GID into an array as we 04614 // encounter it for the first time. The std::set 04615 // helpfully tracks first encounters. 04616 RemoteGIDUnorderedVector.push_back (gid); 04617 } 04618 ++numRemoteColGIDs; 04619 } 04620 } 04621 } // for each entry k in row r 04622 } // if row r contains > 0 entries 04623 } // for each locally owned row r 04624 04625 // Possible short-circuit for serial scenario: 04626 // 04627 // If all domain GIDs are present as column indices, then set 04628 // ColMap=DomainMap. By construction, LocalGIDs is a subset of 04629 // DomainGIDs. 04630 // 04631 // If we have 04632 // * Number of remote GIDs is 0, so that ColGIDs == LocalGIDs, 04633 // and 04634 // * Number of local GIDs is number of domain GIDs 04635 // then 04636 // * LocalGIDs \subset DomainGIDs && size(LocalGIDs) == 04637 // size(DomainGIDs) => DomainGIDs == LocalGIDs == ColGIDs 04638 // on the calling process. 04639 // 04640 // We will concern ourselves only with the special case of a 04641 // serial DomainMap, obviating the need for communication. 04642 // 04643 // If 04644 // * DomainMap has a serial communicator 04645 // then we can set the column Map as the domain Map 04646 // return. Benefit: this graph won't need an Import object 04647 // later. 04648 // 04649 // Note, for a serial domain map, there can be no RemoteGIDs, 04650 // because there are no remote processes. Likely explanations 04651 // for this are: 04652 // * user submitted erroneous column indices 04653 // * user submitted erroneous domain Map 04654 if (domainMap_->getComm ()->getSize () == 1) { 04655 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 04656 numRemoteColGIDs != 0, std::runtime_error, 04657 ": " << numRemoteColGIDs << " column " 04658 << (numRemoteColGIDs != 1 ? "indices are" : "index is") 04659 << " not in the domain Map." << endl 04660 << "Either these indices are invalid or the domain Map is invalid." 04661 << endl << "Remember that nonsquare matrices, or matrices where the " 04662 "row and range Maps are different, require calling the version of " 04663 "fillComplete that takes the domain and range Maps as input."); 04664 if (numLocalColGIDs == domainMap_->getNodeNumElements()) { 04665 colMap_ = domainMap_; 04666 checkInternalState (); 04667 return; 04668 } 04669 } 04670 04671 // Populate myColumns with a list of all column GIDs. Put 04672 // locally owned (in the domain Map) GIDs at the front: they 04673 // correspond to "same" and "permuted" entries between the 04674 // column Map and the domain Map. Put remote GIDs at the back. 04675 myColumns.resize (numLocalColGIDs + numRemoteColGIDs); 04676 // get pointers into myColumns for each part 04677 ArrayView<GO> LocalColGIDs = myColumns (0, numLocalColGIDs); 04678 ArrayView<GO> RemoteColGIDs = myColumns (numLocalColGIDs, numRemoteColGIDs); 04679 04680 // Copy the remote GIDs into myColumns 04681 if (sortGhostsAssociatedWithEachProcessor_) { 04682 // The std::set puts GIDs in increasing order. 04683 std::copy (RemoteGIDSet.begin(), RemoteGIDSet.end(), 04684 RemoteColGIDs.begin()); 04685 } else { 04686 // Respect the originally encountered order. 04687 std::copy (RemoteGIDUnorderedVector.begin(), 04688 RemoteGIDUnorderedVector.end(), RemoteColGIDs.begin()); 04689 } 04690 04691 // Make a list of process ranks corresponding to the remote GIDs. 04692 Array<int> RemoteImageIDs (numRemoteColGIDs); 04693 // Look up the remote process' ranks in the domain Map. 04694 { 04695 const LookupStatus stat = 04696 domainMap_->getRemoteIndexList (RemoteColGIDs, RemoteImageIDs ()); 04697 #ifdef HAVE_TPETRA_DEBUG 04698 // If any process returns IDNotPresent, then at least one of 04699 // the remote indices was not present in the domain Map. This 04700 // means that the Import object cannot be constructed, because 04701 // of incongruity between the column Map and domain Map. 04702 // This has two likely causes: 04703 // - The user has made a mistake in the column indices 04704 // - The user has made a mistake with respect to the domain Map 04705 const int missingID_lcl = (stat == IDNotPresent ? 1 : 0); 04706 int missingID_gbl = 0; 04707 reduceAll<int, int> (*getComm (), REDUCE_MAX, missingID_lcl, 04708 outArg (missingID_gbl)); 04709 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 04710 missingID_gbl == 1, std::runtime_error, 04711 ": Some column indices are not in the domain Map." << endl 04712 << "Either these column indices are invalid or the domain Map is " 04713 "invalid." << endl << "Likely cause: For a nonsquare matrix, you " 04714 "must give the domain and range Maps as input to fillComplete."); 04715 #else 04716 (void) stat; // forestall compiler warning for unused variable 04717 #endif // HAVE_TPETRA_DEBUG 04718 } 04719 // Sort incoming remote column indices by their owning process 04720 // rank, so that all columns coming from a given remote process 04721 // are contiguous. This means the Import's Distributor doesn't 04722 // need to reorder data. 04723 // 04724 // NOTE (mfh 02 Sep 2014) This needs to be a stable sort, so 04725 // that it respects either of the possible orderings of GIDs 04726 // (sorted, or original order) specified above. 04727 sort2 (RemoteImageIDs.begin(), RemoteImageIDs.end(), RemoteColGIDs.begin()); 04728 04729 // Copy the local GIDs into myColumns. Two cases: 04730 // 1. If the number of Local column GIDs is the same as the 04731 // number of Local domain GIDs, we can simply read the domain 04732 // GIDs into the front part of ColIndices (see logic above 04733 // from the serial short circuit case) 04734 // 2. We step through the GIDs of the DomainMap, checking to see 04735 // if each domain GID is a column GID. We want to do this to 04736 // maintain a consistent ordering of GIDs between the columns 04737 // and the domain. 04738 04739 const size_t numDomainElts = domainMap_->getNodeNumElements (); 04740 if (numLocalColGIDs == numDomainElts) { 04741 // If the number of locally owned GIDs are the same as the 04742 // number of local domain Map elements, then the local domain 04743 // Map elements are the same as the locally owned GIDs. 04744 if (domainMap_->isContiguous ()) { 04745 // NOTE (mfh 03 Mar 2013, 02 Sep 2014) In the common case 04746 // that the domain Map is contiguous, it's more efficient to 04747 // avoid calling getNodeElementList(), since that 04748 // permanently constructs and caches the GID list in the 04749 // contiguous Map. 04750 GO curColMapGid = domainMap_->getMinGlobalIndex (); 04751 for (size_t k = 0; k < numLocalColGIDs; ++k, ++curColMapGid) { 04752 LocalColGIDs[k] = curColMapGid; 04753 } 04754 } 04755 else { 04756 ArrayView<const GO> domainElts = domainMap_->getNodeElementList (); 04757 std::copy (domainElts.begin(), domainElts.end(), LocalColGIDs.begin()); 04758 } 04759 } 04760 else { 04761 // Count the number of locally owned GIDs, both to keep track 04762 // of the current array index, and as a sanity check. 04763 size_t numLocalCount = 0; 04764 if (domainMap_->isContiguous ()) { 04765 // NOTE (mfh 03 Mar 2013, 02 Sep 2014) In the common case 04766 // that the domain Map is contiguous, it's more efficient to 04767 // avoid calling getNodeElementList(), since that 04768 // permanently constructs and caches the GID list in the 04769 // contiguous Map. 04770 GO curColMapGid = domainMap_->getMinGlobalIndex (); 04771 for (size_t i = 0; i < numDomainElts; ++i, ++curColMapGid) { 04772 if (GIDisLocal[i]) { 04773 LocalColGIDs[numLocalCount++] = curColMapGid; 04774 } 04775 } 04776 } 04777 else { 04778 ArrayView<const GO> domainElts = domainMap_->getNodeElementList (); 04779 for (size_t i = 0; i < numDomainElts; ++i) { 04780 if (GIDisLocal[i]) { 04781 LocalColGIDs[numLocalCount++] = domainElts[i]; 04782 } 04783 } 04784 } 04785 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 04786 numLocalCount != numLocalColGIDs, std::logic_error, 04787 ": numLocalCount = " << numLocalCount << " != numLocalColGIDs = " 04788 << numLocalColGIDs << ". This should never happen. Please report " 04789 "this bug to the Tpetra developers."); 04790 } 04791 04792 // FIXME (mfh 03 Apr 2013) Now would be a good time to use the 04793 // information we collected above to construct the Import. In 04794 // particular, building an Import requires: 04795 // 04796 // 1. numSameIDs (length of initial contiguous sequence of GIDs 04797 // on this process that are the same in both Maps; this 04798 // equals the number of domain Map elements on this process) 04799 // 04800 // 2. permuteToLIDs and permuteFromLIDs (both empty in this 04801 // case, since there's no permutation going on; the column 04802 // Map starts with the domain Map's GIDs, and immediately 04803 // after them come the remote GIDs) 04804 // 04805 // 3. remoteGIDs (exactly those GIDs that we found out above 04806 // were not in the domain Map) and remoteLIDs (which we could 04807 // have gotten above by using the three-argument version of 04808 // getRemoteIndexList() that computes local indices as well 04809 // as process ranks, instead of the two-argument version that 04810 // was used above) 04811 // 04812 // 4. remotePIDs (which we have from the getRemoteIndexList() 04813 // call above) 04814 // 04815 // 5. Sorting remotePIDs, and applying that permutation to 04816 // remoteGIDs and remoteLIDs (by calling sort3 above instead 04817 // of sort2) 04818 // 04819 // 6. Everything after the sort3 call in Import::setupExport(): 04820 // a. Create the Distributor via createFromRecvs(), which 04821 // computes exportGIDs and exportPIDs 04822 // b. Compute exportLIDs from exportGIDs (by asking the 04823 // source Map, in this case the domain Map, to convert 04824 // global to local) 04825 // 04826 // Steps 1-5 come for free, since we must do that work anyway in 04827 // order to compute the column Map. In particular, Step 3 is 04828 // even more expensive than Step 6a, since it involves both 04829 // creating and using a new Distributor object. 04830 04831 } // if the graph is globally indexed 04832 04833 const global_size_t gstInv = 04834 Teuchos::OrdinalTraits<global_size_t>::invalid (); 04835 // FIXME (mfh 05 Mar 2014) Doesn't the index base of a Map have to 04836 // be the same as the Map's min GID? If the first column is empty 04837 // (contains no entries), then the column Map's min GID won't 04838 // necessarily be the same as the domain Map's index base. 04839 const GO indexBase = domainMap_->getIndexBase (); 04840 colMap_ = rcp (new map_type (gstInv, myColumns, indexBase, 04841 domainMap_->getComm (), 04842 domainMap_->getNode ())); 04843 04844 checkInternalState (); 04845 } 04846 04847 04848 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 04849 void 04850 CrsGraph< 04851 LocalOrdinal, GlobalOrdinal, 04852 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 04853 mergeAllIndices () 04854 { 04855 TEUCHOS_TEST_FOR_EXCEPT( isGloballyIndexed() ); // call only after makeIndicesLocal() 04856 TEUCHOS_TEST_FOR_EXCEPT( ! isSorted() ); // call only after sortIndices() 04857 if (! isMerged ()) { 04858 for (size_t row=0; row < getNodeNumRows(); ++row) { 04859 RowInfo rowInfo = getRowInfo(row); 04860 mergeRowIndices(rowInfo); 04861 } 04862 // we just merged every row 04863 noRedundancies_ = true; 04864 } 04865 } 04866 04867 04868 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 04869 void 04870 CrsGraph< 04871 LocalOrdinal, GlobalOrdinal, 04872 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 04873 makeImportExport () 04874 { 04875 using Teuchos::ParameterList; 04876 using Teuchos::RCP; 04877 using Teuchos::rcp; 04878 04879 TEUCHOS_TEST_FOR_EXCEPTION(! hasColMap (), std::logic_error, "Tpetra::" 04880 "CrsGraph::makeImportExport: This method may not be called unless the " 04881 "graph has a column Map."); 04882 RCP<ParameterList> params = this->getNonconstParameterList (); // could be null 04883 04884 // Don't do any checks to see if we need to create the Import, if 04885 // it exists already. 04886 // 04887 // FIXME (mfh 25 Mar 2013) This will become incorrect if we 04888 // change CrsGraph in the future to allow changing the column 04889 // Map after fillComplete. For now, the column Map is fixed 04890 // after the first fillComplete call. 04891 if (importer_.is_null ()) { 04892 // Create the Import instance if necessary. 04893 if (domainMap_ != colMap_ && (! domainMap_->isSameAs (*colMap_))) { 04894 if (params.is_null () || ! params->isSublist ("Import")) { 04895 importer_ = rcp (new import_type (domainMap_, colMap_)); 04896 } else { 04897 RCP<ParameterList> importSublist = sublist (params, "Import", true); 04898 importer_ = rcp (new import_type (domainMap_, colMap_, importSublist)); 04899 } 04900 } 04901 } 04902 04903 // Don't do any checks to see if we need to create the Export, if 04904 // it exists already. 04905 if (exporter_.is_null ()) { 04906 // Create the Export instance if necessary. 04907 if (rangeMap_ != rowMap_ && ! rangeMap_->isSameAs (*rowMap_)) { 04908 if (params.is_null () || ! params->isSublist ("Export")) { 04909 exporter_ = rcp (new export_type (rowMap_, rangeMap_)); 04910 } 04911 else { 04912 RCP<ParameterList> exportSublist = sublist (params, "Export", true); 04913 exporter_ = rcp (new export_type (rowMap_, rangeMap_, exportSublist)); 04914 } 04915 } 04916 } 04917 } 04918 04919 04920 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 04921 std::string 04922 CrsGraph< 04923 LocalOrdinal, GlobalOrdinal, 04924 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 04925 description () const 04926 { 04927 std::ostringstream oss; 04928 oss << dist_object_type::description (); 04929 if (isFillComplete ()) { 04930 oss << "{status = fill complete" 04931 << ", global rows = " << getGlobalNumRows() 04932 << ", global cols = " << getGlobalNumCols() 04933 << ", global num entries = " << getGlobalNumEntries() 04934 << "}"; 04935 } 04936 else { 04937 oss << "{status = fill not complete" 04938 << ", global rows = " << getGlobalNumRows() 04939 << "}"; 04940 } 04941 return oss.str(); 04942 } 04943 04944 04945 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 04946 void 04947 CrsGraph< 04948 LocalOrdinal, GlobalOrdinal, 04949 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 04950 describe (Teuchos::FancyOStream &out, 04951 const Teuchos::EVerbosityLevel verbLevel) const 04952 { 04953 const char tfecfFuncName[] = "describe()"; 04954 using std::endl; 04955 using std::setw; 04956 using Teuchos::VERB_DEFAULT; 04957 using Teuchos::VERB_NONE; 04958 using Teuchos::VERB_LOW; 04959 using Teuchos::VERB_MEDIUM; 04960 using Teuchos::VERB_HIGH; 04961 using Teuchos::VERB_EXTREME; 04962 Teuchos::EVerbosityLevel vl = verbLevel; 04963 if (vl == VERB_DEFAULT) vl = VERB_LOW; 04964 RCP<const Comm<int> > comm = this->getComm(); 04965 const int myImageID = comm->getRank(), 04966 numImages = comm->getSize(); 04967 size_t width = 1; 04968 for (size_t dec=10; dec<getGlobalNumRows(); dec *= 10) { 04969 ++width; 04970 } 04971 width = std::max<size_t> (width, static_cast<size_t> (11)) + 2; 04972 Teuchos::OSTab tab (out); 04973 // none: print nothing 04974 // low: print O(1) info from node 0 04975 // medium: print O(P) info, num entries per node 04976 // high: print O(N) info, num entries per row 04977 // extreme: print O(NNZ) info: print graph indices 04978 // 04979 // for medium and higher, print constituent objects at specified verbLevel 04980 if (vl != VERB_NONE) { 04981 if (myImageID == 0) out << this->description() << std::endl; 04982 // O(1) globals, minus what was already printed by description() 04983 if (isFillComplete() && myImageID == 0) { 04984 out << "Global number of diagonals = " << globalNumDiags_ << std::endl; 04985 out << "Global max number of row entries = " << globalMaxNumRowEntries_ << std::endl; 04986 } 04987 // constituent objects 04988 if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) { 04989 if (myImageID == 0) out << "\nRow map: " << std::endl; 04990 rowMap_->describe(out,vl); 04991 if (colMap_ != null) { 04992 if (myImageID == 0) out << "\nColumn map: " << std::endl; 04993 colMap_->describe(out,vl); 04994 } 04995 if (domainMap_ != null) { 04996 if (myImageID == 0) out << "\nDomain map: " << std::endl; 04997 domainMap_->describe(out,vl); 04998 } 04999 if (rangeMap_ != null) { 05000 if (myImageID == 0) out << "\nRange map: " << std::endl; 05001 rangeMap_->describe(out,vl); 05002 } 05003 } 05004 // O(P) data 05005 if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) { 05006 for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) { 05007 if (myImageID == imageCtr) { 05008 out << "Node ID = " << imageCtr << std::endl 05009 << "Node number of entries = " << nodeNumEntries_ << std::endl 05010 << "Node number of diagonals = " << nodeNumDiags_ << std::endl 05011 << "Node max number of entries = " << nodeMaxNumRowEntries_ << std::endl; 05012 if (indicesAreAllocated()) { 05013 out << "Node number of allocated entries = " << nodeNumAllocated_ << std::endl; 05014 } 05015 else { 05016 out << "Indices are not allocated." << std::endl; 05017 } 05018 } 05019 comm->barrier(); 05020 comm->barrier(); 05021 comm->barrier(); 05022 } 05023 } 05024 // O(N) and O(NNZ) data 05025 if (vl == VERB_HIGH || vl == VERB_EXTREME) { 05026 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 05027 ! hasRowInfo (), std::runtime_error, ": reduce verbosity level; " 05028 "graph row information was deleted at fillComplete()."); 05029 for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) { 05030 if (myImageID == imageCtr) { 05031 out << std::setw(width) << "Node ID" 05032 << std::setw(width) << "Global Row" 05033 << std::setw(width) << "Num Entries"; 05034 if (vl == VERB_EXTREME) { 05035 out << "Entries"; 05036 } 05037 out << std::endl; 05038 for (size_t r=0; r < getNodeNumRows(); ++r) { 05039 RowInfo rowinfo = getRowInfo(r); 05040 GlobalOrdinal gid = rowMap_->getGlobalElement(r); 05041 out << std::setw(width) << myImageID 05042 << std::setw(width) << gid 05043 << std::setw(width) << rowinfo.numEntries; 05044 if (vl == VERB_EXTREME) { 05045 if (isGloballyIndexed()) { 05046 ArrayView<const GlobalOrdinal> rowview = getGlobalView(rowinfo); 05047 for (size_t j=0; j < rowinfo.numEntries; ++j) out << rowview[j] << " "; 05048 } 05049 else if (isLocallyIndexed()) { 05050 ArrayView<const LocalOrdinal> rowview = getLocalView(rowinfo); 05051 for (size_t j=0; j < rowinfo.numEntries; ++j) out << colMap_->getGlobalElement(rowview[j]) << " "; 05052 } 05053 } 05054 out << std::endl; 05055 } 05056 } 05057 comm->barrier(); 05058 comm->barrier(); 05059 comm->barrier(); 05060 } 05061 } 05062 } 05063 } 05064 05065 05066 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 05067 bool 05068 CrsGraph< 05069 LocalOrdinal, GlobalOrdinal, 05070 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 05071 checkSizes (const SrcDistObject& source) 05072 { 05073 (void) source; // forestall "unused variable" compiler warnings 05074 05075 // It's not clear what kind of compatibility checks on sizes can 05076 // be performed here. Epetra_CrsGraph doesn't check any sizes for 05077 // compatibility. 05078 return true; 05079 } 05080 05081 05082 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 05083 void 05084 CrsGraph< 05085 LocalOrdinal, GlobalOrdinal, 05086 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 05087 copyAndPermute (const SrcDistObject& source, 05088 size_t numSameIDs, 05089 const Teuchos::ArrayView<const LocalOrdinal> &permuteToLIDs, 05090 const Teuchos::ArrayView<const LocalOrdinal> &permuteFromLIDs) 05091 { 05092 using Teuchos::Array; 05093 using Teuchos::ArrayView; 05094 typedef LocalOrdinal LO; 05095 typedef GlobalOrdinal GO; 05096 const char tfecfFuncName[] = "copyAndPermute"; 05097 typedef CrsGraph<LO, GO, node_type> this_type; 05098 typedef RowGraph<LO, GO, node_type> row_graph_type; 05099 05100 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 05101 permuteToLIDs.size() != permuteFromLIDs.size(), std::runtime_error, 05102 ": permuteToLIDs and permuteFromLIDs must have the same size."); 05103 // Make sure that the source object has the right type. We only 05104 // actually need it to be a RowGraph, with matching first three 05105 // template parameters. If it's a CrsGraph, we can use view mode 05106 // instead of copy mode to get each row's data. 05107 // 05108 // FIXME (mfh 07 Jul 2013) It should not be necessary for any of 05109 // the template parameters but GO to match. GO has to match 05110 // because the graph has to send indices as global ordinals, if 05111 // the source and target graphs do not have the same column Map. 05112 // If LO doesn't match, the graphs could communicate using global 05113 // indices. It could be possible that Node affects the graph's 05114 // storage format, but packAndPrepare should assume a common 05115 // communication format in any case. 05116 const row_graph_type* srcRowGraph = dynamic_cast<const row_graph_type*> (&source); 05117 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 05118 srcRowGraph == NULL, std::invalid_argument, 05119 ": The source object must be a RowGraph with matching first three " 05120 "template parameters."); 05121 05122 // If the source object is actually a CrsGraph, we can use view 05123 // mode instead of copy mode to access the entries in each row, 05124 // if the graph is not fill complete. 05125 const this_type* srcCrsGraph = dynamic_cast<const this_type*> (&source); 05126 05127 const map_type& srcRowMap = * (srcRowGraph->getRowMap ()); 05128 const map_type& tgtRowMap = * (this->getRowMap ()); 05129 const bool src_filled = srcRowGraph->isFillComplete (); 05130 Array<GO> row_copy; 05131 LO myid = 0; 05132 05133 // 05134 // "Copy" part of "copy and permute." 05135 // 05136 if (src_filled || srcCrsGraph == NULL) { 05137 // If the source graph is fill complete, we can't use view mode, 05138 // because the data might be stored in a different format not 05139 // compatible with the expectations of view mode. Also, if the 05140 // source graph is not a CrsGraph, we can't use view mode, 05141 // because RowGraph only provides copy mode access to the data. 05142 for (size_t i = 0; i < numSameIDs; ++i, ++myid) { 05143 const GO gid = srcRowMap.getGlobalElement (myid); 05144 size_t row_length = srcRowGraph->getNumEntriesInGlobalRow (gid); 05145 row_copy.resize (row_length); 05146 size_t check_row_length = 0; 05147 srcRowGraph->getGlobalRowCopy (gid, row_copy (), check_row_length); 05148 this->insertGlobalIndices (gid, row_copy ()); 05149 } 05150 } else { 05151 for (size_t i = 0; i < numSameIDs; ++i, ++myid) { 05152 const GO gid = srcRowMap.getGlobalElement (myid); 05153 ArrayView<const GO> row; 05154 srcCrsGraph->getGlobalRowView (gid, row); 05155 this->insertGlobalIndices (gid, row); 05156 } 05157 } 05158 05159 // 05160 // "Permute" part of "copy and permute." 05161 // 05162 if (src_filled || srcCrsGraph == NULL) { 05163 for (LO i = 0; i < permuteToLIDs.size (); ++i) { 05164 const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs[i]); 05165 const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs[i]); 05166 size_t row_length = srcRowGraph->getNumEntriesInGlobalRow (srcgid); 05167 row_copy.resize (row_length); 05168 size_t check_row_length = 0; 05169 srcRowGraph->getGlobalRowCopy (srcgid, row_copy (), check_row_length); 05170 this->insertGlobalIndices (mygid, row_copy ()); 05171 } 05172 } else { 05173 for (LO i = 0; i < permuteToLIDs.size (); ++i) { 05174 const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs[i]); 05175 const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs[i]); 05176 ArrayView<const GO> row; 05177 srcCrsGraph->getGlobalRowView (srcgid, row); 05178 this->insertGlobalIndices (mygid, row); 05179 } 05180 } 05181 } 05182 05183 05184 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 05185 void 05186 CrsGraph< 05187 LocalOrdinal, GlobalOrdinal, 05188 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 05189 packAndPrepare (const SrcDistObject& source, 05190 const Teuchos::ArrayView<const LocalOrdinal> &exportLIDs, 05191 Teuchos::Array<GlobalOrdinal> &exports, 05192 const Teuchos::ArrayView<size_t> & numPacketsPerLID, 05193 size_t& constantNumPackets, 05194 Distributor &distor) 05195 { 05196 using Teuchos::Array; 05197 const char tfecfFuncName[] = "packAndPrepare"; 05198 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 05199 exportLIDs.size() != numPacketsPerLID.size(), std::runtime_error, 05200 ": exportLIDs and numPacketsPerLID must have the same size."); 05201 typedef RowGraph<LocalOrdinal, GlobalOrdinal, node_type> row_graph_type; 05202 const row_graph_type& srcGraph = dynamic_cast<const row_graph_type&> (source); 05203 05204 // We don't check whether src_graph has had fillComplete called, 05205 // because it doesn't matter whether the *source* graph has been 05206 // fillComplete'd. The target graph can not be fillComplete'd yet. 05207 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 05208 this->isFillComplete (), std::runtime_error, 05209 ": The target graph of an Import or Export must not be fill complete."); 05210 05211 srcGraph.pack (exportLIDs, exports, numPacketsPerLID, constantNumPackets, distor); 05212 } 05213 05214 05215 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 05216 void 05217 CrsGraph< 05218 LocalOrdinal, GlobalOrdinal, 05219 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 05220 pack (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs, 05221 Teuchos::Array<GlobalOrdinal>& exports, 05222 const Teuchos::ArrayView<size_t>& numPacketsPerLID, 05223 size_t& constantNumPackets, 05224 Distributor& distor) const 05225 { 05226 using Teuchos::Array; 05227 typedef LocalOrdinal LO; 05228 typedef GlobalOrdinal GO; 05229 const char tfecfFuncName[] = "pack"; 05230 (void) distor; // forestall "unused argument" compiler warning 05231 05232 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 05233 exportLIDs.size() != numPacketsPerLID.size(), std::runtime_error, 05234 ": exportLIDs and numPacketsPerLID must have the same size."); 05235 05236 const map_type& srcMap = * (this->getMap ()); 05237 constantNumPackets = 0; 05238 05239 // Set numPacketsPerLID[i] to the number of entries owned by the 05240 // calling process in (local) row exportLIDs[i] of the graph, that 05241 // the caller wants us to send out. Compute the total number of 05242 // packets (that is, entries) owned by this process in all the 05243 // rows that the caller wants us to send out. 05244 size_t totalNumPackets = 0; 05245 Array<GO> row; 05246 for (LO i = 0; i < exportLIDs.size (); ++i) { 05247 const GO GID = srcMap.getGlobalElement (exportLIDs[i]); 05248 size_t row_length = this->getNumEntriesInGlobalRow (GID); 05249 numPacketsPerLID[i] = row_length; 05250 totalNumPackets += row_length; 05251 } 05252 05253 exports.resize (totalNumPackets); 05254 05255 // Loop again over the rows to export, and pack rows of indices 05256 // into the output buffer. 05257 size_t exportsOffset = 0; 05258 for (LO i = 0; i < exportLIDs.size (); ++i) { 05259 const GO GID = srcMap.getGlobalElement (exportLIDs[i]); 05260 size_t row_length = this->getNumEntriesInGlobalRow (GID); 05261 row.resize (row_length); 05262 size_t check_row_length = 0; 05263 this->getGlobalRowCopy (GID, row (), check_row_length); 05264 typename Array<GO>::const_iterator row_iter = row.begin(); 05265 typename Array<GO>::const_iterator row_end = row.end(); 05266 size_t j = 0; 05267 for (; row_iter != row_end; ++row_iter, ++j) { 05268 exports[exportsOffset+j] = *row_iter; 05269 } 05270 exportsOffset += row.size(); 05271 } 05272 } 05273 05274 05275 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 05276 void 05277 CrsGraph< 05278 LocalOrdinal, GlobalOrdinal, 05279 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 05280 unpackAndCombine (const Teuchos::ArrayView<const LocalOrdinal> &importLIDs, 05281 const Teuchos::ArrayView<const GlobalOrdinal> &imports, 05282 const Teuchos::ArrayView<size_t> &numPacketsPerLID, 05283 size_t constantNumPackets, 05284 Distributor& /* distor */, 05285 CombineMode /* CM */) 05286 { 05287 using Teuchos::ArrayView; 05288 05289 // FIXME (mfh 02 Apr 2012) REPLACE combine mode has a perfectly 05290 // reasonable meaning, whether or not the matrix is fill complete. 05291 // It's just more work to implement. 05292 05293 // We are not checking the value of the CombineMode input 05294 // argument. For CrsGraph, we only support import/export 05295 // operations if fillComplete has not yet been called. Any 05296 // incoming column-indices are inserted into the target graph. In 05297 // this context, CombineMode values of ADD vs INSERT are 05298 // equivalent. What is the meaning of REPLACE for CrsGraph? If a 05299 // duplicate column-index is inserted, it will be compressed out 05300 // when fillComplete is called. 05301 // 05302 // Note: I think REPLACE means that an existing row is replaced by 05303 // the imported row, i.e., the existing indices are cleared. CGB, 05304 // 6/17/2010 05305 05306 const char tfecfFuncName[] = "unpackAndCombine"; 05307 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 05308 importLIDs.size() != numPacketsPerLID.size(), std::runtime_error, 05309 ": importLIDs and numPacketsPerLID must have the same size."); 05310 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 05311 isFillComplete (), std::runtime_error, 05312 ": Import or Export operations are not allowed on the destination " 05313 "CrsGraph if it is fill complete."); 05314 size_t importsOffset = 0; 05315 05316 typedef typename ArrayView<const LocalOrdinal>::const_iterator iter_type; 05317 iter_type impLIDiter = importLIDs.begin(); 05318 iter_type impLIDend = importLIDs.end(); 05319 05320 for (size_t i = 0; impLIDiter != impLIDend; ++impLIDiter, ++i) { 05321 LocalOrdinal row_length = numPacketsPerLID[i]; 05322 ArrayView<const GlobalOrdinal> row (&imports[importsOffset], row_length); 05323 insertGlobalIndicesFiltered (this->getMap ()->getGlobalElement (*impLIDiter), row); 05324 importsOffset += row_length; 05325 } 05326 } 05327 05328 05329 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 05330 void 05331 CrsGraph< 05332 LocalOrdinal, GlobalOrdinal, 05333 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 05334 removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& newMap) 05335 { 05336 using Teuchos::Comm; 05337 using Teuchos::null; 05338 using Teuchos::ParameterList; 05339 using Teuchos::RCP; 05340 05341 // We'll set all the state "transactionally," so that this method 05342 // satisfies the strong exception guarantee. This object's state 05343 // won't be modified until the end of this method. 05344 RCP<const map_type> rowMap, domainMap, rangeMap, colMap; 05345 RCP<import_type> importer; 05346 RCP<export_type> exporter; 05347 05348 rowMap = newMap; 05349 RCP<const Comm<int> > newComm = 05350 (newMap.is_null ()) ? null : newMap->getComm (); 05351 05352 if (! domainMap_.is_null ()) { 05353 if (domainMap_.getRawPtr () == rowMap_.getRawPtr ()) { 05354 // Common case: original domain and row Maps are identical. 05355 // In that case, we need only replace the original domain Map 05356 // with the new Map. This ensures that the new domain and row 05357 // Maps _stay_ identical. 05358 domainMap = newMap; 05359 } else { 05360 domainMap = domainMap_->replaceCommWithSubset (newComm); 05361 } 05362 } 05363 if (! rangeMap_.is_null ()) { 05364 if (rangeMap_.getRawPtr () == rowMap_.getRawPtr ()) { 05365 // Common case: original range and row Maps are identical. In 05366 // that case, we need only replace the original range Map with 05367 // the new Map. This ensures that the new range and row Maps 05368 // _stay_ identical. 05369 rangeMap = newMap; 05370 } else { 05371 rangeMap = rangeMap_->replaceCommWithSubset (newComm); 05372 } 05373 } 05374 if (! colMap.is_null ()) { 05375 colMap = colMap_->replaceCommWithSubset (newComm); 05376 } 05377 05378 // (Re)create the Export and / or Import if necessary. 05379 if (! newComm.is_null ()) { 05380 RCP<ParameterList> params = this->getNonconstParameterList (); // could be null 05381 // 05382 // The operations below are collective on the new communicator. 05383 // 05384 // (Re)create the Export object if necessary. If I haven't 05385 // called fillComplete yet, I don't have a rangeMap, so I must 05386 // first check if the _original_ rangeMap is not null. Ditto 05387 // for the Import object and the domain Map. 05388 if (! rangeMap_.is_null () && 05389 rangeMap != rowMap && 05390 ! rangeMap->isSameAs (*rowMap)) { 05391 if (params.is_null () || ! params->isSublist ("Export")) { 05392 exporter = rcp (new export_type (rowMap, rangeMap)); 05393 } 05394 else { 05395 RCP<ParameterList> exportSublist = sublist (params, "Export", true); 05396 exporter = rcp (new export_type (rowMap, rangeMap, exportSublist)); 05397 } 05398 } 05399 // (Re)create the Import object if necessary. 05400 if (! domainMap_.is_null () && 05401 domainMap != colMap && 05402 ! domainMap->isSameAs (*colMap)) { 05403 if (params.is_null () || ! params->isSublist ("Import")) { 05404 importer = rcp (new import_type (domainMap, colMap)); 05405 } else { 05406 RCP<ParameterList> importSublist = sublist (params, "Import", true); 05407 importer = rcp (new import_type (domainMap, colMap, importSublist)); 05408 } 05409 } 05410 } // if newComm is not null 05411 05412 // Defer side effects until the end. If no destructors throw 05413 // exceptions (they shouldn't anyway), then this method satisfies 05414 // the strong exception guarantee. 05415 exporter_ = exporter; 05416 importer_ = importer; 05417 rowMap_ = rowMap; 05418 // mfh 31 Mar 2013: DistObject's map_ is the row Map of a CrsGraph 05419 // or CrsMatrix. CrsGraph keeps a redundant pointer (rowMap_) to 05420 // the same object. We might want to get rid of this redundant 05421 // pointer sometime, but for now, we'll leave it alone and just 05422 // set map_ to the same object. 05423 this->map_ = rowMap; 05424 domainMap_ = domainMap; 05425 rangeMap_ = rangeMap; 05426 colMap_ = colMap; 05427 } 05428 05429 05430 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType> 05431 bool 05432 CrsGraph<LocalOrdinal, GlobalOrdinal, 05433 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 05434 hasRowInfo () const 05435 { 05436 if (indicesAreAllocated () && 05437 getProfileType () == StaticProfile && 05438 k_rowPtrs_.dimension_0 () == 0) { 05439 return false; 05440 } else { 05441 return true; 05442 } 05443 } 05444 05445 } // namespace Tpetra 05446 05447 #endif // TPETRA_CRSGRAPH_DEF_HPP
1.7.6.1