|
Tpetra Matrix/Vector Services
Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Tpetra: Templated Linear Algebra Services Package 00005 // Copyright (2008) Sandia Corporation 00006 // 00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, 00008 // the U.S. Government retains certain rights in this software. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00038 // 00039 // ************************************************************************ 00040 // @HEADER 00041 00042 #ifndef TPETRA_KOKKOSREFACTOR_CRSMATRIX_DEF_HPP 00043 #define TPETRA_KOKKOSREFACTOR_CRSMATRIX_DEF_HPP 00044 00045 #ifdef DOXYGEN_USE_ONLY 00046 # include "Tpetra_KokkosRefactor_CrsMatrix_decl.hpp" 00047 #endif 00048 #include <Kokkos_Sequential_SparseKernels.hpp> 00049 00050 namespace Tpetra { 00051 00052 template <class Scalar, 00053 class LocalOrdinal, 00054 class GlobalOrdinal, 00055 class DeviceType> 00056 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, 00057 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00058 CrsMatrix (const RCP<const map_type> &rowMap, 00059 size_t maxNumEntriesPerRow, 00060 ProfileType pftype, 00061 const RCP<Teuchos::ParameterList>& params) : 00062 DistObject<char, LocalOrdinal, GlobalOrdinal, node_type> (rowMap), 00063 storageStatus_ (pftype == StaticProfile ? 
00064 Details::STORAGE_1D_UNPACKED : 00065 Details::STORAGE_2D), 00066 fillComplete_ (false), 00067 frobNorm_ (-STM::one ()) 00068 { 00069 try { 00070 myGraph_ = rcp (new crs_graph_type (rowMap, maxNumEntriesPerRow, pftype, params)); 00071 } 00072 catch (std::exception& e) { 00073 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, 00074 "CrsMatrix constructor: caught exception while allocating CrsGraph " 00075 "object: " << std::endl << e.what ()); 00076 } 00077 staticGraph_ = myGraph_; 00078 resumeFill (params); 00079 checkInternalState (); 00080 } 00081 00082 00083 template <class Scalar, class LocalOrdinal, 00084 class GlobalOrdinal, class DeviceType> 00085 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, 00086 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00087 CrsMatrix (const Teuchos::RCP<const map_type>& rowMap, 00088 const Teuchos::ArrayRCP<const size_t>& NumEntriesPerRowToAlloc, 00089 ProfileType pftype, 00090 const Teuchos::RCP<Teuchos::ParameterList>& params) : 00091 DistObject<char, LocalOrdinal, GlobalOrdinal, node_type> (rowMap), 00092 storageStatus_ (pftype == StaticProfile ? 
00093 Details::STORAGE_1D_UNPACKED : 00094 Details::STORAGE_2D), 00095 fillComplete_ (false), 00096 frobNorm_ (-STM::one ()) 00097 { 00098 try { 00099 myGraph_ = rcp (new Graph (rowMap, NumEntriesPerRowToAlloc, pftype, params)); 00100 } 00101 catch (std::exception &e) { 00102 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, 00103 typeName(*this) << "::CrsMatrix(): caught exception while allocating CrsGraph object: " 00104 << std::endl << e.what() << std::endl); 00105 } 00106 staticGraph_ = myGraph_; 00107 resumeFill(params); 00108 checkInternalState(); 00109 } 00110 00111 00112 template <class Scalar, 00113 class LocalOrdinal, 00114 class GlobalOrdinal, class DeviceType> 00115 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, 00116 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00117 CrsMatrix (const Teuchos::RCP<const map_type>& rowMap, 00118 const Teuchos::RCP<const map_type>& colMap, 00119 size_t maxNumEntriesPerRow, 00120 ProfileType pftype, 00121 const Teuchos::RCP<Teuchos::ParameterList>& params) : 00122 DistObject<char, LocalOrdinal, GlobalOrdinal, node_type> (rowMap), 00123 storageStatus_ (pftype == StaticProfile ? 00124 Details::STORAGE_1D_UNPACKED : 00125 Details::STORAGE_2D), 00126 fillComplete_ (false), 00127 frobNorm_ (-STM::one ()) 00128 { 00129 TEUCHOS_TEST_FOR_EXCEPTION(! staticGraph_.is_null(), std::logic_error, 00130 "Tpetra::CrsMatrix ctor (row Map, col Map, maxNumEntriesPerRow, ...): " 00131 "staticGraph_ is not null at the beginning of the constructor. " 00132 "Please report this bug to the Tpetra developers."); 00133 TEUCHOS_TEST_FOR_EXCEPTION(! myGraph_.is_null(), std::logic_error, 00134 "Tpetra::CrsMatrix ctor (row Map, col Map, maxNumEntriesPerRow, ...): " 00135 "myGraph_ is not null at the beginning of the constructor. 
" 00136 "Please report this bug to the Tpetra developers."); 00137 try { 00138 myGraph_ = rcp (new Graph (rowMap, colMap, maxNumEntriesPerRow, pftype, params)); 00139 } 00140 catch (std::exception &e) { 00141 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, 00142 "CrsMatrix constructor: Caught exception while allocating " 00143 "CrsGraph object: " << std::endl << e.what ()); 00144 } 00145 staticGraph_ = myGraph_; 00146 resumeFill(params); 00147 checkInternalState(); 00148 } 00149 00150 00151 template <class Scalar, 00152 class LocalOrdinal, 00153 class GlobalOrdinal, class DeviceType> 00154 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, 00155 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00156 CrsMatrix (const Teuchos::RCP<const map_type>& rowMap, 00157 const Teuchos::RCP<const map_type>& colMap, 00158 const Teuchos::ArrayRCP<const size_t>& numEntPerRow, 00159 ProfileType pftype, 00160 const RCP<Teuchos::ParameterList>& params) : 00161 DistObject<char, LocalOrdinal, GlobalOrdinal, node_type> (rowMap), 00162 storageStatus_ (pftype == StaticProfile ? 
00163 Details::STORAGE_1D_UNPACKED : 00164 Details::STORAGE_2D), 00165 fillComplete_ (false), 00166 frobNorm_ (-STM::one ()) 00167 { 00168 try { 00169 myGraph_ = rcp (new Graph (rowMap, colMap, numEntPerRow, pftype, params)); 00170 } 00171 catch (std::exception &e) { 00172 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, 00173 "CrsMatrix constructor: caught exception while allocating " 00174 "CrsGraph object: " << std::endl << e.what ()); 00175 } 00176 staticGraph_ = myGraph_; 00177 resumeFill (params); 00178 checkInternalState (); 00179 } 00180 00181 00182 template<class Scalar, 00183 class LocalOrdinal, 00184 class GlobalOrdinal, class DeviceType> 00185 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00186 CrsMatrix (const Teuchos::RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > >& graph, 00187 const Teuchos::RCP<Teuchos::ParameterList>& params) : 00188 DistObject<char, LocalOrdinal,GlobalOrdinal, node_type> (graph->getRowMap ()), 00189 staticGraph_ (graph), 00190 storageStatus_ (Details::STORAGE_1D_PACKED), 00191 fillComplete_ (false), 00192 frobNorm_ (-STM::one ()) 00193 { 00194 const char tfecfFuncName[] = "CrsMatrix(graph)"; 00195 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(staticGraph_.is_null (), 00196 std::runtime_error, ": When calling the CrsMatrix constructor that " 00197 "accepts a static graph, the pointer to the graph must not be null."); 00198 // We prohibit the case where the graph is not yet filled. 00199 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( ! staticGraph_->isFillComplete (), 00200 std::runtime_error, ": The specified graph is not fill-complete. You " 00201 "must invoke fillComplete() on the graph before using it to construct a " 00202 "CrsMatrix. Note that calling resumeFill() makes the graph not fill-" 00203 "complete, even if you had previously called fillComplete(). 
In that " 00204 "case, you must call fillComplete() on the graph again."); 00205 // the graph has entries, and the matrix should have entries as well, set to zero. no need or point in lazy allocating in this case. 00206 // first argument LocalIndices is ignored; the graph is already allocated (local or global, we don't care here) 00207 allocateValues (LocalIndices, GraphAlreadyAllocated); 00208 resumeFill(params); 00209 checkInternalState(); 00210 } 00211 00212 template <class Scalar, 00213 class LocalOrdinal, 00214 class GlobalOrdinal, 00215 class DeviceType> 00216 CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00217 CrsMatrix (const RCP<const map_type>& rowMap, 00218 const RCP<const map_type>& colMap, 00219 const t_RowPtrs & rowPointers, 00220 const t_LocalOrdinal_1D & columnIndices, 00221 const t_ValuesType & values, 00222 const RCP<Teuchos::ParameterList>& params) : 00223 DistObject<char, LocalOrdinal, GlobalOrdinal, node_type> (rowMap), 00224 storageStatus_ (Details::STORAGE_1D_PACKED), 00225 fillComplete_ (false), 00226 frobNorm_ (-STM::one ()) 00227 { 00228 try { 00229 myGraph_ = rcp (new Graph (rowMap, colMap, rowPointers, columnIndices, params)); 00230 } 00231 catch (std::exception &e) { 00232 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, 00233 "CrsMatrix constructor: caught exception while allocating " 00234 "CrsGraph object: " << std::endl << e.what ()); 00235 } 00236 staticGraph_ = myGraph_; 00237 k_values1D_ = values; 00238 values1D_ = Kokkos::Compat::persistingView (k_values1D_); 00239 resumeFill (params); 00240 checkInternalState (); 00241 } 00242 00243 template <class Scalar, 00244 class LocalOrdinal, 00245 class GlobalOrdinal, class DeviceType> 00246 CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00247 CrsMatrix (const RCP<const map_type>& rowMap, 00248 const RCP<const map_type>& colMap, 00249 const ArrayRCP<size_t> & rowPointers, 00250 
const ArrayRCP<LocalOrdinal> & columnIndices, 00251 const ArrayRCP<Scalar> & values, 00252 const RCP<Teuchos::ParameterList>& params) : 00253 DistObject<char, LocalOrdinal, GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > (rowMap), 00254 storageStatus_ (Details::STORAGE_1D_PACKED), 00255 fillComplete_ (false), 00256 frobNorm_ (-STM::one ()) 00257 { 00258 try { 00259 myGraph_ = rcp (new Graph (rowMap, colMap, rowPointers,columnIndices,params)); 00260 } 00261 catch (std::exception &e) { 00262 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, 00263 typeName(*this) << "::CrsMatrix(): caught exception while allocating " 00264 "CrsGraph object: " << std::endl << e.what ()); 00265 } 00266 staticGraph_ = myGraph_; 00267 // FIXME (mfh 05 Aug 2014) It should be possible to convince the 00268 // ArrayRCP to relinquish its allocation, but that might require 00269 // passing the ArrayRCP in by nonconst reference. 00270 k_values1D_ = Kokkos::Compat::getKokkosViewDeepCopy<DeviceType> (values ()); 00271 values1D_ = Kokkos::Compat::persistingView (k_values1D_); 00272 resumeFill (params); 00273 checkInternalState (); 00274 } 00275 00276 template <class Scalar, 00277 class LocalOrdinal, 00278 class GlobalOrdinal, class DeviceType> 00279 CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00280 CrsMatrix (const RCP<const map_type>& rowMap, 00281 const RCP<const map_type>& colMap, 00282 const k_local_matrix_type& lclMatrix, 00283 const RCP<Teuchos::ParameterList>& params) : 00284 DistObject<char, LocalOrdinal, GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > (rowMap), 00285 k_lclMatrix_ (lclMatrix), 00286 storageStatus_ (Details::STORAGE_1D_PACKED), 00287 fillComplete_ (false), 00288 frobNorm_ (-STM::one ()) 00289 { 00290 using Teuchos::ArrayRCP; 00291 using Teuchos::arcp; 00292 using Teuchos::rcp; 00293 using Teuchos::RCP; 00294 const char tfecfFuncName[] = 
"CrsMatrix(rowMap,colMap,lclMatrix,params): "; 00295 00296 try { 00297 myGraph_ = rcp (new Graph (rowMap, colMap, lclMatrix.graph, params)); 00298 } 00299 catch (std::exception &e) { 00300 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, 00301 "Caught exception while allocating CrsGraph object: " << e.what ()); 00302 } 00303 staticGraph_ = myGraph_; 00304 computeGlobalConstants(); 00305 00306 k_values1D_ = k_lclMatrix_.values; 00307 00308 { 00309 // For backwards compatibility, set the Kokkos classic pointer 00310 // to the values, values1D_. 00311 ArrayRCP<scalar_type> classicValues = 00312 Kokkos::Compat::persistingView (k_lclMatrix_.values); 00313 values1D_ = classicValues; 00314 } 00315 00316 // FIXME (mfh 28 Aug 2014) "Preserve Local Graph" bool parameter no longer used. 00317 00318 // Now we're fill complete! 00319 fillComplete_ = true; 00320 00321 // Sanity checks at the end. 00322 #ifdef HAVE_TPETRA_DEBUG 00323 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isFillActive(), std::logic_error, 00324 "We're at the end of fillComplete(), but isFillActive() is true. " 00325 "Please report this bug to the Tpetra developers."); 00326 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillComplete(), std::logic_error, 00327 "We're at the end of fillComplete(), but isFillActive() is true. 
" 00328 "Please report this bug to the Tpetra developers."); 00329 #endif // HAVE_TPETRA_DEBUG 00330 checkInternalState (); 00331 } 00332 00333 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, 00334 class DeviceType> 00335 CrsMatrix< 00336 Scalar, LocalOrdinal, GlobalOrdinal, 00337 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00338 ~CrsMatrix () {} 00339 00340 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, 00341 class DeviceType> 00342 RCP<const Teuchos::Comm<int> > 00343 CrsMatrix< 00344 Scalar, LocalOrdinal, GlobalOrdinal, 00345 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00346 getComm () const { 00347 return getCrsGraph ()->getComm (); 00348 } 00349 00350 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, 00351 class DeviceType> 00352 Teuchos::RCP<Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > 00353 CrsMatrix< 00354 Scalar, LocalOrdinal, GlobalOrdinal, 00355 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00356 getNode () const { 00357 return getCrsGraph ()->getNode (); 00358 } 00359 00360 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00361 ProfileType 00362 CrsMatrix< 00363 Scalar, LocalOrdinal, GlobalOrdinal, 00364 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00365 getProfileType () const { 00366 return getCrsGraph ()->getProfileType (); 00367 } 00368 00369 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00370 bool 00371 CrsMatrix< 00372 Scalar, LocalOrdinal, GlobalOrdinal, 00373 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00374 isFillComplete () const { 00375 return fillComplete_; 00376 } 00377 00378 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00379 bool 00380 CrsMatrix< 00381 Scalar, LocalOrdinal, GlobalOrdinal, 00382 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00383 isFillActive () const { 00384 return ! 
fillComplete_; 00385 } 00386 00387 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00388 bool CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::isStorageOptimized() const { 00389 return getCrsGraph()->isStorageOptimized(); 00390 } 00391 00392 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00393 bool CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::isLocallyIndexed() const { 00394 return getCrsGraph()->isLocallyIndexed(); 00395 } 00396 00397 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00398 bool CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::isGloballyIndexed() const { 00399 return getCrsGraph()->isGloballyIndexed(); 00400 } 00401 00402 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00403 bool CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::hasColMap() const { 00404 return getCrsGraph()->hasColMap(); 00405 } 00406 00407 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00408 global_size_t CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::getGlobalNumEntries() const { 00409 return getCrsGraph()->getGlobalNumEntries(); 00410 } 00411 00412 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00413 size_t CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::getNodeNumEntries() const { 00414 return getCrsGraph()->getNodeNumEntries(); 00415 } 00416 00417 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00418 global_size_t CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::getGlobalNumRows() 
const { 00419 return getCrsGraph()->getGlobalNumRows(); 00420 } 00421 00422 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00423 global_size_t CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::getGlobalNumCols() const { 00424 return getCrsGraph()->getGlobalNumCols(); 00425 } 00426 00427 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00428 size_t CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::getNodeNumRows() const { 00429 return getCrsGraph()->getNodeNumRows(); 00430 } 00431 00432 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00433 size_t CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::getNodeNumCols() const { 00434 return getCrsGraph()->getNodeNumCols(); 00435 } 00436 00437 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00438 global_size_t CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::getGlobalNumDiags() const { 00439 return getCrsGraph()->getGlobalNumDiags(); 00440 } 00441 00442 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00443 size_t CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::getNodeNumDiags() const { 00444 return getCrsGraph()->getNodeNumDiags(); 00445 } 00446 00447 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00448 size_t CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const { 00449 return getCrsGraph()->getNumEntriesInGlobalRow(globalRow); 00450 } 00451 00452 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00453 size_t 
CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::getNumEntriesInLocalRow(LocalOrdinal localRow) const { 00454 return getCrsGraph()->getNumEntriesInLocalRow(localRow); 00455 } 00456 00457 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00458 size_t CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::getGlobalMaxNumRowEntries() const { 00459 return getCrsGraph()->getGlobalMaxNumRowEntries(); 00460 } 00461 00462 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00463 size_t CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::getNodeMaxNumRowEntries() const { 00464 return getCrsGraph()->getNodeMaxNumRowEntries(); 00465 } 00466 00467 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00468 GlobalOrdinal CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::getIndexBase() const { 00469 return getRowMap()->getIndexBase(); 00470 } 00471 00472 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00473 RCP<const Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > > 00474 CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::getRowMap() const { 00475 return getCrsGraph()->getRowMap(); 00476 } 00477 00478 00479 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00480 RCP<const Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > > 00481 CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::getColMap() const { 00482 return getCrsGraph()->getColMap(); 00483 } 00484 00485 00486 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00487 RCP<const 
Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > > 00488 CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::getDomainMap() const { 00489 return getCrsGraph()->getDomainMap(); 00490 } 00491 00492 00493 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00494 RCP<const Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > > 00495 CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::getRangeMap() const { 00496 return getCrsGraph()->getRangeMap(); 00497 } 00498 00499 00500 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00501 RCP<const RowGraph<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > > 00502 CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::getGraph() const { 00503 if (staticGraph_ != null) return staticGraph_; 00504 return myGraph_; 00505 } 00506 00507 00508 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00509 RCP<const CrsGraph<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > > 00510 CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::getCrsGraph() const { 00511 if (staticGraph_ != null) return staticGraph_; 00512 return myGraph_; 00513 } 00514 00515 00516 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00517 bool CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::isLowerTriangular() const { 00518 return getCrsGraph()->isLowerTriangular(); 00519 } 00520 00521 00522 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00523 bool CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> 
>::isUpperTriangular() const { 00524 return getCrsGraph()->isUpperTriangular(); 00525 } 00526 00527 00528 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00529 bool CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::isStaticGraph() const { 00530 return (myGraph_ == null); 00531 } 00532 00533 00534 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00535 bool CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::hasTransposeApply() const { 00536 return true; 00537 } 00538 00539 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00540 bool 00541 CrsMatrix< 00542 Scalar, LocalOrdinal, GlobalOrdinal, 00543 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00544 supportsRowViews () const { 00545 return true; 00546 } 00547 00550 // // 00551 // Internal utility methods // 00552 // // 00555 00556 00559 template <class Scalar, 00560 class LocalOrdinal, 00561 class GlobalOrdinal, 00562 class DeviceType> 00563 void 00564 CrsMatrix< 00565 Scalar, LocalOrdinal, GlobalOrdinal, 00566 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00567 allocateValues (ELocalGlobal lg, GraphAllocationStatus gas) 00568 { 00569 #ifdef HAVE_TPETRA_DEBUG 00570 // If the graph indices are already allocated, then gas should be 00571 // GraphAlreadyAllocated. Otherwise, gas should be 00572 // GraphNotYetAllocated. 00573 if ((gas == GraphAlreadyAllocated) != staticGraph_->indicesAreAllocated()) { 00574 const std::string err1 ("allocateValues: The caller has asserted that " 00575 "the graph is "); 00576 const std::string err2 ("already allocated, but the static graph says " 00577 "that its indices are "); 00578 const std::string err3 ("already allocated. Please report this bug to " 00579 "the Tpetra developers."); 00580 TEUCHOS_TEST_FOR_EXCEPTION(gas == GraphAlreadyAllocated && ! 
staticGraph_->indicesAreAllocated(), 00581 std::logic_error, err1 << err2 << "not " << err3); 00582 TEUCHOS_TEST_FOR_EXCEPTION(gas != GraphAlreadyAllocated && staticGraph_->indicesAreAllocated(), 00583 std::logic_error, err1 << "not " << err2 << err3); 00584 } 00585 00586 // If the graph is unallocated, then it had better be a 00587 // matrix-owned graph. ("Matrix-owned graph" means that the 00588 // matrix gets to define the graph structure. If the CrsMatrix 00589 // constructor that takes an RCP<const CrsGraph> was used, then 00590 // the matrix does _not_ own the graph.) 00591 TEUCHOS_TEST_FOR_EXCEPTION( 00592 ! staticGraph_->indicesAreAllocated() && myGraph_.is_null(), 00593 std::logic_error, 00594 "allocateValues: The static graph says that its indices are not " 00595 "allocated, but the graph is not owned by the matrix. Please report " 00596 "this bug to the Tpetra developers."); 00597 #endif // HAVE_TPETRA_DEBUG 00598 00599 if (gas == GraphNotYetAllocated) { 00600 myGraph_->allocateIndices (lg); 00601 } 00602 00603 // Allocate matrix values. 00604 if (getProfileType() == StaticProfile) { 00605 // "Static profile" means that the number of matrix entries in 00606 // each row was fixed at the time the CrsMatrix constructor was 00607 // called. This lets us use 1-D storage for the matrix's 00608 // values. ("1-D storage" means the same as that used by the 00609 // three arrays in the classic compressed sparse row format.) 00610 00611 const size_t lclNumRows = staticGraph_->getNodeNumRows (); 00612 typename Graph::t_RowPtrs k_ptrs = staticGraph_->k_rowPtrs_; 00613 TEUCHOS_TEST_FOR_EXCEPTION( 00614 k_ptrs.dimension_0 () != lclNumRows+1, std::logic_error, 00615 "Tpetra::CrsMatrix::allocateValues: With StaticProfile, row offsets " 00616 "array has length " << k_ptrs.dimension_0 () << " != (lclNumRows+1) = " 00617 << (lclNumRows+1) << "."); 00618 // FIXME (mfh 08 Aug 2014) This assumes UVM. 
We could fix this 00619 // either by storing the row offsets in the graph as a DualView, 00620 // or by making a device View of that entry, and copying it back 00621 // to host. 00622 const size_t lclTotalNumEntries = k_ptrs(lclNumRows); 00623 00624 // Allocate array of (packed???) matrix values. 00625 k_values1D_ = t_ValuesType ("Tpetra::CrsMatrix::val", lclTotalNumEntries); 00626 values1D_ = Kokkos::Compat::persistingView (k_values1D_); 00627 } 00628 else { 00629 // "Dynamic profile" means the number of matrix entries in each 00630 // row is not fixed and may expand. Thus, we store the matrix's 00631 // values in "2-D storage," meaning an array of arrays. The 00632 // outer array has as many inner arrays as there are rows in the 00633 // matrix, and each inner array stores the values in that row. 00634 values2D_ = staticGraph_->template allocateValues2D<Scalar>(); 00635 } 00636 } 00637 00640 template <class Scalar, 00641 class LocalOrdinal, 00642 class GlobalOrdinal, 00643 class DeviceType> 00644 void 00645 CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00646 getAllValues (ArrayRCP<const size_t>& rowPointers, 00647 ArrayRCP<const LocalOrdinal>& columnIndices, 00648 ArrayRCP<const Scalar>& values) const 00649 { 00650 const char tfecfFuncName[] = "getAllValues"; 00651 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00652 columnIndices.size () != values.size (), std::runtime_error, 00653 " requires that columnIndices and values are the same size."); 00654 00655 RCP<const crs_graph_type> relevantGraph = getCrsGraph (); 00656 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00657 relevantGraph.is_null (), std::runtime_error, 00658 " requires that getCrsGraph() is not null."); 00659 try { 00660 rowPointers = relevantGraph->getNodeRowPtrs (); 00661 columnIndices = relevantGraph->getNodePackedIndices (); 00662 } 00663 catch (std::exception &e) { 00664 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00665 true, std::runtime_error, 00666 ": Caught 
exception while calling getCrsGraph()->getAllIndices()."); 00667 } 00668 values = Kokkos::Compat::persistingView (k_values1D_); 00669 } 00670 00671 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, 00672 class DeviceType> 00673 void 00674 CrsMatrix< 00675 Scalar, LocalOrdinal, GlobalOrdinal, 00676 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00677 fillLocalGraphAndMatrix (const Teuchos::RCP<Teuchos::ParameterList>& params) 00678 { 00679 using Kokkos::create_mirror_view; 00680 using Teuchos::arcp_const_cast; 00681 using Teuchos::ArrayRCP; 00682 using Teuchos::null; 00683 using Teuchos::RCP; 00684 using Teuchos::rcp; 00685 typedef ArrayRCP<size_t>::size_type size_type; 00686 typedef typename Graph::t_numRowEntries_ row_entries_type; 00687 typedef typename Graph::t_RowPtrsNC row_offsets_type; 00688 typedef typename Graph::t_LocalOrdinal_1D lclinds_1d_type; 00689 typedef t_ValuesType values_type; 00690 00691 // fillComplete() only calls fillLocalGraphAndMatrix() if the 00692 // matrix owns the graph, which means myGraph_ is not null. 00693 TEUCHOS_TEST_FOR_EXCEPTION( 00694 myGraph_.is_null (), std::logic_error, "Tpetra::CrsMatrix::" 00695 "fillLocalGraphAndMatrix (called from fillComplete or " 00696 "expertStaticFillComplete): The nonconst graph (myGraph_) is null. This " 00697 "means that the matrix has a const (a.k.a. \"static\") graph. This may " 00698 "mean that fillComplete or expertStaticFillComplete has a bug, since it " 00699 "should never call fillLocalGraphAndMatrix in that case. " 00700 "Please report this bug to the Tpetra developers."); 00701 00702 const size_t lclNumRows = this->getNodeNumRows (); 00703 00704 // This method's goal is to fill in the three arrays (compressed 00705 // sparse row format) that define the sparse graph's and matrix's 00706 // structure, and the sparse matrix's values. 
00707 // 00708 // Use t_RowPtrs and not 00709 // Graph::LocalStaticCrsGraphType::row_map_type for k_ptrs, 00710 // because the latter is const and we need to modify k_ptrs here. 00711 row_offsets_type k_ptrs; 00712 t_RowPtrs k_ptrs_const; 00713 lclinds_1d_type k_inds; 00714 values_type k_vals; 00715 00716 // Get references to the data in myGraph_, so we can modify them 00717 // as well. Note that we only call fillLocalGraphAndMatrix() if 00718 // the matrix owns the graph, which means myGraph_ is not null. 00719 lclinds_1d_type k_lclInds1D_ = myGraph_->k_lclInds1D_; 00720 00721 // The number of entries in each locally owned row. This is a 00722 // DualView. 2-D storage lives on host and is currently not 00723 // thread-safe for parallel kernels even on host, so we have to 00724 // work sequentially with host storage in that case. 00725 row_entries_type k_numRowEnt = myGraph_->k_numRowEntries_; 00726 typename row_entries_type::t_host h_numRowEnt = k_numRowEnt.h_view; 00727 00728 if (getProfileType () == DynamicProfile) { 00729 // Pack 2-D storage (DynamicProfile) into 1-D packed storage. 00730 // 00731 // DynamicProfile means that the matrix's column indices and 00732 // values are currently stored in a 2-D "unpacked" format, in 00733 // the arrays-of-arrays myGraph_->lclInds2D_ (for column 00734 // indices) and values2D_ (for values). We allocate 1-D storage 00735 // (k_inds resp. k_vals), and then copy from 2-D storage 00736 // (lclInds2D_ resp. values2D_) into 1-D storage (k_inds 00737 // resp. k_vals). 00738 TEUCHOS_TEST_FOR_EXCEPTION( 00739 static_cast<size_t> (k_numRowEnt.dimension_0 ()) != lclNumRows, 00740 std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix (called " 00741 "from fillComplete or expertStaticFillComplete): For the " 00742 "DynamicProfile branch, k_numRowEnt has the wrong length. 
" 00743 "k_numRowEnt.dimension_0() = " << k_numRowEnt.dimension_0 () 00744 << " != getNodeNumRows() = " << lclNumRows << ""); 00745 00746 // Pack the row offsets into k_ptrs, by doing a sum-scan of 00747 // the array of valid entry counts per row (h_numRowEnt). 00748 // 00749 // Total number of entries in the matrix on the calling 00750 // process. We will compute this in the loop below. It's 00751 // cheap to compute and useful as a sanity check. 00752 size_t lclTotalNumEntries = 0; 00753 // This will be a host view of packed row offsets. 00754 typename row_offsets_type::HostMirror h_ptrs; 00755 { 00756 // Allocate the packed row offsets array. We use a nonconst 00757 // temporary (packedRowOffsets) here, because k_ptrs is const. 00758 // We will assign packedRowOffsets to k_ptrs below. 00759 row_offsets_type packedRowOffsets ("Tpetra::CrsGraph::ptr", 00760 lclNumRows+1); 00761 // 00762 // FIXME hack until we get parallel_scan in kokkos 00763 // 00764 h_ptrs = create_mirror_view (packedRowOffsets); 00765 h_ptrs(0) = 0; 00766 for (size_type i = 0; i < static_cast<size_type> (lclNumRows); ++i) { 00767 const size_t numEnt = h_numRowEnt(i); 00768 lclTotalNumEntries += numEnt; 00769 h_ptrs(i+1) = h_ptrs(i) + numEnt; 00770 } 00771 Kokkos::deep_copy (packedRowOffsets, h_ptrs); 00772 // packedRowOffsets is modifiable; k_ptrs isn't, so we have to 00773 // use packedRowOffsets in the loop above and assign here. 
00774 k_ptrs = packedRowOffsets; 00775 k_ptrs_const = k_ptrs; 00776 } 00777 00778 TEUCHOS_TEST_FOR_EXCEPTION( 00779 static_cast<size_t> (k_ptrs.dimension_0 ()) != lclNumRows + 1, 00780 std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: In " 00781 "DynamicProfile branch, after packing k_ptrs, k_ptrs.dimension_0()" 00782 " = " << k_ptrs.dimension_0 () << " != (lclNumRows+1) = " 00783 << (lclNumRows+1) << "."); 00784 TEUCHOS_TEST_FOR_EXCEPTION( 00785 static_cast<size_t> (h_ptrs.dimension_0 ()) != lclNumRows + 1, 00786 std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: In " 00787 "DynamicProfile branch, after packing h_ptrs, h_ptrs.dimension_0()" 00788 " = " << h_ptrs.dimension_0 () << " != (lclNumRows+1) = " 00789 << (lclNumRows+1) << "."); 00790 // FIXME (mfh 08 Aug 2014) This assumes UVM. 00791 TEUCHOS_TEST_FOR_EXCEPTION( 00792 k_ptrs(lclNumRows) != lclTotalNumEntries, std::logic_error, 00793 "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: In DynamicProfile branch, " 00794 "after packing k_ptrs, k_ptrs(lclNumRows = " << lclNumRows << ") = " << 00795 k_ptrs(lclNumRows) << " != total number of entries on the calling " 00796 "process = " << lclTotalNumEntries << "."); 00797 00798 // Allocate the arrays of packed column indices and values. 00799 k_inds = lclinds_1d_type ("Tpetra::CrsGraph::ind", lclTotalNumEntries); 00800 k_vals = t_ValuesType ("Tpetra::CrsMatrix::val", lclTotalNumEntries); 00801 00802 // We need host views of the above, since 2-D storage lives on host. 00803 typename lclinds_1d_type::HostMirror h_inds = create_mirror_view (k_inds); 00804 typename values_type::HostMirror h_vals = create_mirror_view (k_vals); 00805 00806 // Pack the column indices and values on the host. 
00807 ArrayRCP<Array<LocalOrdinal> > lclInds2D = myGraph_->lclInds2D_; 00808 for (size_t row = 0; row < lclNumRows; ++row) { 00809 const size_t numEnt = h_numRowEnt(row); 00810 std::copy (lclInds2D[row].begin(), 00811 lclInds2D[row].begin() + numEnt, 00812 h_inds.ptr_on_device() + h_ptrs(row)); 00813 std::copy (values2D_[row].begin(), 00814 values2D_[row].begin() + numEnt, 00815 h_vals.ptr_on_device() + h_ptrs(row)); 00816 } 00817 // Copy the packed column indices and values to the device. 00818 Kokkos::deep_copy (k_inds, h_inds); 00819 Kokkos::deep_copy (k_vals, h_vals); 00820 00821 // Sanity check of packed row offsets. 00822 if (k_ptrs.dimension_0 () != 0) { 00823 const size_t numOffsets = static_cast<size_t> (k_ptrs.dimension_0 ()); 00824 TEUCHOS_TEST_FOR_EXCEPTION( 00825 static_cast<size_t> (k_ptrs(numOffsets-1)) != k_vals.dimension_0 (), 00826 std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: " 00827 "In DynamicProfile branch, after packing, k_ptrs(" << (numOffsets-1) 00828 << ") = " << k_ptrs(numOffsets-1) << " != k_vals.dimension_0() = " 00829 << k_vals.dimension_0 () << "."); 00830 TEUCHOS_TEST_FOR_EXCEPTION( 00831 static_cast<size_t> (k_ptrs(numOffsets-1)) != k_inds.dimension_0 (), 00832 std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: " 00833 "In DynamicProfile branch, after packing, k_ptrs(" << (numOffsets-1) 00834 << ") = " << k_ptrs(numOffsets-1) << " != k_inds.dimension_0() = " 00835 << k_inds.dimension_0 () << "."); 00836 } 00837 } 00838 else if (getProfileType () == StaticProfile) { 00839 // StaticProfile means that the matrix's column indices and 00840 // values are currently stored in a 1-D format, with row offsets 00841 // in k_rowPtrs_ and local column indices in k_lclInds1D_. 00842 00843 // StaticProfile also means that the graph's array of row 00844 // offsets must already be allocated. 
00845 typename Graph::LocalStaticCrsGraphType::row_map_type curRowOffsets = 00846 myGraph_->k_rowPtrs_; 00847 TEUCHOS_TEST_FOR_EXCEPTION( 00848 curRowOffsets.dimension_0 () == 0, std::logic_error, 00849 "curRowOffsets has size zero, but shouldn't"); 00850 TEUCHOS_TEST_FOR_EXCEPTION( 00851 curRowOffsets.dimension_0 () != lclNumRows + 1, std::logic_error, 00852 "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: curRowOffsets has size " 00853 << curRowOffsets.dimension_0 () << " != lclNumRows + 1 = " 00854 << (lclNumRows + 1) << ".") 00855 { 00856 const size_t numOffsets = curRowOffsets.dimension_0 (); 00857 // FIXME (mfh 06 Aug 2014) This relies on UVM. 00858 TEUCHOS_TEST_FOR_EXCEPTION( 00859 numOffsets != 0 && 00860 myGraph_->k_lclInds1D_.dimension_0 () != curRowOffsets(numOffsets - 1), 00861 std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: " 00862 "numOffsets = " << numOffsets << " != 0 and " 00863 "myGraph_->k_lclInds1D_.dimension_0() = " 00864 << myGraph_->k_lclInds1D_.dimension_0 () 00865 << " != curRowOffsets(" << numOffsets << ") = " 00866 << curRowOffsets(numOffsets - 1) << "."); 00867 } 00868 00869 if (myGraph_->nodeNumEntries_ != myGraph_->nodeNumAllocated_) { 00870 // The matrix's current 1-D storage is "unpacked." This means 00871 // the row offsets may differ from what the final row offsets 00872 // should be. This could happen, for example, if the user 00873 // specified StaticProfile in the constructor and set an upper 00874 // bound on the number of entries per row, but didn't fill all 00875 // those entries. 00876 TEUCHOS_TEST_FOR_EXCEPTION( 00877 static_cast<size_t> (k_numRowEnt.dimension_0 ()) != lclNumRows, 00878 std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix (called" 00879 " from fillComplete or expertStaticFillComplete): In StaticProfile " 00880 "unpacked branch, k_numRowEnt has the wrong length. 
" 00881 "k_numRowEnt.dimension_0() = " << k_numRowEnt.dimension_0 () 00882 << " != getNodeNumRows() = " << lclNumRows << "."); 00883 00884 if (curRowOffsets.dimension_0 () != 0) { 00885 const size_t numOffsets = 00886 static_cast<size_t> (curRowOffsets.dimension_0 ()); 00887 TEUCHOS_TEST_FOR_EXCEPTION( 00888 curRowOffsets(numOffsets-1) != static_cast<size_t> (k_values1D_.dimension_0 ()), 00889 std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: " 00890 "In StaticProfile branch, before allocating or packing, " 00891 "curRowOffsets(" << (numOffsets-1) << ") = " 00892 << curRowOffsets(numOffsets - 1) 00893 << " != k_values1D_.dimension_0() = " 00894 << k_values1D_.dimension_0 () << "."); 00895 TEUCHOS_TEST_FOR_EXCEPTION( 00896 static_cast<size_t> (curRowOffsets(numOffsets - 1)) != 00897 myGraph_->k_lclInds1D_.dimension_0 (), 00898 std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: " 00899 "In StaticProfile branch, before allocating or packing, " 00900 "curRowOffsets(" << (numOffsets-1) << ") = " 00901 << curRowOffsets(numOffsets - 1) 00902 << " != myGraph_->k_lclInds1D_.dimension_0() = " 00903 << myGraph_->k_lclInds1D_.dimension_0 () << "."); 00904 } 00905 00906 // Pack the row offsets into k_ptrs, by doing a sum-scan of 00907 // the array of valid entry counts per row (h_numRowEnt). 00908 00909 // Total number of entries in the matrix on the calling 00910 // process. We will compute this in the loop below. It's 00911 // cheap to compute and useful as a sanity check. 00912 size_t lclTotalNumEntries = 0; 00913 // This will be a host view of packed row offsets. 00914 typename row_offsets_type::HostMirror h_ptrs; 00915 { 00916 // Allocate the packed row offsets array. We use a nonconst 00917 // temporary (packedRowOffsets) here, because k_ptrs is 00918 // const. We will assign packedRowOffsets to k_ptrs below. 
00919 row_offsets_type packedRowOffsets ("Tpetra::CrsGraph::ptr", 00920 lclNumRows+1); 00921 // 00922 // FIXME hack until we get parallel_scan in Kokkos 00923 // 00924 // Unlike in the 2-D storage case above, we don't need the 00925 // host view of the packed row offsets array after packing 00926 // the row offsets. 00927 h_ptrs = create_mirror_view (packedRowOffsets); 00928 h_ptrs(0) = 0; 00929 for (size_type i = 0; i < static_cast<size_type> (lclNumRows); ++i) { 00930 const size_t numEnt = h_numRowEnt(i); 00931 lclTotalNumEntries += numEnt; 00932 h_ptrs(i+1) = h_ptrs(i) + numEnt; 00933 } 00934 Kokkos::deep_copy (packedRowOffsets, h_ptrs); 00935 // packedRowOffsets is modifiable; k_ptrs isn't, so we have 00936 // to use packedRowOffsets in the loop above and assign here. 00937 k_ptrs = packedRowOffsets; 00938 k_ptrs_const = k_ptrs; 00939 } 00940 00941 TEUCHOS_TEST_FOR_EXCEPTION( 00942 static_cast<size_t> (k_ptrs.dimension_0 ()) != lclNumRows + 1, 00943 std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: For " 00944 "the StaticProfile unpacked-but-pack branch, after packing k_ptrs, " 00945 "k_ptrs.dimension_0() = " << k_ptrs.dimension_0 () << " != " 00946 "lclNumRows+1 = " << (lclNumRows+1) << "."); 00947 // FIXME (mfh 06 Aug 2014) This assumes UVM. 00948 TEUCHOS_TEST_FOR_EXCEPTION( 00949 k_ptrs(lclNumRows) != lclTotalNumEntries, std::logic_error, 00950 "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: In StaticProfile " 00951 "unpacked-but-pack branch, after filling k_ptrs, k_ptrs(lclNumRows=" 00952 << lclNumRows << ") = " << k_ptrs(lclNumRows) << " != total number " 00953 "of entries on the calling process = " << lclTotalNumEntries << "."); 00954 00955 // Allocate the arrays of packed column indices and values. 
00956 k_inds = lclinds_1d_type ("Tpetra::CrsGraph::ind", lclTotalNumEntries); 00957 k_vals = t_ValuesType ("Tpetra::CrsMatrix::val", lclTotalNumEntries); 00958 00959 // curRowOffsets (myGraph_->k_rowPtrs_) (???), k_lclInds1D_, 00960 // and k_values1D_ are currently unpacked. Pack them, using 00961 // the packed row offsets array k_ptrs that we created above. 00962 // 00963 // FIXME (mfh 06 Aug 2014) If "Optimize Storage" is false, we 00964 // need to keep around the unpacked row offsets, column 00965 // indices, and values arrays. 00966 00967 // Pack the column indices from unpacked k_lclInds1D_ into 00968 // packed k_inds. We will replace k_lclInds1D_ below. 00969 typedef pack_functor<typename Graph::t_LocalOrdinal_1D, 00970 typename Graph::LocalStaticCrsGraphType::row_map_type> 00971 inds_packer_type; 00972 inds_packer_type indsPacker (k_inds, myGraph_->k_lclInds1D_, 00973 k_ptrs, curRowOffsets); 00974 Kokkos::parallel_for (lclNumRows, indsPacker); 00975 00976 // Pack the values from unpacked k_values1D_ into packed 00977 // k_vals. We will replace k_values1D_ below. 00978 typedef pack_functor<t_ValuesType, 00979 typename Graph::LocalStaticCrsGraphType::row_map_type> 00980 vals_packer_type; 00981 vals_packer_type valsPacker (k_vals, this->k_values1D_, 00982 k_ptrs, curRowOffsets); 00983 Kokkos::parallel_for (lclNumRows, valsPacker); 00984 00985 TEUCHOS_TEST_FOR_EXCEPTION( 00986 k_ptrs.dimension_0 () == 0, std::logic_error, "Tpetra::CrsMatrix::" 00987 "fillLocalGraphAndMatrix: In StaticProfile \"Optimize Storage\" = " 00988 "true branch, after packing, k_ptrs.dimension_0() = 0. 
This " 00989 "probably means that k_rowPtrs_ was never allocated."); 00990 if (k_ptrs.dimension_0 () != 0) { 00991 const size_t numOffsets = static_cast<size_t> (k_ptrs.dimension_0 ()); 00992 TEUCHOS_TEST_FOR_EXCEPTION( 00993 static_cast<size_t> (k_ptrs(numOffsets - 1)) != k_vals.dimension_0 (), 00994 std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: " 00995 "In StaticProfile \"Optimize Storage\"=true branch, after packing, " 00996 "k_ptrs(" << (numOffsets-1) << ") = " << k_ptrs(numOffsets-1) << 00997 " != k_vals.dimension_0() = " << k_vals.dimension_0 () << "."); 00998 TEUCHOS_TEST_FOR_EXCEPTION( 00999 static_cast<size_t> (k_ptrs(numOffsets - 1)) != k_inds.dimension_0 (), 01000 std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: " 01001 "In StaticProfile \"Optimize Storage\"=true branch, after packing, " 01002 "k_ptrs(" << (numOffsets-1) << ") = " << k_ptrs(numOffsets-1) << 01003 " != k_inds.dimension_0() = " << k_inds.dimension_0 () << "."); 01004 } 01005 } 01006 else { // We don't have to pack, so just set the pointers. 01007 k_ptrs_const = myGraph_->k_rowPtrs_; 01008 k_inds = myGraph_->k_lclInds1D_; 01009 k_vals = this->k_values1D_; 01010 01011 TEUCHOS_TEST_FOR_EXCEPTION( 01012 k_ptrs_const.dimension_0 () == 0, std::logic_error, "Tpetra::CrsMatrix::" 01013 "fillLocalGraphAndMatrix: In StaticProfile \"Optimize Storage\" = " 01014 "false branch, k_ptrs_const.dimension_0() = 0. 
This probably means that " 01015 "k_rowPtrs_ was never allocated."); 01016 if (k_ptrs_const.dimension_0 () != 0) { 01017 const size_t numOffsets = static_cast<size_t> (k_ptrs_const.dimension_0 ()); 01018 TEUCHOS_TEST_FOR_EXCEPTION( 01019 static_cast<size_t> (k_ptrs_const(numOffsets - 1)) != k_vals.dimension_0 (), 01020 std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: " 01021 "In StaticProfile \"Optimize Storage\" = false branch, " 01022 "k_ptrs_const(" << (numOffsets-1) << ") = " << k_ptrs_const(numOffsets - 1) 01023 << " != k_vals.dimension_0() = " << k_vals.dimension_0 () << "."); 01024 TEUCHOS_TEST_FOR_EXCEPTION( 01025 static_cast<size_t> (k_ptrs_const(numOffsets - 1)) != k_inds.dimension_0 (), 01026 std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: " 01027 "In StaticProfile \"Optimize Storage\" = false branch, " 01028 "k_ptrs_const(" << (numOffsets-1) << ") = " << k_ptrs_const(numOffsets - 1) 01029 << " != k_inds.dimension_0() = " << k_inds.dimension_0 () << "."); 01030 } 01031 } 01032 } 01033 01034 // Extra sanity checks. 
01035 TEUCHOS_TEST_FOR_EXCEPTION( 01036 static_cast<size_t> (k_ptrs_const.dimension_0 ()) != lclNumRows + 1, 01037 std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: After " 01038 "packing, k_ptrs_const.dimension_0() = " << k_ptrs_const.dimension_0 () 01039 << " != lclNumRows+1 = " << (lclNumRows+1) << "."); 01040 if (k_ptrs_const.dimension_0 () != 0) { 01041 const size_t numOffsets = static_cast<size_t> (k_ptrs_const.dimension_0 ()); 01042 TEUCHOS_TEST_FOR_EXCEPTION( 01043 static_cast<size_t> (k_ptrs_const(numOffsets - 1)) != k_vals.dimension_0 (), 01044 std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: After " 01045 "packing, k_ptrs_const(" << (numOffsets-1) << ") = " << k_ptrs_const(numOffsets-1) 01046 << " != k_vals.dimension_0() = " << k_vals.dimension_0 () << "."); 01047 TEUCHOS_TEST_FOR_EXCEPTION( 01048 static_cast<size_t> (k_ptrs_const(numOffsets - 1)) != k_inds.dimension_0 (), 01049 std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: After " 01050 "packing, k_ptrs_const(" << (numOffsets-1) << ") = " << k_ptrs_const(numOffsets-1) 01051 << " != k_inds.dimension_0() = " << k_inds.dimension_0 () << "."); 01052 } 01053 01054 // May we ditch the old allocations for the packed (and otherwise 01055 // "optimized") allocations, later in this routine? Optimize 01056 // storage if the graph is not static, or if the graph already has 01057 // optimized storage. 01058 const bool defaultOptStorage = 01059 ! isStaticGraph () || staticGraph_->isStorageOptimized (); 01060 const bool requestOptimizedStorage = 01061 (! params.is_null () && params->get ("Optimize Storage", defaultOptStorage)) || 01062 (params.is_null () && defaultOptStorage); 01063 01064 // The graph has optimized storage when indices are allocated, 01065 // myGraph_->k_numRowEntries_ is empty, and there are more than 01066 // zero rows on this process. 
It's impossible for the graph to 01067 // have dynamic profile (getProfileType() == DynamicProfile) and 01068 // be optimized (isStorageOptimized()). 01069 if (requestOptimizedStorage) { 01070 // Free the old, unpacked, unoptimized allocations. 01071 // Change the graph from dynamic to static allocation profile 01072 01073 // Free graph data structures that are only needed for 2-D or 01074 // unpacked 1-D storage. 01075 myGraph_->lclInds2D_ = null; // legacy KokkosClassic 2-D storage 01076 myGraph_->k_numRowEntries_ = row_entries_type (); 01077 myGraph_->numRowEntries_ = null; // legacy KokkosClassic view of above 01078 01079 // Free the matrix's 2-D storage. 01080 this->values2D_ = null; 01081 01082 // Keep the new 1-D packed allocations. 01083 myGraph_->k_rowPtrs_ = k_ptrs_const; 01084 myGraph_->k_lclInds1D_ = k_inds; 01085 this->k_values1D_ = k_vals; 01086 01087 // Set Kokkos classic pointer for backwards compatibility. 01088 this->values1D_ = Kokkos::Compat::persistingView (k_vals); 01089 01090 // Storage is packed now, so the number of allocated entries is 01091 // the same as the actual number of entries. 01092 myGraph_->nodeNumAllocated_ = myGraph_->nodeNumEntries_; 01093 // The graph is definitely StaticProfile now, whether or not it 01094 // was before. 01095 myGraph_->pftype_ = StaticProfile; 01096 myGraph_->storageStatus_ = Details::STORAGE_1D_PACKED; 01097 this->storageStatus_ = Details::STORAGE_1D_PACKED; 01098 } 01099 01100 RCP<Teuchos::ParameterList> lclparams; 01101 if (params.is_null ()) { 01102 lclparams = Teuchos::parameterList (); 01103 } else { 01104 lclparams = Teuchos::sublist (params, "Local Graph"); 01105 } 01106 01107 // Make the local graph, using the arrays of row offsets and 01108 // column indices that we built above. The local graph should be 01109 // null, but we delete it first so that any memory can be freed 01110 // before we allocate the new one. 
01111 // 01112 // FIXME (mfh 06,28 Aug 2014) It would make more sense for 01113 // Tpetra::CrsGraph to have a protected method that accepts k_inds 01114 // and k_ptrs, and creates the local graph k_lclGraph_. 01115 myGraph_->k_lclGraph_ = 01116 typename Graph::LocalStaticCrsGraphType (k_inds, k_ptrs_const); 01117 01118 // Make the local matrix, using the local graph and vals array. 01119 01120 // FIXME (mfh 28 Aug 2014) "Local Sparse Ops" sublist is now ignored. 01121 01122 // k_lclMatrix_ = k_local_matrix_type ("Tpetra::CrsMatrix::k_lclMatrix_", 01123 // getNodeNumCols (), k_vals, 01124 // staticGraph_->getLocalGraph_Kokkos ()); 01125 k_lclMatrix_ = k_local_matrix_type ("Tpetra::CrsMatrix::k_lclMatrix_", 01126 getNodeNumCols (), k_vals, 01127 myGraph_->k_lclGraph_); 01128 // FIXME (mfh 28 Aug 2014) "Local Sparse Ops" sublist is now ignored. 01129 } 01130 01131 01132 template <class Scalar, 01133 class LocalOrdinal, 01134 class GlobalOrdinal, 01135 class DeviceType> 01136 void 01137 CrsMatrix< 01138 Scalar, LocalOrdinal, GlobalOrdinal, 01139 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01140 fillLocalMatrix (const Teuchos::RCP<Teuchos::ParameterList>& params) 01141 { 01142 using Kokkos::create_mirror_view; 01143 using Teuchos::ArrayRCP; 01144 using Teuchos::null; 01145 using Teuchos::RCP; 01146 using Teuchos::rcp; 01147 typedef LocalOrdinal LO; 01148 typedef typename Graph::t_numRowEntries_ row_entries_type; 01149 typedef typename Graph::LocalStaticCrsGraphType::row_map_type row_map_type; 01150 typedef typename Graph::t_RowPtrsNC row_offsets_type; 01151 01152 const size_t lclNumRows = getNodeNumRows(); 01153 const map_type& rowMap = * (getRowMap ()); 01154 RCP<node_type> node = rowMap.getNode (); 01155 01156 // The goals of this routine are first, to allocate and fill 01157 // packed 1-D storage (see below for an explanation) in the vals 01158 // array, and second, to give vals to the local matrix and 01159 // finalize the local matrix. 
We only need k_ptrs, the packed 1-D 01160 // row offsets, within the scope of this routine, since we're only 01161 // filling the local matrix here (use fillLocalGraphAndMatrix() to 01162 // fill both the graph and the matrix at the same time). 01163 01164 // get data from staticGraph_ 01165 ArrayRCP<Array<LO> > lclInds2D = staticGraph_->lclInds2D_; 01166 ArrayRCP<size_t> numRowEntries = staticGraph_->numRowEntries_; 01167 size_t nodeNumEntries = staticGraph_->nodeNumEntries_; 01168 size_t nodeNumAllocated = staticGraph_->nodeNumAllocated_; 01169 row_map_type k_rowPtrs_ = staticGraph_->k_lclGraph_.row_map; 01170 01171 row_map_type k_ptrs; // "packed" row offsets array 01172 t_ValuesType k_vals; // "packed" values array 01173 01174 // May we ditch the old allocations for the packed (and otherwise 01175 // "optimized") allocations, later in this routine? Request 01176 // optimized storage by default. 01177 bool requestOptimizedStorage = true; 01178 const bool default_OptimizeStorage = 01179 ! isStaticGraph () || staticGraph_->isStorageOptimized (); 01180 if (! params.is_null () && ! params->get ("Optimize Storage", default_OptimizeStorage)) { 01181 requestOptimizedStorage = false; 01182 } 01183 // If we're not allowed to change a static graph, then we can't 01184 // change the storage of the matrix, either. This means that if 01185 // the graph's storage isn't already optimized, we can't optimize 01186 // the matrix's storage either. Check and give warning, as 01187 // appropriate. 01188 if (! staticGraph_->isStorageOptimized () && requestOptimizedStorage) { 01189 TPETRA_ABUSE_WARNING(true, std::runtime_error, 01190 "::fillLocalMatrix(): You requested optimized storage by setting the" 01191 "\"Optimize Storage\" flag to \"true\" in the parameter list, or by virtue" 01192 "of default behavior. However, the associated CrsGraph was filled separately" 01193 "and requested not to optimize storage. 
Therefore, the CrsMatrix cannot" 01194 "optimize storage."); 01195 requestOptimizedStorage = false; 01196 } 01197 01198 // The number of entries in each locally owned row. This is a 01199 // DualView. 2-D storage lives on host and is currently not 01200 // thread-safe for parallel kernels even on host, so we have to 01201 // work sequentially with host storage in that case. 01202 row_entries_type k_numRowEnt = staticGraph_->k_numRowEntries_; 01203 typename row_entries_type::t_host h_numRowEnt = k_numRowEnt.h_view; 01204 01205 if (getProfileType() == DynamicProfile) { 01206 // Pack 2-D storage (DynamicProfile) into 1-D packed storage. 01207 // 01208 // DynamicProfile means that the matrix's values are currently 01209 // stored in a 2-D "unpacked" format, in the array-of-arrays 01210 // values2D_. We allocate 1-D storage and then copy from 2-D 01211 // storage in values2D_ into 1-D storage in k_vals. Since we're 01212 // only allocating the local matrix here, not the local graph, 01213 // we don't need to keep the row offsets array, but we do need 01214 // it here temporarily in order to convert to 1-D storage. (The 01215 // allocStorage() function needs it.) We'll free ptrs later in 01216 // this method. 01217 // 01218 // FIXME (mfh 08 Aug 2014) If we're in this method, then the 01219 // graph should already have packed 1-D storage. Why can't we 01220 // just use the graph's current row offsets array? 01221 01222 // Pack the row offsets into k_ptrs, by doing a sum-scan of 01223 // the array of valid entry counts per row (h_numRowEnt). 01224 // 01225 // Total number of entries in the matrix on the calling 01226 // process. We will compute this in the loop below. It's 01227 // cheap to compute and useful as a sanity check. 01228 size_t lclTotalNumEntries = 0; 01229 // This will be a host view of packed row offsets. 
01230 typename row_offsets_type::HostMirror h_ptrs; 01231 { 01232 row_offsets_type packedRowOffsets ("Tpetra::CrsGraph::ptr", lclNumRows+1); 01233 // 01234 // FIXME hack until we get parallel_scan in Kokkos 01235 // 01236 h_ptrs = create_mirror_view (packedRowOffsets); 01237 h_ptrs(0) = 0; 01238 for (size_t i = 0; i < lclNumRows; ++i) { 01239 const size_t numEnt = h_numRowEnt(i); 01240 lclTotalNumEntries += numEnt; 01241 h_ptrs(i+1) = h_ptrs(i) + numEnt; 01242 } 01243 Kokkos::deep_copy (packedRowOffsets, h_ptrs); 01244 k_ptrs = packedRowOffsets; 01245 } 01246 01247 TEUCHOS_TEST_FOR_EXCEPTION( 01248 static_cast<size_t> (k_ptrs.dimension_0 ()) != lclNumRows + 1, 01249 std::logic_error, "Tpetra::CrsMatrix::fillLocalMatrix: In " 01250 "DynamicProfile branch, after packing k_ptrs, k_ptrs.dimension_0()" 01251 " = " << k_ptrs.dimension_0 () << " != (lclNumRows+1) = " 01252 << (lclNumRows+1) << "."); 01253 TEUCHOS_TEST_FOR_EXCEPTION( 01254 static_cast<size_t> (h_ptrs.dimension_0 ()) != lclNumRows + 1, 01255 std::logic_error, "Tpetra::CrsMatrix::fillLocalMatrix: In " 01256 "DynamicProfile branch, after packing h_ptrs, h_ptrs.dimension_0()" 01257 " = " << h_ptrs.dimension_0 () << " != (lclNumRows+1) = " 01258 << (lclNumRows+1) << "."); 01259 // FIXME (mfh 08 Aug 2014) This assumes UVM. 01260 TEUCHOS_TEST_FOR_EXCEPTION( 01261 k_ptrs(lclNumRows) != lclTotalNumEntries, std::logic_error, 01262 "Tpetra::CrsMatrix::fillLocalMatrix: In DynamicProfile branch, " 01263 "after packing k_ptrs, k_ptrs(lclNumRows = " << lclNumRows << ") = " << 01264 k_ptrs(lclNumRows) << " != total number of entries on the calling " 01265 "process = " << lclTotalNumEntries << "."); 01266 01267 // Allocate the array of packed values. 01268 k_vals = t_ValuesType ("Tpetra::CrsMatrix::val", lclTotalNumEntries); 01269 // We need a host view of the above, since 2-D storage lives on host. 
01270 typename t_ValuesType::HostMirror h_vals = 01271 Kokkos::create_mirror_view (k_vals); 01272 // Pack the values on the host. 01273 for (size_t lclRow = 0; lclRow < lclNumRows; ++lclRow) { 01274 const size_t numEnt = h_numRowEnt(lclRow); 01275 std::copy (values2D_[lclRow].begin(), 01276 values2D_[lclRow].begin() + numEnt, 01277 h_vals.ptr_on_device() + h_ptrs(lclRow)); 01278 } 01279 // Copy the packed values to the device. 01280 Kokkos::deep_copy (k_vals, h_vals); 01281 01282 // Sanity check of packed row offsets. 01283 if (k_ptrs.dimension_0 () != 0) { 01284 const size_t numOffsets = static_cast<size_t> (k_ptrs.dimension_0 ()); 01285 TEUCHOS_TEST_FOR_EXCEPTION( 01286 static_cast<size_t> (k_ptrs(numOffsets-1)) != k_vals.dimension_0 (), 01287 std::logic_error, "Tpetra::CrsMatrix::fillLocalMatrix: " 01288 "In DynamicProfile branch, after packing, k_ptrs(" << (numOffsets-1) 01289 << ") = " << k_ptrs(numOffsets-1) << " != k_vals.dimension_0() = " 01290 << k_vals.dimension_0 () << "."); 01291 } 01292 } 01293 else if (getProfileType () == StaticProfile) { 01294 // StaticProfile means that the matrix's values are currently 01295 // stored in a 1-D format. However, this format is "unpacked"; 01296 // it doesn't necessarily have the same row offsets as indicated 01297 // by the ptrs array returned by allocRowPtrs. This could 01298 // happen, for example, if the user specified StaticProfile in 01299 // the constructor and fixed the number of matrix entries in 01300 // each row, but didn't fill all those entries. 01301 // 01302 // As above, we don't need to keep the "packed" row offsets 01303 // array ptrs here, but we do need it here temporarily, so we 01304 // have to allocate it. We'll free ptrs later in this method. 01305 // 01306 // Note that this routine checks whether storage has already 01307 // been packed. 
This is a common case for solution of nonlinear 01308 // PDEs using the finite element method, as long as the 01309 // structure of the sparse matrix does not change between linear 01310 // solves. 01311 if (nodeNumEntries != nodeNumAllocated) { 01312 // We have to pack the 1-D storage, since the user didn't fill 01313 // up all requested storage. 01314 typename Graph::t_RowPtrsNC tmpk_ptrs ("Tpetra::CrsGraph::ptr", 01315 lclNumRows+1); 01316 // Total number of entries in the matrix on the calling 01317 // process. We will compute this in the loop below. It's 01318 // cheap to compute and useful as a sanity check. 01319 size_t lclTotalNumEntries = 0; 01320 k_ptrs = tmpk_ptrs; 01321 { 01322 // 01323 // FIXME hack until we get parallel_scan in Kokkos 01324 // 01325 typename row_offsets_type::HostMirror h_ptrs = 01326 create_mirror_view (tmpk_ptrs); 01327 h_ptrs(0) = 0; 01328 for (size_t i = 0; i < lclNumRows; ++i) { 01329 const size_t numEnt = h_numRowEnt(i); 01330 lclTotalNumEntries += numEnt; 01331 h_ptrs(i+1) = h_ptrs(i) + numEnt; 01332 } 01333 Kokkos::deep_copy (tmpk_ptrs, h_ptrs); 01334 } 01335 01336 // Allocate the "packed" values array. 01337 // It has exactly the right number of entries. 01338 k_vals = t_ValuesType ("Tpetra::CrsMatrix::val", lclTotalNumEntries); 01339 01340 // Pack k_values1D_ into k_vals. We will replace k_values1D_ below. 01341 typedef pack_functor<t_ValuesType, 01342 typename Graph::LocalStaticCrsGraphType::row_map_type> 01343 packer_type; 01344 packer_type valsPacker (k_vals, k_values1D_, tmpk_ptrs, k_rowPtrs_); 01345 Kokkos::parallel_for (lclNumRows, valsPacker); 01346 } 01347 else { // We don't have to pack, so just set the pointer. 01348 k_vals = k_values1D_; 01349 } 01350 } 01351 01352 // May we ditch the old allocations for the packed one? 01353 if (requestOptimizedStorage) { 01354 // The user requested optimized storage, so we can dump the 01355 // unpacked 2-D and 1-D storage, and keep the packed storage. 
01356 values2D_ = null; 01357 k_values1D_ = k_vals; 01358 this->storageStatus_ = Details::STORAGE_1D_PACKED; 01359 } 01360 01361 // FIXME (mfh 28 Aug 2014) "Local Matrix" sublist is now ignored. 01362 01363 // Build the local sparse matrix object. 01364 k_lclMatrix_ = k_local_matrix_type ("Tpetra::CrsMatrix::k_lclMatrix_", 01365 getDomainMap ()->getNodeNumElements (), 01366 k_vals, 01367 staticGraph_->getLocalGraph_Kokkos ()); 01368 01369 // Set the legacy values1D_ array. 01370 ArrayRCP<scalar_type> classicValues = 01371 Kokkos::Compat::persistingView (k_lclMatrix_.values); 01372 values1D_ = classicValues; 01373 01374 // FIXME (mfh 28 Aug 2014) "Local Sparse Ops" sublist is now ignored. 01375 } 01376

  /// \brief Insert new entries, given by local column indices, into a
  ///   row owned by the calling process.
  ///
  /// \param localRow [in] Local index of the row to modify.
  /// \param indices [in] Local column indices of the entries to insert.
  /// \param values [in] Values to insert; must have the same length as
  ///   \c indices.
  ///
  /// Throws std::runtime_error if fill is not active, if the matrix has
  /// a static graph, if the graph is globally indexed, if the matrix
  /// has no column Map, if the two input arrays differ in length, if
  /// \c localRow is not a local element of the row Map, or if the row
  /// would outgrow its allocation under StaticProfile.  Exceptions
  /// thrown by the graph while allocating or inserting are rethrown as
  /// std::runtime_error with the name of the failing call prepended.
  /// In a debug build (HAVE_TPETRA_DEBUG), column indices that are not
  /// in the column Map cause std::invalid_argument.
  template<class Scalar,
           class LocalOrdinal,
           class GlobalOrdinal,
           class DeviceType>
  void
  CrsMatrix<
    Scalar, LocalOrdinal, GlobalOrdinal,
    Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
  insertLocalValues (const LocalOrdinal localRow,
                     const Teuchos::ArrayView<const LocalOrdinal>& indices,
                     const Teuchos::ArrayView<const Scalar>& values)
  {
    using Teuchos::Array;
    using Teuchos::ArrayView;
    using Teuchos::toString;
    using std::endl;
    const char tfecfFuncName[] = "insertLocalValues";

    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillActive (), std::runtime_error,
      ": Fill is not active. After calling fillComplete, you must call "
      "resumeFill before you may insert entries into the matrix again.");
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isStaticGraph (), std::runtime_error,
      " cannot insert indices with static graph; use replaceLocalValues() instead.");
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(myGraph_->isGloballyIndexed(),
      std::runtime_error, ": graph indices are global; use insertGlobalValues().");
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! hasColMap (), std::runtime_error,
      " cannot insert local indices without a column map.");
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(values.size() != indices.size(),
      std::runtime_error, ": values.size() must equal indices.size().");
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
      ! getRowMap()->isNodeLocalElement(localRow), std::runtime_error,
      ": Local row index " << localRow << " does not belong to this process.");

    // Storage is allocated on first insertion.
    if (! myGraph_->indicesAreAllocated ()) {
      try {
        allocateValues (LocalIndices, GraphNotYetAllocated);
      }
      catch (std::exception& e) {
        TEUCHOS_TEST_FOR_EXCEPTION(
          true, std::runtime_error, "Tpetra::CrsMatrix::insertLocalValues: "
          "allocateValues(LocalIndices,GraphNotYetAllocated) threw an "
          "exception: " << e.what ());
      }
    }

    const size_t numEntriesToAdd = static_cast<size_t> (indices.size ());
#ifdef HAVE_TPETRA_DEBUG
    // In a debug build, if the matrix has a column Map, test whether
    // any of the given column indices are not in the column Map.
    // Keep track of the invalid column indices so we can tell the
    // user about them.
    if (hasColMap ()) {
      const map_type& colMap = * (getColMap ());
      Array<LocalOrdinal> badColInds;
      bool allInColMap = true;
      for (size_t k = 0; k < numEntriesToAdd; ++k) {
        if (! colMap.isNodeLocalElement (indices[k])) {
          allInColMap = false;
          badColInds.push_back (indices[k]);
        }
      }
      if (! allInColMap) {
        std::ostringstream os;
        os << "Tpetra::CrsMatrix::insertLocalValues: You attempted to insert "
          "entries in owned row " << localRow << ", at the following column "
          "indices: " << toString (indices) << "." << endl;
        os << "Of those, the following indices are not in the column Map on "
          "this process: " << toString (badColInds) << "." << endl << "Since "
          "the matrix has a column Map already, it is invalid to insert "
          "entries at those locations.";
        TEUCHOS_TEST_FOR_EXCEPTION(! allInColMap, std::invalid_argument, os.str ());
      }
    }
#endif // HAVE_TPETRA_DEBUG

#ifdef HAVE_TPETRA_DEBUG
    RowInfo rowInfo;
    try {
      rowInfo = myGraph_->getRowInfo (localRow);
    } catch (std::exception& e) {
      TEUCHOS_TEST_FOR_EXCEPTION(
        true, std::runtime_error, "Tpetra::CrsMatrix::insertLocalValues: "
        "myGraph_->getRowInfo threw an exception: " << e.what ());
    }
#else
    RowInfo rowInfo = myGraph_->getRowInfo (localRow);
#endif // HAVE_TPETRA_DEBUG

    const size_t curNumEntries = rowInfo.numEntries;
    const size_t newNumEntries = curNumEntries + numEntriesToAdd;
    if (newNumEntries > rowInfo.allocSize) {
      TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
        getProfileType() == StaticProfile, std::runtime_error,
        ": new indices exceed statically allocated graph structure.");

      // Make space for the new matrix entries.
      try {
        rowInfo = myGraph_->template updateLocalAllocAndValues<Scalar> (rowInfo,
                                                                        newNumEntries,
                                                                        values2D_[localRow]);
      } catch (std::exception& e) {
        // Name the method that was actually called; the message used to
        // blame updateGlobalAllocAndValues.
        TEUCHOS_TEST_FOR_EXCEPTION(
          true, std::runtime_error, "Tpetra::CrsMatrix::insertLocalValues: "
          "myGraph_->updateLocalAllocAndValues threw an exception: "
          << e.what ());
      }
    }
    typename Graph::SLocalGlobalViews indsView;
    indsView.linds = indices;

#ifdef HAVE_TPETRA_DEBUG
    ArrayView<Scalar> valsView;
    try {
      valsView = this->getViewNonConst (rowInfo);
    } catch (std::exception& e) {
      TEUCHOS_TEST_FOR_EXCEPTION(
        true, std::runtime_error, "Tpetra::CrsMatrix::insertLocalValues: "
        "getViewNonConst threw an exception: " << e.what ());
    }
#else
    ArrayView<Scalar> valsView = this->getViewNonConst (rowInfo);
#endif // HAVE_TPETRA_DEBUG

    try {
      myGraph_->template insertIndicesAndValues<Scalar> (rowInfo, indsView,
                                                         valsView, values,
                                                         LocalIndices,
                                                         LocalIndices);
    } catch (std::exception& e) {
      TEUCHOS_TEST_FOR_EXCEPTION(
        true, std::runtime_error, "Tpetra::CrsMatrix::insertLocalValues: "
        "myGraph_->insertIndicesAndValues threw an exception: "
        << e.what ());
    }

#ifdef HAVE_TPETRA_DEBUG
    const size_t chkNewNumEntries = myGraph_->getNumEntriesInLocalRow (localRow);
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
      chkNewNumEntries != newNumEntries, std::logic_error,
      ": The row should have " << newNumEntries << " entries after insert, but "
      "instead has " << chkNewNumEntries << ". Please report this bug to the "
      "Tpetra developers.");
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isLocallyIndexed(), std::logic_error,
      ": At end of insertLocalValues(), this CrsMatrix is not locally indexed. "
      "Please report this bug to the Tpetra developers.");
#endif // HAVE_TPETRA_DEBUG
  }

  /// \brief Variant of insertLocalValues() that first passes the input
  ///   through the graph's local-index filter and inserts only what
  ///   the filter keeps.
  ///
  /// \param localRow [in] Local index of the row to modify.
  /// \param indices [in] Local column indices of the candidate entries.
  /// \param values [in] Candidate values; must have the same length as
  ///   \c indices.
  ///
  /// The inputs are copied, and the copies are handed to
  /// CrsGraph::filterLocalIndicesAndValues(), which returns the number
  /// of entries kept at the front of the copies.  If nothing is kept,
  /// the row is left unchanged.  Throws std::runtime_error under the
  /// same precondition failures as insertLocalValues().
  template<class Scalar, class LocalOrdinal, class GlobalOrdinal,
           class DeviceType>
  void
  CrsMatrix<
    Scalar, LocalOrdinal, GlobalOrdinal,
    Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
  insertLocalValuesFiltered (const LocalOrdinal localRow,
                             const Teuchos::ArrayView<const LocalOrdinal>& indices,
                             const Teuchos::ArrayView<const Scalar>& values)
  {
    // Exception messages must name this method, not insertLocalValues.
    const char tfecfFuncName[] = "insertLocalValuesFiltered";
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillActive (), std::runtime_error,
      " requires that fill is active.");
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isStaticGraph (), std::runtime_error,
      " cannot insert indices with static graph; use replaceLocalValues() instead.");
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(myGraph_->isGloballyIndexed(),
      std::runtime_error, ": graph indices are global; use insertGlobalValues().");
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! hasColMap (), std::runtime_error,
      " cannot insert local indices without a column map.");
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(values.size() != indices.size(),
      std::runtime_error, ": values.size() must equal indices.size().");
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
      ! getRowMap()->isNodeLocalElement (localRow), std::runtime_error,
      ": Local row index " << localRow << " does not belong to this process.");
    if (! myGraph_->indicesAreAllocated ()) {
      allocateValues (LocalIndices, GraphNotYetAllocated);
    }
    // Use the graph to filter incoming entries whose column indices
    // aren't in the column Map.  The filter works in place, so it gets
    // private copies of the caller's arrays.
    Teuchos::Array<LocalOrdinal> f_inds (indices);
    Teuchos::Array<Scalar> f_vals (values);
    const size_t numFilteredEntries =
      myGraph_->template filterLocalIndicesAndValues<Scalar> (f_inds (),
                                                              f_vals ());
    if (numFilteredEntries > 0) {
      RowInfo rowInfo = myGraph_->getRowInfo (localRow);
      const size_t curNumEntries = rowInfo.numEntries;
      const size_t newNumEntries = curNumEntries + numFilteredEntries;
      if (newNumEntries > rowInfo.allocSize) {
        TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
          getProfileType () == StaticProfile, std::runtime_error,
          ": new indices exceed statically allocated graph structure. "
          "newNumEntries (" << newNumEntries << ") > rowInfo.allocSize ("
          << rowInfo.allocSize << ").");
        // Make space for the new matrix entries.
        rowInfo =
          myGraph_->template updateLocalAllocAndValues<Scalar> (rowInfo,
                                                                newNumEntries,
                                                                values2D_[localRow]);
      }
      // Hand over only the entries the filter kept, for indices and
      // values alike, as insertGlobalValuesFiltered() does.
      typename Graph::SLocalGlobalViews inds_view;
      inds_view.linds = f_inds (0, numFilteredEntries);
      Teuchos::ArrayView<const Scalar> vals_view =
        f_vals (0, numFilteredEntries);
      myGraph_->template insertIndicesAndValues<Scalar> (rowInfo, inds_view,
                                                         this->getViewNonConst (rowInfo),
                                                         vals_view, LocalIndices,
                                                         LocalIndices);
#ifdef HAVE_TPETRA_DEBUG
      const size_t chkNewNumEntries = myGraph_->getNumEntriesInLocalRow (localRow);
      TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(chkNewNumEntries != newNumEntries,
        std::logic_error, ": Internal logic error. Please contact Tpetra team.");
#endif // HAVE_TPETRA_DEBUG
    }
#ifdef HAVE_TPETRA_DEBUG
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isLocallyIndexed(), std::logic_error,
      ": At end of insertLocalValuesFiltered(), this CrsMatrix is not locally "
      "indexed. Please report this bug to the Tpetra developers.");
#endif // HAVE_TPETRA_DEBUG
  }

01593 01594 01595 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, 01596 class DeviceType> 01597 void 01598 CrsMatrix< 01599 Scalar, LocalOrdinal, GlobalOrdinal, 01600 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01601 insertGlobalValues (const GlobalOrdinal globalRow, 01602 const Teuchos::ArrayView<const GlobalOrdinal>& indices, 01603 const Teuchos::ArrayView<const Scalar>& values) 01604 { 01605 using Teuchos::Array; 01606 using Teuchos::ArrayView; 01607 using Teuchos::toString; 01608 using std::endl; 01609 typedef LocalOrdinal LO; 01610 typedef GlobalOrdinal GO; 01611 typedef typename Teuchos::ArrayView<const GO>::size_type size_type; 01612 const char tfecfFuncName[] = "insertGlobalValues: "; 01613 01614 #ifdef HAVE_TPETRA_DEBUG 01615 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 01616 values.size() != indices.size(), std::runtime_error, 01617 "values.size() must equal indices.size(). values.size() = " 01618 << values.size() << ", but indices.size() = " << indices.size() << "."); 01619 #endif // HAVE_TPETRA_DEBUG 01620 01621 const LO localRow = getRowMap ()->getLocalElement (globalRow); 01622 01623 if (localRow == OTL::invalid ()) { // globalRow _not_ owned by calling process 01624 insertNonownedGlobalValues (globalRow, indices, values); 01625 } 01626 else { // globalRow _is_ owned by calling process 01627 if (this->isStaticGraph ()) { 01628 // Uh oh! Not allowed to insert into owned rows in that case. 01629 std::ostringstream err; 01630 const int myRank = getRowMap ()->getComm ()->getRank (); 01631 const int numProcs = getRowMap ()->getComm ()->getSize (); 01632 01633 err << "The matrix was constructed with a constant (\"static\") graph, " 01634 "yet the given global row index " << globalRow << " is in the row " 01635 "Map on the calling process (with rank " << myRank << ", of " << 01636 numProcs << " process(es)). 
In this case, you may not insert new " 01637 "entries into rows owned by the calling process."; 01638 01639 if (! getRowMap ()->isNodeGlobalElement (globalRow)) { 01640 err << " Furthermore, GID->LID conversion with the row Map claims that " 01641 "the global row index is owned on the calling process, yet " 01642 "getRowMap()->isNodeGlobalElement(globalRow) returns false. That's" 01643 " weird! This might indicate a Map bug. Please report this to the" 01644 " Tpetra developers."; 01645 } 01646 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 01647 this->isStaticGraph (), std::runtime_error, err.str ()); 01648 } 01649 01650 if (! myGraph_->indicesAreAllocated ()) { 01651 try { 01652 allocateValues (GlobalIndices, GraphNotYetAllocated); 01653 } 01654 catch (std::exception& e) { 01655 TEUCHOS_TEST_FOR_EXCEPTION( 01656 true, std::runtime_error, "Tpetra::CrsMatrix::insertGlobalValues: " 01657 "allocateValues(GlobalIndices,GraphNotYetAllocated) threw an " 01658 "exception: " << e.what ()); 01659 } 01660 } 01661 01662 const size_type numEntriesToInsert = indices.size (); 01663 // If the matrix has a column Map, check at this point whether 01664 // the column indices belong to the column Map. 01665 // 01666 // FIXME (mfh 16 May 2013) We may want to consider deferring the 01667 // test to the CrsGraph method, since it may have to do this 01668 // anyway. 01669 if (hasColMap ()) { 01670 const map_type& colMap = * (getColMap ()); 01671 // In a debug build, keep track of the nonowned ("bad") column 01672 // indices, so that we can display them in the exception 01673 // message. In a release build, just ditch the loop early if 01674 // we encounter a nonowned column index. 01675 #ifdef HAVE_TPETRA_DEBUG 01676 Array<GO> badColInds; 01677 #endif // HAVE_TPETRA_DEBUG 01678 bool allInColMap = true; 01679 for (size_type k = 0; k < numEntriesToInsert; ++k) { 01680 if (! 
colMap.isNodeGlobalElement (indices[k])) { 01681 allInColMap = false; 01682 #ifdef HAVE_TPETRA_DEBUG 01683 badColInds.push_back (indices[k]); 01684 #else 01685 break; 01686 #endif // HAVE_TPETRA_DEBUG 01687 } 01688 } 01689 if (! allInColMap) { 01690 std::ostringstream os; 01691 os << "You attempted to insert entries in owned row " << globalRow 01692 << ", at the following column indices: " << toString (indices) 01693 << "." << endl; 01694 #ifdef HAVE_TPETRA_DEBUG 01695 os << "Of those, the following indices are not in the column Map on " 01696 "this process: " << toString (badColInds) << "." << endl << "Since " 01697 "the matrix has a column Map already, it is invalid to insert " 01698 "entries at those locations."; 01699 #else 01700 os << "At least one of those indices is not in the column Map on this " 01701 "process." << endl << "It is invalid to insert into columns not in " 01702 "the column Map on the process that owns the row."; 01703 #endif // HAVE_TPETRA_DEBUG 01704 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 01705 ! 
allInColMap, std::invalid_argument, os.str ()); 01706 } 01707 } 01708 01709 typename Graph::SLocalGlobalViews inds_view; 01710 ArrayView<const Scalar> vals_view; 01711 01712 inds_view.ginds = indices; 01713 vals_view = values; 01714 01715 #ifdef HAVE_TPETRA_DEBUG 01716 RowInfo rowInfo; 01717 try { 01718 rowInfo = myGraph_->getRowInfo (localRow); 01719 } catch (std::exception& e) { 01720 TEUCHOS_TEST_FOR_EXCEPTION( 01721 true, std::runtime_error, "myGraph_->getRowInfo(localRow=" << localRow 01722 << ") threw an exception: " << e.what ()); 01723 } 01724 #else 01725 RowInfo rowInfo = myGraph_->getRowInfo (localRow); 01726 #endif // HAVE_TPETRA_DEBUG 01727 01728 const size_t curNumEntries = rowInfo.numEntries; 01729 const size_t newNumEntries = 01730 curNumEntries + static_cast<size_t> (numEntriesToInsert); 01731 if (newNumEntries > rowInfo.allocSize) { 01732 TEUCHOS_TEST_FOR_EXCEPTION( 01733 getProfileType () == StaticProfile && newNumEntries > rowInfo.allocSize, 01734 std::runtime_error, "Tpetra::CrsMatrix::insertGlobalValues: new " 01735 "indices exceed statically allocated graph structure. curNumEntries" 01736 " (" << curNumEntries << ") + numEntriesToInsert (" << 01737 numEntriesToInsert << ") > allocSize (" << rowInfo.allocSize << ")."); 01738 01739 // Update allocation only as much as necessary 01740 try { 01741 rowInfo = 01742 myGraph_->template updateGlobalAllocAndValues<Scalar> (rowInfo, 01743 newNumEntries, 01744 values2D_[localRow]); 01745 } catch (std::exception& e) { 01746 TEUCHOS_TEST_FOR_EXCEPTION( 01747 true, std::runtime_error, "myGraph_->updateGlobalAllocAndValues" 01748 "(...) threw an exception: " << e.what ()); 01749 } 01750 } 01751 try { 01752 if (isGloballyIndexed ()) { 01753 // lg=GlobalIndices, I=GlobalIndices means the method calls 01754 // getGlobalViewNonConst() and does direct copying, which 01755 // should be reasonably fast. 
01756 myGraph_->template insertIndicesAndValues<Scalar> (rowInfo, inds_view, 01757 this->getViewNonConst (rowInfo), 01758 values, 01759 GlobalIndices, GlobalIndices); 01760 } 01761 else { 01762 // lg=GlobalIndices, I=LocalIndices means the method calls 01763 // the Map's getLocalElement() method once per entry to 01764 // insert. This may be slow. 01765 myGraph_->template insertIndicesAndValues<Scalar> (rowInfo, inds_view, 01766 this->getViewNonConst (rowInfo), 01767 values, 01768 GlobalIndices, LocalIndices); 01769 } 01770 } 01771 catch (std::exception& e) { 01772 TEUCHOS_TEST_FOR_EXCEPTION( 01773 true, std::runtime_error, "myGraph_->insertIndicesAndValues(...) " 01774 "threw an exception: " << e.what ()); 01775 } 01776 01777 #ifdef HAVE_TPETRA_DEBUG 01778 const size_t chkNewNumEntries = myGraph_->getNumEntriesInLocalRow (localRow); 01779 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(chkNewNumEntries != newNumEntries, 01780 std::logic_error, ": There should be a total of " << newNumEntries 01781 << " entries in the row, but the graph now reports " << chkNewNumEntries 01782 << " entries. Please report this bug to the Tpetra developers."); 01783 #endif // HAVE_TPETRA_DEBUG 01784 } 01785 } 01786 01787

  /// \brief Insert entries, given by global column indices, into a
  ///   global row, keeping only the entries that survive the graph's
  ///   global-index filter when the matrix has a column Map.
  ///
  /// \param globalRow [in] Global index of the row to modify.
  /// \param indices [in] Global column indices of the candidate entries.
  /// \param values [in] Candidate values; must have the same length as
  ///   \c indices (checked only if HAVE_TPETRA_DEBUG is defined).
  ///
  /// If \c globalRow is not in the row Map on the calling process, the
  /// unfiltered input is forwarded to insertNonownedGlobalValues() and
  /// nothing else happens.  Otherwise, throws std::runtime_error if the
  /// matrix has a static graph, or if the row would outgrow its
  /// allocation under StaticProfile.
  template<class Scalar,
           class LocalOrdinal,
           class GlobalOrdinal, class DeviceType>
  void
  CrsMatrix<
    Scalar, LocalOrdinal, GlobalOrdinal,
    Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
  insertGlobalValuesFiltered (const GlobalOrdinal globalRow,
                              const ArrayView<const GlobalOrdinal>& indices,
                              const ArrayView<const Scalar>& values)
  {
    typedef LocalOrdinal LO;
    typedef GlobalOrdinal GO;
    const char tfecfFuncName[] = "insertGlobalValuesFiltered";

    // mfh 14 Dec 2012: The test for a static graph is deferred until
    // we know that globalRow is in the row Map.  If it's not in the
    // row Map, it doesn't matter whether or not the graph is static;
    // the data just get stashed for later use by globalAssemble().
#ifdef HAVE_TPETRA_DEBUG
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
      values.size() != indices.size(), std::runtime_error,
      ": values.size() must equal indices.size(). values.size() = "
      << values.size() << ", but indices.size() = " << indices.size() << ".");
#endif // HAVE_TPETRA_DEBUG

    const LO lrow = getRowMap ()->getLocalElement (globalRow);

    if (lrow == Teuchos::OrdinalTraits<LO>::invalid ()) {
      // The calling process doesn't own the given row.
      insertNonownedGlobalValues (globalRow, indices, values);
      return;
    }

    // globalRow is in our row Map.  If the matrix has a static graph,
    // this process is not allowed to insert into rows it owns.
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
      this->isStaticGraph(), std::runtime_error,
      ": The CrsMatrix was constructed with a static graph. In that case, "
      "it's forbidden to insert new entries into rows owned by the calling process.");
    if (! myGraph_->indicesAreAllocated ()) {
      allocateValues (GlobalIndices, GraphNotYetAllocated);
    }
    typename Graph::SLocalGlobalViews inds_view;
    ArrayView<const Scalar> vals_view;

    // These Arrays are declared here rather than inside the
    // hasColMap() branch, so that the views into them stay valid
    // until the insertion below has finished.
    Array<GO> filtered_indices;
    Array<Scalar> filtered_values;
    if (hasColMap ()) {
      // Filter copies of the input through the graph, and keep views
      // of just the leading entries that the filter reports as kept.
      filtered_indices.assign (indices.begin (), indices.end ());
      filtered_values.assign (values.begin (), values.end ());
      const size_t numKept =
        myGraph_->template filterGlobalIndicesAndValues<Scalar> (filtered_indices (),
                                                                 filtered_values ());
      inds_view.ginds = filtered_indices (0, numKept);
      vals_view = filtered_values (0, numKept);
    }
    else {
      // No column Map, hence nothing to filter against.
      inds_view.ginds = indices;
      vals_view = values;
    }

    const size_t numFilteredEntries = vals_view.size ();
    if (numFilteredEntries == 0) {
      return; // nothing left to insert
    }

    RowInfo rowInfo = myGraph_->getRowInfo(lrow);
    const size_t curNumEntries = rowInfo.numEntries;
    const size_t newNumEntries = curNumEntries + numFilteredEntries;
    if (newNumEntries > rowInfo.allocSize) {
      TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
        getProfileType() == StaticProfile, std::runtime_error,
        ": new indices exceed statically allocated graph structure.");

      // Update allocation only as much as necessary
      rowInfo = myGraph_->template updateGlobalAllocAndValues<Scalar> (rowInfo, newNumEntries,
                                                                       values2D_[lrow]);
    }
    if (isGloballyIndexed ()) {
      // lg=GlobalIndices, I=GlobalIndices means the method calls
      // getGlobalViewNonConst() and does direct copying, which
      // should be reasonably fast.
      myGraph_->template insertIndicesAndValues<Scalar> (rowInfo, inds_view,
                                                         this->getViewNonConst (rowInfo),
                                                         vals_view,
                                                         GlobalIndices, GlobalIndices);
    }
    else {
      // lg=GlobalIndices, I=LocalIndices means the method calls
      // the Map's getLocalElement() method once per entry to
      // insert.  This may be slow.
      myGraph_->template insertIndicesAndValues<Scalar> (rowInfo, inds_view,
                                                         this->getViewNonConst (rowInfo),
                                                         vals_view,
                                                         GlobalIndices, LocalIndices);
    }
#ifdef HAVE_TPETRA_DEBUG
    {
      const size_t chkNewNumEntries = myGraph_->getNumEntriesInLocalRow(lrow);
      TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(chkNewNumEntries != newNumEntries,
        std::logic_error, ": There should be a total of " << newNumEntries
        << " entries in the row, but the graph now reports " << chkNewNumEntries
        << " entries. Please report this bug to the Tpetra developers.");
    }
#endif // HAVE_TPETRA_DEBUG
  }

01901 01902 01903 template<class Scalar, 01904 class LocalOrdinal, 01905 class GlobalOrdinal, 01906 class DeviceType> 01907 LocalOrdinal 01908 CrsMatrix< 01909 Scalar, LocalOrdinal, GlobalOrdinal, 01910 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01911 replaceLocalValues (const LocalOrdinal localRow, 01912 const ArrayView<const LocalOrdinal> &indices, 01913 const ArrayView<const Scalar>& values) 01914 { 01915 using Teuchos::Array; 01916 using Teuchos::ArrayView; 01917 // project2nd is a binary function that returns its second 01918 // argument. This replaces entries in the given row with their 01919 // corresponding entry of values. 01920 typedef Tpetra::project2nd<Scalar, Scalar> f_type; 01921 typedef LocalOrdinal LO; 01922 typedef GlobalOrdinal GO; 01923 typedef typename ArrayView<GO>::size_type size_type; 01924 01925 if (! isFillActive ()) { 01926 // Fill must be active in order to call this method. 01927 return Teuchos::OrdinalTraits<LO>::invalid (); 01928 } 01929 else if (! this->hasColMap ()) { 01930 // There is no such thing as local column indices without a column Map. 
01931 return Teuchos::OrdinalTraits<LO>::invalid (); 01932 } 01933 else if (values.size () != indices.size ()) { 01934 // The sizes of values and indices must match. 01935 return Teuchos::OrdinalTraits<LO>::invalid (); 01936 } 01937 const bool isLocalRow = getRowMap ()->isNodeLocalElement (localRow); 01938 if (! isLocalRow) { 01939 // The calling process doesn't own the local row, so we can't 01940 // insert into it. 01941 return static_cast<LO> (0); 01942 } 01943 01944 if (indices.size () == 0) { 01945 return static_cast<LO> (0); 01946 } 01947 else { 01948 RowInfo rowInfo = staticGraph_->getRowInfo (localRow); 01949 ArrayView<Scalar> curVals = this->getViewNonConst (rowInfo); 01950 if (isLocallyIndexed ()) { 01951 return staticGraph_->template transformLocalValues<Scalar, f_type> (rowInfo, curVals, 01952 indices, values, 01953 f_type ()); 01954 } 01955 else if (isGloballyIndexed ()) { 01956 // Convert the given local indices to global indices. 01957 // 01958 // FIXME (mfh 27 Jun 2014) Why can't we ask the graph to do 01959 // that? It could do the conversions in place, so that we 01960 // wouldn't need temporary storage. 01961 const map_type& colMap = * (this->getColMap ()); 01962 const size_type numInds = indices.size (); 01963 01964 // mfh 27 Jun 2014: Some of the given local indices might be 01965 // invalid. That's OK, though, since the graph ignores them 01966 // and their corresponding values in transformGlobalValues. 01967 // Thus, we don't have to count how many indices are valid. 01968 // We do so just as a sanity check. 
01969 Array<GO> gblInds (numInds); 01970 size_type numValid = 0; // sanity check count of # valid indices 01971 for (size_type k = 0; k < numInds; ++k) { 01972 const GO gid = colMap.getGlobalElement (indices[k]); 01973 gblInds[k] = gid; 01974 if (gid != Teuchos::OrdinalTraits<GO>::invalid ()) { 01975 ++numValid; // sanity check count of # valid indices 01976 } 01977 } 01978 const LO numXformed = 01979 staticGraph_->template transformGlobalValues<Scalar, f_type> (rowInfo, 01980 curVals, // target 01981 gblInds, 01982 values, // source 01983 f_type ()); 01984 if (static_cast<size_type> (numXformed) != numValid) { 01985 return Teuchos::OrdinalTraits<LO>::invalid (); 01986 } else { 01987 return numXformed; 01988 } 01989 } 01990 // NOTE (mfh 26 Jun 2014) In the current version of CrsMatrix, 01991 // it's possible for a matrix (or graph) to be neither locally 01992 // nor globally indexed on a process. This means that the graph 01993 // or matrix has no entries on that process. Epetra also works 01994 // like this. It's related to lazy allocation (on first 01995 // insertion, not at graph / matrix construction). Lazy 01996 // allocation will go away because it is not thread scalable. 01997 return static_cast<LO> (0); 01998 } 01999 } 02000 02001 02002 template<class Scalar, 02003 class LocalOrdinal, 02004 class GlobalOrdinal, class DeviceType> 02005 LocalOrdinal 02006 CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02007 replaceGlobalValues (GlobalOrdinal globalRow, 02008 const ArrayView<const GlobalOrdinal> &indices, 02009 const ArrayView<const Scalar> &values) 02010 { 02011 typedef LocalOrdinal LO; 02012 typedef GlobalOrdinal GO; 02013 using Teuchos::Array; 02014 using Teuchos::ArrayView; 02015 typedef typename ArrayView<const GO>::size_type size_type; 02016 // project2nd is a binary function that returns its second 02017 // argument. 
This replaces entries in the given row with their 02018 // corresponding entry of values. 02019 typedef Tpetra::project2nd<Scalar, Scalar> f_type; 02020 02021 if (! isFillActive ()) { 02022 // Fill must be active in order to call this method. 02023 return Teuchos::OrdinalTraits<LO>::invalid (); 02024 } 02025 else if (values.size () != indices.size ()) { 02026 // The sizes of values and indices must match. 02027 return Teuchos::OrdinalTraits<LO>::invalid (); 02028 } 02029 02030 const LO lrow = this->getRowMap()->getLocalElement (globalRow); 02031 if (lrow == Teuchos::OrdinalTraits<LO>::invalid ()) { 02032 // We don't own the row, so we're not allowed to modify its values. 02033 return Teuchos::OrdinalTraits<LO>::invalid (); 02034 } 02035 02036 if (staticGraph_.is_null ()) { 02037 return Teuchos::OrdinalTraits<LO>::invalid (); 02038 } 02039 const crs_graph_type& graph = *staticGraph_; 02040 RowInfo rowInfo = graph.getRowInfo (lrow); 02041 if (indices.size () == 0) { 02042 return static_cast<LO> (0); 02043 } 02044 else { 02045 ArrayView<Scalar> curVals = this->getViewNonConst (rowInfo); 02046 if (isLocallyIndexed ()) { 02047 // Convert the given global indices to local indices. 02048 // 02049 // FIXME (mfh 08 Jul 2014) Why can't we ask the graph to do 02050 // that? It could do the conversions in place, so that we 02051 // wouldn't need temporary storage. 02052 const map_type& colMap = * (this->getColMap ()); 02053 const size_type numInds = indices.size (); 02054 Array<LO> lclInds (numInds); 02055 for (size_type k = 0; k < numInds; ++k) { 02056 // There is no need to filter out indices not in the 02057 // column Map. Those that aren't will be mapped to 02058 // invalid(), which the graph's transformGlobalValues() 02059 // will filter out (but not count in its return value). 
02060 lclInds[k] = colMap.getLocalElement (indices[k]); 02061 } 02062 return graph.template transformLocalValues<Scalar, f_type> (rowInfo, 02063 curVals, 02064 lclInds (), 02065 values, 02066 f_type ()); 02067 } 02068 else if (isGloballyIndexed ()) { 02069 return graph.template transformGlobalValues<Scalar, f_type> (rowInfo, 02070 curVals, 02071 indices, 02072 values, 02073 f_type ()); 02074 } 02075 else { 02076 // If the graph is neither locally nor globally indexed on 02077 // the calling process, that means that the calling process 02078 // can't possibly have any entries in the owned row. Thus, 02079 // there are no entries to transform, so we return zero. 02080 return static_cast<LO> (0); 02081 } 02082 } 02083 } 02084 02085 02086 template<class Scalar, 02087 class LocalOrdinal, 02088 class GlobalOrdinal, class DeviceType> 02089 LocalOrdinal 02090 CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02091 sumIntoGlobalValues (const GlobalOrdinal globalRow, 02092 const ArrayView<const GlobalOrdinal> &indices, 02093 const ArrayView<const Scalar> &values) 02094 02095 { 02096 typedef LocalOrdinal LO; 02097 typedef GlobalOrdinal GO; 02098 using Teuchos::Array; 02099 using Teuchos::ArrayView; 02100 typedef typename ArrayView<const GO>::size_type size_type; 02101 typedef std::plus<Scalar> f_type; 02102 02103 if (! isFillActive ()) { 02104 // Fill must be active in order to call this method. 02105 return Teuchos::OrdinalTraits<LO>::invalid (); 02106 } 02107 else if (values.size () != indices.size ()) { 02108 // The sizes of values and indices must match. 02109 return Teuchos::OrdinalTraits<LO>::invalid (); 02110 } 02111 02112 const LO lrow = this->getRowMap()->getLocalElement (globalRow); 02113 if (lrow == Teuchos::OrdinalTraits<LO>::invalid ()) { 02114 // globalRow is not in the row Map, so stash the given entries 02115 // away in a separate data structure. 
globalAssemble() (called 02116 // during fillComplete()) will exchange that data and sum it in 02117 // using sumIntoGlobalValues(). 02118 this->insertNonownedGlobalValues (globalRow, indices, values); 02119 // FIXME (mfh 08 Jul 2014) It's not clear what to return here, 02120 // since we won't know whether the given indices were valid 02121 // until globalAssemble (called in fillComplete) is called. 02122 // That's why insertNonownedGlobalValues doesn't return 02123 // anything. Just for consistency, I'll return the number of 02124 // entries that the user gave us. 02125 return static_cast<LO> (indices.size ()); 02126 } 02127 02128 if (staticGraph_.is_null ()) { 02129 return Teuchos::OrdinalTraits<LO>::invalid (); 02130 } 02131 const crs_graph_type& graph = *staticGraph_; 02132 RowInfo rowInfo = graph.getRowInfo (lrow); 02133 if (indices.size () == 0) { 02134 return static_cast<LO> (0); 02135 } 02136 else { 02137 ArrayView<Scalar> curVals = this->getViewNonConst (rowInfo); 02138 if (isLocallyIndexed ()) { 02139 // Convert the given global indices to local indices. 02140 // 02141 // FIXME (mfh 08 Jul 2014) Why can't we ask the graph to do 02142 // that? It could do the conversions in place, so that we 02143 // wouldn't need temporary storage. 02144 const map_type& colMap = * (this->getColMap ()); 02145 const size_type numInds = indices.size (); 02146 Array<LO> lclInds (numInds); 02147 for (size_type k = 0; k < numInds; ++k) { 02148 // There is no need to filter out indices not in the 02149 // column Map. Those that aren't will be mapped to 02150 // invalid(), which the graph's transformGlobalValues() 02151 // will filter out (but not count in its return value). 
02152 lclInds[k] = colMap.getLocalElement (indices[k]); 02153 } 02154 return graph.template transformLocalValues<Scalar, f_type> (rowInfo, 02155 curVals, 02156 lclInds (), 02157 values, 02158 f_type ()); 02159 } 02160 else if (isGloballyIndexed ()) { 02161 return graph.template transformGlobalValues<Scalar, f_type> (rowInfo, 02162 curVals, 02163 indices, 02164 values, 02165 f_type ()); 02166 } 02167 else { 02168 // If the graph is neither locally nor globally indexed on 02169 // the calling process, that means that the calling process 02170 // can't possibly have any entries in the owned row. Thus, 02171 // there are no entries to transform, so we return zero. 02172 return static_cast<LO> (0); 02173 } 02174 } 02175 } 02176 02177

  /// \brief Add the given values to existing entries of an owned row,
  ///   addressed by local column indices.
  ///
  /// \param localRow [in] Local index of the row to modify.
  /// \param indices [in] Local column indices of the entries to update.
  /// \param values [in] Values to add; must have the same length as
  ///   \c indices.
  ///
  /// \return Teuchos::OrdinalTraits<LocalOrdinal>::invalid() if fill is
  ///   not active, if the matrix has no column Map, or if the input
  ///   arrays differ in length.  Zero if \c localRow is not a local
  ///   element of the row Map, if \c indices is empty, or if the matrix
  ///   is neither locally nor globally indexed.  If the matrix is
  ///   locally indexed, whatever the graph's transformLocalValues()
  ///   returns.  If it is globally indexed, the count returned by the
  ///   graph's transformGlobalValues(), or invalid() when that count
  ///   differs from the number of input indices that the column Map
  ///   could convert to global indices.
  template <class Scalar,
            class LocalOrdinal,
            class GlobalOrdinal, class DeviceType>
  LocalOrdinal
  CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
  sumIntoLocalValues (const LocalOrdinal localRow,
                      const ArrayView<const LocalOrdinal>& indices,
                      const ArrayView<const Scalar>& values)
  {
    using Teuchos::Array;
    using Teuchos::ArrayView;
    typedef LocalOrdinal LO;
    typedef GlobalOrdinal GO;
    typedef std::plus<Scalar> f_type;
    typedef typename ArrayView<GO>::size_type size_type;

    const LO failure = Teuchos::OrdinalTraits<LO>::invalid ();
    const LO noneChanged = static_cast<LO> (0);

    // Preconditions, tested in this order: fill must be active, local
    // column indices need a column Map, and the inputs must pair up.
    if (! isFillActive ()) {
      return failure;
    }
    if (! this->hasColMap ()) {
      return failure;
    }
    if (values.size () != indices.size ()) {
      return failure;
    }

    // A row the calling process doesn't own can't be modified, and an
    // empty request changes nothing.
    if (! getRowMap ()->isNodeLocalElement (localRow)) {
      return noneChanged;
    }
    if (indices.size () == 0) {
      return noneChanged;
    }

    RowInfo rowInfo = staticGraph_->getRowInfo (localRow);
    ArrayView<Scalar> rowVals = this->getViewNonConst (rowInfo);

    if (isLocallyIndexed ()) {
      return staticGraph_->template transformLocalValues<Scalar, f_type> (rowInfo, rowVals,
                                                                          indices, values,
                                                                          f_type ());
    }

    if (! isGloballyIndexed ()) {
      // NOTE (mfh 26 Jun 2014) A matrix (or graph) may be neither
      // locally nor globally indexed on a process, which means it has
      // no entries there.  This is related to lazy allocation (on
      // first insertion, not at graph / matrix construction).
      return noneChanged;
    }

    // Globally indexed graph: convert the given local indices to
    // global indices.
    //
    // FIXME (mfh 27 Jun 2014) Why can't we ask the graph to do
    // that?  It could do the conversions in place, so that we
    // wouldn't need temporary storage.
    const map_type& colMap = * (this->getColMap ());
    const size_type numInds = indices.size ();
    const GO badGid = Teuchos::OrdinalTraits<GO>::invalid ();

    // mfh 27 Jun 2014: Some of the given local indices might be
    // invalid.  Per that note, the graph ignores them and their
    // values in transformGlobalValues, so counting the valid ones
    // serves only as a sanity check on the graph's return value.
    Array<GO> gblInds (numInds);
    size_type numValid = 0;
    for (size_type i = 0; i < numInds; ++i) {
      gblInds[i] = colMap.getGlobalElement (indices[i]);
      if (gblInds[i] != badGid) {
        ++numValid;
      }
    }

    const LO numXformed =
      staticGraph_->template transformGlobalValues<Scalar, f_type> (rowInfo,
                                                                    rowVals, // target
                                                                    gblInds,
                                                                    values,  // source
                                                                    f_type ());
    return (static_cast<size_type> (numXformed) == numValid) ? numXformed : failure;
  }

02269 02270 02271 template<class Scalar, 02272 class LocalOrdinal, 02273 class GlobalOrdinal, 02274 class DeviceType> 02275 Teuchos::ArrayView<const Scalar> 02276 CrsMatrix< 02277 Scalar, LocalOrdinal, GlobalOrdinal, 02278 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02279 getView (RowInfo rowinfo) const 02280 { 02281 if (values1D_ != null && rowinfo.allocSize > 0) { 02282 #ifdef HAVE_TPETRA_DEBUG 02283 TEUCHOS_TEST_FOR_EXCEPTION( 02284 rowinfo.offset1D + rowinfo.allocSize > values1D_.size (), 02285 std::range_error, "Tpetra::CrsMatrix::getView: Invalid access " 02286 "to 1-D storage of values." 
<< std::endl << "rowinfo.offset1D (" << 02287 rowinfo.offset1D << ") + rowinfo.allocSize (" << rowinfo.allocSize << 02288 ") > values1D_.size() (" << values1D_.size () << ")."); 02289 #endif // HAVE_TPETRA_DEBUG 02290 return values1D_ (rowinfo.offset1D, rowinfo.allocSize); 02291 } 02292 else if (values2D_ != null) { 02293 return values2D_[rowinfo.localRow] (); 02294 } 02295 else { 02296 return Teuchos::ArrayView<Scalar> (); 02297 } 02298 } 02299 02300 02301 template<class Scalar, 02302 class LocalOrdinal, 02303 class GlobalOrdinal, 02304 class DeviceType> 02305 Teuchos::ArrayView<Scalar> 02306 CrsMatrix< 02307 Scalar, 02308 LocalOrdinal, 02309 GlobalOrdinal, 02310 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02311 getViewNonConst (RowInfo rowinfo) 02312 { 02313 if (values1D_ != null && rowinfo.allocSize > 0) { 02314 #ifdef HAVE_TPETRA_DEBUG 02315 TEUCHOS_TEST_FOR_EXCEPTION( 02316 rowinfo.offset1D + rowinfo.allocSize > values1D_.size (), 02317 std::range_error, "Tpetra::CrsMatrix::getViewNonConst: Invalid access " 02318 "to 1-D storage of values." 
<< std::endl << "rowinfo.offset1D (" << 02319 rowinfo.offset1D << ") + rowinfo.allocSize (" << rowinfo.allocSize << 02320 ") > values1D_.size() (" << values1D_.size () << ")."); 02321 #endif // HAVE_TPETRA_DEBUG 02322 return values1D_ (rowinfo.offset1D, rowinfo.allocSize); 02323 } 02324 else if (values2D_ != null) { 02325 return values2D_[rowinfo.localRow] (); 02326 } 02327 else { 02328 return Teuchos::ArrayView<Scalar> (); 02329 } 02330 } 02331 02332 02333 template<class Scalar, 02334 class LocalOrdinal, 02335 class GlobalOrdinal, 02336 class DeviceType> 02337 void 02338 CrsMatrix< 02339 Scalar, 02340 LocalOrdinal, 02341 GlobalOrdinal, 02342 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02343 getLocalRowCopy (LocalOrdinal localRow, 02344 const Teuchos::ArrayView<LocalOrdinal>& indices, 02345 const Teuchos::ArrayView<Scalar>& values, 02346 size_t& numEntries) const 02347 { 02348 using Teuchos::ArrayView; 02349 typedef LocalOrdinal LO; 02350 typedef GlobalOrdinal GO; 02351 02352 TEUCHOS_TEST_FOR_EXCEPTION( 02353 isGloballyIndexed () && ! hasColMap (), std::runtime_error, 02354 "Tpetra::CrsMatrix::getLocalRowCopy: The matrix is globally indexed and " 02355 "does not have a column Map yet. That means we don't have local indices " 02356 "for columns yet, so it doesn't make sense to call this method. If the " 02357 "matrix doesn't have a column Map yet, you should call fillComplete on " 02358 "it first."); 02359 TEUCHOS_TEST_FOR_EXCEPTION( 02360 ! staticGraph_->hasRowInfo (), std::runtime_error, 02361 "Tpetra::CrsMatrix::getLocalRowCopy: The graph's row information was " 02362 "deleted at fillComplete()."); 02363 02364 if (! 
this->getRowMap ()->isNodeLocalElement (localRow)) { 02365 numEntries = 0; 02366 return; 02367 } 02368 02369 const RowInfo rowinfo = staticGraph_->getRowInfo(localRow); 02370 const size_t theNumEntries = rowinfo.numEntries; 02371 02372 TEUCHOS_TEST_FOR_EXCEPTION( 02373 static_cast<size_t> (indices.size ()) < theNumEntries || 02374 static_cast<size_t> (values.size ()) < theNumEntries, 02375 std::runtime_error, 02376 "Tpetra::CrsMatrix::getLocalRowCopy: The given row " << localRow 02377 << " has " << theNumEntries << " entries. One or both of the given " 02378 "ArrayViews are not long enough to store that many entries. indices " 02379 "can store " << indices.size() << " entries and values can store " 02380 << values.size() << " entries."); 02381 02382 numEntries = theNumEntries; 02383 02384 if (staticGraph_->isLocallyIndexed ()) { 02385 ArrayView<const LO> indrowview = staticGraph_->getLocalView (rowinfo); 02386 ArrayView<const Scalar> valrowview = getView (rowinfo); 02387 std::copy (indrowview.begin (), indrowview.begin () + numEntries, indices.begin ()); 02388 std::copy (valrowview.begin (), valrowview.begin () + numEntries, values.begin ()); 02389 } 02390 else if (staticGraph_->isGloballyIndexed ()) { 02391 ArrayView<const GO> indrowview = staticGraph_->getGlobalView (rowinfo); 02392 ArrayView<const Scalar> valrowview = getView (rowinfo); 02393 std::copy (valrowview.begin (), valrowview.begin () + numEntries, values.begin ()); 02394 02395 const map_type& colMap = * (this->getColMap ()); 02396 for (size_t j=0; j < numEntries; ++j) { 02397 indices[j] = colMap.getLocalElement (indrowview[j]); 02398 } 02399 } 02400 else { 02401 numEntries = 0; 02402 } 02403 } 02404

// Copy the entries of the locally owned row with global index
// `globalRow` into the caller's arrays, reporting global column indices.
//
// On return, numEntries is the number of entries copied; it is reset to
// zero if the graph is neither globally nor locally indexed.
// Throws std::runtime_error if the calling process does not own the
// row, or if `indices` or `values` is too short to hold the row.
template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
void
CrsMatrix<
  Scalar, LocalOrdinal, GlobalOrdinal,
  Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
getGlobalRowCopy (GlobalOrdinal globalRow,
                  const Teuchos::ArrayView<GlobalOrdinal>& indices,
                  const Teuchos::ArrayView<Scalar>& values,
                  size_t& numEntries) const
{
  const char tfecfFuncName[] = "getGlobalRowCopy";

  // Only rows owned by the calling process may be queried.
  const LocalOrdinal lrow = getRowMap ()->getLocalElement (globalRow);
  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
    lrow == OTL::invalid(), std::runtime_error,
    ": globalRow=" << globalRow << " does not belong to the calling process "
    << getComm()->getRank() << ".");

  const RowInfo rowInfo = staticGraph_->getRowInfo (lrow);
  numEntries = rowInfo.numEntries;
  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
    static_cast<size_t> (indices.size ()) < numEntries || static_cast<size_t> (values.size ()) < numEntries,
    std::runtime_error,
    ": size of indices,values must be sufficient to store the specified row.");

  if (staticGraph_->isGloballyIndexed ()) {
    // Stored indices are already global: copy indices and values as is.
    ArrayView<const GlobalOrdinal> srcInds = staticGraph_->getGlobalView (rowInfo);
    ArrayView<const Scalar> srcVals = getView (rowInfo);
    std::copy (srcInds.begin (), srcInds.begin () + numEntries, indices.begin ());
    std::copy (srcVals.begin (), srcVals.begin () + numEntries, values.begin ());
    return;
  }
  if (staticGraph_->isLocallyIndexed ()) {
    // Stored indices are local: copy the values, and translate each
    // index to a global index through the column Map.
    ArrayView<const LocalOrdinal> srcInds = staticGraph_->getLocalView (rowInfo);
    ArrayView<const Scalar> srcVals = getView (rowInfo);
    std::copy (srcVals.begin (), srcVals.begin () + numEntries, values.begin ());
    for (size_t k = 0; k < numEntries; ++k) {
      indices[k] = getColMap ()->getGlobalElement (srcInds[k]);
    }
    return;
  }

#ifdef HAVE_TPETRA_DEBUG
  // should have fallen in one of the above if indices are allocated
  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
    staticGraph_->indicesAreAllocated (), std::logic_error,
    ": Internal logic error. Please contact Tpetra team.");
#endif // HAVE_TPETRA_DEBUG
  numEntries = 0;
}


// Get views (not copies) of the local column indices and values of
// local row `localRow`.  Both outputs are empty if the calling process
// does not own the row or the row has no entries; otherwise each view
// is trimmed to the row's number of entries.
// Throws std::runtime_error if the matrix is globally indexed.
template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
void
CrsMatrix<
  Scalar, LocalOrdinal, GlobalOrdinal,
  Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
getLocalRowView (LocalOrdinal localRow,
                 Teuchos::ArrayView<const LocalOrdinal>& indices,
                 Teuchos::ArrayView<const Scalar>& values) const
{
  const char tfecfFuncName[] = "getLocalRowView: ";
  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
    isGloballyIndexed (), std::runtime_error, "The matrix currently stores "
    "its indices as global indices, so you cannot get a view with local "
    "column indices. If the matrix has a column Map, you may call "
    "getLocalRowCopy() to get local column indices; otherwise, you may get "
    "a view with global column indices by calling getGlobalRowCopy().");

  // Start from empty views; they stay empty unless the row has entries.
  indices = null;
  values = null;
  if (getRowMap ()->isNodeLocalElement (localRow)) {
    const RowInfo rowInfo = staticGraph_->getRowInfo (localRow);
    const size_t numEnt = rowInfo.numEntries;
    if (numEnt > 0) {
      // The raw views span the row's allocation; trim to valid entries.
      indices = staticGraph_->getLocalView (rowInfo);
      values = getView (rowInfo);
      indices = indices (0, numEnt);
      values = values (0, numEnt);
    }
  }
#ifdef HAVE_TPETRA_DEBUG
  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
    static_cast<size_t> (indices.size ()) != this->getNumEntriesInLocalRow (localRow) ||
    indices.size () != values.size (), std::logic_error,
    "Violated stated post-conditions. Please contact Tpetra team.");
#endif // HAVE_TPETRA_DEBUG
}


// Get views (not copies) of the global column indices and values of
// the row with global index `globalRow`.  Both outputs are empty if the
// calling process does not own the row or the row has no entries;
// otherwise each view is trimmed to the row's number of entries.
// Throws std::runtime_error if the matrix is locally indexed.
template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
void
CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
getGlobalRowView (GlobalOrdinal globalRow,
                  Teuchos::ArrayView<const GlobalOrdinal>& indices,
                  Teuchos::ArrayView<const Scalar>& values) const
{
  const char tfecfFuncName[] = "getGlobalRowView: ";
  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
    isLocallyIndexed (), std::runtime_error,
    "The matrix is locally indexed, so we cannot return a view of the row "
    "with global column indices. Use getGlobalRowCopy() instead.");

  indices = Teuchos::null;
  values = Teuchos::null;
  const LocalOrdinal lclRow = getRowMap ()->getLocalElement (globalRow);
  if (lclRow != Teuchos::OrdinalTraits<LocalOrdinal>::invalid ()) {
    // getRowInfo() requires a local row index, whether or not
    // storage has been optimized.
    const RowInfo rowInfo = staticGraph_->getRowInfo (lclRow);
    const size_t numEnt = rowInfo.numEntries;
    if (numEnt > 0) {
      indices = staticGraph_->getGlobalView (rowInfo);
      values = getView (rowInfo);
      indices = indices (0, numEnt);
      values = values (0, numEnt);
    }
  }
#ifdef HAVE_TPETRA_DEBUG
  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
    static_cast<size_t> (indices.size ()) != this->getNumEntriesInGlobalRow (globalRow) ||
    indices.size () != values.size (),
    std::logic_error,
    "Violated stated post-conditions. Please contact Tpetra team.");
#endif // HAVE_TPETRA_DEBUG
}


// Multiply every stored entry of the matrix by `alpha`, in place.
// Does nothing if the graph's indices are not allocated or the process
// has no rows, no allocation, or no entries.
// Throws std::runtime_error if fill is not active.
template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
void
CrsMatrix<
  Scalar, LocalOrdinal, GlobalOrdinal,
  Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
scale (const Scalar& alpha)
{
  typedef LocalOrdinal LO;
  typedef Kokkos::SparseRowView<k_local_matrix_type> row_view_type;
  typedef typename Teuchos::Array<Scalar>::size_type size_type;
  const char tfecfFuncName[] = "scale: ";

  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
    ! isFillActive (), std::runtime_error,
    "Fill must be active before you may call this method. "
    "Please call resumeFill() to make fill active.");

  const size_t nlrs = staticGraph_->getNodeNumRows ();
  const size_t numAlloc = staticGraph_->getNodeAllocationSize ();
  const size_t numEntries = staticGraph_->getNodeNumEntries ();
  const bool nothingToScale =
    ! staticGraph_->indicesAreAllocated () || nlrs == 0 ||
    numAlloc == 0 || numEntries == 0;
  if (nothingToScale) {
    return;
  }

  if (staticGraph_->getProfileType () == StaticProfile) {
    // Static profile: walk the rows of the local Kokkos matrix.
    const LO numLclRows = k_lclMatrix_.numRows ();
    for (LO i = 0; i < numLclRows; ++i) {
      row_view_type curRow = k_lclMatrix_.row (i);
      for (LO j = 0; j < curRow.length; ++j) {
        curRow.value (j) *= alpha;
      }
    }
  }
  else if (staticGraph_->getProfileType () == DynamicProfile) {
    // Dynamic profile: values live in per-row arrays; scale only the
    // entries actually in use in each row.
    for (size_t i = 0; i < nlrs; ++i) {
      const size_type rowLen = getNumEntriesInLocalRow (i);
      Teuchos::ArrayView<Scalar> curVals = values2D_[i] ();
      for (size_type j = 0; j < rowLen; ++j) {
        curVals[j] *= alpha;
      }
    }
  }
}

02583 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02584 void
CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::setAllToScalar(const Scalar &alpha) 02585 { 02586 const char tfecfFuncName[] = "setAllToScalar: "; 02587 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02588 ! isFillActive (), std::runtime_error, 02589 "Fill must be active before you may call this method. " 02590 "Please call resumeFill() to make fill active."); 02591 02592 // replace all values in the matrix 02593 // it is easiest to replace all allocated values, instead of replacing only the ones with valid entries 02594 // however, if there are no valid entries, we can short-circuit 02595 // furthermore, if the values aren't allocated, we can short-circuit (no entry have been inserted so far) 02596 const size_t nlrs = staticGraph_->getNodeNumRows(), 02597 numAlloc = staticGraph_->getNodeAllocationSize(), 02598 numEntries = staticGraph_->getNodeNumEntries(); 02599 if (staticGraph_->indicesAreAllocated() == false || numAlloc == 0 || numEntries == 0) { 02600 // do nothing 02601 } 02602 else { 02603 if (staticGraph_->getProfileType() == StaticProfile) { 02604 std::fill( values1D_.begin(), values1D_.end(), alpha ); 02605 } 02606 else if (staticGraph_->getProfileType() == DynamicProfile) { 02607 for (size_t row=0; row < nlrs; ++row) { 02608 std::fill( values2D_[row].begin(), values2D_[row].end(), alpha ); 02609 } 02610 } 02611 } 02612 } 02613 02614 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, 02615 class DeviceType> 02616 void 02617 CrsMatrix< 02618 Scalar, LocalOrdinal, GlobalOrdinal, 02619 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02620 setAllValues (const t_RowPtrs& rowPointers, 02621 const t_LocalOrdinal_1D& columnIndices, 02622 const t_ValuesType& values) 02623 { 02624 const char tfecfFuncName[] = "setAllValues"; 02625 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02626 columnIndices.size () != values.size (), std::runtime_error, 02627 ": columnIndices and values must have the same size. 
columnIndices.size() = " 02628 << columnIndices.size () << " != values.size() = " << values.size () << "."); 02629 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02630 myGraph_.is_null (), std::runtime_error, ": myGraph_ must not be null."); 02631 02632 try { 02633 myGraph_->setAllIndices (rowPointers, columnIndices); 02634 } 02635 catch (std::exception &e) { 02636 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02637 true, std::runtime_error, ": Caught exception while calling myGraph_->" 02638 "setAllIndices(): " << e.what ()); 02639 } 02640 k_values1D_ = values; 02641 values1D_ = Kokkos::Compat::persistingView (k_values1D_); 02642 checkInternalState(); 02643 } 02644 02645 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02646 void 02647 CrsMatrix< 02648 Scalar, LocalOrdinal, GlobalOrdinal, 02649 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02650 setAllValues (const Teuchos::ArrayRCP<size_t>& rowPointers, 02651 const Teuchos::ArrayRCP<LocalOrdinal>& columnIndices, 02652 const Teuchos::ArrayRCP<Scalar>& values) 02653 { 02654 const char tfecfFuncName[] = "setAllValues"; 02655 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02656 columnIndices.size () != values.size (), std::runtime_error, 02657 ": columnIndices and values must have the same size. 
columnIndices.size() = " 02658 << columnIndices.size () << " != values.size() = " << values.size () << "."); 02659 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02660 myGraph_.is_null (), std::runtime_error, ": myGraph_ must not be null."); 02661 02662 try { 02663 myGraph_->setAllIndices (rowPointers, columnIndices); 02664 } 02665 catch (std::exception &e) { 02666 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02667 true, std::runtime_error, ": Caught exception while calling myGraph_->" 02668 "setAllIndices(): " << e.what ()); 02669 } 02670 k_values1D_ = Kokkos::Compat::getKokkosViewDeepCopy<DeviceType> (values ()); 02671 values1D_ = Kokkos::Compat::persistingView (k_values1D_); 02672 checkInternalState(); 02673 } 02674

// For each local row r, compute the offset of the diagonal entry within
// that row's stored entries and write it to offsets[r].
//
// `offsets` is resized to the number of local rows if its size differs.
// offsets[r] is OrdinalTraits<size_t>::invalid() whenever row r has no
// locatable diagonal entry: the row's global index is not in the column
// Map, the row is empty, or the graph's findLocalIndex does not find
// the diagonal column.  getLocalDiagCopy(diag, offsets) tests for that
// sentinel, so every slot must be written -- a slot left untouched
// would be read there as a real offset.
//
// Throws std::runtime_error if the matrix has no column Map or no
// graph.  In a HAVE_TPETRA_DEBUG build, also throws std::runtime_error
// (after an all-reduce) if any process had a row without a locatable
// diagonal entry.
template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
void
CrsMatrix<
  Scalar, LocalOrdinal, GlobalOrdinal,
  Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
getLocalDiagOffsets (Teuchos::ArrayRCP<size_t>& offsets) const
{
  using Teuchos::ArrayRCP;
  using Teuchos::ArrayView;
  const char tfecfFuncName[] = "getLocalDiagOffsets";

  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
    ! hasColMap (), std::runtime_error,
    ": This method requires that the matrix have a column Map.");
  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
    staticGraph_.is_null (), std::runtime_error,
    ": This method requires that the matrix have a graph.");

  const map_type& rowMap = * (this->getRowMap ());
  const map_type& colMap = * (this->getColMap ());

  const size_t myNumRows = getNodeNumRows ();
  if (static_cast<size_t> (offsets.size ()) != myNumRows) {
    offsets.resize (static_cast<size_t> (myNumRows));
  }

#ifdef HAVE_TPETRA_DEBUG
  bool allRowMapDiagEntriesInColMap = true;
  bool allDiagEntriesFound = true;
#endif // HAVE_TPETRA_DEBUG

  const size_t invalidOffset = Teuchos::OrdinalTraits<size_t>::invalid ();
  const LocalOrdinal invalidLclInd =
    Teuchos::OrdinalTraits<LocalOrdinal>::invalid ();

  for (size_t r = 0; r < myNumRows; ++r) {
    // Default to "no diagonal entry", so that no slot keeps a stale
    // value from the caller or from resize().
    offsets[r] = invalidOffset;

    const GlobalOrdinal rgid = rowMap.getGlobalElement (r);
    const LocalOrdinal rlid = colMap.getLocalElement (rgid);

    if (rlid == invalidLclInd) {
      // The diagonal column is not in the column Map on this process.
#ifdef HAVE_TPETRA_DEBUG
      allRowMapDiagEntriesInColMap = false;
#endif // HAVE_TPETRA_DEBUG
      continue;
    }

    RowInfo rowinfo = staticGraph_->getRowInfo (r);
    if (rowinfo.numEntries > 0) {
      offsets[r] = staticGraph_->findLocalIndex (rowinfo, rlid);
    }
#ifdef HAVE_TPETRA_DEBUG
    // Covers both an empty row and a nonempty row that lacks the
    // diagonal column.
    if (offsets[r] == invalidOffset) {
      allDiagEntriesFound = false;
    }
#endif // HAVE_TPETRA_DEBUG
  }

#ifdef HAVE_TPETRA_DEBUG
  using Teuchos::reduceAll;
  using std::endl;

  const bool localSuccess =
    allRowMapDiagEntriesInColMap && allDiagEntriesFound;
  int localResults[3];
  localResults[0] = allRowMapDiagEntriesInColMap ? 1 : 0;
  localResults[1] = allDiagEntriesFound ? 1 : 0;
  // min-all-reduce will compute least rank of all the processes
  // that didn't succeed.
  localResults[2] =
    ! localSuccess ? getComm ()->getRank () : getComm ()->getSize ();
  int globalResults[3];
  globalResults[0] = 0;
  globalResults[1] = 0;
  globalResults[2] = 0;
  reduceAll<int, int> (* (getComm ()), Teuchos::REDUCE_MIN,
                       3, localResults, globalResults);
  if (globalResults[0] == 0 || globalResults[1] == 0) {
    std::ostringstream os; // build error message
    const bool both =
      globalResults[0] == 0 && globalResults[1] == 0;
    os << ": At least one process (including Process " << globalResults[2]
       << ") had the following issue" << (both ? "s" : "") << ":" << endl;
    if (globalResults[0] == 0) {
      os << " - The column Map does not contain at least one diagonal entry "
        "of the matrix." << endl;
    }
    if (globalResults[1] == 0) {
      os << " - There is a row on that / those process(es) that does not "
        "contain a diagonal entry." << endl;
    }
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
  }
#endif // HAVE_TPETRA_DEBUG
}

02768 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, 02769 class DeviceType> 02770 void 02771 CrsMatrix< 02772 Scalar, LocalOrdinal, GlobalOrdinal, 02773 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02774 getLocalDiagCopy (Vector<Scalar, LocalOrdinal, GlobalOrdinal, node_type>& dvec) const 02775 { 02776 using Teuchos::ArrayRCP; 02777 using Teuchos::ArrayView; 02778 const char tfecfFuncName[] = "getLocalDiagCopy"; 02779 typedef Vector<Scalar, LocalOrdinal, GlobalOrdinal, node_type> vec_type; 02780 typedef typename vec_type::dual_view_type dual_view_type; 02781 typedef typename device_type::host_mirror_device_type host_device_type; 02782 02783 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02784 !
hasColMap (), std::runtime_error, 02785 ": This method requires that the matrix have a column Map."); 02786 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02787 staticGraph_.is_null (), std::runtime_error, 02788 ": This method requires that the matrix have a graph."); 02789 02790 const map_type& rowMap = * (this->getRowMap ()); 02791 const map_type& colMap = * (this->getColMap ()); 02792 02793 #ifdef HAVE_TPETRA_DEBUG 02794 // isCompatible() requires an all-reduce, and thus this check 02795 // should only be done in debug mode. 02796 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02797 ! dvec.getMap ()->isCompatible (rowMap), std::runtime_error, 02798 ": The input Vector's Map must be compatible with the CrsMatrix's row " 02799 "Map. You may check this by using Map's isCompatible method: " 02800 "dvec.getMap ()->isCompatible (A.getRowMap ());"); 02801 #endif // HAVE_TPETRA_DEBUG 02802 02803 // For now, we fill the Vector on the host and sync to device. 02804 // Later, we may write a parallel kernel that works entirely on 02805 // device. 02806 dual_view_type lclVec = dvec.getDualView (); 02807 typedef typename dual_view_type::t_host host_view_type; 02808 host_view_type lclVecHost = lclVec.h_view; 02809 lclVec.template modify<host_device_type> (); 02810 02811 // 1-D subview of lclVecHost. All the "typename" stuff ensures 02812 // that we get the same layout and memory traits as the original 02813 // 2-D view. 02814 typedef typename Kokkos::View<scalar_type*, 02815 typename host_view_type::array_layout, typename host_view_type::device_type, 02816 typename host_view_type::memory_traits> 02817 host_view_1d_type; 02818 host_view_1d_type lclVecHost1d = 02819 Kokkos::subview<host_view_1d_type> (lclVecHost, Kokkos::ALL (), 0); 02820 02821 // Find the diagonal entries and put them in lclVecHost1d. 
02822 const size_t myNumRows = getNodeNumRows (); 02823 for (size_t r = 0; r < myNumRows; ++r) { 02824 lclVecHost1d(r) = STS::zero (); // default value if no diag entry 02825 const GlobalOrdinal rgid = rowMap.getGlobalElement (r); 02826 const LocalOrdinal rlid = colMap.getLocalElement (rgid); 02827 02828 if (rlid != Teuchos::OrdinalTraits<LocalOrdinal>::invalid ()) { 02829 RowInfo rowinfo = staticGraph_->getRowInfo (r); 02830 if (rowinfo.numEntries > 0) { 02831 const size_t j = staticGraph_->findLocalIndex (rowinfo, rlid); 02832 if (j != Teuchos::OrdinalTraits<size_t>::invalid ()) { 02833 ArrayView<const Scalar> view = this->getView (rowinfo); 02834 lclVecHost1d(r) = view[j]; 02835 } 02836 } 02837 } 02838 } 02839 lclVec.template sync<device_type> (); // sync changes back to device 02840 } 02841 02842

// Copy the diagonal of the matrix into `diag`, using precomputed
// per-row offsets of the diagonal entries (see getLocalDiagOffsets).
//
// For each local row i, diag(i) is set to the row's value at position
// offsets[i], or to zero if offsets[i] is
// OrdinalTraits<size_t>::invalid().
//
// Throws std::runtime_error if `offsets` has fewer entries than the
// matrix has local rows (otherwise the loop below would read past the
// end of the caller's array).  In a HAVE_TPETRA_DEBUG build, also
// throws std::runtime_error if diag's Map is not compatible with the
// row Map.  getLocalRowView, which is called per row, throws if the
// matrix is globally indexed.
template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
void
CrsMatrix<
  Scalar, LocalOrdinal, GlobalOrdinal,
  Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
getLocalDiagCopy (Vector<Scalar, LocalOrdinal, GlobalOrdinal, node_type>& diag,
                  const Teuchos::ArrayView<const size_t>& offsets) const
{
  using Teuchos::ArrayRCP;
  using Teuchos::ArrayView;
  typedef Vector<Scalar, LocalOrdinal, GlobalOrdinal, node_type> vec_type;
  typedef typename vec_type::dual_view_type dual_view_type;
  typedef typename device_type::host_mirror_device_type host_device_type;
  const char tfecfFuncName[] = "getLocalDiagCopy";

#ifdef HAVE_TPETRA_DEBUG
  const map_type& rowMap = * (this->getRowMap ());
  // isCompatible() requires an all-reduce, and thus this check
  // should only be done in debug mode.
  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
    ! diag.getMap ()->isCompatible (rowMap), std::runtime_error,
    ": The input Vector's Map must be compatible with (in the sense of Map::"
    "isCompatible) the CrsMatrix's row Map.");
#endif // HAVE_TPETRA_DEBUG

  // Validate the caller's array before touching the Vector: offsets is
  // indexed once per local row below.
  const size_t myNumRows = getNodeNumRows ();
  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
    static_cast<size_t> (offsets.size ()) < myNumRows, std::runtime_error,
    ": offsets.size() = " << offsets.size () << " is less than the number "
    "of rows on the calling process, " << myNumRows << ".");

  // For now, we fill the Vector on the host and sync to device.
  // Later, we may write a parallel kernel that works entirely on
  // device.
  dual_view_type lclVec = diag.getDualView ();
  typedef typename dual_view_type::t_host host_view_type;
  host_view_type lclVecHost = lclVec.h_view;
  lclVec.template modify<host_device_type> ();

  // 1-D subview of lclVecHost.  All the "typename" stuff ensures
  // that we get the same layout and memory traits as the original
  // 2-D view.
  typedef typename Kokkos::View<scalar_type*,
    typename host_view_type::array_layout, typename host_view_type::device_type,
    typename host_view_type::memory_traits>
    host_view_1d_type;
  host_view_1d_type lclVecHost1d =
    Kokkos::subview<host_view_1d_type> (lclVecHost, Kokkos::ALL (), 0);

  // Find the diagonal entries and put them in lclVecHost1d.
  const size_t invalidOffset = Teuchos::OrdinalTraits<size_t>::invalid ();
  for (size_t i = 0; i < myNumRows; ++i) {
    lclVecHost1d(i) = STS::zero (); // default value if no diag entry
    const size_t diagOffset = offsets[i];
    if (diagOffset != invalidOffset) {
      ArrayView<const LocalOrdinal> ind;
      ArrayView<const Scalar> val;
      this->getLocalRowView (static_cast<LocalOrdinal> (i), ind, val);
      lclVecHost1d(i) = val[diagOffset];
    }
  }
  lclVec.template sync<device_type> (); // sync changes back to device
}

02903 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02904 void CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::leftScale( 02905 const Vector<Scalar, LocalOrdinal, GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >& x) 02906 { 02907 using Teuchos::ArrayRCP; 02908 using Teuchos::ArrayView; 02909 using Teuchos::null; 02910 using Teuchos::RCP; 02911 using Teuchos::rcp; 02912 using Teuchos::rcpFromRef; 02913 typedef Vector<Scalar, LocalOrdinal, GlobalOrdinal, node_type> vec_type; 02914 const char tfecfFuncName[] = "leftScale"; 02915 02916 // FIXME (mfh 06 Aug 2014) This doesn't make sense. The matrix 02917 // should only be modified when it is not fill complete. 02918 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02919 ! isFillComplete (), std::runtime_error, 02920 ": matrix must be fill complete."); 02921 RCP<const vec_type> xp; 02922 02923 if (getRangeMap ()->isSameAs (* (x.getMap ()))){ 02924 // Take from Epetra: If we have a non-trivial exporter, we must 02925 // import elements that are permuted or are on other processors. 02926 // (We will use the exporter to perform the import ("reverse 02927 // mode").)
02928 if (getCrsGraph ()->getExporter () != null) { 02929 RCP<vec_type> tempVec = rcp (new vec_type (getRowMap ())); 02930 tempVec->doImport (x, * (getCrsGraph ()->getExporter ()), INSERT); 02931 xp = tempVec; 02932 } 02933 else { 02934 xp = rcpFromRef (x); 02935 } 02936 } 02937 else if (getRowMap ()->isSameAs (* (x.getMap ()))) { 02938 xp = rcpFromRef (x); 02939 } 02940 else { 02941 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::invalid_argument, ": The " 02942 "input scaling vector x's Map must be the same as either the row Map or " 02943 "the range Map of the CrsMatrix."); 02944 } 02945 ArrayRCP<const Scalar> vectorVals = xp->getData(0); 02946 ArrayView<Scalar> rowValues = null; 02947 02948 const size_t lclNumRows = this->getNodeNumRows (); 02949 for (size_t i = 0; i < lclNumRows; ++i) { 02950 const RowInfo rowinfo = staticGraph_->getRowInfo (static_cast<LocalOrdinal> (i)); 02951 rowValues = this->getViewNonConst (rowinfo); 02952 const Scalar scaleValue = vectorVals[i]; 02953 for (size_t j = 0; j < rowinfo.numEntries; ++j) { 02954 rowValues[j] *= scaleValue; 02955 } 02956 rowValues = null; 02957 } 02958 } 02959

// Scale the columns of the matrix in place: every stored entry in
// column j is multiplied by x[j].
//
// x's Map must be the same as either the matrix's domain Map or its
// column Map.  With a domain-Map vector, the graph's Import (if it has
// one) is used to bring x onto the column Map first.  The loop below
// indexes the vector by *local column index*, so the vector it reads
// must live on the column Map; a row-Map vector is not acceptable
// unless it also matches one of the two Maps above.
//
// Throws std::runtime_error if the matrix is not fill complete, or if
// x's Map is neither the domain Map nor the column Map.
template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
void CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::rightScale(
  const Vector<Scalar, LocalOrdinal, GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >& x)
{
  using Teuchos::ArrayRCP;
  using Teuchos::ArrayView;
  using Teuchos::null;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  typedef Vector<Scalar, LocalOrdinal, GlobalOrdinal, node_type> vec_type;
  const char tfecfFuncName[] = "rightScale";

  // FIXME (mfh 06 Aug 2014) This doesn't make sense.  The matrix
  // should only be modified when it is not fill complete.
  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
    ! isFillComplete (), std::runtime_error, ": matrix must be fill complete.");

  RCP<const vec_type> xp;
  if (getDomainMap ()->isSameAs (* (x.getMap ()))) {
    // Take from Epetra: If we have a non-trivial importer, we must
    // import elements that are permuted or are on other processors.
    if (getCrsGraph ()->getImporter () != null) {
      RCP<vec_type> tempVec = rcp (new vec_type (getColMap ()));
      tempVec->doImport (x, * (getCrsGraph ()->getImporter ()), INSERT);
      xp = tempVec;
    }
    else {
      xp = rcpFromRef (x);
    }
  }
  else if (getColMap ()->isSameAs (* (x.getMap ()))) {
    // x is already distributed like the columns; use it directly.
    xp = rcpFromRef (x);
  }
  else {
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
      true, std::runtime_error, ": The input scaling vector x's Map must be "
      "the same as either the domain Map or the column Map of the CrsMatrix.");
  }

  ArrayRCP<const Scalar> vectorVals = xp->getData(0);

  const size_t lclNumRows = this->getNodeNumRows ();
  for (size_t i = 0; i < lclNumRows; ++i) {
    const LocalOrdinal lclRow = static_cast<LocalOrdinal> (i);
    const RowInfo rowinfo = staticGraph_->getRowInfo (lclRow);
    ArrayView<Scalar> rowValues = this->getViewNonConst (rowinfo);
    ArrayView<const LocalOrdinal> colInds;
    getCrsGraph ()->getLocalRowView (lclRow, colInds);
    for (size_t j = 0; j < rowinfo.numEntries; ++j) {
      rowValues[j] *= vectorVals[colInds[j]];
    }
  }
}

// Return the Frobenius norm of the matrix: the square root of the sum,
// over all processes, of |a_ij|^2 for every stored entry.
//
// A cached value (frobNorm_) is returned if it is not the sentinel -1.
// Otherwise the local sum is computed and combined with an all-reduce;
// the result is cached only if the matrix is fill complete, since
// otherwise the values might still change.
template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
typename ScalarTraits<Scalar>::magnitudeType
CrsMatrix<
  Scalar, LocalOrdinal, GlobalOrdinal,
  Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
getFrobeniusNorm () const
{
  using Teuchos::outArg;
  using Teuchos::REDUCE_SUM;
  using Teuchos::reduceAll;
  typedef typename ArrayRCP<const Scalar>::size_type size_type;

  // FIXME (mfh 05 Aug 2014) Write a thread-parallel kernel for the
  // local part of this computation.  It could make sense to put
  // this operation in the Kokkos::CrsMatrix.

  // check the cache first
  Magnitude frobNorm = frobNorm_;
  if (frobNorm == -STM::one ()) {
    Magnitude mySum = STM::zero ();
    if (getNodeNumEntries() > 0) {
      if (isStorageOptimized ()) {
        // "Optimized" storage is packed storage.  That means we can
        // iterate in one pass through the 1-D values array.
        const size_type numEntries =
          static_cast<size_type> (getNodeNumEntries ());
        for (size_type k = 0; k < numEntries; ++k) {
          // FIXME (mfh 05 Aug 2014) This assumes UVM.
          const Scalar val = k_values1D_(k);
          mySum += STS::real (val) * STS::real (val) +
            STS::imag (val) * STS::imag (val);
        }
      }
      else {
        const size_t numRows = getNodeNumRows ();
        for (size_t r = 0; r < numRows; ++r) {
          // Use staticGraph_, not myGraph_: myGraph_ is null when the
          // matrix was created with a const graph, whereas staticGraph_
          // is what the other row accessors in this class rely on.
          RowInfo rowInfo = staticGraph_->getRowInfo (r);
          const size_type numEntries =
            static_cast<size_type> (rowInfo.numEntries);
          ArrayView<const Scalar> A_r =
            this->getView (rowInfo).view (0, numEntries);
          for (size_type k = 0; k < numEntries; ++k) {
            const Scalar val = A_r[k];
            mySum += STS::real (val) * STS::real (val) +
              STS::imag (val) * STS::imag (val);
          }
        }
      }
    }
    Magnitude totalSum;
    reduceAll<int, Magnitude> (* (getComm ()), REDUCE_SUM,
                               mySum, outArg (totalSum));
    frobNorm = STM::squareroot (totalSum);
  }
  if (isFillComplete ()) {
    // Only cache the result if the matrix is fill complete.
    // Otherwise, the values might still change.  resumeFill clears
    // the cache.
    frobNorm_ = frobNorm;
  }
  return frobNorm;
}

// Replace the matrix's column Map with `newColMap` by forwarding to the
// graph, which owns the column Map.
// Throws std::runtime_error if the matrix does not own a modifiable
// graph (myGraph_ is null, i.e. the matrix has a const graph).
template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
void
CrsMatrix<
  Scalar,
  LocalOrdinal,
  GlobalOrdinal,
  Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
replaceColMap (const Teuchos::RCP<const map_type>& newColMap)
{
  const char tfecfFuncName[] = "replaceColMap";
  // FIXME (mfh 06 Aug 2014) What if the graph is locally indexed?
  // Then replacing the column Map might mean that we need to
  // reindex the column indices.
  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
    myGraph_.is_null (), std::runtime_error,
    ": This method does not work if the matrix has a const graph. The whole "
    "idea of a const graph is that you are not allowed to change it, but this"
    " method necessarily must modify the graph, since the graph owns the "
    "matrix's column Map.");
  myGraph_->replaceColMap (newColMap);
}

03100 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03101 void 03102 CrsMatrix< 03103 Scalar, 03104 LocalOrdinal, 03105 GlobalOrdinal, 03106 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03107 reindexColumns (crs_graph_type* const graph, 03108 const Teuchos::RCP<const map_type>& newColMap, 03109 const Teuchos::RCP<const import_type>& newImport, 03110 const bool sortEachRow) 03111 { 03112 const char tfecfFuncName[] = "reindexColumns: "; 03113 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 03114 graph == NULL && myGraph_.is_null (), std::invalid_argument, 03115 "The input graph is NULL, but the matrix does not own its graph."); 03116 03117 crs_graph_type& theGraph = (graph == NULL) ? *myGraph_ : *graph; 03118 const bool sortGraph = false; // we'll sort graph & matrix together below 03119 theGraph.reindexColumns (newColMap, newImport, sortGraph); 03120 if (sortEachRow && theGraph.isLocallyIndexed () && !
theGraph.isSorted ()) { 03121 // We can't just call sortEntries() here, because that fails if 03122 // the matrix has a const graph. We want to use the given graph 03123 // in that case. 03124 const size_t lclNumRows = theGraph.getNodeNumRows (); 03125 for (size_t row = 0; row < lclNumRows; ++row) { 03126 RowInfo rowInfo = theGraph.getRowInfo (row); 03127 theGraph.template sortRowIndicesAndValues<Scalar> (rowInfo, this->getViewNonConst (rowInfo)); 03128 } 03129 theGraph.indicesAreSorted_ = true; 03130 } 03131 } 03132 03133 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03134 void 03135 CrsMatrix< 03136 Scalar, 03137 LocalOrdinal, 03138 GlobalOrdinal, 03139 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03140 replaceDomainMapAndImporter (const Teuchos::RCP<const map_type>& newDomainMap, 03141 Teuchos::RCP<const import_type>& newImporter) 03142 { 03143 const char tfecfFuncName[] = "replaceDomainMapAndImporter: "; 03144 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 03145 myGraph_.is_null (), std::runtime_error, 03146 "This method does not work if the matrix has a const graph. 
The whole " 03147 "idea of a const graph is that you are not allowed to change it, but this" 03148 " method necessarily must modify the graph, since the graph owns the " 03149 "matrix's domain Map and Import objects."); 03150 myGraph_->replaceDomainMapAndImporter (newDomainMap, newImporter); 03151 } 03152 03153 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03154 void 03155 CrsMatrix<Scalar, 03156 LocalOrdinal, 03157 GlobalOrdinal, 03158 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03159 insertNonownedGlobalValues (const GlobalOrdinal globalRow, 03160 const Teuchos::ArrayView<const GlobalOrdinal>& indices, 03161 const Teuchos::ArrayView<const Scalar>& values) 03162 { 03163 using Teuchos::Array; 03164 typedef GlobalOrdinal GO; 03165 typedef typename Array<GO>::size_type size_type; 03166 03167 const size_type numToInsert = indices.size (); 03168 // Add the new data to the list of nonlocals. 03169 // This creates the arrays if they don't exist yet. 
03170 std::pair<Array<GO>, Array<Scalar> >& curRow = nonlocals_[globalRow]; 03171 Array<GO>& curRowInds = curRow.first; 03172 Array<Scalar>& curRowVals = curRow.second; 03173 const size_type newCapacity = curRowInds.size () + numToInsert; 03174 curRowInds.reserve (newCapacity); 03175 curRowVals.reserve (newCapacity); 03176 for (size_type k = 0; k < numToInsert; ++k) { 03177 curRowInds.push_back (indices[k]); 03178 curRowVals.push_back (values[k]); 03179 } 03180 } 03181 03182 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03183 void 03184 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, 03185 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03186 globalAssemble () 03187 { 03188 using Teuchos::arcp; 03189 using Teuchos::Array; 03190 using Teuchos::ArrayRCP; 03191 using Teuchos::ArrayView; 03192 using Teuchos::CommRequest; 03193 using Teuchos::gatherAll; 03194 using Teuchos::isend; 03195 using Teuchos::ireceive; 03196 using Teuchos::null; 03197 using Teuchos::outArg; 03198 using Teuchos::RCP; 03199 using Teuchos::rcpFromRef; 03200 using Teuchos::REDUCE_MAX; 03201 using Teuchos::reduceAll; 03202 using Teuchos::SerialDenseMatrix; 03203 using Teuchos::tuple; 03204 using Teuchos::waitAll; 03205 using std::make_pair; 03206 using std::pair; 03207 typedef GlobalOrdinal GO; 03208 typedef typename Array<GO>::size_type size_type; 03209 // nonlocals_ contains the entries stored by previous calls to 03210 // insertGlobalValues() for nonowned rows. 03211 typedef std::map<GO, pair<Array<GO>, Array<Scalar> > > nonlocals_map_type; 03212 typedef typename nonlocals_map_type::const_iterator nonlocals_iter_type; 03213 03214 const char tfecfFuncName[] = "globalAssemble"; 03215 const Teuchos::Comm<int>& comm = * (getComm ()); 03216 const int numImages = comm.getSize (); 03217 const int myImageID = comm.getRank (); 03218 03219 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 03220 ! 
isFillActive (), std::runtime_error, ": requires that fill is active."); 03221 03222 // Determine (via a global all-reduce) if any processes have 03223 // nonlocal entries to share. This is necessary even if the 03224 // matrix has a static graph, because insertGlobalValues allows 03225 // nonlocal entries in that case. 03226 size_t MyNonlocals = nonlocals_.size(), 03227 MaxGlobalNonlocals; 03228 reduceAll<int, size_t> (comm, REDUCE_MAX, MyNonlocals, 03229 outArg (MaxGlobalNonlocals)); 03230 if (MaxGlobalNonlocals == 0) { 03231 return; // no entries to share 03232 } 03233 03234 // FIXME (mfh 14 Dec 2012) The code below reimplements an Export 03235 // operation. It would be better just to use an Export. See 03236 // Comment #34 in discussion of Bug 5782. 03237 // 03238 // mfh 24 Feb 2014: On the other hand, this is not technically an 03239 // Export, since the row Map might not necessarily be one-to-one. 03240 03241 // compute a list of NLRs from nonlocals_ and use it to compute: 03242 // IdsAndRows: a vector of (id,row) pairs 03243 // NLR2Id: a map from NLR to the Id that owns it 03244 // globalNeighbors: a global graph of connectivity between images: 03245 // globalNeighbors(i,j) indicates that j sends to i 03246 // sendIDs: a list of all images I send to 03247 // recvIDs: a list of all images I receive from (constructed later) 03248 Array<pair<int,GlobalOrdinal> > IdsAndRows; 03249 std::map<GlobalOrdinal,int> NLR2Id; 03250 SerialDenseMatrix<int,char> globalNeighbors; 03251 Array<int> sendIDs, recvIDs; 03252 { 03253 // Construct the set of all nonowned rows encountered by this 03254 // process in insertGlobalValues() or sumIntoGlobalValues(). 03255 std::set<GlobalOrdinal> setOfRows; 03256 for (nonlocals_iter_type iter = nonlocals_.begin (); 03257 iter != nonlocals_.end (); ++iter) { 03258 setOfRows.insert (iter->first); 03259 } 03260 // Copy the resulting set of nonowned rows into an Array. 
03261 Array<GlobalOrdinal> NLRs (setOfRows.size ()); 03262 std::copy (setOfRows.begin (), setOfRows.end (), NLRs.begin ()); 03263 03264 // get a list of ImageIDs for the non-local rows (NLRs) 03265 Array<int> NLRIds (NLRs.size ()); 03266 { 03267 const LookupStatus stat = 03268 getRowMap ()->getRemoteIndexList (NLRs (), NLRIds ()); 03269 const int lclerr = (stat == IDNotPresent ? 1 : 0); 03270 int gblerr; 03271 reduceAll<int, int> (comm, REDUCE_MAX, lclerr, outArg (gblerr)); 03272 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 03273 gblerr, std::runtime_error, ": non-local entries correspond to " 03274 "invalid rows."); 03275 } 03276 03277 // build up a list of neighbors, as well as a map between NLRs and Ids 03278 // localNeighbors[i] != 0 iff I have data to send to image i 03279 // put NLRs,Ids into an array of pairs 03280 IdsAndRows.reserve (NLRs.size ()); 03281 Array<char> localNeighbors (numImages, 0); 03282 typename Array<GO>::const_iterator nlr; 03283 typename Array<int>::const_iterator id; 03284 for (nlr = NLRs.begin (), id = NLRIds.begin (); 03285 nlr != NLRs.end (); ++nlr, ++id) { 03286 NLR2Id[*nlr] = *id; 03287 localNeighbors[*id] = 1; 03288 IdsAndRows.push_back (make_pair (*id, *nlr)); 03289 } 03290 for (int j = 0; j < numImages; ++j) { 03291 if (localNeighbors[j]) { 03292 sendIDs.push_back (j); 03293 } 03294 } 03295 // sort IdsAndRows, by Ids first, then rows 03296 std::sort (IdsAndRows.begin (), IdsAndRows.end ()); 03297 // gather from other nodes to form the full graph 03298 // 03299 // FIXME (mfh 24 Feb 2014) Ugh, this is awful!!! It's making a 03300 // P x P matrix which is the full graph of process connectivity. 03301 // Neither Export nor Import does this! It would probably be 03302 // more efficient to do the following: 03303 // 03304 // 1. Form the one-to-one version of the row Map, tgtMap 03305 // 2. Form the (possibly overlapping) Map srcMap, with the 03306 // global row indices which are the keys of nonlocals_ on 03307 // each process 03308 // 3. 
Construct an Export from srcMap to tgtMap 03309 // 4. Execute the Export with Tpetra::ADD 03310 globalNeighbors.shapeUninitialized (numImages, numImages); 03311 gatherAll (comm, numImages, localNeighbors.getRawPtr (), 03312 numImages*numImages, globalNeighbors.values ()); 03313 // globalNeighbors at this point contains (on all images) the 03314 // connectivity between the images. 03315 // globalNeighbors(i,j) != 0 means that j sends to i/that i receives from j 03316 } 03317 03319 // FIGURE OUT WHO IS SENDING TO WHOM AND HOW MUCH 03320 // DO THIS IN THE PROCESS OF PACKING ALL OUTGOING DATA ACCORDING TO DESTINATION ID 03322 03323 // loop over all columns to know from which images I can expect to receive something 03324 for (int j=0; j<numImages; ++j) { 03325 if (globalNeighbors (myImageID, j)) { 03326 recvIDs.push_back (j); 03327 } 03328 } 03329 const size_t numRecvs = recvIDs.size (); 03330 03331 // we know how many we're sending to already 03332 // form a contiguous list of all data to be sent 03333 // track the number of entries for each ID 03334 Array<Details::CrsIJV<GlobalOrdinal,Scalar> > IJVSendBuffer; 03335 Array<size_t> sendSizes(sendIDs.size(), 0); 03336 size_t numSends = 0; 03337 for (typename Array<pair<int,GlobalOrdinal> >::const_iterator IdAndRow = IdsAndRows.begin(); 03338 IdAndRow != IdsAndRows.end(); ++IdAndRow) 03339 { 03340 const int id = IdAndRow->first; 03341 const GO row = IdAndRow->second; 03342 03343 // have we advanced to a new send? 03344 if (sendIDs[numSends] != id) { 03345 numSends++; 03346 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 03347 sendIDs[numSends] != id, std::logic_error, 03348 ": internal logic error. 
Contact Tpetra team."); 03349 } 03350 03351 // copy data for row into contiguous storage 03352 pair<Array<GO>, Array<Scalar> >& nonlocalsRow = nonlocals_[row]; 03353 ArrayView<const GO> nonlocalsRow_colInds = nonlocalsRow.first (); 03354 ArrayView<const Scalar> nonlocalsRow_values = nonlocalsRow.second (); 03355 const size_type numNonlocalsRow = nonlocalsRow_colInds.size (); 03356 03357 for (size_type k = 0; k < numNonlocalsRow; ++k) { 03358 const Scalar val = nonlocalsRow_values[k]; 03359 const GO col = nonlocalsRow_colInds[k]; 03360 IJVSendBuffer.push_back (Details::CrsIJV<GO, Scalar> (row, col, val)); 03361 sendSizes[numSends]++; 03362 } 03363 } 03364 if (IdsAndRows.size () > 0) { 03365 numSends++; // one last increment, to make it a count instead of an index 03366 } 03367 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 03368 static_cast<size_type> (numSends) != sendIDs.size(), 03369 std::logic_error, ": internal logic error. Contact Tpetra team."); 03370 03371 // don't need this data anymore 03372 // clear it before we start allocating a bunch of new memory 03373 nonlocals_.clear (); 03374 03376 // TRANSMIT SIZE INFO BETWEEN SENDERS AND RECEIVERS 03378 // perform non-blocking sends: send sizes to our recipients 03379 Array<RCP<CommRequest<int> > > sendRequests; 03380 for (size_t s = 0; s < numSends ; ++s) { 03381 // we'll fake the memory management, because all communication will be local to this method and the scope of our data 03382 sendRequests.push_back (isend<int, size_t> (comm, rcpFromRef (sendSizes[s]), sendIDs[s])); 03383 } 03384 // perform non-blocking receives: receive sizes from our senders 03385 Array<RCP<CommRequest<int> > > recvRequests; 03386 Array<size_t> recvSizes (numRecvs); 03387 for (size_t r = 0; r < numRecvs; ++r) { 03388 // we'll fake the memory management, because all communication 03389 // will be local to this method and the scope of our data 03390 recvRequests.push_back (ireceive<int, size_t> (comm, rcpFromRef (recvSizes[r]), recvIDs[r])); 
03391 } 03392 // wait on all 03393 if (! sendRequests.empty ()) { 03394 waitAll (comm, sendRequests ()); 03395 } 03396 if (! recvRequests.empty ()) { 03397 waitAll (comm, recvRequests ()); 03398 } 03399 comm.barrier (); 03400 sendRequests.clear (); 03401 recvRequests.clear (); 03402 03404 // NOW SEND/RECEIVE ALL ROW DATA 03406 // from the size info, build the ArrayViews into IJVSendBuffer 03407 Array<ArrayView<Details::CrsIJV<GO, Scalar> > > sendBuffers (numSends, null); 03408 { 03409 size_t cur = 0; 03410 for (size_t s=0; s<numSends; ++s) { 03411 sendBuffers[s] = IJVSendBuffer (cur, sendSizes[s]); 03412 cur += sendSizes[s]; 03413 } 03414 } 03415 // perform non-blocking sends 03416 for (size_t s = 0; s < numSends; ++s) { 03417 // we'll fake the memory management, because all communication 03418 // will be local to this method and the scope of our data 03419 ArrayRCP<Details::CrsIJV<GO, Scalar> > tmparcp = 03420 arcp (sendBuffers[s].getRawPtr (), 0, sendBuffers[s].size (), false); 03421 sendRequests.push_back (isend<int, Details::CrsIJV<GlobalOrdinal,Scalar> > (comm, tmparcp, sendIDs[s])); 03422 } 03423 // calculate amount of storage needed for receives 03424 // setup pointers for the receives as well 03425 size_t totalRecvSize = std::accumulate (recvSizes.begin (), recvSizes.end (), 0); 03426 Array<Details::CrsIJV<GO, Scalar> > IJVRecvBuffer (totalRecvSize); 03427 // from the size info, build the ArrayViews into IJVRecvBuffer 03428 Array<ArrayView<Details::CrsIJV<GO, Scalar> > > recvBuffers (numRecvs, null); 03429 { 03430 size_t cur = 0; 03431 for (size_t r = 0; r < numRecvs; ++r) { 03432 recvBuffers[r] = IJVRecvBuffer (cur, recvSizes[r]); 03433 cur += recvSizes[r]; 03434 } 03435 } 03436 // perform non-blocking recvs 03437 for (size_t r = 0; r < numRecvs ; ++r) { 03438 // we'll fake the memory management, because all communication 03439 // will be local to this method and the scope of our data 03440 ArrayRCP<Details::CrsIJV<GO, Scalar> > tmparcp = 03441 arcp 
(recvBuffers[r].getRawPtr (), 0, recvBuffers[r].size (), false); 03442 recvRequests.push_back (ireceive (comm, tmparcp, recvIDs[r])); 03443 } 03444 // perform waits 03445 if (! sendRequests.empty ()) { 03446 waitAll (comm, sendRequests ()); 03447 } 03448 if (! recvRequests.empty ()) { 03449 waitAll (comm, recvRequests ()); 03450 } 03451 comm.barrier (); 03452 sendRequests.clear (); 03453 recvRequests.clear (); 03454 03456 // NOW PROCESS THE RECEIVED ROW DATA 03458 // TODO: instead of adding one entry at a time, add one row at a time. 03459 // this requires resorting; they arrived sorted by sending node, so that entries could be non-contiguous if we received 03460 // multiple entries for a particular row from different processors. 03461 // it also requires restoring the data, which may make it not worth the trouble. 03462 03463 typedef typename Array<Details::CrsIJV<GO, Scalar> >::const_iterator ijv_iter_type; 03464 if (this->isStaticGraph ()) { 03465 for (ijv_iter_type ijv = IJVRecvBuffer.begin (); 03466 ijv != IJVRecvBuffer.end (); ++ijv) { 03467 sumIntoGlobalValues (ijv->i, tuple (ijv->j), tuple (ijv->v)); 03468 } 03469 } 03470 else { // Dynamic graph; can use insertGlobalValues () 03471 for (ijv_iter_type ijv = IJVRecvBuffer.begin (); 03472 ijv != IJVRecvBuffer.end (); ++ijv) { 03473 try { 03474 insertGlobalValues (ijv->i, tuple (ijv->j), tuple (ijv->v)); 03475 } 03476 catch (std::runtime_error &e) { 03477 std::ostringstream outmsg; 03478 outmsg << e.what() << std::endl 03479 << "caught in globalAssemble() in " << __FILE__ << ":" << __LINE__ 03480 << std::endl ; 03481 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, outmsg.str()); 03482 } 03483 } 03484 } 03485 03486 // WHEW! THAT WAS TIRING! 
03487 } 03488 03489 03492 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03493 void 03494 CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03495 resumeFill (const RCP<ParameterList> ¶ms) 03496 { 03497 if (! isStaticGraph ()) { // Don't resume fill of a nonowned graph. 03498 myGraph_->resumeFill (params); 03499 } 03500 clearGlobalConstants (); 03501 fillComplete_ = false; 03502 } 03503 03504 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03505 void 03506 CrsMatrix< 03507 Scalar, LocalOrdinal, GlobalOrdinal, 03508 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03509 computeGlobalConstants () 03510 { 03511 // This method doesn't do anything. The analogous method in 03512 // CrsGraph does actually compute something. 03513 // 03514 // Oddly enough, clearGlobalConstants() clears frobNorm_ (by 03515 // setting it to -1), but computeGlobalConstants() does _not_ 03516 // compute the Frobenius norm; this is done on demand in 03517 // getFrobeniusNorm(), and the result is cached there. 03518 } 03519 03520 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, 03521 class DeviceType> 03522 void 03523 CrsMatrix< 03524 Scalar, LocalOrdinal, GlobalOrdinal, 03525 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03526 clearGlobalConstants () { 03527 // We use -1 to indicate that the Frobenius norm needs to be 03528 // recomputed, since the values might change between now and the 03529 // next fillComplete call. 03530 // 03531 // Oddly enough, clearGlobalConstants() clears frobNorm_, but 03532 // computeGlobalConstants() does _not_ compute the Frobenius norm; 03533 // this is done on demand in getFrobeniusNorm(), and the result is 03534 // cached there. 
03535 frobNorm_ = -STM::one (); 03536 } 03537 03538 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, 03539 class DeviceType> 03540 void 03541 CrsMatrix< 03542 Scalar, LocalOrdinal, GlobalOrdinal, 03543 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03544 fillComplete (const RCP<ParameterList>& params) 03545 { 03546 TEUCHOS_TEST_FOR_EXCEPTION( 03547 getCrsGraph ().is_null (), std::logic_error, "Tpetra::CrsMatrix::" 03548 "fillComplete(params): getCrsGraph() returns null. " 03549 "This should not happen at this point. " 03550 "Please report this bug to the Tpetra developers."); 03551 03552 if (isStaticGraph () && getCrsGraph ()->isFillComplete ()) { 03553 fillComplete (getCrsGraph ()->getDomainMap (), getCrsGraph ()->getRangeMap (), params); 03554 } else { 03555 fillComplete (getRowMap (), getRowMap (), params); 03556 } 03557 } 03558 03559 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, 03560 class DeviceType> 03561 void 03562 CrsMatrix< 03563 Scalar, LocalOrdinal, GlobalOrdinal, 03564 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03565 fillComplete (const RCP<const map_type>& domainMap, 03566 const RCP<const map_type>& rangeMap, 03567 const RCP<ParameterList>& params) 03568 { 03569 using Teuchos::ArrayRCP; 03570 using Teuchos::RCP; 03571 using Teuchos::rcp; 03572 const char tfecfFuncName[] = "fillComplete"; 03573 03574 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 03575 ! isFillActive () || isFillComplete (), 03576 std::runtime_error, ": Matrix fill state must be active (isFillActive() " 03577 "must be true) before you may call fillComplete()."); 03578 const int numProcs = getComm ()->getSize (); 03579 03580 // 03581 // Read parameters from the input ParameterList. 03582 // 03583 03584 // If true, the caller promises that no process did nonlocal 03585 // changes since the last call to fillComplete. 
03586 bool assertNoNonlocalInserts = false; 03587 // If true, makeColMap sorts remote GIDs (within each remote 03588 // process' group). 03589 bool sortGhosts = true; 03590 03591 if (! params.is_null ()) { 03592 assertNoNonlocalInserts = params->get ("No Nonlocal Changes", 03593 assertNoNonlocalInserts); 03594 if (params->isParameter ("sort column map ghost gids")) { 03595 sortGhosts = params->get ("sort column map ghost gids", sortGhosts); 03596 } 03597 else if (params->isParameter ("Sort column Map ghost GIDs")) { 03598 sortGhosts = params->get ("Sort column Map ghost GIDs", sortGhosts); 03599 } 03600 } 03601 // We also don't need to do global assembly if there is only one 03602 // process in the communicator. 03603 const bool needGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1; 03604 // This parameter only matters if this matrix owns its graph. 03605 if (! myGraph_.is_null ()) { 03606 myGraph_->sortGhostsAssociatedWithEachProcessor_ = sortGhosts; 03607 } 03608 03609 if (! getCrsGraph()->indicesAreAllocated()) { 03610 if (hasColMap ()) { 03611 // We have a column Map, so use local indices. 03612 allocateValues (LocalIndices, GraphNotYetAllocated); 03613 } else { 03614 // We don't have a column Map, so use global indices. 03615 allocateValues (GlobalIndices, GraphNotYetAllocated); 03616 } 03617 } 03618 // Global assemble, if we need to. This call only costs a single 03619 // all-reduce if we didn't need global assembly after all. 03620 if (needGlobalAssemble) { 03621 globalAssemble (); 03622 } 03623 else { 03624 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 03625 numProcs == 1 && nonlocals_.size() > 0, 03626 std::runtime_error, ": cannot have nonlocal entries on a serial run. 
" 03627 "An invalid entry (i.e., with row index not in the row Map) must have " 03628 "been submitted to the CrsMatrix."); 03629 } 03630 03631 if (isStaticGraph ()) { 03632 // FIXME (mfh 18 Jun 2014) This check for correctness of the 03633 // input Maps incurs a penalty of two all-reduces for the 03634 // otherwise optimal const graph case. 03635 // 03636 // We could turn these (max) 2 all-reduces into (max) 1, by 03637 // fusing them. We could do this by adding a "locallySameAs" 03638 // method to Map, which would return one of four states: 03639 // 03640 // a. Certainly globally the same 03641 // b. Certainly globally not the same 03642 // c. Locally the same 03643 // d. Locally not the same 03644 // 03645 // The first two states don't require further communication. 03646 // The latter two states require an all-reduce to communicate 03647 // globally, but we only need one all-reduce, since we only need 03648 // to check whether at least one of the Maps is wrong. 03649 const bool domainMapsMatch = staticGraph_->getDomainMap ()->isSameAs (*domainMap); 03650 const bool rangeMapsMatch = staticGraph_->getRangeMap ()->isSameAs (*rangeMap); 03651 03652 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 03653 ! domainMapsMatch, std::runtime_error, 03654 ": The CrsMatrix's domain Map does not match the graph's domain Map. " 03655 "The graph cannot be changed because it was given to the CrsMatrix " 03656 "constructor as const. You can fix this by passing in the graph's " 03657 "domain Map and range Map to the matrix's fillComplete call."); 03658 03659 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 03660 ! rangeMapsMatch, std::runtime_error, 03661 ": The CrsMatrix's range Map does not match the graph's range Map. " 03662 "The graph cannot be changed because it was given to the CrsMatrix " 03663 "constructor as const. 
You can fix this by passing in the graph's " 03664 "domain Map and range Map to the matrix's fillComplete call."); 03665 } 03666 else { 03667 // Set the graph's domain and range Maps. This will clear the 03668 // Import if the domain Map has changed (is a different 03669 // pointer), and the Export if the range Map has changed (is a 03670 // different pointer). 03671 myGraph_->setDomainRangeMaps (domainMap, rangeMap); 03672 03673 // Make the graph's column Map, if necessary. 03674 if (! myGraph_->hasColMap ()) { 03675 myGraph_->makeColMap (); 03676 } 03677 03678 // Make indices local, if necessary. The method won't do 03679 // anything if the graph is already locally indexed. 03680 myGraph_->makeIndicesLocal (); 03681 03682 if (! myGraph_->isSorted ()) { 03683 sortEntries (); 03684 } 03685 if (! myGraph_->isMerged ()) { 03686 mergeRedundantEntries (); 03687 } 03688 // Make the Import and Export, if they haven't been made already. 03689 myGraph_->makeImportExport (); 03690 myGraph_->computeGlobalConstants (); 03691 myGraph_->fillComplete_ = true; 03692 myGraph_->checkInternalState (); 03693 } 03694 computeGlobalConstants (); 03695 // fill local objects; will fill and finalize local graph if appropriate 03696 if (myGraph_.is_null ()) { 03697 // The matrix does _not_ own the graph, and the graph's 03698 // structure is already fixed, so just fill the local matrix. 03699 fillLocalMatrix (params); 03700 } else { 03701 // The matrix _does_ own the graph, so fill the local graph at 03702 // the same time as the local matrix. 03703 fillLocalGraphAndMatrix (params); 03704 } 03705 03706 // Once we've initialized the sparse kernels, we're done with the 03707 // local objects. We may now release them and their memory, since 03708 // they will persist in the local sparse ops if necessary. We 03709 // keep the local graph if the parameters tell us to do so. 03710 03711 // FIXME (mfh 28 Aug 2014) "Preserve Local Graph" bool parameter no longer used. 
03712 03713 fillComplete_ = true; // Now we're fill complete! 03714 checkInternalState (); 03715 } 03716 03717 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, 03718 class DeviceType> 03719 void 03720 CrsMatrix< 03721 Scalar, LocalOrdinal, GlobalOrdinal, 03722 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03723 expertStaticFillComplete (const Teuchos::RCP<const map_type> & domainMap, 03724 const Teuchos::RCP<const map_type> & rangeMap, 03725 const Teuchos::RCP<const import_type>& importer, 03726 const Teuchos::RCP<const export_type>& exporter, 03727 const Teuchos::RCP<Teuchos::ParameterList> ¶ms) 03728 { 03729 const char tfecfFuncName[] = "expertStaticFillComplete"; 03730 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( ! isFillActive() || isFillComplete(), 03731 std::runtime_error, ": Matrix fill state must be active (isFillActive() " 03732 "must be true) before calling fillComplete()."); 03733 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(myGraph_==Teuchos::null, std::logic_error,": myGraph_ is null. This is not allowed."); 03734 03735 // We will presume globalAssemble is not needed, so we do the ESFC on the graph 03736 myGraph_->expertStaticFillComplete (domainMap, rangeMap, importer, exporter); 03737 03738 computeGlobalConstants(); 03739 03740 // Fill the local graph and matrix 03741 fillLocalGraphAndMatrix (params); 03742 03743 // FIXME (mfh 28 Aug 2014) "Preserve Local Graph" bool parameter no longer used. 03744 03745 // Now we're fill complete! 03746 fillComplete_ = true; 03747 03748 // Sanity checks at the end. 03749 #ifdef HAVE_TPETRA_DEBUG 03750 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isFillActive(), std::logic_error, 03751 ": We're at the end of fillComplete(), but isFillActive() is true. " 03752 "Please report this bug to the Tpetra developers."); 03753 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillComplete(), std::logic_error, 03754 ": We're at the end of fillComplete(), but isFillActive() is true. 
" 03755 "Please report this bug to the Tpetra developers."); 03756 #endif // HAVE_TPETRA_DEBUG 03757 checkInternalState(); 03758 03759 } 03760 03763 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03764 void CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::sortEntries() 03765 { 03766 TEUCHOS_TEST_FOR_EXCEPTION(isStaticGraph() == true, std::runtime_error, 03767 typeName(*this) << "::sortEntries(): cannot sort with static graph."); 03768 if (myGraph_->isSorted() == false) { 03769 for (size_t row=0; row < getNodeNumRows(); ++row) { 03770 RowInfo rowInfo = myGraph_->getRowInfo(row); 03771 myGraph_->template sortRowIndicesAndValues<Scalar>(rowInfo,this->getViewNonConst(rowInfo)); 03772 } 03773 // we just sorted every row 03774 myGraph_->indicesAreSorted_ = true; 03775 } 03776 } 03777 03778 03781 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03782 void 03783 CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03784 mergeRedundantEntries () 03785 { 03786 TEUCHOS_TEST_FOR_EXCEPTION(isStaticGraph() == true, std::runtime_error, 03787 typeName(*this) << "::mergeRedundantEntries: Cannot merge with static graph."); 03788 if (! 
myGraph_->isMerged ()) { 03789 const size_t nodeNumRows = getNodeNumRows (); 03790 for (size_t row = 0; row < nodeNumRows; ++row) { 03791 RowInfo rowInfo = myGraph_->getRowInfo (row); 03792 Teuchos::ArrayView<Scalar> rowView = (this->getViewNonConst (rowInfo)) (); 03793 myGraph_->template mergeRowIndicesAndValues<Scalar> (rowInfo, rowView); 03794 } 03795 myGraph_->noRedundancies_ = true; // we just merged every row 03796 } 03797 } 03798 03801 template <class Scalar, 03802 class LocalOrdinal, 03803 class GlobalOrdinal, class DeviceType> 03804 void 03805 CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03806 applyNonTranspose (const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal,node_type> & X_in, 03807 MultiVector<Scalar, LocalOrdinal, GlobalOrdinal,node_type> & Y_in, 03808 Scalar alpha, 03809 Scalar beta) const 03810 { 03811 using Teuchos::null; 03812 using Teuchos::RCP; 03813 using Teuchos::rcp; 03814 using Teuchos::rcp_const_cast; 03815 using Teuchos::rcpFromRef; 03816 03817 // mfh 05 Jun 2014: Special case for alpha == 0. I added this to 03818 // fix an Ifpack2 test (RILUKSingleProcessUnitTests), which was 03819 // failing only for the Kokkos refactor version of Tpetra. It's a 03820 // good idea regardless to have the bypass. 03821 if (alpha == STS::zero ()) { 03822 if (beta == STS::zero ()) { 03823 Y_in.putScalar (STS::zero ()); 03824 } else if (beta != STS::one ()) { 03825 Y_in.scale (beta); 03826 } 03827 return; 03828 } 03829 03830 // It's possible that X is a view of Y or vice versa. We don't 03831 // allow this (apply() requires that X and Y not alias one 03832 // another), but it's helpful to detect and work around this case. 03833 // We don't try to to detect the more subtle cases (e.g., one is a 03834 // subview of the other, but their initial pointers differ). We 03835 // only need to do this if this matrix's Import is trivial; 03836 // otherwise, we don't actually apply the operator from X into Y. 
03837 03838 RCP<const import_type> importer = this->getGraph ()->getImporter (); 03839 RCP<const export_type> exporter = this->getGraph ()->getExporter (); 03840 03841 // If beta == 0, then the output MV will be overwritten; none of 03842 // its entries should be read. (Sparse BLAS semantics say that we 03843 // must ignore any Inf or NaN entries in Y_in, if beta is zero.) 03844 // This matters if we need to do an Export operation; see below. 03845 const bool Y_is_overwritten = (beta == STS::zero()); 03846 03847 // We treat the case of a replicated MV output specially. 03848 const bool Y_is_replicated = ! Y_in.isDistributed (); 03849 03850 // This is part of the special case for replicated MV output. 03851 // We'll let each process do its thing, but do an all-reduce at 03852 // the end to sum up the results. Setting beta=0 on all processes 03853 // but Proc 0 makes the math work out for the all-reduce. (This 03854 // assumes that the replicated data is correctly replicated, so 03855 // that the data are the same on all processes.) 03856 if (Y_is_replicated && this->getComm ()->getRank () > 0) { 03857 beta = STS::zero (); 03858 } 03859 03860 // Temporary MV for Import operation. After the block of code 03861 // below, this will be an (Imported if necessary) column Map MV 03862 // ready to give to localMultiply(). 03863 RCP<const MV> X_colMap; 03864 if (importer.is_null ()) { 03865 if (! X_in.isConstantStride ()) { 03866 // Not all sparse mat-vec kernels can handle an input MV with 03867 // nonconstant stride correctly, so we have to copy it in that 03868 // case into a constant stride MV. To make a constant stride 03869 // copy of X_in, we force creation of the column (== domain) 03870 // Map MV (if it hasn't already been created, else fetch the 03871 // cached copy). This avoids creating a new MV each time. 
03872 RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in, true); 03873 Tpetra::deep_copy (*X_colMapNonConst, X_in); 03874 X_colMap = rcp_const_cast<const MV> (X_colMapNonConst); 03875 } 03876 else { 03877 // The domain and column Maps are the same, so do the local 03878 // multiply using the domain Map input MV X_in. 03879 X_colMap = rcpFromRef (X_in); 03880 } 03881 } 03882 else { 03883 // We're doing an Import anyway, which will copy the relevant 03884 // elements of the domain Map MV X_in into a separate column Map 03885 // MV. Thus, we don't have to worry whether X_in is constant 03886 // stride. 03887 RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in); 03888 03889 // Import from the domain Map MV to the column Map MV. 03890 X_colMapNonConst->doImport (X_in, *importer, INSERT); 03891 X_colMap = rcp_const_cast<const MV> (X_colMapNonConst); 03892 } 03893 03894 // Temporary MV for Export operation, or for copying a nonconstant 03895 // stride output MV into a constant stride MV. 03896 RCP<MV> Y_rowMap = getRowMapMultiVector (Y_in); 03897 03898 // If we have a nontrivial Export object, we must perform an 03899 // Export. In that case, the local multiply result will go into 03900 // the row Map multivector. We don't have to make a 03901 // constant-stride version of Y_in in this case, because we had to 03902 // make a constant stride Y_rowMap MV and do an Export anyway. 03903 if (! exporter.is_null ()) { 03904 this->template localMultiply<Scalar, Scalar> (*X_colMap, *Y_rowMap, 03905 Teuchos::NO_TRANS, 03906 alpha, STS::zero ()); 03907 // If we're overwriting the output MV Y_in completely (beta == 03908 // 0), then make sure that it is filled with zeros before we do 03909 // the Export. Otherwise, the ADD combine mode will use data in 03910 // Y_in, which is supposed to be zero. 
03911 if (Y_is_overwritten) { 03912 Y_in.putScalar (STS::zero ()); 03913 } 03914 else { 03915 // Scale the output MV by beta, so that the Export sums in the 03916 // mat-vec contribution: Y_in = beta*Y_in + alpha*A*X_in. 03917 Y_in.scale (beta); 03918 } 03919 // Do the Export operation. 03920 Y_in.doExport (*Y_rowMap, *exporter, ADD); 03921 } 03922 else { // Don't do an Export: row Map and range Map are the same. 03923 // 03924 // If Y_in does not have constant stride, or if the column Map 03925 // MV aliases Y_in, then we can't let the kernel write directly 03926 // to Y_in. Instead, we have to use the cached row (== range) 03927 // Map MV as temporary storage. 03928 // 03929 // FIXME (mfh 05 Jun 2014) This test for aliasing only tests if 03930 // the user passed in the same MultiVector for both X and Y. It 03931 // won't detect whether one MultiVector views the other. We 03932 // should also check the MultiVectors' raw data pointers. 03933 if (! Y_in.isConstantStride () || X_colMap.getRawPtr () == &Y_in) { 03934 // Force creating the MV if it hasn't been created already. 03935 // This will reuse a previously created cached MV. 03936 Y_rowMap = getRowMapMultiVector (Y_in, true); 03937 03938 // If beta == 0, we don't need to copy Y_in into Y_rowMap, 03939 // since we're overwriting it anyway. 03940 if (beta != STS::zero ()) { 03941 Tpetra::deep_copy (*Y_rowMap, Y_in); 03942 } 03943 this->template localMultiply<Scalar, Scalar> (*X_colMap, 03944 *Y_rowMap, 03945 Teuchos::NO_TRANS, 03946 alpha, beta); 03947 Tpetra::deep_copy (Y_in, *Y_rowMap); 03948 } 03949 else { 03950 this->template localMultiply<Scalar, Scalar> (*X_colMap, Y_in, 03951 Teuchos::NO_TRANS, 03952 alpha, beta); 03953 } 03954 } 03955 03956 // If the range Map is a locally replicated Map, sum up 03957 // contributions from each process. We set beta = 0 on all 03958 // processes but Proc 0 initially, so this will handle the scaling 03959 // factor beta correctly. 
03960 if (Y_is_replicated) { 03961 Y_in.reduce (); 03962 } 03963 } 03964 03965 template <class Scalar, 03966 class LocalOrdinal, 03967 class GlobalOrdinal, class DeviceType> 03968 void 03969 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03970 applyTranspose (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,node_type>& X_in, 03971 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,node_type>& Y_in, 03972 const Teuchos::ETransp mode, 03973 Scalar alpha, 03974 Scalar beta) const 03975 { 03976 using Teuchos::null; 03977 using Teuchos::RCP; 03978 using Teuchos::rcp; 03979 using Teuchos::rcp_const_cast; 03980 using Teuchos::rcpFromRef; 03981 03982 // Take shortcuts for alpha == 0. 03983 if (alpha == STS::zero ()) { 03984 // Follow the Sparse BLAS convention by ignoring both the matrix 03985 // and X_in, in this case. 03986 if (beta == STS::zero ()) { 03987 // Follow the Sparse BLAS convention by overwriting any Inf or 03988 // NaN values in Y_in, in this case. 03989 Y_in.putScalar (STS::zero ()); 03990 } 03991 else { 03992 Y_in.scale (beta); 03993 } 03994 return; 03995 } 03996 03997 const size_t numVectors = X_in.getNumVectors (); 03998 03999 // We don't allow X_in and Y_in to alias one another. It's hard 04000 // to check this, because advanced users could create views from 04001 // raw pointers. However, if X_in and Y_in reference the same 04002 // object, we will do the user a favor by copying X into new 04003 // storage (with a warning). We only need to do this if we have 04004 // trivial importers; otherwise, we don't actually apply the 04005 // operator from X into Y. 04006 RCP<const import_type> importer = this->getGraph ()->getImporter (); 04007 RCP<const export_type> exporter = this->getGraph ()->getExporter (); 04008 // access X indirectly, in case we need to create temporary storage 04009 RCP<const MV> X; 04010 04011 // some parameters for below 04012 const bool Y_is_replicated = ! 
Y_in.isDistributed (); 04013 const bool Y_is_overwritten = (beta == STS::zero ()); 04014 if (Y_is_replicated && this->getComm ()->getRank () > 0) { 04015 beta = STS::zero (); 04016 } 04017 04018 // The kernels do not allow input or output with nonconstant stride. 04019 if (! X_in.isConstantStride () && importer.is_null ()) { 04020 X = rcp (new MV (X_in)); // Constant-stride copy of X_in 04021 } else { 04022 X = rcpFromRef (X_in); // Reference to X_in 04023 } 04024 04025 // Set up temporary multivectors for Import and/or Export. 04026 if (importer != null) { 04027 if (importMV_ != null && importMV_->getNumVectors() != numVectors) { 04028 importMV_ = null; 04029 } 04030 if (importMV_ == null) { 04031 importMV_ = rcp (new MV (this->getColMap (), numVectors)); 04032 } 04033 } 04034 if (exporter != null) { 04035 if (exportMV_ != null && exportMV_->getNumVectors() != numVectors) { 04036 exportMV_ = null; 04037 } 04038 if (exportMV_ == null) { 04039 exportMV_ = rcp (new MV (this->getRowMap (), numVectors)); 04040 } 04041 } 04042 04043 // If we have a non-trivial exporter, we must import elements that 04044 // are permuted or are on other processors. 04045 if (! exporter.is_null ()) { 04046 exportMV_->doImport (X_in, *exporter, INSERT); 04047 X = exportMV_; // multiply out of exportMV_ 04048 } 04049 04050 // If we have a non-trivial importer, we must export elements that 04051 // are permuted or belong to other processors. We will compute 04052 // solution into the to-be-exported MV; get a view. 04053 if (importer != null) { 04054 // Do the local computation. 
04055 this->template localMultiply<Scalar, Scalar> (*X, *importMV_, mode, alpha, STS::zero ()); 04056 if (Y_is_overwritten) { 04057 Y_in.putScalar (STS::zero ()); 04058 } else { 04059 Y_in.scale (beta); 04060 } 04061 Y_in.doExport(*importMV_,*importer,ADD); 04062 } 04063 // otherwise, multiply into Y 04064 else { 04065 // can't multiply in-situ; can't multiply into non-strided multivector 04066 // 04067 // FIXME (mfh 05 Jun 2014) This test for aliasing only tests if 04068 // the user passed in the same MultiVector for both X and Y. It 04069 // won't detect whether one MultiVector views the other. We 04070 // should also check the MultiVectors' raw data pointers. 04071 if (! Y_in.isConstantStride () || X.getRawPtr () == &Y_in) { 04072 // Make a deep copy of Y_in, into which to write the multiply result. 04073 MV Y (Y_in, Teuchos::Copy); 04074 this->template localMultiply<Scalar, Scalar> (*X, Y, mode, alpha, beta); 04075 Tpetra::deep_copy (Y_in, Y); 04076 } else { 04077 this->template localMultiply<Scalar, Scalar> (*X, Y_in, mode, alpha, beta); 04078 } 04079 } 04080 04081 // If the range Map is a locally replicated map, sum the 04082 // contributions from each process. (That's why we set beta=0 04083 // above for all processes but Proc 0.) 04084 if (Y_is_replicated) { 04085 Y_in.reduce (); 04086 } 04087 } 04088 04091 template <class Scalar, 04092 class LocalOrdinal, 04093 class GlobalOrdinal, class DeviceType> 04094 void 04095 CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 04096 apply (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,node_type> &X, 04097 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,node_type> &Y, 04098 Teuchos::ETransp mode, 04099 Scalar alpha, 04100 Scalar beta) const 04101 { 04102 TEUCHOS_TEST_FOR_EXCEPTION( 04103 ! 
isFillComplete (), std::runtime_error, 04104 "Tpetra::CrsMatrix::apply(): Cannot call apply() until fillComplete() " 04105 "has been called."); 04106 if (mode == Teuchos::NO_TRANS) { 04107 applyNonTranspose (X, Y, alpha, beta); 04108 } else { 04109 applyTranspose (X, Y, mode, alpha, beta); 04110 } 04111 } 04112 04115 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 04116 void 04117 CrsMatrix< 04118 Scalar, LocalOrdinal, GlobalOrdinal, 04119 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 04120 gaussSeidel (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,node_type>& B, 04121 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,node_type>& X, 04122 const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,node_type>& D, 04123 const Scalar& dampingFactor, 04124 const ESweepDirection direction, 04125 const int numSweeps) const 04126 { 04127 reorderedGaussSeidel(B,X,D,Teuchos::null,dampingFactor,direction,numSweeps); 04128 } 04129 04130 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, 04131 class DeviceType> 04132 void 04133 CrsMatrix< 04134 Scalar, LocalOrdinal, GlobalOrdinal, 04135 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 04136 reorderedGaussSeidel (const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, node_type>& B, 04137 MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, node_type>& X, 04138 const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, node_type>& D, 04139 const Teuchos::ArrayView<LocalOrdinal>& rowIndices, 04140 const Scalar& dampingFactor, 04141 const ESweepDirection direction, 04142 const int numSweeps) const 04143 { 04144 using Teuchos::null; 04145 using Teuchos::RCP; 04146 using Teuchos::rcp; 04147 using Teuchos::rcpFromRef; 04148 using Teuchos::rcp_const_cast; 04149 typedef Scalar ST; 04150 04151 TEUCHOS_TEST_FOR_EXCEPTION( 04152 isFillComplete() == false, std::runtime_error, 04153 "Tpetra::CrsMatrix::gaussSeidel: cannot call this method until " 04154 "fillComplete() has been called."); 
04155 TEUCHOS_TEST_FOR_EXCEPTION( 04156 numSweeps < 0, 04157 std::invalid_argument, 04158 "Tpetra::CrsMatrix::gaussSeidel: The number of sweeps must be " 04159 "nonnegative, but you provided numSweeps = " << numSweeps << " < 0."); 04160 04161 // Translate from global to local sweep direction. 04162 // While doing this, validate the input. 04163 KokkosClassic::ESweepDirection localDirection; 04164 if (direction == Forward) { 04165 localDirection = KokkosClassic::Forward; 04166 } 04167 else if (direction == Backward) { 04168 localDirection = KokkosClassic::Backward; 04169 } 04170 else if (direction == Symmetric) { 04171 // We'll control local sweep direction manually. 04172 localDirection = KokkosClassic::Forward; 04173 } 04174 else { 04175 TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, 04176 "Tpetra::CrsMatrix::gaussSeidel: The 'direction' enum does not have " 04177 "any of its valid values: Forward, Backward, or Symmetric."); 04178 } 04179 04180 if (numSweeps == 0) { 04181 return; // Nothing to do. 04182 } 04183 04184 // We don't need the Export object because this method assumes 04185 // that the row, domain, and range Maps are the same. We do need 04186 // the Import object, if there is one, though. 04187 RCP<const import_type> importer = this->getGraph()->getImporter(); 04188 RCP<const export_type> exporter = this->getGraph()->getExporter(); 04189 TEUCHOS_TEST_FOR_EXCEPTION( 04190 ! exporter.is_null (), std::runtime_error, 04191 "Tpetra's gaussSeidel implementation requires that the row, domain, " 04192 "and range Maps be the same. 
This cannot be the case, because the " 04193 "matrix has a nontrivial Export object."); 04194 04195 RCP<const map_type> domainMap = this->getDomainMap (); 04196 RCP<const map_type> rangeMap = this->getRangeMap (); 04197 RCP<const map_type> rowMap = this->getGraph ()->getRowMap (); 04198 RCP<const map_type> colMap = this->getGraph ()->getColMap (); 04199 04200 #ifdef HAVE_TEUCHOS_DEBUG 04201 { 04202 // The relation 'isSameAs' is transitive. It's also a 04203 // collective, so we don't have to do a "shared" test for 04204 // exception (i.e., a global reduction on the test value). 04205 TEUCHOS_TEST_FOR_EXCEPTION( 04206 ! X.getMap ()->isSameAs (*domainMap), 04207 std::runtime_error, 04208 "Tpetra::CrsMatrix::gaussSeidel requires that the input " 04209 "multivector X be in the domain Map of the matrix."); 04210 TEUCHOS_TEST_FOR_EXCEPTION( 04211 ! B.getMap ()->isSameAs (*rangeMap), 04212 std::runtime_error, 04213 "Tpetra::CrsMatrix::gaussSeidel requires that the input " 04214 "B be in the range Map of the matrix."); 04215 TEUCHOS_TEST_FOR_EXCEPTION( 04216 ! D.getMap ()->isSameAs (*rowMap), 04217 std::runtime_error, 04218 "Tpetra::CrsMatrix::gaussSeidel requires that the input " 04219 "D be in the row Map of the matrix."); 04220 TEUCHOS_TEST_FOR_EXCEPTION( 04221 ! rowMap->isSameAs (*rangeMap), 04222 std::runtime_error, 04223 "Tpetra::CrsMatrix::gaussSeidel requires that the row Map and the " 04224 "range Map be the same (in the sense of Tpetra::Map::isSameAs)."); 04225 TEUCHOS_TEST_FOR_EXCEPTION( 04226 ! domainMap->isSameAs (*rangeMap), 04227 std::runtime_error, 04228 "Tpetra::CrsMatrix::gaussSeidel requires that the domain Map and " 04229 "the range Map of the matrix be the same."); 04230 } 04231 #else 04232 // Forestall any compiler warnings for unused variables. 04233 (void) rangeMap; 04234 (void) rowMap; 04235 #endif // HAVE_TEUCHOS_DEBUG 04236 04237 // If B is not constant stride, copy it into a constant stride 04238 // multivector. 
We'll handle the right-hand side B first and deal 04239 // with X right before the sweeps, to improve locality of the 04240 // first sweep. (If the problem is small enough, then that will 04241 // hopefully keep more of the entries of X in cache. This 04242 // optimizes for the typical case of a small number of sweeps.) 04243 RCP<const MV> B_in; 04244 if (B.isConstantStride()) { 04245 B_in = rcpFromRef (B); 04246 } 04247 else { 04248 // The range Map and row Map are the same in this case, so we 04249 // can use the (possibly cached) row Map multivector to store a 04250 // constant stride copy of B. We don't have to copy back, since 04251 // Gauss-Seidel won't modify B. 04252 RCP<MV> B_in_nonconst = getRowMapMultiVector (B, true); 04253 deep_copy(*B_in_nonconst, B); // Copy from B into B_in(_nonconst). 04254 B_in = rcp_const_cast<const MV> (B_in_nonconst); 04255 04256 TPETRA_EFFICIENCY_WARNING( 04257 ! B.isConstantStride (), 04258 std::runtime_error, 04259 "gaussSeidel: The current implementation of the Gauss-Seidel kernel " 04260 "requires that X and B both have constant stride. Since B does not " 04261 "have constant stride, we had to make a copy. This is a limitation of " 04262 "the current implementation and not your fault, but we still report it " 04263 "as an efficiency warning for your information."); 04264 } 04265 04266 // If X is not constant stride, copy it into a constant stride 04267 // multivector. Also, make the column Map multivector X_colMap, 04268 // and its domain Map view X_domainMap. (X actually must be a 04269 // domain Map view of a column Map multivector; exploit this, if X 04270 // has constant stride.) 04271 04272 RCP<MV> X_domainMap; 04273 RCP<MV> X_colMap; 04274 bool copiedInput = false; 04275 04276 if (importer.is_null ()) { // Domain and column Maps are the same. 
04277 if (X.isConstantStride ()) { 04278 X_domainMap = rcpFromRef (X); 04279 X_colMap = X_domainMap; 04280 copiedInput = false; 04281 } 04282 else { 04283 // Get a temporary column Map multivector, make a domain Map 04284 // view of it, and copy X into the domain Map view. We have 04285 // to copy here because we won't be doing Import operations. 04286 X_colMap = getColumnMapMultiVector (X, true); 04287 X_domainMap = X_colMap; // Domain and column Maps are the same. 04288 deep_copy(*X_domainMap, X); // Copy X into the domain Map view. 04289 copiedInput = true; 04290 TPETRA_EFFICIENCY_WARNING( 04291 ! X.isConstantStride (), std::runtime_error, 04292 "Tpetra::CrsMatrix::gaussSeidel: The current implementation of the " 04293 "Gauss-Seidel kernel requires that X and B both have constant " 04294 "stride. Since X does not have constant stride, we had to make a " 04295 "copy. This is a limitation of the current implementation and not " 04296 "your fault, but we still report it as an efficiency warning for " 04297 "your information."); 04298 } 04299 } 04300 else { // We will be doing Import operations in the sweeps. 04301 if (X.isConstantStride ()) { 04302 X_domainMap = rcpFromRef (X); 04303 // This kernel assumes that X is a domain Map view of a column 04304 // Map multivector. We will only check if this is valid if 04305 // the CMake configure Teuchos_ENABLE_DEBUG is ON. 04306 X_colMap = X_domainMap->offsetViewNonConst (colMap, 0); 04307 04308 // FIXME (mfh 19 Mar 2013) Do we need to fill the remote 04309 // entries of X_colMap with zeros? Do we need to fill all of 04310 // X_domainMap initially with zeros? Ifpack 04311 // (Ifpack_PointRelaxation.cpp, line 906) creates an entirely 04312 // new MultiVector each time. 04313 04314 // Do the first Import for the first sweep. This simplifies 04315 // the logic in the sweeps. 
04316 X_colMap->doImport (X, *importer, INSERT); 04317 copiedInput = false; 04318 } 04319 else { 04320 // Get a temporary column Map multivector X_colMap, and make a 04321 // domain Map view X_domainMap of it. Instead of copying, we 04322 // do an Import from X into X_domainMap. This saves us a 04323 // copy, since the Import has to copy the data anyway. 04324 X_colMap = getColumnMapMultiVector (X, true); 04325 X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0); 04326 X_colMap->doImport (X, *importer, INSERT); 04327 copiedInput = true; 04328 TPETRA_EFFICIENCY_WARNING( 04329 ! X.isConstantStride (), std::runtime_error, 04330 "Tpetra::CrsMatrix::gaussSeidel: The current implementation of the " 04331 "Gauss-Seidel kernel requires that X and B both have constant stride. " 04332 "Since X does not have constant stride, we had to make a copy. " 04333 "This is a limitation of the current implementation and not your fault, " 04334 "but we still report it as an efficiency warning for your information."); 04335 } 04336 } 04337 04338 for (int sweep = 0; sweep < numSweeps; ++sweep) { 04339 if (! importer.is_null () && sweep > 0) { 04340 // We already did the first Import for the zeroth sweep. 04341 X_colMap->doImport (*X_domainMap, *importer, INSERT); 04342 } 04343 04344 // Do local Gauss-Seidel. 
04345 if (direction != Symmetric) { 04346 if(rowIndices.is_null()) 04347 this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D, 04348 dampingFactor, 04349 localDirection); 04350 else 04351 this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap, D, rowIndices, 04352 dampingFactor, 04353 localDirection); 04354 } else { // direction == Symmetric 04355 const bool doImportBetweenDirections = false; 04356 if(rowIndices.is_null()) { 04357 this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D, 04358 dampingFactor, 04359 KokkosClassic::Forward); 04360 // mfh 18 Mar 2013: Aztec's implementation of "symmetric 04361 // Gauss-Seidel" does _not_ do an Import between the forward 04362 // and backward sweeps. This makes sense, because Aztec 04363 // considers "symmetric Gauss-Seidel" a subdomain solver. 04364 if (doImportBetweenDirections) { 04365 // Communicate again before the Backward sweep. 04366 if (! importer.is_null ()) { 04367 X_colMap->doImport (*X_domainMap, *importer, INSERT); 04368 } 04369 } 04370 this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D, 04371 dampingFactor, 04372 KokkosClassic::Backward); 04373 } 04374 else { 04375 this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap, D, rowIndices, 04376 dampingFactor, 04377 KokkosClassic::Forward); 04378 if (doImportBetweenDirections) { 04379 // Communicate again before the Backward sweep. 04380 if (! importer.is_null ()) { 04381 X_colMap->doImport (*X_domainMap, *importer, INSERT); 04382 } 04383 } 04384 this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap, D, rowIndices, 04385 dampingFactor, 04386 KokkosClassic::Backward); 04387 } 04388 } 04389 } 04390 04391 if (copiedInput) { 04392 deep_copy(X, *X_domainMap); // Copy back from X_domainMap to X. 
04393 } 04394 } 04395 04396 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 04397 void 04398 CrsMatrix< 04399 Scalar, LocalOrdinal, GlobalOrdinal, 04400 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 04401 gaussSeidelCopy (MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,node_type>& X, 04402 const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,node_type>& B, 04403 const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,node_type>& D, 04404 const Scalar& dampingFactor, 04405 const ESweepDirection direction, 04406 const int numSweeps, 04407 const bool zeroInitialGuess) const 04408 { 04409 reorderedGaussSeidelCopy(X,B,D,Teuchos::null,dampingFactor,direction,numSweeps,zeroInitialGuess); 04410 } 04411 04412 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, 04413 class DeviceType> 04414 void 04415 CrsMatrix< 04416 Scalar, LocalOrdinal, GlobalOrdinal, 04417 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 04418 reorderedGaussSeidelCopy (MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,node_type>& X, 04419 const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,node_type>& B, 04420 const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,node_type>& D, 04421 const Teuchos::ArrayView<LocalOrdinal>& rowIndices, 04422 const Scalar& dampingFactor, 04423 const ESweepDirection direction, 04424 const int numSweeps, 04425 const bool zeroInitialGuess) const 04426 { 04427 using Teuchos::null; 04428 using Teuchos::RCP; 04429 using Teuchos::rcp; 04430 using Teuchos::rcpFromRef; 04431 using Teuchos::rcp_const_cast; 04432 typedef Scalar ST; 04433 TEUCHOS_TEST_FOR_EXCEPTION( 04434 isFillComplete() == false, std::runtime_error, 04435 "Tpetra::CrsMatrix::gaussSeidelCopy: cannot call this method until " 04436 "fillComplete() has been called."); 04437 TEUCHOS_TEST_FOR_EXCEPTION( 04438 numSweeps < 0, 04439 std::invalid_argument, 04440 "gaussSeidelCopy: The number of sweeps must be nonnegative, " 04441 "but you provided numSweeps = " << numSweeps << 
" < 0."); 04442 04443 // Translate from global to local sweep direction. 04444 // While doing this, validate the input. 04445 KokkosClassic::ESweepDirection localDirection; 04446 if (direction == Forward) { 04447 localDirection = KokkosClassic::Forward; 04448 } 04449 else if (direction == Backward) { 04450 localDirection = KokkosClassic::Backward; 04451 } 04452 else if (direction == Symmetric) { 04453 // We'll control local sweep direction manually. 04454 localDirection = KokkosClassic::Forward; 04455 } 04456 else { 04457 TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, 04458 "gaussSeidelCopy: The 'direction' enum does not have any of its " 04459 "valid values: Forward, Backward, or Symmetric."); 04460 } 04461 04462 if (numSweeps == 0) { 04463 return; 04464 } 04465 04466 RCP<const import_type> importer = this->getGraph()->getImporter(); 04467 RCP<const export_type> exporter = this->getGraph()->getExporter(); 04468 TEUCHOS_TEST_FOR_EXCEPTION( 04469 ! exporter.is_null (), 04470 std::runtime_error, 04471 "Tpetra's gaussSeidelCopy implementation requires that the row, domain, " 04472 "and range Maps be the same. This cannot be the case, because the " 04473 "matrix has a nontrivial Export object."); 04474 04475 RCP<const map_type> domainMap = this->getDomainMap (); 04476 RCP<const map_type> rangeMap = this->getRangeMap (); 04477 RCP<const map_type> rowMap = this->getGraph ()->getRowMap (); 04478 RCP<const map_type> colMap = this->getGraph ()->getColMap (); 04479 04480 #ifdef HAVE_TEUCHOS_DEBUG 04481 { 04482 // The relation 'isSameAs' is transitive. It's also a 04483 // collective, so we don't have to do a "shared" test for 04484 // exception (i.e., a global reduction on the test value). 04485 TEUCHOS_TEST_FOR_EXCEPTION( 04486 ! X.getMap ()->isSameAs (*domainMap), 04487 std::runtime_error, 04488 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input " 04489 "multivector X be in the domain Map of the matrix."); 04490 TEUCHOS_TEST_FOR_EXCEPTION( 04491 ! 
B.getMap ()->isSameAs (*rangeMap), 04492 std::runtime_error, 04493 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input " 04494 "B be in the range Map of the matrix."); 04495 TEUCHOS_TEST_FOR_EXCEPTION( 04496 ! D.getMap ()->isSameAs (*rowMap), 04497 std::runtime_error, 04498 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input " 04499 "D be in the row Map of the matrix."); 04500 TEUCHOS_TEST_FOR_EXCEPTION( 04501 ! rowMap->isSameAs (*rangeMap), 04502 std::runtime_error, 04503 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the row Map and the " 04504 "range Map be the same (in the sense of Tpetra::Map::isSameAs)."); 04505 TEUCHOS_TEST_FOR_EXCEPTION( 04506 ! domainMap->isSameAs (*rangeMap), 04507 std::runtime_error, 04508 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the domain Map and " 04509 "the range Map of the matrix be the same."); 04510 } 04511 #else 04512 // Forestall any compiler warnings for unused variables. 04513 (void) rangeMap; 04514 (void) rowMap; 04515 #endif // HAVE_TEUCHOS_DEBUG 04516 04517 // Fetch a (possibly cached) temporary column Map multivector 04518 // X_colMap, and a domain Map view X_domainMap of it. Both have 04519 // constant stride by construction. We know that the domain Map 04520 // must include the column Map, because our Gauss-Seidel kernel 04521 // requires that the row Map, domain Map, and range Map are all 04522 // the same, and that each process owns all of its own diagonal 04523 // entries of the matrix. 04524 04525 RCP<MV> X_colMap; 04526 RCP<MV> X_domainMap; 04527 bool copyBackOutput = false; 04528 if (importer.is_null ()) { 04529 if (X.isConstantStride ()) { 04530 X_colMap = rcpFromRef (X); 04531 X_domainMap = rcpFromRef (X); 04532 // Column Map and domain Map are the same, so there are no 04533 // remote entries. 
Thus, if we are not setting the initial 04534 // guess to zero, we don't have to worry about setting remote 04535 // entries to zero, even though we are not doing an Import in 04536 // this case. 04537 if (zeroInitialGuess) { 04538 X_colMap->putScalar (STS::zero ()); 04539 } 04540 // No need to copy back to X at end. 04541 } 04542 else { // We must copy X into a constant stride multivector. 04543 // Just use the cached column Map multivector for that. 04544 // force=true means fill with zeros, so no need to fill 04545 // remote entries (not in domain Map) with zeros. 04546 X_colMap = getColumnMapMultiVector (X, true); 04547 // X_domainMap is always a domain Map view of the column Map 04548 // multivector. In this case, the domain and column Maps are 04549 // the same, so X_domainMap _is_ X_colMap. 04550 X_domainMap = X_colMap; 04551 if (! zeroInitialGuess) { // Don't copy if zero initial guess 04552 04553 try { 04554 deep_copy(*X_domainMap , X); // Copy X into constant stride multivector 04555 } catch (std::exception& e) { 04556 std::ostringstream os; 04557 os << "Tpetra::CrsMatrix::reorderedGaussSeidelCopy: " 04558 "deep_copy(*X_domainMap, X) threw an exception: " 04559 << e.what () << "."; 04560 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ()); 04561 } 04562 } 04563 copyBackOutput = true; // Don't forget to copy back at end. 04564 TPETRA_EFFICIENCY_WARNING( 04565 ! X.isConstantStride (), 04566 std::runtime_error, 04567 "gaussSeidelCopy: The current implementation of the Gauss-Seidel " 04568 "kernel requires that X and B both have constant stride. Since X " 04569 "does not have constant stride, we had to make a copy. This is a " 04570 "limitation of the current implementation and not your fault, but we " 04571 "still report it as an efficiency warning for your information."); 04572 } 04573 } 04574 else { // Column Map and domain Map are _not_ the same. 
04575 X_colMap = getColumnMapMultiVector (X); 04576 X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0); 04577 04578 #ifdef HAVE_TPETRA_DEBUG 04579 typename MV::dual_view_type X_colMap_view = X_colMap->getDualView (); 04580 typename MV::dual_view_type X_domainMap_view = X_domainMap->getDualView (); 04581 04582 if (X_colMap->getLocalLength () != 0 && X_domainMap->getLocalLength ()) { 04583 TEUCHOS_TEST_FOR_EXCEPTION( 04584 X_colMap_view.h_view.ptr_on_device () != X_domainMap_view.h_view.ptr_on_device (), 04585 std::logic_error, "Tpetra::CrsMatrix::gaussSeidelCopy: " 04586 "Pointer to start of column Map view of X is not equal to pointer to " 04587 "start of (domain Map view of) X. This may mean that " 04588 "Tpetra::MultiVector::offsetViewNonConst is broken. " 04589 "Please report this bug to the Tpetra developers."); 04590 } 04591 04592 TEUCHOS_TEST_FOR_EXCEPTION( 04593 X_colMap_view.dimension_0 () < X_domainMap_view.dimension_0 () || 04594 X_colMap->getLocalLength () < X_domainMap->getLocalLength (), 04595 std::logic_error, "Tpetra::CrsMatrix::gaussSeidelCopy: " 04596 "X_colMap has fewer local rows than X_domainMap. " 04597 "X_colMap_view.dimension_0() = " << X_colMap_view.dimension_0 () 04598 << ", X_domainMap_view.dimension_0() = " 04599 << X_domainMap_view.dimension_0 () 04600 << ", X_colMap->getLocalLength() = " << X_colMap->getLocalLength () 04601 << ", and X_domainMap->getLocalLength() = " 04602 << X_domainMap->getLocalLength () 04603 << ". This means that Tpetra::MultiVector::offsetViewNonConst " 04604 "is broken. Please report this bug to the Tpetra developers."); 04605 04606 TEUCHOS_TEST_FOR_EXCEPTION( 04607 X_colMap->getNumVectors () != X_domainMap->getNumVectors (), 04608 std::logic_error, "Tpetra::CrsMatrix::gaussSeidelCopy: " 04609 "X_colMap has a different number of columns than X_domainMap. 
" 04610 "X_colMap->getNumVectors() = " << X_colMap->getNumVectors () 04611 << " != X_domainMap->getNumVectors() = " 04612 << X_domainMap->getNumVectors () 04613 << ". This means that Tpetra::MultiVector::offsetViewNonConst " 04614 "is broken. Please report this bug to the Tpetra developers."); 04615 04616 // TEUCHOS_TEST_FOR_EXCEPTION( 04617 // X_colMap->getLocalMV ().getStride () != 04618 // X_domainMap->getLocalMV ().getStride (), 04619 // std::logic_error, 04620 // "Tpetra::CrsMatrix::gaussSeidelCopy: " 04621 // "X_colMap has local stride " << X_colMap->getLocalMV ().getStride () 04622 // << ", which does not equal the local stride " 04623 // << X_domainMap->getLocalMV ().getStride () << " of X_domainMap. " 04624 // "This means that Tpetra::MultiVector::offsetViewNonConst is broken. " 04625 // "Please report this bug to the Tpetra developers."); 04626 #endif // HAVE_TPETRA_DEBUG 04627 04628 if (zeroInitialGuess) { 04629 // No need for an Import, since we're filling with zeros. 04630 X_colMap->putScalar (STS::zero ()); 04631 } else { 04632 // We could just copy X into X_domainMap. However, that 04633 // wastes a copy, because the Import also does a copy (plus 04634 // communication). Since the typical use case for 04635 // Gauss-Seidel is a small number of sweeps (2 is typical), we 04636 // don't want to waste that copy. Thus, we do the Import 04637 // here, and skip the first Import in the first sweep. 04638 // Importing directly from X effects the copy into X_domainMap 04639 // (which is a view of X_colMap). 04640 X_colMap->doImport (X, *importer, INSERT); 04641 } 04642 copyBackOutput = true; // Don't forget to copy back at end. 04643 } // if column and domain Maps are (not) the same 04644 04645 // The Gauss-Seidel / SOR kernel expects multivectors of constant 04646 // stride. X_colMap is by construction, but B might not be. If 04647 // it's not, we have to make a copy. 
04648 RCP<const MV> B_in; 04649 if (B.isConstantStride ()) { 04650 B_in = rcpFromRef (B); 04651 } 04652 else { 04653 // Range Map and row Map are the same in this case, so we can 04654 // use the cached row Map multivector to store a constant stride 04655 // copy of B. 04656 RCP<MV> B_in_nonconst = getRowMapMultiVector (B, true); 04657 04658 try { 04659 deep_copy(*B_in_nonconst, B); 04660 } catch (std::exception& e) { 04661 std::ostringstream os; 04662 os << "Tpetra::CrsMatrix::reorderedGaussSeidelCopy: " 04663 "deep_copy(*B_in_nonconst, B) threw an exception: " 04664 << e.what () << "."; 04665 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ()); 04666 } 04667 B_in = rcp_const_cast<const MV> (B_in_nonconst); 04668 04669 TPETRA_EFFICIENCY_WARNING( 04670 ! B.isConstantStride (), 04671 std::runtime_error, 04672 "gaussSeidelCopy: The current implementation requires that B have " 04673 "constant stride. Since B does not have constant stride, we had to " 04674 "copy it into a separate constant-stride multivector. This is a " 04675 "limitation of the current implementation and not your fault, but we " 04676 "still report it as an efficiency warning for your information."); 04677 } 04678 04679 for (int sweep = 0; sweep < numSweeps; ++sweep) { 04680 if (! importer.is_null () && sweep > 0) { 04681 // We already did the first Import for the zeroth sweep above, 04682 // if it was necessary. 04683 X_colMap->doImport (*X_domainMap, *importer, INSERT); 04684 } 04685 04686 // Do local Gauss-Seidel. 
04687 if (direction != Symmetric) { 04688 if(rowIndices.is_null()) 04689 this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D, 04690 dampingFactor, 04691 localDirection); 04692 else 04693 this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap, D, rowIndices, 04694 dampingFactor, 04695 localDirection); 04696 } else { // direction == Symmetric 04697 if(rowIndices.is_null()) { 04698 this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D, 04699 dampingFactor, 04700 KokkosClassic::Forward); 04701 // mfh 18 Mar 2013: Aztec's implementation of "symmetric 04702 // Gauss-Seidel" does _not_ do an Import between the forward 04703 // and backward sweeps. This makes symmetric Gauss-Seidel a 04704 // symmetric preconditioner if the matrix A is symmetric. We 04705 // imitate Aztec's behavior here. 04706 this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D, 04707 dampingFactor, 04708 KokkosClassic::Backward); 04709 } 04710 else { 04711 this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap, D, rowIndices, 04712 dampingFactor, 04713 KokkosClassic::Forward); 04714 this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap, D, rowIndices, 04715 dampingFactor, 04716 KokkosClassic::Backward); 04717 04718 } 04719 } 04720 } 04721 04722 if (copyBackOutput) { 04723 try { 04724 deep_copy(X , *X_domainMap); // Copy result back into X. 
04725 } catch (std::exception& e) { 04726 std::ostringstream os; 04727 os << "Tpetra::CrsMatrix::reorderedGaussSeidelCopy: " 04728 "deep_copy(X, *X_domainMap) threw an exception: " 04729 << e.what () << "."; 04730 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ()); 04731 } 04732 } 04733 } 04734 04737 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 04738 template <class DomainScalar, class RangeScalar> 04739 void 04740 CrsMatrix< 04741 Scalar, LocalOrdinal, GlobalOrdinal, 04742 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 04743 localMultiply (const MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,node_type>& X, 04744 MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,node_type>& Y, 04745 Teuchos::ETransp mode, 04746 RangeScalar alpha, 04747 RangeScalar beta) const 04748 { 04749 using Teuchos::NO_TRANS; 04750 #ifdef HAVE_TPETRA_DEBUG 04751 const char tfecfFuncName[] = "localMultiply: "; 04752 #endif // HAVE_TPETRA_DEBUG 04753 typedef Teuchos::ScalarTraits<RangeScalar> RST; 04754 #ifdef HAVE_TPETRA_DEBUG 04755 04756 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 04757 X.getNumVectors() != Y.getNumVectors(), std::runtime_error, 04758 ": X and Y must have the same number of columns (vectors). "); 04759 04760 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 04761 mode == NO_TRANS && X.getLocalLength () != getColMap ()->getNodeNumElements (), 04762 std::runtime_error, "NO_TRANS case: X has the wrong number of local rows. " 04763 "X.getLocalLength() = " << X.getLocalLength () << " != getColMap()->" 04764 "getNodeNumElements() = " << getColMap ()->getNodeNumElements () << "."); 04765 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 04766 mode == NO_TRANS && Y.getLocalLength () != getRowMap ()->getNodeNumElements (), 04767 std::runtime_error, "NO_TRANS case: Y has the wrong number of local rows. 
" 04768 "Y.getLocalLength() = " << Y.getLocalLength () << " != getRowMap()->" 04769 "getNodeNumElements() = " << getRowMap ()->getNodeNumElements () << "."); 04770 04771 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 04772 mode != NO_TRANS && X.getLocalLength () != getRowMap ()->getNodeNumElements (), 04773 std::runtime_error, "TRANS or CONJ_TRANS case: X has the wrong number of " 04774 "local rows. X.getLocalLength() = " << X.getLocalLength () << " != " 04775 "getRowMap()->getNodeNumElements() = " 04776 << getRowMap ()->getNodeNumElements () << "."); 04777 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 04778 mode != NO_TRANS && Y.getLocalLength () != getColMap ()->getNodeNumElements (), 04779 std::runtime_error, "TRANS or CONJ_TRANS case: X has the wrong number of " 04780 "local rows. Y.getLocalLength() = " << Y.getLocalLength () << " != " 04781 "getColMap()->getNodeNumElements() = " 04782 << getColMap ()->getNodeNumElements () << "."); 04783 04784 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 04785 ! isFillComplete (), std::runtime_error, ": It is incorrect to call this " 04786 "method unless the matrix is fill complete."); 04787 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 04788 X.isConstantStride() == false || Y.isConstantStride() == false, 04789 std::runtime_error, ": X and Y must be constant stride."); 04790 // If the two pointers are NULL, then they don't alias one 04791 // another, even though they are equal. 04792 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 04793 X.getDualView ().d_view.ptr_on_device () == Y.getDualView ().d_view.ptr_on_device () && 04794 X.getDualView ().d_view.ptr_on_device () != NULL, 04795 std::runtime_error, ": X and Y may not alias one another."); 04796 #endif 04797 // 04798 // Call the matvec 04799 if (beta == RST::zero()) { 04800 // Y = alpha*op(M)*X with overwrite semantics 04801 04802 // FIXME (mfh 27 Mar 2014) What about CONJ_TRANS??? 
04803 if (mode != NO_TRANS) { 04804 Kokkos::MV_MultiplyTranspose (RST::zero (), 04805 Y.template getLocalView<DeviceType> (), 04806 alpha, 04807 k_lclMatrix_, 04808 X.template getLocalView<DeviceType> ()); 04809 } 04810 else { // mode == NO_TRANS 04811 Kokkos::MV_Multiply (Y.template getLocalView<DeviceType> (), 04812 alpha, 04813 k_lclMatrix_, 04814 X.template getLocalView<DeviceType> ()); 04815 } 04816 } 04817 else { 04818 // Y = alpha*op(M) + beta*Y 04819 04820 // FIXME (mfh 27 Mar 2014) What about CONJ_TRANS??? 04821 if(mode != NO_TRANS) { 04822 Kokkos::MV_MultiplyTranspose (beta, 04823 Y.template getLocalView<DeviceType> (), 04824 alpha, 04825 k_lclMatrix_, 04826 X.template getLocalView<DeviceType> ()); 04827 } 04828 else { 04829 Kokkos::MV_Multiply (beta, 04830 Y.template getLocalView<DeviceType> (), 04831 alpha, 04832 k_lclMatrix_, 04833 X.template getLocalView<DeviceType> ()); 04834 } 04835 } 04836 } 04837 04838 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 04839 template <class DomainScalar, class RangeScalar> 04840 void 04841 CrsMatrix< 04842 Scalar, LocalOrdinal, GlobalOrdinal, 04843 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 04844 localGaussSeidel (const MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,node_type>& B, 04845 MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,node_type>& X, 04846 const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,node_type>& D, 04847 const RangeScalar& dampingFactor, 04848 const KokkosClassic::ESweepDirection direction) const 04849 { 04850 typedef LocalOrdinal LO; 04851 typedef GlobalOrdinal GO; 04852 typedef Tpetra::MultiVector<DomainScalar, LO, GO, node_type> DMV; 04853 typedef Tpetra::MultiVector<RangeScalar, LO, GO, node_type> RMV; 04854 typedef Tpetra::MultiVector<Scalar, LO, GO, node_type> MMV; 04855 typedef typename device_type::host_mirror_device_type HMDT; 04856 typedef typename Graph::LocalStaticCrsGraphType k_local_graph_type; 04857 typedef typename 
k_local_graph_type::size_type offset_type; 04858 const char prefix[] = "Tpetra::CrsMatrix::localGaussSeidel: "; 04859 04860 TEUCHOS_TEST_FOR_EXCEPTION( 04861 ! this->isFillComplete (), std::runtime_error, 04862 prefix << "The matrix is not fill complete."); 04863 const size_t lclNumRows = this->getNodeNumRows (); 04864 const size_t numVecs = B.getNumVectors (); 04865 TEUCHOS_TEST_FOR_EXCEPTION( 04866 X.getNumVectors () != numVecs, std::invalid_argument, 04867 prefix << "B.getNumVectors() = " << numVecs << " != " 04868 "X.getNumVectors() = " << X.getNumVectors () << "."); 04869 TEUCHOS_TEST_FOR_EXCEPTION( 04870 B.getLocalLength () != lclNumRows, std::invalid_argument, 04871 prefix << "B.getLocalLength() = " << B.getLocalLength () 04872 << " != this->getNodeNumRows() = " << lclNumRows << "."); 04873 04874 typename DMV::dual_view_type::t_host B_lcl = B.template getLocalView<HMDT> (); 04875 typename RMV::dual_view_type::t_host X_lcl = X.template getLocalView<HMDT> (); 04876 typename MMV::dual_view_type::t_host D_lcl = D.template getLocalView<HMDT> (); 04877 04878 offset_type B_stride[8], X_stride[8], D_stride[8]; 04879 B_lcl.stride (B_stride); 04880 X_lcl.stride (X_stride); 04881 D_lcl.stride (D_stride); 04882 04883 k_local_matrix_type lclMatrix = this->getLocalMatrix (); 04884 k_local_graph_type lclGraph = lclMatrix.graph; 04885 typename k_local_matrix_type::row_map_type ptr = lclGraph.row_map; 04886 typename k_local_matrix_type::index_type ind = lclGraph.entries; 04887 typename k_local_matrix_type::values_type val = lclMatrix.values; 04888 const offset_type* const ptrRaw = ptr.ptr_on_device (); 04889 const LO* const indRaw = ind.ptr_on_device (); 04890 const Scalar* const valRaw = val.ptr_on_device (); 04891 04892 Kokkos::Sequential::gaussSeidel (static_cast<LO> (lclNumRows), 04893 static_cast<LO> (numVecs), 04894 ptrRaw, indRaw, valRaw, 04895 B_lcl.ptr_on_device (), B_stride[1], 04896 X_lcl.ptr_on_device (), X_stride[1], 04897 D_lcl.ptr_on_device (), dampingFactor, 
04898 direction); 04899 } 04900 04901 04902 template<class Scalar, 04903 class LocalOrdinal, 04904 class GlobalOrdinal, 04905 class DeviceType> 04906 template<class DomainScalar, 04907 class RangeScalar> 04908 void 04909 CrsMatrix< 04910 Scalar, LocalOrdinal, GlobalOrdinal, 04911 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 04912 reorderedLocalGaussSeidel (const MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,node_type>& B, 04913 MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,node_type>& X, 04914 const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,node_type>& D, 04915 const Teuchos::ArrayView<LocalOrdinal>& rowIndices, 04916 const RangeScalar& dampingFactor, 04917 const KokkosClassic::ESweepDirection direction) const 04918 { 04919 using Kokkos::Sequential::reorderedGaussSeidel; 04920 typedef LocalOrdinal LO; 04921 typedef GlobalOrdinal GO; 04922 typedef Tpetra::MultiVector<DomainScalar, LO, GO, node_type> DMV; 04923 typedef Tpetra::MultiVector<RangeScalar, LO, GO, node_type> RMV; 04924 typedef Tpetra::MultiVector<Scalar, LO, GO, node_type> MMV; 04925 typedef typename device_type::host_mirror_device_type HMDT; 04926 typedef typename Graph::LocalStaticCrsGraphType k_local_graph_type; 04927 typedef typename k_local_graph_type::size_type offset_type; 04928 const char prefix[] = "Tpetra::CrsMatrix::reorderedLocalGaussSeidel: "; 04929 04930 TEUCHOS_TEST_FOR_EXCEPTION( 04931 ! 
this->isFillComplete (), std::runtime_error, 04932 prefix << "The matrix is not fill complete."); 04933 const size_t lclNumRows = this->getNodeNumRows (); 04934 const size_t numVecs = B.getNumVectors (); 04935 TEUCHOS_TEST_FOR_EXCEPTION( 04936 X.getNumVectors () != numVecs, std::invalid_argument, 04937 prefix << "B.getNumVectors() = " << numVecs << " != " 04938 "X.getNumVectors() = " << X.getNumVectors () << "."); 04939 TEUCHOS_TEST_FOR_EXCEPTION( 04940 B.getLocalLength () != lclNumRows, std::invalid_argument, 04941 prefix << "B.getLocalLength() = " << B.getLocalLength () 04942 << " != this->getNodeNumRows() = " << lclNumRows << "."); 04943 TEUCHOS_TEST_FOR_EXCEPTION( 04944 static_cast<size_t> (rowIndices.size ()) < lclNumRows, 04945 std::invalid_argument, prefix << "rowIndices.size() = " 04946 << rowIndices.size () << " < this->getNodeNumRows() = " 04947 << lclNumRows << "."); 04948 04949 typename DMV::dual_view_type::t_host B_lcl = B.template getLocalView<HMDT> (); 04950 typename RMV::dual_view_type::t_host X_lcl = X.template getLocalView<HMDT> (); 04951 typename MMV::dual_view_type::t_host D_lcl = D.template getLocalView<HMDT> (); 04952 04953 offset_type B_stride[8], X_stride[8], D_stride[8]; 04954 B_lcl.stride (B_stride); 04955 X_lcl.stride (X_stride); 04956 D_lcl.stride (D_stride); 04957 04958 k_local_matrix_type lclMatrix = this->getLocalMatrix (); 04959 typename Graph::LocalStaticCrsGraphType lclGraph = lclMatrix.graph; 04960 typename k_local_matrix_type::index_type ind = lclGraph.entries; 04961 typename k_local_matrix_type::row_map_type ptr = lclGraph.row_map; 04962 typename k_local_matrix_type::values_type val = lclMatrix.values; 04963 const offset_type* const ptrRaw = ptr.ptr_on_device (); 04964 const LO* const indRaw = ind.ptr_on_device (); 04965 const Scalar* const valRaw = val.ptr_on_device (); 04966 04967 reorderedGaussSeidel (static_cast<LO> (lclNumRows), 04968 static_cast<LO> (numVecs), ptrRaw, indRaw, valRaw, 04969 B_lcl.ptr_on_device (), 
B_stride[1], 04970 X_lcl.ptr_on_device (), X_stride[1], 04971 D_lcl.ptr_on_device (), rowIndices.getRawPtr (), 04972 static_cast<LO> (lclNumRows), 04973 dampingFactor, direction); 04974 } 04975 04976 04977 template<class Scalar, 04978 class LocalOrdinal, 04979 class GlobalOrdinal, 04980 class DeviceType> 04981 template<class DomainScalar, 04982 class RangeScalar> 04983 void 04984 CrsMatrix< 04985 Scalar, LocalOrdinal, GlobalOrdinal, 04986 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 04987 localSolve (const MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,node_type>& Y, 04988 MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,node_type>& X, 04989 Teuchos::ETransp mode) const 04990 { 04991 using Kokkos::Sequential::triSolveKokkos; 04992 using Teuchos::CONJ_TRANS; 04993 using Teuchos::NO_TRANS; 04994 using Teuchos::TRANS; 04995 typedef LocalOrdinal LO; 04996 typedef GlobalOrdinal GO; 04997 typedef Tpetra::MultiVector<DomainScalar, LO, GO, node_type> DMV; 04998 typedef Tpetra::MultiVector<RangeScalar, LO, GO, node_type> RMV; 04999 typedef typename device_type::host_mirror_device_type HMDT; 05000 05001 const char tfecfFuncName[] = "localSolve: "; 05002 05003 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 05004 ! isFillComplete (), std::runtime_error, 05005 "The matrix is not fill complete."); 05006 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 05007 ! X.isConstantStride () || ! Y.isConstantStride (), std::invalid_argument, 05008 "X and Y must be constant stride."); 05009 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 05010 ! isUpperTriangular () && ! isLowerTriangular (), std::runtime_error, 05011 "The matrix is neither upper triangular or lower triangular. " 05012 "You may only call this method if the matrix is triangular. 
" 05013 "Remember that this is a local (per MPI process) property, and that " 05014 "Tpetra only knows how to do a local (per process) triangular solve."); 05015 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 05016 STS::isComplex && mode == TRANS, std::logic_error, "This method does " 05017 "not currently support non-conjugated transposed solve (mode == " 05018 "Teuchos::TRANS) for complex scalar types."); 05019 05020 // FIXME (mfh 27 Aug 2014) Tpetra has always made the odd decision 05021 // that if _some_ diagonal entries are missing locally, then it 05022 // assumes that the matrix has an implicitly stored unit diagonal. 05023 // Whether the matrix has an implicit unit diagonal or not should 05024 // be up to the user to decide. What if the graph has no diagonal 05025 // entries, and the user wants it that way? The only reason this 05026 // matters, though, is for the triangular solve, and in that case, 05027 // missing diagonal entries will cause trouble anyway. However, 05028 // it would make sense to warn the user if they ask for a 05029 // triangular solve with an incomplete diagonal. Furthermore, 05030 // this code should only assume an implicitly stored unit diagonal 05031 // if the matrix has _no_ explicitly stored diagonal entries. 05032 const Teuchos::EDiag diag = getNodeNumDiags () < getNodeNumRows () ? 
05033 Teuchos::UNIT_DIAG : Teuchos::NON_UNIT_DIAG; 05034 Teuchos::EUplo uplo = Teuchos::UNDEF_TRI; 05035 if (isUpperTriangular ()) { 05036 uplo = Teuchos::UPPER_TRI; 05037 } else if (isLowerTriangular ()) { 05038 uplo = Teuchos::LOWER_TRI; 05039 } 05040 05041 k_local_matrix_type A_lcl = this->getLocalMatrix (); 05042 typename DMV::dual_view_type::t_host X_lcl = X.template getLocalView<HMDT> (); 05043 typename RMV::dual_view_type::t_host Y_lcl = Y.template getLocalView<HMDT> (); 05044 triSolveKokkos (X_lcl, A_lcl, Y_lcl, uplo, diag, mode); 05045 } 05046 05047 05048 template<class Scalar, 05049 class LocalOrdinal, 05050 class GlobalOrdinal, 05051 class DeviceType> 05052 template<class T> 05053 Teuchos::RCP<CrsMatrix< 05054 T, LocalOrdinal, GlobalOrdinal, 05055 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > > 05056 CrsMatrix< 05057 Scalar, LocalOrdinal, GlobalOrdinal, 05058 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 05059 convert () const 05060 { 05061 using Teuchos::ArrayRCP; 05062 using Teuchos::RCP; 05063 using Teuchos::rcp; 05064 typedef CrsMatrix<T, LocalOrdinal, GlobalOrdinal, node_type> out_mat_type; 05065 typedef typename out_mat_type::t_ValuesType out_vals_type; 05066 typedef typename out_mat_type::k_local_matrix_type out_lcl_mat_type; 05067 typedef ArrayRCP<size_t>::size_type size_type; 05068 const char tfecfFuncName[] = "convert"; 05069 05070 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 05071 isFillComplete () == false, std::runtime_error, 05072 ": fill must be complete."); 05073 05074 // mfh 27 Feb 2014: It seems reasonable that if this matrix has a 05075 // const graph, then the returned matrix should also. However, if 05076 // this matrix does not have a const graph, then neither should 05077 // the returned matrix. The code below implements this strategy. 05078 05079 RCP<out_mat_type> newmat; // the matrix to return 05080 05081 if (this->isStaticGraph ()) { 05082 // This matrix has a const graph, so the returned matrix should too. 
05083 newmat = rcp (new out_mat_type (this->getCrsGraph ())); 05084 05085 // Convert the values from Scalar to T, and stuff them directly 05086 // into the matrix to return. 05087 const size_type numVals = 05088 static_cast<size_type> (this->k_lclMatrix_.values.dimension_0 ()); 05089 05090 // FIXME (mfh 05 Aug 2014) Write a copy kernel (scalar_type and 05091 // T differ, so we can't use Kokkos::deep_copy). 05092 // 05093 // FIXME (mfh 05 Aug 2014) This assumes UVM. 05094 out_vals_type newVals1D ("Tpetra::CrsMatrix::val", numVals); 05095 for (size_type k = 0; k < numVals; ++k) { 05096 newVals1D(k) = static_cast<T> (this->k_values1D_(k)); 05097 } 05098 newmat->k_lclMatrix_ = 05099 out_lcl_mat_type ("Tpetra::CrsMatrix::k_lclMatrix_", 05100 this->k_lclMatrix_.numCols (), newVals1D, 05101 this->k_lclMatrix_.graph); 05102 newmat->k_values1D_ = newVals1D; 05103 newmat->values1D_ = Kokkos::Compat::persistingView (newVals1D); 05104 // Since newmat has a static (const) graph, the graph already 05105 // has a column Map, and Import and Export objects already exist 05106 // (if applicable). Thus, calling fillComplete is cheap. 05107 newmat->fillComplete (this->getDomainMap (), this->getRangeMap ()); 05108 } 05109 else { 05110 // This matrix has a nonconst graph, so the returned matrix 05111 // should also have a nonconst graph. However, it's fine for 05112 // the returned matrix to have static profile. This will 05113 // certainly speed up its fillComplete. 05114 05115 // 05116 // FIXME (mfh 05 Aug 2014) Instead of the slow stuff below, we 05117 // should copy the values and existing graph into a new local 05118 // matrix (lclMatrix), and then use the Tpetra::CrsMatrix 05119 // constructor that takes (rowMap, colMap, lclMatrix, params). 05120 // 05121 05122 // Get this matrix's local data. 
05123 ArrayRCP<const size_t> ptr; 05124 ArrayRCP<const LocalOrdinal> ind; 05125 ArrayRCP<const Scalar> oldVal; 05126 this->getAllValues (ptr, ind, oldVal); 05127 05128 RCP<const map_type> rowMap = this->getRowMap (); 05129 RCP<const map_type> colMap = this->getColMap (); 05130 05131 // Get an array of the number of entries in each (locally owned) 05132 // row, so that we can make the new matrix with static profile. 05133 const size_type numLocalRows = 05134 static_cast<size_type> (rowMap->getNodeNumElements ()); 05135 ArrayRCP<size_t> numEntriesPerRow (numLocalRows); 05136 for (size_type localRow = 0; localRow < numLocalRows; ++localRow) { 05137 numEntriesPerRow[localRow] = 05138 static_cast<size_type> (getNumEntriesInLocalRow (localRow)); 05139 } 05140 05141 newmat = rcp (new out_mat_type (rowMap, colMap, numEntriesPerRow, 05142 StaticProfile)); 05143 05144 // Convert this matrix's values from Scalar to T. 05145 const size_type numVals = this->k_lclMatrix_.values.dimension_0 (); 05146 ArrayRCP<T> newVals1D (numVals); 05147 // FIXME (mfh 05 Aug 2014) This assumes UVM. 05148 for (size_type k = 0; k < numVals; ++k) { 05149 newVals1D[k] = static_cast<T> (this->k_values1D_(k)); 05150 } 05151 05152 // Give this matrix all of its local data. We can all this 05153 // method because newmat was _not_ created with a const graph. 05154 // The data must be passed in as nonconst, so we have to copy it 05155 // first. 05156 ArrayRCP<size_t> newPtr (ptr.size ()); 05157 std::copy (ptr.begin (), ptr.end (), newPtr.begin ()); 05158 ArrayRCP<LocalOrdinal> newInd (ind.size ()); 05159 std::copy (ind.begin (), ind.end (), newInd.begin ()); 05160 newmat->setAllValues (newPtr, newInd, newVals1D); 05161 05162 // We already have the Import and Export (if applicable) objects 05163 // from the graph, so we can save a lot of time by passing them 05164 // in to expertStaticFillComplete. 
05165 RCP<const map_type> domainMap = this->getDomainMap (); 05166 RCP<const map_type> rangeMap = this->getRangeMap (); 05167 RCP<const import_type> importer = this->getCrsGraph ()->getImporter (); 05168 RCP<const export_type> exporter = this->getCrsGraph ()->getExporter (); 05169 newmat->expertStaticFillComplete (domainMap, rangeMap, importer, exporter); 05170 } 05171 05172 return newmat; 05173 } 05174 05175 05176 template<class Scalar, 05177 class LocalOrdinal, 05178 class GlobalOrdinal, 05179 class DeviceType> 05180 void 05181 CrsMatrix< 05182 Scalar, LocalOrdinal, GlobalOrdinal, 05183 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 05184 checkInternalState () const 05185 { 05186 #ifdef HAVE_TPETRA_DEBUG 05187 const char tfecfFuncName[] = "checkInternalState: "; 05188 const char err[] = "Internal state is not consistent. " 05189 "Please report this bug to the Tpetra developers."; 05190 // check the internal state of this data structure 05191 // this is called by numerous state-changing methods, in a debug build, to ensure that the object 05192 // always remains in a valid state 05193 05194 // we must have a static graph 05195 // 05196 // a dynamic graph, depending on which constructor was used. 05197 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 05198 staticGraph_.is_null (), 05199 std::logic_error, err); 05200 // myGraph == null means that the matrix has a static graph. 05201 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 05202 ! myGraph_.is_null () && myGraph_ != staticGraph_, 05203 std::logic_error, err); 05204 // if matrix is fill complete, then graph must be fill complete 05205 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 05206 isFillComplete () && ! staticGraph_->isFillComplete (), 05207 std::logic_error, err << " Specifically, the matrix is fill complete, " 05208 "but its graph is NOT fill complete."); 05209 // if matrix is storage optimized, it should have a 1D allocation 05210 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 05211 isStorageOptimized () && ! 
values2D_.is_null (), 05212 std::logic_error, err); 05213 // if matrix/graph are static profile, then 2D allocation should not be present 05214 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 05215 getProfileType() == StaticProfile && values2D_ != null, 05216 std::logic_error, err); 05217 // if matrix/graph are dynamic profile, then 1D allocation should not be present 05218 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 05219 getProfileType() == DynamicProfile && k_values1D_.dimension_0 () > 0, 05220 std::logic_error, err); 05221 // if values are allocated and they are non-zero in number, then 05222 // one of the allocations should be present 05223 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 05224 staticGraph_->indicesAreAllocated () && 05225 staticGraph_->getNodeAllocationSize() > 0 && 05226 staticGraph_->getNodeNumRows() > 0 05227 && values2D_.is_null () && 05228 k_values1D_.dimension_0 () == 0, 05229 std::logic_error, err); 05230 // we cannot have both a 1D and 2D allocation 05231 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 05232 k_values1D_.dimension_0 () > 0 && values2D_ != null, 05233 std::logic_error, err << " Specifically, k_values1D_ is allocated (has " 05234 "size " << k_values1D_.dimension_0 () << " > 0) and values2D_ is also " 05235 "allocated. 
CrsMatrix is not suppose to have both a 1-D and a 2-D " 05236 "allocation at the same time."); 05237 #endif 05238 } 05239 05240 template<class Scalar, 05241 class LocalOrdinal, 05242 class GlobalOrdinal, 05243 class DeviceType> 05244 std::string 05245 CrsMatrix< 05246 Scalar, LocalOrdinal, GlobalOrdinal, 05247 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 05248 description () const 05249 { 05250 std::ostringstream os; 05251 05252 os << "Tpetra::CrsMatrix (Kokkos refactor): {"; 05253 if (this->getObjectLabel () != "") { 05254 os << "Label: \"" << this->getObjectLabel () << "\", "; 05255 } 05256 if (isFillComplete()) { 05257 os << "isFillComplete: true" 05258 << ", global dimensions: [" << getGlobalNumRows () << ", " 05259 << getGlobalNumCols () << "]" 05260 << ", global number of entries: " << getGlobalNumEntries () 05261 << "}"; 05262 } 05263 else { 05264 os << "isFillComplete: false" 05265 << ", global dimensions: [" << getGlobalNumRows () << ", " 05266 << getGlobalNumCols () << "]}"; 05267 } 05268 return os.str (); 05269 } 05270 05271 template<class Scalar, 05272 class LocalOrdinal, 05273 class GlobalOrdinal, 05274 class DeviceType> 05275 void 05276 CrsMatrix< 05277 Scalar, LocalOrdinal, GlobalOrdinal, 05278 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 05279 describe (Teuchos::FancyOStream &out, 05280 const Teuchos::EVerbosityLevel verbLevel) const 05281 { 05282 using std::endl; 05283 using std::setw; 05284 using Teuchos::Comm; 05285 using Teuchos::RCP; 05286 using Teuchos::TypeNameTraits; 05287 using Teuchos::VERB_DEFAULT; 05288 using Teuchos::VERB_NONE; 05289 using Teuchos::VERB_LOW; 05290 using Teuchos::VERB_MEDIUM; 05291 using Teuchos::VERB_HIGH; 05292 using Teuchos::VERB_EXTREME; 05293 05294 const Teuchos::EVerbosityLevel vl = (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel; 05295 05296 if (vl == VERB_NONE) { 05297 return; // Don't print anything at all 05298 } 05299 // By convention, describe() always begins with a tab. 
05300 Teuchos::OSTab tab0 (out); 05301 05302 RCP<const Comm<int> > comm = this->getComm(); 05303 const int myRank = comm->getRank(); 05304 const int numProcs = comm->getSize(); 05305 size_t width = 1; 05306 for (size_t dec=10; dec<getGlobalNumRows(); dec *= 10) { 05307 ++width; 05308 } 05309 width = std::max<size_t> (width, static_cast<size_t> (11)) + 2; 05310 05311 // none: print nothing 05312 // low: print O(1) info from node 0 05313 // medium: print O(P) info, num entries per process 05314 // high: print O(N) info, num entries per row 05315 // extreme: print O(NNZ) info: print indices and values 05316 // 05317 // for medium and higher, print constituent objects at specified verbLevel 05318 if (myRank == 0) { 05319 out << "Tpetra::CrsMatrix (Kokkos refactor):" << endl; 05320 } 05321 Teuchos::OSTab tab1 (out); 05322 05323 if (myRank == 0) { 05324 if (this->getObjectLabel () != "") { 05325 out << "Label: \"" << this->getObjectLabel () << "\", "; 05326 } 05327 { 05328 out << "Template parameters:" << endl; 05329 Teuchos::OSTab tab2 (out); 05330 out << "Scalar: " << TypeNameTraits<Scalar>::name () << endl 05331 << "LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name () << endl 05332 << "GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name () << endl 05333 << "Node: " << Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>::name () << endl; 05334 } 05335 if (isFillComplete()) { 05336 out << "isFillComplete: true" << endl 05337 << "Global dimensions: [" << getGlobalNumRows () << ", " 05338 << getGlobalNumCols () << "]" << endl 05339 << "Global number of entries: " << getGlobalNumEntries () << endl 05340 << "Global number of diagonal entries: " << getGlobalNumDiags () 05341 << endl << "Global max number of entries in a row: " 05342 << getGlobalMaxNumRowEntries () << endl; 05343 } 05344 else { 05345 out << "isFillComplete: false" << endl 05346 << "Global dimensions: [" << getGlobalNumRows () << ", " 05347 << getGlobalNumCols () << "]" << endl; 05348 } 05349 } 05350 
05351 if (vl < VERB_MEDIUM) { 05352 return; // all done! 05353 } 05354 05355 // Describe the Map Map. 05356 if (myRank == 0) { 05357 out << endl << "Row Map:" << endl; 05358 } 05359 getRowMap ()->describe (out, vl); 05360 05361 // Describe the column Map. 05362 if (myRank == 0) { 05363 out << "Column Map: "; 05364 } 05365 if (getColMap ().is_null ()) { 05366 if (myRank == 0) { 05367 out << "null" << endl; 05368 } 05369 } else if (getColMap () == getRowMap ()) { 05370 if (myRank == 0) { 05371 out << "same as row Map" << endl; 05372 } 05373 } else { 05374 if (myRank == 0) { 05375 out << endl; 05376 } 05377 getColMap ()->describe (out, vl); 05378 } 05379 05380 // Describe the domain Map. 05381 if (myRank == 0) { 05382 out << "Domain Map: "; 05383 } 05384 if (getDomainMap ().is_null ()) { 05385 if (myRank == 0) { 05386 out << "null" << endl; 05387 } 05388 } else if (getDomainMap () == getRowMap ()) { 05389 if (myRank == 0) { 05390 out << "same as row Map" << endl; 05391 } 05392 } else if (getDomainMap () == getColMap ()) { 05393 if (myRank == 0) { 05394 out << "same as column Map" << endl; 05395 } 05396 } else { 05397 if (myRank == 0) { 05398 out << endl; 05399 } 05400 getColMap ()->describe (out, vl); 05401 } 05402 05403 // Describe the range Map. 
05404 if (myRank == 0) { 05405 out << "Range Map: "; 05406 } 05407 if (getRangeMap ().is_null ()) { 05408 if (myRank == 0) { 05409 out << "null" << endl; 05410 } 05411 } else if (getRangeMap () == getDomainMap ()) { 05412 if (myRank == 0) { 05413 out << "same as domain Map" << endl; 05414 } 05415 } else if (getRangeMap () == getRowMap ()) { 05416 if (myRank == 0) { 05417 out << "same as row Map" << endl; 05418 } 05419 } else { 05420 if (myRank == 0) { 05421 out << endl; 05422 } 05423 getColMap ()->describe (out, vl); 05424 } 05425 05426 // O(P) data 05427 for (int curRank = 0; curRank < numProcs; ++curRank) { 05428 if (myRank == curRank) { 05429 out << "Process rank: " << curRank << endl; 05430 Teuchos::OSTab tab2 (out); 05431 if (! staticGraph_->indicesAreAllocated ()) { 05432 out << "Graph indices not allocated" << endl; 05433 } 05434 else { 05435 out << "Number of allocated entries: " 05436 << staticGraph_->getNodeAllocationSize () << endl; 05437 } 05438 out << "Number of entries: " << getNodeNumEntries () << endl; 05439 if (isFillComplete ()) { 05440 out << "Number of diagonal entries: " << getNodeNumDiags () << endl; 05441 } 05442 out << "Max number of entries per row: " << getNodeMaxNumRowEntries () 05443 << endl; 05444 } 05445 // Give output time to complete by executing some barriers. 05446 comm->barrier (); 05447 comm->barrier (); 05448 comm->barrier (); 05449 } 05450 05451 if (vl < VERB_HIGH) { 05452 return; // all done! 
05453 } 05454 05455 // O(N) and O(NNZ) data 05456 for (int curRank = 0; curRank < numProcs; ++curRank) { 05457 if (myRank == curRank) { 05458 out << std::setw(width) << "Proc Rank" 05459 << std::setw(width) << "Global Row" 05460 << std::setw(width) << "Num Entries"; 05461 if (vl == VERB_EXTREME) { 05462 out << std::setw(width) << "(Index,Value)"; 05463 } 05464 out << endl; 05465 for (size_t r = 0; r < getNodeNumRows (); ++r) { 05466 const size_t nE = getNumEntriesInLocalRow(r); 05467 GlobalOrdinal gid = getRowMap()->getGlobalElement(r); 05468 out << std::setw(width) << myRank 05469 << std::setw(width) << gid 05470 << std::setw(width) << nE; 05471 if (vl == VERB_EXTREME) { 05472 if (isGloballyIndexed()) { 05473 ArrayView<const GlobalOrdinal> rowinds; 05474 ArrayView<const Scalar> rowvals; 05475 getGlobalRowView (gid, rowinds, rowvals); 05476 for (size_t j = 0; j < nE; ++j) { 05477 out << " (" << rowinds[j] 05478 << ", " << rowvals[j] 05479 << ") "; 05480 } 05481 } 05482 else if (isLocallyIndexed()) { 05483 ArrayView<const LocalOrdinal> rowinds; 05484 ArrayView<const Scalar> rowvals; 05485 getLocalRowView (r, rowinds, rowvals); 05486 for (size_t j=0; j < nE; ++j) { 05487 out << " (" << getColMap()->getGlobalElement(rowinds[j]) 05488 << ", " << rowvals[j] 05489 << ") "; 05490 } 05491 } // globally or locally indexed 05492 } // vl == VERB_EXTREME 05493 out << endl; 05494 } // for each row r on this process 05495 } // if (myRank == curRank) 05496 05497 // Give output time to complete 05498 comm->barrier (); 05499 comm->barrier (); 05500 comm->barrier (); 05501 } // for each process p 05502 } 05503 05504 05505 template<class Scalar, 05506 class LocalOrdinal, 05507 class GlobalOrdinal, 05508 class DeviceType> 05509 bool 05510 CrsMatrix< 05511 Scalar, LocalOrdinal, GlobalOrdinal, 05512 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 05513 checkSizes (const SrcDistObject& source) 05514 { 05515 // It's not clear what kind of compatibility checks on sizes can 05516 // 
be performed here. Epetra_CrsGraph doesn't check any sizes for 05517 // compatibility. 05518 05519 // Currently, the source object must be a RowMatrix with the same 05520 // four template parameters as the target CrsMatrix. We might 05521 // relax this requirement later. 05522 typedef RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, node_type> row_matrix_type; 05523 const row_matrix_type* srcRowMat = 05524 dynamic_cast<const row_matrix_type*> (&source); 05525 return (srcRowMat != NULL); 05526 } 05527 05528 05529 template<class Scalar, 05530 class LocalOrdinal, 05531 class GlobalOrdinal, 05532 class DeviceType> 05533 void 05534 CrsMatrix< 05535 Scalar, LocalOrdinal, GlobalOrdinal, 05536 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 05537 copyAndPermute (const SrcDistObject& source, 05538 size_t numSameIDs, 05539 const ArrayView<const LocalOrdinal> &permuteToLIDs, 05540 const ArrayView<const LocalOrdinal> &permuteFromLIDs) 05541 { 05542 using Teuchos::Array; 05543 using Teuchos::ArrayView; 05544 typedef LocalOrdinal LO; 05545 typedef GlobalOrdinal GO; 05546 typedef node_type NT; 05547 // Method name string for TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC. 05548 const char tfecfFuncName[] = "copyAndPermute"; 05549 05550 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 05551 permuteToLIDs.size() != permuteFromLIDs.size(), 05552 std::invalid_argument, ": permuteToLIDs.size() = " << permuteToLIDs.size() 05553 << "!= permuteFromLIDs.size() = " << permuteFromLIDs.size() << "."); 05554 05555 // This dynamic cast should succeed, because we've already tested 05556 // it in checkSizes(). 05557 typedef RowMatrix<Scalar, LO, GO, NT> row_matrix_type; 05558 const row_matrix_type& srcMat = dynamic_cast<const row_matrix_type&> (source); 05559 05560 const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed (); 05561 // 05562 // Copy the first numSame row from source to target (this matrix). 05563 // This involves copying rows corresponding to LIDs [0, numSame-1]. 
05564 // 05565 const map_type& srcRowMap = * (srcMat.getRowMap ()); 05566 Array<GO> rowInds; 05567 Array<Scalar> rowVals; 05568 const LO numSameIDs_as_LID = static_cast<LO> (numSameIDs); 05569 for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) { 05570 // Global ID for the current row index in the source matrix. 05571 // The first numSameIDs GIDs in the two input lists are the 05572 // same, so sourceGID == targetGID in this case. 05573 const GO sourceGID = srcRowMap.getGlobalElement (sourceLID); 05574 const GO targetGID = sourceGID; 05575 05576 // Input views for the combineGlobalValues() call below. 05577 ArrayView<const GO> rowIndsConstView; 05578 ArrayView<const Scalar> rowValsConstView; 05579 05580 if (sourceIsLocallyIndexed) { 05581 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID); 05582 if (rowLength > static_cast<size_t> (rowInds.size())) { 05583 rowInds.resize (rowLength); 05584 rowVals.resize (rowLength); 05585 } 05586 // Resizing invalidates an Array's views, so we must make new 05587 // ones, even if rowLength hasn't changed. 05588 ArrayView<GO> rowIndsView = rowInds.view (0, rowLength); 05589 ArrayView<Scalar> rowValsView = rowVals.view (0, rowLength); 05590 05591 // The source matrix is locally indexed, so we have to get a 05592 // copy. Really it's the GIDs that have to be copied (because 05593 // they have to be converted from LIDs). 05594 size_t checkRowLength = 0; 05595 srcMat.getGlobalRowCopy (sourceGID, rowIndsView, rowValsView, checkRowLength); 05596 05597 #ifdef HAVE_TPETRA_DEBUG 05598 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowLength != checkRowLength, 05599 std::logic_error, ": For global row index " << sourceGID << ", the source" 05600 " matrix's getNumEntriesInGlobalRow() method returns a row length of " 05601 << rowLength << ", but the getGlobalRowCopy() method reports that " 05602 "the row length is " << checkRowLength << ". 
Please report this bug " 05603 "to the Tpetra developers."); 05604 #endif // HAVE_TPETRA_DEBUG 05605 05606 rowIndsConstView = rowIndsView.view (0, rowLength); 05607 rowValsConstView = rowValsView.view (0, rowLength); 05608 } 05609 else { // source matrix is globally indexed. 05610 srcMat.getGlobalRowView (sourceGID, rowIndsConstView, rowValsConstView); 05611 } 05612 05613 // Combine the data into the target matrix. 05614 if (isStaticGraph()) { 05615 // Applying a permutation to a matrix with a static graph 05616 // means REPLACE-ing entries. 05617 combineGlobalValues (targetGID, rowIndsConstView, rowValsConstView, REPLACE); 05618 } 05619 else { 05620 // Applying a permutation to a matrix with a dynamic graph 05621 // means INSERT-ing entries. This has the same effect as 05622 // ADD, if the target graph already has an entry there. 05623 combineGlobalValues (targetGID, rowIndsConstView, rowValsConstView, INSERT); 05624 } 05625 } // For each of the consecutive source and target IDs that are the same 05626 05627 // 05628 // Permute the remaining rows. 05629 // 05630 const map_type& tgtRowMap = * (this->getRowMap ()); 05631 const size_t numPermuteToLIDs = static_cast<size_t> (permuteToLIDs.size ()); 05632 for (size_t p = 0; p < numPermuteToLIDs; ++p) { 05633 const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]); 05634 const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]); 05635 05636 // Input views for the combineGlobalValues() call below. 05637 ArrayView<const GO> rowIndsConstView; 05638 ArrayView<const Scalar> rowValsConstView; 05639 05640 if (sourceIsLocallyIndexed) { 05641 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID); 05642 if (rowLength > static_cast<size_t> (rowInds.size ())) { 05643 rowInds.resize (rowLength); 05644 rowVals.resize (rowLength); 05645 } 05646 // Resizing invalidates an Array's views, so we must make new 05647 // ones, even if rowLength hasn't changed. 
05648 ArrayView<GO> rowIndsView = rowInds.view (0, rowLength); 05649 ArrayView<Scalar> rowValsView = rowVals.view (0, rowLength); 05650 05651 // The source matrix is locally indexed, so we have to get a 05652 // copy. Really it's the GIDs that have to be copied (because 05653 // they have to be converted from LIDs). 05654 size_t checkRowLength = 0; 05655 srcMat.getGlobalRowCopy (sourceGID, rowIndsView, rowValsView, checkRowLength); 05656 05657 #ifdef HAVE_TPETRA_DEBUG 05658 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowLength != checkRowLength, 05659 std::logic_error, ": For the source matrix's global row index " 05660 << sourceGID << ", the source matrix's getNumEntriesInGlobalRow() method " 05661 "returns a row length of " << rowLength << ", but the " 05662 "getGlobalRowCopy() method reports that the row length is " 05663 << checkRowLength << ". Please report this bug to the Tpetra " 05664 "developers."); 05665 #endif // HAVE_TPETRA_DEBUG 05666 05667 rowIndsConstView = rowIndsView.view (0, rowLength); 05668 rowValsConstView = rowValsView.view (0, rowLength); 05669 } 05670 else { 05671 srcMat.getGlobalRowView (sourceGID, rowIndsConstView, rowValsConstView); 05672 } 05673 05674 // Combine the data into the target matrix. 
05675 if (isStaticGraph()) { 05676 this->combineGlobalValues (targetGID, rowIndsConstView, 05677 rowValsConstView, REPLACE); 05678 } 05679 else { 05680 this->combineGlobalValues (targetGID, rowIndsConstView, 05681 rowValsConstView, INSERT); 05682 } 05683 } // For each ID to permute 05684 } 05685 05686 05687 template<class Scalar, 05688 class LocalOrdinal, 05689 class GlobalOrdinal, 05690 class DeviceType> 05691 void 05692 CrsMatrix< 05693 Scalar, LocalOrdinal, GlobalOrdinal, 05694 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 05695 packAndPrepare (const SrcDistObject& source, 05696 const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs, 05697 Teuchos::Array<char>& exports, 05698 const Teuchos::ArrayView<size_t>& numPacketsPerLID, 05699 size_t& constantNumPackets, 05700 Distributor& distor) 05701 { 05702 using Teuchos::Array; 05703 using Teuchos::ArrayView; 05704 using Teuchos::av_reinterpret_cast; 05705 typedef LocalOrdinal LO; 05706 typedef GlobalOrdinal GO; 05707 //typedef typename ArrayView<const LO>::size_type size_type; // unused 05708 const char tfecfFuncName[] = "packAndPrepare"; 05709 05710 // Attempt to cast the source object to RowMatrix. If the cast 05711 // succeeds, use the source object's pack method to pack its data 05712 // for communication. If the source object is really a CrsMatrix, 05713 // this will pick up the CrsMatrix's more efficient override. If 05714 // the RowMatrix cast fails, then the source object doesn't have 05715 // the right type. 05716 // 05717 // FIXME (mfh 30 Jun 2013) We don't even need the RowMatrix to 05718 // have the same Node type. Unfortunately, we don't have a way to 05719 // ask if the RowMatrix is "a RowMatrix with any Node type," since 05720 // RowMatrix doesn't have a base class. A hypothetical 05721 // RowMatrixBase<Scalar, LO, GO> class, which does not currently 05722 // exist, would satisfy this requirement. 05723 // 05724 // Why RowMatrixBase<Scalar, LO, GO>? 
The source object's Scalar 05725 // type doesn't technically need to match the target object's 05726 // Scalar type, so we could just have RowMatrixBase<LO, GO>. LO 05727 // and GO need not be the same, as long as there is no overflow of 05728 // the indices. However, checking for index overflow is global 05729 // and therefore undesirable. 05730 typedef RowMatrix<Scalar, LO, GO, node_type> row_matrix_type; 05731 const row_matrix_type* srcRowMat = 05732 dynamic_cast<const row_matrix_type*> (&source); 05733 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 05734 srcRowMat == NULL, std::invalid_argument, 05735 ": The source object of the Import or Export operation is neither a " 05736 "CrsMatrix (with the same template parameters as the target object), " 05737 "nor a RowMatrix (with the same first four template parameters as the " 05738 "target object)."); 05739 srcRowMat->pack (exportLIDs, exports, numPacketsPerLID, 05740 constantNumPackets, distor); 05741 } 05742 05743 05744 template<class Scalar, 05745 class LocalOrdinal, 05746 class GlobalOrdinal, 05747 class DeviceType> 05748 void 05749 CrsMatrix< 05750 Scalar, LocalOrdinal, GlobalOrdinal, 05751 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 05752 pack (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs, 05753 Teuchos::Array<char>& exports, 05754 const Teuchos::ArrayView<size_t>& numPacketsPerLID, 05755 size_t& constantNumPackets, 05756 Distributor &distor) const 05757 { 05758 using Teuchos::Array; 05759 using Teuchos::ArrayView; 05760 using Teuchos::av_reinterpret_cast; 05761 using Teuchos::RCP; 05762 typedef LocalOrdinal LO; 05763 typedef GlobalOrdinal GO; 05764 typedef typename ArrayView<const LO>::size_type size_type; 05765 const char tfecfFuncName[] = "pack"; 05766 05767 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 05768 exportLIDs.size() != numPacketsPerLID.size(), 05769 std::invalid_argument, ": exportLIDs.size() = " << exportLIDs.size() 05770 << "!= numPacketsPerLID.size() = " << numPacketsPerLID.size() << 
"."); 05771 05772 // Get a reference to the matrix's row Map. 05773 const map_type& rowMap = * (this->getRowMap ()); 05774 05775 const bool locallyIndexed = this->isLocallyIndexed (); 05776 constantNumPackets = 0; 05777 05778 // Get the GIDs of the rows we want to pack. 05779 Array<GO> exportGIDs (exportLIDs.size ()); 05780 const size_type numExportGIDs = exportGIDs.size (); 05781 for (size_type i = 0; i < numExportGIDs; ++i) { 05782 exportGIDs[i] = rowMap.getGlobalElement (exportLIDs[i]); 05783 } 05784 05785 // We say "Packet" is char (really a "byte"), but the actual unit 05786 // of packing is a (GID, value) pair. The GID is the column index 05787 // in that row of the sparse matrix, and the value is the value at 05788 // that entry of the sparse matrix. Thus, we have to scale 05789 // numPacketsPerLID by the number of bytes in a _packed_ (GID, 05790 // value) pair. (We pack the GID and value in each pair 05791 // separately, so the number of bytes in a packed pair is actually 05792 // sizeof(GO) + sizeof(Scalar).) 05793 // 05794 // FIXME (mfh 24 Feb 2013) This code is only correct if 05795 // sizeof(Scalar) is a meaningful representation of the amount of 05796 // data in a Scalar instance. (GO is always a built-in integer 05797 // type.) 05798 // 05799 // Compute the number of packets per export LID, and accumulate 05800 // the total number of packages. While doing so, find the max 05801 // number of entries in each row owned by this process; we will 05802 // use that to size temporary arrays below. 
05803 const size_t sizeOfOrdValPair = sizeof (GO) + sizeof (Scalar); 05804 size_t totalNumEntries = 0; 05805 size_t maxRowLength = 0; 05806 for (size_type i = 0; i < exportGIDs.size(); ++i) { 05807 const size_t curNumEntries = 05808 this->getNumEntriesInGlobalRow (exportGIDs[i]); 05809 numPacketsPerLID[i] = curNumEntries * sizeOfOrdValPair; 05810 totalNumEntries += curNumEntries; 05811 maxRowLength = std::max (curNumEntries, maxRowLength); 05812 } 05813 05814 // Pack export data by interleaving rows' indices and values in 05815 // the following way: 05816 // 05817 // [inds_row0 vals_row0 inds_row1 vals_row1 ... ] 05818 if (totalNumEntries > 0) { 05819 // exports is an array of char (bytes), so scale the total 05820 // number of entries by the number of bytes per entry (where 05821 // "entry" includes both the column index and the value). 05822 const size_t totalNumBytes = totalNumEntries * sizeOfOrdValPair; 05823 exports.resize (totalNumBytes); 05824 05825 // Current position in the 'exports' output array. 05826 size_t curOffsetInBytes = 0; 05827 05828 // For each row of the matrix owned by the calling process, pack 05829 // that row's column indices and values into the exports array. 05830 // If the matrix is globally indexed, we can use view semantics 05831 // (getGlobalRowView), which should be faster than copy 05832 // semantics (getGlobalRowCopy). Otherwise, we'll have to use 05833 // copy semantics. 05834 // 05835 // FIXME (mfh 28 Jun 2013) This could be made a (shared-memory) 05836 // parallel kernel, by using the CSR data layout to calculate 05837 // positions in the output buffer. 05838 if (locallyIndexed) { 05839 // Locally indexed matrices always have a column Map. 05840 const map_type& colMap = * (this->getColMap ()); 05841 05842 // Views of the column LIDs and values in each row. It's 05843 // worth creating empty views here, because they aren't 05844 // returned by getLocalRowView; that method will modify (set) 05845 // them in place. 
05846 ArrayView<const LO> lidsView; 05847 ArrayView<const Scalar> valsView; 05848 05849 // Temporary buffer for a copy of the column indices (as GIDs) 05850 // in each row. Import and Export operations to a CrsMatrix 05851 // target currently expect GIDs, not LIDs. 05852 // 05853 // FIXME (mfh 30 Jun 2013) If the source and target have the 05854 // same column Maps, it would make sense to pack column 05855 // indices as LIDs instead of GIDs. Packing them as GIDs is 05856 // correct, but it's inefficient to convert LIDs to GIDs and 05857 // then back again on receipt. Furthermore, GIDs might be 05858 // larger than LIDs, thus costing more bandwidth. 05859 Array<GO> gids (static_cast<size_type> (maxRowLength)); 05860 05861 const size_type numExportLIDs = exportLIDs.size (); 05862 for (size_type i = 0; i < numExportLIDs; ++i) { 05863 // Get a (locally indexed) view of the current row's data. 05864 this->getLocalRowView (exportLIDs[i], lidsView, valsView); 05865 05866 // Convert column indices as LIDs to column indices as GIDs. 05867 const size_type curNumEntries = lidsView.size (); 05868 ArrayView<GO> gidsView = gids (0, curNumEntries); 05869 for (size_type k = 0; k < curNumEntries; ++k) { 05870 gidsView[k] = colMap.getGlobalElement (lidsView[k]); 05871 } 05872 05873 // Get views of the spots in the exports array in which to 05874 // put the indices resp. values. The type cast makes the 05875 // views look like GO resp. Scalar, when the array they are 05876 // viewing is really an array of char. 
05877 ArrayView<char> gidsViewOutChar = 05878 exports (curOffsetInBytes, 05879 static_cast<size_t> (curNumEntries) * sizeof (GO)); 05880 ArrayView<char> valsViewOutChar = 05881 exports (curOffsetInBytes + static_cast<size_t> (curNumEntries) * sizeof (GO), 05882 static_cast<size_t> (curNumEntries) * sizeof (Scalar)); 05883 ArrayView<GO> gidsViewOut = av_reinterpret_cast<GO> (gidsViewOutChar); 05884 ArrayView<Scalar> valsViewOut = av_reinterpret_cast<Scalar> (valsViewOutChar); 05885 05886 // Copy the row's data into the views of the exports array. 05887 std::copy (gidsView.begin (), 05888 gidsView.begin () + static_cast<size_type> (curNumEntries), 05889 gidsViewOut.begin ()); 05890 std::copy (valsView.begin (), 05891 valsView.begin () + static_cast<size_type> (curNumEntries), 05892 valsViewOut.begin ()); 05893 // Keep track of how many bytes we packed. 05894 curOffsetInBytes += sizeOfOrdValPair * curNumEntries; 05895 } 05896 } 05897 else { // the matrix is globally indexed 05898 ArrayView<const GO> gidsView; 05899 ArrayView<const Scalar> valsView; 05900 05901 const size_type numExportLIDs = exportLIDs.size (); 05902 for (size_type i = 0; i < numExportLIDs; ++i) { 05903 // Get a view of the current row's data. 05904 this->getGlobalRowView (exportGIDs[i], gidsView, valsView); 05905 const size_t curNumEntries = static_cast<size_t> (gidsView.size ()); 05906 // Get views of the spots in the exports array in which to 05907 // put the indices resp. values. See notes and FIXME above. 05908 05909 ArrayView<char> gidsViewOutChar = 05910 exports (curOffsetInBytes, curNumEntries * sizeof (GO)); 05911 ArrayView<char> valsViewOutChar = 05912 exports (curOffsetInBytes + curNumEntries * sizeof (GO), 05913 curNumEntries * sizeof (Scalar)); 05914 ArrayView<GO> gidsViewOut = av_reinterpret_cast<GO> (gidsViewOutChar); 05915 ArrayView<Scalar> valsViewOut = av_reinterpret_cast<Scalar> (valsViewOutChar); 05916 05917 // Copy the row's data into the views of the exports array. 
05918 std::copy (gidsView.begin (), gidsView.end (), gidsViewOut.begin ()); 05919 std::copy (valsView.begin (), valsView.end (), valsViewOut.begin ()); 05920 // Keep track of how many bytes we packed. 05921 curOffsetInBytes += sizeOfOrdValPair * curNumEntries; 05922 } 05923 } 05924 05925 #ifdef HAVE_TPETRA_DEBUG 05926 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(curOffsetInBytes != totalNumBytes, 05927 std::logic_error, ": At end of method, the final offset bytes count " 05928 "curOffsetInBytes=" << curOffsetInBytes << " does not equal the total " 05929 "number of bytes packed totalNumBytes=" << totalNumBytes << ". Please " 05930 "report this bug to the Tpetra developers."); 05931 #endif // HAVE_TPETRA_DEBUG 05932 } 05933 } 05934 05935 05936 template<class Scalar, 05937 class LocalOrdinal, 05938 class GlobalOrdinal, 05939 class DeviceType> 05940 void 05941 CrsMatrix< 05942 Scalar, LocalOrdinal, GlobalOrdinal, 05943 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 05944 combineGlobalValues (const GlobalOrdinal globalRowIndex, 05945 const ArrayView<const GlobalOrdinal> columnIndices, 05946 const ArrayView<const Scalar> values, 05947 const Tpetra::CombineMode combineMode) 05948 { 05949 if (isStaticGraph()) { 05950 // INSERT doesn't make sense for a static graph, since you 05951 // aren't allowed to change the structure of the graph. 05952 // However, all the other combine modes work. 
05953 if (combineMode == ADD) { 05954 sumIntoGlobalValues (globalRowIndex, columnIndices, values); 05955 } 05956 else if (combineMode == REPLACE) { 05957 replaceGlobalValues (globalRowIndex, columnIndices, values); 05958 } 05959 else if (combineMode == ABSMAX) { 05960 using Details::AbsMax; 05961 AbsMax<Scalar> f; 05962 this->template transformGlobalValues<AbsMax<Scalar> > (globalRowIndex, 05963 columnIndices(), 05964 values(), f); 05965 } 05966 else if (combineMode == INSERT) { 05967 TEUCHOS_TEST_FOR_EXCEPTION(isStaticGraph() && combineMode == INSERT, 05968 std::invalid_argument, "combineGlobalValues: INSERT combine mode " 05969 "is not allowed if the matrix has a static graph (i.e., was " 05970 "constructed with the CrsMatrix constructor that takes a const " 05971 "CrsGraph pointer)."); 05972 } 05973 else { 05974 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, 05975 "combineGlobalValues: Invalid combine mode; should never get here! " 05976 "Please report this bug to the Tpetra developers."); 05977 } 05978 } 05979 else { // The matrix has a dynamic graph. 05980 if (combineMode == ADD || combineMode == INSERT) { 05981 // For a dynamic graph, all incoming column indices are 05982 // inserted into the target graph. Duplicate indices will 05983 // have their values summed. In this context, ADD and INSERT 05984 // are equivalent. We need to call insertGlobalValues() 05985 // anyway if the column indices don't yet exist in this row, 05986 // so we just call insertGlobalValues() for both cases. 05987 insertGlobalValuesFiltered (globalRowIndex, columnIndices, values); 05988 } 05989 // FIXME (mfh 14 Mar 2012): 05990 // 05991 // Implementing ABSMAX or REPLACE for a dynamic graph would 05992 // require modifying assembly to attach a possibly different 05993 // combine mode to each inserted (i, j, A_ij) entry. 
For 05994 // example, consider two different Export operations to the same 05995 // target CrsMatrix, the first with ABSMAX combine mode and the 05996 // second with REPLACE. This isn't a common use case, so we 05997 // won't mess with it for now. 05998 else if (combineMode == ABSMAX) { 05999 TEUCHOS_TEST_FOR_EXCEPTION(! isStaticGraph() && combineMode == ABSMAX, 06000 std::logic_error, "combineGlobalValues: ABSMAX combine mode when " 06001 "the matrix has a dynamic graph is not yet implemented."); 06002 } 06003 else if (combineMode == REPLACE) { 06004 TEUCHOS_TEST_FOR_EXCEPTION(! isStaticGraph() && combineMode == REPLACE, 06005 std::logic_error, "combineGlobalValues: REPLACE combine mode when " 06006 "the matrix has a dynamic graph is not yet implemented."); 06007 } 06008 else { 06009 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, 06010 "combineGlobalValues: Should never get here! Please report this bug" 06011 "to the Tpetra developers."); 06012 } 06013 } 06014 } 06015 06016 06017 template<class Scalar, 06018 class LocalOrdinal, 06019 class GlobalOrdinal, 06020 class DeviceType> 06021 void 06022 CrsMatrix< 06023 Scalar, LocalOrdinal, GlobalOrdinal, 06024 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 06025 unpackAndCombine (const Teuchos::ArrayView<const LocalOrdinal>& importLIDs, 06026 const Teuchos::ArrayView<const char>& imports, 06027 const Teuchos::ArrayView<size_t>& numPacketsPerLID, 06028 size_t constantNumPackets, 06029 Distributor & /* distor */, 06030 CombineMode combineMode) 06031 { 06032 using Teuchos::ArrayView; 06033 using Teuchos::av_reinterpret_cast; 06034 typedef LocalOrdinal LO; 06035 typedef GlobalOrdinal GO; 06036 typedef typename ArrayView<const LO>::size_type size_type; 06037 const char tfecfFuncName[] = "unpackAndCombine"; 06038 06039 #ifdef HAVE_TPETRA_DEBUG 06040 const CombineMode validModes[4] = {ADD, REPLACE, ABSMAX, INSERT}; 06041 const char* validModeNames[4] = {"ADD", "REPLACE", "ABSMAX", "INSERT"}; 06042 const int 
numValidModes = 4; 06043 06044 if (std::find (validModes, validModes+numValidModes, combineMode) == 06045 validModes+numValidModes) { 06046 std::ostringstream os; 06047 os << "unpackAndCombine: Invalid combine mode. Valid modes are {"; 06048 for (int k = 0; k < numValidModes; ++k) { 06049 os << validModeNames[k]; 06050 if (k < numValidModes - 1) { 06051 os << ", "; 06052 } 06053 } 06054 os << "}."; 06055 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::invalid_argument, os.str()); 06056 } 06057 #endif // HAVE_TPETRA_DEBUG 06058 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 06059 importLIDs.size() != numPacketsPerLID.size(), 06060 std::invalid_argument, "importLIDs.size() = " << importLIDs.size() 06061 << "!= numPacketsPerLID.size() = " << numPacketsPerLID.size() << "."); 06062 06063 // FIXME (mfh 05 Dec 2012) Here are all the assumptions encoded in 06064 // the following line of code: 06065 // 06066 // 1. The data (index,value) for each element are packed tightly, 06067 // with no extra space in between. 06068 // 06069 // 2. sizeof(Scalar) says how much data were used to represent a 06070 // Scalar in its packed form. 06071 // 06072 // 3. All processes and all instances of Scalar use the same 06073 // amount of data to represent a Scalar. (GlobalOrdinal is 06074 // typically a built-in integer type, so this is generally true 06075 // for GlobalOrdinal.) 06076 // 06077 const size_t SizeOfOrdValPair = sizeof (GO) + sizeof (Scalar); 06078 const size_t totalNumBytes = imports.size (); // * sizeof(char), i.e., 1. 06079 const size_t totalNumEntries = totalNumBytes / SizeOfOrdValPair; 06080 06081 if (totalNumEntries > 0) { 06082 const map_type& rowMap = * (this->getMap ()); 06083 06084 // data packed as follows: 06085 // [inds_row0 vals_row0 inds_row1 vals_row1 ...] 
06086 ArrayView<const char> avIndsC, avValsC; 06087 ArrayView<const GO> avInds; 06088 ArrayView<const Scalar> avVals; 06089 06090 size_t curOffsetInBytes = 0; 06091 for (size_type i = 0; i < importLIDs.size (); ++i) { 06092 const size_t rowSize = numPacketsPerLID[i] / SizeOfOrdValPair; 06093 // Needs to be in here in case of zero length rows. If not, 06094 // the lines following the if statement error out if the row 06095 // length is zero. KLN 13/06/2011 06096 // 06097 // mfh 05 Dec 2012: The problem to which Kurtis refers in the 06098 // above comment may no longer be an issue, since 06099 // ArrayView::view() (which implements ArrayView::operator()) 06100 // now allows views of length zero. 06101 if (rowSize == 0) { 06102 continue; 06103 } 06104 const LO LID = importLIDs[i]; 06105 const GO myGID = rowMap.getGlobalElement (LID); 06106 06107 // Get views of the import (incoming data) buffers. Again, 06108 // this code assumes that sizeof(Scalar) is the number of 06109 // bytes used by each Scalar. It also assumes that 06110 // Teuchos::Comm has correctly deserialized Scalar in place in 06111 // avValsC. 06112 avIndsC = imports (curOffsetInBytes, rowSize * sizeof (GO)); 06113 avValsC = imports (curOffsetInBytes + rowSize * sizeof (GO), 06114 rowSize * sizeof (Scalar)); 06115 avInds = av_reinterpret_cast<const GO> (avIndsC); 06116 avVals = av_reinterpret_cast<const Scalar> (avValsC); 06117 06118 combineGlobalValues (myGID, avInds (), avVals (), combineMode); 06119 curOffsetInBytes += rowSize * SizeOfOrdValPair; 06120 } 06121 #ifdef HAVE_TPETRA_DEBUG 06122 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(curOffsetInBytes != totalNumBytes, 06123 std::logic_error, "After unpacking and combining all the imports, the " 06124 "final offset in bytes curOffsetInBytes=" << curOffsetInBytes << " != " 06125 "total number of bytes totalNumBytes=" << totalNumBytes << ". 
Please " 06126 "report this bug to the Tpetra developers."); 06127 #endif // HAVE_TPETRA_DEBUG 06128 } 06129 } 06130 06131 template<class Scalar, 06132 class LocalOrdinal, 06133 class GlobalOrdinal, 06134 class DeviceType> 06135 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, 06136 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > > 06137 CrsMatrix< 06138 Scalar, LocalOrdinal, GlobalOrdinal, 06139 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 06140 getColumnMapMultiVector (const MV& X_domainMap, 06141 const bool force) const 06142 { 06143 using Teuchos::null; 06144 using Teuchos::RCP; 06145 using Teuchos::rcp; 06146 06147 TEUCHOS_TEST_FOR_EXCEPTION( 06148 ! this->hasColMap (), std::runtime_error, "Tpetra::CrsMatrix::getColumn" 06149 "MapMultiVector: You may only call this method if the matrix has a " 06150 "column Map. If the matrix does not yet have a column Map, you should " 06151 "first call fillComplete (with domain and range Map if necessary)."); 06152 06153 // If the graph is not fill complete, then the Import object (if 06154 // one should exist) hasn't been constructed yet. 06155 TEUCHOS_TEST_FOR_EXCEPTION( 06156 ! this->getGraph ()->isFillComplete (), std::runtime_error, "Tpetra::" 06157 "CrsMatrix::getColumnMapMultiVector: You may only call this method if " 06158 "this matrix's graph is fill complete."); 06159 06160 const size_t numVecs = X_domainMap.getNumVectors (); 06161 RCP<const import_type> importer = this->getGraph ()->getImporter (); 06162 RCP<const map_type> colMap = this->getColMap (); 06163 06164 RCP<MV> X_colMap; // null by default 06165 06166 // If the Import object is trivial (null), then we don't need a 06167 // separate column Map multivector. Just return null in that 06168 // case. The caller is responsible for knowing not to use the 06169 // returned null pointer. 06170 // 06171 // If the Import is nontrivial, then we do need a separate 06172 // column Map multivector for the Import operation. 
Check in 06173 // that case if we have to (re)create the column Map 06174 // multivector. 06175 if (! importer.is_null () || force) { 06176 if (importMV_.is_null () || importMV_->getNumVectors () != numVecs) { 06177 X_colMap = rcp (new MV (colMap, numVecs)); 06178 06179 // Cache the newly created multivector for later reuse. 06180 importMV_ = X_colMap; 06181 } 06182 else { // Yay, we can reuse the cached multivector! 06183 X_colMap = importMV_; 06184 // mfh 09 Jan 2013: We don't have to fill with zeros first, 06185 // because the Import uses INSERT combine mode, which overwrites 06186 // existing entries. 06187 // 06188 //X_colMap->putScalar (STS::zero ()); 06189 } 06190 } 06191 return X_colMap; 06192 } 06193 06194 06195 template <class Scalar, 06196 class LocalOrdinal, 06197 class GlobalOrdinal, 06198 class DeviceType> 06199 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, 06200 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > > 06201 CrsMatrix< 06202 Scalar, LocalOrdinal, GlobalOrdinal, 06203 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 06204 getRowMapMultiVector (const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >& Y_rangeMap, 06205 const bool force) const 06206 { 06207 using Teuchos::null; 06208 using Teuchos::RCP; 06209 using Teuchos::rcp; 06210 06211 // If the graph is not fill complete, then the Export object (if 06212 // one should exist) hasn't been constructed yet. 06213 TEUCHOS_TEST_FOR_EXCEPTION( 06214 ! this->getGraph ()->isFillComplete (), std::runtime_error, "Tpetra::" 06215 "CrsMatrix::getRowMapMultiVector: You may only call this method if this " 06216 "matrix's graph is fill complete."); 06217 06218 const size_t numVecs = Y_rangeMap.getNumVectors (); 06219 RCP<const export_type> exporter = this->getGraph ()->getExporter (); 06220 // Every version of the constructor takes either a row Map, or a 06221 // graph (all of whose constructors take a row Map). 
Thus, the 06222 // matrix always has a row Map. 06223 RCP<const map_type> rowMap = this->getRowMap (); 06224 06225 RCP<MV> Y_rowMap; // null by default 06226 06227 // If the Export object is trivial (null), then we don't need a 06228 // separate row Map multivector. Just return null in that case. 06229 // The caller is responsible for knowing not to use the returned 06230 // null pointer. 06231 // 06232 // If the Export is nontrivial, then we do need a separate row 06233 // Map multivector for the Export operation. Check in that case 06234 // if we have to (re)create the row Map multivector. 06235 if (! exporter.is_null () || force) { 06236 if (exportMV_.is_null () || exportMV_->getNumVectors () != numVecs) { 06237 Y_rowMap = rcp (new MV (rowMap, numVecs)); 06238 06239 // Cache the newly created multivector for later reuse. 06240 exportMV_ = Y_rowMap; 06241 } 06242 else { // Yay, we can reuse the cached multivector! 06243 Y_rowMap = exportMV_; 06244 } 06245 } 06246 return Y_rowMap; 06247 } 06248 06249 06250 template <class Scalar, 06251 class LocalOrdinal, 06252 class GlobalOrdinal, 06253 class DeviceType> 06254 void 06255 CrsMatrix< 06256 Scalar, LocalOrdinal, GlobalOrdinal, 06257 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 06258 removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& newMap) 06259 { 06260 TEUCHOS_TEST_FOR_EXCEPTION( 06261 myGraph_.is_null (), std::logic_error, "Tpetra::CrsMatrix::" 06262 "removeEmptyProcessesInPlace: This method does not work when the matrix " 06263 "was created with a constant graph (that is, when it was created using " 06264 "the version of its constructor that takes an RCP<const CrsGraph>). 
" 06265 "This is because the matrix is not allowed to modify the graph in that " 06266 "case, but removing empty processes requires modifying the graph."); 06267 myGraph_->removeEmptyProcessesInPlace (newMap); 06268 // Even though CrsMatrix's row Map (as returned by getRowMap()) 06269 // comes from its CrsGraph, CrsMatrix still implements DistObject, 06270 // so we also have to change the DistObject's Map. 06271 this->map_ = this->getRowMap (); 06272 // In the nonconst graph case, staticGraph_ is just a const 06273 // pointer to myGraph_. This assignment is probably redundant, 06274 // but it doesn't hurt. 06275 staticGraph_ = Teuchos::rcp_const_cast<const Graph> (myGraph_); 06276 } 06277 06278 06279 template <class Scalar, 06280 class LocalOrdinal, 06281 class GlobalOrdinal, 06282 class DeviceType> 06283 Teuchos::RCP<RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, 06284 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > > 06285 CrsMatrix< 06286 Scalar, LocalOrdinal, GlobalOrdinal, 06287 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 06288 add (const Scalar& alpha, 06289 const RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, node_type>& A, 06290 const Scalar& beta, 06291 const Teuchos::RCP<const map_type>& domainMap, 06292 const Teuchos::RCP<const map_type>& rangeMap, 06293 const Teuchos::RCP<Teuchos::ParameterList>& params) const 06294 { 06295 using Teuchos::Array; 06296 using Teuchos::ArrayRCP; 06297 using Teuchos::as; 06298 using Teuchos::ParameterList; 06299 using Teuchos::RCP; 06300 using Teuchos::rcp; 06301 using Teuchos::rcp_implicit_cast; 06302 using Teuchos::sublist; 06303 typedef LocalOrdinal LO; 06304 typedef GlobalOrdinal GO; 06305 typedef RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, node_type> row_matrix_type; 06306 typedef CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, node_type> crs_matrix_type; 06307 06308 const crs_matrix_type& B = *this; // a convenient abbreviation 06309 06310 // If the user didn't supply a domain or range Map, then 
try to 06311 // get one from B first (if it has them), then from A (if it has 06312 // them). If we don't have any domain or range Maps, scold the 06313 // user. 06314 RCP<const map_type> A_domainMap = A.getDomainMap (); 06315 RCP<const map_type> A_rangeMap = A.getRangeMap (); 06316 RCP<const map_type> B_domainMap = B.getDomainMap (); 06317 RCP<const map_type> B_rangeMap = B.getRangeMap (); 06318 06319 RCP<const map_type> theDomainMap = domainMap; 06320 RCP<const map_type> theRangeMap = rangeMap; 06321 06322 if (domainMap.is_null ()) { 06323 if (B_domainMap.is_null ()) { 06324 TEUCHOS_TEST_FOR_EXCEPTION( 06325 A_domainMap.is_null (), std::invalid_argument, 06326 "Tpetra::CrsMatrix::add: If neither A nor B have a domain Map, " 06327 "then you must supply a nonnull domain Map to this method."); 06328 theDomainMap = A_domainMap; 06329 } else { 06330 theDomainMap = B_domainMap; 06331 } 06332 } 06333 if (rangeMap.is_null ()) { 06334 if (B_rangeMap.is_null ()) { 06335 TEUCHOS_TEST_FOR_EXCEPTION( 06336 A_rangeMap.is_null (), std::invalid_argument, 06337 "Tpetra::CrsMatrix::add: If neither A nor B have a range Map, " 06338 "then you must supply a nonnull range Map to this method."); 06339 theRangeMap = A_rangeMap; 06340 } else { 06341 theRangeMap = B_rangeMap; 06342 } 06343 } 06344 06345 #ifdef HAVE_TPETRA_DEBUG 06346 // In a debug build, check that A and B have matching domain and 06347 // range Maps, if they have domain and range Maps at all. (If 06348 // they aren't fill complete, then they may not yet have them.) 06349 if (! A_domainMap.is_null () && ! A_rangeMap.is_null ()) { 06350 if (! B_domainMap.is_null () && ! B_rangeMap.is_null ()) { 06351 TEUCHOS_TEST_FOR_EXCEPTION( 06352 ! B_domainMap->isSameAs (*A_domainMap), std::invalid_argument, 06353 "Tpetra::CrsMatrix::add: The input RowMatrix A must have a domain Map " 06354 "which is the same as (isSameAs) this RowMatrix's domain Map."); 06355 TEUCHOS_TEST_FOR_EXCEPTION( 06356 ! 
B_rangeMap->isSameAs (*A_rangeMap), std::invalid_argument, 06357 "Tpetra::CrsMatrix::add: The input RowMatrix A must have a range Map " 06358 "which is the same as (isSameAs) this RowMatrix's range Map."); 06359 TEUCHOS_TEST_FOR_EXCEPTION( 06360 ! domainMap.is_null () && ! domainMap->isSameAs (*B_domainMap), 06361 std::invalid_argument, 06362 "Tpetra::CrsMatrix::add: The input domain Map must be the same as " 06363 "(isSameAs) this RowMatrix's domain Map."); 06364 TEUCHOS_TEST_FOR_EXCEPTION( 06365 ! rangeMap.is_null () && ! rangeMap->isSameAs (*B_rangeMap), 06366 std::invalid_argument, 06367 "Tpetra::CrsMatrix::add: The input range Map must be the same as " 06368 "(isSameAs) this RowMatrix's range Map."); 06369 } 06370 } 06371 else if (! B_domainMap.is_null () && ! B_rangeMap.is_null ()) { 06372 TEUCHOS_TEST_FOR_EXCEPTION( 06373 ! domainMap.is_null () && ! domainMap->isSameAs (*B_domainMap), 06374 std::invalid_argument, 06375 "Tpetra::CrsMatrix::add: The input domain Map must be the same as " 06376 "(isSameAs) this RowMatrix's domain Map."); 06377 TEUCHOS_TEST_FOR_EXCEPTION( 06378 ! rangeMap.is_null () && ! rangeMap->isSameAs (*B_rangeMap), 06379 std::invalid_argument, 06380 "Tpetra::CrsMatrix::add: The input range Map must be the same as " 06381 "(isSameAs) this RowMatrix's range Map."); 06382 } 06383 else { 06384 TEUCHOS_TEST_FOR_EXCEPTION( 06385 domainMap.is_null () || rangeMap.is_null (), std::invalid_argument, 06386 "Tpetra::CrsMatrix::add: If neither A nor B have a domain and range " 06387 "Map, then you must supply a nonnull domain and range Map to this " 06388 "method."); 06389 } 06390 #endif // HAVE_TPETRA_DEBUG 06391 06392 // What parameters do we pass to C's constructor? Do we call 06393 // fillComplete on C after filling it? And if so, what parameters 06394 // do we pass to C's fillComplete call? 06395 bool callFillComplete = true; 06396 RCP<ParameterList> constructorSublist; 06397 RCP<ParameterList> fillCompleteSublist; 06398 if (! 
params.is_null ()) { 06399 callFillComplete = params->get ("Call fillComplete", callFillComplete); 06400 constructorSublist = sublist (params, "Constructor parameters"); 06401 fillCompleteSublist = sublist (params, "fillComplete parameters"); 06402 } 06403 06404 RCP<const map_type> A_rowMap = A.getRowMap (); 06405 RCP<const map_type> B_rowMap = B.getRowMap (); 06406 RCP<const map_type> C_rowMap = B_rowMap; // see discussion in documentation 06407 RCP<crs_matrix_type> C; // The result matrix. 06408 06409 // If A and B's row Maps are the same, we can compute an upper 06410 // bound on the number of entries in each row of C, before 06411 // actually computing the sum. A reasonable upper bound is the 06412 // sum of the two entry counts in each row. If we choose this as 06413 // the actual per-row upper bound, we can use static profile. 06414 if (A_rowMap->isSameAs (*B_rowMap)) { 06415 const LO localNumRows = static_cast<LO> (A_rowMap->getNodeNumElements ()); 06416 ArrayRCP<size_t> C_maxNumEntriesPerRow (localNumRows, 0); 06417 06418 // Get the number of entries in each row of A. 06419 if (alpha != STS::zero ()) { 06420 for (LO localRow = 0; localRow < localNumRows; ++localRow) { 06421 const size_t A_numEntries = A.getNumEntriesInLocalRow (localRow); 06422 C_maxNumEntriesPerRow[localRow] += A_numEntries; 06423 } 06424 } 06425 // Get the number of entries in each row of B. 06426 if (beta != STS::zero ()) { 06427 for (LO localRow = 0; localRow < localNumRows; ++localRow) { 06428 const size_t B_numEntries = B.getNumEntriesInLocalRow (localRow); 06429 C_maxNumEntriesPerRow[localRow] += B_numEntries; 06430 } 06431 } 06432 // Construct the result matrix C. 
06433 if (constructorSublist.is_null ()) { 06434 C = rcp (new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow, 06435 StaticProfile)); 06436 } else { 06437 C = rcp (new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow, 06438 StaticProfile, constructorSublist)); 06439 } 06440 // Since A and B have the same row Maps, we could add them 06441 // together all at once and merge values before we call 06442 // insertGlobalValues. However, we don't really need to, since 06443 // we've already allocated enough space in each row of C for C 06444 // to do the merge itself. 06445 } 06446 else { // the row Maps of A and B are not the same 06447 // Construct the result matrix C. 06448 if (constructorSublist.is_null ()) { 06449 C = rcp (new crs_matrix_type (C_rowMap, 0, DynamicProfile)); 06450 } else { 06451 C = rcp (new crs_matrix_type (C_rowMap, 0, DynamicProfile, 06452 constructorSublist)); 06453 } 06454 } 06455 06456 #ifdef HAVE_TPETRA_DEBUG 06457 TEUCHOS_TEST_FOR_EXCEPTION(C.is_null (), std::logic_error, 06458 "Tpetra::RowMatrix::add: C should not be null at this point. " 06459 "Please report this bug to the Tpetra developers."); 06460 #endif // HAVE_TPETRA_DEBUG 06461 // 06462 // Compute C = alpha*A + beta*B. 
06463 // 06464 Array<GO> ind; 06465 Array<Scalar> val; 06466 06467 if (alpha != STS::zero ()) { 06468 const LO A_localNumRows = static_cast<LO> (A_rowMap->getNodeNumElements ()); 06469 for (LO localRow = 0; localRow < A_localNumRows; ++localRow) { 06470 size_t A_numEntries = A.getNumEntriesInLocalRow (localRow); 06471 const GO globalRow = A_rowMap->getGlobalElement (localRow); 06472 if (A_numEntries > static_cast<size_t> (ind.size ())) { 06473 ind.resize (A_numEntries); 06474 val.resize (A_numEntries); 06475 } 06476 ArrayView<GO> indView = ind (0, A_numEntries); 06477 ArrayView<Scalar> valView = val (0, A_numEntries); 06478 A.getGlobalRowCopy (globalRow, indView, valView, A_numEntries); 06479 06480 if (alpha != STS::one ()) { 06481 for (size_t k = 0; k < A_numEntries; ++k) { 06482 valView[k] *= alpha; 06483 } 06484 } 06485 C->insertGlobalValues (globalRow, indView, valView); 06486 } 06487 } 06488 06489 if (beta != STS::zero ()) { 06490 const LO B_localNumRows = static_cast<LO> (B_rowMap->getNodeNumElements ()); 06491 for (LO localRow = 0; localRow < B_localNumRows; ++localRow) { 06492 size_t B_numEntries = B.getNumEntriesInLocalRow (localRow); 06493 const GO globalRow = B_rowMap->getGlobalElement (localRow); 06494 if (B_numEntries > static_cast<size_t> (ind.size ())) { 06495 ind.resize (B_numEntries); 06496 val.resize (B_numEntries); 06497 } 06498 ArrayView<GO> indView = ind (0, B_numEntries); 06499 ArrayView<Scalar> valView = val (0, B_numEntries); 06500 B.getGlobalRowCopy (globalRow, indView, valView, B_numEntries); 06501 06502 if (beta != STS::one ()) { 06503 for (size_t k = 0; k < B_numEntries; ++k) { 06504 valView[k] *= beta; 06505 } 06506 } 06507 C->insertGlobalValues (globalRow, indView, valView); 06508 } 06509 } 06510 06511 if (callFillComplete) { 06512 if (fillCompleteSublist.is_null ()) { 06513 C->fillComplete (theDomainMap, theRangeMap); 06514 } else { 06515 C->fillComplete (theDomainMap, theRangeMap, fillCompleteSublist); 06516 } 06517 } 06518 06519 
return rcp_implicit_cast<row_matrix_type> (C); 06520 } 06521 06522 06523 template <class Scalar, 06524 class LocalOrdinal, 06525 class GlobalOrdinal, 06526 class DeviceType> 06527 void 06528 CrsMatrix< 06529 Scalar, LocalOrdinal, GlobalOrdinal, 06530 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 06531 transferAndFillComplete (Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, node_type> > & destMat, 06532 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, node_type>& rowTransfer, 06533 const Teuchos::RCP<const map_type>& domainMap, 06534 const Teuchos::RCP<const map_type>& rangeMap, 06535 const Teuchos::RCP<Teuchos::ParameterList>& params) const 06536 { 06537 using Teuchos::ArrayView; 06538 using Teuchos::ParameterList; 06539 using Teuchos::RCP; 06540 typedef LocalOrdinal LO; 06541 typedef GlobalOrdinal GO; 06542 typedef node_type NT; 06543 typedef CrsMatrix<Scalar, LO, GO, NT> this_type; 06544 typedef Vector<int, LO, GO, NT> IntVectorType; 06545 06546 // Make sure that the input argument rowTransfer is either an 06547 // Import or an Export. Import and Export are the only two 06548 // subclasses of Transfer that we defined, but users might 06549 // (unwisely, for now at least) decide to implement their own 06550 // subclasses. Exclude this possibility. 06551 const import_type* xferAsImport = dynamic_cast<const import_type*> (&rowTransfer); 06552 const export_type* xferAsExport = dynamic_cast<const export_type*> (&rowTransfer); 06553 TEUCHOS_TEST_FOR_EXCEPTION( 06554 xferAsImport == NULL && xferAsExport == NULL, std::invalid_argument, 06555 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' input " 06556 "argument must be either an Import or an Export, and its template " 06557 "parameters must match the corresponding template parameters of the " 06558 "CrsMatrix."); 06559 06560 // FIXME (mfh 15 May 2014) Wouldn't communication still be needed, 06561 // if the source Map is not distributed but the target Map is? 
06562 const bool communication_needed = rowTransfer.getSourceMap ()->isDistributed (); 06563 06564 // 06565 // Get the caller's parameters 06566 // 06567 06568 bool reverseMode = false; // Are we in reverse mode? 06569 bool restrictComm = false; // Do we need to restrict the communicator? 06570 RCP<ParameterList> matrixparams; // parameters for the destination matrix 06571 if (! params.is_null ()) { 06572 reverseMode = params->get ("Reverse Mode", reverseMode); 06573 restrictComm = params->get ("Restrict Communicator", restrictComm); 06574 matrixparams = sublist (params, "CrsMatrix"); 06575 } 06576 06577 // Get the new domain and range Maps. We need some of them for 06578 // error checking, now that we have the reverseMode parameter. 06579 RCP<const map_type> MyRowMap = reverseMode ? 06580 rowTransfer.getSourceMap () : rowTransfer.getTargetMap (); 06581 RCP<const map_type> MyColMap; // create this below 06582 RCP<const map_type> MyDomainMap = ! domainMap.is_null () ? 06583 domainMap : getDomainMap (); 06584 RCP<const map_type> MyRangeMap = ! rangeMap.is_null () ? 06585 rangeMap : getRangeMap (); 06586 RCP<const map_type> BaseRowMap = MyRowMap; 06587 RCP<const map_type> BaseDomainMap = MyDomainMap; 06588 06589 // If the user gave us a nonnull destMat, then check whether it's 06590 // "pristine." That means that it has no entries. 06591 // 06592 // FIXME (mfh 15 May 2014) If this is not true on all processes, 06593 // then this exception test may hang. It would be better to 06594 // forward an error flag to the next communication phase. 06595 if (! destMat.is_null ()) { 06596 // FIXME (mfh 15 May 2014): The classic Petra idiom for checking 06597 // whether a graph or matrix has no entries on the calling 06598 // process, is that it is neither locally nor globally indexed. 06599 // This may change eventually with the Kokkos refactor version 06600 // of Tpetra, so it would be better just to check the quantity 06601 // of interest directly. 
Note that with the Kokkos refactor 06602 // version of Tpetra, asking for the total number of entries in 06603 // a graph or matrix that is not fill complete might require 06604 // computation (kernel launch), since it is not thread scalable 06605 // to update a count every time an entry is inserted. 06606 const bool NewFlag = ! destMat->getGraph ()->isLocallyIndexed () && 06607 ! destMat->getGraph ()->isGloballyIndexed (); 06608 TEUCHOS_TEST_FOR_EXCEPTION( 06609 ! NewFlag, std::invalid_argument, "Tpetra::CrsMatrix::" 06610 "transferAndFillComplete: The input argument 'destMat' is only allowed " 06611 "to be nonnull, if its graph is empty (neither locally nor globally " 06612 "indexed)."); 06613 // FIXME (mfh 15 May 2014) At some point, we want to change 06614 // graphs and matrices so that their DistObject Map 06615 // (this->getMap()) may differ from their row Map. This will 06616 // make redistribution for 2-D distributions more efficient. I 06617 // hesitate to change this check, because I'm not sure how much 06618 // the code here depends on getMap() and getRowMap() being the 06619 // same. 06620 TEUCHOS_TEST_FOR_EXCEPTION( 06621 ! destMat->getRowMap ()->isSameAs (*MyRowMap), std::invalid_argument, 06622 "Tpetra::CrsMatrix::transferAndFillComplete: The (row) Map of the " 06623 "input argument 'destMat' is not the same as the (row) Map specified " 06624 "by the input argument 'rowTransfer'."); 06625 TEUCHOS_TEST_FOR_EXCEPTION( 06626 ! destMat->checkSizes (*this), std::invalid_argument, 06627 "Tpetra::CrsMatrix::transferAndFillComplete: You provided a nonnull " 06628 "destination matrix, but checkSizes() indicates that it is not a legal " 06629 "legal target for redistribution from the source matrix (*this). This " 06630 "may mean that they do not have the same dimensions."); 06631 } 06632 06633 // If forward mode (the default), then *this's (row) Map must be 06634 // the same as the source Map of the Transfer. 
If reverse mode, 06635 // then *this's (row) Map must be the same as the target Map of 06636 // the Transfer. 06637 // 06638 // FIXME (mfh 15 May 2014) At some point, we want to change graphs 06639 // and matrices so that their DistObject Map (this->getMap()) may 06640 // differ from their row Map. This will make redistribution for 06641 // 2-D distributions more efficient. I hesitate to change this 06642 // check, because I'm not sure how much the code here depends on 06643 // getMap() and getRowMap() being the same. 06644 TEUCHOS_TEST_FOR_EXCEPTION( 06645 ! (reverseMode || getRowMap ()->isSameAs (*rowTransfer.getSourceMap ())), 06646 std::invalid_argument, "Tpetra::CrsMatrix::transferAndFillComplete: " 06647 "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode."); 06648 TEUCHOS_TEST_FOR_EXCEPTION( 06649 ! (! reverseMode || getRowMap ()->isSameAs (*rowTransfer.getTargetMap ())), 06650 std::invalid_argument, "Tpetra::CrsMatrix::transferAndFillComplete: " 06651 "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode."); 06652 06653 // The basic algorithm here is: 06654 // 06655 // 1. Call the moral equivalent of "distor.do" to handle the import. 06656 // 2. Copy all the Imported and Copy/Permuted data into the raw 06657 // CrsMatrix / CrsGraphData pointers, still using GIDs. 06658 // 3. Call an optimized version of MakeColMap that avoids the 06659 // Directory lookups (since the importer knows who owns all the 06660 // GIDs) AND reindexes to LIDs. 06661 // 4. Call expertStaticFillComplete() 06662 06663 // Get information from the Importer 06664 const size_t NumSameIDs = rowTransfer.getNumSameIDs(); 06665 ArrayView<const LO> ExportLIDs = reverseMode ? 06666 rowTransfer.getRemoteLIDs () : rowTransfer.getExportLIDs (); 06667 ArrayView<const LO> RemoteLIDs = reverseMode ? 06668 rowTransfer.getExportLIDs () : rowTransfer.getRemoteLIDs (); 06669 ArrayView<const LO> PermuteToLIDs = reverseMode ? 
06670 rowTransfer.getPermuteFromLIDs () : rowTransfer.getPermuteToLIDs (); 06671 ArrayView<const LO> PermuteFromLIDs = reverseMode ? 06672 rowTransfer.getPermuteToLIDs () : rowTransfer.getPermuteFromLIDs (); 06673 Distributor& Distor = rowTransfer.getDistributor (); 06674 06675 // Owning PIDs 06676 Teuchos::Array<int> SourcePids; 06677 Teuchos::Array<int> TargetPids; 06678 int MyPID = getComm ()->getRank (); 06679 06680 // Temp variables for sub-communicators 06681 RCP<const map_type> ReducedRowMap, ReducedColMap, 06682 ReducedDomainMap, ReducedRangeMap; 06683 RCP<const Comm<int> > ReducedComm; 06684 06685 // If the user gave us a null destMat, then construct the new 06686 // destination matrix. We will replace its column Map later. 06687 if (destMat.is_null ()) { 06688 destMat = rcp (new this_type (MyRowMap, 0, StaticProfile, matrixparams)); 06689 } 06690 06691 /***************************************************/ 06692 /***** 1) First communicator restriction phase ****/ 06693 /***************************************************/ 06694 if (restrictComm) { 06695 ReducedRowMap = MyRowMap->removeEmptyProcesses (); 06696 ReducedComm = ReducedRowMap.is_null () ? Teuchos::null : ReducedRowMap->getComm (); 06697 destMat->removeEmptyProcessesInPlace (ReducedRowMap); 06698 06699 ReducedDomainMap = MyRowMap.getRawPtr () == MyDomainMap.getRawPtr () ? 06700 ReducedRowMap : 06701 MyDomainMap->replaceCommWithSubset (ReducedComm); 06702 ReducedRangeMap = MyRowMap.getRawPtr () == MyRangeMap.getRawPtr () ? 06703 ReducedRowMap : 06704 MyRangeMap->replaceCommWithSubset (ReducedComm); 06705 06706 // Reset the "my" maps 06707 MyRowMap = ReducedRowMap; 06708 MyDomainMap = ReducedDomainMap; 06709 MyRangeMap = ReducedRangeMap; 06710 06711 // Update my PID, if we've restricted the communicator 06712 if (! 
ReducedComm.is_null ()) { 06713 MyPID = ReducedComm->getRank (); 06714 } 06715 else { 06716 MyPID = -2; // For debugging 06717 } 06718 } 06719 else { 06720 ReducedComm = MyRowMap->getComm (); 06721 } 06722 06723 /***************************************************/ 06724 /***** 2) From Tpera::DistObject::doTransfer() ****/ 06725 /***************************************************/ 06726 06727 // Get the owning PIDs 06728 RCP<const import_type> MyImporter = getGraph ()->getImporter (); 06729 06730 if (! restrictComm && ! MyImporter.is_null () && 06731 BaseDomainMap->isSameAs (*getDomainMap ())) { 06732 // Same domain map as source matrix 06733 // 06734 // NOTE: This won't work for restrictComm (because the Import 06735 // doesn't know the restricted PIDs), though writing an 06736 // optimized version for that case would be easy (Import an 06737 // IntVector of the new PIDs). Might want to add this later. 06738 Import_Util::getPids (*MyImporter, SourcePids, false); 06739 } 06740 else if (MyImporter.is_null () && BaseDomainMap->isSameAs (*getDomainMap ())) { 06741 // Matrix has no off-process entries 06742 SourcePids.resize (getColMap ()->getNodeNumElements ()); 06743 SourcePids.assign (getColMap ()->getNodeNumElements (), MyPID); 06744 } 06745 else if (BaseDomainMap->isSameAs (*BaseRowMap) && 06746 getDomainMap ()->isSameAs (*getRowMap ())) { 06747 // We can use the rowTransfer + SourceMatrix's Import to find out who owns what. 06748 IntVectorType TargetRow_pids (domainMap); 06749 IntVectorType SourceRow_pids (getRowMap ()); 06750 IntVectorType SourceCol_pids (getColMap ()); 06751 06752 TargetRow_pids.putScalar (MyPID); 06753 if (! reverseMode && xferAsImport != NULL) { 06754 SourceRow_pids.doExport (TargetRow_pids, *xferAsImport, INSERT); 06755 } 06756 else if (reverseMode && xferAsExport != NULL) { 06757 SourceRow_pids.doExport (TargetRow_pids, *xferAsExport, INSERT); 06758 } 06759 else if (! 
reverseMode && xferAsExport != NULL) { 06760 SourceRow_pids.doImport (TargetRow_pids, *xferAsExport, INSERT); 06761 } 06762 else if (reverseMode && xferAsImport != NULL) { 06763 SourceRow_pids.doImport (TargetRow_pids, *xferAsImport, INSERT); 06764 } 06765 else { 06766 TEUCHOS_TEST_FOR_EXCEPTION( 06767 true, std::logic_error, "Tpetra::CrsMatrix::" 06768 "transferAndFillComplete: Should never get here! " 06769 "Please report this bug to a Tpetra developer."); 06770 } 06771 SourceCol_pids.doImport (SourceRow_pids, *MyImporter, INSERT); 06772 SourcePids.resize (getColMap ()->getNodeNumElements ()); 06773 SourceCol_pids.get1dCopy (SourcePids ()); 06774 } 06775 else { 06776 TEUCHOS_TEST_FOR_EXCEPTION( 06777 true, std::invalid_argument, "Tpetra::CrsMatrix::" 06778 "transferAndFillComplete: This method only allows either domainMap == " 06779 "getDomainMap (), or (domainMap == rowTransfer.getTargetMap () and " 06780 "getDomainMap () == getRowMap ())."); 06781 } 06782 06783 // Tpetra-specific stuff 06784 // 06785 // FIXME (mfh 15 May 2014) This should work fine if CrsMatrix 06786 // inherits from DistObject (in which case all arrays that get 06787 // resized here are Teuchos::Array), but it won't work if 06788 // CrsMatrix inherits from DistObjectKA (in which case all arrays 06789 // that get resized here are Kokkos::View). In the latter case, 06790 // imports_ and numExportPacketsPerLID_ each have only a device 06791 // view, but numImportPacketsPerLID_ has a device view and a host 06792 // view (host_numImportPacketsPerLID_). 06793 // 06794 // Currently, CrsMatrix inherits from DistObject, not 06795 // DistObjectKA, so the code below should be fine for the Kokkos 06796 // refactor version of CrsMatrix. 
06797 // 06798 // For this and for all other cases in this function that want to 06799 // resize the DistObject's communication arrays, it would make 06800 // sense to give DistObject (and DistObjectKA) methods for 06801 // resizing that don't expose the details of whether these are 06802 // Teuchos::Array or Kokkos::View. 06803 size_t constantNumPackets = destMat->constantNumberOfPackets (); 06804 if (constantNumPackets == 0) { 06805 destMat->numExportPacketsPerLID_old_.resize (ExportLIDs.size ()); 06806 destMat->numImportPacketsPerLID_old_.resize (RemoteLIDs.size ()); 06807 } 06808 else { 06809 // There are a constant number of packets per element. We 06810 // already know (from the number of "remote" (incoming) 06811 // elements) how many incoming elements we expect, so we can 06812 // resize the buffer accordingly. 06813 const size_t rbufLen = RemoteLIDs.size() * constantNumPackets; 06814 if (static_cast<size_t> (destMat->imports_old_.size ()) != rbufLen) { 06815 destMat->imports_old_.resize (rbufLen); 06816 } 06817 } 06818 06819 // Pack & Prepare w/ owning PIDs 06820 // 06821 // FIXME (mfh 15 May 2014) This should work fine if CrsMatrix 06822 // inherits from DistObject (in which case all arrays that get 06823 // passed in here are Teuchos::Array), but it won't work if 06824 // CrsMatrix inherits from DistObjectKA (in which case all arrays 06825 // that get passed in here are Kokkos::View). In the latter case, 06826 // exports_ and numExportPacketsPerLID_ each have only a device 06827 // view. 06828 // 06829 // Currently, CrsMatrix inherits from DistObject, not 06830 // DistObjectKA, so the code below should be fine for the Kokkos 06831 // refactor version of CrsMatrix. 06832 Import_Util::packAndPrepareWithOwningPIDs (*this, ExportLIDs, 06833 destMat->exports_old_, 06834 destMat->numExportPacketsPerLID_old_ (), 06835 constantNumPackets, Distor, 06836 SourcePids); 06837 06838 // Do the exchange of remote data. 
06839 // 06840 // FIXME (mfh 15 May 2014) This should work fine if CrsMatrix 06841 // inherits from DistObject (in which case all arrays that get 06842 // passed in here are Teuchos::Array), but it won't work if 06843 // CrsMatrix inherits from DistObjectKA (in which case all arrays 06844 // that get passed in here are Kokkos::View). 06845 // 06846 // In the latter case, imports_, exports_, and 06847 // numExportPacketsPerLID_ each have only a device view. 06848 // numImportPacketsPerLIDs_ is a device view, and also has a host 06849 // view (host_numImportPacketsPerLID_). 06850 if (communication_needed) { 06851 if (reverseMode) { 06852 if (constantNumPackets == 0) { // variable number of packets per LID 06853 Distor.doReversePostsAndWaits (destMat->numExportPacketsPerLID_old_ ().getConst (), 1, 06854 destMat->numImportPacketsPerLID_old_ ()); 06855 size_t totalImportPackets = 0; 06856 for (Array_size_type i = 0; i < destMat->numImportPacketsPerLID_old_.size (); ++i) { 06857 totalImportPackets += destMat->numImportPacketsPerLID_old_[i]; 06858 } 06859 destMat->imports_old_.resize (totalImportPackets); 06860 Distor.doReversePostsAndWaits (destMat->exports_old_ ().getConst (), 06861 destMat->numExportPacketsPerLID_old_ (), 06862 destMat->imports_old_ (), 06863 destMat->numImportPacketsPerLID_old_ ()); 06864 } 06865 else { // constant number of packets per LID 06866 Distor.doReversePostsAndWaits (destMat->exports_old_ ().getConst (), 06867 constantNumPackets, 06868 destMat->imports_old_ ()); 06869 } 06870 } 06871 else { // forward mode (the default) 06872 if (constantNumPackets == 0) { // variable number of packets per LID 06873 Distor.doPostsAndWaits (destMat->numExportPacketsPerLID_old_ ().getConst (), 1, 06874 destMat->numImportPacketsPerLID_old_ ()); 06875 size_t totalImportPackets = 0; 06876 for (Array_size_type i = 0; i < destMat->numImportPacketsPerLID_old_.size (); ++i) { 06877 totalImportPackets += destMat->numImportPacketsPerLID_old_[i]; 06878 } 06879 
destMat->imports_old_.resize (totalImportPackets); 06880 Distor.doPostsAndWaits (destMat->exports_old_ ().getConst (), 06881 destMat->numExportPacketsPerLID_old_ (), 06882 destMat->imports_old_ (), 06883 destMat->numImportPacketsPerLID_old_ ()); 06884 } 06885 else { // constant number of packets per LID 06886 Distor.doPostsAndWaits (destMat->exports_old_ ().getConst (), 06887 constantNumPackets, 06888 destMat->imports_old_ ()); 06889 } 06890 } 06891 } 06892 06893 /*********************************************************************/ 06894 /**** 3) Copy all of the Same/Permute/Remote data into CSR_arrays ****/ 06895 /*********************************************************************/ 06896 06897 // FIXME (mfh 15 May 2014) This should work fine if CrsMatrix 06898 // inherits from DistObject (in which case all arrays that get 06899 // passed in here are Teuchos::Array), but it won't work if 06900 // CrsMatrix inherits from DistObjectKA (in which case all arrays 06901 // that get passed in here are Kokkos::View). 06902 // 06903 // In the latter case, imports_ only has a device view. 06904 // numImportPacketsPerLIDs_ is a device view, and also has a host 06905 // view (host_numImportPacketsPerLID_). 06906 size_t mynnz = 06907 Import_Util::unpackAndCombineWithOwningPIDsCount (*this, RemoteLIDs, 06908 destMat->imports_old_ (), 06909 destMat->numImportPacketsPerLID_old_ (), 06910 constantNumPackets, Distor, INSERT, 06911 NumSameIDs, PermuteToLIDs, 06912 PermuteFromLIDs); 06913 size_t N = BaseRowMap->getNodeNumElements (); 06914 06915 // Allocations 06916 ArrayRCP<size_t> CSR_rowptr(N+1); 06917 ArrayRCP<GO> CSR_colind_GID; 06918 ArrayRCP<LO> CSR_colind_LID; 06919 ArrayRCP<Scalar> CSR_vals; 06920 CSR_colind_GID.resize (mynnz); 06921 CSR_vals.resize (mynnz); 06922 06923 // If LO and GO are the same, we can reuse memory when 06924 // converting the column indices from global to local indices. 
06925 if (typeid (LO) == typeid (GO)) { 06926 CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO> (CSR_colind_GID); 06927 } 06928 else { 06929 CSR_colind_LID.resize (mynnz); 06930 } 06931 06932 // FIXME (mfh 15 May 2014) This should work fine if CrsMatrix 06933 // inherits from DistObject (in which case all arrays that get 06934 // passed in here are Teuchos::Array), but it won't work if 06935 // CrsMatrix inherits from DistObjectKA (in which case all arrays 06936 // that get passed in here are Kokkos::View). 06937 // 06938 // In the latter case, imports_ only has a device view. 06939 // numImportPacketsPerLIDs_ is a device view, and also has a host 06940 // view (host_numImportPacketsPerLID_). 06941 // 06942 // FIXME (mfh 15 May 2014) Why can't we abstract this out as an 06943 // unpackAndCombine method on a "CrsArrays" object? This passing 06944 // in a huge list of arrays is icky. Can't we have a bit of an 06945 // abstraction? Implementing a concrete DistObject subclass only 06946 // takes five methods. 06947 Import_Util::unpackAndCombineIntoCrsArrays (*this, RemoteLIDs, destMat->imports_old_ (), 06948 destMat->numImportPacketsPerLID_old_ (), 06949 constantNumPackets, Distor, INSERT, NumSameIDs, 06950 PermuteToLIDs, PermuteFromLIDs, N, mynnz, MyPID, 06951 CSR_rowptr (), CSR_colind_GID (), CSR_vals (), 06952 SourcePids (), TargetPids); 06953 06954 /**************************************************************/ 06955 /**** 4) Call Optimized MakeColMap w/ no Directory Lookups ****/ 06956 /**************************************************************/ 06957 06958 // Call an optimized version of makeColMap that avoids the 06959 // Directory lookups (since the Import object knows who owns all 06960 // the GIDs). 
06961 Teuchos::Array<int> RemotePids; 06962 Import_Util::lowCommunicationMakeColMapAndReindex (CSR_rowptr (), 06963 CSR_colind_LID (), 06964 CSR_colind_GID (), 06965 BaseDomainMap, 06966 TargetPids, RemotePids, 06967 MyColMap); 06968 06969 /*******************************************************/ 06970 /**** 4) Second communicator restriction phase ****/ 06971 /*******************************************************/ 06972 if (restrictComm) { 06973 ReducedColMap = (MyRowMap.getRawPtr () == MyColMap.getRawPtr ()) ? 06974 ReducedRowMap : 06975 MyColMap->replaceCommWithSubset (ReducedComm); 06976 MyColMap = ReducedColMap; // Reset the "my" maps 06977 } 06978 06979 // Replace the col map 06980 destMat->replaceColMap (MyColMap); 06981 06982 // Short circuit if the processor is no longer in the communicator 06983 // 06984 // NOTE: Epetra replaces modifies all "removed" processes so they 06985 // have a dummy (serial) Map that doesn't touch the original 06986 // communicator. Duplicating that here might be a good idea. 06987 if (ReducedComm.is_null ()) { 06988 return; 06989 } 06990 06991 /***************************************************/ 06992 /**** 5) Sort ****/ 06993 /***************************************************/ 06994 Import_Util::sortCrsEntries (CSR_rowptr (), 06995 CSR_colind_LID (), 06996 CSR_vals ()); 06997 if ((! reverseMode && xferAsImport != NULL) || 06998 (reverseMode && xferAsExport != NULL)) { 06999 Import_Util::sortCrsEntries (CSR_rowptr (), 07000 CSR_colind_LID (), 07001 CSR_vals ()); 07002 } 07003 else if ((! 
reverseMode && xferAsExport != NULL) || 07004 (reverseMode && xferAsImport != NULL)) { 07005 Import_Util::sortAndMergeCrsEntries (CSR_rowptr (), 07006 CSR_colind_LID (), 07007 CSR_vals ()); 07008 if (CSR_rowptr[N] != mynnz) { 07009 CSR_colind_LID.resize (CSR_rowptr[N]); 07010 CSR_vals.resize (CSR_rowptr[N]); 07011 } 07012 } 07013 else { 07014 TEUCHOS_TEST_FOR_EXCEPTION( 07015 true, std::logic_error, "Tpetra::CrsMatrix::" 07016 "transferAndFillComplete: Should never get here! " 07017 "Please report this bug to a Tpetra developer."); 07018 } 07019 /***************************************************/ 07020 /**** 6) Reset the colmap and the arrays ****/ 07021 /***************************************************/ 07022 07023 // Call constructor for the new matrix (restricted as needed) 07024 // 07025 // NOTE (mfh 15 May 2014) This should work fine for the Kokkos 07026 // refactor version of CrsMatrix, though it reserves the right to 07027 // make a deep copy of the arrays. 07028 destMat->setAllValues (CSR_rowptr, CSR_colind_LID, CSR_vals); 07029 07030 /***************************************************/ 07031 /**** 7) Build Importer & Call ESFC ****/ 07032 /***************************************************/ 07033 // Pre-build the importer using the existing PIDs 07034 RCP<import_type> MyImport = rcp (new import_type (MyDomainMap, MyColMap, RemotePids)); 07035 destMat->expertStaticFillComplete (MyDomainMap, MyRangeMap, MyImport); 07036 } 07037 07038 07039 template <class Scalar, 07040 class LocalOrdinal, 07041 class GlobalOrdinal, 07042 class DeviceType> 07043 void 07044 CrsMatrix< 07045 Scalar, LocalOrdinal, GlobalOrdinal, 07046 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 07047 importAndFillComplete (Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, node_type> >& destMatrix, 07048 const import_type& importer, 07049 const Teuchos::RCP<const map_type>& domainMap, 07050 const Teuchos::RCP<const map_type>& rangeMap, 07051 const 
Teuchos::RCP<Teuchos::ParameterList>& params) const 07052 { /* Import variant: no logic of its own; every argument is handed unchanged to transferAndFillComplete, with the Import object as the transfer argument. */ 07053 transferAndFillComplete (destMatrix, importer, domainMap, rangeMap, params); 07054 } 07055 07056 /* exportAndFillComplete: const member of the CrsMatrix specialization for Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>. Takes the destination-matrix RCP (by non-const reference), an Export object, the domain and range Maps, and a ParameterList RCP, and passes all five straight through to transferAndFillComplete. Returns nothing; any effect on destMatrix comes from transferAndFillComplete. */ 07057 template <class Scalar, 07058 class LocalOrdinal, 07059 class GlobalOrdinal, 07060 class DeviceType> 07061 void 07062 CrsMatrix< 07063 Scalar, LocalOrdinal, GlobalOrdinal, 07064 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 07065 exportAndFillComplete (Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, node_type> >& destMatrix, 07066 const export_type& exporter, 07067 const Teuchos::RCP<const map_type>& domainMap, 07068 const Teuchos::RCP<const map_type>& rangeMap, 07069 const Teuchos::RCP<Teuchos::ParameterList>& params) const 07070 { /* Export variant: same forwarding call as the import wrapper, with the Export object in the second position. */ 07071 transferAndFillComplete (destMatrix, exporter, domainMap, rangeMap, params); 07072 } 07073 07074 } // namespace Tpetra 07075 07076 #endif
1.7.6.1