|
Tpetra Matrix/Vector Services
Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Tpetra: Templated Linear Algebra Services Package 00005 // Copyright (2008) Sandia Corporation 00006 // 00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, 00008 // the U.S. Government retains certain rights in this software. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00038 // 00039 // ************************************************************************ 00040 // @HEADER 00041 00042 #ifndef TPETRA_VBRMATRIX_DEF_HPP 00043 #define TPETRA_VBRMATRIX_DEF_HPP 00044 00045 #include <Tpetra_BlockMap.hpp> 00046 #include <Tpetra_BlockCrsGraph.hpp> 00047 #include <Tpetra_DistObject.hpp> 00048 #include <Tpetra_Vector.hpp> 00049 #include <Kokkos_NodeHelpers.hpp> 00050 #include <Kokkos_VbrMatrix.hpp> 00051 #include <Teuchos_SerialDenseMatrix.hpp> 00052 #include <algorithm> 00053 #include <sstream> 00054 00055 #ifdef DOXYGEN_USE_ONLY 00056 #include "Tpetra_VbrMatrix_decl.hpp" 00057 #endif 00058 00059 namespace Tpetra { 00060 00061 template<class Scalar,class Node> 00062 void fill_device_ArrayRCP(Teuchos::RCP<Node>& node, Teuchos::ArrayRCP<Scalar>& ptr, Scalar value) 00063 { 00064 KokkosClassic::ReadyBufferHelper<Node> rbh(node); 00065 KokkosClassic::InitOp<Scalar> wdp; 00066 wdp.alpha = value; 00067 rbh.begin(); 00068 wdp.x = rbh.addNonConstBuffer(ptr); 00069 rbh.end(); 00070 node->template parallel_for<KokkosClassic::InitOp<Scalar> >(0, ptr.size(), wdp); 00071 } 00072 00073 //------------------------------------------------------------------- 00074 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00075 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::VbrMatrix(const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> > &blkRowMap, size_t maxNumEntriesPerRow, ProfileType pftype) 00076 : DistObject<char, LocalOrdinal, GlobalOrdinal, Node>(convertBlockMapToPointMap(*blkRowMap)), 00077 blkGraph_(Teuchos::rcp(new BlockCrsGraph<LocalOrdinal,GlobalOrdinal,Node>(blkRowMap, maxNumEntriesPerRow, pftype))), 00078 constBlkGraph_(blkGraph_), 00079 lclMatrix_ (Teuchos::rcp (new local_matrix_type (blkRowMap->getNodeNumBlocks (), 00080 blkRowMap->getPointMap()->getNode ()))), 00081 pbuf_values1D_(), 00082 pbuf_indx_(), 00083 lclMatOps_(blkRowMap->getPointMap()->getNode()), 00084 importer_(), 00085 exporter_(), 00086 importedVec_(), 00087 exportedVec_(), 00088 data_2D_(Teuchos::rcp(new Teuchos::Array<RowGlobalCols>)), 00089 nonlocal_data_(), 00090 is_fill_completed_(false), 00091 is_storage_optimized_(false) 00092 { 00093 //The graph of this VBR matrix is a BlockCrsGraph, which is a CrsGraph where 00094 //each entry in the graph corresponds to a block-entry in the matrix. 00095 //That is, you can think of a VBR matrix as a Crs matrix of dense 00096 //submatrices... 00097 } 00098 00099 //------------------------------------------------------------------- 00100 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00101 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::VbrMatrix(const Teuchos::RCP<const BlockCrsGraph<LocalOrdinal,GlobalOrdinal,Node> > &blkGraph) 00102 : DistObject<char, LocalOrdinal, GlobalOrdinal, Node>(convertBlockMapToPointMap(*blkGraph->getBlockRowMap())), 00103 blkGraph_(), 00104 constBlkGraph_(blkGraph), 00105 lclMatrix_ (Teuchos::rcp (new local_matrix_type (blkGraph->getBlockRowMap ()->getNodeNumBlocks (), 00106 blkGraph->getBlockRowMap ()->getPointMap ()->getNode ()))), 00107 pbuf_values1D_(), 00108 pbuf_indx_(), 00109 lclMatOps_(blkGraph->getBlockRowMap()->getPointMap()->getNode()), 00110 importer_(), 00111 exporter_(), 00112 importedVec_(), 00113 exportedVec_(), 00114 data_2D_(Teuchos::rcp(new Teuchos::Array<RowGlobalCols>)), 00115 nonlocal_data_(), 00116 is_fill_completed_(false), 00117 is_storage_optimized_(false) 00118 { 00119 //The graph of this VBR matrix is a BlockCrsGraph, which is a CrsGraph where 00120 //each entry in the graph corresponds to a block-entry in the matrix. 00121 //That is, you can think of a VBR matrix as a Crs matrix of dense 00122 //submatrices... 00123 00124 TEUCHOS_TEST_FOR_EXCEPTION(blkGraph->isFillComplete() == false, std::runtime_error, 00125 "Tpetra::VbrMatrix::VbrMatrix(BlockCrsGraph) ERROR, this constructor requires graph.isFillComplete()==true."); 00126 00127 createImporterExporter(); 00128 00129 is_fill_completed_ = true; 00130 00131 optimizeStorage(); 00132 00133 fillLocalMatrix(); 00134 fillLocalMatVec(); 00135 } 00136 00137 //------------------------------------------------------------------- 00138 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00139 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::~VbrMatrix() 00140 { 00141 } 00142 00143 //------------------------------------------------------------------- 00144 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00145 Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > 00146 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getDomainMap() const 00147 { 00148 return getBlockDomainMap()->getPointMap(); 00149 } 00150 00151 //------------------------------------------------------------------- 00152 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00153 Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > 00154 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getRangeMap() const 00155 { 00156 return getBlockRangeMap()->getPointMap(); 00157 } 00158 00159 //------------------------------------------------------------------- 00160 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00161 bool 00162 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::isFillComplete() const 00163 { 00164 return is_fill_completed_; 00165 } 00166 00167 //------------------------------------------------------------------- 00168 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00169 template <class DomainScalar, class RangeScalar> 00170 void 00171 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>:: 00172 multiply (const MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node>& X, 00173 MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node>& Y, 00174 Teuchos::ETransp trans, 00175 RangeScalar alpha, 00176 RangeScalar beta) const 00177 { 00178 TEUCHOS_TEST_FOR_EXCEPTION( 00179 ! isFillComplete (), std::runtime_error, 00180 "Tpetra::VbrMatrix::multiply: This method may only be called after " 00181 "fillComplete has been called."); 00182 00183 KokkosClassic::MultiVector<Scalar,Node> lclX = X.getLocalMV (); 00184 KokkosClassic::MultiVector<Scalar,Node> lclY = Y.getLocalMV (); 00185 00186 lclMatOps_.template multiply<Scalar,Scalar> (trans, alpha, lclX, beta, lclY); 00187 } 00188 00189 //------------------------------------------------------------------- 00190 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00191 template<class DomainScalar, class RangeScalar> 00192 void 00193 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>:: 00194 solve (const MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node>& Y, 00195 MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node>& X, 00196 Teuchos::ETransp trans) const 00197 { 00198 TEUCHOS_TEST_FOR_EXCEPTION(X.isConstantStride() == false || Y.isConstantStride() == false, std::runtime_error, 00199 "Tpetra::VbrMatrix::solve(X,Y): X and Y must be constant stride."); 00200 00201 TEUCHOS_TEST_FOR_EXCEPTION(!isFillComplete(), std::runtime_error, 00202 "Tpetra::VbrMatrix::solve ERROR, solve may only be called after fillComplete has been called."); 00203 00204 TEUCHOS_TEST_FOR_EXCEPTION(constBlkGraph_->isUpperTriangular()==false && constBlkGraph_->isLowerTriangular()==false, std::runtime_error, 00205 "Tpetra::VbrMatrix::solve ERROR, matrix must be either upper or lower triangular."); 00206 00207 KokkosClassic::MultiVector<RangeScalar,Node> lclY = Y.getLocalMV (); 00208 KokkosClassic::MultiVector<DomainScalar,Node> lclX = X.getLocalMV (); 00209 00210 const Teuchos::EUplo triang = 00211 constBlkGraph_->isUpperTriangular () ? 00212 Teuchos::UPPER_TRI : 00213 Teuchos::LOWER_TRI; 00214 const Teuchos::EDiag diag = 00215 (constBlkGraph_->getNodeNumBlockDiags () < constBlkGraph_->getNodeNumBlockRows ()) ? 00216 Teuchos::UNIT_DIAG : 00217 Teuchos::NON_UNIT_DIAG; 00218 00219 lclMatOps_.template solve<DomainScalar,RangeScalar> (trans, triang, diag, lclY, lclX); 00220 } 00221 00222 //------------------------------------------------------------------- 00223 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00224 void 00225 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::updateImport(const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& X) const 00226 { 00227 if (importer_ != Teuchos::null) { 00228 if (importedVec_ == Teuchos::null || importedVec_->getNumVectors() != X.getNumVectors()) { 00229 importedVec_ = Teuchos::rcp(new MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>(getBlockColMap()->getPointMap(), X.getNumVectors())); 00230 } 00231 00232 importedVec_->doImport(X, *importer_, REPLACE); 00233 } 00234 } 00235 00236 //------------------------------------------------------------------- 00237 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00238 void 00239 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::updateExport(const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Y) const 00240 { 00241 if (exporter_ != Teuchos::null) { 00242 if (exportedVec_ == Teuchos::null || exportedVec_->getNumVectors() != Y.getNumVectors()) { 00243 exportedVec_ = Teuchos::rcp(new MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>(getBlockColMap()->getPointMap(), Y.getNumVectors())); 00244 } 00245 00246 exportedVec_->doExport(Y, *exporter_, REPLACE); 00247 } 00248 } 00249 00250 //------------------------------------------------------------------- 00251 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00252 void 00253 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::apply( 00254 const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X, 00255 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y, 00256 Teuchos::ETransp trans, 00257 Scalar alpha, 00258 Scalar beta) const 00259 { 00260 if (trans == Teuchos::NO_TRANS) { 00261 updateImport(X); 00262 const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Xref = importedVec_ == Teuchos::null ? X : *importedVec_; 00263 this->template multiply<Scalar,Scalar>(Xref, Y, trans, alpha, beta); 00264 } 00265 else if (trans == Teuchos::TRANS || trans == Teuchos::CONJ_TRANS) { 00266 updateImport(Y); 00267 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Yref = importedVec_ == Teuchos::null ? Y : *importedVec_; 00268 this->template multiply<Scalar,Scalar>(X, Yref, trans, alpha, beta); 00269 if (importedVec_ != Teuchos::null) { 00270 Y.doExport(*importedVec_, *importer_, ADD); 00271 } 00272 } 00273 } 00274 00275 //------------------------------------------------------------------- 00276 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00277 void 00278 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::applyInverse( 00279 const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y, 00280 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X, 00281 Teuchos::ETransp trans) const 00282 { 00283 if (trans == Teuchos::NO_TRANS) { 00284 updateImport(Y); 00285 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Xref = importedVec_ == Teuchos::null ? X : *importedVec_; 00286 this->template solve<Scalar,Scalar>(Y, Xref, trans); 00287 } 00288 else if (trans == Teuchos::TRANS || trans == Teuchos::CONJ_TRANS) { 00289 updateImport(Y); 00290 const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Yref = importedVec_ == Teuchos::null ? Y : *importedVec_; 00291 this->template solve<Scalar,Scalar>(Yref, X, trans); 00292 if (importedVec_ != Teuchos::null) { 00293 X.doExport(*importedVec_, *importer_, ADD); 00294 } 00295 } 00296 } 00297 00298 //------------------------------------------------------------------- 00299 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00300 bool 00301 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::hasTransposeApply() const 00302 { 00303 return true; 00304 } 00305 00306 //------------------------------------------------------------------- 00307 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00308 Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> > 00309 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getBlockRowMap() const 00310 { 00311 return constBlkGraph_->getBlockRowMap(); 00312 } 00313 00314 //------------------------------------------------------------------- 00315 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00316 Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > 00317 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getPointRowMap() const 00318 { 00319 return constBlkGraph_->getBlockRowMap()->getPointMap(); 00320 } 00321 00322 //------------------------------------------------------------------- 00323 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00324 Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> > 00325 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getBlockColMap() const 00326 { 00327 return constBlkGraph_->getBlockColMap(); 00328 } 00329 00330 //------------------------------------------------------------------- 00331 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00332 Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> > 00333 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getBlockDomainMap() const 00334 { 00335 return constBlkGraph_->getBlockDomainMap(); 00336 } 00337 00338 //------------------------------------------------------------------- 00339 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00340 Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> > 00341 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getBlockRangeMap() const 00342 { 00343 return constBlkGraph_->getBlockRangeMap(); 00344 } 00345 00346 //------------------------------------------------------------------- 00347 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00348 Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > 00349 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getPointColMap() const 00350 { 00351 return constBlkGraph_->getBlockColMap()->getPointMap(); 00352 } 00353 00354 //------------------------------------------------------------------- 00355 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00356 void 00357 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>:: 00358 getGlobalBlockRowView (GlobalOrdinal globalBlockRow, 00359 LocalOrdinal& numPtRows, 00360 Teuchos::ArrayView<const GlobalOrdinal>& blockCols, 00361 Teuchos::Array<LocalOrdinal>& ptColsPerBlockCol, 00362 Teuchos::Array<Teuchos::ArrayRCP<const Scalar> >& blockEntries) const 00363 { 00364 TEUCHOS_TEST_FOR_EXCEPTION( 00365 isFillComplete(), std::runtime_error, 00366 "Tpetra::VbrMatrix::getGlobalBlockRowView internal ERROR, " 00367 "isFillComplete() is required to be false."); 00368 00369 typedef typename Teuchos::ArrayView<const GlobalOrdinal>::size_type Tsize_t; 00370 00371 LocalOrdinal localRow = getBlockRowMap()->getLocalBlockID(globalBlockRow); 00372 numPtRows = getBlockRowMap()->getLocalBlockSize(localRow); 00373 constBlkGraph_->getGlobalBlockRowView(globalBlockRow, blockCols); 00374 ptColsPerBlockCol.resize(blockCols.size()); 00375 blockEntries.resize(blockCols.size()); 00376 for (Tsize_t i = 0; i < blockCols.size (); ++i) { 00377 getGlobalBlockEntryView (globalBlockRow, blockCols[i], 00378 numPtRows, ptColsPerBlockCol[i], blockEntries[i]); 00379 } 00380 } 00381 00382 //------------------------------------------------------------------- 00383 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00384 void 00385 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getLocalBlockRowView( 00386 LocalOrdinal localBlockRow, 00387 LocalOrdinal& numPtRows, 00388 Teuchos::ArrayView<const LocalOrdinal>& blockCols, 00389 Teuchos::Array<LocalOrdinal>& ptColsPerBlockCol, 00390 Teuchos::ArrayRCP<const Scalar>& blockEntries) const 00391 { 00392 TEUCHOS_TEST_FOR_EXCEPTION(!isFillComplete(), std::runtime_error, 00393 "Tpetra::VbrMatrix::getGlobalBlockRowView internal ERROR, isFillComplete() is required to be true."); 00394 00395 typedef typename Teuchos::ArrayView<const GlobalOrdinal>::size_type Tsize_t; 00396 typedef Teuchos::ArrayRCP<const size_t> Host_View; 00397 typedef Teuchos::ArrayRCP<const LocalOrdinal> Host_View_LO; 00398 00399 numPtRows = getBlockRowMap()->getLocalBlockSize(localBlockRow); 00400 constBlkGraph_->getLocalBlockRowView(localBlockRow, blockCols); 00401 ptColsPerBlockCol.resize(blockCols.size()); 00402 LocalOrdinal num_scalars_in_block_row = 0; 00403 for(Tsize_t i=0; i<blockCols.size(); ++i) { 00404 ptColsPerBlockCol[i] = getBlockColMap()->getLocalBlockSize(blockCols[i]); 00405 num_scalars_in_block_row += numPtRows*ptColsPerBlockCol[i]; 00406 } 00407 00408 Teuchos::RCP<Node> node = getNode(); 00409 Host_View bpr = constBlkGraph_->getNodeRowOffsets(); 00410 const size_t bindx_offset = bpr[localBlockRow]; 00411 Host_View_LO indx = node->template viewBuffer<LocalOrdinal>(1, pbuf_indx_+bindx_offset); 00412 const LocalOrdinal offset = indx[0]; 00413 blockEntries = node->template viewBuffer<Scalar>(num_scalars_in_block_row, pbuf_values1D_+offset); 00414 } 00415 00416 //------------------------------------------------------------------- 00417 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00418 void 00419 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getGlobalBlockEntryViewNonConst( 00420 GlobalOrdinal globalBlockRow, 00421 GlobalOrdinal globalBlockCol, 00422 LocalOrdinal& numPtRows, 00423 LocalOrdinal& numPtCols, 00424 Teuchos::ArrayRCP<Scalar>& blockEntry) 00425 { 00426 //Return a non-const, read-write view of a block-entry (as an ArrayRCP), 00427 //creating/allocating the block-entry if it doesn't already exist, (but only 00428 //if fillComplete hasn't been called yet, and if the arguments numPtRows 00429 //and numPtCols have been set on input). 00430 00431 LocalOrdinal localBlockRow = getBlockRowMap()->getLocalBlockID(globalBlockRow); 00432 00433 if (localBlockRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid()) { 00434 VbrUtils::getGlobalBlockEntryViewNonConst(nonlocal_data_, 00435 globalBlockRow, globalBlockCol, 00436 numPtRows, numPtCols, blockEntry); 00437 return; 00438 } 00439 00440 if (is_storage_optimized_) { 00441 TEUCHOS_TEST_FOR_EXCEPTION(!isFillComplete(), std::runtime_error, 00442 "Tpetra::VbrMatrix::getGlobalBlockEntryViewNonConst internal ERROR, storage is optimized but isFillComplete() is false."); 00443 00444 LocalOrdinal localBlockCol = getBlockColMap()->getLocalBlockID(globalBlockCol); 00445 00446 getLocalBlockEntryViewNonConst(localBlockRow, localBlockCol, 00447 numPtRows, numPtCols, blockEntry); 00448 return; 00449 } 00450 00451 //If we get to here, fillComplete hasn't been called yet, and the matrix data 00452 //is stored in un-packed '2D' form. 00453 00454 if (data_2D_->size() == 0) { 00455 LocalOrdinal numBlockRows = constBlkGraph_->getNodeNumBlockRows(); 00456 data_2D_->resize(numBlockRows); 00457 } 00458 00459 RowGlobalCols& blkrow = (*data_2D_)[localBlockRow]; 00460 00461 typename RowGlobalCols::iterator col_iter = blkrow.find(globalBlockCol); 00462 00463 if (col_iter != blkrow.end()) { 00464 blockEntry = col_iter->second; 00465 } 00466 else { 00467 //blockEntry doesn't already exist, so we will create it. 00468 00469 //make sure block-size is specified: 00470 TEUCHOS_TEST_FOR_EXCEPTION(numPtRows==0 || numPtCols==0, std::runtime_error, 00471 "Tpetra::VbrMatrix::getGlobalBlockEntryViewNonConst ERROR: creating block-entry, but numPtRows and/or numPtCols is 0."); 00472 00473 Teuchos::RCP<Node> node = getNode(); 00474 size_t blockSize = numPtRows*numPtCols; 00475 blockEntry = Teuchos::arcp(new Scalar[blockSize], 0, blockSize); 00476 std::fill(blockEntry.begin(), blockEntry.end(), (Scalar) 0); 00477 blkrow.insert(std::make_pair(globalBlockCol, blockEntry)); 00478 blkGraph_->insertGlobalIndices(globalBlockRow, Teuchos::ArrayView<GlobalOrdinal>(&globalBlockCol, 1)); 00479 } 00480 } 00481 00482 //------------------------------------------------------------------- 00483 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00484 void 00485 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getGlobalBlockEntryView( 00486 GlobalOrdinal globalBlockRow, 00487 GlobalOrdinal globalBlockCol, 00488 LocalOrdinal& numPtRows, 00489 LocalOrdinal& numPtCols, 00490 Teuchos::ArrayRCP<const Scalar>& blockEntry) const 00491 { 00492 //This method returns a const-view of a block-entry (as an ArrayRCP). 00493 //Throws an exception if the block-entry doesn't already exist. 00494 00495 if (is_storage_optimized_) { 00496 TEUCHOS_TEST_FOR_EXCEPTION(!isFillComplete(), std::runtime_error, 00497 "Tpetra::VbrMatrix::getGlobalBlockEntryView internal ERROR, storage is optimized but isFillComplete() is false."); 00498 00499 LocalOrdinal localBlockRow = getBlockRowMap()->getLocalBlockID(globalBlockRow); 00500 LocalOrdinal localBlockCol = getBlockColMap()->getLocalBlockID(globalBlockCol); 00501 getLocalBlockEntryView(localBlockRow, localBlockCol, numPtRows, numPtCols, blockEntry); 00502 return; 00503 } 00504 00505 TEUCHOS_TEST_FOR_EXCEPTION(data_2D_->size() == 0, std::runtime_error, 00506 "Tpetra::VbrMatrix::getGlobalBlockEntryView ERROR, matrix storage not yet allocated, can't return a const view."); 00507 00508 //this acts as a range-check for globalBlockRow: 00509 LocalOrdinal localBlockRow = getBlockRowMap()->getLocalBlockID(globalBlockRow); 00510 TEUCHOS_TEST_FOR_EXCEPTION( localBlockRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid(), 00511 std::runtime_error, 00512 "Tpetra::VbrMatrix::getGlobalBlockEntryView, globalBlockRow not on the local processor."); 00513 00514 RowGlobalCols& blkrow = (*data_2D_)[localBlockRow]; 00515 typename RowGlobalCols::iterator col_iter = blkrow.find(globalBlockCol); 00516 00517 if (col_iter == blkrow.end()) { 00518 throw std::runtime_error("Tpetra::VbrMatrix::getGlobalBlockEntryView ERROR, specified block-entry doesn't exist."); 00519 } 00520 00521 numPtRows = getBlockRowMap()->getLocalBlockSize(localBlockRow); 00522 00523 TEUCHOS_TEST_FOR_EXCEPTION(numPtRows == 0, std::runtime_error, 00524 "Tpetra::VbrMatrix::getGlobalBlockEntryView ERROR, numPtRows == 0."); 00525 00526 blockEntry = col_iter->second; 00527 numPtCols = blockEntry.size() / numPtRows; 00528 } 00529 00530 //------------------------------------------------------------------- 00531 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00532 void 00533 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getLocalBlockEntryViewNonConst( 00534 LocalOrdinal localBlockRow, 00535 LocalOrdinal localBlockCol, 00536 LocalOrdinal& numPtRows, 00537 LocalOrdinal& numPtCols, 00538 Teuchos::ArrayRCP<Scalar>& blockEntry) 00539 { 00540 typedef Teuchos::ArrayRCP<const LocalOrdinal> Host_View_LO; 00541 typedef Teuchos::ArrayRCP<const size_t> Host_View; 00542 typedef typename Host_View_LO::iterator ITER; 00543 //This method returns a non-constant view of a block-entry (as an ArrayRCP). 00544 00545 TEUCHOS_TEST_FOR_EXCEPTION(isFillComplete() == false, std::runtime_error, 00546 "Tpetra::VbrMatrix::getLocalBlockEntryViewNonConst ERROR, this method can only be called after fillComplete() has been called."); 00547 00548 TEUCHOS_TEST_FOR_EXCEPTION(is_storage_optimized_ == false, std::runtime_error, 00549 "Tpetra::VbrMatrix::getLocalBlockEntryViewNonConst ERROR, this method can only be called if storage is optimized."); 00550 00551 Teuchos::RCP<Node> node = getNode(); 00552 00553 Host_View bptr = constBlkGraph_->getNodeRowOffsets(); 00554 LocalOrdinal bindx_offset = bptr[localBlockRow]; 00555 LocalOrdinal length = bptr[localBlockRow+1] - bindx_offset; 00556 00557 TEUCHOS_TEST_FOR_EXCEPTION( length < 1, std::runtime_error, 00558 "Tpetra::VbrMatrix::getLocalBlockEntryViewNonConst ERROR, specified localBlockCol not found in localBlockRow."); 00559 00560 Host_View_LO bindx = constBlkGraph_->getNodePackedIndices(); 00561 ITER bindx_beg = bindx.begin() + bindx_offset, 00562 bindx_end = bindx_beg + length; 00563 ITER it = std::lower_bound(bindx_beg, bindx_end, localBlockCol); 00564 00565 TEUCHOS_TEST_FOR_EXCEPTION(it == bindx_end || *it != localBlockCol, std::runtime_error, 00566 "Tpetra::VbrMatrix::getLocalBlockEntryViewNonConst ERROR, specified localBlockCol not found."); 00567 00568 numPtRows = getBlockRowMap()->getLocalBlockSize(localBlockRow); 00569 numPtCols = getBlockColMap()->getLocalBlockSize(localBlockCol); 00570 00571 const LocalOrdinal blkSize = numPtRows*numPtCols; 00572 Host_View_LO indx = node->template viewBuffer<LocalOrdinal>(1,pbuf_indx_+bptr[localBlockRow]+(it-bindx_beg)); 00573 const LocalOrdinal offset = indx[0]; 00574 blockEntry = node->template viewBufferNonConst<Scalar>(KokkosClassic::ReadWrite, blkSize, pbuf_values1D_ + offset); 00575 } 00576 00577 //------------------------------------------------------------------- 00578 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00579 void 00580 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getLocalDiagCopy( 00581 Tpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& diag) const 00582 { 00583 const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& rowmap = getBlockRowMap(); 00584 TEUCHOS_TEST_FOR_EXCEPTION(diag.getMap()->isSameAs(*(rowmap->getPointMap())) != true, 00585 std::runtime_error, "Tpetra::VbrMatrix::getLocalDiagCopy ERROR, vector must be distributed the same as this matrix' row-map."); 00586 00587 Teuchos::ArrayRCP<Scalar> diag_view = diag.get1dViewNonConst(); 00588 Teuchos::ArrayView<const GlobalOrdinal> blockIDs = rowmap->getNodeBlockIDs(); 00589 size_t offset = 0; 00590 typedef typename Teuchos::ArrayView<const GlobalOrdinal>::size_type Tsize_t; 00591 for(Tsize_t i=0; i<blockIDs.size(); ++i) { 00592 LocalOrdinal localBlockID = rowmap->getLocalBlockID(blockIDs[i]); 00593 LocalOrdinal blockSize = rowmap->getLocalBlockSize(localBlockID); 00594 Teuchos::ArrayRCP<const Scalar> blockEntry; 00595 getGlobalBlockEntryView(blockIDs[i], blockIDs[i], blockSize, blockSize, blockEntry); 00596 00597 for(LocalOrdinal j=0; j<blockSize; ++j) { 00598 diag_view[offset++] = blockEntry[j*blockSize+j]; 00599 } 00600 } 00601 } 00602 00603 //------------------------------------------------------------------- 00604 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00605 bool 00606 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>:: 00607 checkSizes (const SrcDistObject& source) 00608 { 00609 typedef VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> this_type; 00610 const this_type* srcVbrMat = dynamic_cast<const this_type*> (&source); 00611 00612 if (srcVbrMat == NULL) { 00613 typedef VbrUtils::VbrDataDist<LocalOrdinal,GlobalOrdinal,Scalar,Node> VDD; 00614 const VDD* vdd = const_cast<VDD*> (dynamic_cast<const VDD*> (&source)); 00615 if (vdd == NULL) { 00616 return false; 00617 } else { 00618 return (this->getMap ()->getMinAllGlobalIndex () <= 00619 vdd->getMap ()->getMinAllGlobalIndex ()) && 00620 (this->getMap ()->getMaxAllGlobalIndex () >= 00621 vdd->getMap ()->getMaxAllGlobalIndex ()); 00622 } 00623 } else { 00624 return (this->getMap ()->getMinAllGlobalIndex () <= 00625 srcVbrMat->getMap ()->getMinAllGlobalIndex ()) && 00626 (this->getMap ()->getMaxAllGlobalIndex () >= 00627 srcVbrMat->getMap ()->getMaxAllGlobalIndex ()); 00628 } 00629 } 00630 00631 //------------------------------------------------------------------- 00632 template<class Scalar, 00633 class LocalOrdinal, 00634 class GlobalOrdinal, 00635 class Node> 00636 void 00637 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>:: 00638 copyAndPermute (const SrcDistObject& source, 00639 size_t numSameIDs, 00640 const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs, 00641 const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs) 00642 { 00643 typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type Tsize_t; 00644 typedef VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> this_type; 00645 const this_type* src_mat_ptr = dynamic_cast<const this_type*> (&source); 00646 00647 if (src_mat_ptr == NULL) { 00648 throw std::runtime_error ("VbrMatrix::copyAndPermute ERROR, dynamic_cast failed."); 00649 } 00650 const this_type& src_mat = *src_mat_ptr; 00651 00652 Teuchos::Array<LocalOrdinal> src_ptColsPerBlkCol; 00653 LocalOrdinal numPtRows; 00654 00655 LocalOrdinal numSame = numSameIDs; 00656 for(LocalOrdinal i=0; i<numSame; ++i) { 00657 if (isFillComplete()) { 00658 Teuchos::ArrayRCP<const Scalar> src_blkEntries; 00659 Teuchos::ArrayView<const LocalOrdinal> src_blkCols; 00660 00661 src_mat.getLocalBlockRowView (i, numPtRows, src_blkCols, 00662 src_ptColsPerBlkCol, src_blkEntries); 00663 unsigned offset = 0; 00664 for (Tsize_t j=0; j<src_blkCols.size(); ++j) { 00665 LocalOrdinal numPtCols = src_ptColsPerBlkCol[j]; 00666 setLocalBlockEntry (i, src_blkCols[j], numPtRows, numPtCols, numPtCols, 00667 src_blkEntries.view (offset, numPtRows*numPtCols)); 00668 offset += numPtRows*numPtCols; 00669 } 00670 } 00671 else { 00672 GlobalOrdinal gRow = getBlockRowMap ()->getGlobalBlockID (i); 00673 Teuchos::Array<Teuchos::ArrayRCP<const Scalar> > src_blkEntries; 00674 Teuchos::ArrayView<const GlobalOrdinal> src_blkCols; 00675 00676 src_mat.getGlobalBlockRowView (gRow, numPtRows, src_blkCols, 00677 src_ptColsPerBlkCol, src_blkEntries); 00678 for (Tsize_t j = 0; j < src_blkCols.size (); ++j) { 00679 LocalOrdinal numPtCols = src_ptColsPerBlkCol[j]; 00680 setGlobalBlockEntry (gRow, src_blkCols[j], 00681 numPtRows, numPtCols, numPtRows, 00682 src_blkEntries[j].view (0, numPtRows * numPtCols)); 00683 } 00684 } 00685 } 00686 00687 for (Tsize_t i = 0; i < permuteToLIDs.size (); ++i) { 00688 if (isFillComplete ()) { 00689 Teuchos::ArrayRCP<const Scalar> src_blkEntries; 00690 Teuchos::ArrayView<const LocalOrdinal> src_blkCols; 00691 00692 src_mat.getLocalBlockRowView (permuteFromLIDs[i], numPtRows, 00693 src_blkCols, src_ptColsPerBlkCol, 00694 src_blkEntries); 00695 unsigned offset = 0; 00696 for (Tsize_t j=0; j<src_blkCols.size(); ++j) { 00697 LocalOrdinal numPtCols = src_ptColsPerBlkCol[j]; 00698 setLocalBlockEntry (permuteToLIDs[i], src_blkCols[j], numPtRows, numPtCols, numPtCols, 00699 src_blkEntries.view(offset, numPtRows*numPtCols)); 00700 offset += numPtRows*numPtCols; 00701 } 00702 } 00703 else { 00704 GlobalOrdinal gRow = getBlockRowMap ()->getGlobalBlockID (permuteToLIDs[i]); 00705 Teuchos::Array<Teuchos::ArrayRCP<const Scalar> > src_blkEntries; 00706 Teuchos::ArrayView<const GlobalOrdinal> src_blkCols; 00707 00708 src_mat.getGlobalBlockRowView (gRow, numPtRows, src_blkCols, 00709 src_ptColsPerBlkCol, src_blkEntries); 00710 for (Tsize_t j = 0; j < src_blkCols.size (); ++j) { 00711 LocalOrdinal numPtCols = src_ptColsPerBlkCol[j]; 00712 setGlobalBlockEntry (gRow, src_blkCols[j], 00713 numPtRows, numPtCols, numPtRows, 00714 src_blkEntries[j].view (0, numPtRows * numPtCols)); 00715 } 00716 } 00717 } 00718 } 00719 00720 //------------------------------------------------------------------- 00721 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00722 void 00723 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>:: 00724 packAndPrepare (const SrcDistObject& source, 00725 const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs, 00726 Teuchos::Array<char>& exports, 00727 const Teuchos::ArrayView<size_t>& numPacketsPerLID, 00728 size_t& constantNumPackets, 00729 Distributor& distor) 00730 { 00731 typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type Tsize_t; 00732 typedef VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> this_type; 00733 const this_type* src_mat_ptr = dynamic_cast<const this_type*> (&source); 00734 00735 if (src_mat_ptr == NULL) { 00736 typedef VbrUtils::VbrDataDist<LocalOrdinal, GlobalOrdinal, Scalar, Node> VDD; 00737 const VDD* vdd = dynamic_cast<const VDD*> (&source); 00738 TEUCHOS_TEST_FOR_EXCEPTION( 00739 vdd == NULL, std::invalid_argument, "Tpetra::VbrMatrix::packAndPrepare: " 00740 "Source object input must be either a VbrMatrix or a VbrDataDist with " 00741 "matching template parameters."); 00742 vdd->pack (exportLIDs, exports, numPacketsPerLID, constantNumPackets, distor); 00743 return; 00744 } 00745 const this_type& src_mat = *src_mat_ptr; 00746 00747 //We will pack each row's data into the exports buffer as follows: 00748 //[num-block-cols,numPtRows,{list-of-blk-cols},{list-of-ptColsPerBlkCol},{all vals}] 00749 //so the length of the char exports buffer for a row is: 00750 //sizeof(LocalOrdinal)*(2+2*(num-block-cols)) + sizeof(Scalar)*numPtRows*sum(numPtCols_i) 00751 00752 //For each row corresponding to exportLIDs, accumulate the size that it will 00753 //occupy in the exports buffer: 00754 size_t total_exports_size = 0; 00755 for (Tsize_t i = 0; i < exportLIDs.size (); ++i) { 00756 Teuchos::Array<LocalOrdinal> src_ptColsPerBlkCol; 00757 LocalOrdinal numPtRows; 00758 LocalOrdinal numBlkCols = 0; 00759 LocalOrdinal numScalars = 0; 00760 00761 if (src_mat.isFillComplete ()) { 00762 Teuchos::ArrayRCP<const Scalar> src_blkEntries; 00763 Teuchos::ArrayView<const LocalOrdinal> src_blkCols; 00764 src_mat.getLocalBlockRowView (exportLIDs[i], numPtRows, 00765 src_blkCols, src_ptColsPerBlkCol, 00766 src_blkEntries); 00767 numBlkCols = src_blkCols.size (); 00768 numScalars = src_blkEntries.size (); 00769 } 00770 else { 00771 Teuchos::Array<Teuchos::ArrayRCP<const Scalar> > src_blkEntries; 00772 GlobalOrdinal gRow = 00773 src_mat.getBlockRowMap ()->getGlobalBlockID (exportLIDs[i]); 00774 Teuchos::ArrayView<const GlobalOrdinal> src_blkCols; 00775 src_mat.getGlobalBlockRowView (gRow, numPtRows, src_blkCols, 00776 src_ptColsPerBlkCol, src_blkEntries); 00777 numBlkCols = src_blkCols.size (); 00778 for (Tsize_t j = 0; j < src_blkEntries.size (); ++j) { 00779 numScalars += src_blkEntries[j].size (); 00780 } 00781 } 00782 00783 const size_t size_for_this_row = 00784 sizeof (GlobalOrdinal) * (2 + 2*numBlkCols) + 00785 sizeof (Scalar) * numScalars; 00786 numPacketsPerLID[i] = size_for_this_row; 00787 total_exports_size += size_for_this_row; 00788 } 00789 00790 exports.resize (total_exports_size); 00791 00792 ArrayView<char> avIndsC, avValsC; 00793 ArrayView<Scalar> avVals; 00794 00795 size_t offset = 0; 00796 00797 for (Tsize_t i = 0; i < exportLIDs.size (); ++i) { 00798 Teuchos::Array<LocalOrdinal> src_ptColsPerBlkCol; 00799 LocalOrdinal numPtRows; 00800 LocalOrdinal numBlkCols = 0; 00801 LocalOrdinal numScalars = 0; 00802 00803 if (src_mat.isFillComplete ()) { 00804 Teuchos::ArrayRCP<const Scalar> src_blkEntries; 00805 Teuchos::ArrayView<const LocalOrdinal> src_blkCols; 00806 src_mat.getLocalBlockRowView (exportLIDs[i], numPtRows, src_blkCols, 00807 src_ptColsPerBlkCol, src_blkEntries); 00808 numBlkCols = src_blkCols.size (); 00809 numScalars = src_blkEntries.size (); 00810 00811 LocalOrdinal num_chars_for_ordinals = 00812 (2*numBlkCols + 2) * sizeof (LocalOrdinal); 00813 //get export views 00814 avIndsC = exports (offset, num_chars_for_ordinals); 00815 avValsC = exports (offset + num_chars_for_ordinals, numScalars * sizeof (Scalar)); 00816 ArrayView<GlobalOrdinal> avInds = av_reinterpret_cast<GlobalOrdinal>(avIndsC); 00817 typename ArrayView<GlobalOrdinal>::iterator ind_it = avInds.begin(); 00818 00819 const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& col_map = 00820 src_mat.getBlockColMap (); 00821 00822 //put row info into the buffer views: 00823 *ind_it++ = numBlkCols; 00824 *ind_it++ = numPtRows; 00825 for (Tsize_t j = 0; j < src_blkCols.size (); ++j) { 00826 *ind_it++ = col_map->getGlobalBlockID (src_blkCols[j]); 00827 } 00828 std::copy (src_ptColsPerBlkCol.begin(), src_ptColsPerBlkCol.end(), ind_it); 00829 avVals = av_reinterpret_cast<Scalar> (avValsC); 00830 std::copy (src_blkEntries.begin(), src_blkEntries.end(), avVals.begin()); 00831 } 00832 else { 00833 Teuchos::Array<Teuchos::ArrayRCP<const Scalar> > src_blkEntries; 00834 GlobalOrdinal gRow = src_mat.getBlockRowMap()->getGlobalBlockID(exportLIDs[i]); 00835 Teuchos::ArrayView<const GlobalOrdinal> src_blkCols; 00836 src_mat.getGlobalBlockRowView (gRow, numPtRows, src_blkCols, 00837 src_ptColsPerBlkCol, src_blkEntries); 00838 numBlkCols = src_blkCols.size(); 00839 numScalars = 0; 00840 for(Tsize_t j=0; j<src_blkEntries.size(); ++j) { 00841 numScalars += numPtRows*src_ptColsPerBlkCol[j]; 00842 } 00843 00844 LocalOrdinal num_chars_for_ordinals = (2*numBlkCols+2)*sizeof(GlobalOrdinal); 00845 //get export views 00846 avIndsC = exports(offset, num_chars_for_ordinals); 00847 avValsC = exports(offset+ num_chars_for_ordinals, numScalars*sizeof(Scalar)); 00848 ArrayView<GlobalOrdinal> avInds = av_reinterpret_cast<GlobalOrdinal>(avIndsC); 00849 typename ArrayView<GlobalOrdinal>::iterator ind_it = avInds.begin(); 00850 00851 //put row info into the buffer views: 00852 *ind_it++ = numBlkCols; 00853 *ind_it++ = numPtRows; 00854 std::copy(src_blkCols.begin(), src_blkCols.end(), ind_it); 00855 ind_it += src_blkCols.size(); 00856 std::copy(src_ptColsPerBlkCol.begin(), src_ptColsPerBlkCol.end(), ind_it); 00857 avVals = av_reinterpret_cast<Scalar>(avValsC); 00858 typename ArrayView<Scalar>::iterator val_it = avVals.begin(); 00859 for(Tsize_t j=0; j<src_blkEntries.size(); ++j) { 00860 std::copy(src_blkEntries[j].begin(), src_blkEntries[j].end(), val_it); 00861 val_it += src_blkEntries[j].size(); 00862 } 00863 } 00864 00865 const size_t size_for_this_row = 00866 sizeof (GlobalOrdinal) * (2 + 2*numBlkCols) 00867 + sizeof (Scalar) * numScalars; 00868 offset += size_for_this_row; 00869 } 00870 00871 constantNumPackets = 0; 00872 } 00873 00874 //------------------------------------------------------------------- 00875 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00876 void 00877 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>:: 00878 unpackAndCombine (const Teuchos::ArrayView<const LocalOrdinal>& importLIDs, 00879 const Teuchos::ArrayView<const char>& imports, 00880 const Teuchos::ArrayView<size_t>& numPacketsPerLID, 00881 size_t constantNumPackets, 00882 Distributor& distor, 00883 CombineMode CM) 00884 { 00885 typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type Tsize_t; 00886 00887 if (CM == Tpetra::ABSMAX) { 00888 std::cout << "Warning, VbrMatrix Import/Export doesn't support combine-mode==ABSMAX; use ADD, INSERT or REPLACE. (REPLACE is being used now.)"<<std::endl; 00889 } 00890 00891 size_t offset = 0; 00892 for(Tsize_t i=0; i<importLIDs.size(); ++i) { 00893 ArrayView<const char> avC = imports.view(offset, numPacketsPerLID[i]); 00894 ArrayView<const GlobalOrdinal> avOrds = av_reinterpret_cast<const GlobalOrdinal>(avC); 00895 GlobalOrdinal gRow = this->getBlockRowMap()->getGlobalBlockID(importLIDs[i]); 00896 size_t avOrdsOffset = 0; 00897 LocalOrdinal numBlkCols = avOrds[avOrdsOffset++]; 00898 LocalOrdinal numPtRows = avOrds[avOrdsOffset++]; 00899 LocalOrdinal num_chars_for_ordinals = (2*numBlkCols+2)*sizeof(GlobalOrdinal); 00900 ArrayView<const char> avValsC = imports.view(offset+num_chars_for_ordinals, numPacketsPerLID[i]-num_chars_for_ordinals); 00901 ArrayView<const Scalar> avVals = av_reinterpret_cast<const Scalar>(avValsC); 00902 00903 size_t avValsOffset = 0; 00904 for(Tsize_t j=0; j<numBlkCols; ++j) { 00905 GlobalOrdinal blkCol = avOrds[avOrdsOffset+j]; 00906 LocalOrdinal numPtCols = avOrds[avOrdsOffset+numBlkCols+j]; 00907 LocalOrdinal LDA = numPtRows; 00908 if (CM == Tpetra::ADD) { 00909 sumIntoGlobalBlockEntry(gRow, blkCol, numPtRows, numPtCols, LDA, 00910 avVals.view(avValsOffset, numPtRows*numPtCols)); 00911 } 00912 else if (CM == Tpetra::INSERT || CM == Tpetra::REPLACE) { 00913 setGlobalBlockEntry(gRow, blkCol, numPtRows, numPtCols, LDA, 00914 avVals.view(avValsOffset, numPtRows*numPtCols)); 00915 } 00916 else { 00917 // std::cout << "Warning, VbrMatrix Import/Export doesn't support combine-mode==ABSMAX; use ADD, INSERT or REPLACE. (REPLACE is being used now.)"<<std::endl; 00918 setGlobalBlockEntry(gRow, blkCol, numPtRows, numPtCols, LDA, 00919 avVals.view(avValsOffset, numPtRows*numPtCols)); 00920 } 00921 avValsOffset += numPtRows*numPtCols; 00922 } 00923 } 00924 } 00925 00926 //------------------------------------------------------------------- 00927 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00928 void 00929 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getLocalBlockEntryView( 00930 LocalOrdinal localBlockRow, 00931 LocalOrdinal localBlockCol, 00932 LocalOrdinal& numPtRows, 00933 LocalOrdinal& numPtCols, 00934 Teuchos::ArrayRCP<const Scalar>& blockEntry) const 00935 { 00936 typedef Teuchos::ArrayRCP<const size_t> Host_View; 00937 typedef Teuchos::ArrayRCP<const LocalOrdinal> Host_View_LO; 00938 typedef typename Host_View_LO::iterator ITER; 00939 //This method returns a constant view of a block-entry (as an ArrayRCP). 00940 00941 TEUCHOS_TEST_FOR_EXCEPTION(isFillComplete() == false, std::runtime_error, 00942 "Tpetra::VbrMatrix::getLocalBlockEntryView ERROR, this method can only be called after fillComplete() has been called."); 00943 00944 TEUCHOS_TEST_FOR_EXCEPTION(is_storage_optimized_ == false, std::runtime_error, 00945 "Tpetra::VbrMatrix::getLocalBlockEntryView ERROR, this method can only be called if storage is optimized."); 00946 00947 Teuchos::RCP<Node> node = getNode(); 00948 00949 Host_View bpr = constBlkGraph_->getNodeRowOffsets(); 00950 const size_t bindx_offset = bpr[localBlockRow]; 00951 const size_t length = bpr[localBlockRow+1] - bindx_offset; 00952 00953 Host_View_LO bindx = constBlkGraph_->getNodePackedIndices(); 00954 ITER bindx_beg = bindx.begin()+bindx_offset, 00955 bindx_end = bindx_beg + length; 00956 ITER it = std::lower_bound(bindx_beg, bindx_end, localBlockCol); 00957 00958 TEUCHOS_TEST_FOR_EXCEPTION(it == bindx_end || *it != localBlockCol, std::runtime_error, 00959 "Tpetra::VbrMatrix::getLocalBlockEntryView ERROR, specified localBlockCol not found."); 00960 00961 numPtRows = getBlockRowMap()->getLocalBlockSize(localBlockRow); 00962 numPtCols = getBlockColMap()->getLocalBlockSize(localBlockCol); 00963 00964 const LocalOrdinal blkSize = numPtRows*numPtCols; 00965 Host_View_LO indx = node->template viewBuffer<LocalOrdinal>(1, pbuf_indx_+bpr[localBlockRow]+(it-bindx_beg)); 00966 const LocalOrdinal offset = indx[0]; 00967 blockEntry = node->template viewBuffer<Scalar>(blkSize, pbuf_values1D_ + offset); 00968 } 00969 00970 //------------------------------------------------------------------- 00971 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00972 Teuchos::RCP<Node> 00973 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getNode() const 00974 { 00975 return getBlockRowMap()->getPointMap()->getNode(); 00976 } 00977 00978 //------------------------------------------------------------------- 00979 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00980 void 00981 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::putScalar(Scalar s) 00982 { 00983 Teuchos::RCP<Node> node = getNode(); 00984 00985 if (isFillComplete()) { 00986 fill_device_ArrayRCP(node, pbuf_values1D_, s); 00987 } 00988 else { 00989 typedef typename Teuchos::Array<RowGlobalCols>::size_type Tsize_t; 00990 Teuchos::Array<RowGlobalCols>& rows = *data_2D_; 00991 Tsize_t numBlockRows = rows.size(); 00992 for(Tsize_t r=0; r<numBlockRows; ++r) { 00993 typename RowGlobalCols::iterator 00994 col_iter = rows[r].begin(), 00995 col_end = rows[r].end(); 00996 for(; col_iter != col_end; ++col_iter) { 00997 Teuchos::ArrayRCP<Scalar>& vals = col_iter->second; 00998 std::fill(vals.begin(), vals.end(), s); 00999 } 01000 } 01001 } 01002 } 01003 01004 //------------------------------------------------------------------- 01005 template<class ArrayType1,class ArrayType2,class Ordinal> 01006 void set_array_values(ArrayType1& dest, ArrayType2& src, Ordinal rows, Ordinal cols, Ordinal stride, Tpetra::CombineMode mode) 01007 { 01008 size_t offset = 0; 01009 size_t src_offset = 0; 01010 01011 if (mode == ADD) { 01012 for(Ordinal col=0; col<cols; ++col) { 01013 for(Ordinal row=0; row<rows; ++row) { 01014 dest[offset++] += src[src_offset + row]; 01015 } 01016 src_offset += stride; 01017 } 01018 } 01019 else { 01020 for(Ordinal col=0; col<cols; ++col) { 01021 for(Ordinal row=0; row<rows; ++row) { 01022 dest[offset++] = src[src_offset + row]; 01023 } 01024 src_offset += stride; 01025 } 01026 } 01027 } 01028 01029 //------------------------------------------------------------------- 01030 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 01031 void 01032 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::setGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, const Teuchos::SerialDenseMatrix<int,Scalar>& blockEntry) 01033 { 01034 //first get an ArrayRCP for the internal storage for this block-entry: 01035 Teuchos::ArrayRCP<Scalar> internalBlockEntry; 01036 LocalOrdinal blkRowSize = blockEntry.numRows(); 01037 LocalOrdinal blkColSize = blockEntry.numCols(); 01038 getGlobalBlockEntryViewNonConst(globalBlockRow,globalBlockCol, blkRowSize, blkColSize, internalBlockEntry); 01039 01040 //now copy the incoming block-entry into internal storage: 01041 const Scalar* inputvalues = blockEntry.values(); 01042 set_array_values<Teuchos::ArrayRCP<Scalar>, const Scalar*, LocalOrdinal >( 01043 internalBlockEntry, inputvalues, blkRowSize, blkColSize, blockEntry.stride(), REPLACE); 01044 } 01045 01046 //------------------------------------------------------------------- 01047 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 01048 void 01049 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::setLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, const Teuchos::SerialDenseMatrix<int,Scalar>& blockEntry) 01050 { 01051 //first get an ArrayRCP for the internal storage for this block-entry: 01052 Teuchos::ArrayRCP<Scalar> internalBlockEntry; 01053 LocalOrdinal blkRowSize = blockEntry.numRows(); 01054 LocalOrdinal blkColSize = blockEntry.numCols(); 01055 getLocalBlockEntryViewNonConst(localBlockRow,localBlockCol, blkRowSize, blkColSize, internalBlockEntry); 01056 01057 //now copy the incoming block-entry into internal storage: 01058 const Scalar* inputvalues = blockEntry.values(); 01059 set_array_values<Teuchos::ArrayRCP<Scalar>, const Scalar*, LocalOrdinal>( 01060 internalBlockEntry, inputvalues, blkRowSize, blkColSize, blockEntry.stride(), REPLACE); 01061 } 01062 01063 //------------------------------------------------------------------- 01064 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 01065 void 01066 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::sumIntoGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, const Teuchos::SerialDenseMatrix<int,Scalar>& blockEntry) 01067 { 01068 //first get an ArrayRCP for the internal storage for this block-entry: 01069 Teuchos::ArrayRCP<Scalar> internalBlockEntry; 01070 LocalOrdinal blkRowSize = blockEntry.numRows(); 01071 LocalOrdinal blkColSize = blockEntry.numCols(); 01072 getGlobalBlockEntryViewNonConst(globalBlockRow,globalBlockCol, blkRowSize, blkColSize, internalBlockEntry); 01073 01074 //now sum (add) the incoming block-entry into internal storage: 01075 const Scalar* inputvalues = blockEntry.values(); 01076 set_array_values<Teuchos::ArrayRCP<Scalar>, const Scalar*, LocalOrdinal>( 01077 internalBlockEntry, inputvalues, blkRowSize, blkColSize, blockEntry.stride(), ADD); 01078 } 01079 01080 //------------------------------------------------------------------- 01081 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 01082 void 01083 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::sumIntoLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, const Teuchos::SerialDenseMatrix<int,Scalar>& blockEntry) 01084 { 01085 //first get an ArrayRCP for the internal storage for this block-entry: 01086 Teuchos::ArrayRCP<Scalar> internalBlockEntry; 01087 LocalOrdinal blkRowSize = blockEntry.numRows(); 01088 LocalOrdinal blkColSize = blockEntry.numCols(); 01089 getLocalBlockEntryViewNonConst(localBlockRow,localBlockCol, blkRowSize, blkColSize, internalBlockEntry); 01090 01091 //now sum (add) the incoming block-entry into internal storage: 01092 const Scalar* inputvalues = blockEntry.values(); 01093 set_array_values<Teuchos::ArrayRCP<Scalar>, const Scalar*, LocalOrdinal>( 01094 internalBlockEntry, inputvalues, blkRowSize, blkColSize, blockEntry.stride(), ADD); 01095 } 01096 01097 //------------------------------------------------------------------- 01098 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 01099 void 01100 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::setGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry) 01101 { 01102 //first get an ArrayRCP for the internal storage for this block-entry: 01103 Teuchos::ArrayRCP<Scalar> internalBlockEntry; 01104 getGlobalBlockEntryViewNonConst(globalBlockRow,globalBlockCol, blkRowSize, blkColSize, internalBlockEntry); 01105 01106 LocalOrdinal blk_size = blockEntry.size(); 01107 TEUCHOS_TEST_FOR_EXCEPTION(blkColSize*LDA > blk_size, std::runtime_error, 01108 "Tpetra::VbrMatrix::setGlobalBlockEntry ERROR, inconsistent block-entry sizes."); 01109 01110 //copy the incoming block-entry into internal storage: 01111 set_array_values(internalBlockEntry, blockEntry, blkRowSize, blkColSize, LDA, REPLACE); 01112 } 01113 01114 //------------------------------------------------------------------- 01115 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 01116 void 01117 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::setLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry) 01118 { 01119 //first get an ArrayRCP for the internal storage for this block-entry: 01120 Teuchos::ArrayRCP<Scalar> internalBlockEntry; 01121 getLocalBlockEntryViewNonConst(localBlockRow,localBlockCol, blkRowSize, blkColSize, internalBlockEntry); 01122 01123 LocalOrdinal blk_size = blockEntry.size(); 01124 TEUCHOS_TEST_FOR_EXCEPTION(blkColSize*LDA > blk_size, std::runtime_error, 01125 "Tpetra::VbrMatrix::setLocalBlockEntry ERROR, inconsistent block-entry sizes."); 01126 01127 //copy the incoming block-entry into internal storage: 01128 set_array_values(internalBlockEntry, blockEntry, blkRowSize, blkColSize, LDA, REPLACE); 01129 } 01130 01131 //------------------------------------------------------------------- 01132 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 01133 void 01134 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::sumIntoGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry) 01135 { 01136 //first get an ArrayRCP for the internal storage for this block-entry: 01137 Teuchos::ArrayRCP<Scalar> internalBlockEntry; 01138 getGlobalBlockEntryViewNonConst(globalBlockRow,globalBlockCol, blkRowSize, blkColSize, internalBlockEntry); 01139 01140 LocalOrdinal blk_size = blockEntry.size(); 01141 TEUCHOS_TEST_FOR_EXCEPTION(blkColSize*LDA > blk_size, std::runtime_error, 01142 "Tpetra::VbrMatrix::setGlobalBlockEntry ERROR, inconsistent block-entry sizes."); 01143 01144 //copy the incoming block-entry into internal storage: 01145 set_array_values(internalBlockEntry, blockEntry, blkRowSize, blkColSize, LDA, ADD); 01146 } 01147 01148 //------------------------------------------------------------------- 01149 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 01150 void 01151 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::sumIntoLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry) 01152 { 01153 //first get an ArrayRCP for the internal storage for this block-entry: 01154 Teuchos::ArrayRCP<Scalar> internalBlockEntry; 01155 getLocalBlockEntryViewNonConst(localBlockRow,localBlockCol, blkRowSize, blkColSize, internalBlockEntry); 01156 01157 LocalOrdinal blk_size = blockEntry.size(); 01158 TEUCHOS_TEST_FOR_EXCEPTION(blkColSize*LDA > blk_size, std::runtime_error, 01159 "Tpetra::VbrMatrix::setLocalBlockEntry ERROR, inconsistent block-entry sizes."); 01160 01161 //copy the incoming block-entry into internal storage: 01162 set_array_values(internalBlockEntry, blockEntry, blkRowSize, blkColSize, LDA, ADD); 01163 } 01164 01165 //------------------------------------------------------------------- 01166 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 01167 void 01168 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::optimizeStorage() 01169 { 01170 typedef Teuchos::ArrayRCP<const size_t> Host_View; 01171 typedef Teuchos::ArrayRCP<const LocalOrdinal> Host_View_LO; 01172 typedef typename Teuchos::ArrayRCP<size_t>::size_type Tsize_t; 01173 01174 if (is_storage_optimized_ == true) return; 01175 01176 TEUCHOS_TEST_FOR_EXCEPTION(constBlkGraph_->isFillComplete() != true, std::runtime_error, 01177 "Tpetra::VbrMatrix::optimizeStorage ERROR, isFillComplete() is false, required to be true before optimizeStorage is called."); 01178 01179 size_t num_block_nonzeros = constBlkGraph_->getNodeNumBlockEntries(); 01180 01181 const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& rowmap = constBlkGraph_->getBlockRowMap(); 01182 const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& colmap = constBlkGraph_->getBlockColMap(); 01183 01184 const Teuchos::RCP<Node>& node = getBlockRowMap()->getPointMap()->getNode(); 01185 01186 //need to count the number of point-entries: 01187 size_t num_point_entries = 0; 01188 Host_View bptr = constBlkGraph_->getNodeRowOffsets(); 01189 01190 if (bptr.size() == 0) { 01191 is_storage_optimized_ = true; 01192 return; 01193 } 01194 01195 Host_View_LO bindx = constBlkGraph_->getNodePackedIndices(); 01196 01197 for(Tsize_t r=0; r<bptr.size()-1; ++r) { 01198 size_t rbeg = bptr[r]; 01199 size_t rend = bptr[r+1]; 01200 01201 LocalOrdinal rsize = rowmap->getLocalBlockSize(r); 01202 01203 for(size_t j=rbeg; j<rend; ++j) { 01204 LocalOrdinal csize = colmap->getLocalBlockSize(bindx[j]); 01205 num_point_entries += rsize*csize; 01206 } 01207 } 01208 01209 pbuf_indx_ = node->template allocBuffer<LocalOrdinal>(num_block_nonzeros+1); 01210 pbuf_values1D_ = node->template allocBuffer<Scalar>(num_point_entries); 01211 01212 Teuchos::ArrayRCP<LocalOrdinal> view_indx = node->template viewBufferNonConst<LocalOrdinal>(KokkosClassic::WriteOnly, num_block_nonzeros+1, pbuf_indx_); 01213 Teuchos::ArrayRCP<Scalar> view_values1D = node->template viewBufferNonConst<Scalar>(KokkosClassic::WriteOnly, num_point_entries, pbuf_values1D_); 01214 01215 bool have_2D_data = data_2D_->size() > 0; 01216 Scalar zero = Teuchos::ScalarTraits<Scalar>::zero(); 01217 01218 size_t ioffset = 0; 01219 size_t offset = 0; 01220 for(Tsize_t r=0; r<bptr.size()-1; ++r) { 01221 LocalOrdinal rsize = rowmap->getLocalBlockSize(r); 01222 01223 RowGlobalCols* blk_row = have_2D_data ? &((*data_2D_)[r]) : NULL; 01224 01225 for(size_t c=bptr[r]; c<bptr[r+1]; ++c) { 01226 view_indx[ioffset++] = offset; 01227 01228 LocalOrdinal csize = colmap->getLocalBlockSize(bindx[c]); 01229 Tsize_t blkSize = rsize*csize; 01230 01231 if (blk_row != NULL) { 01232 GlobalOrdinal global_col = colmap->getGlobalBlockID(bindx[c]); 01233 typename RowGlobalCols::iterator iter = blk_row->find(global_col); 01234 TEUCHOS_TEST_FOR_EXCEPTION(iter == blk_row->end(), std::runtime_error, 01235 "Tpetra::VbrMatrix::optimizeStorage ERROR, global_col not found in row."); 01236 01237 Teuchos::ArrayRCP<Scalar> vals = iter->second; 01238 for(Tsize_t n=0; n<blkSize; ++n) { 01239 view_values1D[offset+n] = vals[n]; 01240 } 01241 } 01242 else { 01243 for(Tsize_t n=0; n<blkSize; ++n) view_values1D[offset+n] = zero; 01244 } 01245 offset += blkSize; 01246 } 01247 } 01248 view_indx[ioffset] = offset; 01249 01250 //Final step: release memory for the "2D" storage: 01251 data_2D_ = Teuchos::null; 01252 01253 is_storage_optimized_ = true; 01254 } 01255 01256 //------------------------------------------------------------------- 01257 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 01258 void VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::fillLocalMatrix() 01259 { 01260 //We insist that optimzeStorage has already been called. 01261 //We don't care whether this function (fillLocalMatrix()) is being 01262 //called for the first time or not. 01263 TEUCHOS_TEST_FOR_EXCEPTION(is_storage_optimized_ != true, std::runtime_error, 01264 "Tpetra::VbrMatrix::fillLocalMatrix ERROR, optimizeStorage is required to have already been called."); 01265 01266 Teuchos::ArrayRCP<const size_t > nodeRowOffsets = constBlkGraph_->getNodeRowOffsets(); 01267 Teuchos::ArrayRCP<const LocalOrdinal> nodePackedInds = constBlkGraph_->getNodePackedIndices(); 01268 if (Node::isHostNode == false) { 01269 RCP<Node> node = getRangeMap()->getNode(); 01270 // 01271 Teuchos::ArrayRCP<size_t> dev_nodeRowOffsets = node->template allocBuffer<size_t>(nodeRowOffsets.size()); 01272 node->copyToBuffer(nodeRowOffsets.size(), nodeRowOffsets(), dev_nodeRowOffsets); 01273 nodeRowOffsets = dev_nodeRowOffsets; 01274 // 01275 Teuchos::ArrayRCP<LocalOrdinal> dev_nodePackedInds = node->template allocBuffer<LocalOrdinal>(nodePackedInds.size()); 01276 node->copyToBuffer(nodePackedInds.size(), nodePackedInds(), dev_nodePackedInds); 01277 nodePackedInds = dev_nodePackedInds; 01278 } 01279 lclMatrix_->setPackedValues (pbuf_values1D_, 01280 getBlockRowMap()->getNodeFirstPointInBlocks_Device(), 01281 getBlockColMap()->getNodeFirstPointInBlocks_Device(), 01282 nodeRowOffsets, 01283 nodePackedInds, 01284 pbuf_indx_); 01285 } 01286 01287 //------------------------------------------------------------------- 01288 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 01289 void VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::fillLocalMatVec() 01290 { 01291 //We insist that optimzeStorage has already been called. 01292 //We don't care whether this function (fillLocalMatVec()) is being 01293 //called for the first time or not. 01294 TEUCHOS_TEST_FOR_EXCEPTION(is_storage_optimized_ != true, std::runtime_error, 01295 "Tpetra::VbrMatrix::fillLocalMatrix ERROR, optimizeStorage is required to have already been called."); 01296 01297 lclMatOps_.initializeValues (*lclMatrix_); 01298 } 01299 01300 //------------------------------------------------------------------- 01301 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 01302 void 01303 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::createImporterExporter() 01304 { 01305 typedef typename Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > PtMap; 01306 01307 const PtMap& ptDomMap = (constBlkGraph_->getBlockDomainMap())->getPointMap(); 01308 const PtMap& ptRngMap = (constBlkGraph_->getBlockRangeMap())->getPointMap(); 01309 const PtMap& ptRowMap = (constBlkGraph_->getBlockRowMap())->getPointMap(); 01310 const PtMap& ptColMap = (constBlkGraph_->getBlockColMap())->getPointMap(); 01311 01312 if (!ptDomMap->isSameAs(*ptColMap)) { 01313 importer_ = Teuchos::rcp(new Tpetra::Import<LocalOrdinal,GlobalOrdinal,Node>(ptDomMap, ptColMap)); 01314 } 01315 if (!ptRngMap->isSameAs(*ptRowMap)) { 01316 exporter_ = Teuchos::rcp(new Tpetra::Export<LocalOrdinal,GlobalOrdinal,Node>(ptRowMap, ptRngMap)); 01317 } 01318 } 01319 01320 //------------------------------------------------------------------- 01321 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 01322 void 01323 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::fillComplete(const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& blockDomainMap, const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& blockRangeMap) 01324 { 01325 if (isFillComplete()) return; 01326 01327 globalAssemble(); 01328 01329 blkGraph_->fillComplete(blockDomainMap, blockRangeMap); 01330 01331 createImporterExporter(); 01332 01333 is_fill_completed_ = true; 01334 01335 optimizeStorage(); 01336 01337 fillLocalMatrix(); 01338 fillLocalMatVec(); 01339 } 01340 01341 //------------------------------------------------------------------- 01342 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 01343 void VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::fillComplete() 01344 { 01345 //In this case, our block-row-map will also be our domain-map and 01346 //range-map. 01347 01348 fillComplete(getBlockRowMap(), getBlockRowMap()); 01349 } 01350 01351 //------------------------------------------------------------------- 01352 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 01353 void VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::globalAssemble() 01354 { 01355 if (getPointRowMap()->getComm()->getSize() == 1) { 01356 return; 01357 } 01358 01359 //nonlocal_data_ contains data that belongs on other processors. 01360 //We'll refer to this as overlapping data, and create an 'overlapMap' 01361 //that describes the layout of this overlapping data. 01362 Teuchos::RCP<const Tpetra::BlockMap<LocalOrdinal,GlobalOrdinal,Node> > 01363 overlapMap = VbrUtils::createOverlapMap(nonlocal_data_, *getBlockRowMap()); 01364 01365 if (overlapMap->getGlobalNumBlocks() == 0) { 01366 return; 01367 } 01368 01369 //VbrDataDist is a wrapper that makes our overlapping data behave 01370 //like a DistObject so that the overlapping data can be exported 01371 //to the owning processors in a standard way. 01372 typedef VbrUtils::VbrDataDist<LocalOrdinal, GlobalOrdinal, Scalar, Node> VDD; 01373 VDD vdd (nonlocal_data_, *overlapMap); 01374 01375 //Create an exporter where the source map is our overlapMap and the 01376 //target map is the rowmap of our VbrMatrix. 01377 typedef Import<LocalOrdinal,GlobalOrdinal,Node> import_type; 01378 import_type importer (vdd.getMap (), convertBlockMapToPointMap (*getBlockRowMap ())); 01379 01380 //Communicate the overlapping data to the owning processors and add it 01381 //into this VbrMatrix. 01382 this->doImport (vdd, importer, Tpetra::ADD); 01383 01384 //Zero out the overlapping data so it can be re-populated and re-assembled 01385 //in future calls to globalAssemble. 01386 nonlocal_data_.zeroEntries (); 01387 } 01388 01389 //------------------------------------------------------------------- 01390 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 01391 std::string 01392 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::description() const 01393 { 01394 std::ostringstream oss; 01395 oss << Teuchos::Describable::description(); 01396 if (isFillComplete()) { 01397 oss << "{status = fill complete, global num block rows = " 01398 << getBlockRowMap()->getGlobalNumBlocks() << "}"; 01399 } 01400 else { 01401 oss << "{status = fill not complete, global num block rows = " 01402 << getBlockRowMap()->getGlobalNumBlocks() << "}"; 01403 } 01404 return oss.str(); 01405 } 01406 01407 //------------------------------------------------------------------- 01408 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 01409 void 01410 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const 01411 { 01412 Teuchos::EVerbosityLevel vl = verbLevel; 01413 if (vl == Teuchos::VERB_DEFAULT) vl = Teuchos::VERB_LOW; 01414 01415 if (vl == Teuchos::VERB_NONE) return; 01416 01417 Teuchos::RCP<const Teuchos::Comm<int> > comm = getBlockRowMap()->getPointMap()->getComm(); 01418 const int myProc = comm->getRank(); 01419 01420 if (myProc == 0) out << "VbrMatrix::describe is under construction..." << std::endl; 01421 } 01422 01423 }//namespace Tpetra 01424 01425 // 01426 // Explicit instantiation macro 01427 // 01428 // Must be expanded from within the Tpetra namespace! 01429 // 01430 01431 #define TPETRA_VBRMATRIX_INSTANT(SCALAR,LO,GO,NODE) \ 01432 \ 01433 template class VbrMatrix< SCALAR , LO , GO , NODE >; 01434 01435 #endif //TPETRA_VBRMATRIX_DEF_HPP 01436
1.7.6.1