|
Tpetra Matrix/Vector Services
Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Tpetra: Templated Linear Algebra Services Package 00005 // Copyright (2008) Sandia Corporation 00006 // 00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, 00008 // the U.S. Government retains certain rights in this software. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00038 // 00039 // ************************************************************************ 00040 // @HEADER 00041 00042 #ifndef TPETRA_VBRMATRIX_DEF_HPP 00043 #define TPETRA_VBRMATRIX_DEF_HPP 00044 00045 #include <algorithm> 00046 #include <sstream> 00047 00048 #include <Kokkos_NodeHelpers.hpp> 00049 #include <Tpetra_Vector.hpp> 00050 00051 #ifdef DOXYGEN_USE_ONLY 00052 #include "Tpetra_VbrMatrix_decl.hpp" 00053 #endif 00054 00055 namespace Tpetra { 00056 00057 template<class Scalar,class Node> 00058 void fill_device_ArrayRCP(Teuchos::RCP<Node>& node, Teuchos::ArrayRCP<Scalar>& ptr, Scalar value) 00059 { 00060 Kokkos::ReadyBufferHelper<Node> rbh(node); 00061 Kokkos::InitOp<Scalar> wdp; 00062 wdp.alpha = value; 00063 rbh.begin(); 00064 wdp.x = rbh.addNonConstBuffer(ptr); 00065 rbh.end(); 00066 node->template parallel_for<Kokkos::InitOp<Scalar> >(0, ptr.size(), wdp); 00067 } 00068 00069 //------------------------------------------------------------------- 00070 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00071 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::VbrMatrix(const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> > &blkRowMap, size_t maxNumEntriesPerRow, ProfileType pftype) 00072 : DistObject<char, LocalOrdinal, GlobalOrdinal, Node>(convertBlockMapToPointMap(*blkRowMap)), 00073 blkGraph_(Teuchos::rcp(new BlockCrsGraph<LocalOrdinal,GlobalOrdinal,Node>(blkRowMap, maxNumEntriesPerRow, pftype))), 00074 constBlkGraph_(blkGraph_), 00075 lclMatrix_(blkRowMap->getNodeNumBlocks(), blkRowMap->getPointMap()->getNode()), 00076 pbuf_values1D_(), 00077 pbuf_indx_(), 00078 lclMatOps_(blkRowMap->getPointMap()->getNode()), 00079 importer_(), 00080 exporter_(), 00081 importedVec_(), 00082 exportedVec_(), 00083 data_2D_(Teuchos::rcp(new Teuchos::Array<RowGlobalCols>)), 00084 nonlocal_data_(), 00085 is_fill_completed_(false), 00086 is_storage_optimized_(false) 00087 { 00088 //The graph of this VBR matrix is a BlockCrsGraph, which is a CrsGraph where 00089 //each entry in the graph corresponds to a block-entry in the matrix. 00090 //That is, you can think of a VBR matrix as a Crs matrix of dense 00091 //submatrices... 00092 } 00093 00094 //------------------------------------------------------------------- 00095 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00096 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::VbrMatrix(const Teuchos::RCP<const BlockCrsGraph<LocalOrdinal,GlobalOrdinal,Node> > &blkGraph) 00097 : DistObject<char, LocalOrdinal, GlobalOrdinal, Node>(convertBlockMapToPointMap(*blkGraph->getBlockRowMap())), 00098 blkGraph_(), 00099 constBlkGraph_(blkGraph), 00100 lclMatrix_(blkGraph->getBlockRowMap()->getNodeNumBlocks(), 00101 blkGraph->getBlockRowMap()->getPointMap()->getNode()), 00102 pbuf_values1D_(), 00103 pbuf_indx_(), 00104 lclMatOps_(blkGraph->getBlockRowMap()->getPointMap()->getNode()), 00105 importer_(), 00106 exporter_(), 00107 importedVec_(), 00108 exportedVec_(), 00109 data_2D_(Teuchos::rcp(new Teuchos::Array<RowGlobalCols>)), 00110 nonlocal_data_(), 00111 is_fill_completed_(false), 00112 is_storage_optimized_(false) 00113 { 00114 //The graph of this VBR matrix is a BlockCrsGraph, which is a CrsGraph where 00115 //each entry in the graph corresponds to a block-entry in the matrix. 00116 //That is, you can think of a VBR matrix as a Crs matrix of dense 00117 //submatrices... 00118 00119 TEUCHOS_TEST_FOR_EXCEPTION(blkGraph->isFillComplete() == false, std::runtime_error, 00120 "Tpetra::VbrMatrix::VbrMatrix(BlockCrsGraph) ERROR, this constructor requires graph.isFillComplete()==true."); 00121 00122 createImporterExporter(); 00123 00124 is_fill_completed_ = true; 00125 00126 optimizeStorage(); 00127 00128 fillLocalMatrix(); 00129 fillLocalMatVec(); 00130 } 00131 00132 //------------------------------------------------------------------- 00133 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00134 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::~VbrMatrix() 00135 { 00136 } 00137 00138 //------------------------------------------------------------------- 00139 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00140 const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > & 00141 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getDomainMap() const 00142 { 00143 return getBlockDomainMap()->getPointMap(); 00144 } 00145 00146 //------------------------------------------------------------------- 00147 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00148 const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > & 00149 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getRangeMap() const 00150 { 00151 return getBlockRangeMap()->getPointMap(); 00152 } 00153 00154 //------------------------------------------------------------------- 00155 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00156 bool 00157 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::isFillComplete() const 00158 { 00159 return is_fill_completed_; 00160 } 00161 00162 //------------------------------------------------------------------- 00163 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00164 template <class DomainScalar, class RangeScalar> 00165 void 00166 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::multiply(const MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node> & X, MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node> &Y, Teuchos::ETransp trans, RangeScalar alpha, RangeScalar beta) const 00167 { 00168 TEUCHOS_TEST_FOR_EXCEPTION(!isFillComplete(), std::runtime_error, 00169 "Tpetra::VbrMatrix::multiply ERROR, multiply may only be called after fillComplete has been called."); 00170 00171 const Kokkos::MultiVector<Scalar,Node> *lclX = &X.getLocalMV(); 00172 Kokkos::MultiVector<Scalar,Node> *lclY = &Y.getLocalMVNonConst(); 00173 00174 lclMatOps_.template multiply<Scalar,Scalar>(trans,alpha,*lclX,beta,*lclY); 00175 } 00176 00177 //------------------------------------------------------------------- 00178 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00179 template<class DomainScalar, class RangeScalar> 00180 void 00181 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::solve(const MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node>& Y, MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node>& X, Teuchos::ETransp trans) const 00182 { 00183 TEUCHOS_TEST_FOR_EXCEPTION(X.isConstantStride() == false || Y.isConstantStride() == false, std::runtime_error, 00184 "Tpetra::VbrMatrix::solve(X,Y): X and Y must be constant stride."); 00185 00186 TEUCHOS_TEST_FOR_EXCEPTION(!isFillComplete(), std::runtime_error, 00187 "Tpetra::VbrMatrix::solve ERROR, solve may only be called after fillComplete has been called."); 00188 00189 TEUCHOS_TEST_FOR_EXCEPTION(constBlkGraph_->isUpperTriangular()==false && constBlkGraph_->isLowerTriangular()==false, std::runtime_error, 00190 "Tpetra::VbrMatrix::solve ERROR, matrix must be either upper or lower triangular."); 00191 00192 const Kokkos::MultiVector<RangeScalar,Node> *lclY = &Y.getLocalMV(); 00193 Kokkos::MultiVector<DomainScalar,Node> *lclX = &X.getLocalMVNonConst(); 00194 00195 Teuchos::EUplo triang = constBlkGraph_->isUpperTriangular() ? Teuchos::UPPER_TRI : Teuchos::LOWER_TRI; 00196 Teuchos::EDiag diag = (constBlkGraph_->getNodeNumBlockDiags() < constBlkGraph_->getNodeNumBlockRows()) ? Teuchos::UNIT_DIAG : Teuchos::NON_UNIT_DIAG; 00197 00198 lclMatOps_.template solve<DomainScalar,RangeScalar>(trans,triang,diag, *lclY,*lclX); 00199 } 00200 00201 //------------------------------------------------------------------- 00202 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00203 void 00204 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::updateImport(const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& X) const 00205 { 00206 if (importer_ != Teuchos::null) { 00207 if (importedVec_ == Teuchos::null || importedVec_->getNumVectors() != X.getNumVectors()) { 00208 importedVec_ = Teuchos::rcp(new MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>(getBlockColMap()->getPointMap(), X.getNumVectors())); 00209 } 00210 00211 importedVec_->doImport(X, *importer_, REPLACE); 00212 } 00213 } 00214 00215 //------------------------------------------------------------------- 00216 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00217 void 00218 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::updateExport(const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Y) const 00219 { 00220 if (exporter_ != Teuchos::null) { 00221 if (exportedVec_ == Teuchos::null || exportedVec_->getNumVectors() != Y.getNumVectors()) { 00222 exportedVec_ = Teuchos::rcp(new MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>(getBlockColMap()->getPointMap(), Y.getNumVectors())); 00223 } 00224 00225 exportedVec_->doExport(Y, *exporter_, REPLACE); 00226 } 00227 } 00228 00229 //------------------------------------------------------------------- 00230 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00231 void 00232 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::apply( 00233 const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X, 00234 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y, 00235 Teuchos::ETransp trans, 00236 Scalar alpha, 00237 Scalar beta) const 00238 { 00239 if (trans == Teuchos::NO_TRANS) { 00240 updateImport(X); 00241 const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Xref = importedVec_ == Teuchos::null ? X : *importedVec_; 00242 this->template multiply<Scalar,Scalar>(Xref, Y, trans, alpha, beta); 00243 } 00244 else if (trans == Teuchos::TRANS || trans == Teuchos::CONJ_TRANS) { 00245 updateImport(Y); 00246 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Yref = importedVec_ == Teuchos::null ? Y : *importedVec_; 00247 this->template multiply<Scalar,Scalar>(X, Yref, trans, alpha, beta); 00248 if (importedVec_ != Teuchos::null) { 00249 Y.doExport(*importedVec_, *importer_, ADD); 00250 } 00251 } 00252 } 00253 00254 //------------------------------------------------------------------- 00255 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00256 void 00257 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::applyInverse( 00258 const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y, 00259 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X, 00260 Teuchos::ETransp trans) const 00261 { 00262 if (trans == Teuchos::NO_TRANS) { 00263 updateImport(Y); 00264 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Xref = importedVec_ == Teuchos::null ? X : *importedVec_; 00265 this->template solve<Scalar,Scalar>(Y, Xref, trans); 00266 } 00267 else if (trans == Teuchos::TRANS || trans == Teuchos::CONJ_TRANS) { 00268 updateImport(Y); 00269 const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Yref = importedVec_ == Teuchos::null ? Y : *importedVec_; 00270 this->template solve<Scalar,Scalar>(Yref, X, trans); 00271 if (importedVec_ != Teuchos::null) { 00272 X.doExport(*importedVec_, *importer_, ADD); 00273 } 00274 } 00275 } 00276 00277 //------------------------------------------------------------------- 00278 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00279 bool 00280 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::hasTransposeApply() const 00281 { 00282 return true; 00283 } 00284 00285 //------------------------------------------------------------------- 00286 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00287 const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> > & 00288 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getBlockRowMap() const 00289 { 00290 return constBlkGraph_->getBlockRowMap(); 00291 } 00292 00293 //------------------------------------------------------------------- 00294 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00295 const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > & 00296 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getPointRowMap() const 00297 { 00298 return constBlkGraph_->getBlockRowMap()->getPointMap(); 00299 } 00300 00301 //------------------------------------------------------------------- 00302 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00303 const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> > & 00304 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getBlockColMap() const 00305 { 00306 return constBlkGraph_->getBlockColMap(); 00307 } 00308 00309 //------------------------------------------------------------------- 00310 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00311 const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> > & 00312 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getBlockDomainMap() const 00313 { 00314 return constBlkGraph_->getBlockDomainMap(); 00315 } 00316 00317 //------------------------------------------------------------------- 00318 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00319 const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> > & 00320 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getBlockRangeMap() const 00321 { 00322 return constBlkGraph_->getBlockRangeMap(); 00323 } 00324 00325 //------------------------------------------------------------------- 00326 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00327 const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > & 00328 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getPointColMap() const 00329 { 00330 return constBlkGraph_->getBlockColMap()->getPointMap(); 00331 } 00332 00333 //------------------------------------------------------------------- 00334 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00335 void 00336 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getGlobalBlockRowView( 00337 GlobalOrdinal globalBlockRow, 00338 LocalOrdinal& numPtRows, 00339 Teuchos::ArrayView<const GlobalOrdinal>& blockCols, 00340 Teuchos::Array<LocalOrdinal>& ptColsPerBlockCol, 00341 Teuchos::Array<Teuchos::ArrayRCP<const Scalar> >& blockEntries) const 00342 { 00343 TEUCHOS_TEST_FOR_EXCEPTION(isFillComplete(), std::runtime_error, 00344 "Tpetra::VbrMatrix::getGlobalBlockRowView internal ERROR, isFillComplete() is required to be false."); 00345 00346 typedef typename Teuchos::ArrayView<const GlobalOrdinal>::size_type Tsize_t; 00347 00348 LocalOrdinal localRow = getBlockRowMap()->getLocalBlockID(globalBlockRow); 00349 numPtRows = getBlockRowMap()->getLocalBlockSize(localRow); 00350 constBlkGraph_->getGlobalBlockRowView(globalBlockRow, blockCols); 00351 ptColsPerBlockCol.resize(blockCols.size()); 00352 blockEntries.resize(blockCols.size()); 00353 for(Tsize_t i=0; i<blockCols.size(); ++i) { 00354 getGlobalBlockEntryView(globalBlockRow, blockCols[i], 00355 numPtRows, ptColsPerBlockCol[i], blockEntries[i]); 00356 } 00357 } 00358 00359 //------------------------------------------------------------------- 00360 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00361 void 00362 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getLocalBlockRowView( 00363 LocalOrdinal localBlockRow, 00364 LocalOrdinal& numPtRows, 00365 Teuchos::ArrayView<const LocalOrdinal>& blockCols, 00366 Teuchos::Array<LocalOrdinal>& ptColsPerBlockCol, 00367 Teuchos::ArrayRCP<const Scalar>& blockEntries) const 00368 { 00369 TEUCHOS_TEST_FOR_EXCEPTION(!isFillComplete(), std::runtime_error, 00370 "Tpetra::VbrMatrix::getGlobalBlockRowView internal ERROR, isFillComplete() is required to be true."); 00371 00372 typedef typename Teuchos::ArrayView<const GlobalOrdinal>::size_type Tsize_t; 00373 typedef Teuchos::ArrayRCP<const size_t> Host_View; 00374 typedef Teuchos::ArrayRCP<const LocalOrdinal> Host_View_LO; 00375 00376 numPtRows = getBlockRowMap()->getLocalBlockSize(localBlockRow); 00377 constBlkGraph_->getLocalBlockRowView(localBlockRow, blockCols); 00378 ptColsPerBlockCol.resize(blockCols.size()); 00379 LocalOrdinal num_scalars_in_block_row = 0; 00380 for(Tsize_t i=0; i<blockCols.size(); ++i) { 00381 ptColsPerBlockCol[i] = getBlockColMap()->getLocalBlockSize(blockCols[i]); 00382 num_scalars_in_block_row += numPtRows*ptColsPerBlockCol[i]; 00383 } 00384 00385 Teuchos::RCP<Node> node = getNode(); 00386 Host_View bpr = constBlkGraph_->getNodeRowOffsets(); 00387 const size_t bindx_offset = bpr[localBlockRow]; 00388 Host_View_LO indx = node->template viewBuffer<LocalOrdinal>(1, pbuf_indx_+bindx_offset); 00389 const LocalOrdinal offset = indx[0]; 00390 blockEntries = node->template viewBuffer<Scalar>(num_scalars_in_block_row, pbuf_values1D_+offset); 00391 } 00392 00393 //------------------------------------------------------------------- 00394 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00395 void 00396 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getGlobalBlockEntryViewNonConst( 00397 GlobalOrdinal globalBlockRow, 00398 GlobalOrdinal globalBlockCol, 00399 LocalOrdinal& numPtRows, 00400 LocalOrdinal& numPtCols, 00401 Teuchos::ArrayRCP<Scalar>& blockEntry) 00402 { 00403 //Return a non-const, read-write view of a block-entry (as an ArrayRCP), 00404 //creating/allocating the block-entry if it doesn't already exist, (but only 00405 //if fillComplete hasn't been called yet, and if the arguments numPtRows 00406 //and numPtCols have been set on input). 00407 00408 LocalOrdinal localBlockRow = getBlockRowMap()->getLocalBlockID(globalBlockRow); 00409 00410 if (localBlockRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid()) { 00411 VbrUtils::getGlobalBlockEntryViewNonConst(nonlocal_data_, 00412 globalBlockRow, globalBlockCol, 00413 numPtRows, numPtCols, blockEntry); 00414 return; 00415 } 00416 00417 if (is_storage_optimized_) { 00418 TEUCHOS_TEST_FOR_EXCEPTION(!isFillComplete(), std::runtime_error, 00419 "Tpetra::VbrMatrix::getGlobalBlockEntryViewNonConst internal ERROR, storage is optimized but isFillComplete() is false."); 00420 00421 LocalOrdinal localBlockCol = getBlockColMap()->getLocalBlockID(globalBlockCol); 00422 00423 getLocalBlockEntryViewNonConst(localBlockRow, localBlockCol, 00424 numPtRows, numPtCols, blockEntry); 00425 return; 00426 } 00427 00428 //If we get to here, fillComplete hasn't been called yet, and the matrix data 00429 //is stored in un-packed '2D' form. 00430 00431 if (data_2D_->size() == 0) { 00432 LocalOrdinal numBlockRows = constBlkGraph_->getNodeNumBlockRows(); 00433 data_2D_->resize(numBlockRows); 00434 } 00435 00436 RowGlobalCols& blkrow = (*data_2D_)[localBlockRow]; 00437 00438 typename RowGlobalCols::iterator col_iter = blkrow.find(globalBlockCol); 00439 00440 if (col_iter != blkrow.end()) { 00441 blockEntry = col_iter->second; 00442 } 00443 else { 00444 //blockEntry doesn't already exist, so we will create it. 00445 00446 //make sure block-size is specified: 00447 TEUCHOS_TEST_FOR_EXCEPTION(numPtRows==0 || numPtCols==0, std::runtime_error, 00448 "Tpetra::VbrMatrix::getGlobalBlockEntryViewNonConst ERROR: creating block-entry, but numPtRows and/or numPtCols is 0."); 00449 00450 Teuchos::RCP<Node> node = getNode(); 00451 size_t blockSize = numPtRows*numPtCols; 00452 blockEntry = Teuchos::arcp(new Scalar[blockSize], 0, blockSize); 00453 std::fill(blockEntry.begin(), blockEntry.end(), 0); 00454 blkrow.insert(std::make_pair(globalBlockCol, blockEntry)); 00455 blkGraph_->insertGlobalIndices(globalBlockRow, Teuchos::ArrayView<GlobalOrdinal>(&globalBlockCol, 1)); 00456 } 00457 } 00458 00459 //------------------------------------------------------------------- 00460 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00461 void 00462 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getGlobalBlockEntryView( 00463 GlobalOrdinal globalBlockRow, 00464 GlobalOrdinal globalBlockCol, 00465 LocalOrdinal& numPtRows, 00466 LocalOrdinal& numPtCols, 00467 Teuchos::ArrayRCP<const Scalar>& blockEntry) const 00468 { 00469 //This method returns a const-view of a block-entry (as an ArrayRCP). 00470 //Throws an exception if the block-entry doesn't already exist. 00471 00472 if (is_storage_optimized_) { 00473 TEUCHOS_TEST_FOR_EXCEPTION(!isFillComplete(), std::runtime_error, 00474 "Tpetra::VbrMatrix::getGlobalBlockEntryView internal ERROR, storage is optimized but isFillComplete() is false."); 00475 00476 LocalOrdinal localBlockRow = getBlockRowMap()->getLocalBlockID(globalBlockRow); 00477 LocalOrdinal localBlockCol = getBlockColMap()->getLocalBlockID(globalBlockCol); 00478 getLocalBlockEntryView(localBlockRow, localBlockCol, numPtRows, numPtCols, blockEntry); 00479 return; 00480 } 00481 00482 TEUCHOS_TEST_FOR_EXCEPTION(data_2D_->size() == 0, std::runtime_error, 00483 "Tpetra::VbrMatrix::getGlobalBlockEntryView ERROR, matrix storage not yet allocated, can't return a const view."); 00484 00485 //this acts as a range-check for globalBlockRow: 00486 LocalOrdinal localBlockRow = getBlockRowMap()->getLocalBlockID(globalBlockRow); 00487 TEUCHOS_TEST_FOR_EXCEPTION( localBlockRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid(), 00488 std::runtime_error, 00489 "Tpetra::VbrMatrix::getGlobalBlockEntryView, globalBlockRow not on the local processor."); 00490 00491 RowGlobalCols& blkrow = (*data_2D_)[localBlockRow]; 00492 typename RowGlobalCols::iterator col_iter = blkrow.find(globalBlockCol); 00493 00494 if (col_iter == blkrow.end()) { 00495 throw std::runtime_error("Tpetra::VbrMatrix::getGlobalBlockEntryView ERROR, specified block-entry doesn't exist."); 00496 } 00497 00498 numPtRows = getBlockRowMap()->getLocalBlockSize(localBlockRow); 00499 00500 TEUCHOS_TEST_FOR_EXCEPTION(numPtRows == 0, std::runtime_error, 00501 "Tpetra::VbrMatrix::getGlobalBlockEntryView ERROR, numPtRows == 0."); 00502 00503 blockEntry = col_iter->second; 00504 numPtCols = blockEntry.size() / numPtRows; 00505 } 00506 00507 //------------------------------------------------------------------- 00508 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00509 void 00510 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getLocalBlockEntryViewNonConst( 00511 LocalOrdinal localBlockRow, 00512 LocalOrdinal localBlockCol, 00513 LocalOrdinal& numPtRows, 00514 LocalOrdinal& numPtCols, 00515 Teuchos::ArrayRCP<Scalar>& blockEntry) 00516 { 00517 typedef Teuchos::ArrayRCP<const LocalOrdinal> Host_View_LO; 00518 typedef Teuchos::ArrayRCP<const size_t> Host_View; 00519 typedef typename Host_View_LO::iterator ITER; 00520 //This method returns a non-constant view of a block-entry (as an ArrayRCP). 00521 00522 TEUCHOS_TEST_FOR_EXCEPTION(isFillComplete() == false, std::runtime_error, 00523 "Tpetra::VbrMatrix::getLocalBlockEntryViewNonConst ERROR, this method can only be called after fillComplete() has been called."); 00524 00525 TEUCHOS_TEST_FOR_EXCEPTION(is_storage_optimized_ == false, std::runtime_error, 00526 "Tpetra::VbrMatrix::getLocalBlockEntryViewNonConst ERROR, this method can only be called if storage is optimized."); 00527 00528 Teuchos::RCP<Node> node = getNode(); 00529 00530 Host_View bptr = constBlkGraph_->getNodeRowOffsets(); 00531 LocalOrdinal bindx_offset = bptr[localBlockRow]; 00532 LocalOrdinal length = bptr[localBlockRow+1] - bindx_offset; 00533 00534 TEUCHOS_TEST_FOR_EXCEPTION( length < 1, std::runtime_error, 00535 "Tpetra::VbrMatrix::getLocalBlockEntryViewNonConst ERROR, specified localBlockCol not found in localBlockRow."); 00536 00537 Host_View_LO bindx = constBlkGraph_->getNodePackedIndices(); 00538 ITER bindx_beg = bindx.begin() + bindx_offset, 00539 bindx_end = bindx_beg + length; 00540 ITER it = std::lower_bound(bindx_beg, bindx_end, localBlockCol); 00541 00542 TEUCHOS_TEST_FOR_EXCEPTION(it == bindx_end || *it != localBlockCol, std::runtime_error, 00543 "Tpetra::VbrMatrix::getLocalBlockEntryViewNonConst ERROR, specified localBlockCol not found."); 00544 00545 numPtRows = getBlockRowMap()->getLocalBlockSize(localBlockRow); 00546 numPtCols = getBlockColMap()->getLocalBlockSize(localBlockCol); 00547 00548 const LocalOrdinal blkSize = numPtRows*numPtCols; 00549 Host_View_LO indx = node->template viewBuffer<LocalOrdinal>(1,pbuf_indx_+bptr[localBlockRow]+(it-bindx_beg)); 00550 const LocalOrdinal offset = indx[0]; 00551 blockEntry = node->template viewBufferNonConst<Scalar>(Kokkos::ReadWrite, blkSize, pbuf_values1D_ + offset); 00552 } 00553 00554 //------------------------------------------------------------------- 00555 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00556 void 00557 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getLocalDiagCopy( 00558 Tpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& diag) const 00559 { 00560 const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& rowmap = getBlockRowMap(); 00561 TEUCHOS_TEST_FOR_EXCEPTION(diag.getMap()->isSameAs(*(rowmap->getPointMap())) != true, 00562 std::runtime_error, "Tpetra::VbrMatrix::getLocalDiagCopy ERROR, vector must be distributed the same as this matrix' row-map."); 00563 00564 Teuchos::ArrayRCP<Scalar> diag_view = diag.get1dViewNonConst(); 00565 Teuchos::ArrayView<const GlobalOrdinal> blockIDs = rowmap->getNodeBlockIDs(); 00566 size_t offset = 0; 00567 typedef typename Teuchos::ArrayView<const GlobalOrdinal>::size_type Tsize_t; 00568 for(Tsize_t i=0; i<blockIDs.size(); ++i) { 00569 LocalOrdinal localBlockID = rowmap->getLocalBlockID(blockIDs[i]); 00570 LocalOrdinal blockSize = rowmap->getLocalBlockSize(localBlockID); 00571 Teuchos::ArrayRCP<const Scalar> blockEntry; 00572 getGlobalBlockEntryView(blockIDs[i], blockIDs[i], blockSize, blockSize, blockEntry); 00573 00574 for(LocalOrdinal j=0; j<blockSize; ++j) { 00575 diag_view[offset++] = blockEntry[j*blockSize+j]; 00576 } 00577 } 00578 } 00579 00580 //------------------------------------------------------------------- 00581 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00582 bool 00583 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::checkSizes(const DistObject<char, LocalOrdinal, GlobalOrdinal, Node>& source) 00584 { 00585 bool ok = this->getMap()->getMinAllGlobalIndex() <= source.getMap()->getMinAllGlobalIndex(); 00586 ok = ok && this->getMap()->getMaxAllGlobalIndex() >= source.getMap()->getMaxAllGlobalIndex(); 00587 return ok; 00588 } 00589 00590 //------------------------------------------------------------------- 00591 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00592 void 00593 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::copyAndPermute( 00594 const DistObject<char, LocalOrdinal, GlobalOrdinal, Node>& source, 00595 size_t numSameIDs, 00596 const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs, 00597 const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs) 00598 { 00599 typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type Tsize_t; 00600 00601 const VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>* src_mat_ptr = dynamic_cast<const VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>*>(&source); 00602 if (src_mat_ptr == NULL) { 00603 throw std::runtime_error("VbrMatrix::copyAndPermute ERROR, dynamic_cast failed."); 00604 } 00605 const VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>& src_mat = *src_mat_ptr; 00606 00607 Teuchos::Array<LocalOrdinal> src_ptColsPerBlkCol; 00608 LocalOrdinal numPtRows; 00609 00610 LocalOrdinal numSame = numSameIDs; 00611 for(LocalOrdinal i=0; i<numSame; ++i) { 00612 if (isFillComplete()) { 00613 Teuchos::ArrayRCP<const Scalar> src_blkEntries; 00614 Teuchos::ArrayView<const LocalOrdinal> src_blkCols; 00615 00616 src_mat.getLocalBlockRowView(i, numPtRows, src_blkCols, src_ptColsPerBlkCol, src_blkEntries); 00617 unsigned offset = 0; 00618 for(Tsize_t j=0; j<src_blkCols.size(); ++j) { 00619 LocalOrdinal numPtCols = src_ptColsPerBlkCol[j]; 00620 setLocalBlockEntry(i, src_blkCols[j], numPtRows, numPtCols, numPtCols, 00621 src_blkEntries.view(offset, numPtRows*numPtCols)); 00622 offset += numPtRows*numPtCols; 00623 } 00624 } 00625 else { 00626 GlobalOrdinal gRow = getBlockRowMap()->getGlobalBlockID(i); 00627 Teuchos::Array<Teuchos::ArrayRCP<const Scalar> > src_blkEntries; 00628 Teuchos::ArrayView<const GlobalOrdinal> src_blkCols; 00629 00630 src_mat.getGlobalBlockRowView(gRow, numPtRows, src_blkCols, src_ptColsPerBlkCol, src_blkEntries); 00631 for(Tsize_t j=0; j<src_blkCols.size(); ++j) { 00632 LocalOrdinal numPtCols = src_ptColsPerBlkCol[j]; 00633 setGlobalBlockEntry(gRow, src_blkCols[j], numPtRows, numPtCols, numPtRows, src_blkEntries[j].view(0, numPtRows*numPtCols)); 00634 } 00635 } 00636 } 00637 00638 for(Tsize_t i=0; i<permuteToLIDs.size(); ++i) { 00639 00640 if (isFillComplete()) { 00641 Teuchos::ArrayRCP<const Scalar> src_blkEntries; 00642 Teuchos::ArrayView<const LocalOrdinal> src_blkCols; 00643 00644 src_mat.getLocalBlockRowView(permuteFromLIDs[i], numPtRows, src_blkCols, src_ptColsPerBlkCol, src_blkEntries); 00645 unsigned offset = 0; 00646 for(Tsize_t j=0; j<src_blkCols.size(); ++j) { 00647 LocalOrdinal numPtCols = src_ptColsPerBlkCol[j]; 00648 setLocalBlockEntry(permuteToLIDs[i], src_blkCols[j], numPtRows, numPtCols, numPtCols, 00649 src_blkEntries.view(offset, numPtRows*numPtCols)); 00650 offset += numPtRows*numPtCols; 00651 } 00652 } 00653 else { 00654 GlobalOrdinal gRow = getBlockRowMap()->getGlobalBlockID(permuteToLIDs[i]); 00655 Teuchos::Array<Teuchos::ArrayRCP<const Scalar> > src_blkEntries; 00656 Teuchos::ArrayView<const GlobalOrdinal> src_blkCols; 00657 00658 src_mat.getGlobalBlockRowView(gRow, numPtRows, src_blkCols, src_ptColsPerBlkCol, src_blkEntries); 00659 for(Tsize_t j=0; j<src_blkCols.size(); ++j) { 00660 LocalOrdinal numPtCols = src_ptColsPerBlkCol[j]; 00661 setGlobalBlockEntry(gRow, src_blkCols[j], numPtRows, numPtCols, numPtRows, src_blkEntries[j].view(0, numPtRows*numPtCols)); 00662 } 00663 } 00664 } 00665 } 00666 00667 //------------------------------------------------------------------- 00668 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00669 void 00670 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::packAndPrepare( 00671 const DistObject<char, LocalOrdinal, GlobalOrdinal, Node>& source, 00672 const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs, 00673 Teuchos::Array<char>& exports, 00674 const Teuchos::ArrayView<size_t>& numPacketsPerLID, 00675 size_t& constantNumPackets, 00676 Distributor& distor) 00677 { 00678 const VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>* src_mat_ptr = dynamic_cast<const VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>*>(&source); 00679 if (src_mat_ptr == NULL) { 00680 typedef VbrUtils::VbrDataDist<LocalOrdinal,GlobalOrdinal,Scalar,Node> VDD; 00681 VDD* vdd = const_cast<VDD*>(dynamic_cast<const VDD*>(&source)); 00682 if (vdd != NULL) { 00683 vdd->packAndPrepare(source, exportLIDs, exports, numPacketsPerLID, constantNumPackets, distor); 00684 } 00685 else { 00686 throw std::runtime_error("VbrMatrix::packAndPrepare ERROR, dynamic_cast failed."); 00687 } 00688 return; 00689 } 00690 00691 const VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>& src_mat = *src_mat_ptr; 00692 00693 typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type Tsize_t; 00694 00695 //We will pack each row's data into the exports buffer as follows: 00696 //[num-block-cols,numPtRows,{list-of-blk-cols},{list-of-ptColsPerBlkCol},{all vals}] 00697 //so the length of the char exports buffer for a row is: 00698 //sizeof(LocalOrdinal)*(2+2*(num-block-cols)) + sizeof(Scalar)*numPtRows*sum(numPtCols_i) 00699 00700 //For each row corresponding to exportLIDs, accumulate the size that it will 00701 //occupy in the exports buffer: 00702 size_t total_exports_size = 0; 00703 for(Tsize_t i=0; i<exportLIDs.size(); ++i) { 00704 Teuchos::Array<LocalOrdinal> src_ptColsPerBlkCol; 00705 LocalOrdinal numPtRows; 00706 00707 LocalOrdinal numBlkCols = 0; 00708 LocalOrdinal numScalars = 0; 00709 00710 if (src_mat.isFillComplete()) { 00711 Teuchos::ArrayRCP<const Scalar> src_blkEntries; 00712 Teuchos::ArrayView<const LocalOrdinal> src_blkCols; 00713 src_mat.getLocalBlockRowView(exportLIDs[i], numPtRows, 00714 src_blkCols, src_ptColsPerBlkCol, src_blkEntries); 00715 numBlkCols = src_blkCols.size(); 00716 numScalars = src_blkEntries.size(); 00717 } 00718 else {//src_mat.isFillComplete()==false 00719 Teuchos::Array<Teuchos::ArrayRCP<const Scalar> > src_blkEntries; 00720 GlobalOrdinal gRow = src_mat.getBlockRowMap()->getGlobalBlockID(exportLIDs[i]); 00721 Teuchos::ArrayView<const GlobalOrdinal> src_blkCols; 00722 src_mat.getGlobalBlockRowView(gRow, numPtRows, src_blkCols, src_ptColsPerBlkCol, src_blkEntries); 00723 numBlkCols = src_blkCols.size(); 00724 for(Tsize_t j=0; j<src_blkEntries.size(); ++j) { 00725 numScalars += src_blkEntries[j].size(); 00726 } 00727 } 00728 00729 size_t size_for_this_row = sizeof(GlobalOrdinal)*(2+2*numBlkCols) 00730 + sizeof(Scalar)*numScalars; 00731 numPacketsPerLID[i] = size_for_this_row; 00732 total_exports_size += size_for_this_row; 00733 } 00734 00735 exports.resize(total_exports_size); 00736 00737 ArrayView<char> avIndsC, avValsC; 00738 ArrayView<Scalar> avVals; 00739 00740 size_t offset = 0; 00741 00742 for(Tsize_t i=0; i<exportLIDs.size(); ++i) { 00743 Teuchos::Array<LocalOrdinal> src_ptColsPerBlkCol; 00744 LocalOrdinal numPtRows; 00745 00746 LocalOrdinal numBlkCols = 0; 00747 LocalOrdinal numScalars = 0; 00748 00749 if (src_mat.isFillComplete()) { 00750 Teuchos::ArrayRCP<const Scalar> src_blkEntries; 00751 Teuchos::ArrayView<const LocalOrdinal> src_blkCols; 00752 src_mat.getLocalBlockRowView(exportLIDs[i], numPtRows, src_blkCols, src_ptColsPerBlkCol, src_blkEntries); 00753 numBlkCols = src_blkCols.size(); 00754 numScalars = src_blkEntries.size(); 00755 00756 LocalOrdinal num_chars_for_ordinals = (2*numBlkCols+2)*sizeof(LocalOrdinal); 00757 //get export views 00758 avIndsC = exports(offset, num_chars_for_ordinals); 00759 avValsC = exports(offset+ num_chars_for_ordinals, numScalars*sizeof(Scalar)); 00760 ArrayView<GlobalOrdinal> avInds = av_reinterpret_cast<GlobalOrdinal>(avIndsC); 00761 typename ArrayView<GlobalOrdinal>::iterator ind_it = avInds.begin(); 00762 00763 const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& col_map = src_mat.getBlockColMap(); 00764 00765 //put row info into the buffer views: 00766 *ind_it++ = numBlkCols; 00767 *ind_it++ = numPtRows; 00768 for(Tsize_t j=0; j<src_blkCols.size(); ++j) { 00769 *ind_it++ = col_map->getGlobalBlockID(src_blkCols[j]); 00770 } 00771 std::copy(src_ptColsPerBlkCol.begin(), src_ptColsPerBlkCol.end(), ind_it); 00772 avVals = av_reinterpret_cast<Scalar>(avValsC); 00773 std::copy(src_blkEntries.begin(), src_blkEntries.end(), avVals.begin()); 00774 } 00775 else {//src_mat.isFillComplete()==false 00776 Teuchos::Array<Teuchos::ArrayRCP<const Scalar> > src_blkEntries; 00777 GlobalOrdinal gRow = src_mat.getBlockRowMap()->getGlobalBlockID(exportLIDs[i]); 00778 Teuchos::ArrayView<const GlobalOrdinal> src_blkCols; 00779 src_mat.getGlobalBlockRowView(gRow, numPtRows, src_blkCols, src_ptColsPerBlkCol, src_blkEntries); 00780 numBlkCols = src_blkCols.size(); 00781 numScalars = 0; 00782 for(Tsize_t j=0; j<src_blkEntries.size(); ++j) { 00783 numScalars += numPtRows*src_ptColsPerBlkCol[j]; 00784 } 00785 00786 LocalOrdinal num_chars_for_ordinals = (2*numBlkCols+2)*sizeof(GlobalOrdinal); 00787 //get export views 00788 avIndsC = exports(offset, num_chars_for_ordinals); 00789 avValsC = exports(offset+ num_chars_for_ordinals, numScalars*sizeof(Scalar)); 00790 ArrayView<GlobalOrdinal> avInds = av_reinterpret_cast<GlobalOrdinal>(avIndsC); 00791 typename ArrayView<GlobalOrdinal>::iterator ind_it = avInds.begin(); 00792 00793 //put row info into the buffer views: 00794 *ind_it++ = numBlkCols; 00795 *ind_it++ = numPtRows; 00796 std::copy(src_blkCols.begin(), src_blkCols.end(), ind_it); 00797 ind_it += src_blkCols.size(); 00798 std::copy(src_ptColsPerBlkCol.begin(), src_ptColsPerBlkCol.end(), ind_it); 00799 avVals = av_reinterpret_cast<Scalar>(avValsC); 00800 typename ArrayView<Scalar>::iterator val_it = avVals.begin(); 00801 for(Tsize_t j=0; j<src_blkEntries.size(); ++j) { 00802 std::copy(src_blkEntries[j].begin(), src_blkEntries[j].end(), val_it); 00803 val_it += src_blkEntries[j].size(); 00804 } 00805 } 00806 00807 size_t size_for_this_row = sizeof(GlobalOrdinal)*(2+2*numBlkCols) 00808 + sizeof(Scalar)*numScalars; 00809 offset += size_for_this_row; 00810 } 00811 00812 constantNumPackets = 0; 00813 } 00814 00815 //------------------------------------------------------------------- 00816 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00817 void 00818 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::unpackAndCombine( 00819 const Teuchos::ArrayView<const LocalOrdinal>& importLIDs, 00820 const Teuchos::ArrayView<const char>& imports, 00821 const Teuchos::ArrayView<size_t>& numPacketsPerLID, 00822 size_t constantNumPackets, 00823 Distributor& distor, 00824 CombineMode CM) 00825 { 00826 typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type Tsize_t; 00827 00828 if (CM == Tpetra::ABSMAX) { 00829 std::cout << "Warning, VbrMatrix Import/Export doesn't support combine-mode==ABSMAX; use ADD, INSERT or REPLACE. (REPLACE is being used now.)"<<std::endl; 00830 } 00831 00832 size_t offset = 0; 00833 for(Tsize_t i=0; i<importLIDs.size(); ++i) { 00834 ArrayView<const char> avC = imports.view(offset, numPacketsPerLID[i]); 00835 ArrayView<const GlobalOrdinal> avOrds = av_reinterpret_cast<const GlobalOrdinal>(avC); 00836 GlobalOrdinal gRow = this->getBlockRowMap()->getGlobalBlockID(importLIDs[i]); 00837 size_t avOrdsOffset = 0; 00838 LocalOrdinal numBlkCols = avOrds[avOrdsOffset++]; 00839 LocalOrdinal numPtRows = avOrds[avOrdsOffset++]; 00840 LocalOrdinal num_chars_for_ordinals = (2*numBlkCols+2)*sizeof(GlobalOrdinal); 00841 ArrayView<const char> avValsC = imports.view(offset+num_chars_for_ordinals, numPacketsPerLID[i]-num_chars_for_ordinals); 00842 ArrayView<const Scalar> avVals = av_reinterpret_cast<const Scalar>(avValsC); 00843 00844 size_t avValsOffset = 0; 00845 for(Tsize_t j=0; j<numBlkCols; ++j) { 00846 GlobalOrdinal blkCol = avOrds[avOrdsOffset+j]; 00847 LocalOrdinal numPtCols = avOrds[avOrdsOffset+numBlkCols+j]; 00848 LocalOrdinal LDA = numPtRows; 00849 if (CM == Tpetra::ADD) { 00850 sumIntoGlobalBlockEntry(gRow, blkCol, numPtRows, numPtCols, LDA, 00851 avVals.view(avValsOffset, numPtRows*numPtCols)); 00852 } 00853 else if (CM == Tpetra::INSERT || CM == Tpetra::REPLACE) { 00854 setGlobalBlockEntry(gRow, blkCol, numPtRows, numPtCols, LDA, 00855 avVals.view(avValsOffset, numPtRows*numPtCols)); 00856 } 00857 else { 00858 // std::cout << "Warning, VbrMatrix Import/Export doesn't support combine-mode==ABSMAX; use ADD, INSERT or REPLACE. (REPLACE is being used now.)"<<std::endl; 00859 setGlobalBlockEntry(gRow, blkCol, numPtRows, numPtCols, LDA, 00860 avVals.view(avValsOffset, numPtRows*numPtCols)); 00861 } 00862 avValsOffset += numPtRows*numPtCols; 00863 } 00864 } 00865 } 00866 00867 //------------------------------------------------------------------- 00868 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00869 void 00870 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getLocalBlockEntryView( 00871 LocalOrdinal localBlockRow, 00872 LocalOrdinal localBlockCol, 00873 LocalOrdinal& numPtRows, 00874 LocalOrdinal& numPtCols, 00875 Teuchos::ArrayRCP<const Scalar>& blockEntry) const 00876 { 00877 typedef Teuchos::ArrayRCP<const size_t> Host_View; 00878 typedef Teuchos::ArrayRCP<const LocalOrdinal> Host_View_LO; 00879 typedef typename Host_View_LO::iterator ITER; 00880 //This method returns a constant view of a block-entry (as an ArrayRCP). 00881 00882 TEUCHOS_TEST_FOR_EXCEPTION(isFillComplete() == false, std::runtime_error, 00883 "Tpetra::VbrMatrix::getLocalBlockEntryView ERROR, this method can only be called after fillComplete() has been called."); 00884 00885 TEUCHOS_TEST_FOR_EXCEPTION(is_storage_optimized_ == false, std::runtime_error, 00886 "Tpetra::VbrMatrix::getLocalBlockEntryView ERROR, this method can only be called if storage is optimized."); 00887 00888 Teuchos::RCP<Node> node = getNode(); 00889 00890 Host_View bpr = constBlkGraph_->getNodeRowOffsets(); 00891 const size_t bindx_offset = bpr[localBlockRow]; 00892 const size_t length = bpr[localBlockRow+1] - bindx_offset; 00893 00894 Host_View_LO bindx = constBlkGraph_->getNodePackedIndices(); 00895 ITER bindx_beg = bindx.begin()+bindx_offset, 00896 bindx_end = bindx_beg + length; 00897 ITER it = std::lower_bound(bindx_beg, bindx_end, localBlockCol); 00898 00899 TEUCHOS_TEST_FOR_EXCEPTION(it == bindx_end || *it != localBlockCol, std::runtime_error, 00900 "Tpetra::VbrMatrix::getLocalBlockEntryView ERROR, specified localBlockCol not found."); 00901 00902 numPtRows = getBlockRowMap()->getLocalBlockSize(localBlockRow); 00903 numPtCols = getBlockColMap()->getLocalBlockSize(localBlockCol); 00904 00905 const LocalOrdinal blkSize = numPtRows*numPtCols; 00906 Host_View_LO indx = node->template viewBuffer<LocalOrdinal>(1, pbuf_indx_+bpr[localBlockRow]+(it-bindx_beg)); 00907 const LocalOrdinal offset = indx[0]; 00908 blockEntry = node->template viewBuffer<Scalar>(blkSize, pbuf_values1D_ + offset); 00909 } 00910 00911 //------------------------------------------------------------------- 00912 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00913 Teuchos::RCP<Node> 00914 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getNode() const 00915 { 00916 return getBlockRowMap()->getPointMap()->getNode(); 00917 } 00918 00919 //------------------------------------------------------------------- 00920 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00921 void 00922 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::putScalar(Scalar s) 00923 { 00924 Teuchos::RCP<Node> node = getNode(); 00925 00926 if (isFillComplete()) { 00927 fill_device_ArrayRCP(node, pbuf_values1D_, s); 00928 } 00929 else { 00930 typedef typename Teuchos::Array<RowGlobalCols>::size_type Tsize_t; 00931 Teuchos::Array<RowGlobalCols>& rows = *data_2D_; 00932 Tsize_t numBlockRows = rows.size(); 00933 for(Tsize_t r=0; r<numBlockRows; ++r) { 00934 typename RowGlobalCols::iterator 00935 col_iter = rows[r].begin(), 00936 col_end = rows[r].end(); 00937 for(; col_iter != col_end; ++col_iter) { 00938 Teuchos::ArrayRCP<Scalar>& vals = col_iter->second; 00939 std::fill(vals.begin(), vals.end(), s); 00940 } 00941 } 00942 } 00943 } 00944 00945 //------------------------------------------------------------------- 00946 template<class ArrayType1,class ArrayType2,class Ordinal> 00947 void set_array_values(ArrayType1& dest, ArrayType2& src, Ordinal rows, Ordinal cols, Ordinal stride, Tpetra::CombineMode mode) 00948 { 00949 size_t offset = 0; 00950 size_t src_offset = 0; 00951 00952 if (mode == ADD) { 00953 for(Ordinal col=0; col<cols; ++col) { 00954 for(Ordinal row=0; row<rows; ++row) { 00955 dest[offset++] += src[src_offset + row]; 00956 } 00957 src_offset += stride; 00958 } 00959 } 00960 else { 00961 for(Ordinal col=0; col<cols; ++col) { 00962 for(Ordinal row=0; row<rows; ++row) { 00963 dest[offset++] = src[src_offset + row]; 00964 } 00965 src_offset += stride; 00966 } 00967 } 00968 } 00969 00970 //------------------------------------------------------------------- 00971 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00972 void 00973 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::setGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, const Teuchos::SerialDenseMatrix<int,Scalar>& blockEntry) 00974 { 00975 //first get an ArrayRCP for the internal storage for this block-entry: 00976 Teuchos::ArrayRCP<Scalar> internalBlockEntry; 00977 LocalOrdinal blkRowSize = blockEntry.numRows(); 00978 LocalOrdinal blkColSize = blockEntry.numCols(); 00979 getGlobalBlockEntryViewNonConst(globalBlockRow,globalBlockCol, blkRowSize, blkColSize, internalBlockEntry); 00980 00981 //now copy the incoming block-entry into internal storage: 00982 const Scalar* inputvalues = blockEntry.values(); 00983 set_array_values(internalBlockEntry, inputvalues, blkRowSize, blkColSize, blockEntry.stride(), REPLACE); 00984 } 00985 00986 //------------------------------------------------------------------- 00987 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00988 void 00989 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::setLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, const Teuchos::SerialDenseMatrix<int,Scalar>& blockEntry) 00990 { 00991 //first get an ArrayRCP for the internal storage for this block-entry: 00992 Teuchos::ArrayRCP<Scalar> internalBlockEntry; 00993 LocalOrdinal blkRowSize = blockEntry.numRows(); 00994 LocalOrdinal blkColSize = blockEntry.numCols(); 00995 getLocalBlockEntryViewNonConst(localBlockRow,localBlockCol, blkRowSize, blkColSize, internalBlockEntry); 00996 00997 //now copy the incoming block-entry into internal storage: 00998 const Scalar* inputvalues = blockEntry.values(); 00999 set_array_values(internalBlockEntry, inputvalues, blkRowSize, blkColSize, blockEntry.stride(), REPLACE); 01000 } 01001 01002 //------------------------------------------------------------------- 01003 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 01004 void 01005 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::sumIntoGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, const Teuchos::SerialDenseMatrix<int,Scalar>& blockEntry) 01006 { 01007 //first get an ArrayRCP for the internal storage for this block-entry: 01008 Teuchos::ArrayRCP<Scalar> internalBlockEntry; 01009 LocalOrdinal blkRowSize = blockEntry.numRows(); 01010 LocalOrdinal blkColSize = blockEntry.numCols(); 01011 getGlobalBlockEntryViewNonConst(globalBlockRow,globalBlockCol, blkRowSize, blkColSize, internalBlockEntry); 01012 01013 //now sum (add) the incoming block-entry into internal storage: 01014 const Scalar* inputvalues = blockEntry.values(); 01015 set_array_values(internalBlockEntry, inputvalues, blkRowSize, blkColSize, blockEntry.stride(), ADD); 01016 } 01017 01018 //------------------------------------------------------------------- 01019 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 01020 void 01021 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::sumIntoLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, const Teuchos::SerialDenseMatrix<int,Scalar>& blockEntry) 01022 { 01023 //first get an ArrayRCP for the internal storage for this block-entry: 01024 Teuchos::ArrayRCP<Scalar> internalBlockEntry; 01025 LocalOrdinal blkRowSize = blockEntry.numRows(); 01026 LocalOrdinal blkColSize = blockEntry.numCols(); 01027 getLocalBlockEntryViewNonConst(localBlockRow,localBlockCol, blkRowSize, blkColSize, internalBlockEntry); 01028 01029 //now sum (add) the incoming block-entry into internal storage: 01030 const Scalar* inputvalues = blockEntry.values(); 01031 set_array_values(internalBlockEntry, inputvalues, blkRowSize, blkColSize, blockEntry.stride(), ADD); 01032 } 01033 01034 //------------------------------------------------------------------- 01035 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 01036 void 01037 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::setGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry) 01038 { 01039 //first get an ArrayRCP for the internal storage for this block-entry: 01040 Teuchos::ArrayRCP<Scalar> internalBlockEntry; 01041 getGlobalBlockEntryViewNonConst(globalBlockRow,globalBlockCol, blkRowSize, blkColSize, internalBlockEntry); 01042 01043 LocalOrdinal blk_size = blockEntry.size(); 01044 TEUCHOS_TEST_FOR_EXCEPTION(blkColSize*LDA > blk_size, std::runtime_error, 01045 "Tpetra::VbrMatrix::setGlobalBlockEntry ERROR, inconsistent block-entry sizes."); 01046 01047 //copy the incoming block-entry into internal storage: 01048 set_array_values(internalBlockEntry, blockEntry, blkRowSize, blkColSize, LDA, REPLACE); 01049 } 01050 01051 //------------------------------------------------------------------- 01052 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 01053 void 01054 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::setLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry) 01055 { 01056 //first get an ArrayRCP for the internal storage for this block-entry: 01057 Teuchos::ArrayRCP<Scalar> internalBlockEntry; 01058 getLocalBlockEntryViewNonConst(localBlockRow,localBlockCol, blkRowSize, blkColSize, internalBlockEntry); 01059 01060 LocalOrdinal blk_size = blockEntry.size(); 01061 TEUCHOS_TEST_FOR_EXCEPTION(blkColSize*LDA > blk_size, std::runtime_error, 01062 "Tpetra::VbrMatrix::setLocalBlockEntry ERROR, inconsistent block-entry sizes."); 01063 01064 //copy the incoming block-entry into internal storage: 01065 set_array_values(internalBlockEntry, blockEntry, blkRowSize, blkColSize, LDA, REPLACE); 01066 } 01067 01068 //------------------------------------------------------------------- 01069 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 01070 void 01071 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::sumIntoGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry) 01072 { 01073 //first get an ArrayRCP for the internal storage for this block-entry: 01074 Teuchos::ArrayRCP<Scalar> internalBlockEntry; 01075 getGlobalBlockEntryViewNonConst(globalBlockRow,globalBlockCol, blkRowSize, blkColSize, internalBlockEntry); 01076 01077 LocalOrdinal blk_size = blockEntry.size(); 01078 TEUCHOS_TEST_FOR_EXCEPTION(blkColSize*LDA > blk_size, std::runtime_error, 01079 "Tpetra::VbrMatrix::setGlobalBlockEntry ERROR, inconsistent block-entry sizes."); 01080 01081 //copy the incoming block-entry into internal storage: 01082 set_array_values(internalBlockEntry, blockEntry, blkRowSize, blkColSize, LDA, ADD); 01083 } 01084 01085 //------------------------------------------------------------------- 01086 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 01087 void 01088 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::sumIntoLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry) 01089 { 01090 //first get an ArrayRCP for the internal storage for this block-entry: 01091 Teuchos::ArrayRCP<Scalar> internalBlockEntry; 01092 getLocalBlockEntryViewNonConst(localBlockRow,localBlockCol, blkRowSize, blkColSize, internalBlockEntry); 01093 01094 LocalOrdinal blk_size = blockEntry.size(); 01095 TEUCHOS_TEST_FOR_EXCEPTION(blkColSize*LDA > blk_size, std::runtime_error, 01096 "Tpetra::VbrMatrix::setLocalBlockEntry ERROR, inconsistent block-entry sizes."); 01097 01098 //copy the incoming block-entry into internal storage: 01099 set_array_values(internalBlockEntry, blockEntry, blkRowSize, blkColSize, LDA, ADD); 01100 } 01101 01102 //------------------------------------------------------------------- 01103 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 01104 void 01105 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::optimizeStorage() 01106 { 01107 typedef Teuchos::ArrayRCP<const size_t> Host_View; 01108 typedef Teuchos::ArrayRCP<const LocalOrdinal> Host_View_LO; 01109 typedef typename Teuchos::ArrayRCP<size_t>::size_type Tsize_t; 01110 01111 if (is_storage_optimized_ == true) return; 01112 01113 TEUCHOS_TEST_FOR_EXCEPTION(constBlkGraph_->isFillComplete() != true, std::runtime_error, 01114 "Tpetra::VbrMatrix::optimizeStorage ERROR, isFillComplete() is false, required to be true before optimizeStorage is called."); 01115 01116 size_t num_block_nonzeros = constBlkGraph_->getNodeNumBlockEntries(); 01117 01118 const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& rowmap = constBlkGraph_->getBlockRowMap(); 01119 const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& colmap = constBlkGraph_->getBlockColMap(); 01120 01121 const Teuchos::RCP<Node>& node = getBlockRowMap()->getPointMap()->getNode(); 01122 01123 //need to count the number of point-entries: 01124 size_t num_point_entries = 0; 01125 Host_View bptr = constBlkGraph_->getNodeRowOffsets(); 01126 01127 if (bptr.size() == 0) { 01128 is_storage_optimized_ = true; 01129 return; 01130 } 01131 01132 Host_View_LO bindx = constBlkGraph_->getNodePackedIndices(); 01133 01134 for(Tsize_t r=0; r<bptr.size()-1; ++r) { 01135 size_t rbeg = bptr[r]; 01136 size_t rend = bptr[r+1]; 01137 01138 LocalOrdinal rsize = rowmap->getLocalBlockSize(r); 01139 01140 for(size_t j=rbeg; j<rend; ++j) { 01141 LocalOrdinal csize = colmap->getLocalBlockSize(bindx[j]); 01142 num_point_entries += rsize*csize; 01143 } 01144 } 01145 01146 pbuf_indx_ = node->template allocBuffer<LocalOrdinal>(num_block_nonzeros+1); 01147 pbuf_values1D_ = node->template allocBuffer<Scalar>(num_point_entries); 01148 01149 Teuchos::ArrayRCP<LocalOrdinal> view_indx = node->template viewBufferNonConst<LocalOrdinal>(Kokkos::WriteOnly, num_block_nonzeros+1, pbuf_indx_); 01150 Teuchos::ArrayRCP<Scalar> view_values1D = node->template viewBufferNonConst<Scalar>(Kokkos::WriteOnly, num_point_entries, pbuf_values1D_); 01151 01152 bool have_2D_data = data_2D_->size() > 0; 01153 Scalar zero = Teuchos::ScalarTraits<Scalar>::zero(); 01154 01155 size_t ioffset = 0; 01156 size_t offset = 0; 01157 for(Tsize_t r=0; r<bptr.size()-1; ++r) { 01158 LocalOrdinal rsize = rowmap->getLocalBlockSize(r); 01159 01160 RowGlobalCols* blk_row = have_2D_data ? &((*data_2D_)[r]) : NULL; 01161 01162 for(size_t c=bptr[r]; c<bptr[r+1]; ++c) { 01163 view_indx[ioffset++] = offset; 01164 01165 LocalOrdinal csize = colmap->getLocalBlockSize(bindx[c]); 01166 Tsize_t blkSize = rsize*csize; 01167 01168 if (blk_row != NULL) { 01169 GlobalOrdinal global_col = colmap->getGlobalBlockID(bindx[c]); 01170 typename RowGlobalCols::iterator iter = blk_row->find(global_col); 01171 TEUCHOS_TEST_FOR_EXCEPTION(iter == blk_row->end(), std::runtime_error, 01172 "Tpetra::VbrMatrix::optimizeStorage ERROR, global_col not found in row."); 01173 01174 Teuchos::ArrayRCP<Scalar> vals = iter->second; 01175 for(Tsize_t n=0; n<blkSize; ++n) { 01176 view_values1D[offset+n] = vals[n]; 01177 } 01178 } 01179 else { 01180 for(Tsize_t n=0; n<blkSize; ++n) view_values1D[offset+n] = zero; 01181 } 01182 offset += blkSize; 01183 } 01184 } 01185 view_indx[ioffset] = offset; 01186 01187 //Final step: release memory for the "2D" storage: 01188 data_2D_ = Teuchos::null; 01189 01190 is_storage_optimized_ = true; 01191 } 01192 01193 //------------------------------------------------------------------- 01194 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 01195 void VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::fillLocalMatrix() 01196 { 01197 //We insist that optimzeStorage has already been called. 01198 //We don't care whether this function (fillLocalMatrix()) is being 01199 //called for the first time or not. 01200 TEUCHOS_TEST_FOR_EXCEPTION(is_storage_optimized_ != true, std::runtime_error, 01201 "Tpetra::VbrMatrix::fillLocalMatrix ERROR, optimizeStorage is required to have already been called."); 01202 01203 Teuchos::ArrayRCP<const size_t > nodeRowOffsets = constBlkGraph_->getNodeRowOffsets(); 01204 Teuchos::ArrayRCP<const LocalOrdinal> nodePackedInds = constBlkGraph_->getNodePackedIndices(); 01205 if (Node::isHostNode == false) { 01206 RCP<Node> node = getRangeMap()->getNode(); 01207 // 01208 Teuchos::ArrayRCP<size_t> dev_nodeRowOffsets = node->template allocBuffer<size_t>(nodeRowOffsets.size()); 01209 node->copyToBuffer(nodeRowOffsets.size(), nodeRowOffsets(), dev_nodeRowOffsets); 01210 nodeRowOffsets = dev_nodeRowOffsets; 01211 // 01212 Teuchos::ArrayRCP<LocalOrdinal> dev_nodePackedInds = node->template allocBuffer<LocalOrdinal>(nodePackedInds.size()); 01213 node->copyToBuffer(nodePackedInds.size(), nodePackedInds(), dev_nodePackedInds); 01214 nodePackedInds = dev_nodePackedInds; 01215 } 01216 lclMatrix_.setPackedValues(pbuf_values1D_, 01217 getBlockRowMap()->getNodeFirstPointInBlocks_Device(), 01218 getBlockColMap()->getNodeFirstPointInBlocks_Device(), 01219 nodeRowOffsets, 01220 nodePackedInds, 01221 pbuf_indx_); 01222 } 01223 01224 //------------------------------------------------------------------- 01225 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 01226 void VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::fillLocalMatVec() 01227 { 01228 //We insist that optimzeStorage has already been called. 01229 //We don't care whether this function (fillLocalMatVec()) is being 01230 //called for the first time or not. 01231 TEUCHOS_TEST_FOR_EXCEPTION(is_storage_optimized_ != true, std::runtime_error, 01232 "Tpetra::VbrMatrix::fillLocalMatrix ERROR, optimizeStorage is required to have already been called."); 01233 01234 lclMatOps_.initializeValues(lclMatrix_); 01235 } 01236 01237 //------------------------------------------------------------------- 01238 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 01239 void 01240 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::createImporterExporter() 01241 { 01242 typedef typename Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > PtMap; 01243 01244 const PtMap& ptDomMap = (constBlkGraph_->getBlockDomainMap())->getPointMap(); 01245 const PtMap& ptRngMap = (constBlkGraph_->getBlockRangeMap())->getPointMap(); 01246 const PtMap& ptRowMap = (constBlkGraph_->getBlockRowMap())->getPointMap(); 01247 const PtMap& ptColMap = (constBlkGraph_->getBlockColMap())->getPointMap(); 01248 01249 if (!ptDomMap->isSameAs(*ptColMap)) { 01250 importer_ = Teuchos::rcp(new Tpetra::Import<LocalOrdinal,GlobalOrdinal,Node>(ptDomMap, ptColMap)); 01251 } 01252 if (!ptRngMap->isSameAs(*ptRowMap)) { 01253 exporter_ = Teuchos::rcp(new Tpetra::Export<LocalOrdinal,GlobalOrdinal,Node>(ptRowMap, ptRngMap)); 01254 } 01255 } 01256 01257 //------------------------------------------------------------------- 01258 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 01259 void 01260 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::fillComplete(const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& blockDomainMap, const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& blockRangeMap) 01261 { 01262 if (isFillComplete()) return; 01263 01264 globalAssemble(); 01265 01266 blkGraph_->fillComplete(blockDomainMap, blockRangeMap); 01267 01268 createImporterExporter(); 01269 01270 is_fill_completed_ = true; 01271 01272 optimizeStorage(); 01273 01274 fillLocalMatrix(); 01275 fillLocalMatVec(); 01276 } 01277 01278 //------------------------------------------------------------------- 01279 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 01280 void VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::fillComplete() 01281 { 01282 //In this case, our block-row-map will also be our domain-map and 01283 //range-map. 01284 01285 fillComplete(getBlockRowMap(), getBlockRowMap()); 01286 } 01287 01288 //------------------------------------------------------------------- 01289 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 01290 void VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::globalAssemble() 01291 { 01292 if (getPointRowMap()->getComm()->getSize() == 1) { 01293 return; 01294 } 01295 01296 //nonlocal_data_ contains data that belongs on other processors. 01297 //We'll refer to this as overlapping data, and create an 'overlapMap' 01298 //that describes the layout of this overlapping data. 01299 Teuchos::RCP<const Tpetra::BlockMap<LocalOrdinal,GlobalOrdinal,Node> > 01300 overlapMap = VbrUtils::createOverlapMap(nonlocal_data_, *getBlockRowMap()); 01301 01302 if (overlapMap->getGlobalNumBlocks() == 0) { 01303 return; 01304 } 01305 01306 //VbrDataDist is a wrapper that makes our overlapping data behave 01307 //like a DistObject so that the overlapping data can be exported 01308 //to the owning processors in a standard way. 01309 VbrUtils::VbrDataDist<LocalOrdinal,GlobalOrdinal,Scalar,Node> 01310 vdd(nonlocal_data_, *overlapMap, *this->getBlockRowMap()); 01311 01312 //Create an exporter where the source map is our overlapMap and the 01313 //target map is the rowmap of our VbrMatrix. 01314 Tpetra::Import<LocalOrdinal,GlobalOrdinal,Node> 01315 importer( vdd.getMap(), convertBlockMapToPointMap(*getBlockRowMap())); 01316 01317 //Communicate the overlapping data to the owning processors and add it 01318 //into this VbrMatrix. 01319 this->doImport(vdd, importer, Tpetra::ADD); 01320 01321 //Zero out the overlapping data so it can be re-populated and re-assembled 01322 //in future calls to globalAssemble. 01323 VbrUtils::zeroEntries(nonlocal_data_); 01324 } 01325 01326 //------------------------------------------------------------------- 01327 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 01328 std::string 01329 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::description() const 01330 { 01331 std::ostringstream oss; 01332 oss << Teuchos::Describable::description(); 01333 if (isFillComplete()) { 01334 oss << "{status = fill complete, global num block rows = " << getBlockRowMap()->getGlobalNumBlocks() << "}"; 01335 } 01336 else { 01337 oss << "{status = fill not complete, global num block rows = " << getBlockRowMap()->getGlobalNumBlocks() << "}"; 01338 } 01339 return oss.str(); 01340 } 01341 01342 //------------------------------------------------------------------- 01343 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 01344 void 01345 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const 01346 { 01347 Teuchos::EVerbosityLevel vl = verbLevel; 01348 if (vl == Teuchos::VERB_DEFAULT) vl = Teuchos::VERB_LOW; 01349 01350 if (vl == Teuchos::VERB_NONE) return; 01351 01352 Teuchos::RCP<const Teuchos::Comm<int> > comm = getBlockRowMap()->getPointMap()->getComm(); 01353 const int myProc = comm->getRank(); 01354 01355 if (myProc == 0) out << "VbrMatrix::describe is under construction..." << std::endl; 01356 } 01357 01358 }//namespace Tpetra 01359 01360 // 01361 // Explicit instantiation macro 01362 // 01363 // Must be expanded from within the Tpetra namespace! 01364 // 01365 01366 #define TPETRA_VBRMATRIX_INSTANT(SCALAR,LO,GO,NODE) \ 01367 \ 01368 template class VbrMatrix< SCALAR , LO , GO , NODE >; 01369 01370 #endif //TPETRA_VBRMATRIX_DEF_HPP 01371
1.7.6.1