|
Tpetra Matrix/Vector Services
Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Tpetra: Templated Linear Algebra Services Package 00005 // Copyright (2008) Sandia Corporation 00006 // 00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, 00008 // the U.S. Government retains certain rights in this software. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00038 // 00039 // ************************************************************************ 00040 // @HEADER 00041 00042 #ifndef TPETRA_VBRMATRIX_DECL_HPP 00043 #define TPETRA_VBRMATRIX_DECL_HPP 00044 00047 00048 #include <Tpetra_ConfigDefs.hpp> 00049 #include <Tpetra_DistObject_decl.hpp> 00050 #include <Tpetra_Operator.hpp> 00051 #include <Tpetra_VbrUtils.hpp> 00052 #include <Kokkos_DefaultNode.hpp> 00053 #include <Kokkos_DefaultKernels.hpp> 00054 00055 // Forward declarations, to avoid including files that we don't really 00056 // need just for declaring VbrMatrix. The #ifndef ... #endif prevents 00057 // Doxygen from generating spurious documentation for the forward 00058 // declarations. 00059 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00060 namespace Tpetra { 00061 template<class LocalOrdinal, class GlobalOrdinal, class Node> 00062 class BlockMap; 00063 00064 template<class LocalOrdinal, class GlobalOrdinal, class Node> 00065 class BlockCrsGraph; 00066 00067 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00068 class Vector; 00069 } // namespace Tpetra 00070 00071 namespace KokkosClassic { 00072 template<class Scalar, class LocalOrdinal, class Node> 00073 class VbrMatrix; 00074 } // namespace KokkosClassic 00075 00076 namespace Teuchos { 00077 template<class OrdinalType, class ScalarType> 00078 class SerialDenseMatrix; 00079 } // namespace Teuchos 00080 #endif // DOXYGEN_SHOULD_SKIP_THIS 00081 00082 namespace Tpetra { 00083 00085 00113 template<class Scalar = Operator<>::scalar_type, 00114 class LocalOrdinal = typename Operator<Scalar>::local_ordinal_type, 00115 class GlobalOrdinal = typename Operator<Scalar, LocalOrdinal>::global_ordinal_type, 00116 class Node = typename Operator<Scalar, LocalOrdinal, GlobalOrdinal>::node_type> 00117 class VbrMatrix : 00118 public Tpetra::DistObject<char, LocalOrdinal, GlobalOrdinal, Node>, 00119 public Tpetra::Operator<Scalar,LocalOrdinal,GlobalOrdinal,Node> { 00120 public: 00121 typedef Scalar scalar_type; 00122 typedef LocalOrdinal local_ordinal_type; 00123 typedef GlobalOrdinal global_ordinal_type; 00124 typedef Node node_type; 00125 typedef typename KokkosClassic::DefaultKernels<Scalar, LocalOrdinal, Node>::BlockSparseOps sparse_ops_type; 00126 00128 00129 00131 00136 VbrMatrix (const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& blkRowMap, 00137 size_t maxNumEntriesPerRow, 00138 ProfileType pftype = DynamicProfile); 00139 00141 00151 VbrMatrix (const Teuchos::RCP<const BlockCrsGraph<LocalOrdinal,GlobalOrdinal,Node> >& blkGraph); 00152 00154 virtual ~VbrMatrix(); 00155 00157 00158 00159 00161 00166 template <class DomainScalar, class RangeScalar> 00167 void 00168 multiply (const MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node>& X, 00169 MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node>& Y, 00170 Teuchos::ETransp trans, 00171 RangeScalar alpha, 00172 RangeScalar beta) const; 00173 00175 00187 template <class DomainScalar, class RangeScalar> 00188 void 00189 solve (const MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node>& Y, 00190 MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node>& X, 00191 Teuchos::ETransp trans) const; 00192 00194 00195 00196 00198 00200 Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > getDomainMap() const; 00201 00203 00205 Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > getRangeMap() const; 00206 00208 00213 void 00214 apply (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X, 00215 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y, 00216 Teuchos::ETransp trans = Teuchos::NO_TRANS, 00217 Scalar alpha = Teuchos::ScalarTraits<Scalar>::one(), 00218 Scalar beta = Teuchos::ScalarTraits<Scalar>::zero()) const; 00219 00221 00224 void 00225 applyInverse (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> & Y, 00226 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X, 00227 Teuchos::ETransp trans) const; 00228 00230 00233 bool hasTransposeApply() const; 00234 00236 00238 00239 00241 Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> > getBlockRowMap() const; 00242 00244 Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> > getBlockColMap() const; 00245 00247 Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> > getBlockDomainMap() const; 00248 00250 Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> > getBlockRangeMap() const; 00251 00253 Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > getPointRowMap() const; 00254 00256 Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > getPointColMap() const; 00257 00259 bool isFillComplete() const; 00260 00262 00263 00264 00266 00269 void putScalar(Scalar s); 00270 00272 00287 void setGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, const Teuchos::SerialDenseMatrix<int,Scalar>& blockEntry); 00288 00290 00297 void setLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, const Teuchos::SerialDenseMatrix<int,Scalar>& blockEntry); 00298 00300 00315 void sumIntoGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, const Teuchos::SerialDenseMatrix<int,Scalar>& blockEntry); 00316 00318 00325 void sumIntoLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, const Teuchos::SerialDenseMatrix<int,Scalar>& blockEntry); 00326 00328 00343 void setGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry); 00344 00346 00353 void setLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry); 00354 00356 00371 void sumIntoGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry); 00372 00374 00381 void sumIntoLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry); 00382 00384 00386 00387 00389 00393 void fillComplete(const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& blockDomainMap, const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& blockRangeMap); 00394 00396 00399 void fillComplete(); 00400 00402 void globalAssemble(); 00403 00405 00406 00407 00409 00411 void getGlobalBlockRowView(GlobalOrdinal globalBlockRow, 00412 LocalOrdinal& numPtRows, 00413 Teuchos::ArrayView<const GlobalOrdinal>& blockCols, 00414 Teuchos::Array<LocalOrdinal>& ptColsPerBlockCol, 00415 Teuchos::Array<Teuchos::ArrayRCP<const Scalar> >& blockEntries) const; 00416 00418 00420 void getLocalBlockRowView(LocalOrdinal localBlockRow, 00421 LocalOrdinal& numPtRows, 00422 Teuchos::ArrayView<const LocalOrdinal>& blockCols, 00423 Teuchos::Array<LocalOrdinal>& ptColsPerBlockCol, 00424 Teuchos::ArrayRCP<const Scalar>& blockEntries) const; 00425 00427 00435 void getGlobalBlockEntryView(GlobalOrdinal globalBlockRow, 00436 GlobalOrdinal globalBlockCol, 00437 LocalOrdinal& numPtRows, 00438 LocalOrdinal& numPtCols, 00439 Teuchos::ArrayRCP<const Scalar>& blockEntry) const; 00440 00442 00452 void getGlobalBlockEntryViewNonConst(GlobalOrdinal globalBlockRow, 00453 GlobalOrdinal globalBlockCol, 00454 LocalOrdinal& numPtRows, 00455 LocalOrdinal& numPtCols, 00456 Teuchos::ArrayRCP<Scalar>& blockEntry); 00457 00459 00469 void getLocalBlockEntryView(LocalOrdinal localBlockRow, 00470 LocalOrdinal localBlockCol, 00471 LocalOrdinal& numPtRows, 00472 LocalOrdinal& numPtCols, 00473 Teuchos::ArrayRCP<const Scalar>& blockEntry) const; 00474 00476 00492 void getLocalBlockEntryViewNonConst(LocalOrdinal localBlockRow, 00493 LocalOrdinal localBlockCol, 00494 LocalOrdinal& numPtRows, 00495 LocalOrdinal& numPtCols, 00496 Teuchos::ArrayRCP<Scalar>& blockEntry); 00497 00499 00503 void getLocalDiagCopy (Tpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& diag) const; 00504 00505 Teuchos::RCP<const BlockCrsGraph<LocalOrdinal, GlobalOrdinal, Node> > 00506 getBlockCrsGraph () { 00507 return constBlkGraph_; 00508 } 00509 00511 00512 00513 00514 virtual bool checkSizes (const SrcDistObject& source); 00515 00516 virtual void 00517 copyAndPermute (const SrcDistObject& source, 00518 size_t numSameIDs, 00519 const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs, 00520 const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs); 00521 00522 virtual void 00523 packAndPrepare (const SrcDistObject& source, 00524 const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs, 00525 Teuchos::Array<char>& exports, 00526 const Teuchos::ArrayView<size_t>& numPacketsPerLID, 00527 size_t& constantNumPackets, 00528 Distributor& distor); 00529 00530 virtual void 00531 unpackAndCombine (const Teuchos::ArrayView<const LocalOrdinal>& importLIDs, 00532 const Teuchos::ArrayView<const char>& imports, 00533 const Teuchos::ArrayView<size_t>& numPacketsPerLID, 00534 size_t constantNumPackets, 00535 Distributor& distor, 00536 CombineMode CM); 00537 00539 00540 00541 00542 std::string description() const; 00543 00545 void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const; 00547 00548 private: 00549 Teuchos::RCP<Node> getNode() const; 00550 00551 void updateImport(const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& X) const; 00552 void updateExport(const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Y) const; 00553 00554 void createImporterExporter(); 00555 void optimizeStorage(); 00556 void fillLocalMatrix(); 00557 void fillLocalMatVec(); 00558 00559 //private data members: 00560 00561 //We hold two graph pointers, one const and the other non-const. 00562 //If a BlockCrsGraph is provided at construction, it is const and VbrMatrix 00563 //never changes it. 00564 //If a BlockCrsGraph is not provided at construction, VbrMatrix creates one 00565 //internally and fills it as the matrix is filled, up until fillComplete() 00566 //is called. 00567 // 00568 //blkGraph_ is either the internally created graph, or is null. 00569 //constBlkGraph_ is either a pointer to blkGraph_, or a pointer to the 00570 //graph provided at construction time. 00571 // 00572 Teuchos::RCP<BlockCrsGraph<LocalOrdinal,GlobalOrdinal,Node> > blkGraph_; 00573 Teuchos::RCP<const BlockCrsGraph<LocalOrdinal,GlobalOrdinal,Node> > constBlkGraph_; 00574 00575 typedef KokkosClassic::VbrMatrix<Scalar, LocalOrdinal, Node> local_matrix_type; 00576 00577 // We use a pointer so that the _decl header file only needs a 00578 // forward declaration, not an include. We don't really need an RCP 00579 // here, since the local matrix object never gets shared outside the 00580 // class. std::unique_ptr (C++11) would be more appropriate. 00581 Teuchos::RCP<local_matrix_type> lclMatrix_; 00582 00583 //A variable-block-row matrix is represented by 6 arrays 00584 //in packed (contiguous storage) form. For a description of these 00585 //arrays, see the text at the bottom of this file. 00586 //(2 of those arrays, rptr and cptr, are represented by arrays in the 00587 //getBlockRowMap() and getBlockColMap() objects, and 00588 //another two of those arrays, bptr and bindx, are represented by arrays in 00589 //the BlockCrsGraph object.) 00590 //This is noted in the comments for rptr,cptr,bptr,bindx below. 00591 // 00592 //These arrays are handled as if they may point to memory that resides on 00593 //a separate device (e.g., a GPU). In other words, when the contents of these 00594 //arrays are manipulated, we use views or buffers obtained from the Node 00595 //object. 00596 Teuchos::ArrayRCP<Scalar> pbuf_values1D_; 00597 Teuchos::ArrayRCP<LocalOrdinal> pbuf_indx_; 00598 00599 sparse_ops_type lclMatOps_; 00600 Teuchos::RCP<Tpetra::Import<LocalOrdinal,GlobalOrdinal,Node> > importer_; 00601 Teuchos::RCP<Tpetra::Export<LocalOrdinal,GlobalOrdinal,Node> > exporter_; 00602 mutable Teuchos::RCP<Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > importedVec_; 00603 mutable Teuchos::RCP<Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > exportedVec_; 00604 00605 typedef typename std::map<GlobalOrdinal,Teuchos::ArrayRCP<Scalar> > RowGlobalCols; 00606 00607 //We use an array-of-maps to represent the variable-block-row matrix in 00608 //un-packed '2D' form. 00609 // 00610 //This unpacked data is assumed to be resident in CPU (host) memory. 00611 //It doesn't make sense to copy this data back and forth to a separate 00612 //compute device (e.g., a GPU), since we don't support doing matrix-vector 00613 //products until after fillComplete is called, at which time contiguous 00614 //arrays are allocated on the device and matrix data is copied into them. 00615 Teuchos::RCP<Teuchos::Array<RowGlobalCols> > data_2D_; 00616 00617 VbrUtils::VbrData<LocalOrdinal,GlobalOrdinal,Scalar> nonlocal_data_; 00618 00619 bool is_fill_completed_; 00620 bool is_storage_optimized_; 00621 };//class VbrMatrix 00622 00623 }//namespace Tpetra 00624 00625 //---------------------------------------------------------------------------- 00626 // Description of arrays representing the VBR format: 00627 // 00628 // (For more description, see this URL (valid as of 5/26/2010): 00629 // http://docs.sun.com/source/817-0086-10/prog-sparse-support.html) 00630 // ...and of course more can be found using google... 00631 // The old Aztec manual was a great resource for this but I can't 00632 // find a copy of that these days... 00633 // 00634 // 00635 // Here is a brief description of the 6 arrays that are required to 00636 // represent a VBR matrix in packed (contiguous-memory-storage) format: 00637 // 00638 // rptr: length num_block_rows + 1 00639 // rptr[i]: the pt-row corresponding to the i-th block-row 00640 // Note: rptr is getBlockRowMap()->getNodeFirstPointInBlocks(). 00641 // 00642 // cptr: length num_distinct_block_cols + 1 00643 // cptr[j]: the pt-col corresponding to the j-th block-col 00644 // Note: cptr is getBlockColMap()->getNodeFirstPointInBlocks(). 00645 // 00646 // bptr: length num_block_rows + 1 00647 // bptr[i]: location in bindx of the first nonzero block-entry 00648 // of the i-th block-row 00649 // Note: bptr is blkGraph_->getNodeRowOffsets(); 00650 // 00651 // bindx: length num-nonzero-block-entries 00652 // bindx[j]: block-col-index of j-th block-entry 00653 // Note: bindx is blkGraph_->getNodePackedIndices(); 00654 // 00655 // indx: length num-nonzero-block-entries + 1 00656 // indx[j]: location in vals of the beginning of the j-th 00657 // block-entry 00658 // 00659 // vals: length num-nonzero-scalar-entries 00660 // 00661 // 00662 // Some example look-ups: 00663 // 00664 // int nbr = num_block_rows; 00665 // int total_num_block_nonzeros = bptr[nbr]; 00666 // int total_num_scalar_nonzeros = indx[num_block_nonzeros]; 00667 // 00668 // //get arrays for i-th block-row: 00669 // int* bindx_i = &bindx[bptr[i]]; 00670 // double* vals_i = &val[indx[bptr[i]]]; 00671 // int num_block_nonzeros_in_row_i = bptr[i+1]-bptr[i]; 00672 // 00673 //---------------------------------------------------------------------------- 00674 00675 #endif //TPETRA_VBRMATRIX_DECL_HPP 00676
1.7.6.1