|
Tpetra Matrix/Vector Services
Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Tpetra: Templated Linear Algebra Services Package 00005 // Copyright (2008) Sandia Corporation 00006 // 00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, 00008 // the U.S. Government retains certain rights in this software. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00038 // 00039 // ************************************************************************ 00040 // @HEADER 00041 00042 #ifndef TPETRA_VBRMATRIX_DECL_HPP 00043 #define TPETRA_VBRMATRIX_DECL_HPP 00044 00045 #include <Kokkos_DefaultNode.hpp> 00046 #include <Kokkos_DefaultKernels.hpp> 00047 #include <Kokkos_VbrMatrix.hpp> 00048 00049 #include "Tpetra_ConfigDefs.hpp" 00050 #include "Tpetra_Operator.hpp" 00051 #include "Tpetra_BlockMap.hpp" 00052 #include "Tpetra_BlockCrsGraph.hpp" 00053 #include "Tpetra_VbrUtils.hpp" 00054 #include "Teuchos_SerialDenseMatrix.hpp" 00055 00060 namespace Tpetra { 00061 00063 00091 template <class Scalar, 00092 class LocalOrdinal = int, 00093 class GlobalOrdinal = LocalOrdinal, 00094 class Node = Kokkos::DefaultNode::DefaultNodeType, 00095 class LocalMatOps = typename Kokkos::DefaultKernels<Scalar,LocalOrdinal,Node>::BlockSparseOps > 00096 class VbrMatrix : public Tpetra::DistObject<char, LocalOrdinal, GlobalOrdinal, Node>, public Tpetra::Operator<Scalar,LocalOrdinal,GlobalOrdinal,Node> { 00097 public: 00098 typedef Scalar scalar_type; 00099 typedef LocalOrdinal local_ordinal_type; 00100 typedef GlobalOrdinal global_ordinal_type; 00101 typedef Node node_type; 00102 typedef LocalMatOps mat_vec_type; 00103 00105 00106 00108 00113 VbrMatrix(const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> > &blkRowMap, size_t maxNumEntriesPerRow, ProfileType pftype = DynamicProfile); 00114 00116 00126 VbrMatrix(const Teuchos::RCP<const BlockCrsGraph<LocalOrdinal,GlobalOrdinal,Node> >& blkGraph); 00127 00129 virtual ~VbrMatrix(); 00130 00132 00134 00135 00137 00142 template <class DomainScalar, class RangeScalar> 00143 void multiply(const MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node> & X, MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node> &Y, Teuchos::ETransp trans, RangeScalar alpha, RangeScalar beta) const; 00144 00146 00158 template <class DomainScalar, class RangeScalar> 00159 void solve(const MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node> & Y, MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node> &X, Teuchos::ETransp trans) const; 00160 00162 00164 00165 00167 00169 const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > & getDomainMap() const; 00170 00172 00174 const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > & getRangeMap() const; 00175 00177 00182 void apply(const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X, 00183 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y, 00184 Teuchos::ETransp trans = Teuchos::NO_TRANS, 00185 Scalar alpha = Teuchos::ScalarTraits<Scalar>::one(), 00186 Scalar beta = Teuchos::ScalarTraits<Scalar>::zero()) const; 00187 00189 00192 void applyInverse(const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> & Y, 00193 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X, 00194 Teuchos::ETransp trans) const; 00195 00197 00200 bool hasTransposeApply() const; 00201 00203 00205 00206 00208 const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> > & getBlockRowMap() const; 00209 00211 const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> > & getBlockColMap() const; 00212 00214 const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> > & getBlockDomainMap() const; 00215 00217 const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> > & getBlockRangeMap() const; 00218 00220 const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > & getPointRowMap() const; 00221 00223 const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > & getPointColMap() const; 00224 00226 bool isFillComplete() const; 00228 00230 00231 00233 00236 void putScalar(Scalar s); 00237 00239 00254 void setGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, const Teuchos::SerialDenseMatrix<int,Scalar>& blockEntry); 00255 00257 00264 void setLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, const Teuchos::SerialDenseMatrix<int,Scalar>& blockEntry); 00265 00267 00282 void sumIntoGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, const Teuchos::SerialDenseMatrix<int,Scalar>& blockEntry); 00283 00285 00292 void sumIntoLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, const Teuchos::SerialDenseMatrix<int,Scalar>& blockEntry); 00293 00295 00310 void setGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry); 00311 00313 00320 void setLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry); 00321 00323 00338 void sumIntoGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry); 00339 00341 00348 void sumIntoLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry); 00349 00351 00353 00354 00356 00360 void fillComplete(const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& blockDomainMap, const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& blockRangeMap); 00361 00363 00366 void fillComplete(); 00367 00369 void globalAssemble(); 00371 00373 00374 00376 00378 void getGlobalBlockRowView(GlobalOrdinal globalBlockRow, 00379 LocalOrdinal& numPtRows, 00380 Teuchos::ArrayView<const GlobalOrdinal>& blockCols, 00381 Teuchos::Array<LocalOrdinal>& ptColsPerBlockCol, 00382 Teuchos::Array<Teuchos::ArrayRCP<const Scalar> >& blockEntries) const; 00383 00385 00387 void getLocalBlockRowView(LocalOrdinal localBlockRow, 00388 LocalOrdinal& numPtRows, 00389 Teuchos::ArrayView<const LocalOrdinal>& blockCols, 00390 Teuchos::Array<LocalOrdinal>& ptColsPerBlockCol, 00391 Teuchos::ArrayRCP<const Scalar>& blockEntries) const; 00392 00394 00402 void getGlobalBlockEntryView(GlobalOrdinal globalBlockRow, 00403 GlobalOrdinal globalBlockCol, 00404 LocalOrdinal& numPtRows, 00405 LocalOrdinal& numPtCols, 00406 Teuchos::ArrayRCP<const Scalar>& blockEntry) const; 00407 00409 00419 void getGlobalBlockEntryViewNonConst(GlobalOrdinal globalBlockRow, 00420 GlobalOrdinal globalBlockCol, 00421 LocalOrdinal& numPtRows, 00422 LocalOrdinal& numPtCols, 00423 Teuchos::ArrayRCP<Scalar>& blockEntry); 00424 00426 00436 void getLocalBlockEntryView(LocalOrdinal localBlockRow, 00437 LocalOrdinal localBlockCol, 00438 LocalOrdinal& numPtRows, 00439 LocalOrdinal& numPtCols, 00440 Teuchos::ArrayRCP<const Scalar>& blockEntry) const; 00441 00443 00459 void getLocalBlockEntryViewNonConst(LocalOrdinal localBlockRow, 00460 LocalOrdinal localBlockCol, 00461 LocalOrdinal& numPtRows, 00462 LocalOrdinal& numPtCols, 00463 Teuchos::ArrayRCP<Scalar>& blockEntry); 00464 00466 00470 void getLocalDiagCopy(Tpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& diag) const; 00471 00472 const Teuchos::RCP<const BlockCrsGraph<LocalOrdinal, GlobalOrdinal, Node> >& getBlockCrsGraph() {return constBlkGraph_;} 00474 00476 00477 00478 bool checkSizes(const DistObject<char, LocalOrdinal, GlobalOrdinal, Node>& source); 00479 00480 void copyAndPermute(const DistObject<char, LocalOrdinal, GlobalOrdinal, Node>& source, size_t numSameIDs, const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs, const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs); 00481 00482 void packAndPrepare(const DistObject<char, LocalOrdinal, GlobalOrdinal, Node>& source, const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs, Teuchos::Array<char>& exports, const Teuchos::ArrayView<size_t>& numPacketsPerLID, size_t& constantNumPackets, Distributor& distor); 00483 00484 void unpackAndCombine(const Teuchos::ArrayView<const LocalOrdinal>& importLIDs, const Teuchos::ArrayView<const char>& imports, const Teuchos::ArrayView<size_t>& numPacketsPerLID, size_t constantNumPackets, Distributor& distor, CombineMode CM); 00485 00487 00489 00490 std::string description() const; 00491 00494 void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const; 00496 00497 private: 00498 //private methods: 00499 00500 Teuchos::RCP<Node> getNode() const; 00501 00502 void updateImport(const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& X) const; 00503 void updateExport(const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Y) const; 00504 00505 void createImporterExporter(); 00506 void optimizeStorage(); 00507 void fillLocalMatrix(); 00508 void fillLocalMatVec(); 00509 00510 //private data members: 00511 00512 //We hold two graph pointers, one const and the other non-const. 00513 //If a BlockCrsGraph is provided at construction, it is const and VbrMatrix 00514 //never changes it. 00515 //If a BlockCrsGraph is not provided at construction, VbrMatrix creates one 00516 //internally and fills it as the matrix is filled, up until fillComplete() 00517 //is called. 00518 // 00519 //blkGraph_ is either the internally created graph, or is null. 00520 //constBlkGraph_ is either a pointer to blkGraph_, or a pointer to the 00521 //graph provided at construction time. 00522 // 00523 Teuchos::RCP<BlockCrsGraph<LocalOrdinal,GlobalOrdinal,Node> > blkGraph_; 00524 Teuchos::RCP<const BlockCrsGraph<LocalOrdinal,GlobalOrdinal,Node> > constBlkGraph_; 00525 00526 Kokkos::VbrMatrix<Scalar,LocalOrdinal,Node> lclMatrix_; 00527 00528 //A variable-block-row matrix is represented by 6 arrays 00529 //in packed (contiguous storage) form. For a description of these 00530 //arrays, see the text at the bottom of this file. 00531 //(2 of those arrays, rptr and cptr, are represented by arrays in the 00532 //getBlockRowMap() and getBlockColMap() objects, and 00533 //another two of those arrays, bptr and bindx, are represented by arrays in 00534 //the BlockCrsGraph object.) 00535 //This is noted in the comments for rptr,cptr,bptr,bindx below. 00536 // 00537 //These arrays are handled as if they may point to memory that resides on 00538 //a separate device (e.g., a GPU). In other words, when the contents of these 00539 //arrays are manipulated, we use views or buffers obtained from the Node 00540 //object. 00541 Teuchos::ArrayRCP<Scalar> pbuf_values1D_; 00542 Teuchos::ArrayRCP<LocalOrdinal> pbuf_indx_; 00543 00544 LocalMatOps lclMatOps_; 00545 Teuchos::RCP<Tpetra::Import<LocalOrdinal,GlobalOrdinal,Node> > importer_; 00546 Teuchos::RCP<Tpetra::Export<LocalOrdinal,GlobalOrdinal,Node> > exporter_; 00547 mutable Teuchos::RCP<Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > importedVec_; 00548 mutable Teuchos::RCP<Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > exportedVec_; 00549 00550 typedef typename std::map<GlobalOrdinal,Teuchos::ArrayRCP<Scalar> > RowGlobalCols; 00551 00552 //We use an array-of-maps to represent the variable-block-row matrix in 00553 //un-packed '2D' form. 00554 // 00555 //This unpacked data is assumed to be resident in CPU (host) memory. 00556 //It doesn't make sense to copy this data back and forth to a separate 00557 //compute device (e.g., a GPU), since we don't support doing matrix-vector 00558 //products until after fillComplete is called, at which time contiguous 00559 //arrays are allocated on the device and matrix data is copied into them. 00560 Teuchos::RCP<Teuchos::Array<RowGlobalCols> > data_2D_; 00561 00562 VbrUtils::VbrData<LocalOrdinal,GlobalOrdinal,Scalar> nonlocal_data_; 00563 00564 bool is_fill_completed_; 00565 bool is_storage_optimized_; 00566 };//class VbrMatrix 00567 00568 }//namespace Tpetra 00569 00570 //---------------------------------------------------------------------------- 00571 // Description of arrays representing the VBR format: 00572 // 00573 // (For more description, see this URL (valid as of 5/26/2010): 00574 // http://docs.sun.com/source/817-0086-10/prog-sparse-support.html) 00575 // ...and of course more can be found using google... 00576 // The old Aztec manual was a great resource for this but I can't 00577 // find a copy of that these days... 00578 // 00579 // 00580 // Here is a brief description of the 6 arrays that are required to 00581 // represent a VBR matrix in packed (contiguous-memory-storage) format: 00582 // 00583 // rptr: length num_block_rows + 1 00584 // rptr[i]: the pt-row corresponding to the i-th block-row 00585 // Note: rptr is getBlockRowMap()->getNodeFirstPointInBlocks(). 00586 // 00587 // cptr: length num_distinct_block_cols + 1 00588 // cptr[j]: the pt-col corresponding to the j-th block-col 00589 // Note: cptr is getBlockColMap()->getNodeFirstPointInBlocks(). 00590 // 00591 // bptr: length num_block_rows + 1 00592 // bptr[i]: location in bindx of the first nonzero block-entry 00593 // of the i-th block-row 00594 // Note: bptr is blkGraph_->getNodeRowOffsets(); 00595 // 00596 // bindx: length num-nonzero-block-entries 00597 // bindx[j]: block-col-index of j-th block-entry 00598 // Note: bindx is blkGraph_->getNodePackedIndices(); 00599 // 00600 // indx: length num-nonzero-block-entries + 1 00601 // indx[j]: location in vals of the beginning of the j-th 00602 // block-entry 00603 // 00604 // vals: length num-nonzero-scalar-entries 00605 // 00606 // 00607 // Some example look-ups: 00608 // 00609 // int nbr = num_block_rows; 00610 // int total_num_block_nonzeros = bptr[nbr]; 00611 // int total_num_scalar_nonzeros = indx[num_block_nonzeros]; 00612 // 00613 // //get arrays for i-th block-row: 00614 // int* bindx_i = &bindx[bptr[i]]; 00615 // double* vals_i = &val[indx[bptr[i]]]; 00616 // int num_block_nonzeros_in_row_i = bptr[i+1]-bptr[i]; 00617 // 00618 //---------------------------------------------------------------------------- 00619 00620 #endif //TPETRA_VBRMATRIX_DECL_HPP 00621
1.7.6.1