Tpetra Matrix/Vector Services  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines
Tpetra_VbrMatrix_def.hpp
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //          Tpetra: Templated Linear Algebra Services Package
00005 //                 Copyright (2008) Sandia Corporation
00006 //
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ************************************************************************
00040 // @HEADER
00041 
00042 #ifndef TPETRA_VBRMATRIX_DEF_HPP
00043 #define TPETRA_VBRMATRIX_DEF_HPP
00044 
00045 #include <Tpetra_BlockMap.hpp>
00046 #include <Tpetra_BlockCrsGraph.hpp>
00047 #include <Tpetra_DistObject.hpp>
00048 #include <Tpetra_Vector.hpp>
00049 #include <Kokkos_NodeHelpers.hpp>
00050 #include <Kokkos_VbrMatrix.hpp>
00051 #include <Teuchos_SerialDenseMatrix.hpp>
00052 #include <algorithm>
00053 #include <sstream>
00054 
00055 #ifdef DOXYGEN_USE_ONLY
00056 #include "Tpetra_VbrMatrix_decl.hpp"
00057 #endif
00058 
00059 namespace Tpetra {
00060 
00061 template<class Scalar,class Node>
00062 void fill_device_ArrayRCP(Teuchos::RCP<Node>& node, Teuchos::ArrayRCP<Scalar>& ptr, Scalar value)
00063 {
00064   KokkosClassic::ReadyBufferHelper<Node> rbh(node);
00065   KokkosClassic::InitOp<Scalar> wdp;
00066   wdp.alpha = value;
00067   rbh.begin();
00068   wdp.x = rbh.addNonConstBuffer(ptr);
00069   rbh.end();
00070   node->template parallel_for<KokkosClassic::InitOp<Scalar> >(0, ptr.size(), wdp);
00071 }
00072 
00073 //-------------------------------------------------------------------
00074 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00075 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::VbrMatrix(const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> > &blkRowMap, size_t maxNumEntriesPerRow, ProfileType pftype)
00076  : DistObject<char, LocalOrdinal, GlobalOrdinal, Node>(convertBlockMapToPointMap(*blkRowMap)),
00077    blkGraph_(Teuchos::rcp(new BlockCrsGraph<LocalOrdinal,GlobalOrdinal,Node>(blkRowMap, maxNumEntriesPerRow, pftype))),
00078    constBlkGraph_(blkGraph_),
00079    lclMatrix_ (Teuchos::rcp (new local_matrix_type (blkRowMap->getNodeNumBlocks (),
00080                                                     blkRowMap->getPointMap()->getNode ()))),
00081    pbuf_values1D_(),
00082    pbuf_indx_(),
00083    lclMatOps_(blkRowMap->getPointMap()->getNode()),
00084    importer_(),
00085    exporter_(),
00086    importedVec_(),
00087    exportedVec_(),
00088    data_2D_(Teuchos::rcp(new Teuchos::Array<RowGlobalCols>)),
00089    nonlocal_data_(),
00090    is_fill_completed_(false),
00091    is_storage_optimized_(false)
00092 {
00093   //The graph of this VBR matrix is a BlockCrsGraph, which is a CrsGraph where
00094   //each entry in the graph corresponds to a block-entry in the matrix.
00095   //That is, you can think of a VBR matrix as a Crs matrix of dense
00096   //submatrices...
00097 }
00098 
00099 //-------------------------------------------------------------------
00100 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00101 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::VbrMatrix(const Teuchos::RCP<const BlockCrsGraph<LocalOrdinal,GlobalOrdinal,Node> > &blkGraph)
00102  : DistObject<char, LocalOrdinal, GlobalOrdinal, Node>(convertBlockMapToPointMap(*blkGraph->getBlockRowMap())),
00103    blkGraph_(),
00104    constBlkGraph_(blkGraph),
00105    lclMatrix_ (Teuchos::rcp (new local_matrix_type (blkGraph->getBlockRowMap ()->getNodeNumBlocks (),
00106                                                     blkGraph->getBlockRowMap ()->getPointMap ()->getNode ()))),
00107    pbuf_values1D_(),
00108    pbuf_indx_(),
00109    lclMatOps_(blkGraph->getBlockRowMap()->getPointMap()->getNode()),
00110    importer_(),
00111    exporter_(),
00112    importedVec_(),
00113    exportedVec_(),
00114    data_2D_(Teuchos::rcp(new Teuchos::Array<RowGlobalCols>)),
00115    nonlocal_data_(),
00116    is_fill_completed_(false),
00117    is_storage_optimized_(false)
00118 {
00119   //The graph of this VBR matrix is a BlockCrsGraph, which is a CrsGraph where
00120   //each entry in the graph corresponds to a block-entry in the matrix.
00121   //That is, you can think of a VBR matrix as a Crs matrix of dense
00122   //submatrices...
00123 
00124   TEUCHOS_TEST_FOR_EXCEPTION(blkGraph->isFillComplete() == false, std::runtime_error,
00125    "Tpetra::VbrMatrix::VbrMatrix(BlockCrsGraph) ERROR, this constructor requires graph.isFillComplete()==true.");
00126 
00127   createImporterExporter();
00128 
00129   is_fill_completed_ = true;
00130 
00131   optimizeStorage();
00132 
00133   fillLocalMatrix();
00134   fillLocalMatVec();
00135 }
00136 
00137 //-------------------------------------------------------------------
00138 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00139 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::~VbrMatrix()
00140 {
00141 }
00142 
00143 //-------------------------------------------------------------------
00144 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00145 Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >
00146 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getDomainMap() const
00147 {
00148   return getBlockDomainMap()->getPointMap();
00149 }
00150 
00151 //-------------------------------------------------------------------
00152 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00153 Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >
00154 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getRangeMap() const
00155 {
00156   return getBlockRangeMap()->getPointMap();
00157 }
00158 
00159 //-------------------------------------------------------------------
00160 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00161 bool
00162 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::isFillComplete() const
00163 {
00164   return is_fill_completed_;
00165 }
00166 
00167 //-------------------------------------------------------------------
00168 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00169 template <class DomainScalar, class RangeScalar>
00170 void
00171 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00172 multiply (const MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node>& X,
00173           MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node>& Y,
00174           Teuchos::ETransp trans,
00175           RangeScalar alpha,
00176           RangeScalar beta) const
00177 {
00178   TEUCHOS_TEST_FOR_EXCEPTION(
00179     ! isFillComplete (), std::runtime_error,
00180     "Tpetra::VbrMatrix::multiply: This method may only be called after "
00181     "fillComplete has been called.");
00182 
00183   KokkosClassic::MultiVector<Scalar,Node> lclX = X.getLocalMV ();
00184   KokkosClassic::MultiVector<Scalar,Node> lclY = Y.getLocalMV ();
00185 
00186   lclMatOps_.template multiply<Scalar,Scalar> (trans, alpha, lclX, beta, lclY);
00187 }
00188 
00189 //-------------------------------------------------------------------
00190 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00191 template<class DomainScalar, class RangeScalar>
00192 void
00193 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00194 solve (const MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node>& Y,
00195        MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node>& X,
00196        Teuchos::ETransp trans) const
00197 {
00198   TEUCHOS_TEST_FOR_EXCEPTION(X.isConstantStride() == false || Y.isConstantStride() == false, std::runtime_error,
00199         "Tpetra::VbrMatrix::solve(X,Y): X and Y must be constant stride.");
00200 
00201   TEUCHOS_TEST_FOR_EXCEPTION(!isFillComplete(), std::runtime_error,
00202     "Tpetra::VbrMatrix::solve ERROR, solve may only be called after fillComplete has been called.");
00203 
00204   TEUCHOS_TEST_FOR_EXCEPTION(constBlkGraph_->isUpperTriangular()==false && constBlkGraph_->isLowerTriangular()==false, std::runtime_error,
00205     "Tpetra::VbrMatrix::solve ERROR, matrix must be either upper or lower triangular.");
00206 
00207   KokkosClassic::MultiVector<RangeScalar,Node> lclY = Y.getLocalMV ();
00208   KokkosClassic::MultiVector<DomainScalar,Node> lclX = X.getLocalMV ();
00209 
00210   const Teuchos::EUplo triang =
00211     constBlkGraph_->isUpperTriangular () ?
00212     Teuchos::UPPER_TRI :
00213     Teuchos::LOWER_TRI;
00214   const Teuchos::EDiag diag =
00215     (constBlkGraph_->getNodeNumBlockDiags () < constBlkGraph_->getNodeNumBlockRows ()) ?
00216     Teuchos::UNIT_DIAG :
00217     Teuchos::NON_UNIT_DIAG;
00218 
00219   lclMatOps_.template solve<DomainScalar,RangeScalar> (trans, triang, diag, lclY, lclX);
00220 }
00221 
00222 //-------------------------------------------------------------------
00223 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00224 void
00225 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::updateImport(const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& X) const
00226 {
00227   if (importer_ != Teuchos::null) {
00228     if (importedVec_ == Teuchos::null || importedVec_->getNumVectors() != X.getNumVectors()) {
00229       importedVec_ = Teuchos::rcp(new MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>(getBlockColMap()->getPointMap(), X.getNumVectors()));
00230     }
00231 
00232     importedVec_->doImport(X, *importer_, REPLACE);
00233   }
00234 }
00235 
00236 //-------------------------------------------------------------------
00237 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00238 void
00239 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::updateExport(const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Y) const
00240 {
00241   if (exporter_ != Teuchos::null) {
00242     if (exportedVec_ == Teuchos::null || exportedVec_->getNumVectors() != Y.getNumVectors()) {
00243       exportedVec_ = Teuchos::rcp(new MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>(getBlockColMap()->getPointMap(), Y.getNumVectors()));
00244     }
00245 
00246     exportedVec_->doExport(Y, *exporter_, REPLACE);
00247   }
00248 }
00249 
00250 //-------------------------------------------------------------------
00251 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00252 void
00253 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::apply(
00254          const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
00255                MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
00256                Teuchos::ETransp trans,
00257                Scalar alpha,
00258                Scalar beta) const
00259 {
00260   if (trans == Teuchos::NO_TRANS) {
00261     updateImport(X);
00262     const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Xref = importedVec_ == Teuchos::null ? X : *importedVec_;
00263     this->template multiply<Scalar,Scalar>(Xref, Y, trans, alpha, beta);
00264   }
00265   else if (trans == Teuchos::TRANS || trans == Teuchos::CONJ_TRANS) {
00266     updateImport(Y);
00267     MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Yref = importedVec_ == Teuchos::null ? Y : *importedVec_;
00268     this->template multiply<Scalar,Scalar>(X, Yref, trans, alpha, beta);
00269     if (importedVec_ != Teuchos::null) {
00270       Y.doExport(*importedVec_, *importer_, ADD);
00271     }
00272   }
00273 }
00274 
00275 //-------------------------------------------------------------------
00276 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00277 void
00278 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::applyInverse(
00279          const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
00280                MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
00281                Teuchos::ETransp trans) const
00282 {
00283   if (trans == Teuchos::NO_TRANS) {
00284     updateImport(Y);
00285     MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Xref = importedVec_ == Teuchos::null ? X : *importedVec_;
00286     this->template solve<Scalar,Scalar>(Y, Xref, trans);
00287   }
00288   else if (trans == Teuchos::TRANS || trans == Teuchos::CONJ_TRANS) {
00289     updateImport(Y);
00290     const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Yref = importedVec_ == Teuchos::null ? Y : *importedVec_;
00291     this->template solve<Scalar,Scalar>(Yref, X, trans);
00292     if (importedVec_ != Teuchos::null) {
00293       X.doExport(*importedVec_, *importer_, ADD);
00294     }
00295   }
00296 }
00297 
00298 //-------------------------------------------------------------------
00299 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00300 bool
00301 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::hasTransposeApply() const
00302 {
00303   return true;
00304 }
00305 
00306 //-------------------------------------------------------------------
00307 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00308 Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >
00309 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getBlockRowMap() const
00310 {
00311   return constBlkGraph_->getBlockRowMap();
00312 }
00313 
00314 //-------------------------------------------------------------------
00315 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00316 Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >
00317 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getPointRowMap() const
00318 {
00319   return constBlkGraph_->getBlockRowMap()->getPointMap();
00320 }
00321 
00322 //-------------------------------------------------------------------
00323 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00324 Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >
00325 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getBlockColMap() const
00326 {
00327   return constBlkGraph_->getBlockColMap();
00328 }
00329 
00330 //-------------------------------------------------------------------
00331 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00332 Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >
00333 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getBlockDomainMap() const
00334 {
00335   return constBlkGraph_->getBlockDomainMap();
00336 }
00337 
00338 //-------------------------------------------------------------------
00339 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00340 Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >
00341 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getBlockRangeMap() const
00342 {
00343   return constBlkGraph_->getBlockRangeMap();
00344 }
00345 
00346 //-------------------------------------------------------------------
00347 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00348 Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >
00349 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getPointColMap() const
00350 {
00351   return constBlkGraph_->getBlockColMap()->getPointMap();
00352 }
00353 
00354 //-------------------------------------------------------------------
00355 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00356 void
00357 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00358 getGlobalBlockRowView (GlobalOrdinal globalBlockRow,
00359                        LocalOrdinal& numPtRows,
00360                        Teuchos::ArrayView<const GlobalOrdinal>& blockCols,
00361                        Teuchos::Array<LocalOrdinal>& ptColsPerBlockCol,
00362                        Teuchos::Array<Teuchos::ArrayRCP<const Scalar> >& blockEntries) const
00363 {
00364   TEUCHOS_TEST_FOR_EXCEPTION(
00365     isFillComplete(), std::runtime_error,
00366     "Tpetra::VbrMatrix::getGlobalBlockRowView internal ERROR, "
00367     "isFillComplete() is required to be false.");
00368 
00369   typedef typename Teuchos::ArrayView<const GlobalOrdinal>::size_type Tsize_t;
00370 
00371   LocalOrdinal localRow = getBlockRowMap()->getLocalBlockID(globalBlockRow);
00372   numPtRows = getBlockRowMap()->getLocalBlockSize(localRow);
00373   constBlkGraph_->getGlobalBlockRowView(globalBlockRow, blockCols);
00374   ptColsPerBlockCol.resize(blockCols.size());
00375   blockEntries.resize(blockCols.size());
00376   for (Tsize_t i = 0; i < blockCols.size (); ++i) {
00377     getGlobalBlockEntryView (globalBlockRow, blockCols[i],
00378                              numPtRows, ptColsPerBlockCol[i], blockEntries[i]);
00379   }
00380 }
00381 
00382 //-------------------------------------------------------------------
00383 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00384 void
00385 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getLocalBlockRowView(
00386          LocalOrdinal localBlockRow,
00387          LocalOrdinal& numPtRows,
00388          Teuchos::ArrayView<const LocalOrdinal>& blockCols,
00389          Teuchos::Array<LocalOrdinal>& ptColsPerBlockCol,
00390          Teuchos::ArrayRCP<const Scalar>& blockEntries) const
00391 {
00392   TEUCHOS_TEST_FOR_EXCEPTION(!isFillComplete(), std::runtime_error,
00393       "Tpetra::VbrMatrix::getGlobalBlockRowView internal ERROR, isFillComplete() is required to be true.");
00394 
00395   typedef typename Teuchos::ArrayView<const GlobalOrdinal>::size_type Tsize_t;
00396   typedef Teuchos::ArrayRCP<const size_t>       Host_View;
00397   typedef Teuchos::ArrayRCP<const LocalOrdinal> Host_View_LO;
00398 
00399   numPtRows = getBlockRowMap()->getLocalBlockSize(localBlockRow);
00400   constBlkGraph_->getLocalBlockRowView(localBlockRow, blockCols);
00401   ptColsPerBlockCol.resize(blockCols.size());
00402   LocalOrdinal num_scalars_in_block_row = 0;
00403   for(Tsize_t i=0; i<blockCols.size(); ++i) {
00404     ptColsPerBlockCol[i] = getBlockColMap()->getLocalBlockSize(blockCols[i]);
00405     num_scalars_in_block_row += numPtRows*ptColsPerBlockCol[i];
00406   }
00407 
00408   Teuchos::RCP<Node> node = getNode();
00409   Host_View bpr = constBlkGraph_->getNodeRowOffsets();
00410   const size_t bindx_offset = bpr[localBlockRow];
00411   Host_View_LO indx = node->template viewBuffer<LocalOrdinal>(1, pbuf_indx_+bindx_offset);
00412   const LocalOrdinal offset = indx[0];
00413   blockEntries = node->template viewBuffer<Scalar>(num_scalars_in_block_row, pbuf_values1D_+offset);
00414 }
00415 
00416 //-------------------------------------------------------------------
00417 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00418 void
00419 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getGlobalBlockEntryViewNonConst(
00420     GlobalOrdinal globalBlockRow,
00421     GlobalOrdinal globalBlockCol,
00422     LocalOrdinal& numPtRows,
00423     LocalOrdinal& numPtCols,
00424     Teuchos::ArrayRCP<Scalar>& blockEntry)
00425 {
00426   //Return a non-const, read-write view of a block-entry (as an ArrayRCP),
00427   //creating/allocating the block-entry if it doesn't already exist, (but only
00428   //if fillComplete hasn't been called yet, and if the arguments numPtRows
00429   //and numPtCols have been set on input).
00430 
00431   LocalOrdinal localBlockRow = getBlockRowMap()->getLocalBlockID(globalBlockRow);
00432 
00433   if (localBlockRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid()) {
00434     VbrUtils::getGlobalBlockEntryViewNonConst(nonlocal_data_,
00435                    globalBlockRow, globalBlockCol,
00436                    numPtRows, numPtCols, blockEntry);
00437     return;
00438   }
00439 
00440   if (is_storage_optimized_) {
00441     TEUCHOS_TEST_FOR_EXCEPTION(!isFillComplete(), std::runtime_error,
00442       "Tpetra::VbrMatrix::getGlobalBlockEntryViewNonConst internal ERROR, storage is optimized but isFillComplete() is false.");
00443 
00444     LocalOrdinal localBlockCol = getBlockColMap()->getLocalBlockID(globalBlockCol);
00445 
00446     getLocalBlockEntryViewNonConst(localBlockRow, localBlockCol,
00447                                    numPtRows, numPtCols, blockEntry);
00448     return;
00449   }
00450 
00451   //If we get to here, fillComplete hasn't been called yet, and the matrix data
00452   //is stored in un-packed '2D' form.
00453 
00454   if (data_2D_->size() == 0) {
00455     LocalOrdinal numBlockRows = constBlkGraph_->getNodeNumBlockRows();
00456     data_2D_->resize(numBlockRows);
00457   }
00458 
00459   RowGlobalCols& blkrow = (*data_2D_)[localBlockRow];
00460 
00461   typename RowGlobalCols::iterator col_iter = blkrow.find(globalBlockCol);
00462 
00463   if (col_iter != blkrow.end()) {
00464     blockEntry = col_iter->second;
00465   }
00466   else {
00467     //blockEntry doesn't already exist, so we will create it.
00468 
00469     //make sure block-size is specified:
00470     TEUCHOS_TEST_FOR_EXCEPTION(numPtRows==0 || numPtCols==0, std::runtime_error,
00471       "Tpetra::VbrMatrix::getGlobalBlockEntryViewNonConst ERROR: creating block-entry, but numPtRows and/or numPtCols is 0.");
00472 
00473     Teuchos::RCP<Node> node = getNode();
00474     size_t blockSize = numPtRows*numPtCols;
00475     blockEntry = Teuchos::arcp(new Scalar[blockSize], 0, blockSize);
00476     std::fill(blockEntry.begin(), blockEntry.end(), (Scalar) 0);
00477     blkrow.insert(std::make_pair(globalBlockCol, blockEntry));
00478     blkGraph_->insertGlobalIndices(globalBlockRow, Teuchos::ArrayView<GlobalOrdinal>(&globalBlockCol, 1));
00479   }
00480 }
00481 
00482 //-------------------------------------------------------------------
00483 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00484 void
00485 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getGlobalBlockEntryView(
00486       GlobalOrdinal globalBlockRow,
00487       GlobalOrdinal globalBlockCol,
00488       LocalOrdinal& numPtRows,
00489       LocalOrdinal& numPtCols,
00490       Teuchos::ArrayRCP<const Scalar>& blockEntry) const
00491 {
00492   //This method returns a const-view of a block-entry (as an ArrayRCP).
00493   //Throws an exception if the block-entry doesn't already exist.
00494 
00495   if (is_storage_optimized_) {
00496     TEUCHOS_TEST_FOR_EXCEPTION(!isFillComplete(), std::runtime_error,
00497       "Tpetra::VbrMatrix::getGlobalBlockEntryView internal ERROR, storage is optimized but isFillComplete() is false.");
00498 
00499     LocalOrdinal localBlockRow = getBlockRowMap()->getLocalBlockID(globalBlockRow);
00500     LocalOrdinal localBlockCol = getBlockColMap()->getLocalBlockID(globalBlockCol);
00501     getLocalBlockEntryView(localBlockRow, localBlockCol, numPtRows, numPtCols, blockEntry);
00502     return;
00503   }
00504 
00505   TEUCHOS_TEST_FOR_EXCEPTION(data_2D_->size() == 0, std::runtime_error,
00506     "Tpetra::VbrMatrix::getGlobalBlockEntryView ERROR, matrix storage not yet allocated, can't return a const view.");
00507 
00508   //this acts as a range-check for globalBlockRow:
00509   LocalOrdinal localBlockRow = getBlockRowMap()->getLocalBlockID(globalBlockRow);
00510   TEUCHOS_TEST_FOR_EXCEPTION( localBlockRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid(),
00511      std::runtime_error,
00512      "Tpetra::VbrMatrix::getGlobalBlockEntryView, globalBlockRow not on the local processor.");
00513 
00514   RowGlobalCols& blkrow = (*data_2D_)[localBlockRow];
00515   typename RowGlobalCols::iterator col_iter = blkrow.find(globalBlockCol);
00516 
00517   if (col_iter == blkrow.end()) {
00518     throw std::runtime_error("Tpetra::VbrMatrix::getGlobalBlockEntryView ERROR, specified block-entry doesn't exist.");
00519   }
00520 
00521   numPtRows = getBlockRowMap()->getLocalBlockSize(localBlockRow);
00522 
00523   TEUCHOS_TEST_FOR_EXCEPTION(numPtRows == 0, std::runtime_error,
00524     "Tpetra::VbrMatrix::getGlobalBlockEntryView ERROR, numPtRows == 0.");
00525 
00526   blockEntry = col_iter->second;
00527   numPtCols = blockEntry.size() / numPtRows;
00528 }
00529 
00530 //-------------------------------------------------------------------
00531 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00532 void
00533 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getLocalBlockEntryViewNonConst(
00534     LocalOrdinal localBlockRow,
00535     LocalOrdinal localBlockCol,
00536     LocalOrdinal& numPtRows,
00537     LocalOrdinal& numPtCols,
00538     Teuchos::ArrayRCP<Scalar>& blockEntry)
00539 {
00540   typedef Teuchos::ArrayRCP<const LocalOrdinal> Host_View_LO;
00541   typedef Teuchos::ArrayRCP<const size_t> Host_View;
00542   typedef typename Host_View_LO::iterator ITER;
00543   //This method returns a non-constant view of a block-entry (as an ArrayRCP).
00544 
00545   TEUCHOS_TEST_FOR_EXCEPTION(isFillComplete() == false, std::runtime_error,
00546    "Tpetra::VbrMatrix::getLocalBlockEntryViewNonConst ERROR, this method can only be called after fillComplete() has been called.");
00547 
00548   TEUCHOS_TEST_FOR_EXCEPTION(is_storage_optimized_ == false, std::runtime_error,
00549    "Tpetra::VbrMatrix::getLocalBlockEntryViewNonConst ERROR, this method can only be called if storage is optimized.");
00550 
00551   Teuchos::RCP<Node> node = getNode();
00552 
00553   Host_View bptr = constBlkGraph_->getNodeRowOffsets();
00554   LocalOrdinal bindx_offset = bptr[localBlockRow];
00555   LocalOrdinal length = bptr[localBlockRow+1] - bindx_offset;
00556 
00557   TEUCHOS_TEST_FOR_EXCEPTION( length < 1, std::runtime_error,
00558     "Tpetra::VbrMatrix::getLocalBlockEntryViewNonConst ERROR, specified localBlockCol not found in localBlockRow.");
00559 
00560   Host_View_LO bindx = constBlkGraph_->getNodePackedIndices();
00561   ITER bindx_beg = bindx.begin() + bindx_offset,
00562        bindx_end = bindx_beg + length;
00563   ITER it = std::lower_bound(bindx_beg, bindx_end, localBlockCol);
00564 
00565   TEUCHOS_TEST_FOR_EXCEPTION(it == bindx_end || *it != localBlockCol, std::runtime_error,
00566     "Tpetra::VbrMatrix::getLocalBlockEntryViewNonConst ERROR, specified localBlockCol not found.");
00567 
00568   numPtRows = getBlockRowMap()->getLocalBlockSize(localBlockRow);
00569   numPtCols = getBlockColMap()->getLocalBlockSize(localBlockCol);
00570 
00571   const LocalOrdinal blkSize = numPtRows*numPtCols;
00572   Host_View_LO indx = node->template viewBuffer<LocalOrdinal>(1,pbuf_indx_+bptr[localBlockRow]+(it-bindx_beg));
00573   const LocalOrdinal offset = indx[0];
00574   blockEntry = node->template viewBufferNonConst<Scalar>(KokkosClassic::ReadWrite, blkSize, pbuf_values1D_ + offset);
00575 }
00576 
00577 //-------------------------------------------------------------------
00578 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00579 void
00580 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getLocalDiagCopy(
00581   Tpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& diag) const
00582 {
00583   const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& rowmap = getBlockRowMap();
00584   TEUCHOS_TEST_FOR_EXCEPTION(diag.getMap()->isSameAs(*(rowmap->getPointMap())) != true,
00585     std::runtime_error, "Tpetra::VbrMatrix::getLocalDiagCopy ERROR, vector must be distributed the same as this matrix' row-map.");
00586 
00587   Teuchos::ArrayRCP<Scalar> diag_view = diag.get1dViewNonConst();
00588   Teuchos::ArrayView<const GlobalOrdinal> blockIDs = rowmap->getNodeBlockIDs();
00589   size_t offset = 0;
00590   typedef typename Teuchos::ArrayView<const GlobalOrdinal>::size_type Tsize_t;
00591   for(Tsize_t i=0; i<blockIDs.size(); ++i) {
00592     LocalOrdinal localBlockID = rowmap->getLocalBlockID(blockIDs[i]);
00593     LocalOrdinal blockSize = rowmap->getLocalBlockSize(localBlockID);
00594     Teuchos::ArrayRCP<const Scalar> blockEntry;
00595     getGlobalBlockEntryView(blockIDs[i], blockIDs[i], blockSize, blockSize, blockEntry);
00596 
00597     for(LocalOrdinal j=0; j<blockSize; ++j) {
00598       diag_view[offset++] = blockEntry[j*blockSize+j];
00599     }
00600   }
00601 }
00602 
00603 //-------------------------------------------------------------------
00604 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00605 bool
00606 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00607 checkSizes (const SrcDistObject& source)
00608 {
00609   typedef VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> this_type;
00610   const this_type* srcVbrMat = dynamic_cast<const this_type*> (&source);
00611 
00612   if (srcVbrMat == NULL) {
00613     typedef VbrUtils::VbrDataDist<LocalOrdinal,GlobalOrdinal,Scalar,Node> VDD;
00614     const VDD* vdd = const_cast<VDD*> (dynamic_cast<const VDD*> (&source));
00615     if (vdd == NULL) {
00616       return false;
00617     } else {
00618       return (this->getMap ()->getMinAllGlobalIndex () <=
00619               vdd->getMap ()->getMinAllGlobalIndex ()) &&
00620         (this->getMap ()->getMaxAllGlobalIndex () >=
00621          vdd->getMap ()->getMaxAllGlobalIndex ());
00622     }
00623   } else {
00624     return (this->getMap ()->getMinAllGlobalIndex () <=
00625             srcVbrMat->getMap ()->getMinAllGlobalIndex ()) &&
00626       (this->getMap ()->getMaxAllGlobalIndex () >=
00627        srcVbrMat->getMap ()->getMaxAllGlobalIndex ());
00628   }
00629 }
00630 
00631 //-------------------------------------------------------------------
00632 template<class Scalar,
00633          class LocalOrdinal,
00634          class GlobalOrdinal,
00635          class Node>
00636 void
00637 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00638 copyAndPermute (const SrcDistObject& source,
00639                 size_t numSameIDs,
00640                 const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
00641                 const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs)
00642 {
00643   typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type Tsize_t;
00644   typedef VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> this_type;
00645   const this_type* src_mat_ptr = dynamic_cast<const this_type*> (&source);
00646 
00647   if (src_mat_ptr == NULL) {
00648     throw std::runtime_error ("VbrMatrix::copyAndPermute ERROR, dynamic_cast failed.");
00649   }
00650   const this_type& src_mat = *src_mat_ptr;
00651 
00652   Teuchos::Array<LocalOrdinal> src_ptColsPerBlkCol;
00653   LocalOrdinal numPtRows;
00654 
00655   LocalOrdinal numSame = numSameIDs;
00656   for(LocalOrdinal i=0; i<numSame; ++i) {
00657     if (isFillComplete()) {
00658       Teuchos::ArrayRCP<const Scalar> src_blkEntries;
00659       Teuchos::ArrayView<const LocalOrdinal> src_blkCols;
00660 
00661       src_mat.getLocalBlockRowView (i, numPtRows, src_blkCols,
00662                                     src_ptColsPerBlkCol, src_blkEntries);
00663       unsigned offset = 0;
00664       for (Tsize_t j=0; j<src_blkCols.size(); ++j) {
00665         LocalOrdinal numPtCols = src_ptColsPerBlkCol[j];
00666         setLocalBlockEntry (i, src_blkCols[j], numPtRows, numPtCols, numPtCols,
00667                             src_blkEntries.view (offset, numPtRows*numPtCols));
00668         offset += numPtRows*numPtCols;
00669       }
00670     }
00671     else {
00672       GlobalOrdinal gRow = getBlockRowMap ()->getGlobalBlockID (i);
00673       Teuchos::Array<Teuchos::ArrayRCP<const Scalar> > src_blkEntries;
00674       Teuchos::ArrayView<const GlobalOrdinal> src_blkCols;
00675 
00676       src_mat.getGlobalBlockRowView (gRow, numPtRows, src_blkCols,
00677                                      src_ptColsPerBlkCol, src_blkEntries);
00678       for (Tsize_t j = 0; j < src_blkCols.size (); ++j) {
00679         LocalOrdinal numPtCols = src_ptColsPerBlkCol[j];
00680         setGlobalBlockEntry (gRow, src_blkCols[j],
00681                              numPtRows, numPtCols, numPtRows,
00682                              src_blkEntries[j].view (0, numPtRows * numPtCols));
00683       }
00684     }
00685   }
00686 
00687   for (Tsize_t i = 0; i < permuteToLIDs.size (); ++i) {
00688     if (isFillComplete ()) {
00689       Teuchos::ArrayRCP<const Scalar> src_blkEntries;
00690       Teuchos::ArrayView<const LocalOrdinal> src_blkCols;
00691 
00692       src_mat.getLocalBlockRowView (permuteFromLIDs[i], numPtRows,
00693                                     src_blkCols, src_ptColsPerBlkCol,
00694                                     src_blkEntries);
00695       unsigned offset = 0;
00696       for (Tsize_t j=0; j<src_blkCols.size(); ++j) {
00697         LocalOrdinal numPtCols = src_ptColsPerBlkCol[j];
00698         setLocalBlockEntry (permuteToLIDs[i], src_blkCols[j], numPtRows, numPtCols, numPtCols,
00699                             src_blkEntries.view(offset, numPtRows*numPtCols));
00700         offset += numPtRows*numPtCols;
00701       }
00702     }
00703     else {
00704       GlobalOrdinal gRow = getBlockRowMap ()->getGlobalBlockID (permuteToLIDs[i]);
00705       Teuchos::Array<Teuchos::ArrayRCP<const Scalar> > src_blkEntries;
00706       Teuchos::ArrayView<const GlobalOrdinal> src_blkCols;
00707 
00708       src_mat.getGlobalBlockRowView (gRow, numPtRows, src_blkCols,
00709                                      src_ptColsPerBlkCol, src_blkEntries);
00710       for (Tsize_t j = 0; j < src_blkCols.size (); ++j) {
00711         LocalOrdinal numPtCols = src_ptColsPerBlkCol[j];
00712         setGlobalBlockEntry (gRow, src_blkCols[j],
00713                              numPtRows, numPtCols, numPtRows,
00714                              src_blkEntries[j].view (0, numPtRows * numPtCols));
00715       }
00716     }
00717   }
00718 }
00719 
00720 //-------------------------------------------------------------------
00721 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00722 void
00723 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00724 packAndPrepare (const SrcDistObject& source,
00725                 const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
00726                 Teuchos::Array<char>& exports,
00727                 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
00728                 size_t& constantNumPackets,
00729                 Distributor& distor)
00730 {
00731   typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type Tsize_t;
00732   typedef VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> this_type;
00733   const this_type* src_mat_ptr = dynamic_cast<const this_type*> (&source);
00734 
00735   if (src_mat_ptr == NULL) {
00736     typedef VbrUtils::VbrDataDist<LocalOrdinal, GlobalOrdinal, Scalar, Node> VDD;
00737     const VDD* vdd = dynamic_cast<const VDD*> (&source);
00738     TEUCHOS_TEST_FOR_EXCEPTION(
00739       vdd == NULL, std::invalid_argument, "Tpetra::VbrMatrix::packAndPrepare: "
00740       "Source object input must be either a VbrMatrix or a VbrDataDist with "
00741       "matching template parameters.");
00742     vdd->pack (exportLIDs, exports, numPacketsPerLID, constantNumPackets, distor);
00743     return;
00744   }
00745   const this_type& src_mat = *src_mat_ptr;
00746 
00747   //We will pack each row's data into the exports buffer as follows:
00748   //[num-block-cols,numPtRows,{list-of-blk-cols},{list-of-ptColsPerBlkCol},{all vals}]
00749   //so the length of the char exports buffer for a row is:
00750   //sizeof(LocalOrdinal)*(2+2*(num-block-cols)) + sizeof(Scalar)*numPtRows*sum(numPtCols_i)
00751 
00752   //For each row corresponding to exportLIDs, accumulate the size that it will
00753   //occupy in the exports buffer:
00754   size_t total_exports_size = 0;
00755   for (Tsize_t i = 0; i < exportLIDs.size (); ++i) {
00756     Teuchos::Array<LocalOrdinal> src_ptColsPerBlkCol;
00757     LocalOrdinal numPtRows;
00758     LocalOrdinal numBlkCols = 0;
00759     LocalOrdinal numScalars = 0;
00760 
00761     if (src_mat.isFillComplete ()) {
00762       Teuchos::ArrayRCP<const Scalar> src_blkEntries;
00763       Teuchos::ArrayView<const LocalOrdinal> src_blkCols;
00764       src_mat.getLocalBlockRowView (exportLIDs[i], numPtRows,
00765                                     src_blkCols, src_ptColsPerBlkCol,
00766                                     src_blkEntries);
00767       numBlkCols = src_blkCols.size ();
00768       numScalars = src_blkEntries.size ();
00769     }
00770     else {
00771       Teuchos::Array<Teuchos::ArrayRCP<const Scalar> > src_blkEntries;
00772       GlobalOrdinal gRow =
00773         src_mat.getBlockRowMap ()->getGlobalBlockID (exportLIDs[i]);
00774       Teuchos::ArrayView<const GlobalOrdinal> src_blkCols;
00775       src_mat.getGlobalBlockRowView (gRow, numPtRows, src_blkCols,
00776                                      src_ptColsPerBlkCol, src_blkEntries);
00777       numBlkCols = src_blkCols.size ();
00778       for (Tsize_t j = 0; j < src_blkEntries.size (); ++j) {
00779         numScalars += src_blkEntries[j].size ();
00780       }
00781     }
00782 
00783     const size_t size_for_this_row =
00784       sizeof (GlobalOrdinal) * (2 + 2*numBlkCols) +
00785       sizeof (Scalar) * numScalars;
00786     numPacketsPerLID[i] = size_for_this_row;
00787     total_exports_size += size_for_this_row;
00788   }
00789 
00790   exports.resize (total_exports_size);
00791 
00792   ArrayView<char> avIndsC, avValsC;
00793   ArrayView<Scalar>        avVals;
00794 
00795   size_t offset = 0;
00796 
00797   for (Tsize_t i = 0; i < exportLIDs.size (); ++i) {
00798     Teuchos::Array<LocalOrdinal> src_ptColsPerBlkCol;
00799     LocalOrdinal numPtRows;
00800     LocalOrdinal numBlkCols = 0;
00801     LocalOrdinal numScalars = 0;
00802 
00803     if (src_mat.isFillComplete ()) {
00804       Teuchos::ArrayRCP<const Scalar> src_blkEntries;
00805       Teuchos::ArrayView<const LocalOrdinal> src_blkCols;
00806       src_mat.getLocalBlockRowView (exportLIDs[i], numPtRows, src_blkCols,
00807                                     src_ptColsPerBlkCol, src_blkEntries);
00808       numBlkCols = src_blkCols.size ();
00809       numScalars = src_blkEntries.size ();
00810 
00811       LocalOrdinal num_chars_for_ordinals =
00812         (2*numBlkCols + 2) * sizeof (LocalOrdinal);
00813       //get export views
00814       avIndsC = exports (offset, num_chars_for_ordinals);
00815       avValsC = exports (offset + num_chars_for_ordinals, numScalars * sizeof (Scalar));
00816       ArrayView<GlobalOrdinal> avInds = av_reinterpret_cast<GlobalOrdinal>(avIndsC);
00817       typename ArrayView<GlobalOrdinal>::iterator ind_it = avInds.begin();
00818 
00819       const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& col_map =
00820         src_mat.getBlockColMap ();
00821 
00822       //put row info into the buffer views:
00823       *ind_it++ = numBlkCols;
00824       *ind_it++ = numPtRows;
00825       for (Tsize_t j = 0; j < src_blkCols.size (); ++j) {
00826         *ind_it++ = col_map->getGlobalBlockID (src_blkCols[j]);
00827       }
00828       std::copy (src_ptColsPerBlkCol.begin(), src_ptColsPerBlkCol.end(), ind_it);
00829       avVals = av_reinterpret_cast<Scalar> (avValsC);
00830       std::copy (src_blkEntries.begin(), src_blkEntries.end(), avVals.begin());
00831     }
00832     else {
00833       Teuchos::Array<Teuchos::ArrayRCP<const Scalar> > src_blkEntries;
00834       GlobalOrdinal gRow = src_mat.getBlockRowMap()->getGlobalBlockID(exportLIDs[i]);
00835       Teuchos::ArrayView<const GlobalOrdinal> src_blkCols;
00836       src_mat.getGlobalBlockRowView (gRow, numPtRows, src_blkCols,
00837                                      src_ptColsPerBlkCol, src_blkEntries);
00838       numBlkCols = src_blkCols.size();
00839       numScalars = 0;
00840       for(Tsize_t j=0; j<src_blkEntries.size(); ++j) {
00841         numScalars += numPtRows*src_ptColsPerBlkCol[j];
00842       }
00843 
00844       LocalOrdinal num_chars_for_ordinals = (2*numBlkCols+2)*sizeof(GlobalOrdinal);
00845       //get export views
00846       avIndsC = exports(offset, num_chars_for_ordinals);
00847       avValsC = exports(offset+ num_chars_for_ordinals, numScalars*sizeof(Scalar));
00848       ArrayView<GlobalOrdinal> avInds = av_reinterpret_cast<GlobalOrdinal>(avIndsC);
00849       typename ArrayView<GlobalOrdinal>::iterator ind_it = avInds.begin();
00850 
00851       //put row info into the buffer views:
00852       *ind_it++ = numBlkCols;
00853       *ind_it++ = numPtRows;
00854       std::copy(src_blkCols.begin(), src_blkCols.end(), ind_it);
00855       ind_it += src_blkCols.size();
00856       std::copy(src_ptColsPerBlkCol.begin(), src_ptColsPerBlkCol.end(), ind_it);
00857       avVals = av_reinterpret_cast<Scalar>(avValsC);
00858       typename ArrayView<Scalar>::iterator val_it = avVals.begin();
00859       for(Tsize_t j=0; j<src_blkEntries.size(); ++j) {
00860         std::copy(src_blkEntries[j].begin(), src_blkEntries[j].end(), val_it);
00861         val_it += src_blkEntries[j].size();
00862       }
00863     }
00864 
00865     const size_t size_for_this_row =
00866       sizeof (GlobalOrdinal) * (2 + 2*numBlkCols)
00867       + sizeof (Scalar) * numScalars;
00868     offset += size_for_this_row;
00869   }
00870 
00871   constantNumPackets = 0;
00872 }
00873 
00874 //-------------------------------------------------------------------
00875 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00876 void
00877 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00878 unpackAndCombine (const Teuchos::ArrayView<const LocalOrdinal>& importLIDs,
00879                   const Teuchos::ArrayView<const char>& imports,
00880                   const Teuchos::ArrayView<size_t>& numPacketsPerLID,
00881                   size_t constantNumPackets,
00882                   Distributor& distor,
00883                   CombineMode CM)
00884 {
00885   typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type Tsize_t;
00886 
00887   if (CM == Tpetra::ABSMAX) {
00888     std::cout << "Warning, VbrMatrix Import/Export doesn't support combine-mode==ABSMAX; use ADD, INSERT or REPLACE. (REPLACE is being used now.)"<<std::endl;
00889   }
00890 
00891   size_t offset = 0;
00892   for(Tsize_t i=0; i<importLIDs.size(); ++i) {
00893     ArrayView<const char> avC = imports.view(offset, numPacketsPerLID[i]);
00894     ArrayView<const GlobalOrdinal> avOrds = av_reinterpret_cast<const GlobalOrdinal>(avC);
00895     GlobalOrdinal gRow = this->getBlockRowMap()->getGlobalBlockID(importLIDs[i]);
00896     size_t avOrdsOffset = 0;
00897     LocalOrdinal numBlkCols = avOrds[avOrdsOffset++];
00898     LocalOrdinal numPtRows = avOrds[avOrdsOffset++];
00899     LocalOrdinal num_chars_for_ordinals = (2*numBlkCols+2)*sizeof(GlobalOrdinal);
00900     ArrayView<const char> avValsC = imports.view(offset+num_chars_for_ordinals, numPacketsPerLID[i]-num_chars_for_ordinals);
00901     ArrayView<const Scalar> avVals = av_reinterpret_cast<const Scalar>(avValsC);
00902 
00903     size_t avValsOffset = 0;
00904     for(Tsize_t j=0; j<numBlkCols; ++j) {
00905       GlobalOrdinal blkCol = avOrds[avOrdsOffset+j];
00906       LocalOrdinal numPtCols = avOrds[avOrdsOffset+numBlkCols+j];
00907       LocalOrdinal LDA = numPtRows;
00908       if (CM == Tpetra::ADD) {
00909         sumIntoGlobalBlockEntry(gRow, blkCol, numPtRows, numPtCols, LDA,
00910                          avVals.view(avValsOffset, numPtRows*numPtCols));
00911       }
00912       else if (CM == Tpetra::INSERT || CM == Tpetra::REPLACE) {
00913         setGlobalBlockEntry(gRow, blkCol, numPtRows, numPtCols, LDA,
00914                            avVals.view(avValsOffset, numPtRows*numPtCols));
00915       }
00916       else {
00917 //    std::cout << "Warning, VbrMatrix Import/Export doesn't support combine-mode==ABSMAX; use ADD, INSERT or REPLACE. (REPLACE is being used now.)"<<std::endl;
00918         setGlobalBlockEntry(gRow, blkCol, numPtRows, numPtCols, LDA,
00919                            avVals.view(avValsOffset, numPtRows*numPtCols));
00920       }
00921       avValsOffset += numPtRows*numPtCols;
00922     }
00923   }
00924 }
00925 
00926 //-------------------------------------------------------------------
00927 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00928 void
00929 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getLocalBlockEntryView(
00930       LocalOrdinal localBlockRow,
00931       LocalOrdinal localBlockCol,
00932       LocalOrdinal& numPtRows,
00933       LocalOrdinal& numPtCols,
00934       Teuchos::ArrayRCP<const Scalar>& blockEntry) const
00935 {
00936   typedef Teuchos::ArrayRCP<const size_t>       Host_View;
00937   typedef Teuchos::ArrayRCP<const LocalOrdinal> Host_View_LO;
00938   typedef typename Host_View_LO::iterator ITER;
00939   //This method returns a constant view of a block-entry (as an ArrayRCP).
00940 
00941   TEUCHOS_TEST_FOR_EXCEPTION(isFillComplete() == false, std::runtime_error,
00942    "Tpetra::VbrMatrix::getLocalBlockEntryView ERROR, this method can only be called after fillComplete() has been called.");
00943 
00944   TEUCHOS_TEST_FOR_EXCEPTION(is_storage_optimized_ == false, std::runtime_error,
00945    "Tpetra::VbrMatrix::getLocalBlockEntryView ERROR, this method can only be called if storage is optimized.");
00946 
00947   Teuchos::RCP<Node> node = getNode();
00948 
00949   Host_View bpr = constBlkGraph_->getNodeRowOffsets();
00950   const size_t bindx_offset = bpr[localBlockRow];
00951   const size_t length       = bpr[localBlockRow+1] - bindx_offset;
00952 
00953   Host_View_LO bindx = constBlkGraph_->getNodePackedIndices();
00954   ITER bindx_beg = bindx.begin()+bindx_offset,
00955        bindx_end = bindx_beg + length;
00956   ITER it = std::lower_bound(bindx_beg, bindx_end, localBlockCol);
00957 
00958   TEUCHOS_TEST_FOR_EXCEPTION(it == bindx_end || *it != localBlockCol, std::runtime_error,
00959     "Tpetra::VbrMatrix::getLocalBlockEntryView ERROR, specified localBlockCol not found.");
00960 
00961   numPtRows = getBlockRowMap()->getLocalBlockSize(localBlockRow);
00962   numPtCols = getBlockColMap()->getLocalBlockSize(localBlockCol);
00963 
00964   const LocalOrdinal blkSize = numPtRows*numPtCols;
00965   Host_View_LO indx = node->template viewBuffer<LocalOrdinal>(1, pbuf_indx_+bpr[localBlockRow]+(it-bindx_beg));
00966   const LocalOrdinal offset = indx[0];
00967   blockEntry = node->template viewBuffer<Scalar>(blkSize, pbuf_values1D_ + offset);
00968 }
00969 
00970 //-------------------------------------------------------------------
00971 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00972 Teuchos::RCP<Node>
00973 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getNode() const
00974 {
00975   return getBlockRowMap()->getPointMap()->getNode();
00976 }
00977 
00978 //-------------------------------------------------------------------
00979 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00980 void
00981 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::putScalar(Scalar s)
00982 {
00983   Teuchos::RCP<Node> node = getNode();
00984 
00985   if (isFillComplete()) {
00986     fill_device_ArrayRCP(node, pbuf_values1D_, s);
00987   }
00988   else {
00989     typedef typename Teuchos::Array<RowGlobalCols>::size_type Tsize_t;
00990     Teuchos::Array<RowGlobalCols>& rows = *data_2D_;
00991     Tsize_t numBlockRows = rows.size();
00992     for(Tsize_t r=0; r<numBlockRows; ++r) {
00993       typename RowGlobalCols::iterator
00994         col_iter = rows[r].begin(),
00995         col_end  = rows[r].end();
00996       for(; col_iter != col_end; ++col_iter) {
00997         Teuchos::ArrayRCP<Scalar>& vals = col_iter->second;
00998         std::fill(vals.begin(), vals.end(), s);
00999       }
01000     }
01001   }
01002 }
01003 
01004 //-------------------------------------------------------------------
01005 template<class ArrayType1,class ArrayType2,class Ordinal>
01006 void set_array_values(ArrayType1& dest, ArrayType2& src, Ordinal rows, Ordinal cols, Ordinal stride, Tpetra::CombineMode mode)
01007 {
01008   size_t offset = 0;
01009   size_t src_offset = 0;
01010 
01011   if (mode == ADD) {
01012     for(Ordinal col=0; col<cols; ++col) {
01013       for(Ordinal row=0; row<rows; ++row) {
01014         dest[offset++] += src[src_offset + row];
01015       }
01016       src_offset += stride;
01017     }
01018   }
01019   else {
01020     for(Ordinal col=0; col<cols; ++col) {
01021       for(Ordinal row=0; row<rows; ++row) {
01022         dest[offset++] = src[src_offset + row];
01023       }
01024       src_offset += stride;
01025     }
01026   }
01027 }
01028 
01029 //-------------------------------------------------------------------
01030 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01031 void
01032 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::setGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, const Teuchos::SerialDenseMatrix<int,Scalar>& blockEntry)
01033 {
01034   //first get an ArrayRCP for the internal storage for this block-entry:
01035   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01036   LocalOrdinal blkRowSize = blockEntry.numRows();
01037   LocalOrdinal blkColSize = blockEntry.numCols();
01038   getGlobalBlockEntryViewNonConst(globalBlockRow,globalBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01039 
01040   //now copy the incoming block-entry into internal storage:
01041   const Scalar* inputvalues = blockEntry.values();
01042   set_array_values<Teuchos::ArrayRCP<Scalar>, const Scalar*, LocalOrdinal >(
01043     internalBlockEntry, inputvalues, blkRowSize, blkColSize, blockEntry.stride(), REPLACE);
01044 }
01045 
01046 //-------------------------------------------------------------------
01047 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01048 void
01049 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::setLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, const Teuchos::SerialDenseMatrix<int,Scalar>& blockEntry)
01050 {
01051   //first get an ArrayRCP for the internal storage for this block-entry:
01052   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01053   LocalOrdinal blkRowSize = blockEntry.numRows();
01054   LocalOrdinal blkColSize = blockEntry.numCols();
01055   getLocalBlockEntryViewNonConst(localBlockRow,localBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01056 
01057   //now copy the incoming block-entry into internal storage:
01058   const Scalar* inputvalues = blockEntry.values();
01059   set_array_values<Teuchos::ArrayRCP<Scalar>, const Scalar*, LocalOrdinal>(
01060 internalBlockEntry, inputvalues, blkRowSize, blkColSize, blockEntry.stride(), REPLACE);
01061 }
01062 
01063 //-------------------------------------------------------------------
01064 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01065 void
01066 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::sumIntoGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, const Teuchos::SerialDenseMatrix<int,Scalar>& blockEntry)
01067 {
01068   //first get an ArrayRCP for the internal storage for this block-entry:
01069   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01070   LocalOrdinal blkRowSize = blockEntry.numRows();
01071   LocalOrdinal blkColSize = blockEntry.numCols();
01072   getGlobalBlockEntryViewNonConst(globalBlockRow,globalBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01073 
01074   //now sum (add) the incoming block-entry into internal storage:
01075   const Scalar* inputvalues = blockEntry.values();
01076   set_array_values<Teuchos::ArrayRCP<Scalar>, const Scalar*, LocalOrdinal>(
01077     internalBlockEntry, inputvalues, blkRowSize, blkColSize, blockEntry.stride(), ADD);
01078 }
01079 
01080 //-------------------------------------------------------------------
01081 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01082 void
01083 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::sumIntoLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, const Teuchos::SerialDenseMatrix<int,Scalar>& blockEntry)
01084 {
01085   //first get an ArrayRCP for the internal storage for this block-entry:
01086   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01087   LocalOrdinal blkRowSize = blockEntry.numRows();
01088   LocalOrdinal blkColSize = blockEntry.numCols();
01089   getLocalBlockEntryViewNonConst(localBlockRow,localBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01090 
01091   //now sum (add) the incoming block-entry into internal storage:
01092   const Scalar* inputvalues = blockEntry.values();
01093   set_array_values<Teuchos::ArrayRCP<Scalar>, const Scalar*, LocalOrdinal>(
01094     internalBlockEntry, inputvalues, blkRowSize, blkColSize, blockEntry.stride(), ADD);
01095 }
01096 
01097 //-------------------------------------------------------------------
01098 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01099 void
01100 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::setGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry)
01101 {
01102   //first get an ArrayRCP for the internal storage for this block-entry:
01103   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01104   getGlobalBlockEntryViewNonConst(globalBlockRow,globalBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01105 
01106   LocalOrdinal blk_size = blockEntry.size();
01107   TEUCHOS_TEST_FOR_EXCEPTION(blkColSize*LDA > blk_size, std::runtime_error,
01108     "Tpetra::VbrMatrix::setGlobalBlockEntry ERROR, inconsistent block-entry sizes.");
01109 
01110   //copy the incoming block-entry into internal storage:
01111   set_array_values(internalBlockEntry, blockEntry, blkRowSize, blkColSize, LDA, REPLACE);
01112 }
01113 
01114 //-------------------------------------------------------------------
01115 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01116 void
01117 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::setLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry)
01118 {
01119   //first get an ArrayRCP for the internal storage for this block-entry:
01120   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01121   getLocalBlockEntryViewNonConst(localBlockRow,localBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01122 
01123   LocalOrdinal blk_size = blockEntry.size();
01124   TEUCHOS_TEST_FOR_EXCEPTION(blkColSize*LDA > blk_size, std::runtime_error,
01125     "Tpetra::VbrMatrix::setLocalBlockEntry ERROR, inconsistent block-entry sizes.");
01126 
01127   //copy the incoming block-entry into internal storage:
01128   set_array_values(internalBlockEntry, blockEntry, blkRowSize, blkColSize, LDA, REPLACE);
01129 }
01130 
01131 //-------------------------------------------------------------------
01132 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01133 void
01134 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::sumIntoGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry)
01135 {
01136   //first get an ArrayRCP for the internal storage for this block-entry:
01137   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01138   getGlobalBlockEntryViewNonConst(globalBlockRow,globalBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01139 
01140   LocalOrdinal blk_size = blockEntry.size();
01141   TEUCHOS_TEST_FOR_EXCEPTION(blkColSize*LDA > blk_size, std::runtime_error,
01142     "Tpetra::VbrMatrix::setGlobalBlockEntry ERROR, inconsistent block-entry sizes.");
01143 
01144   //copy the incoming block-entry into internal storage:
01145   set_array_values(internalBlockEntry, blockEntry, blkRowSize, blkColSize, LDA, ADD);
01146 }
01147 
01148 //-------------------------------------------------------------------
01149 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01150 void
01151 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::sumIntoLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry)
01152 {
01153   //first get an ArrayRCP for the internal storage for this block-entry:
01154   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01155   getLocalBlockEntryViewNonConst(localBlockRow,localBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01156 
01157   LocalOrdinal blk_size = blockEntry.size();
01158   TEUCHOS_TEST_FOR_EXCEPTION(blkColSize*LDA > blk_size, std::runtime_error,
01159     "Tpetra::VbrMatrix::setLocalBlockEntry ERROR, inconsistent block-entry sizes.");
01160 
01161   //copy the incoming block-entry into internal storage:
01162   set_array_values(internalBlockEntry, blockEntry, blkRowSize, blkColSize, LDA, ADD);
01163 }
01164 
01165 //-------------------------------------------------------------------
01166 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01167 void
01168 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::optimizeStorage()
01169 {
01170   typedef Teuchos::ArrayRCP<const size_t> Host_View;
01171   typedef Teuchos::ArrayRCP<const LocalOrdinal> Host_View_LO;
01172   typedef typename Teuchos::ArrayRCP<size_t>::size_type Tsize_t;
01173 
01174   if (is_storage_optimized_ == true) return;
01175 
01176   TEUCHOS_TEST_FOR_EXCEPTION(constBlkGraph_->isFillComplete() != true, std::runtime_error,
01177     "Tpetra::VbrMatrix::optimizeStorage ERROR, isFillComplete() is false, required to be true before optimizeStorage is called.");
01178 
01179   size_t num_block_nonzeros = constBlkGraph_->getNodeNumBlockEntries();
01180 
01181   const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& rowmap = constBlkGraph_->getBlockRowMap();
01182   const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& colmap = constBlkGraph_->getBlockColMap();
01183 
01184   const Teuchos::RCP<Node>& node = getBlockRowMap()->getPointMap()->getNode();
01185 
01186   //need to count the number of point-entries:
01187   size_t num_point_entries = 0;
01188   Host_View bptr = constBlkGraph_->getNodeRowOffsets();
01189 
01190   if (bptr.size() == 0) {
01191     is_storage_optimized_ = true;
01192     return;
01193   }
01194 
01195   Host_View_LO bindx = constBlkGraph_->getNodePackedIndices();
01196 
01197   for(Tsize_t r=0; r<bptr.size()-1; ++r) {
01198     size_t rbeg = bptr[r];
01199     size_t rend = bptr[r+1];
01200 
01201     LocalOrdinal rsize = rowmap->getLocalBlockSize(r);
01202 
01203     for(size_t j=rbeg; j<rend; ++j) {
01204       LocalOrdinal csize = colmap->getLocalBlockSize(bindx[j]);
01205       num_point_entries += rsize*csize;
01206     }
01207   }
01208 
01209   pbuf_indx_ = node->template allocBuffer<LocalOrdinal>(num_block_nonzeros+1);
01210   pbuf_values1D_ = node->template allocBuffer<Scalar>(num_point_entries);
01211 
01212   Teuchos::ArrayRCP<LocalOrdinal> view_indx = node->template viewBufferNonConst<LocalOrdinal>(KokkosClassic::WriteOnly, num_block_nonzeros+1, pbuf_indx_);
01213   Teuchos::ArrayRCP<Scalar> view_values1D = node->template viewBufferNonConst<Scalar>(KokkosClassic::WriteOnly, num_point_entries, pbuf_values1D_);
01214 
01215   bool have_2D_data = data_2D_->size() > 0;
01216   Scalar zero = Teuchos::ScalarTraits<Scalar>::zero();
01217 
01218   size_t ioffset = 0;
01219   size_t offset = 0;
01220   for(Tsize_t r=0; r<bptr.size()-1; ++r) {
01221     LocalOrdinal rsize = rowmap->getLocalBlockSize(r);
01222 
01223     RowGlobalCols* blk_row = have_2D_data ? &((*data_2D_)[r]) : NULL;
01224 
01225     for(size_t c=bptr[r]; c<bptr[r+1]; ++c) {
01226       view_indx[ioffset++] = offset;
01227 
01228       LocalOrdinal csize = colmap->getLocalBlockSize(bindx[c]);
01229       Tsize_t blkSize = rsize*csize;
01230 
01231       if (blk_row != NULL) {
01232         GlobalOrdinal global_col = colmap->getGlobalBlockID(bindx[c]);
01233         typename RowGlobalCols::iterator iter = blk_row->find(global_col);
01234         TEUCHOS_TEST_FOR_EXCEPTION(iter == blk_row->end(), std::runtime_error,
01235           "Tpetra::VbrMatrix::optimizeStorage ERROR, global_col not found in row.");
01236 
01237         Teuchos::ArrayRCP<Scalar> vals = iter->second;
01238         for(Tsize_t n=0; n<blkSize; ++n) {
01239           view_values1D[offset+n] = vals[n];
01240         }
01241       }
01242       else {
01243         for(Tsize_t n=0; n<blkSize; ++n) view_values1D[offset+n] = zero;
01244       }
01245       offset += blkSize;
01246     }
01247   }
01248   view_indx[ioffset] = offset;
01249 
01250   //Final step: release memory for the "2D" storage:
01251   data_2D_ = Teuchos::null;
01252 
01253   is_storage_optimized_ = true;
01254 }
01255 
01256 //-------------------------------------------------------------------
01257 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01258 void VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::fillLocalMatrix()
01259 {
01260   //We insist that optimzeStorage has already been called.
01261   //We don't care whether this function (fillLocalMatrix()) is being
01262   //called for the first time or not.
01263   TEUCHOS_TEST_FOR_EXCEPTION(is_storage_optimized_ != true, std::runtime_error,
01264     "Tpetra::VbrMatrix::fillLocalMatrix ERROR, optimizeStorage is required to have already been called.");
01265 
01266   Teuchos::ArrayRCP<const size_t      > nodeRowOffsets = constBlkGraph_->getNodeRowOffsets();
01267   Teuchos::ArrayRCP<const LocalOrdinal> nodePackedInds = constBlkGraph_->getNodePackedIndices();
01268   if (Node::isHostNode == false) {
01269     RCP<Node> node = getRangeMap()->getNode();
01270     //
01271     Teuchos::ArrayRCP<size_t> dev_nodeRowOffsets = node->template allocBuffer<size_t>(nodeRowOffsets.size());
01272     node->copyToBuffer(nodeRowOffsets.size(), nodeRowOffsets(), dev_nodeRowOffsets);
01273     nodeRowOffsets = dev_nodeRowOffsets;
01274     //
01275     Teuchos::ArrayRCP<LocalOrdinal> dev_nodePackedInds = node->template allocBuffer<LocalOrdinal>(nodePackedInds.size());
01276     node->copyToBuffer(nodePackedInds.size(), nodePackedInds(), dev_nodePackedInds);
01277     nodePackedInds = dev_nodePackedInds;
01278   }
01279   lclMatrix_->setPackedValues (pbuf_values1D_,
01280                                getBlockRowMap()->getNodeFirstPointInBlocks_Device(),
01281                                getBlockColMap()->getNodeFirstPointInBlocks_Device(),
01282                                nodeRowOffsets,
01283                                nodePackedInds,
01284                                pbuf_indx_);
01285 }
01286 
01287 //-------------------------------------------------------------------
01288 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01289 void VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::fillLocalMatVec()
01290 {
01291   //We insist that optimzeStorage has already been called.
01292   //We don't care whether this function (fillLocalMatVec()) is being
01293   //called for the first time or not.
01294   TEUCHOS_TEST_FOR_EXCEPTION(is_storage_optimized_ != true, std::runtime_error,
01295     "Tpetra::VbrMatrix::fillLocalMatrix ERROR, optimizeStorage is required to have already been called.");
01296 
01297   lclMatOps_.initializeValues (*lclMatrix_);
01298 }
01299 
01300 //-------------------------------------------------------------------
01301 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01302 void
01303 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::createImporterExporter()
01304 {
01305   typedef typename Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > PtMap;
01306 
01307   const PtMap& ptDomMap = (constBlkGraph_->getBlockDomainMap())->getPointMap();
01308   const PtMap& ptRngMap = (constBlkGraph_->getBlockRangeMap())->getPointMap();
01309   const PtMap& ptRowMap = (constBlkGraph_->getBlockRowMap())->getPointMap();
01310   const PtMap& ptColMap = (constBlkGraph_->getBlockColMap())->getPointMap();
01311 
01312   if (!ptDomMap->isSameAs(*ptColMap)) {
01313     importer_ = Teuchos::rcp(new Tpetra::Import<LocalOrdinal,GlobalOrdinal,Node>(ptDomMap, ptColMap));
01314   }
01315   if (!ptRngMap->isSameAs(*ptRowMap)) {
01316     exporter_ = Teuchos::rcp(new Tpetra::Export<LocalOrdinal,GlobalOrdinal,Node>(ptRowMap, ptRngMap));
01317   }
01318 }
01319 
01320 //-------------------------------------------------------------------
01321 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01322 void
01323 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::fillComplete(const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& blockDomainMap, const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& blockRangeMap)
01324 {
01325   if (isFillComplete()) return;
01326 
01327   globalAssemble();
01328 
01329   blkGraph_->fillComplete(blockDomainMap, blockRangeMap);
01330 
01331   createImporterExporter();
01332 
01333   is_fill_completed_ = true;
01334 
01335   optimizeStorage();
01336 
01337   fillLocalMatrix();
01338   fillLocalMatVec();
01339 }
01340 
01341 //-------------------------------------------------------------------
01342 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01343 void VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::fillComplete()
01344 {
01345   //In this case, our block-row-map will also be our domain-map and
01346   //range-map.
01347 
01348   fillComplete(getBlockRowMap(), getBlockRowMap());
01349 }
01350 
01351 //-------------------------------------------------------------------
01352 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01353 void VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::globalAssemble()
01354 {
01355   if (getPointRowMap()->getComm()->getSize() == 1) {
01356     return;
01357   }
01358 
01359   //nonlocal_data_ contains data that belongs on other processors.
01360   //We'll refer to this as overlapping data, and create an 'overlapMap'
01361   //that describes the layout of this overlapping data.
01362   Teuchos::RCP<const Tpetra::BlockMap<LocalOrdinal,GlobalOrdinal,Node> >
01363      overlapMap = VbrUtils::createOverlapMap(nonlocal_data_, *getBlockRowMap());
01364 
01365   if (overlapMap->getGlobalNumBlocks() == 0) {
01366     return;
01367   }
01368 
01369   //VbrDataDist is a wrapper that makes our overlapping data behave
01370   //like a DistObject so that the overlapping data can be exported
01371   //to the owning processors in a standard way.
01372   typedef VbrUtils::VbrDataDist<LocalOrdinal, GlobalOrdinal, Scalar, Node> VDD;
01373   VDD vdd (nonlocal_data_, *overlapMap);
01374 
01375   //Create an exporter where the source map is our overlapMap and the
01376   //target map is the rowmap of our VbrMatrix.
01377   typedef Import<LocalOrdinal,GlobalOrdinal,Node> import_type;
01378   import_type importer (vdd.getMap (), convertBlockMapToPointMap (*getBlockRowMap ()));
01379 
01380   //Communicate the overlapping data to the owning processors and add it
01381   //into this VbrMatrix.
01382   this->doImport (vdd, importer, Tpetra::ADD);
01383 
01384   //Zero out the overlapping data so it can be re-populated and re-assembled
01385   //in future calls to globalAssemble.
01386   nonlocal_data_.zeroEntries ();
01387 }
01388 
01389 //-------------------------------------------------------------------
01390 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01391 std::string
01392 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::description() const
01393 {
01394   std::ostringstream oss;
01395   oss << Teuchos::Describable::description();
01396   if (isFillComplete()) {
01397     oss << "{status = fill complete, global num block rows = "
01398         << getBlockRowMap()->getGlobalNumBlocks() << "}";
01399   }
01400   else {
01401     oss << "{status = fill not complete, global num block rows = "
01402         << getBlockRowMap()->getGlobalNumBlocks() << "}";
01403   }
01404   return oss.str();
01405 }
01406 
01407 //-------------------------------------------------------------------
01408 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01409 void
01410 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const
01411 {
01412   Teuchos::EVerbosityLevel vl = verbLevel;
01413   if (vl == Teuchos::VERB_DEFAULT) vl = Teuchos::VERB_LOW;
01414 
01415   if (vl == Teuchos::VERB_NONE) return;
01416 
01417   Teuchos::RCP<const Teuchos::Comm<int> > comm = getBlockRowMap()->getPointMap()->getComm();
01418   const int myProc = comm->getRank();
01419 
01420   if (myProc == 0) out << "VbrMatrix::describe is under construction..." << std::endl;
01421 }
01422 
01423 }//namespace Tpetra
01424 
01425 //
01426 // Explicit instantiation macro
01427 //
01428 // Must be expanded from within the Tpetra namespace!
01429 //
01430 
01431 #define TPETRA_VBRMATRIX_INSTANT(SCALAR,LO,GO,NODE) \
01432   \
01433   template class VbrMatrix< SCALAR , LO , GO , NODE >;
01434 
01435 #endif //TPETRA_VBRMATRIX_DEF_HPP
01436 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines