// Tpetra Matrix/Vector Services
// Version of the Day
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Tpetra: Templated Linear Algebra Services Package 00005 // Copyright (2008) Sandia Corporation 00006 // 00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, 00008 // the U.S. Government retains certain rights in this software. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00038 // 00039 // ************************************************************************ 00040 // @HEADER 00041 00042 #ifndef TPETRA_EXPERIMENTAL_BLOCKCRSMATRIX_DEF_HPP 00043 #define TPETRA_EXPERIMENTAL_BLOCKCRSMATRIX_DEF_HPP 00044 00047 00048 #ifdef DOXYGEN_USE_ONLY 00049 # include "Tpetra_Experimental_BlockMultiVector_decl.hpp" 00050 #endif 00051 00052 namespace Tpetra { 00053 namespace Experimental { 00054 00055 template<class Scalar, class LO, class GO, class Node> 00056 BlockCrsMatrix<Scalar, LO, GO, Node>:: 00057 BlockCrsMatrix () : 00058 dist_object_type (Teuchos::rcp (new map_type ())), // nonnull, so DistObject doesn't throw 00059 graph_ (Teuchos::rcp (new map_type ()), 0), // FIXME (mfh 16 May 2014) no empty ctor yet 00060 blockSize_ (static_cast<LO> (0)), 00061 ptr_ (NULL), 00062 ind_ (NULL), 00063 X_colMap_ (new Teuchos::RCP<BMV> ()), // ptr to a null ptr 00064 Y_rowMap_ (new Teuchos::RCP<BMV> ()), // ptr to a null ptr 00065 columnPadding_ (0), // no padding by default 00066 rowMajor_ (true), // row major blocks by default 00067 localError_ (new bool (false)), 00068 errs_ (new Teuchos::RCP<std::ostringstream> ()), // ptr to a null ptr 00069 computedDiagonalGraph_(false) 00070 { 00071 } 00072 00073 template<class Scalar, class LO, class GO, class Node> 00074 BlockCrsMatrix<Scalar, LO, GO, Node>:: 00075 
BlockCrsMatrix (const crs_graph_type& graph, 00076 const LO blockSize) : 00077 dist_object_type (graph.getMap ()), 00078 graph_ (graph), 00079 rowMeshMap_ (* (graph.getRowMap ())), 00080 blockSize_ (blockSize), 00081 ptr_ (NULL), // to be initialized below 00082 ind_ (NULL), // to be initialized below 00083 val_ (NULL), // to be initialized below 00084 X_colMap_ (new Teuchos::RCP<BMV> ()), // ptr to a null ptr 00085 Y_rowMap_ (new Teuchos::RCP<BMV> ()), // ptr to a null ptr 00086 columnPadding_ (0), // no padding by default 00087 rowMajor_ (true), // row major blocks by default 00088 localError_ (new bool (false)), 00089 errs_ (new Teuchos::RCP<std::ostringstream> ()), // ptr to a null ptr 00090 computedDiagonalGraph_(false) 00091 { 00092 TEUCHOS_TEST_FOR_EXCEPTION( 00093 ! graph_.isSorted (), std::invalid_argument, "Tpetra::Experimental::" 00094 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted " 00095 "rows (isSorted() is false). This class assumes sorted rows."); 00096 00097 graphRCP_ = Teuchos::rcpFromRef(graph_); 00098 00099 // Trick to test whether LO is nonpositive, without a compiler 00100 // warning in case LO is unsigned (which is generally a bad idea 00101 // anyway). I don't promise that the trick works, but it 00102 // generally does with gcc at least, in my experience. 00103 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1); 00104 TEUCHOS_TEST_FOR_EXCEPTION( 00105 blockSizeIsNonpositive, std::invalid_argument, "Tpetra::Experimental::" 00106 "BlockCrsMatrix constructor: The input blockSize = " << blockSize << 00107 " <= 0. 
The block size must be positive."); 00108 00109 domainPointMap_ = BMV::makePointMap (* (graph.getDomainMap ()), blockSize); 00110 rangePointMap_ = BMV::makePointMap (* (graph.getRangeMap ()), blockSize); 00111 00112 ptr_ = graph.getNodeRowPtrs ().getRawPtr (); 00113 ind_ = graph.getNodePackedIndices ().getRawPtr (); 00114 valView_.resize (graph.getNodeNumEntries () * offsetPerBlock ()); 00115 val_ = valView_.getRawPtr (); 00116 } 00117 00118 template<class Scalar, class LO, class GO, class Node> 00119 BlockCrsMatrix<Scalar, LO, GO, Node>:: 00120 BlockCrsMatrix (const crs_graph_type& graph, 00121 const map_type& domainPointMap, 00122 const map_type& rangePointMap, 00123 const LO blockSize) : 00124 dist_object_type (graph.getMap ()), 00125 graph_ (graph), 00126 rowMeshMap_ (* (graph.getRowMap ())), 00127 domainPointMap_ (domainPointMap), 00128 rangePointMap_ (rangePointMap), 00129 blockSize_ (blockSize), 00130 ptr_ (NULL), // to be initialized below 00131 ind_ (NULL), // to be initialized below 00132 X_colMap_ (new Teuchos::RCP<BMV> ()), // ptr to a null ptr 00133 Y_rowMap_ (new Teuchos::RCP<BMV> ()), // ptr to a null ptr 00134 columnPadding_ (0), // no padding by default 00135 rowMajor_ (true), // row major blocks by default 00136 localError_ (new bool (false)), 00137 errs_ (new Teuchos::RCP<std::ostringstream> ()), // ptr to a null ptr 00138 computedDiagonalGraph_(false) 00139 { 00140 TEUCHOS_TEST_FOR_EXCEPTION( 00141 ! graph_.isSorted (), std::invalid_argument, "Tpetra::Experimental::" 00142 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted " 00143 "rows (isSorted() is false). This class assumes sorted rows."); 00144 00145 graphRCP_ = Teuchos::rcpFromRef(graph_); 00146 00147 // Trick to test whether LO is nonpositive, without a compiler 00148 // warning in case LO is unsigned (which is generally a bad idea 00149 // anyway). I don't promise that the trick works, but it 00150 // generally does with gcc at least, in my experience. 
00151 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1); 00152 TEUCHOS_TEST_FOR_EXCEPTION( 00153 blockSizeIsNonpositive, std::invalid_argument, "Tpetra::Experimental::" 00154 "BlockCrsMatrix constructor: The input blockSize = " << blockSize << 00155 " <= 0. The block size must be positive."); 00156 00157 ptr_ = graph.getNodeRowPtrs ().getRawPtr (); 00158 ind_ = graph.getNodePackedIndices ().getRawPtr (); 00159 valView_.resize (graph.getNodeNumEntries () * offsetPerBlock ()); 00160 val_ = valView_.getRawPtr (); 00161 } 00162 00163 template<class Scalar, class LO, class GO, class Node> 00164 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type> 00165 BlockCrsMatrix<Scalar, LO, GO, Node>:: 00166 getDomainMap () const 00167 { // Copy constructor of map_type does a shallow copy. 00168 // We're only returning by RCP for backwards compatibility. 00169 return Teuchos::rcp (new map_type (domainPointMap_)); 00170 } 00171 00172 template<class Scalar, class LO, class GO, class Node> 00173 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type> 00174 BlockCrsMatrix<Scalar, LO, GO, Node>:: 00175 getRangeMap () const 00176 { // Copy constructor of map_type does a shallow copy. 00177 // We're only returning by RCP for backwards compatibility. 
00178 return Teuchos::rcp (new map_type (rangePointMap_)); 00179 } 00180 00181 template<class Scalar, class LO, class GO, class Node> 00182 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type> 00183 BlockCrsMatrix<Scalar, LO, GO, Node>:: 00184 getRowMap () const 00185 { 00186 return graph_.getRowMap(); 00187 } 00188 00189 template<class Scalar, class LO, class GO, class Node> 00190 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type> 00191 BlockCrsMatrix<Scalar, LO, GO, Node>:: 00192 getColMap () const 00193 { 00194 return graph_.getColMap(); 00195 } 00196 00197 template<class Scalar, class LO, class GO, class Node> 00198 global_size_t 00199 BlockCrsMatrix<Scalar, LO, GO, Node>:: 00200 getGlobalNumRows() const 00201 { 00202 return graph_.getGlobalNumRows(); 00203 } 00204 00205 template<class Scalar, class LO, class GO, class Node> 00206 size_t 00207 BlockCrsMatrix<Scalar, LO, GO, Node>:: 00208 getNodeNumRows() const 00209 { 00210 return graph_.getNodeNumRows(); 00211 } 00212 00213 template<class Scalar, class LO, class GO, class Node> 00214 size_t 00215 BlockCrsMatrix<Scalar, LO, GO, Node>:: 00216 getNodeMaxNumRowEntries() const 00217 { 00218 return graph_.getNodeMaxNumRowEntries(); 00219 } 00220 00221 template<class Scalar, class LO, class GO, class Node> 00222 void 00223 BlockCrsMatrix<Scalar, LO, GO, Node>:: 00224 apply (const mv_type& X, 00225 mv_type& Y, 00226 Teuchos::ETransp mode, 00227 scalar_type alpha, 00228 scalar_type beta) const 00229 { 00230 typedef BlockCrsMatrix<Scalar, LO, GO, Node> this_type; 00231 TEUCHOS_TEST_FOR_EXCEPTION( 00232 mode != Teuchos::NO_TRANS && mode != Teuchos::TRANS && mode != Teuchos::CONJ_TRANS, 00233 std::invalid_argument, "Tpetra::Experimental::BlockCrsMatrix::apply: " 00234 "Invalid 'mode' argument. 
Valid values are Teuchos::NO_TRANS, " 00235 "Teuchos::TRANS, and Teuchos::CONJ_TRANS."); 00236 00237 BMV X_view; 00238 BMV Y_view; 00239 const LO blockSize = getBlockSize (); 00240 try { 00241 X_view = BMV (X, * (graph_.getDomainMap ()), blockSize); 00242 Y_view = BMV (Y, * (graph_.getRangeMap ()), blockSize); 00243 } 00244 catch (std::invalid_argument& e) { 00245 TEUCHOS_TEST_FOR_EXCEPTION( 00246 true, std::invalid_argument, "Tpetra::Experimental::BlockCrsMatrix::" 00247 "apply: Either the input MultiVector X or the output MultiVector Y " 00248 "cannot be viewed as a BlockMultiVector, given this BlockCrsMatrix's " 00249 "graph. BlockMultiVector's constructor threw the following exception: " 00250 << e.what ()); 00251 } 00252 00253 try { 00254 // mfh 16 May 2014: Casting 'this' to nonconst is icky, but it's 00255 // either that or mark fields of this class as 'mutable'. The 00256 // problem is that applyBlock wants to do lazy initialization of 00257 // temporary block multivectors. 00258 const_cast<this_type*> (this)->applyBlock (X_view, Y_view, mode, alpha, beta); 00259 } catch (std::invalid_argument& e) { 00260 TEUCHOS_TEST_FOR_EXCEPTION( 00261 true, std::invalid_argument, "Tpetra::Experimental::BlockCrsMatrix::" 00262 "apply: The implementation method applyBlock complained about having " 00263 "an invalid argument. It reported the following: " << e.what ()); 00264 } catch (std::logic_error& e) { 00265 TEUCHOS_TEST_FOR_EXCEPTION( 00266 true, std::invalid_argument, "Tpetra::Experimental::BlockCrsMatrix::" 00267 "apply: The implementation method applyBlock complained about a " 00268 "possible bug in its implementation. 
It reported the following: " 00269 << e.what ()); 00270 } catch (std::exception& e) { 00271 TEUCHOS_TEST_FOR_EXCEPTION( 00272 true, std::invalid_argument, "Tpetra::Experimental::BlockCrsMatrix::" 00273 "apply: The implementation method applyBlock threw an exception which " 00274 "is neither std::invalid_argument nor std::logic_error, but is a " 00275 "subclass of std::exception. It reported the following: " 00276 << e.what ()); 00277 } catch (...) { 00278 TEUCHOS_TEST_FOR_EXCEPTION( 00279 true, std::logic_error, "Tpetra::Experimental::BlockCrsMatrix::" 00280 "apply: The implementation method applyBlock threw an exception which " 00281 "is not an instance of a subclass of std::exception. This probably " 00282 "indicates a bug. Please report this to the Tpetra developers."); 00283 } 00284 } 00285 00286 template<class Scalar, class LO, class GO, class Node> 00287 void 00288 BlockCrsMatrix<Scalar, LO, GO, Node>:: 00289 applyBlock (const BlockMultiVector<Scalar, LO, GO, Node>& X, 00290 BlockMultiVector<Scalar, LO, GO, Node>& Y, 00291 Teuchos::ETransp mode, 00292 const Scalar alpha, 00293 const Scalar beta) 00294 { 00295 TEUCHOS_TEST_FOR_EXCEPTION( 00296 X.getBlockSize () != Y.getBlockSize (), std::invalid_argument, 00297 "Tpetra::Experimental::BlockCrsMatrix::applyBlock: " 00298 "X and Y have different block sizes. X.getBlockSize() = " 00299 << X.getBlockSize () << " != Y.getBlockSize() = " 00300 << Y.getBlockSize () << "."); 00301 00302 if (mode == Teuchos::NO_TRANS) { 00303 applyBlockNoTrans (X, Y, alpha, beta); 00304 } else if (mode == Teuchos::TRANS || mode == Teuchos::CONJ_TRANS) { 00305 applyBlockTrans (X, Y, mode, alpha, beta); 00306 } else { 00307 TEUCHOS_TEST_FOR_EXCEPTION( 00308 true, std::invalid_argument, "Tpetra::Experimental::BlockCrsMatrix::" 00309 "applyBlock: Invalid 'mode' argument. 
Valid values are " 00310 "Teuchos::NO_TRANS, Teuchos::TRANS, and Teuchos::CONJ_TRANS."); 00311 } 00312 } 00313 00314 template<class Scalar, class LO, class GO, class Node> 00315 void 00316 BlockCrsMatrix<Scalar, LO, GO, Node>:: 00317 setAllToScalar (const Scalar& alpha) 00318 { 00319 const LO numLocalMeshRows = static_cast<LO> (rowMeshMap_.getNodeNumElements ()); 00320 for (LO lclRow = 0; lclRow < numLocalMeshRows; ++lclRow) { 00321 const size_t meshBeg = ptr_[lclRow]; 00322 const size_t meshEnd = ptr_[lclRow+1]; 00323 for (size_t absBlkOff = meshBeg; absBlkOff < meshEnd; ++absBlkOff) { 00324 little_block_type A_cur = getNonConstLocalBlockFromAbsOffset (absBlkOff); 00325 A_cur.fill (alpha); 00326 } 00327 } 00328 } 00329 00330 template<class Scalar, class LO, class GO, class Node> 00331 LO 00332 BlockCrsMatrix<Scalar, LO, GO, Node>:: 00333 replaceLocalValues (const LO localRowInd, 00334 const LO colInds[], 00335 const Scalar vals[], 00336 const LO numColInds) const 00337 { 00338 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) { 00339 // We modified no values, because the input local row index is 00340 // invalid on the calling process. That may not be an error, if 00341 // numColInds is zero anyway; it doesn't matter. This is the 00342 // advantage of returning the number of valid indices. 
00343 return static_cast<LO> (0); 00344 } 00345 00346 const size_t absRowBlockOffset = ptr_[localRowInd]; 00347 const size_t perBlockSize = static_cast<LO> (offsetPerBlock ()); 00348 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid (); 00349 size_t hint = 0; // Guess for the relative offset into the current row 00350 size_t pointOffset = 0; // Current offset into input values 00351 LO validCount = 0; // number of valid column indices in colInds 00352 00353 for (LO k = 0; k < numColInds; ++k, pointOffset += perBlockSize) { 00354 const size_t relBlockOffset = 00355 findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint); 00356 if (relBlockOffset != STINV) { 00357 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset; 00358 little_block_type A_old = 00359 getNonConstLocalBlockFromAbsOffset (absBlockOffset); 00360 const_little_block_type A_new = 00361 getConstLocalBlockFromInput (vals, pointOffset); 00362 A_old.assign (A_new); 00363 hint = relBlockOffset + 1; 00364 ++validCount; 00365 } 00366 } 00367 return validCount; 00368 } 00369 00370 template <class Scalar, class LO, class GO, class Node> 00371 void 00372 BlockCrsMatrix<Scalar,LO,GO,Node>:: 00373 getLocalDiagOffsets (Teuchos::ArrayRCP<size_t>& offsets) const 00374 { 00375 00376 const map_type& rowMap = * (graph_.getRowMap()); 00377 const map_type& colMap = * (graph_.getColMap ()); 00378 00379 const size_t myNumRows = rowMeshMap_.getNodeNumElements(); 00380 if (static_cast<size_t> (offsets.size ()) != myNumRows) { 00381 offsets.resize (static_cast<size_t> (myNumRows)); 00382 } 00383 00384 #ifdef HAVE_TPETRA_DEBUG 00385 bool allRowMapDiagEntriesInColMap = true; 00386 bool allDiagEntriesFound = true; 00387 #endif // HAVE_TPETRA_DEBUG 00388 00389 for (size_t r = 0; r < myNumRows; ++r) { 00390 const GO rgid = rowMap.getGlobalElement (r); 00391 const LO rlid = colMap.getLocalElement (rgid); 00392 00393 #ifdef HAVE_TPETRA_DEBUG 00394 if (rlid == Teuchos::OrdinalTraits<LO>::invalid ()) { 00395 
allRowMapDiagEntriesInColMap = false; 00396 } 00397 #endif // HAVE_TPETRA_DEBUG 00398 00399 if (rlid != Teuchos::OrdinalTraits<LO>::invalid ()) { 00400 RowInfo rowinfo = graph_.getRowInfo (r); 00401 if (rowinfo.numEntries > 0) { 00402 offsets[r] = graph_.findLocalIndex (rowinfo, rlid); 00403 } 00404 else { 00405 offsets[r] = Teuchos::OrdinalTraits<size_t>::invalid (); 00406 #ifdef HAVE_TPETRA_DEBUG 00407 allDiagEntriesFound = false; 00408 #endif // HAVE_TPETRA_DEBUG 00409 } 00410 } 00411 } 00412 00413 #ifdef HAVE_TPETRA_DEBUG 00414 using Teuchos::reduceAll; 00415 using std::endl; 00416 const char tfecfFuncName[] = "getLocalDiagOffsets"; 00417 00418 const bool localSuccess = 00419 allRowMapDiagEntriesInColMap && allDiagEntriesFound; 00420 int localResults[3]; 00421 localResults[0] = allRowMapDiagEntriesInColMap ? 1 : 0; 00422 localResults[1] = allDiagEntriesFound ? 1 : 0; 00423 // min-all-reduce will compute least rank of all the processes 00424 // that didn't succeed. 00425 localResults[2] = 00426 ! localSuccess ? getComm ()->getRank () : getComm ()->getSize (); 00427 int globalResults[3]; 00428 globalResults[0] = 0; 00429 globalResults[1] = 0; 00430 globalResults[2] = 0; 00431 reduceAll<int, int> (* (getComm ()), Teuchos::REDUCE_MIN, 00432 3, localResults, globalResults); 00433 if (globalResults[0] == 0 || globalResults[1] == 0) { 00434 std::ostringstream os; // build error message 00435 const bool both = 00436 globalResults[0] == 0 && globalResults[1] == 0; 00437 os << ": At least one process (including Process " << globalResults[2] 00438 << ") had the following issue" << (both ? "s" : "") << ":" << endl; 00439 if (globalResults[0] == 0) { 00440 os << " - The column Map does not contain at least one diagonal entry " 00441 "of the matrix." << endl; 00442 } 00443 if (globalResults[1] == 0) { 00444 os << " - There is a row on that / those process(es) that does not " 00445 "contain a diagonal entry." 
<< endl; 00446 } 00447 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str()); 00448 } 00449 #endif // HAVE_TPETRA_DEBUG 00450 } 00451 00452 template <class Scalar, class LO, class GO, class Node> 00453 void 00454 BlockCrsMatrix<Scalar,LO,GO,Node>:: 00455 computeDiagonalGraph () 00456 { 00457 using Teuchos::rcp; 00458 00459 if (computedDiagonalGraph_) { 00460 // FIXME (mfh 12 Aug 2014) Consider storing the "diagonal graph" 00461 // separately from the matrix. It should really go in the 00462 // preconditioner, not here. We could do this by adding a 00463 // method that accepts a nonconst diagonal graph, and updates 00464 // it. btw it would probably be a better idea to use a 00465 // BlockMultiVector to store the diagonal, not a graph. 00466 return; 00467 } 00468 00469 const size_t maxDiagEntPerRow = 1; 00470 // NOTE (mfh 12 Aug 2014) We could also pass in the column Map 00471 // here. However, we still would have to do LID->GID lookups to 00472 // make sure that we are using the correct diagonal column 00473 // indices, so it probably wouldn't help much. 
00474 diagonalGraph_ = 00475 rcp (new crs_graph_type (graph_.getRowMap (), maxDiagEntPerRow, 00476 Tpetra::StaticProfile)); 00477 const map_type& meshRowMap = * (graph_.getRowMap ()); 00478 00479 Teuchos::Array<GO> diagGblColInds (maxDiagEntPerRow); 00480 00481 for (LO lclRowInd = meshRowMap.getMinLocalIndex (); 00482 lclRowInd <= meshRowMap.getMaxLocalIndex (); ++lclRowInd) { 00483 const GO gblRowInd = meshRowMap.getGlobalElement (lclRowInd); 00484 diagGblColInds[0] = gblRowInd; 00485 diagonalGraph_->insertGlobalIndices (gblRowInd, diagGblColInds ()); 00486 } 00487 diagonalGraph_->fillComplete (graph_.getDomainMap (), 00488 graph_.getRangeMap ()); 00489 computedDiagonalGraph_ = true; 00490 } 00491 00492 template <class Scalar, class LO, class GO, class Node> 00493 Teuchos::RCP<CrsGraph<LO, GO, Node> > 00494 BlockCrsMatrix<Scalar,LO,GO,Node>:: 00495 getDiagonalGraph () const 00496 { 00497 TEUCHOS_TEST_FOR_EXCEPTION( 00498 ! computedDiagonalGraph_, std::runtime_error, "Tpetra::Experimental::" 00499 "BlockCrsMatrix::getDiagonalGraph: You must call computeDiagionalGraph() " 00500 "before calling this method."); 00501 return diagonalGraph_; 00502 } 00503 00504 template <class Scalar, class LO, class GO, class Node> 00505 void 00506 BlockCrsMatrix<Scalar,LO,GO,Node>:: 00507 localGaussSeidel (const BlockMultiVector<Scalar, LO, GO, Node>& B, 00508 BlockMultiVector<Scalar, LO, GO, Node>& X, 00509 BlockCrsMatrix<Scalar, LO, GO, Node> & factorizedDiagonal, 00510 const int * factorizationPivots, 00511 const Scalar omega, 00512 const ESweepDirection direction) const 00513 { 00514 const LO numLocalMeshRows = 00515 static_cast<LO> (rowMeshMap_.getNodeNumElements ()); 00516 const LO numVecs = static_cast<LO> (X.getNumVectors ()); 00517 00518 // If using (new) Kokkos, replace localMem with thread-local 00519 // memory. Note that for larger block sizes, this will affect the 00520 // two-level parallelization. Look to Stokhos for best practice 00521 // on making this fast for GPUs. 
00522 const LO blockSize = getBlockSize (); 00523 Teuchos::Array<Scalar> localMem (blockSize); 00524 Teuchos::Array<Scalar> localMat (blockSize*blockSize); 00525 little_vec_type X_lcl (localMem.getRawPtr (), blockSize, 1); 00526 00527 const LO * columnIndices; 00528 Scalar * Dmat; 00529 LO numIndices; 00530 00531 // FIXME (mfh 12 Aug 2014) This probably won't work if LO is unsigned. 00532 LO rowBegin = 0, rowEnd = 0, rowStride = 0; 00533 if (direction == Forward) { 00534 rowBegin = 1; 00535 rowEnd = numLocalMeshRows+1; 00536 rowStride = 1; 00537 } 00538 else if (direction == Backward) { 00539 rowBegin = numLocalMeshRows; 00540 rowEnd = 0; 00541 rowStride = -1; 00542 } 00543 else if (direction == Symmetric) { 00544 this->localGaussSeidel (B, X, factorizedDiagonal, factorizationPivots, omega, Forward); 00545 this->localGaussSeidel (B, X, factorizedDiagonal, factorizationPivots, omega, Backward); 00546 return; 00547 } 00548 00549 const Scalar one_minus_omega = Teuchos::ScalarTraits<Scalar>::one()-omega; 00550 const Scalar minus_omega = -omega; 00551 00552 if (numVecs == 1) { 00553 for (LO lclRow = rowBegin; lclRow != rowEnd; lclRow += rowStride) { 00554 const LO actlRow = lclRow - 1; 00555 00556 little_vec_type B_cur = B.getLocalBlock (actlRow, 0); 00557 X_lcl.assign (B_cur); 00558 X_lcl.scale (omega); 00559 00560 const size_t meshBeg = ptr_[actlRow]; 00561 const size_t meshEnd = ptr_[actlRow+1]; 00562 for (size_t absBlkOff = meshBeg; absBlkOff < meshEnd; ++absBlkOff) { 00563 const LO meshCol = ind_[absBlkOff]; 00564 const_little_block_type A_cur = 00565 getConstLocalBlockFromAbsOffset (absBlkOff); 00566 00567 little_vec_type X_cur = X.getLocalBlock (meshCol, 0); 00568 00569 // X_lcl += alpha*A_cur*X_cur 00570 const Scalar alpha = meshCol == actlRow ? 
one_minus_omega : minus_omega; 00571 X_lcl.matvecUpdate (alpha, A_cur, X_cur); 00572 } // for each entry in the current local row of the matrx 00573 00574 factorizedDiagonal.getLocalRowView (actlRow, columnIndices, 00575 Dmat, numIndices); 00576 little_block_type D_lcl = getNonConstLocalBlockFromInput (Dmat, 0); 00577 00578 D_lcl.solve (X_lcl, &factorizationPivots[actlRow*blockSize_]); 00579 little_vec_type X_update = X.getLocalBlock (actlRow, 0); 00580 X_update.assign(X_lcl); 00581 } // for each local row of the matrix 00582 } 00583 else { 00584 for (LO lclRow = rowBegin; lclRow != rowEnd; lclRow += rowStride) { 00585 for (LO j = 0; j < numVecs; ++j) { 00586 LO actlRow = lclRow-1; 00587 00588 little_vec_type B_cur = B.getLocalBlock (actlRow, j); 00589 X_lcl.assign (B_cur); 00590 X_lcl.scale (omega); 00591 00592 const size_t meshBeg = ptr_[actlRow]; 00593 const size_t meshEnd = ptr_[actlRow+1]; 00594 for (size_t absBlkOff = meshBeg; absBlkOff < meshEnd; ++absBlkOff) { 00595 const LO meshCol = ind_[absBlkOff]; 00596 const_little_block_type A_cur = 00597 getConstLocalBlockFromAbsOffset (absBlkOff); 00598 00599 little_vec_type X_cur = X.getLocalBlock (meshCol, j); 00600 00601 // X_lcl += alpha*A_cur*X_cur 00602 const Scalar alpha = meshCol == actlRow ? 
one_minus_omega : minus_omega; 00603 X_lcl.matvecUpdate (alpha, A_cur, X_cur); 00604 } // for each entry in the current local row of the matrx 00605 00606 factorizedDiagonal.getLocalRowView (actlRow, columnIndices, 00607 Dmat, numIndices); 00608 little_block_type D_lcl = getNonConstLocalBlockFromInput(Dmat, 0); 00609 00610 D_lcl.solve (X_lcl, &factorizationPivots[actlRow*blockSize_]); 00611 00612 little_vec_type X_update = X.getLocalBlock (actlRow, j); 00613 X_update.assign(X_lcl); 00614 } // for each entry in the current local row of the matrix 00615 } // for each local row of the matrix 00616 } 00617 } 00618 00619 template <class Scalar, class LO, class GO, class Node> 00620 void 00621 BlockCrsMatrix<Scalar,LO,GO,Node>:: 00622 gaussSeidelCopy (MultiVector<Scalar,LO,GO,Node> &X, 00623 const MultiVector<Scalar,LO,GO,Node> &B, 00624 const MultiVector<Scalar,LO,GO,Node> &D, 00625 const Scalar& dampingFactor, 00626 const ESweepDirection direction, 00627 const int numSweeps, 00628 const bool zeroInitialGuess) const 00629 { 00630 // FIXME (mfh 12 Aug 2014) This method has entirely the wrong 00631 // interface for block Gauss-Seidel. 00632 TEUCHOS_TEST_FOR_EXCEPTION( 00633 true, std::logic_error, "Tpetra::Experimental::BlockCrsMatrix::" 00634 "gaussSeidelCopy: Not implemented."); 00635 } 00636 00637 template <class Scalar, class LO, class GO, class Node> 00638 void 00639 BlockCrsMatrix<Scalar,LO,GO,Node>:: 00640 reorderedGaussSeidelCopy (MultiVector<Scalar,LO,GO,Node>& X, 00641 const MultiVector<Scalar,LO,GO,Node>& B, 00642 const MultiVector<Scalar,LO,GO,Node>& D, 00643 const ArrayView<LO>& rowIndices, 00644 const Scalar& dampingFactor, 00645 const ESweepDirection direction, 00646 const int numSweeps, 00647 const bool zeroInitialGuess) const 00648 { 00649 // FIXME (mfh 12 Aug 2014) This method has entirely the wrong 00650 // interface for block Gauss-Seidel. 
00651 TEUCHOS_TEST_FOR_EXCEPTION( 00652 true, std::logic_error, "Tpetra::Experimental::BlockCrsMatrix::" 00653 "reorderedGaussSeidelCopy: Not implemented."); 00654 } 00655 00656 template <class Scalar, class LO, class GO, class Node> 00657 void 00658 BlockCrsMatrix<Scalar,LO,GO,Node>:: 00659 getLocalDiagCopy (BlockCrsMatrix<Scalar,LO,GO,Node>& diag, 00660 const Teuchos::ArrayView<const size_t>& offsets) const 00661 { 00662 using Teuchos::ArrayRCP; 00663 using Teuchos::ArrayView; 00664 const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero (); 00665 00666 const size_t myNumRows = rowMeshMap_.getNodeNumElements(); 00667 const LO* columnIndices; 00668 Scalar* vals; 00669 LO numColumns; 00670 Teuchos::Array<LO> cols(1); 00671 00672 // FIXME (mfh 12 Aug 2014) Should use a "little block" for this instead. 00673 Teuchos::Array<Scalar> zeroMat (blockSize_*blockSize_, ZERO); 00674 for (size_t i = 0; i < myNumRows; ++i) { 00675 cols[0] = i; 00676 if (offsets[i] == Teuchos::OrdinalTraits<size_t>::invalid ()) { 00677 diag.replaceLocalValues (i, cols.getRawPtr (), zeroMat.getRawPtr (), 1); 00678 } 00679 else { 00680 getLocalRowView (i, columnIndices, vals, numColumns); 00681 diag.replaceLocalValues (i, cols.getRawPtr(), &vals[offsets[i]*blockSize_*blockSize_], 1); 00682 } 00683 } 00684 } 00685 00686 00687 template<class Scalar, class LO, class GO, class Node> 00688 LO 00689 BlockCrsMatrix<Scalar, LO, GO, Node>:: 00690 absMaxLocalValues (const LO localRowInd, 00691 const LO colInds[], 00692 const Scalar vals[], 00693 const LO numColInds) const 00694 { 00695 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) { 00696 // We modified no values, because the input local row index is 00697 // invalid on the calling process. That may not be an error, if 00698 // numColInds is zero anyway; it doesn't matter. This is the 00699 // advantage of returning the number of valid indices. 
00700 return static_cast<LO> (0); 00701 } 00702 00703 const size_t absRowBlockOffset = ptr_[localRowInd]; 00704 const size_t perBlockSize = static_cast<LO> (offsetPerBlock ()); 00705 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid (); 00706 size_t hint = 0; // Guess for the relative offset into the current row 00707 size_t pointOffset = 0; // Current offset into input values 00708 LO validCount = 0; // number of valid column indices in colInds 00709 00710 for (LO k = 0; k < numColInds; ++k, pointOffset += perBlockSize) { 00711 const size_t relBlockOffset = 00712 findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint); 00713 if (relBlockOffset != STINV) { 00714 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset; 00715 little_block_type A_old = 00716 getNonConstLocalBlockFromAbsOffset (absBlockOffset); 00717 const_little_block_type A_new = 00718 getConstLocalBlockFromInput (vals, pointOffset); 00719 A_old.absmax (A_new); 00720 hint = relBlockOffset + 1; 00721 ++validCount; 00722 } 00723 } 00724 return validCount; 00725 } 00726 00727 00728 template<class Scalar, class LO, class GO, class Node> 00729 LO 00730 BlockCrsMatrix<Scalar, LO, GO, Node>:: 00731 sumIntoLocalValues (const LO localRowInd, 00732 const LO colInds[], 00733 const Scalar vals[], 00734 const LO numColInds) const 00735 { 00736 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) { 00737 // We modified no values, because the input local row index is 00738 // invalid on the calling process. That may not be an error, if 00739 // numColInds is zero anyway; it doesn't matter. This is the 00740 // advantage of returning the number of valid indices. 
00741 return static_cast<LO> (0); 00742 } 00743 00744 const size_t absRowBlockOffset = ptr_[localRowInd]; 00745 const size_t perBlockSize = static_cast<LO> (offsetPerBlock ()); 00746 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid (); 00747 size_t hint = 0; // Guess for the relative offset into the current row 00748 size_t pointOffset = 0; // Current offset into input values 00749 LO validCount = 0; // number of valid column indices in colInds 00750 00751 for (LO k = 0; k < numColInds; ++k, pointOffset += perBlockSize) { 00752 const size_t relBlockOffset = 00753 findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint); 00754 if (relBlockOffset != STINV) { 00755 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset; 00756 little_block_type A_old = 00757 getNonConstLocalBlockFromAbsOffset (absBlockOffset); 00758 const_little_block_type A_new = 00759 getConstLocalBlockFromInput (vals, pointOffset); 00760 A_old.update (STS::one (), A_new); 00761 hint = relBlockOffset + 1; 00762 ++validCount; 00763 } 00764 } 00765 return validCount; 00766 } 00767 00768 template<class Scalar, class LO, class GO, class Node> 00769 LO 00770 BlockCrsMatrix<Scalar, LO, GO, Node>:: 00771 getLocalRowView (const LO localRowInd, 00772 const LO*& colInds, 00773 Scalar*& vals, 00774 LO& numInds) const 00775 { 00776 if (! 
rowMeshMap_.isNodeLocalElement (localRowInd)) { 00777 colInds = NULL; 00778 vals = NULL; 00779 numInds = 0; 00780 return Teuchos::OrdinalTraits<LO>::invalid (); 00781 } 00782 else { 00783 const size_t absBlockOffsetStart = ptr_[localRowInd]; 00784 colInds = ind_ + absBlockOffsetStart; 00785 vals = val_ + absBlockOffsetStart * offsetPerBlock (); 00786 numInds = ptr_[localRowInd + 1] - absBlockOffsetStart; 00787 return 0; // indicates no error 00788 } 00789 } 00790 00791 template<class Scalar, class LO, class GO, class Node> 00792 void 00793 BlockCrsMatrix<Scalar, LO, GO, Node>:: 00794 getLocalRowCopy (LO LocalRow, 00795 const ArrayView<LO> &Indices, 00796 const ArrayView<Scalar> &Values, 00797 size_t &NumEntries) const 00798 { 00799 TEUCHOS_TEST_FOR_EXCEPTION( 00800 true, std::logic_error, "Tpetra::Experimental::BlockCrsMatrix::" 00801 "getLocalRowCopy: Copying is not implemented. You should use a view."); 00802 } 00803 00804 template<class Scalar, class LO, class GO, class Node> 00805 LO 00806 BlockCrsMatrix<Scalar, LO, GO, Node>:: 00807 getLocalRowOffsets (const LO localRowInd, 00808 ptrdiff_t offsets[], 00809 const LO colInds[], 00810 const LO numColInds) const 00811 { 00812 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) { 00813 // We got no offsets, because the input local row index is 00814 // invalid on the calling process. That may not be an error, if 00815 // numColInds is zero anyway; it doesn't matter. This is the 00816 // advantage of returning the number of valid indices. 
00817 return static_cast<LO> (0); 00818 } 00819 00820 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid (); 00821 size_t hint = 0; // Guess for the relative offset into the current row 00822 LO validCount = 0; // number of valid column indices in colInds 00823 00824 for (LO k = 0; k < numColInds; ++k) { 00825 const size_t relBlockOffset = 00826 findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint); 00827 if (relBlockOffset != STINV) { 00828 offsets[k] = relBlockOffset; 00829 hint = relBlockOffset + 1; 00830 ++validCount; 00831 } 00832 } 00833 return validCount; 00834 } 00835 00836 00837 template<class Scalar, class LO, class GO, class Node> 00838 LO 00839 BlockCrsMatrix<Scalar, LO, GO, Node>:: 00840 replaceLocalValuesByOffsets (const LO localRowInd, 00841 const ptrdiff_t offsets[], 00842 const Scalar vals[], 00843 const LO numOffsets) const 00844 { 00845 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) { 00846 // We modified no values, because the input local row index is 00847 // invalid on the calling process. That may not be an error, if 00848 // numColInds is zero anyway; it doesn't matter. This is the 00849 // advantage of returning the number of valid indices. 
00850 return static_cast<LO> (0); 00851 } 00852 00853 const size_t absRowBlockOffset = ptr_[localRowInd]; 00854 const size_t perBlockSize = static_cast<LO> (offsetPerBlock ()); 00855 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid (); 00856 size_t pointOffset = 0; // Current offset into input values 00857 LO validCount = 0; // number of valid offsets 00858 00859 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) { 00860 const size_t relBlockOffset = offsets[k]; 00861 if (relBlockOffset != STINV) { 00862 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset; 00863 little_block_type A_old = 00864 getNonConstLocalBlockFromAbsOffset (absBlockOffset); 00865 const_little_block_type A_new = 00866 getConstLocalBlockFromInput (vals, pointOffset); 00867 A_old.assign (A_new); 00868 ++validCount; 00869 } 00870 } 00871 return validCount; 00872 } 00873 00874 00875 template<class Scalar, class LO, class GO, class Node> 00876 LO 00877 BlockCrsMatrix<Scalar, LO, GO, Node>:: 00878 absMaxLocalValuesByOffsets (const LO localRowInd, 00879 const ptrdiff_t offsets[], 00880 const Scalar vals[], 00881 const LO numOffsets) const 00882 { 00883 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) { 00884 // We modified no values, because the input local row index is 00885 // invalid on the calling process. That may not be an error, if 00886 // numColInds is zero anyway; it doesn't matter. This is the 00887 // advantage of returning the number of valid indices. 
00888 return static_cast<LO> (0); 00889 } 00890 00891 const size_t absRowBlockOffset = ptr_[localRowInd]; 00892 const size_t perBlockSize = static_cast<LO> (offsetPerBlock ()); 00893 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid (); 00894 size_t pointOffset = 0; // Current offset into input values 00895 LO validCount = 0; // number of valid offsets 00896 00897 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) { 00898 const size_t relBlockOffset = offsets[k]; 00899 if (relBlockOffset != STINV) { 00900 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset; 00901 little_block_type A_old = 00902 getNonConstLocalBlockFromAbsOffset (absBlockOffset); 00903 const_little_block_type A_new = 00904 getConstLocalBlockFromInput (vals, pointOffset); 00905 A_old.absmax (A_new); 00906 ++validCount; 00907 } 00908 } 00909 return validCount; 00910 } 00911 00912 00913 template<class Scalar, class LO, class GO, class Node> 00914 LO 00915 BlockCrsMatrix<Scalar, LO, GO, Node>:: 00916 sumIntoLocalValuesByOffsets (const LO localRowInd, 00917 const ptrdiff_t offsets[], 00918 const Scalar vals[], 00919 const LO numOffsets) const 00920 { 00921 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) { 00922 // We modified no values, because the input local row index is 00923 // invalid on the calling process. That may not be an error, if 00924 // numColInds is zero anyway; it doesn't matter. This is the 00925 // advantage of returning the number of valid indices. 
00926 return static_cast<LO> (0); 00927 } 00928 00929 const size_t absRowBlockOffset = ptr_[localRowInd]; 00930 const size_t perBlockSize = static_cast<LO> (offsetPerBlock ()); 00931 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid (); 00932 size_t pointOffset = 0; // Current offset into input values 00933 LO validCount = 0; // number of valid offsets 00934 00935 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) { 00936 const size_t relBlockOffset = offsets[k]; 00937 if (relBlockOffset != STINV) { 00938 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset; 00939 little_block_type A_old = 00940 getNonConstLocalBlockFromAbsOffset (absBlockOffset); 00941 const_little_block_type A_new = 00942 getConstLocalBlockFromInput (vals, pointOffset); 00943 A_old.update (STS::one (), A_new); 00944 ++validCount; 00945 } 00946 } 00947 return validCount; 00948 } 00949 00950 00951 template<class Scalar, class LO, class GO, class Node> 00952 size_t 00953 BlockCrsMatrix<Scalar, LO, GO, Node>:: 00954 getNumEntriesInLocalRow (const LO localRowInd) const 00955 { 00956 const size_t numEntInGraph = graph_.getNumEntriesInLocalRow (localRowInd); 00957 if (numEntInGraph == Teuchos::OrdinalTraits<size_t>::invalid ()) { 00958 return static_cast<LO> (0); // the calling process doesn't have that row 00959 } else { 00960 return static_cast<LO> (numEntInGraph); 00961 } 00962 } 00963 00964 template<class Scalar, class LO, class GO, class Node> 00965 void 00966 BlockCrsMatrix<Scalar, LO, GO, Node>:: 00967 applyBlockTrans (const BlockMultiVector<Scalar, LO, GO, Node>& X, 00968 BlockMultiVector<Scalar, LO, GO, Node>& Y, 00969 const Teuchos::ETransp mode, 00970 const Scalar alpha, 00971 const Scalar beta) 00972 { 00973 (void) X; 00974 (void) Y; 00975 (void) mode; 00976 (void) alpha; 00977 (void) beta; 00978 00979 TEUCHOS_TEST_FOR_EXCEPTION( 00980 true, std::logic_error, "Tpetra::Experimental::BlockCrsMatrix::apply: " 00981 "transpose and conjugate transpose modes 
are not yet implemented."); 00982 } 00983 00984 template<class Scalar, class LO, class GO, class Node> 00985 void 00986 BlockCrsMatrix<Scalar, LO, GO, Node>:: 00987 applyBlockNoTrans (const BlockMultiVector<Scalar, LO, GO, Node>& X, 00988 BlockMultiVector<Scalar, LO, GO, Node>& Y, 00989 const Scalar alpha, 00990 const Scalar beta) 00991 { 00992 using Teuchos::RCP; 00993 using Teuchos::rcp; 00994 typedef Tpetra::Import<LO, GO, Node> import_type; 00995 typedef Tpetra::Export<LO, GO, Node> export_type; 00996 const Scalar zero = STS::zero (); 00997 const Scalar one = STS::one (); 00998 RCP<const import_type> import = graph_.getImporter (); 00999 // "export" is a reserved C++ keyword, so we can't use it. 01000 RCP<const export_type> theExport = graph_.getExporter (); 01001 01002 // FIXME (mfh 20 May 2014) X.mv_ and Y.mv_ requires a friend 01003 // declaration, which is useful only for debugging. 01004 TEUCHOS_TEST_FOR_EXCEPTION( 01005 X.mv_.getCopyOrView () != Teuchos::View, std::invalid_argument, 01006 "Tpetra::Experimental::BlockCrsMatrix::applyBlockNoTrans: " 01007 "The input BlockMultiVector X has deep copy semantics, " 01008 "not view semantics (as it should)."); 01009 TEUCHOS_TEST_FOR_EXCEPTION( 01010 Y.mv_.getCopyOrView () != Teuchos::View, std::invalid_argument, 01011 "Tpetra::Experimental::BlockCrsMatrix::applyBlockNoTrans: " 01012 "The output BlockMultiVector Y has deep copy semantics, " 01013 "not view semantics (as it should)."); 01014 01015 if (alpha == zero) { 01016 if (beta == zero) { 01017 Y.putScalar (zero); // replace Inf or NaN (BLAS rules) 01018 } else if (beta != one) { 01019 Y.scale (beta); 01020 } 01021 } else { // alpha != 0 01022 const BMV* X_colMap = NULL; 01023 if (import.is_null ()) { 01024 try { 01025 X_colMap = &X; 01026 } catch (std::exception& e) { 01027 TEUCHOS_TEST_FOR_EXCEPTION( 01028 true, std::logic_error, "Tpetra::Experimental::BlockCrsMatrix::" 01029 "applyBlockNoTrans:" << std::endl << "Tpetra::MultiVector::" 01030 "operator= 
threw an exception: " << std::endl << e.what ()); 01031 } 01032 } else { 01033 // X_colMap_ is a pointer to a pointer to BMV. Ditto for 01034 // Y_rowMap_ below. This lets us do lazy initialization 01035 // correctly with view semantics of BlockCrsMatrix. All views 01036 // of this BlockCrsMatrix have the same outer pointer. That 01037 // way, we can set the inner pointer in one view, and all 01038 // other views will see it. 01039 if ((*X_colMap_).is_null () || 01040 (**X_colMap_).getNumVectors () != X.getNumVectors () || 01041 (**X_colMap_).getBlockSize () != X.getBlockSize ()) { 01042 *X_colMap_ = rcp (new BMV (* (graph_.getColMap ()), getBlockSize (), 01043 static_cast<LO> (X.getNumVectors ()))); 01044 } 01045 (**X_colMap_).doImport (X, *import, Tpetra::REPLACE); 01046 try { 01047 X_colMap = &(**X_colMap_); 01048 } catch (std::exception& e) { 01049 TEUCHOS_TEST_FOR_EXCEPTION( 01050 true, std::logic_error, "Tpetra::Experimental::BlockCrsMatrix::" 01051 "applyBlockNoTrans:" << std::endl << "Tpetra::MultiVector::" 01052 "operator= threw an exception: " << std::endl << e.what ()); 01053 } 01054 } 01055 01056 BMV* Y_rowMap = NULL; 01057 if (theExport.is_null ()) { 01058 Y_rowMap = &Y; 01059 } else if ((*Y_rowMap_).is_null () || 01060 (**Y_rowMap_).getNumVectors () != Y.getNumVectors () || 01061 (**Y_rowMap_).getBlockSize () != Y.getBlockSize ()) { 01062 *Y_rowMap_ = rcp (new BMV (* (graph_.getRowMap ()), getBlockSize (), 01063 static_cast<LO> (X.getNumVectors ()))); 01064 try { 01065 Y_rowMap = &(**Y_rowMap_); 01066 } catch (std::exception& e) { 01067 TEUCHOS_TEST_FOR_EXCEPTION( 01068 true, std::logic_error, "Tpetra::Experimental::BlockCrsMatrix::" 01069 "applyBlockNoTrans:" << std::endl << "Tpetra::MultiVector::" 01070 "operator= threw an exception: " << std::endl << e.what ()); 01071 } 01072 } 01073 01074 localApplyBlockNoTrans (*X_colMap, *Y_rowMap, alpha, beta); 01075 01076 if (! 
theExport.is_null ()) { 01077 Y.doExport (*Y_rowMap, *theExport, Tpetra::REPLACE); 01078 } 01079 } 01080 } 01081 01082 template<class Scalar, class LO, class GO, class Node> 01083 void 01084 BlockCrsMatrix<Scalar, LO, GO, Node>:: 01085 localApplyBlockNoTrans (const BlockMultiVector<Scalar, LO, GO, Node>& X, 01086 BlockMultiVector<Scalar, LO, GO, Node>& Y, 01087 const Scalar alpha, 01088 const Scalar beta) 01089 { 01090 // If using (new) Kokkos, prefer Kokkos::ArithTraits to 01091 // Teuchos::ScalarTraits. 01092 const Scalar zero = STS::zero (); 01093 const Scalar one = STS::one (); 01094 const LO numLocalMeshRows = 01095 static_cast<LO> (rowMeshMap_.getNodeNumElements ()); 01096 const LO numVecs = static_cast<LO> (X.getNumVectors ()); 01097 01098 // If using (new) Kokkos, replace localMem with thread-local 01099 // memory. Note that for larger block sizes, this will affect the 01100 // two-level parallelization. Look to Stokhos for best practice 01101 // on making this fast for GPUs. 01102 const LO blockSize = getBlockSize (); 01103 Teuchos::Array<Scalar> localMem (blockSize); 01104 little_vec_type Y_lcl (localMem.getRawPtr (), blockSize, 1); 01105 01106 if (numVecs == 1) { 01107 for (LO lclRow = 0; lclRow < numLocalMeshRows; ++lclRow) { 01108 little_vec_type Y_cur = Y.getLocalBlock (lclRow, 0); 01109 01110 if (beta == zero) { 01111 Y_lcl.fill (zero); 01112 } else if (beta == one) { 01113 Y_lcl.assign (Y_cur); 01114 } else { 01115 Y_lcl.assign (Y_cur); 01116 Y_lcl.scale (beta); 01117 } 01118 01119 const size_t meshBeg = ptr_[lclRow]; 01120 const size_t meshEnd = ptr_[lclRow+1]; 01121 for (size_t absBlkOff = meshBeg; absBlkOff < meshEnd; ++absBlkOff) { 01122 const LO meshCol = ind_[absBlkOff]; 01123 const_little_block_type A_cur = 01124 getConstLocalBlockFromAbsOffset (absBlkOff); 01125 little_vec_type X_cur = X.getLocalBlock (meshCol, 0); 01126 // Y_lcl += alpha*A_cur*X_cur 01127 Y_lcl.matvecUpdate (alpha, A_cur, X_cur); 01128 } // for each entry in the current 
local row of the matrx 01129 01130 Y_cur.assign (Y_lcl); 01131 } // for each local row of the matrix 01132 } 01133 else { 01134 for (LO lclRow = 0; lclRow < numLocalMeshRows; ++lclRow) { 01135 for (LO j = 0; j < numVecs; ++j) { 01136 little_vec_type Y_cur = Y.getLocalBlock (lclRow, j); 01137 01138 if (beta == zero) { 01139 Y_lcl.fill (zero); 01140 } else if (beta == one) { 01141 Y_lcl.assign (Y_cur); 01142 } else { 01143 Y_lcl.assign (Y_cur); 01144 Y_lcl.scale (beta); 01145 } 01146 01147 const size_t meshBeg = ptr_[lclRow]; 01148 const size_t meshEnd = ptr_[lclRow+1]; 01149 for (size_t absBlkOff = meshBeg; absBlkOff < meshEnd; ++absBlkOff) { 01150 const LO meshCol = ind_[absBlkOff]; 01151 const_little_block_type A_cur = 01152 getConstLocalBlockFromAbsOffset (absBlkOff); 01153 little_vec_type X_cur = X.getLocalBlock (meshCol, j); 01154 // Y_lcl += alpha*A_cur*X_cur 01155 Y_lcl.matvecUpdate (alpha, A_cur, X_cur); 01156 } // for each entry in the current local row of the matrix 01157 01158 Y_cur.assign (Y_lcl); 01159 } // for each entry in the current row of Y 01160 } // for each local row of the matrix 01161 } 01162 } 01163 01164 template<class Scalar, class LO, class GO, class Node> 01165 size_t 01166 BlockCrsMatrix<Scalar, LO, GO, Node>:: 01167 findRelOffsetOfColumnIndex (const LO localRowIndex, 01168 const LO colIndexToFind, 01169 const size_t hint) const 01170 { 01171 const size_t absStartOffset = ptr_[localRowIndex]; 01172 const size_t absEndOffset = ptr_[localRowIndex+1]; 01173 const size_t numEntriesInRow = absEndOffset - absStartOffset; 01174 01175 // If the hint was correct, then the hint is the offset to return. 01176 if (hint < numEntriesInRow && ind_[absStartOffset+hint] == colIndexToFind) { 01177 // Always return the offset relative to the current row. 01178 return hint; 01179 } 01180 01181 // The hint was wrong, so we must search for the given column 01182 // index in the column indices for the given row. 
01183 size_t relOffset = Teuchos::OrdinalTraits<size_t>::invalid (); 01184 01185 // We require that the graph have sorted rows. However, binary 01186 // search only pays if the current row is longer than a certain 01187 // amount. We set this to 32, but you might want to tune this. 01188 const size_t maxNumEntriesForLinearSearch = 32; 01189 if (numEntriesInRow > maxNumEntriesForLinearSearch) { 01190 // Use binary search. It would probably be better for us to 01191 // roll this loop by hand. If we wrote it right, a smart 01192 // compiler could perhaps use conditional loads and avoid 01193 // branches (according to Jed Brown on May 2014). 01194 const LO* beg = ind_ + absStartOffset; 01195 const LO* end = ind_ + absEndOffset; 01196 std::pair<const LO*, const LO*> p = 01197 std::equal_range (beg, end, colIndexToFind); 01198 if (p.first != p.second) { 01199 // offset is relative to the current row 01200 relOffset = static_cast<size_t> (p.first - beg); 01201 } 01202 } 01203 else { // use linear search 01204 for (size_t k = 0; k < numEntriesInRow; ++k) { 01205 if (colIndexToFind == ind_[absStartOffset + k]) { 01206 relOffset = k; // offset is relative to the current row 01207 break; 01208 } 01209 } 01210 } 01211 01212 return relOffset; 01213 } 01214 01215 template<class Scalar, class LO, class GO, class Node> 01216 LO 01217 BlockCrsMatrix<Scalar, LO, GO, Node>:: 01218 offsetPerBlock () const 01219 { 01220 const LO numRows = blockSize_; 01221 01222 LO numCols = blockSize_; 01223 if (columnPadding_ > 0) { // Column padding == 0 means no padding. 01224 const LO numColsRoundedDown = (blockSize_ / columnPadding_) * columnPadding_; 01225 numCols = (numColsRoundedDown < numCols) ? 
01226 (numColsRoundedDown + columnPadding_) : 01227 numColsRoundedDown; 01228 } 01229 return numRows * numCols; 01230 } 01231 01232 template<class Scalar, class LO, class GO, class Node> 01233 typename BlockCrsMatrix<Scalar, LO, GO, Node>::const_little_block_type 01234 BlockCrsMatrix<Scalar, LO, GO, Node>:: 01235 getConstLocalBlockFromInput (const Scalar* val, const size_t pointOffset) const 01236 { 01237 if (rowMajor_) { 01238 const size_t rowStride = (columnPadding_ == 0) ? 01239 static_cast<size_t> (blockSize_) : static_cast<size_t> (columnPadding_); 01240 return const_little_block_type (val + pointOffset, blockSize_, rowStride, 1); 01241 } else { 01242 return const_little_block_type (val + pointOffset, blockSize_, 1, blockSize_); 01243 } 01244 } 01245 01246 template<class Scalar, class LO, class GO, class Node> 01247 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_type 01248 BlockCrsMatrix<Scalar, LO, GO, Node>:: 01249 getNonConstLocalBlockFromInput (Scalar* val, const size_t pointOffset) const 01250 { 01251 if (rowMajor_) { 01252 const size_t rowStride = (columnPadding_ == 0) ? 01253 static_cast<size_t> (blockSize_) : static_cast<size_t> (columnPadding_); 01254 return little_block_type (val + pointOffset, blockSize_, rowStride, 1); 01255 } else { 01256 return little_block_type (val + pointOffset, blockSize_, 1, blockSize_); 01257 } 01258 } 01259 01260 template<class Scalar, class LO, class GO, class Node> 01261 typename BlockCrsMatrix<Scalar, LO, GO, Node>::const_little_block_type 01262 BlockCrsMatrix<Scalar, LO, GO, Node>:: 01263 getConstLocalBlockFromAbsOffset (const size_t absBlockOffset) const 01264 { 01265 if (absBlockOffset >= ptr_[rowMeshMap_.getNodeNumElements ()]) { 01266 // An empty block signifies an error. We don't expect to see 01267 // this error in correct code, but it's helpful for avoiding 01268 // memory corruption in case there is a bug. 
01269 return const_little_block_type (NULL, 0, 0, 0); 01270 } else { 01271 const size_t absPointOffset = absBlockOffset * offsetPerBlock (); 01272 return getConstLocalBlockFromInput (val_, absPointOffset); 01273 } 01274 } 01275 01276 template<class Scalar, class LO, class GO, class Node> 01277 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_type 01278 BlockCrsMatrix<Scalar, LO, GO, Node>:: 01279 getNonConstLocalBlockFromAbsOffset (const size_t absBlockOffset) const 01280 { 01281 if (absBlockOffset >= ptr_[rowMeshMap_.getNodeNumElements ()]) { 01282 // An empty block signifies an error. We don't expect to see 01283 // this error in correct code, but it's helpful for avoiding 01284 // memory corruption in case there is a bug. 01285 return little_block_type (NULL, 0, 0, 0); 01286 } else { 01287 const size_t absPointOffset = absBlockOffset * offsetPerBlock (); 01288 return getNonConstLocalBlockFromInput (const_cast<Scalar*> (val_), 01289 absPointOffset); 01290 } 01291 } 01292 01293 template<class Scalar, class LO, class GO, class Node> 01294 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_type 01295 BlockCrsMatrix<Scalar, LO, GO, Node>:: 01296 getLocalBlock (const LO localRowInd, const LO localColInd) const 01297 { 01298 const size_t absRowBlockOffset = ptr_[localRowInd]; 01299 01300 size_t hint = 0; 01301 const size_t relBlockOffset = 01302 findRelOffsetOfColumnIndex (localRowInd, localColInd, hint); 01303 01304 if (relBlockOffset != Teuchos::OrdinalTraits<size_t>::invalid ()) { 01305 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset; 01306 return getNonConstLocalBlockFromAbsOffset (absBlockOffset); 01307 } 01308 else 01309 { 01310 return little_block_type (NULL, 0, 0, 0); 01311 } 01312 01313 } 01314 01315 01316 template<class Scalar, class LO, class GO, class Node> 01317 bool 01318 BlockCrsMatrix<Scalar, LO, GO, Node>:: 01319 checkSizes (const Tpetra::SrcDistObject& source) 01320 { 01321 typedef BlockCrsMatrix<Scalar, LO, GO, 
Node> this_type; 01322 const this_type* src = dynamic_cast<const this_type* > (&source); 01323 01324 // Clear out the current local error state. 01325 * (const_cast<this_type*> (this)->localError_) = false; 01326 *errs_ = Teuchos::null; 01327 01328 // We don't allow block sizes to be inconsistent across processes, 01329 // but it's possible due to user error. It costs an all-reduce to 01330 // check in the constructor; we want to save that. 01331 if (src == NULL || 01332 src->getBlockSize () != this->getBlockSize () || 01333 ! src->graph_.isFillComplete () || 01334 ! this->graph_.isFillComplete () || 01335 src->graph_.getColMap ().is_null () || 01336 this->graph_.getColMap ().is_null ()) { 01337 * (const_cast<this_type*> (this)->localError_) = true; 01338 if ((*errs_).is_null ()) { 01339 *errs_ = Teuchos::rcp (new std::ostringstream ()); 01340 } 01341 } 01342 01343 if (src == NULL) { 01344 **errs_ << "checkSizes: The source object of the Import or Export " 01345 "must be a BlockCrsMatrix with the same template parameters as the " 01346 "target object." << std::endl; 01347 } 01348 else { 01349 // Use a string of ifs, not if-elseifs, because we want to know 01350 // all the errors. 01351 if (src->getBlockSize () != this->getBlockSize ()) { 01352 **errs_ << "checkSizes: The source and target objects of the Import or " 01353 << "Export must have the same block sizes. The source's block " 01354 << "size = " << src->getBlockSize () << " != the target's block " 01355 << "size = " << this->getBlockSize () << "." << std::endl; 01356 } 01357 if (! src->graph_.isFillComplete ()) { 01358 **errs_ << "checkSizes: The source object of the Import or Export is " 01359 "not fill complete. Both source and target objects must be fill " 01360 "complete." << std::endl; 01361 } 01362 if (! this->graph_.isFillComplete ()) { 01363 **errs_ << "checkSizes: The target object of the Import or Export is " 01364 "not fill complete. Both source and target objects must be fill " 01365 "complete." 
<< std::endl; 01366 } 01367 if (src->graph_.getColMap ().is_null ()) { 01368 **errs_ << "checkSizes: The source object of the Import or Export does " 01369 "not have a column Map. Both source and target objects must have " 01370 "column Maps." << std::endl; 01371 } 01372 if (this->graph_.getColMap ().is_null ()) { 01373 **errs_ << "checkSizes: The target object of the Import or Export does " 01374 "not have a column Map. Both source and target objects must have " 01375 "column Maps." << std::endl; 01376 } 01377 } 01378 01379 return ! (* (this->localError_)); 01380 } 01381 01382 template<class Scalar, class LO, class GO, class Node> 01383 void 01384 BlockCrsMatrix<Scalar, LO, GO, Node>:: 01385 copyAndPermute (const Tpetra::SrcDistObject& source, 01386 size_t numSameIDs, 01387 const Teuchos::ArrayView<const LO>& permuteToLIDs, 01388 const Teuchos::ArrayView<const LO>& permuteFromLIDs) 01389 { 01390 typedef BlockCrsMatrix<Scalar, LO, GO, Node> this_type; 01391 const this_type* src = dynamic_cast<const this_type* > (&source); 01392 if (src == NULL) { 01393 * (this->localError_) = true; 01394 if ((*errs_).is_null ()) { 01395 *errs_ = Teuchos::rcp (new std::ostringstream ()); 01396 } 01397 **errs_ << "copyAndPermute: The source object of the Import or Export is " 01398 "either not a BlockCrsMatrix, or does not have the right template " 01399 "parameters. checkSizes() should have caught this. " 01400 "Please report this bug to the Tpetra developers." << std::endl; 01401 // There's no communication in this method, so it's safe just to 01402 // return on error. 01403 return; 01404 } 01405 01406 bool srcInvalidRow = false; 01407 bool dstInvalidCol = false; 01408 01409 // Copy the initial sequence of rows that are the same. 01410 // 01411 // The two graphs might have different column Maps, so we need to 01412 // do this using global column indices. This is purely local, so 01413 // we only need to check for local sameness of the two column 01414 // Maps. 
01415 01416 const map_type& srcMap = * (src->graph_.getColMap ()); 01417 const map_type& tgtMap = * (this->graph_.getColMap ()); 01418 const bool canUseLocalColumnIndices = srcMap.locallySameAs (tgtMap); 01419 const size_t numPermute = 01420 std::min (permuteToLIDs.size (), permuteFromLIDs.size ()); 01421 01422 if (canUseLocalColumnIndices) { 01423 // Copy local rows that are the "same" in both source and target. 01424 for (size_t localRow = 0; localRow < numSameIDs; ++localRow) { 01425 const LO* lclSrcCols; 01426 Scalar* vals; 01427 LO numEntries; 01428 // If this call fails, that means the mesh row local index is 01429 // invalid. That means the Import or Export is invalid somehow. 01430 LO err = src->getLocalRowView (localRow, lclSrcCols, vals, numEntries); 01431 if (err != 0) { 01432 srcInvalidRow = true; 01433 } 01434 else { 01435 err = this->replaceLocalValues (localRow, lclSrcCols, vals, numEntries); 01436 if (err != numEntries) { 01437 dstInvalidCol = true; 01438 } 01439 } 01440 } 01441 01442 // Copy the "permute" local rows. 01443 for (size_t k = 0; k < numPermute; ++k) { 01444 const LO* lclSrcCols; 01445 Scalar* vals; 01446 LO numEntries; 01447 LO err = src->getLocalRowView (permuteFromLIDs[k], lclSrcCols, vals, numEntries); 01448 if (err != 0) { 01449 srcInvalidRow = true; 01450 } 01451 else { 01452 err = this->replaceLocalValues (permuteFromLIDs[k], lclSrcCols, vals, numEntries); 01453 if (err != numEntries) { 01454 dstInvalidCol = true; 01455 } 01456 } 01457 } 01458 } 01459 else { 01460 // Reserve space to store the destination matrix's local column indices. 01461 Teuchos::Array<LO> lclDstCols (src->graph_.getNodeMaxNumRowEntries ()); 01462 01463 // Copy local rows that are the "same" in both source and target. 01464 for (size_t localRow = 0; localRow < numSameIDs; ++localRow) { 01465 const LO* lclSrcCols; 01466 Scalar* vals; 01467 LO numEntries; 01468 // If this call fails, that means the mesh row local index is 01469 // invalid. 
That means the Import or Export is invalid somehow. 01470 LO err = src->getLocalRowView (localRow, lclSrcCols, vals, numEntries); 01471 if (err != 0) { 01472 srcInvalidRow = true; 01473 } 01474 else { 01475 // Convert the source matrix's local column indices to the 01476 // destination matrix's local column indices. 01477 Teuchos::ArrayView<LO> lclDstColsView = lclDstCols.view (0, numEntries); 01478 for (LO k = 0; k < numEntries; ++k) { 01479 lclDstColsView[k] = tgtMap.getLocalElement (srcMap.getGlobalElement (lclSrcCols[k])); 01480 } 01481 err = this->replaceLocalValues (localRow, lclDstColsView.getRawPtr (), vals, numEntries); 01482 if (err != numEntries) { 01483 dstInvalidCol = true; 01484 } 01485 } 01486 } 01487 01488 // Copy the "permute" local rows. 01489 for (size_t k = 0; k < numPermute; ++k) { 01490 const LO* lclSrcCols; 01491 Scalar* vals; 01492 LO numEntries; 01493 LO err = src->getLocalRowView (permuteFromLIDs[k], lclSrcCols, vals, numEntries); 01494 if (err != 0) { 01495 srcInvalidRow = true; 01496 } 01497 else { 01498 // Convert the source matrix's local column indices to the 01499 // destination matrix's local column indices. 01500 Teuchos::ArrayView<LO> lclDstColsView = lclDstCols.view (0, numEntries); 01501 for (LO j = 0; j < numEntries; ++j) { 01502 lclDstColsView[j] = tgtMap.getLocalElement (srcMap.getGlobalElement (lclSrcCols[j])); 01503 } 01504 err = this->replaceLocalValues (permuteFromLIDs[k], lclDstColsView.getRawPtr (), vals, numEntries); 01505 if (err != numEntries) { 01506 dstInvalidCol = true; 01507 } 01508 } 01509 } 01510 } 01511 01512 // FIXME (mfh 19 May 2014) Would be great to have a way to report 01513 // errors, without throwing an exception. The latter would be bad 01514 // in the case of differing block sizes, just in case those block 01515 // sizes are inconsistent across processes. (The latter is not 01516 // allowed, but possible due to user error.) 
01517 01518 if (srcInvalidRow || dstInvalidCol) { 01519 * (this->localError_) = true; 01520 if ((*errs_).is_null ()) { 01521 *errs_ = Teuchos::rcp (new std::ostringstream ()); 01522 } 01523 **errs_ << "copyAndPermute: The graph structure of the source of the " 01524 "Import or Export must be a subset of the graph structure of the " 01525 "target." << std::endl; 01526 } 01527 } 01528 01529 template<class Scalar, class LO, class GO, class Node> 01530 void 01531 BlockCrsMatrix<Scalar, LO, GO, Node>:: 01532 packAndPrepare (const Tpetra::SrcDistObject& source, 01533 const Teuchos::ArrayView<const LO>& exportLIDs, 01534 Teuchos::Array<packet_type>& exports, 01535 const Teuchos::ArrayView<size_t>& numPacketsPerLID, 01536 size_t& constantNumPackets, 01537 Tpetra::Distributor& /* distor */) 01538 { 01539 typedef BlockCrsMatrix<Scalar, LO, GO, Node> this_type; 01540 const bool debug = true; 01541 const this_type* src = dynamic_cast<const this_type* > (&source); 01542 01543 // Should have checked for this case in checkSizes(). 01544 TEUCHOS_TEST_FOR_EXCEPTION( 01545 src == NULL, std::logic_error, "Tpetra::Experimental::BlockCrsMatrix::" 01546 "packAndPrepare: The source object of the Import or Export is either " 01547 "not a BlockCrsMatrix, or does not have the right template parameters. " 01548 "checkSizes() should have caught this. " 01549 "Please report this bug to the Tpetra developers."); 01550 01551 if (debug) { 01552 const int myRank = graph_.getComm ()->getRank (); 01553 std::ostringstream os; 01554 os << "Proc " << myRank << ": packAndPrepare" << std::endl; 01555 std::cerr << os.str (); 01556 } 01557 01558 const crs_graph_type& srcGraph = src->graph_; 01559 const size_t blockSize = static_cast<size_t> (src->getBlockSize ()); 01560 const size_t bytesPerBlock = blockSize * blockSize * sizeof (Scalar); 01561 01562 // Graphs and matrices are allowed to have a variable number of 01563 // entries per row. 
We could test whether all rows have the same 01564 // number of entries, but DistObject can only use this 01565 // optimization if all rows on _all_ processes have the same 01566 // number of entries. Rather than do the all-reduce necessary to 01567 // test for this unlikely case, we tell DistObject (by setting 01568 // constantNumPackets to zero) to assume that different rows may 01569 // have different numbers of entries. 01570 constantNumPackets = 0; 01571 01572 // Count the number of packets per row. The number of packets is 01573 // the number of entries to send. However, we don't pack entries 01574 // contiguously. Instead, we put the column indices first, and 01575 // then all the block values. This makes it easier to align the 01576 // latter to sizeof (Scalar), and reduces the number of memcpy 01577 // operations necessary to copy in the row's data. (We have to 01578 // use memcpy because of ANSI C(++) aliasing rules, which 01579 // compilers now enforce.) We pack global column indices, because 01580 // the source and target matrices are allowed to have different 01581 // column Maps. 01582 01583 // Byte alignment of the start of the block values in each row's 01584 // data. We use the max in case sizeof(Scalar) < sizeof(GO), 01585 // e.g., float vs. long long. 01586 const size_t alignment = std::max (sizeof (Scalar), sizeof (GO)); 01587 01588 // While counting the number of packets per row, compute the total 01589 // required send buffer ('exports') size, so we can resize it if 01590 // needed. Also count the total number of entries to send, as a 01591 // sanity check for the total buffer size. 
01592 size_t totalBufSize = 0; 01593 size_t totalNumEnt = 0; 01594 01595 for (size_t k = 0; k < static_cast<size_t> (exportLIDs.size ()); ++k) { 01596 const LO localRow = exportLIDs[k]; 01597 const size_t numEnt = srcGraph.getNumEntriesInLocalRow (localRow); 01598 totalNumEnt += numEnt; 01599 01600 // If any given LIDs are invalid, the above might return either 01601 // zero or the invalid size_t value. If the former, we have no 01602 // way to tell, but that's OK; it just means the calling process 01603 // won't pack anything (it has nothing to pack anyway). If the 01604 // latter, we replace it with zero (that row is not owned by the 01605 // calling process, so it has no entries to pack), and set the 01606 // local error flag. 01607 if (numEnt == Teuchos::OrdinalTraits<size_t>::invalid ()) { 01608 numPacketsPerLID[localRow] = static_cast<size_t> (0); 01609 * (this->localError_) = true; 01610 } else { 01611 numPacketsPerLID[localRow] = numEnt; 01612 } 01613 01614 // Space for the global column indices in this row. 01615 totalBufSize += numEnt * sizeof (GO); 01616 01617 // Padding, so that the block values are Scalar aligned. 01618 { 01619 const size_t rem = (numEnt * sizeof (GO)) % alignment; 01620 const size_t padding = (rem == 0) ? 01621 static_cast<size_t> (0) : 01622 alignment - rem; 01623 totalBufSize += padding; 01624 } 01625 01626 // Space for the block values in this row. We don't include 01627 // padding for vectorization when packing the blocks. 01628 totalBufSize += numEnt * bytesPerBlock; 01629 01630 // In case sizeof(Scalar) < sizeof(GO) (e.g., float vs. long 01631 // long), we pad the end of the send buffer so that the next row 01632 // is aligned to sizeof(GO) bytes. 01633 { 01634 const size_t rem = (numEnt * bytesPerBlock) % sizeof (GO); 01635 const size_t padding = (rem == 0) ? 
01636 static_cast<size_t> (0) : 01637 sizeof (GO) - rem; 01638 totalBufSize += padding; 01639 } 01640 } 01641 01642 if (debug) { 01643 const int myRank = graph_.getComm ()->getRank (); 01644 std::ostringstream os; 01645 os << "Proc " << myRank << ": packAndPrepare: totalBufSize = " 01646 << totalBufSize << std::endl; 01647 std::cerr << os.str (); 01648 } 01649 01650 { 01651 const size_t minSaneBufSize = totalNumEnt * (bytesPerBlock + sizeof (GO)); 01652 // If the test triggers, there's no point in setting the error 01653 // flag here, because the above code is completely broken. This 01654 // probably means that the code below is broken as well. 01655 TEUCHOS_TEST_FOR_EXCEPTION( 01656 minSaneBufSize < totalBufSize, std::logic_error, "Tpetra::Experimental" 01657 "::BlockCrsMatrix: This method must have computed the total send buffer" 01658 " size incorrectly. The minimum size in bytes without padding for " 01659 "alignment is " << minSaneBufSize << ", but this method computed the " 01660 "size in bytes _with_ padding for alignment as a lesser number " << 01661 totalBufSize << ". Please report this bug to the Tpetra developers."); 01662 } 01663 01664 // packAndPrepare is responsible for resizing the send buffer. 01665 exports.resize (totalBufSize); 01666 01667 // In the loop below: Current offset in bytes into the 'exports' 01668 // array; where to start putting the current row's data. 01669 size_t exportsOffset = 0; 01670 01671 // Temporary space for global column indices. 01672 Teuchos::Array<GO> gblColIndsSpace (srcGraph.getNodeMaxNumRowEntries ()); 01673 01674 // Temporary row-major contiguous buffer for a block's entries. 01675 Teuchos::Array<Scalar> tempBlockSpace (blockSize * blockSize); 01676 little_block_type tempBlock (tempBlockSpace.getRawPtr (), blockSize, blockSize, 1); 01677 01678 // Source matrix's column Map. We verified in checkSizes() that 01679 // the column Map exists (is not null). 
01680 const map_type& srcColMap = * (srcGraph.getColMap ()); 01681 01682 // Pack the data for each row to send, into the 'exports' buffer. 01683 // If any given LIDs are invalid, we pack obviously invalid data 01684 // (e.g., invalid column indices) into the buffer for that LID, 01685 // and set the local error flag. 01686 for (size_t lidInd = 0; lidInd < static_cast<size_t> (exportLIDs.size ()); ++lidInd) { 01687 // Get a view of row exportLIDs[lidInd]. 01688 const LO lclRowInd = exportLIDs[lidInd]; 01689 const LO* lclColInds; 01690 Scalar* vals; 01691 LO numEnt; 01692 const int err = src->getLocalRowView (lclRowInd, lclColInds, vals, numEnt); 01693 if (err != 0) { 01694 * (localError_) = true; 01695 // TODO (mfh 20 May 2014) Report the local error, without 01696 // printing a line for each bad LID. It might help to collect 01697 // all the bad LIDs, but don't print them all if there are too 01698 // many. 01699 continue; 01700 } 01701 01702 // Convert column indices from local to global. 01703 Teuchos::ArrayView<GO> gblColInds = gblColIndsSpace.view (0, numEnt); 01704 for (size_t j = 0; j < static_cast<size_t> (numEnt); ++j) { 01705 gblColInds[j] = srcColMap.getGlobalElement (lclColInds[j]); 01706 } 01707 01708 // Pack the column indices. Use memcpy to follow ANSI aliasing rules. 01709 packet_type* exportsStart = &exports[exportsOffset]; 01710 memcpy (exportsStart, gblColInds.getRawPtr (), numEnt * sizeof (GO)); 01711 exportsOffset += numEnt * sizeof (GO); 01712 01713 // Compute padding so that block values are Scalar aligned. 01714 { 01715 const size_t rem = (numEnt * sizeof (GO)) % alignment; 01716 const size_t padding = (rem == 0) ? 01717 static_cast<size_t> (0) : 01718 alignment - rem; 01719 exportsOffset += padding; 01720 } 01721 01722 // Pack the block values in this row. Always pack row major, 01723 // for consistency, in case the source and target objects order 01724 // their blocks differently. 
01725 // 01726 // In order to follow ANSI aliasing rules that forbid certain 01727 // kinds of type punning, we copy each block's entries first 01728 // into a temporary contiguous buffer, and then memcpy into the 01729 // send buffer. We could just memcpy one entry at a time, but 01730 // it's probably faster to avoid the library call. 01731 const size_t rowStart = src->ptr_[lclRowInd]; 01732 for (size_t j = 0; j < static_cast<size_t> (numEnt); ++j) { 01733 const_little_block_type srcBlock = 01734 src->getConstLocalBlockFromAbsOffset (rowStart + j); 01735 if (static_cast<size_t> (srcBlock.getBlockSize ()) == blockSize) { 01736 // A block size of zero is a possible error state. Of 01737 // course, the actual block size could be zero. That would 01738 // be silly, but why shouldn't it be legal? That's why we 01739 // check whether the actual block size is different. 01740 * (localError_) = true; 01741 // Pack a block of zeros. It might make sense to pack NaNs, 01742 // if Scalar implements a NaN value. 01743 tempBlock.fill (STS::zero ()); 01744 } else { 01745 tempBlock.assign (srcBlock); 01746 } 01747 memcpy (&exports[exportsOffset], tempBlock.getRawPtr (), bytesPerBlock); 01748 exportsOffset += bytesPerBlock; 01749 } 01750 01751 // In case sizeof(Scalar) < sizeof(GO) (e.g., float vs. long 01752 // long), we pad the end of the send buffer so that the next row 01753 // is aligned to sizeof(GO) bytes. 01754 { 01755 const size_t rem = (numEnt * bytesPerBlock) % sizeof (GO); 01756 const size_t padding = (rem == 0) ? 
01757 static_cast<size_t> (0) : 01758 sizeof (GO) - rem; 01759 exportsOffset += padding; 01760 } 01761 } // for each LID (of a row) to send 01762 01763 01764 { 01765 const int myRank = graph_.getComm ()->getRank (); 01766 std::ostringstream os; 01767 os << "Proc " << myRank << ": packAndPrepare done" << std::endl; 01768 std::cerr << os.str (); 01769 } 01770 } 01771 01772 01773 template<class Scalar, class LO, class GO, class Node> 01774 void 01775 BlockCrsMatrix<Scalar, LO, GO, Node>:: 01776 unpackAndCombine (const Teuchos::ArrayView<const LO> &importLIDs, 01777 const Teuchos::ArrayView<const packet_type> &imports, 01778 const Teuchos::ArrayView<size_t> &numPacketsPerLID, 01779 size_t /* constantNumPackets */, // not worthwhile to use this 01780 Tpetra::Distributor& /* distor */, 01781 Tpetra::CombineMode CM) 01782 { 01783 if (CM != ADD && CM != INSERT && CM != REPLACE && CM != ABSMAX && CM != ZERO) { 01784 * (this->localError_) = true; 01785 if ((*errs_).is_null ()) { 01786 *errs_ = Teuchos::rcp (new std::ostringstream ()); 01787 **errs_ << "unpackAndCombine: Invalid CombineMode value " << CM << ". " 01788 << "Valid values include ADD, INSERT, REPLACE, ABSMAX, and ZERO." 01789 << std::endl; 01790 // It won't cause deadlock to return here, since this method 01791 // does not communicate. 01792 return; 01793 } 01794 } 01795 01796 if (CM == ZERO) { 01797 return; // nothing to do; no need to combine entries 01798 } 01799 01800 const size_t blockSize = this->getBlockSize (); 01801 const size_t bytesPerBlock = blockSize * blockSize * sizeof (Scalar); 01802 const size_t numImportLids = importLIDs.size (); 01803 01804 // Byte alignment of the start of the block values in each row's 01805 // data. We use the max in case sizeof(Scalar) < sizeof(GO), 01806 // e.g., float vs. long long. 01807 const size_t alignment = std::max (sizeof (Scalar), sizeof (GO)); 01808 01809 // Temporary space to cache local and global column indices. 
01810 // Column indices come in as global indices, in case the source 01811 // object's column Map differs from the target object's (this's) 01812 // column Map. We need the latter space in order to follow ANSI 01813 // aliasing rules (we don't want to type pun the buffer). 01814 Teuchos::Array<LO> lclColIndsSpace (graph_.getNodeMaxNumRowEntries ()); 01815 Teuchos::Array<GO> gblColIndsSpace (graph_.getNodeMaxNumRowEntries ()); 01816 01817 // Temporary contiguous buffer for a row's block entries. 01818 Teuchos::Array<Scalar> tempVals (graph_.getNodeMaxNumRowEntries ()); 01819 01820 // Target matrix's column Map. Use to convert the global column 01821 // indices in the receive buffer to local indices. We verified in 01822 // checkSizes() that the column Map exists (is not null). 01823 const map_type& tgtColMap = * (this->graph_.getColMap ()); 01824 01825 // In the loop below: Current offset (in bytes) into the 'imports' 01826 // array; from whence to unpack data for the current LID (row). 01827 size_t importsOffset = 0; 01828 01829 for (size_t lidInd = 0; lidInd < numImportLids; ++lidInd) { 01830 const size_t numEnt = numPacketsPerLID[lidInd]; // # entries in row 01831 const LO lclRowInd = importLIDs[lidInd]; // current local row index 01832 01833 // Lengths and offsets for packed data in the 'imports' array. 01834 // 01835 // We pack all column indices first, then all values. We pad 01836 // the former so that the latter is aligned to sizeof(Scalar). 01837 // 'padding' gives the size in bytes of the padding. 01838 const size_t numIndexBytes = numEnt * sizeof (GO); 01839 const size_t numBlockBytes = numEnt * bytesPerBlock; 01840 01841 // Views for storing global and local column indices. 01842 Teuchos::ArrayView<GO> gblColInds = gblColIndsSpace.view (0, numEnt); 01843 Teuchos::ArrayView<LO> lclColInds = lclColIndsSpace.view (0, numEnt); 01844 01845 // Unpack the global column indices from the receive buffer. 
01846 memcpy (gblColInds.getRawPtr (), &imports[importsOffset], numIndexBytes); 01847 // Convert global column indices to local. 01848 for (size_t j = 0; j < numEnt; ++j) { 01849 lclColInds[j] = tgtColMap.getLocalElement (gblColInds[j]); 01850 } 01851 01852 // Update the byte offset into the receive buffer. 01853 importsOffset += numIndexBytes; 01854 // Block values are aligned to max(sizeof(Scalar), sizeof(GO)). 01855 // Update the byte offset to account for padding to this alignment. 01856 { 01857 const size_t rem = numIndexBytes % alignment; 01858 const size_t padding = (rem == 0) ? 01859 static_cast<size_t> (0) : 01860 alignment - rem; 01861 importsOffset += padding; 01862 } 01863 01864 // Copy out data for _all_ the blocks. We memcpy into temp 01865 // storage, in order to follow ANSI aliasing rules that forbid 01866 // certain kinds of type punning. 01867 memcpy (tempVals.getRawPtr (), &imports[importsOffset], numBlockBytes); 01868 // Update the byte offset into the receive buffer. 01869 importsOffset += numBlockBytes; 01870 // In case sizeof(Scalar) < sizeof(GO) (e.g., float vs. long 01871 // long), the receive buffer was padded so that the next row is 01872 // aligned to sizeof(GO) bytes. 01873 const size_t rem = numBlockBytes % sizeof (GO); 01874 const size_t padding = (rem == 0) ? 01875 static_cast<size_t> (0) : 01876 sizeof (GO) - rem; 01877 importsOffset += padding; 01878 01879 // Combine the incoming data with the matrix's current data. 
01880 LO successCount = 0; 01881 if (CM == ADD) { 01882 successCount = 01883 this->sumIntoLocalValues (lclRowInd, lclColInds.getRawPtr (), 01884 tempVals.getRawPtr (), numEnt); 01885 } else if (CM == INSERT || CM == REPLACE) { 01886 successCount = 01887 this->replaceLocalValues (lclRowInd, lclColInds.getRawPtr (), 01888 tempVals.getRawPtr (), numEnt); 01889 } else if (CM == ABSMAX) { 01890 successCount = 01891 this->absMaxLocalValues (lclRowInd, lclColInds.getRawPtr (), 01892 tempVals.getRawPtr (), numEnt); 01893 } 01894 // We've already checked that CM is valid. 01895 01896 if (static_cast<size_t> (successCount) != numEnt) { 01897 * (localError_) = true; 01898 } 01899 } 01900 } 01901 01902 01903 template<class Scalar, class LO, class GO, class Node> 01904 std::string 01905 BlockCrsMatrix<Scalar, LO, GO, Node>::description () const 01906 { 01907 using Teuchos::TypeNameTraits; 01908 std::ostringstream os; 01909 os << "\"Tpetra::BlockCrsMatrix\": { " 01910 << "Template parameters: { " 01911 << "Scalar: " << TypeNameTraits<Scalar>::name () 01912 << "LO: " << TypeNameTraits<LO>::name () 01913 << "GO: " << TypeNameTraits<GO>::name () 01914 << "Node: " << TypeNameTraits<Node>::name () 01915 << " }" 01916 << ", Label: \"" << this->getObjectLabel () << "\"" 01917 << ", Global dimensions: [" 01918 << graph_.getDomainMap ()->getGlobalNumElements () << ", " 01919 << graph_.getRangeMap ()->getGlobalNumElements () << "]" 01920 << ", Block size: " << getBlockSize () 01921 << " }"; 01922 return os.str (); 01923 } 01924 01925 01926 template<class Scalar, class LO, class GO, class Node> 01927 void 01928 BlockCrsMatrix<Scalar, LO, GO, Node>:: 01929 describe (Teuchos::FancyOStream& out, 01930 const Teuchos::EVerbosityLevel verbLevel) const 01931 { 01932 using Teuchos::ArrayRCP; 01933 using Teuchos::CommRequest; 01934 using Teuchos::FancyOStream; 01935 using Teuchos::getFancyOStream; 01936 using Teuchos::ireceive; 01937 using Teuchos::isend; 01938 using Teuchos::outArg; 01939 using 
Teuchos::TypeNameTraits; 01940 using Teuchos::VERB_DEFAULT; 01941 using Teuchos::VERB_NONE; 01942 using Teuchos::VERB_LOW; 01943 // using Teuchos::VERB_MEDIUM; 01944 // using Teuchos::VERB_HIGH; 01945 using Teuchos::VERB_EXTREME; 01946 using Teuchos::RCP; 01947 using Teuchos::wait; 01948 using std::endl; 01949 01950 // Set default verbosity if applicable. 01951 const Teuchos::EVerbosityLevel vl = 01952 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel; 01953 01954 if (vl == VERB_NONE) { 01955 return; // print nothing 01956 } 01957 01958 // describe() always starts with a tab before it prints anything. 01959 Teuchos::OSTab tab0 (out); 01960 01961 out << "\"Tpetra::BlockCrsMatrix\":" << endl; 01962 Teuchos::OSTab tab1 (out); 01963 01964 out << "Template parameters:" << endl; 01965 { 01966 Teuchos::OSTab tab2 (out); 01967 out << "Scalar: " << TypeNameTraits<Scalar>::name () << endl 01968 << "LO: " << TypeNameTraits<LO>::name () << endl 01969 << "GO: " << TypeNameTraits<GO>::name () << endl 01970 << "Node: " << TypeNameTraits<Node>::name () << endl; 01971 } 01972 out << "Label: \"" << this->getObjectLabel () << "\"" << endl 01973 << "Global dimensions: [" 01974 << graph_.getDomainMap ()->getGlobalNumElements () << ", " 01975 << graph_.getRangeMap ()->getGlobalNumElements () << "]" << endl; 01976 01977 const LO blockSize = getBlockSize (); 01978 out << "Block size: " << blockSize << endl; 01979 01980 if (vl >= VERB_EXTREME) { 01981 const Teuchos::Comm<int>& comm = * (graph_.getMap ()->getComm ()); 01982 const int myRank = comm.getRank (); 01983 const int numProcs = comm.getSize (); 01984 01985 // Print the calling process' data to the given output stream. 
01986 RCP<std::ostringstream> lclOutStrPtr (new std::ostringstream ()); 01987 RCP<FancyOStream> osPtr = getFancyOStream (lclOutStrPtr); 01988 FancyOStream& os = *osPtr; 01989 os << "Process " << myRank << ":" << endl; 01990 Teuchos::OSTab tab2 (os); 01991 01992 const map_type& meshRowMap = * (graph_.getRowMap ()); 01993 const map_type& meshColMap = * (graph_.getColMap ()); 01994 for (LO meshLclRow = meshRowMap.getMinLocalIndex (); 01995 meshLclRow <= meshRowMap.getMaxLocalIndex (); 01996 ++meshLclRow) { 01997 const GO meshGblRow = meshRowMap.getGlobalElement (meshLclRow); 01998 os << "Row " << meshGblRow << ": {"; 01999 02000 const LO* lclColInds = NULL; 02001 Scalar* vals = NULL; 02002 LO numInds = 0; 02003 this->getLocalRowView (meshLclRow, lclColInds, vals, numInds); 02004 02005 for (LO k = 0; k < numInds; ++k) { 02006 const GO gblCol = meshColMap.getGlobalElement (lclColInds[k]); 02007 02008 os << "Col " << gblCol << ": ["; 02009 for (LO i = 0; i < blockSize; ++i) { 02010 for (LO j = 0; j < blockSize; ++j) { 02011 os << vals[blockSize*blockSize*k + i*blockSize + j]; 02012 if (j + 1 < blockSize) { 02013 os << ", "; 02014 } 02015 } 02016 if (i + 1 < blockSize) { 02017 os << "; "; 02018 } 02019 } 02020 os << "]"; 02021 if (k + 1 < numInds) { 02022 os << ", "; 02023 } 02024 } 02025 os << "}" << endl; 02026 } 02027 02028 // Print data on Process 0. This will automatically respect the 02029 // current indentation level. 02030 if (myRank == 0) { 02031 out << lclOutStrPtr->str (); 02032 lclOutStrPtr = Teuchos::null; // clear it to save space 02033 } 02034 02035 const int sizeTag = 1337; 02036 const int dataTag = 1338; 02037 02038 ArrayRCP<char> recvDataBuf; // only used on Process 0 02039 02040 // Send string sizes and data from each process in turn to 02041 // Process 0, and print on that process. 02042 for (int p = 1; p < numProcs; ++p) { 02043 if (myRank == 0) { 02044 // Receive the incoming string's length. 
02045 ArrayRCP<size_t> recvSize (1); 02046 recvSize[0] = 0; 02047 RCP<CommRequest<int> > recvSizeReq = 02048 ireceive<int, size_t> (recvSize, p, sizeTag, comm); 02049 wait<int> (comm, outArg (recvSizeReq)); 02050 const size_t numCharsToRecv = recvSize[0]; 02051 02052 // Allocate space for the string to receive. Reuse receive 02053 // buffer space if possible. We can do this because in the 02054 // current implementation, we only have one receive in 02055 // flight at a time. Leave space for the '\0' at the end, 02056 // in case the sender doesn't send it. 02057 if (static_cast<size_t>(recvDataBuf.size()) < numCharsToRecv + 1) { 02058 recvDataBuf.resize (numCharsToRecv + 1); 02059 } 02060 ArrayRCP<char> recvData = recvDataBuf.persistingView (0, numCharsToRecv); 02061 // Post the receive of the actual string data. 02062 RCP<CommRequest<int> > recvDataReq = 02063 ireceive<int, char> (recvData, p, dataTag, comm); 02064 wait<int> (comm, outArg (recvDataReq)); 02065 02066 // Print the received data. This will respect the current 02067 // indentation level. Make sure that the string is 02068 // null-terminated. 02069 recvDataBuf[numCharsToRecv] = '\0'; 02070 out << recvDataBuf.getRawPtr (); 02071 } 02072 else if (myRank == p) { // if I am not Process 0, and my rank is p 02073 // This deep-copies the string at most twice, depending on 02074 // whether std::string reference counts internally (it 02075 // generally does, so this won't deep-copy at all). 02076 const std::string stringToSend = lclOutStrPtr->str (); 02077 lclOutStrPtr = Teuchos::null; // clear original to save space 02078 02079 // Send the string's length to Process 0. 02080 const size_t numCharsToSend = stringToSend.size (); 02081 ArrayRCP<size_t> sendSize (1); 02082 sendSize[0] = numCharsToSend; 02083 RCP<CommRequest<int> > sendSizeReq = 02084 isend<int, size_t> (sendSize, 0, sizeTag, comm); 02085 wait<int> (comm, outArg (sendSizeReq)); 02086 02087 // Send the actual string to Process 0. 
We know that the 02088 // string has length > 0, so it's save to take the address 02089 // of the first entry. Make a nonowning ArrayRCP to hold 02090 // the string. Process 0 will add a null termination 02091 // character at the end of the string, after it receives the 02092 // message. 02093 ArrayRCP<const char> sendData (&stringToSend[0], 0, numCharsToSend, false); 02094 RCP<CommRequest<int> > sendDataReq = 02095 isend<int, char> (sendData, 0, dataTag, comm); 02096 wait<int> (comm, outArg (sendDataReq)); 02097 } 02098 } // for each process rank p other than 0 02099 } // extreme verbosity level (print the whole matrix) 02100 } 02101 02102 template<class Scalar, class LO, class GO, class Node> 02103 Teuchos::RCP<const Teuchos::Comm<int> > 02104 BlockCrsMatrix<Scalar, LO, GO, Node>:: 02105 getComm() const 02106 { 02107 return graph_.getComm(); 02108 } 02109 02110 template<class Scalar, class LO, class GO, class Node> 02111 Teuchos::RCP<Node> 02112 BlockCrsMatrix<Scalar, LO, GO, Node>:: 02113 getNode() const 02114 { 02115 return graph_.getNode(); 02116 02117 } 02118 02119 template<class Scalar, class LO, class GO, class Node> 02120 global_size_t 02121 BlockCrsMatrix<Scalar, LO, GO, Node>:: 02122 getGlobalNumCols() const 02123 { 02124 return graph_.getGlobalNumCols(); 02125 } 02126 02127 template<class Scalar, class LO, class GO, class Node> 02128 size_t 02129 BlockCrsMatrix<Scalar, LO, GO, Node>:: 02130 getNodeNumCols() const 02131 { 02132 return graph_.getNodeNumCols(); 02133 } 02134 02135 template<class Scalar, class LO, class GO, class Node> 02136 GO 02137 BlockCrsMatrix<Scalar, LO, GO, Node>:: 02138 getIndexBase() const 02139 { 02140 return graph_.getIndexBase(); 02141 } 02142 02143 template<class Scalar, class LO, class GO, class Node> 02144 global_size_t 02145 BlockCrsMatrix<Scalar, LO, GO, Node>:: 02146 getGlobalNumEntries() const 02147 { 02148 return graph_.getGlobalNumEntries(); 02149 } 02150 02151 template<class Scalar, class LO, class GO, class Node> 
02152 size_t 02153 BlockCrsMatrix<Scalar, LO, GO, Node>:: 02154 getNodeNumEntries() const 02155 { 02156 return graph_.getNodeNumEntries(); 02157 } 02158 02159 template<class Scalar, class LO, class GO, class Node> 02160 size_t 02161 BlockCrsMatrix<Scalar, LO, GO, Node>:: 02162 getNumEntriesInGlobalRow (GO globalRow) const 02163 { 02164 return graph_.getNumEntriesInGlobalRow(globalRow); 02165 } 02166 02167 template<class Scalar, class LO, class GO, class Node> 02168 global_size_t 02169 BlockCrsMatrix<Scalar, LO, GO, Node>:: 02170 getGlobalNumDiags() const 02171 { 02172 return getGlobalNumDiags(); 02173 } 02174 02175 template<class Scalar, class LO, class GO, class Node> 02176 size_t 02177 BlockCrsMatrix<Scalar, LO, GO, Node>:: 02178 getNodeNumDiags() const 02179 { 02180 return getNodeNumDiags(); 02181 } 02182 02183 template<class Scalar, class LO, class GO, class Node> 02184 size_t 02185 BlockCrsMatrix<Scalar, LO, GO, Node>:: 02186 getGlobalMaxNumRowEntries() const 02187 { 02188 return graph_.getGlobalMaxNumRowEntries(); 02189 } 02190 02191 template<class Scalar, class LO, class GO, class Node> 02192 bool 02193 BlockCrsMatrix<Scalar, LO, GO, Node>:: 02194 hasColMap() const 02195 { 02196 return graph_.hasColMap(); 02197 } 02198 02199 template<class Scalar, class LO, class GO, class Node> 02200 bool 02201 BlockCrsMatrix<Scalar, LO, GO, Node>:: 02202 isLowerTriangular() const 02203 { 02204 return graph_.isLowerTriangular(); 02205 } 02206 02207 template<class Scalar, class LO, class GO, class Node> 02208 bool 02209 BlockCrsMatrix<Scalar, LO, GO, Node>:: 02210 isUpperTriangular() const 02211 { 02212 return graph_.isUpperTriangular(); 02213 } 02214 02215 template<class Scalar, class LO, class GO, class Node> 02216 bool 02217 BlockCrsMatrix<Scalar, LO, GO, Node>:: 02218 isLocallyIndexed() const 02219 { 02220 return graph_.isLocallyIndexed(); 02221 } 02222 02223 template<class Scalar, class LO, class GO, class Node> 02224 bool 02225 BlockCrsMatrix<Scalar, LO, GO, Node>:: 
02226 isGloballyIndexed() const 02227 { 02228 return graph_.isGloballyIndexed(); 02229 } 02230 02231 template<class Scalar, class LO, class GO, class Node> 02232 bool 02233 BlockCrsMatrix<Scalar, LO, GO, Node>:: 02234 isFillComplete() const 02235 { 02236 return true; 02237 } 02238 02239 template<class Scalar, class LO, class GO, class Node> 02240 bool 02241 BlockCrsMatrix<Scalar, LO, GO, Node>:: 02242 supportsRowViews() const 02243 { 02244 return true; 02245 } 02246 02247 02248 template<class Scalar, class LO, class GO, class Node> 02249 void 02250 BlockCrsMatrix<Scalar, LO, GO, Node>:: 02251 getGlobalRowCopy (GO GlobalRow, 02252 const Teuchos::ArrayView<GO> &Indices, 02253 const Teuchos::ArrayView<Scalar> &Values, 02254 size_t &NumEntries) const 02255 { 02256 TEUCHOS_TEST_FOR_EXCEPTION( 02257 true, std::logic_error, "Tpetra::Experimental::BlockCrsMatrix::getGlobalRowCopy: " 02258 "This class doesn't support global matrix indexing."); 02259 02260 } 02261 02262 template<class Scalar, class LO, class GO, class Node> 02263 void 02264 BlockCrsMatrix<Scalar, LO, GO, Node>:: 02265 getGlobalRowView (GO GlobalRow, 02266 ArrayView<const GO> &indices, 02267 ArrayView<const Scalar> &values) const 02268 { 02269 TEUCHOS_TEST_FOR_EXCEPTION( 02270 true, std::logic_error, "Tpetra::Experimental::BlockCrsMatrix::getGlobalRowView: " 02271 "This class doesn't support global matrix indexing."); 02272 02273 } 02274 02275 template<class Scalar, class LO, class GO, class Node> 02276 void 02277 BlockCrsMatrix<Scalar, LO, GO, Node>:: 02278 getLocalRowView (LO LocalRow, 02279 ArrayView<const LO> &indices, 02280 ArrayView<const Scalar> &values) const 02281 { 02282 TEUCHOS_TEST_FOR_EXCEPTION( 02283 true, std::logic_error, "Tpetra::Experimental::BlockCrsMatrix::getGlobalRowView: " 02284 "This class doesn't support global matrix indexing."); 02285 02286 } 02287 02288 02289 template<class Scalar, class LO, class GO, class Node> 02290 void 02291 BlockCrsMatrix<Scalar, LO, GO, Node>:: 02292 
getLocalDiagCopy (Tpetra::Vector<Scalar,LO,GO,Node> &diag) const 02293 { 02294 TEUCHOS_TEST_FOR_EXCEPTION( 02295 true, std::logic_error, "Tpetra::Experimental::BlockCrsMatrix::getLocalDiagCopy: " 02296 "not implemented."); 02297 02298 } 02299 02300 template<class Scalar, class LO, class GO, class Node> 02301 void 02302 BlockCrsMatrix<Scalar, LO, GO, Node>:: 02303 leftScale (const Tpetra::Vector<Scalar, LO, GO, Node>& x) 02304 { 02305 TEUCHOS_TEST_FOR_EXCEPTION( 02306 true, std::logic_error, "Tpetra::Experimental::BlockCrsMatrix::leftScale: " 02307 "not implemented."); 02308 02309 } 02310 02311 template<class Scalar, class LO, class GO, class Node> 02312 void 02313 BlockCrsMatrix<Scalar, LO, GO, Node>:: 02314 rightScale (const Tpetra::Vector<Scalar, LO, GO, Node>& x) 02315 { 02316 TEUCHOS_TEST_FOR_EXCEPTION( 02317 true, std::logic_error, "Tpetra::Experimental::BlockCrsMatrix::rightScale: " 02318 "not implemented."); 02319 02320 } 02321 02322 template<class Scalar, class LO, class GO, class Node> 02323 Teuchos::RCP<const Tpetra::RowGraph<LO, GO, Node> > 02324 BlockCrsMatrix<Scalar, LO, GO, Node>:: 02325 getGraph() const 02326 { 02327 return graphRCP_; 02328 } 02329 02330 template<class Scalar, class LO, class GO, class Node> 02331 typename Teuchos::ScalarTraits<Scalar>::magnitudeType 02332 BlockCrsMatrix<Scalar, LO, GO, Node>:: 02333 getFrobeniusNorm () const 02334 { 02335 TEUCHOS_TEST_FOR_EXCEPTION( 02336 true, std::logic_error, "Tpetra::Experimental::BlockCrsMatrix::getFrobeniusNorm: " 02337 "not implemented."); 02338 } 02339 02340 02341 } // namespace Experimental 02342 } // namespace Tpetra 02343 02344 // 02345 // Explicit instantiation macro 02346 // 02347 // Must be expanded from within the Tpetra namespace! 02348 // 02349 #define TPETRA_EXPERIMENTAL_BLOCKCRSMATRIX_INSTANT(S,LO,GO,NODE) \ 02350 template class Experimental::BlockCrsMatrix< S, LO, GO, NODE >; 02351 02352 #endif // TPETRA_EXPERIMENTAL_BLOCKCRSMATRIX_DEF_HPP
1.7.6.1