|
Ifpack2 Templated Preconditioning Package
Version 1.0
|
00001 /*@HEADER 00002 // *********************************************************************** 00003 // 00004 // Ifpack2: Tempated Object-Oriented Algebraic Preconditioner Package 00005 // Copyright (2009) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00038 // 00039 // *********************************************************************** 00040 //@HEADER 00041 */ 00042 00043 #ifndef IFPACK2_DENSECONTAINER_DEF_HPP 00044 #define IFPACK2_DENSECONTAINER_DEF_HPP 00045 00046 #include "Ifpack2_DenseContainer_decl.hpp" 00047 #include "Teuchos_LAPACK.hpp" 00048 00049 #ifdef HAVE_MPI 00050 # include <mpi.h> 00051 # include "Teuchos_DefaultMpiComm.hpp" 00052 #else 00053 # include "Teuchos_DefaultSerialComm.hpp" 00054 #endif // HAVE_MPI 00055 00056 00057 namespace Ifpack2 { 00058 00059 //============================================================================== 00060 template<class MatrixType, class LocalScalarType> 00061 DenseContainer<MatrixType, LocalScalarType>:: 00062 DenseContainer (const Teuchos::RCP<const row_matrix_type>& matrix, 00063 const Teuchos::ArrayView<const local_ordinal_type>& localRows) : 00064 Container<MatrixType> (matrix, localRows), 00065 numRows_ (localRows.size ()), 00066 diagBlock_ (numRows_, numRows_), 00067 ipiv_ (numRows_, 0) 00068 { 00069 using Teuchos::Array; 00070 using Teuchos::ArrayView; 00071 using Teuchos::RCP; 00072 using Teuchos::rcp; 00073 using Teuchos::toString; 00074 typedef Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> map_type; 00075 typedef typename ArrayView<const local_ordinal_type>::size_type size_type; 00076 TEUCHOS_TEST_FOR_EXCEPTION( 00077 ! matrix->hasColMap (), std::invalid_argument, "Ifpack2::DenseContainer: " 00078 "The constructor's input matrix must have a column Map."); 00079 00080 // Check whether the input set of local row indices is correct. 00081 const map_type& rowMap = * (matrix->getRowMap ()); 00082 const size_type numRows = localRows.size (); 00083 bool rowIndicesValid = true; 00084 Array<local_ordinal_type> invalidLocalRowIndices; 00085 for (size_type i = 0; i < numRows; ++i) { 00086 if (! rowMap.isNodeLocalElement (localRows[i])) { 00087 rowIndicesValid = false; 00088 invalidLocalRowIndices.push_back (localRows[i]); 00089 break; 00090 } 00091 } 00092 TEUCHOS_TEST_FOR_EXCEPTION( 00093 ! rowIndicesValid, std::invalid_argument, "Ifpack2::DenseContainer: " 00094 "On process " << rowMap.getComm ()->getRank () << " of " 00095 << rowMap.getComm ()->getSize () << ", in the given set of local row " 00096 "indices localRows = " << toString (localRows) << ", the following " 00097 "entries are not valid local row indices on the calling process: " 00098 << toString (invalidLocalRowIndices) << "."); 00099 00100 #ifdef HAVE_MPI 00101 RCP<const Teuchos::Comm<int> > localComm = 00102 rcp (new Teuchos::MpiComm<int> (MPI_COMM_SELF)); 00103 #else 00104 RCP<const Teuchos::Comm<int> > localComm = 00105 rcp (new Teuchos::SerialComm<int> ()); 00106 #endif // HAVE_MPI 00107 00108 // FIXME (mfh 25 Aug 2013) What if the matrix's row Map has a 00109 // different index base than zero? 00110 const global_ordinal_type indexBase = 0; 00111 localMap_ = rcp (new map_type (numRows_, indexBase, localComm)); 00112 } 00113 00114 //============================================================================== 00115 template<class MatrixType, class LocalScalarType> 00116 DenseContainer<MatrixType, LocalScalarType>::~DenseContainer() 00117 {} 00118 00119 //============================================================================== 00120 template<class MatrixType, class LocalScalarType> 00121 size_t DenseContainer<MatrixType, LocalScalarType>::getNumRows () const 00122 { 00123 return numRows_; 00124 } 00125 00126 //============================================================================== 00127 template<class MatrixType, class LocalScalarType> 00128 bool DenseContainer<MatrixType, LocalScalarType>::isInitialized () const 00129 { 00130 return IsInitialized_; 00131 } 00132 00133 //============================================================================== 00134 template<class MatrixType, class LocalScalarType> 00135 bool DenseContainer<MatrixType, LocalScalarType>::isComputed() const 00136 { 00137 return IsComputed_; 00138 } 00139 00140 //============================================================================== 00141 template<class MatrixType, class LocalScalarType> 00142 void DenseContainer<MatrixType, LocalScalarType>:: 00143 setParameters (const Teuchos::ParameterList& List) 00144 { 00145 (void) List; // the solver doesn't currently take any parameters 00146 } 00147 00148 //============================================================================== 00149 template<class MatrixType, class LocalScalarType> 00150 void DenseContainer<MatrixType, LocalScalarType>::initialize () 00151 { 00152 using Teuchos::null; 00153 using Teuchos::rcp; 00154 00155 // We assume that if you called this method, you intend to recompute 00156 // everything. 00157 IsInitialized_ = false; 00158 IsComputed_ = false; 00159 00160 // Fill the diagonal block and LU permutation array with zeros. 00161 diagBlock_.putScalar (Teuchos::ScalarTraits<local_scalar_type>::zero ()); 00162 std::fill (ipiv_.begin (), ipiv_.end (), 0); 00163 00164 IsInitialized_ = true; 00165 } 00166 00167 //============================================================================== 00168 template<class MatrixType, class LocalScalarType> 00169 void DenseContainer<MatrixType, LocalScalarType>::compute () 00170 { 00171 TEUCHOS_TEST_FOR_EXCEPTION( 00172 static_cast<size_t> (ipiv_.size ()) != numRows_, std::logic_error, 00173 "Ifpack2::DenseContainer::compute: ipiv_ array has the wrong size. " 00174 "Please report this bug to the Ifpack2 developers."); 00175 00176 IsComputed_ = false; 00177 if (! this->isInitialized ()) { 00178 this->initialize (); 00179 } 00180 00181 // Extract the submatrix. 00182 extract (this->getMatrix ()); // extract the submatrix 00183 factor (); // factor the submatrix 00184 00185 IsComputed_ = true; 00186 } 00187 00188 template<class MatrixType, class LocalScalarType> 00189 void DenseContainer<MatrixType, LocalScalarType>::factor () 00190 { 00191 Teuchos::LAPACK<int, local_scalar_type> lapack; 00192 int INFO = 0; 00193 lapack.GETRF (diagBlock_.numRows (), diagBlock_.numCols (), 00194 diagBlock_.values (), diagBlock_.stride (), 00195 ipiv_.getRawPtr (), &INFO); 00196 // INFO < 0 is a bug. 00197 TEUCHOS_TEST_FOR_EXCEPTION( 00198 INFO < 0, std::logic_error, "Ifpack2::DenseContainer::factor: " 00199 "LAPACK's _GETRF (LU factorization with partial pivoting) was called " 00200 "incorrectly. INFO = " << INFO << " < 0. " 00201 "Please report this bug to the Ifpack2 developers."); 00202 // INFO > 0 means the matrix is singular. This is probably an issue 00203 // either with the choice of rows the rows we extracted, or with the 00204 // input matrix itself. 00205 TEUCHOS_TEST_FOR_EXCEPTION( 00206 INFO > 0, std::runtime_error, "Ifpack2::DenseContainer::factor: " 00207 "LAPACK's _GETRF (LU factorization with partial pivoting) reports that the " 00208 "computed U factor is exactly singular. U(" << INFO << "," << INFO << ") " 00209 "(one-based index i) is exactly zero. This probably means that the input " 00210 "matrix has a singular diagonal block."); 00211 } 00212 00213 //============================================================================== 00214 template<class MatrixType, class LocalScalarType> 00215 void DenseContainer<MatrixType, LocalScalarType>:: 00216 applyImpl (const local_mv_type& X, 00217 local_mv_type& Y, 00218 Teuchos::ETransp mode, 00219 LocalScalarType alpha, 00220 LocalScalarType beta) const 00221 { 00222 using Teuchos::ArrayRCP; 00223 using Teuchos::RCP; 00224 using Teuchos::rcp; 00225 using Teuchos::rcpFromRef; 00226 00227 TEUCHOS_TEST_FOR_EXCEPTION( 00228 X.getLocalLength () != Y.getLocalLength (), 00229 std::logic_error, "Ifpack2::DenseContainer::applyImpl: X and Y have " 00230 "incompatible dimensions (" << X.getLocalLength () << " resp. " 00231 << Y.getLocalLength () << "). Please report this bug to " 00232 "the Ifpack2 developers."); 00233 TEUCHOS_TEST_FOR_EXCEPTION( 00234 localMap_->getNodeNumElements () != X.getLocalLength (), 00235 std::logic_error, "Ifpack2::DenseContainer::applyImpl: The inverse " 00236 "operator and X have incompatible dimensions (" << 00237 localMap_->getNodeNumElements () << " resp. " 00238 << X.getLocalLength () << "). Please report this bug to " 00239 "the Ifpack2 developers."); 00240 TEUCHOS_TEST_FOR_EXCEPTION( 00241 localMap_->getNodeNumElements () != Y.getLocalLength (), 00242 std::logic_error, "Ifpack2::DenseContainer::applyImpl: The inverse " 00243 "operator and Y have incompatible dimensions (" << 00244 localMap_->getNodeNumElements () << " resp. " 00245 << Y.getLocalLength () << "). Please report this bug to " 00246 "the Ifpack2 developers."); 00247 TEUCHOS_TEST_FOR_EXCEPTION( 00248 X.getLocalLength () != static_cast<size_t> (diagBlock_.numRows ()), 00249 std::logic_error, "Ifpack2::DenseContainer::applyImpl: The input " 00250 "multivector X has incompatible dimensions from those of the " 00251 "inverse operator (" << X.getLocalLength () << " vs. " 00252 << (mode == Teuchos::NO_TRANS ? diagBlock_.numCols () : diagBlock_.numRows ()) 00253 << "). Please report this bug to the Ifpack2 developers."); 00254 TEUCHOS_TEST_FOR_EXCEPTION( 00255 X.getLocalLength () != static_cast<size_t> (diagBlock_.numRows ()), 00256 std::logic_error, "Ifpack2::DenseContainer::applyImpl: The output " 00257 "multivector Y has incompatible dimensions from those of the " 00258 "inverse operator (" << Y.getLocalLength () << " vs. " 00259 << (mode == Teuchos::NO_TRANS ? diagBlock_.numRows () : diagBlock_.numCols ()) 00260 << "). Please report this bug to the Ifpack2 developers."); 00261 00262 typedef Teuchos::ScalarTraits<local_scalar_type> STS; 00263 const int numVecs = static_cast<int> (X.getNumVectors ()); 00264 if (alpha == STS::zero ()) { // don't need to solve the linear system 00265 if (beta == STS::zero ()) { 00266 // Use BLAS AXPY semantics for beta == 0: overwrite, clobbering 00267 // any Inf or NaN values in Y (rather than multiplying them by 00268 // zero, resulting in NaN values). 00269 Y.putScalar (STS::zero ()); 00270 } 00271 else { // beta != 0 00272 Y.scale (beta); 00273 } 00274 } 00275 else { // alpha != 0; must solve the linear system 00276 Teuchos::LAPACK<int, local_scalar_type> lapack; 00277 // If beta is nonzero or Y is not constant stride, we have to use 00278 // a temporary output multivector. It gets a (deep) copy of X, 00279 // since GETRS overwrites its (multi)vector input with its output. 00280 RCP<local_mv_type> Y_tmp; 00281 if (beta == STS::zero () ){ 00282 Tpetra::deep_copy (Y, X); 00283 Y_tmp = rcpFromRef (Y); 00284 } 00285 else { 00286 Y_tmp = rcp (new local_mv_type (X, Teuchos::Copy)); 00287 } 00288 const int Y_stride = static_cast<int> (Y_tmp->getStride ()); 00289 ArrayRCP<local_scalar_type> Y_view = Y_tmp->get1dViewNonConst (); 00290 local_scalar_type* const Y_ptr = Y_view.getRawPtr (); 00291 00292 int INFO = 0; 00293 const char trans = 00294 (mode == Teuchos::CONJ_TRANS ? 'C' : (mode == Teuchos::TRANS ? 'T' : 'N')); 00295 lapack.GETRS (trans, diagBlock_.numRows (), numVecs, 00296 diagBlock_.values (), diagBlock_.stride (), 00297 ipiv_.getRawPtr (), Y_ptr, Y_stride, &INFO); 00298 TEUCHOS_TEST_FOR_EXCEPTION( 00299 INFO != 0, std::runtime_error, "Ifpack2::DenseContainer::applyImpl: " 00300 "LAPACK's _GETRS (solve using LU factorization with partial pivoting) " 00301 "failed with INFO = " << INFO << " != 0."); 00302 00303 if (beta != STS::zero ()) { 00304 Y.update (alpha, *Y_tmp, beta); 00305 } 00306 else if (! Y.isConstantStride ()) { 00307 Tpetra::deep_copy (Y, *Y_tmp); 00308 } 00309 } 00310 } 00311 00312 //============================================================================== 00313 template<class MatrixType, class LocalScalarType> 00314 void DenseContainer<MatrixType, LocalScalarType>:: 00315 apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X, 00316 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y, 00317 Teuchos::ETransp mode, 00318 scalar_type alpha, 00319 scalar_type beta) const 00320 { 00321 using Teuchos::ArrayView; 00322 using Teuchos::as; 00323 using Teuchos::RCP; 00324 using Teuchos::rcp; 00325 00326 // The local operator might have a different Scalar type than 00327 // MatrixType. This means that we might have to convert X and Y to 00328 // the Tpetra::MultiVector specialization that the local operator 00329 // wants. This class' X_ and Y_ internal fields are of the right 00330 // type for the local operator, so we can use those as targets. 00331 00332 // Tpetra::MultiVector specialization corresponding to the global operator. 00333 typedef Tpetra::MultiVector<scalar_type, 00334 local_ordinal_type, global_ordinal_type, node_type> global_mv_type; 00335 00336 const char prefix[] = "Ifpack2::DenseContainer::weightedApply: "; 00337 TEUCHOS_TEST_FOR_EXCEPTION( 00338 ! IsComputed_, std::runtime_error, prefix << "You must have called the " 00339 "compute() method before you may call this method. You may call " 00340 "apply() as many times as you want after calling compute() once, " 00341 "but you must have called compute() at least once first."); 00342 const size_t numVecs = X.getNumVectors (); 00343 TEUCHOS_TEST_FOR_EXCEPTION( 00344 numVecs != Y.getNumVectors (), std::runtime_error, 00345 prefix << "X and Y have different numbers of vectors (columns). X has " 00346 << X.getNumVectors () << ", but Y has " << X.getNumVectors () << "."); 00347 00348 if (numVecs == 0) { 00349 return; // done! nothing to do 00350 } 00351 00352 // The local operator works on a permuted subset of the local parts 00353 // of X and Y. The subset and permutation are defined by the index 00354 // array returned by getLocalRows(). If the permutation is trivial 00355 // and the subset is exactly equal to the local indices, then we 00356 // could use the local parts of X and Y exactly, without needing to 00357 // permute. Otherwise, we have to use temporary storage to permute 00358 // X and Y. For now, we always use temporary storage. 00359 // 00360 // Create temporary permuted versions of the input and output. 00361 // (Re)allocate X_ and/or Y_ only if necessary. We'll use them to 00362 // store the permuted versions of X resp. Y. Note that X_local has 00363 // the domain Map of the operator, which may be a permuted subset of 00364 // the local Map corresponding to X.getMap(). Similarly, Y_local 00365 // has the range Map of the operator, which may be a permuted subset 00366 // of the local Map corresponding to Y.getMap(). numRows_ here 00367 // gives the number of rows in the row Map of the local Inverse_ 00368 // operator. 00369 // 00370 // FIXME (mfh 20 Aug 2013) There might be an implicit assumption 00371 // here that the row Map and the range Map of that operator are 00372 // the same. 00373 // 00374 // FIXME (mfh 20 Aug 2013) This "local permutation" functionality 00375 // really belongs in Tpetra. 00376 00377 if (X_.is_null ()) { 00378 X_ = rcp (new local_mv_type (localMap_, numVecs)); 00379 } 00380 RCP<local_mv_type> X_local = X_; 00381 TEUCHOS_TEST_FOR_EXCEPTION( 00382 X_local->getLocalLength () != numRows_, std::logic_error, 00383 "Ifpack2::DenseContainer::apply: " 00384 "X_local has length " << X_local->getLocalLength () << ", which does " 00385 "not match numRows_ = " << numRows_ << ". Please report this bug to " 00386 "the Ifpack2 developers."); 00387 ArrayView<const local_ordinal_type> localRows = this->getLocalRows (); 00388 00389 Details::MultiVectorLocalGatherScatter<global_mv_type, local_mv_type> mvgs; 00390 mvgs.gather (*X_local, X, localRows); 00391 00392 // We must gather the contents of the output multivector Y even on 00393 // input to applyImpl(), since the inverse operator might use it as 00394 // an initial guess for a linear solve. We have no way of knowing 00395 // whether it does or does not. 00396 00397 if (Y_.is_null ()) { 00398 Y_ = rcp (new local_mv_type (localMap_, numVecs)); 00399 } 00400 RCP<local_mv_type> Y_local = Y_; 00401 TEUCHOS_TEST_FOR_EXCEPTION( 00402 Y_local->getLocalLength () != numRows_, std::logic_error, 00403 "Ifpack2::DenseContainer::apply: " 00404 "Y_local has length " << X_local->getLocalLength () << ", which does " 00405 "not match numRows_ = " << numRows_ << ". Please report this bug to " 00406 "the Ifpack2 developers."); 00407 mvgs.gather (*Y_local, Y, localRows); 00408 00409 // Apply the local operator: 00410 // Y_local := beta*Y_local + alpha*M^{-1}*X_local 00411 this->applyImpl (*X_local, *Y_local, mode, as<local_scalar_type> (alpha), 00412 as<local_scalar_type> (beta)); 00413 00414 // Scatter the permuted subset output vector Y_local back into the 00415 // original output multivector Y. 00416 mvgs.scatter (Y, *Y_local, localRows); 00417 } 00418 00419 //============================================================================== 00420 template<class MatrixType, class LocalScalarType> 00421 void DenseContainer<MatrixType,LocalScalarType>:: 00422 weightedApply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X, 00423 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y, 00424 const Tpetra::Vector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& D, 00425 Teuchos::ETransp mode, 00426 scalar_type alpha, 00427 scalar_type beta) const 00428 { 00429 using Teuchos::ArrayRCP; 00430 using Teuchos::ArrayView; 00431 using Teuchos::Range1D; 00432 using Teuchos::RCP; 00433 using Teuchos::rcp; 00434 using Teuchos::rcp_const_cast; 00435 using std::cerr; 00436 using std::endl; 00437 typedef Teuchos::ScalarTraits<scalar_type> STS; 00438 00439 // The local operator template parameter might have a different 00440 // Scalar type than MatrixType. This means that we might have to 00441 // convert X and Y to the Tpetra::MultiVector specialization that 00442 // the local operator wants. This class' X_ and Y_ internal fields 00443 // are of the right type for the local operator, so we can use those 00444 // as targets. 00445 00446 // Tpetra::MultiVector specialization corresponding to the global operator. 00447 typedef Tpetra::MultiVector<scalar_type, 00448 local_ordinal_type, global_ordinal_type, node_type> global_mv_type; 00449 // Tpetra::Vector specialization corresponding to the local 00450 // operator. We will use this for the subset permutation of the 00451 // diagonal scaling D. 00452 typedef Tpetra::Vector<local_scalar_type, local_ordinal_type, 00453 global_ordinal_type, node_type> local_vec_type; 00454 00455 const char prefix[] = "Ifpack2::DenseContainer::weightedApply: "; 00456 TEUCHOS_TEST_FOR_EXCEPTION( 00457 ! IsComputed_, std::runtime_error, prefix << "You must have called the " 00458 "compute() method before you may call this method. You may call " 00459 "weightedApply() as many times as you want after calling compute() once, " 00460 "but you must have called compute() at least once first."); 00461 const size_t numVecs = X.getNumVectors (); 00462 TEUCHOS_TEST_FOR_EXCEPTION( 00463 numVecs != Y.getNumVectors (), std::runtime_error, 00464 prefix << "X and Y have different numbers of vectors (columns). X has " 00465 << X.getNumVectors () << ", but Y has " << X.getNumVectors () << "."); 00466 00467 if (numVecs == 0) { 00468 return; // done! nothing to do 00469 } 00470 00471 // The local operator works on a permuted subset of the local parts 00472 // of X and Y. The subset and permutation are defined by the index 00473 // array returned by getLocalRows(). If the permutation is trivial 00474 // and the subset is exactly equal to the local indices, then we 00475 // could use the local parts of X and Y exactly, without needing to 00476 // permute. Otherwise, we have to use temporary storage to permute 00477 // X and Y. For now, we always use temporary storage. 00478 // 00479 // Create temporary permuted versions of the input and output. 00480 // (Re)allocate X_ and/or Y_ only if necessary. We'll use them to 00481 // store the permuted versions of X resp. Y. Note that X_local has 00482 // the domain Map of the operator, which may be a permuted subset of 00483 // the local Map corresponding to X.getMap(). Similarly, Y_local 00484 // has the range Map of the operator, which may be a permuted subset 00485 // of the local Map corresponding to Y.getMap(). numRows_ here 00486 // gives the number of rows in the row Map of the local operator. 00487 // 00488 // FIXME (mfh 20 Aug 2013) There might be an implicit assumption 00489 // here that the row Map and the range Map of that operator are 00490 // the same. 00491 // 00492 // FIXME (mfh 20 Aug 2013) This "local permutation" functionality 00493 // really belongs in Tpetra. 00494 00495 if (X_.is_null ()) { 00496 X_ = rcp (new local_mv_type (localMap_, numVecs)); 00497 } 00498 RCP<local_mv_type> X_local = X_; 00499 TEUCHOS_TEST_FOR_EXCEPTION( 00500 X_local->getLocalLength () != numRows_, std::logic_error, 00501 "Ifpack2::DenseContainer::weightedApply: " 00502 "X_local has length " << X_local->getLocalLength () << ", which does " 00503 "not match numRows_ = " << numRows_ << ". Please report this bug to " 00504 "the Ifpack2 developers."); 00505 ArrayView<const local_ordinal_type> localRows = this->getLocalRows (); 00506 00507 Details::MultiVectorLocalGatherScatter<global_mv_type, local_mv_type> mvgs; 00508 mvgs.gather (*X_local, X, localRows); 00509 00510 // We must gather the output multivector Y even on input to 00511 // applyImpl(), since the local operator might use it as an initial 00512 // guess for a linear solve. We have no way of knowing whether it 00513 // does or does not. 00514 00515 if (Y_.is_null ()) { 00516 Y_ = rcp (new local_mv_type (localMap_, numVecs)); 00517 } 00518 RCP<local_mv_type> Y_local = Y_; 00519 TEUCHOS_TEST_FOR_EXCEPTION( 00520 Y_local->getLocalLength () != numRows_, std::logic_error, 00521 "Ifpack2::DenseContainer::weightedApply: " 00522 "Y_local has length " << X_local->getLocalLength () << ", which does " 00523 "not match numRows_ = " << numRows_ << ". Please report this bug to " 00524 "the Ifpack2 developers."); 00525 mvgs.gather (*Y_local, Y, localRows); 00526 00527 // Apply the diagonal scaling D to the input X. It's our choice 00528 // whether the result has the original input Map of X, or the 00529 // permuted subset Map of X_local. If the latter, we also need to 00530 // gather D into the permuted subset Map. We choose the latter, to 00531 // save memory and computation. Thus, we do the following: 00532 // 00533 // 1. Gather D into a temporary vector D_local. 00534 // 2. Create a temporary X_scaled to hold diag(D_local) * X_local. 00535 // 3. Compute X_scaled := diag(D_loca) * X_local. 00536 00537 local_vec_type D_local (localMap_); 00538 TEUCHOS_TEST_FOR_EXCEPTION( 00539 D_local.getLocalLength () != numRows_, std::logic_error, 00540 "Ifpack2::DenseContainer::weightedApply: " 00541 "D_local has length " << D_local.getLocalLength () << ", which does " 00542 "not match numRows_ = " << numRows_ << ". Please report this bug to " 00543 "the Ifpack2 developers."); 00544 mvgs.gather (D_local, D, localRows); 00545 local_mv_type X_scaled (localMap_, numVecs); 00546 X_scaled.elementWiseMultiply (STS::one (), D_local, *X_local, STS::zero ()); 00547 00548 // Y_temp will hold the result of M^{-1}*X_scaled. If beta == 0, we 00549 // can write the result of Inverse_->apply() directly to Y_local, so 00550 // Y_temp may alias Y_local. Otherwise, if beta != 0, we need 00551 // temporary storage for M^{-1}*X_scaled, so Y_temp must be 00552 // different than Y_local. 00553 RCP<local_mv_type> Y_temp; 00554 if (beta == STS::zero ()) { 00555 Y_temp = Y_local; 00556 } else { 00557 Y_temp = rcp (new local_mv_type (localMap_, numVecs)); 00558 } 00559 00560 // Apply the local operator: Y_temp := M^{-1} * X_scaled 00561 this->applyImpl (X_scaled, *Y_temp, mode, STS::one (), STS::zero ()); 00562 // Y_local := beta * Y_local + alpha * diag(D_local) * Y_temp. 00563 // 00564 // Note that we still use the permuted subset scaling D_local here, 00565 // because Y_temp has the same permuted subset Map. That's good, in 00566 // fact, because it's a subset: less data to read and multiply. 00567 Y_local->elementWiseMultiply (alpha, D_local, *Y_temp, beta); 00568 00569 // Copy the permuted subset output vector Y_local into the original 00570 // output multivector Y. 00571 mvgs.scatter (Y, *Y_local, localRows); 00572 } 00573 00574 //============================================================================== 00575 template<class MatrixType, class LocalScalarType> 00576 std::ostream& DenseContainer<MatrixType,LocalScalarType>:: 00577 print (std::ostream& os) const 00578 { 00579 Teuchos::FancyOStream fos (Teuchos::rcpFromRef (os)); 00580 fos.setOutputToRootOnly (0); 00581 this->describe (fos); 00582 return os; 00583 } 00584 00585 //============================================================================== 00586 template<class MatrixType, class LocalScalarType> 00587 std::string DenseContainer<MatrixType,LocalScalarType>::description() const 00588 { 00589 std::ostringstream oss; 00590 oss << Teuchos::Describable::description(); 00591 if (isInitialized()) { 00592 if (isComputed()) { 00593 oss << "{status = initialized, computed"; 00594 } 00595 else { 00596 oss << "{status = initialized, not computed"; 00597 } 00598 } 00599 else { 00600 oss << "{status = not initialized, not computed"; 00601 } 00602 00603 oss << "}"; 00604 return oss.str(); 00605 } 00606 00607 //============================================================================== 00608 template<class MatrixType, class LocalScalarType> 00609 void DenseContainer<MatrixType,LocalScalarType>::describe(Teuchos::FancyOStream &os, const Teuchos::EVerbosityLevel verbLevel) const 00610 { 00611 using std::endl; 00612 if(verbLevel==Teuchos::VERB_NONE) return; 00613 os << "================================================================================" << endl; 00614 os << "Ifpack2::DenseContainer" << endl; 00615 os << "Number of rows = " << numRows_ << endl; 00616 os << "isInitialized() = " << IsInitialized_ << endl; 00617 os << "isComputed() = " << IsComputed_ << endl; 00618 os << "================================================================================" << endl; 00619 os << endl; 00620 } 00621 00622 //============================================================================== 00623 template<class MatrixType, class LocalScalarType> 00624 void DenseContainer<MatrixType,LocalScalarType>:: 00625 extract (const Teuchos::RCP<const row_matrix_type>& globalMatrix) 00626 { 00627 using Teuchos::Array; 00628 using Teuchos::ArrayView; 00629 using Teuchos::toString; 00630 typedef local_ordinal_type LO; 00631 typedef global_ordinal_type GO; 00632 typedef Tpetra::Map<LO, GO, node_type> map_type; 00633 const size_t inputMatrixNumRows = globalMatrix->getNodeNumRows (); 00634 // We only use the rank of the calling process and the number of MPI 00635 // processes for generating error messages. Extraction itself is 00636 // entirely local to each participating MPI process. 00637 const int myRank = globalMatrix->getRowMap ()->getComm ()->getRank (); 00638 const int numProcs = globalMatrix->getRowMap ()->getComm ()->getSize (); 00639 00640 // Sanity check that the local row indices to extract fall within 00641 // the valid range of local row indices for the input matrix. 00642 ArrayView<const LO> localRows = this->getLocalRows (); 00643 for (size_t j = 0; j < numRows_; ++j) { 00644 TEUCHOS_TEST_FOR_EXCEPTION( 00645 localRows[j] < 0 || 00646 static_cast<size_t> (localRows[j]) >= inputMatrixNumRows, 00647 std::runtime_error, "Ifpack2::DenseContainer::extract: On process " << 00648 myRank << " of " << numProcs << ", localRows[j=" << j << "] = " << 00649 localRows[j] << ", which is out of the valid range of local row indices " 00650 "indices [0, " << (inputMatrixNumRows - 1) << "] for the input matrix."); 00651 } 00652 00653 // Convert the local row indices we want into local column indices. 00654 // For every local row ii_local = localRows[i] we take, we also want 00655 // to take the corresponding column. To find the corresponding 00656 // column, we use the row Map to convert the local row index 00657 // ii_local into a global index ii_global, and then use the column 00658 // Map to convert ii_global into a local column index jj_local. If 00659 // the input matrix doesn't have a column Map, we need to be using 00660 // global indices anyway... 00661 00662 // We use the domain Map to exclude off-process global entries. 00663 const map_type& globalRowMap = * (globalMatrix->getRowMap ()); 00664 const map_type& globalColMap = * (globalMatrix->getColMap ()); 00665 const map_type& globalDomMap = * (globalMatrix->getDomainMap ()); 00666 00667 bool rowIndsValid = true; 00668 bool colIndsValid = true; 00669 Array<LO> localCols (numRows_); 00670 // For error messages, collect the sets of invalid row indices and 00671 // invalid column indices. They are otherwise not useful. 00672 Array<LO> invalidLocalRowInds; 00673 Array<GO> invalidGlobalColInds; 00674 for (size_t i = 0; i < numRows_; ++i) { 00675 // ii_local is the (local) row index we want to look up. 00676 const LO ii_local = localRows[i]; 00677 // Find the global index jj_global corresponding to ii_local. 00678 // Global indices are the same (rather, are required to be the 00679 // same) in all three Maps, which is why we use jj (suggesting a 00680 // column index, which is how we will use it below). 00681 const GO jj_global = globalRowMap.getGlobalElement (ii_local); 00682 if (jj_global == Teuchos::OrdinalTraits<GO>::invalid ()) { 00683 // If ii_local is not a local index in the row Map on the 00684 // calling process, that means localRows is incorrect. We've 00685 // already checked for this in the constructor, but we might as 00686 // well check again here, since it's cheap to do so (just an 00687 // integer comparison, since we need jj_global anyway). 00688 rowIndsValid = false; 00689 invalidLocalRowInds.push_back (ii_local); 00690 break; 00691 } 00692 // Exclude "off-process" entries: that is, those in the column Map 00693 // on this process that are not in the domain Map on this process. 00694 if (globalDomMap.isNodeGlobalElement (jj_global)) { 00695 // jj_global is not an off-process entry. Look up its local 00696 // index in the column Map; we want to extract this column index 00697 // from the input matrix. If jj_global is _not_ in the column 00698 // Map on the calling process, that could mean that the column 00699 // in question is empty on this process. That would be bad for 00700 // solving linear systems with the extract submatrix. We could 00701 // solve the resulting singular linear systems in a minimum-norm 00702 // least-squares sense, but for now we simply raise an exception. 00703 const LO jj_local = globalColMap.getLocalElement (jj_global); 00704 if (jj_local == Teuchos::OrdinalTraits<local_ordinal_type>::invalid ()) { 00705 colIndsValid = false; 00706 invalidGlobalColInds.push_back (jj_global); 00707 break; 00708 } 00709 localCols[i] = jj_local; 00710 } 00711 } 00712 TEUCHOS_TEST_FOR_EXCEPTION( 00713 ! rowIndsValid, std::logic_error, "Ifpack2::DenseContainer::extract: " 00714 "On process " << myRank << ", at least one row index in the set of local " 00715 "row indices given to the constructor is not a valid local row index in " 00716 "the input matrix's row Map on this process. This should be impossible " 00717 "because the constructor checks for this case. Here is the complete set " 00718 "of invalid local row indices: " << toString (invalidLocalRowInds) << ". " 00719 "Please report this bug to the Ifpack2 developers."); 00720 TEUCHOS_TEST_FOR_EXCEPTION( 00721 ! colIndsValid, std::runtime_error, "Ifpack2::DenseContainer::extract: " 00722 "On process " << myRank << ", " 00723 "At least one row index in the set of row indices given to the constructor " 00724 "does not have a corresponding column index in the input matrix's column " 00725 "Map. This probably means that the column(s) in question is/are empty on " 00726 "this process, which would make the submatrix to extract structurally " 00727 "singular. Here is the compete set of invalid global column indices: " 00728 << toString (invalidGlobalColInds) << "."); 00729 00730 diagBlock_.putScalar (Teuchos::ScalarTraits<local_scalar_type>::zero ()); 00731 00732 const size_t maxNumEntriesInRow = globalMatrix->getNodeMaxNumRowEntries (); 00733 Array<scalar_type> val (maxNumEntriesInRow); 00734 Array<local_ordinal_type> ind (maxNumEntriesInRow); 00735 00736 const local_ordinal_type INVALID = 00737 Teuchos::OrdinalTraits<local_ordinal_type>::invalid (); 00738 for (size_t i = 0; i < numRows_; ++i) { 00739 const local_ordinal_type localRow = localRows[i]; 00740 size_t numEntries; 00741 globalMatrix->getLocalRowCopy (localRow, ind (), val (), numEntries); 00742 00743 for (size_t k = 0; k < numEntries; ++k) { 00744 const local_ordinal_type localCol = ind[k]; 00745 00746 // Skip off-process elements 00747 // 00748 // FIXME (mfh 24 Aug 2013) This assumes the following: 00749 // 00750 // 1. The column and row Maps begin with the same set of 00751 // on-process entries, in the same order. That is, 00752 // on-process row and column indices are the same. 00753 // 2. All off-process indices in the column Map of the input 00754 // matrix occur after that initial set. 00755 if (localCol >= 0 && static_cast<size_t> (localCol) < inputMatrixNumRows) { 00756 // for local column IDs, look for each ID in the list 00757 // of columns hosted by this object 00758 local_ordinal_type jj = INVALID; 00759 for (size_t kk = 0; kk < numRows_; ++kk) { 00760 if (localRows[kk] == localCol) { 00761 jj = kk; 00762 } 00763 } 00764 if (jj != INVALID) { 00765 diagBlock_ (i, jj) += val[k]; // ??? 00766 } 00767 } 00768 } 00769 } 00770 } 00771 00772 } // namespace Ifpack2 00773 00774 // FIXME (mfh 16 Sep 2014) We should really only use RowMatrix here! 00775 // There's no need to instantiate for CrsMatrix too. All Ifpack2 00776 // preconditioners can and should do dynamic casts if they need a type 00777 // more specific than RowMatrix. 00778 00779 #define IFPACK2_DENSECONTAINER_INSTANT(S,LO,GO,N) \ 00780 template class Ifpack2::DenseContainer< Tpetra::RowMatrix<S, LO, GO, N>, S >; \ 00781 template class Ifpack2::DenseContainer< Tpetra::CrsMatrix<S, LO, GO, N>, S >; 00782 00783 #endif // IFPACK2_SPARSECONTAINER_HPP
1.7.6.1