|
Ifpack2 Templated Preconditioning Package
Version 1.0
|
00001 /*@HEADER 00002 // *********************************************************************** 00003 // 00004 // Ifpack2: Tempated Object-Oriented Algebraic Preconditioner Package 00005 // Copyright (2009) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00038 // 00039 // *********************************************************************** 00040 //@HEADER 00041 */ 00042 00043 #ifndef IFPACK2_RELAXATION_DEF_HPP 00044 #define IFPACK2_RELAXATION_DEF_HPP 00045 00046 #include "Ifpack2_Relaxation_decl.hpp" 00047 #include "Teuchos_StandardParameterEntryValidators.hpp" 00048 #include <Teuchos_TimeMonitor.hpp> 00049 #include <Tpetra_ConfigDefs.hpp> 00050 #include <Tpetra_CrsMatrix.hpp> 00051 #include <Tpetra_Experimental_BlockCrsMatrix.hpp> 00052 00053 // mfh 28 Mar 2013: Uncomment out these three lines to compute 00054 // statistics on diagonal entries in compute(). 00055 // #ifndef IFPACK2_RELAXATION_COMPUTE_DIAGONAL_STATS 00056 // # define IFPACK2_RELAXATION_COMPUTE_DIAGONAL_STATS 1 00057 // #endif // IFPACK2_RELAXATION_COMPUTE_DIAGONAL_STATS 00058 00059 namespace { 00060 // Validate that a given int is nonnegative. 00061 class NonnegativeIntValidator : public Teuchos::ParameterEntryValidator { 00062 public: 00063 // Constructor (does nothing). 00064 NonnegativeIntValidator () {} 00065 00066 // ParameterEntryValidator wants this method. 00067 Teuchos::ParameterEntryValidator::ValidStringsList validStringValues () const { 00068 return Teuchos::null; 00069 } 00070 00071 // Actually validate the parameter's value. 
00072 void 00073 validate (const Teuchos::ParameterEntry& entry, 00074 const std::string& paramName, 00075 const std::string& sublistName) const 00076 { 00077 using std::endl; 00078 Teuchos::any anyVal = entry.getAny (true); 00079 const std::string entryName = entry.getAny (false).typeName (); 00080 00081 TEUCHOS_TEST_FOR_EXCEPTION( 00082 anyVal.type () != typeid (int), 00083 Teuchos::Exceptions::InvalidParameterType, 00084 "Parameter \"" << paramName << "\" in sublist \"" << sublistName 00085 << "\" has the wrong type." << endl << "Parameter: " << paramName 00086 << endl << "Type specified: " << entryName << endl 00087 << "Type required: int" << endl); 00088 00089 const int val = Teuchos::any_cast<int> (anyVal); 00090 TEUCHOS_TEST_FOR_EXCEPTION( 00091 val < 0, Teuchos::Exceptions::InvalidParameterValue, 00092 "Parameter \"" << paramName << "\" in sublist \"" << sublistName 00093 << "\" is negative." << endl << "Parameter: " << paramName 00094 << endl << "Value specified: " << val << endl 00095 << "Required range: [0, INT_MAX]" << endl); 00096 } 00097 00098 // ParameterEntryValidator wants this method. 00099 const std::string getXMLTypeName () const { 00100 return "NonnegativeIntValidator"; 00101 } 00102 00103 // ParameterEntryValidator wants this method. 00104 void 00105 printDoc (const std::string& docString, 00106 std::ostream &out) const 00107 { 00108 Teuchos::StrUtils::printLines (out, "# ", docString); 00109 out << "#\tValidator Used: " << std::endl; 00110 out << "#\t\tNonnegativeIntValidator" << std::endl; 00111 } 00112 }; 00113 00114 // A way to get a small positive number (eps() for floating-point 00115 // types, or 1 for integer types) when Teuchos::ScalarTraits doesn't 00116 // define it (for example, for integer values). 00117 template<class Scalar, const bool isOrdinal=Teuchos::ScalarTraits<Scalar>::isOrdinal> 00118 class SmallTraits { 00119 public: 00120 // Return eps if Scalar is a floating-point type, else return 1. 
00121 static const Scalar eps (); 00122 }; 00123 00124 // Partial specialization for when Scalar is not a floating-point type. 00125 template<class Scalar> 00126 class SmallTraits<Scalar, true> { 00127 public: 00128 static const Scalar eps () { 00129 return Teuchos::ScalarTraits<Scalar>::one (); 00130 } 00131 }; 00132 00133 // Partial specialization for when Scalar is a floating-point type. 00134 template<class Scalar> 00135 class SmallTraits<Scalar, false> { 00136 public: 00137 static const Scalar eps () { 00138 return Teuchos::ScalarTraits<Scalar>::eps (); 00139 } 00140 }; 00141 } // namespace (anonymous) 00142 00143 namespace Ifpack2 { 00144 00145 template<class MatrixType> 00146 void Relaxation<MatrixType>:: 00147 setMatrix (const Teuchos::RCP<const row_matrix_type>& A) 00148 { 00149 if (A.getRawPtr () != A_.getRawPtr ()) { // it's a different matrix 00150 Importer_ = Teuchos::null; 00151 Diagonal_ = Teuchos::null; // ??? what if this comes from the user??? 00152 isInitialized_ = false; 00153 IsComputed_ = false; 00154 diagOffsets_ = Teuchos::null; 00155 savedDiagOffsets_ = false; 00156 hasBlockCrsMatrix_ = false; 00157 if (! A.is_null ()) { 00158 IsParallel_ = (A->getRowMap ()->getComm ()->getSize () > 1); 00159 } 00160 A_ = A; 00161 } 00162 } 00163 00164 00165 template<class MatrixType> 00166 Relaxation<MatrixType>:: 00167 Relaxation (const Teuchos::RCP<const row_matrix_type>& A) 00168 : A_ (A), 00169 Time_ (Teuchos::rcp (new Teuchos::Time ("Ifpack2::Relaxation"))), 00170 NumSweeps_ (1), 00171 PrecType_ (Ifpack2::Details::JACOBI), 00172 DampingFactor_ (STS::one ()), 00173 IsParallel_ ((A.is_null () || A->getRowMap ().is_null () || A->getRowMap ()->getComm ().is_null ()) ? 
00174 false : // a reasonable default if there's no communicator 00175 A->getRowMap ()->getComm ()->getSize () > 1), 00176 ZeroStartingSolution_ (true), 00177 DoBackwardGS_ (false), 00178 DoL1Method_ (false), 00179 L1Eta_ (Teuchos::as<magnitude_type> (1.5)), 00180 MinDiagonalValue_ (STS::zero ()), 00181 fixTinyDiagEntries_ (false), 00182 checkDiagEntries_ (false), 00183 Condest_ (-STM::one ()), 00184 isInitialized_ (false), 00185 IsComputed_ (false), 00186 NumInitialize_ (0), 00187 NumCompute_ (0), 00188 NumApply_ (0), 00189 InitializeTime_ (0.0), // Times are double anyway, so no need for ScalarTraits. 00190 ComputeTime_ (0.0), 00191 ApplyTime_ (0.0), 00192 ComputeFlops_ (0.0), 00193 ApplyFlops_ (0.0), 00194 globalMinMagDiagEntryMag_ (STM::zero ()), 00195 globalMaxMagDiagEntryMag_ (STM::zero ()), 00196 globalNumSmallDiagEntries_ (0), 00197 globalNumZeroDiagEntries_ (0), 00198 globalNumNegDiagEntries_ (0), 00199 globalDiagNormDiff_(Teuchos::ScalarTraits<magnitude_type>::zero()), 00200 savedDiagOffsets_ (false), 00201 hasBlockCrsMatrix_ (false) 00202 { 00203 this->setObjectLabel ("Ifpack2::Relaxation"); 00204 } 00205 00206 //========================================================================== 00207 template<class MatrixType> 00208 Relaxation<MatrixType>::~Relaxation() { 00209 } 00210 00211 template<class MatrixType> 00212 Teuchos::RCP<const Teuchos::ParameterList> 00213 Relaxation<MatrixType>::getValidParameters () const 00214 { 00215 using Teuchos::Array; 00216 using Teuchos::ParameterList; 00217 using Teuchos::parameterList; 00218 using Teuchos::RCP; 00219 using Teuchos::rcp; 00220 using Teuchos::rcp_const_cast; 00221 using Teuchos::rcp_implicit_cast; 00222 using Teuchos::setStringToIntegralParameter; 00223 typedef Teuchos::ParameterEntryValidator PEV; 00224 00225 if (validParams_.is_null ()) { 00226 RCP<ParameterList> pl = parameterList ("Ifpack2::Relaxation"); 00227 00228 // Set a validator that automatically converts from the valid 00229 // string options 
to their enum values. 00230 Array<std::string> precTypes (3); 00231 precTypes[0] = "Jacobi"; 00232 precTypes[1] = "Gauss-Seidel"; 00233 precTypes[2] = "Symmetric Gauss-Seidel"; 00234 Array<Details::RelaxationType> precTypeEnums (3); 00235 precTypeEnums[0] = Details::JACOBI; 00236 precTypeEnums[1] = Details::GS; 00237 precTypeEnums[2] = Details::SGS; 00238 const std::string defaultPrecType ("Jacobi"); 00239 setStringToIntegralParameter<Details::RelaxationType> ("relaxation: type", 00240 defaultPrecType, "Relaxation method", precTypes (), precTypeEnums (), 00241 pl.getRawPtr ()); 00242 00243 const int numSweeps = 1; 00244 RCP<PEV> numSweepsValidator = 00245 rcp_implicit_cast<PEV> (rcp (new NonnegativeIntValidator)); 00246 pl->set ("relaxation: sweeps", numSweeps, "Number of relaxation sweeps", 00247 rcp_const_cast<const PEV> (numSweepsValidator)); 00248 00249 const scalar_type dampingFactor = STS::one (); 00250 pl->set ("relaxation: damping factor", dampingFactor); 00251 00252 const bool zeroStartingSolution = true; 00253 pl->set ("relaxation: zero starting solution", zeroStartingSolution); 00254 00255 const bool doBackwardGS = false; 00256 pl->set ("relaxation: backward mode", doBackwardGS); 00257 00258 const bool doL1Method = false; 00259 pl->set ("relaxation: use l1", doL1Method); 00260 00261 const magnitude_type l1eta = (STM::one() + STM::one() + STM::one()) / 00262 (STM::one() + STM::one()); // 1.5 00263 pl->set ("relaxation: l1 eta", l1eta); 00264 00265 const scalar_type minDiagonalValue = STS::zero (); 00266 pl->set ("relaxation: min diagonal value", minDiagonalValue); 00267 00268 const bool fixTinyDiagEntries = false; 00269 pl->set ("relaxation: fix tiny diagonal entries", fixTinyDiagEntries); 00270 00271 const bool checkDiagEntries = false; 00272 pl->set ("relaxation: check diagonal entries", checkDiagEntries); 00273 00274 Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = Teuchos::null; 00275 pl->set("relaxation: local smoothing indices", 
localSmoothingIndices); 00276 00277 validParams_ = rcp_const_cast<const ParameterList> (pl); 00278 } 00279 return validParams_; 00280 } 00281 00282 00283 template<class MatrixType> 00284 void Relaxation<MatrixType>::setParametersImpl (Teuchos::ParameterList& pl) 00285 { 00286 using Teuchos::getIntegralValue; 00287 using Teuchos::ParameterList; 00288 using Teuchos::RCP; 00289 typedef scalar_type ST; // just to make code below shorter 00290 00291 pl.validateParametersAndSetDefaults (* getValidParameters ()); 00292 00293 const Details::RelaxationType precType = 00294 getIntegralValue<Details::RelaxationType> (pl, "relaxation: type"); 00295 const int numSweeps = pl.get<int> ("relaxation: sweeps"); 00296 const ST dampingFactor = pl.get<ST> ("relaxation: damping factor"); 00297 const bool zeroStartSol = pl.get<bool> ("relaxation: zero starting solution"); 00298 const bool doBackwardGS = pl.get<bool> ("relaxation: backward mode"); 00299 const bool doL1Method = pl.get<bool> ("relaxation: use l1"); 00300 const magnitude_type l1Eta = pl.get<magnitude_type> ("relaxation: l1 eta"); 00301 const ST minDiagonalValue = pl.get<ST> ("relaxation: min diagonal value"); 00302 const bool fixTinyDiagEntries = pl.get<bool> ("relaxation: fix tiny diagonal entries"); 00303 const bool checkDiagEntries = pl.get<bool> ("relaxation: check diagonal entries"); 00304 Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = pl.get<Teuchos::ArrayRCP<local_ordinal_type> >("relaxation: local smoothing indices"); 00305 00306 00307 // "Commit" the changes, now that we've validated everything. 
00308 PrecType_ = precType; 00309 NumSweeps_ = numSweeps; 00310 DampingFactor_ = dampingFactor; 00311 ZeroStartingSolution_ = zeroStartSol; 00312 DoBackwardGS_ = doBackwardGS; 00313 DoL1Method_ = doL1Method; 00314 L1Eta_ = l1Eta; 00315 MinDiagonalValue_ = minDiagonalValue; 00316 fixTinyDiagEntries_ = fixTinyDiagEntries; 00317 checkDiagEntries_ = checkDiagEntries; 00318 localSmoothingIndices_ = localSmoothingIndices; 00319 } 00320 00321 00322 template<class MatrixType> 00323 void Relaxation<MatrixType>::setParameters (const Teuchos::ParameterList& pl) 00324 { 00325 // FIXME (aprokop 18 Oct 2013) Casting away const is bad here. 00326 // but otherwise, we will get [unused] in pl 00327 this->setParametersImpl(const_cast<Teuchos::ParameterList&>(pl)); 00328 } 00329 00330 00331 template<class MatrixType> 00332 Teuchos::RCP<const Teuchos::Comm<int> > 00333 Relaxation<MatrixType>::getComm() const { 00334 TEUCHOS_TEST_FOR_EXCEPTION( 00335 A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getComm: " 00336 "The input matrix A is null. Please call setMatrix() with a nonnull " 00337 "input matrix before calling this method."); 00338 return A_->getRowMap ()->getComm (); 00339 } 00340 00341 00342 template<class MatrixType> 00343 Teuchos::RCP<const typename Relaxation<MatrixType>::row_matrix_type> 00344 Relaxation<MatrixType>::getMatrix () const { 00345 return A_; 00346 } 00347 00348 00349 template<class MatrixType> 00350 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type, 00351 typename MatrixType::global_ordinal_type, 00352 typename MatrixType::node_type> > 00353 Relaxation<MatrixType>::getDomainMap () const { 00354 TEUCHOS_TEST_FOR_EXCEPTION( 00355 A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getDomainMap: " 00356 "The input matrix A is null. 
Please call setMatrix() with a nonnull " 00357 "input matrix before calling this method."); 00358 return A_->getDomainMap (); 00359 } 00360 00361 00362 template<class MatrixType> 00363 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type, 00364 typename MatrixType::global_ordinal_type, 00365 typename MatrixType::node_type> > 00366 Relaxation<MatrixType>::getRangeMap () const { 00367 TEUCHOS_TEST_FOR_EXCEPTION( 00368 A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getRangeMap: " 00369 "The input matrix A is null. Please call setMatrix() with a nonnull " 00370 "input matrix before calling this method."); 00371 return A_->getRangeMap (); 00372 } 00373 00374 00375 template<class MatrixType> 00376 bool Relaxation<MatrixType>::hasTransposeApply () const { 00377 return true; 00378 } 00379 00380 00381 template<class MatrixType> 00382 int Relaxation<MatrixType>::getNumInitialize() const { 00383 return(NumInitialize_); 00384 } 00385 00386 00387 template<class MatrixType> 00388 int Relaxation<MatrixType>::getNumCompute() const { 00389 return(NumCompute_); 00390 } 00391 00392 00393 template<class MatrixType> 00394 int Relaxation<MatrixType>::getNumApply() const { 00395 return(NumApply_); 00396 } 00397 00398 00399 template<class MatrixType> 00400 double Relaxation<MatrixType>::getInitializeTime() const { 00401 return(InitializeTime_); 00402 } 00403 00404 00405 template<class MatrixType> 00406 double Relaxation<MatrixType>::getComputeTime() const { 00407 return(ComputeTime_); 00408 } 00409 00410 00411 template<class MatrixType> 00412 double Relaxation<MatrixType>::getApplyTime() const { 00413 return(ApplyTime_); 00414 } 00415 00416 00417 template<class MatrixType> 00418 double Relaxation<MatrixType>::getComputeFlops() const { 00419 return(ComputeFlops_); 00420 } 00421 00422 00423 template<class MatrixType> 00424 double Relaxation<MatrixType>::getApplyFlops() const { 00425 return(ApplyFlops_); 00426 } 00427 00428 00429 template<class MatrixType> 00430 
typename Teuchos::ScalarTraits<typename MatrixType::scalar_type>::magnitudeType 00431 Relaxation<MatrixType>::getCondEst () const 00432 { 00433 return Condest_; 00434 } 00435 00436 00437 template<class MatrixType> 00438 typename Teuchos::ScalarTraits<typename MatrixType::scalar_type>::magnitudeType 00439 Relaxation<MatrixType>:: 00440 computeCondEst (CondestType CT, 00441 typename MatrixType::local_ordinal_type MaxIters, 00442 magnitude_type Tol, 00443 const Teuchos::Ptr<const row_matrix_type>& matrix) 00444 { 00445 if (! isComputed ()) { // cannot compute right now 00446 return -Teuchos::ScalarTraits<magnitude_type>::one (); 00447 } 00448 // always compute it. Call Condest() with no parameters to get 00449 // the previous estimate. 00450 Condest_ = Ifpack2::Condest (*this, CT, MaxIters, Tol, matrix); 00451 return Condest_; 00452 } 00453 00454 00455 template<class MatrixType> 00456 void 00457 Relaxation<MatrixType>:: 00458 apply (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X, 00459 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y, 00460 Teuchos::ETransp mode, 00461 scalar_type alpha, 00462 scalar_type beta) const 00463 { 00464 using Teuchos::as; 00465 using Teuchos::RCP; 00466 using Teuchos::rcp; 00467 using Teuchos::rcpFromRef; 00468 typedef Tpetra::MultiVector<scalar_type, local_ordinal_type, 00469 global_ordinal_type, node_type> MV; 00470 TEUCHOS_TEST_FOR_EXCEPTION( 00471 A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::apply: " 00472 "The input matrix A is null. Please call setMatrix() with a nonnull " 00473 "input matrix, then call compute(), before calling this method."); 00474 TEUCHOS_TEST_FOR_EXCEPTION( 00475 ! isComputed (), 00476 std::runtime_error, 00477 "Ifpack2::Relaxation::apply: You must call compute() on this Ifpack2 " 00478 "preconditioner instance before you may call apply(). 
You may call " 00479 "isComputed() to find out if compute() has been called already."); 00480 TEUCHOS_TEST_FOR_EXCEPTION( 00481 X.getNumVectors() != Y.getNumVectors(), 00482 std::runtime_error, 00483 "Ifpack2::Relaxation::apply: X and Y have different numbers of columns. " 00484 "X has " << X.getNumVectors() << " columns, but Y has " 00485 << Y.getNumVectors() << " columns."); 00486 TEUCHOS_TEST_FOR_EXCEPTION( 00487 beta != STS::zero (), std::logic_error, 00488 "Ifpack2::Relaxation::apply: beta = " << beta << " != 0 case not " 00489 "implemented."); 00490 { 00491 // Reset the timer each time, since Relaxation uses the same Time 00492 // object to track times for different methods. 00493 Teuchos::TimeMonitor timeMon (*Time_, true); 00494 00495 // Special case: alpha == 0. 00496 if (alpha == STS::zero ()) { 00497 // No floating-point operations, so no need to update a count. 00498 Y.putScalar (STS::zero ()); 00499 } 00500 else { 00501 // If X and Y alias one another, then we need to create an 00502 // auxiliary vector, Xcopy (a deep copy of X). 00503 RCP<const MV> Xcopy; 00504 // FIXME (mfh 12 Sep 2014) This test for aliasing is both 00505 // incomplete, and a dependence on KokkosClassic. 00506 if (X.getLocalMV ().getValues () == Y.getLocalMV ().getValues ()) { 00507 Xcopy = rcp (new MV (X, Teuchos::Copy)); 00508 } 00509 else { 00510 Xcopy = rcpFromRef (X); 00511 } 00512 00513 // Each of the following methods updates the flop count itself. 00514 // All implementations handle zeroing out the starting solution 00515 // (if necessary) themselves. 
00516 switch (PrecType_) { 00517 case Ifpack2::Details::JACOBI: 00518 ApplyInverseJacobi(*Xcopy,Y); 00519 break; 00520 case Ifpack2::Details::GS: 00521 ApplyInverseGS(*Xcopy,Y); 00522 break; 00523 case Ifpack2::Details::SGS: 00524 ApplyInverseSGS(*Xcopy,Y); 00525 break; 00526 default: 00527 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, 00528 "Ifpack2::Relaxation::apply: Invalid preconditioner type enum value " 00529 << PrecType_ << ". Please report this bug to the Ifpack2 developers."); 00530 } 00531 if (alpha != STS::one ()) { 00532 Y.scale (alpha); 00533 const double numGlobalRows = as<double> (A_->getGlobalNumRows ()); 00534 const double numVectors = as<double> (Y.getNumVectors ()); 00535 ApplyFlops_ += numGlobalRows * numVectors; 00536 } 00537 } 00538 } 00539 ApplyTime_ += Time_->totalElapsedTime (); 00540 ++NumApply_; 00541 } 00542 00543 00544 template<class MatrixType> 00545 void 00546 Relaxation<MatrixType>:: 00547 applyMat (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X, 00548 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y, 00549 Teuchos::ETransp mode) const 00550 { 00551 TEUCHOS_TEST_FOR_EXCEPTION( 00552 A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::applyMat: " 00553 "The input matrix A is null. Please call setMatrix() with a nonnull " 00554 "input matrix, then call compute(), before calling this method."); 00555 TEUCHOS_TEST_FOR_EXCEPTION( 00556 ! isComputed (), std::runtime_error, "Ifpack2::Relaxation::applyMat: " 00557 "isComputed() must be true before you may call applyMat(). 
" 00558 "Please call compute() before calling this method."); 00559 TEUCHOS_TEST_FOR_EXCEPTION( 00560 X.getNumVectors () != Y.getNumVectors (), std::invalid_argument, 00561 "Ifpack2::Relaxation::applyMat: X.getNumVectors() = " << X.getNumVectors () 00562 << " != Y.getNumVectors() = " << Y.getNumVectors () << "."); 00563 A_->apply (X, Y, mode); 00564 } 00565 00566 00567 template<class MatrixType> 00568 void Relaxation<MatrixType>::initialize () 00569 { 00570 TEUCHOS_TEST_FOR_EXCEPTION( 00571 A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::initialize: " 00572 "The input matrix A is null. Please call setMatrix() with a nonnull " 00573 "input matrix before calling this method."); 00574 00575 if (A_.is_null ()) { 00576 hasBlockCrsMatrix_ = false; 00577 } 00578 else { // A_ is not null 00579 Teuchos::RCP<const block_crs_matrix_type> A_bcrs = 00580 Teuchos::rcp_dynamic_cast<const block_crs_matrix_type> (A_); 00581 if (A_bcrs.is_null ()) { 00582 hasBlockCrsMatrix_ = false; 00583 } 00584 else { // A_ is a block_crs_matrix_type 00585 hasBlockCrsMatrix_ = true; 00586 } 00587 } 00588 00589 // Initialization for Relaxation is trivial, so we say it takes zero time. 00590 //InitializeTime_ += Time_->totalElapsedTime (); 00591 ++NumInitialize_; 00592 isInitialized_ = true; 00593 } 00594 00595 template<class MatrixType> 00596 void Relaxation<MatrixType>::computeBlockCrs () 00597 { 00598 using Teuchos::Array; 00599 using Teuchos::ArrayRCP; 00600 using Teuchos::ArrayView; 00601 using Teuchos::as; 00602 using Teuchos::Comm; 00603 using Teuchos::RCP; 00604 using Teuchos::rcp; 00605 using Teuchos::REDUCE_MAX; 00606 using Teuchos::REDUCE_MIN; 00607 using Teuchos::REDUCE_SUM; 00608 using Teuchos::rcp_dynamic_cast; 00609 using Teuchos::reduceAll; 00610 00611 { 00612 // Reset the timer each time, since Relaxation uses the same Time 00613 // object to track times for different methods. 
00614 Teuchos::TimeMonitor timeMon (*Time_, true); 00615 00616 TEUCHOS_TEST_FOR_EXCEPTION( 00617 A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::" 00618 "computeBlockCrs: The input matrix A is null. Please call setMatrix() " 00619 "with a nonnull input matrix, then call initialize(), before calling " 00620 "this method."); 00621 const block_crs_matrix_type* blockCrsA = 00622 dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr ()); 00623 TEUCHOS_TEST_FOR_EXCEPTION( 00624 blockCrsA == NULL, std::logic_error, "Ifpack2::Relaxation::" 00625 "computeBlockCrs: A_ is not a BlockCrsMatrix, but it should be if we " 00626 "got this far. Please report this bug to the Ifpack2 developers."); 00627 00628 const scalar_type one = STS::one (); 00629 00630 // Reset state. 00631 IsComputed_ = false; 00632 Condest_ = -STM::one (); 00633 00634 const local_ordinal_type blockSize = blockCrsA->getBlockSize (); 00635 00636 if (! savedDiagOffsets_) { 00637 BlockDiagonal_ = Teuchos::null; 00638 BlockDiagonal_ = 00639 rcp (new block_crs_matrix_type (* (blockCrsA->getDiagonalGraph ()), 00640 blockSize)); 00641 blockCrsA->getLocalDiagOffsets (diagOffsets_); 00642 savedDiagOffsets_ = true; 00643 } 00644 blockCrsA->getLocalDiagCopy (*BlockDiagonal_, diagOffsets_ ()); 00645 00646 const size_t numMyRows = A_->getNodeNumRows (); 00647 00648 if (DoL1Method_ && IsParallel_) { 00649 const scalar_type two = one + one; 00650 const size_t maxLength = A_->getNodeMaxNumRowEntries (); 00651 Array<local_ordinal_type> indices (maxLength); 00652 Array<scalar_type> values (maxLength * blockSize * blockSize); 00653 size_t numEntries = 0; 00654 00655 for (size_t i = 0; i < numMyRows; ++i) { 00656 blockCrsA->getLocalRowCopy (i, indices (), values (), numEntries); 00657 scalar_type* diagBlock = BlockDiagonal_->getLocalBlock (i,i).getRawPtr (); 00658 for (local_ordinal_type subRow = 0; subRow < blockSize; ++subRow) { 00659 magnitude_type diagonal_boost = STM::zero (); 00660 for (size_t k = 0 ; k < 
numEntries ; ++k) { 00661 if (static_cast<size_t> (indices[k]) > numMyRows) { 00662 const size_t offset = blockSize*blockSize*k + subRow*blockSize; 00663 for (local_ordinal_type subCol = 0; subCol < blockSize; ++subCol) { 00664 diagonal_boost += STS::magnitude (values[offset+subCol] / two); 00665 } 00666 } 00667 } 00668 if (STS::magnitude (diagBlock[subRow*(blockSize+1)]) < L1Eta_ * diagonal_boost) { 00669 diagBlock[subRow*(blockSize+1)] += diagonal_boost; 00670 } 00671 } 00672 } 00673 } 00674 00675 blockDiagonalFactorizationPivots.resize (numMyRows * blockSize); 00676 int info; 00677 for (size_t i = 0 ; i < numMyRows; ++i) { 00678 typename block_crs_matrix_type::little_block_type diagBlock = 00679 BlockDiagonal_->getLocalBlock (i, i); 00680 diagBlock.factorize (&blockDiagonalFactorizationPivots[i*blockSize], info); 00681 } 00682 00683 Importer_ = A_->getGraph ()->getImporter (); 00684 } // end TimeMonitor scope 00685 00686 ComputeTime_ += Time_->totalElapsedTime (); 00687 ++NumCompute_; 00688 IsComputed_ = true; 00689 } 00690 00691 template<class MatrixType> 00692 void Relaxation<MatrixType>::compute () 00693 { 00694 using Teuchos::Array; 00695 using Teuchos::ArrayRCP; 00696 using Teuchos::ArrayView; 00697 using Teuchos::as; 00698 using Teuchos::Comm; 00699 using Teuchos::RCP; 00700 using Teuchos::rcp; 00701 using Teuchos::REDUCE_MAX; 00702 using Teuchos::REDUCE_MIN; 00703 using Teuchos::REDUCE_SUM; 00704 using Teuchos::rcp_dynamic_cast; 00705 using Teuchos::reduceAll; 00706 typedef Tpetra::Vector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> vector_type; 00707 const scalar_type zero = STS::zero (); 00708 const scalar_type one = STS::one (); 00709 00710 // We don't count initialization in compute() time. 00711 if (! 
isInitialized ()) { 00712 initialize (); 00713 } 00714 00715 if (hasBlockCrsMatrix_) { 00716 computeBlockCrs (); 00717 return; 00718 } 00719 00720 { 00721 // Reset the timer each time, since Relaxation uses the same Time 00722 // object to track times for different methods. 00723 Teuchos::TimeMonitor timeMon (*Time_, true); 00724 00725 TEUCHOS_TEST_FOR_EXCEPTION( 00726 A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::compute: " 00727 "The input matrix A is null. Please call setMatrix() with a nonnull " 00728 "input matrix, then call initialize(), before calling this method."); 00729 00730 // Reset state. 00731 IsComputed_ = false; 00732 Condest_ = -STM::one (); 00733 00734 TEUCHOS_TEST_FOR_EXCEPTION( 00735 NumSweeps_ < 0, std::logic_error, 00736 "Ifpack2::Relaxation::compute: NumSweeps_ = " << NumSweeps_ << " < 0. " 00737 "Please report this bug to the Ifpack2 developers."); 00738 00739 Diagonal_ = rcp (new vector_type (A_->getRowMap ())); 00740 00741 TEUCHOS_TEST_FOR_EXCEPTION( 00742 Diagonal_.is_null (), std::logic_error, 00743 "Ifpack2::Relaxation::compute: Vector of diagonal entries has not been " 00744 "created yet. Please report this bug to the Ifpack2 developers."); 00745 00746 // Extract the diagonal entries. The CrsMatrix static graph 00747 // version is faster for subsequent calls to compute(), since it 00748 // caches the diagonal offsets. 00749 // 00750 // isStaticGraph() == true means that the matrix was created with 00751 // a const graph. The only requirement is that the structure of 00752 // the matrix never changes, so isStaticGraph() == true is a bit 00753 // more conservative than we need. However, Tpetra doesn't (as of 00754 // 05 Apr 2013) have a way to tell if the graph hasn't changed 00755 // since the last time we used it. 00756 { 00757 // NOTE (mfh 07 Jul 2013): We must cast here to CrsMatrix 00758 // instead of MatrixType, because isStaticGraph is a CrsMatrix 00759 // method (not inherited from RowMatrix's interface). 
It's 00760 // perfectly valid to do relaxation on a RowMatrix which is not 00761 // a CrsMatrix. 00762 const crs_matrix_type* crsMat = 00763 dynamic_cast<const crs_matrix_type*> (A_.getRawPtr ()); 00764 if (crsMat == NULL || ! crsMat->isStaticGraph ()) { 00765 A_->getLocalDiagCopy (*Diagonal_); // slow path 00766 } else { 00767 if (! savedDiagOffsets_) { // we haven't precomputed offsets 00768 crsMat->getLocalDiagOffsets (diagOffsets_); 00769 savedDiagOffsets_ = true; 00770 } 00771 crsMat->getLocalDiagCopy (*Diagonal_, diagOffsets_ ()); 00772 #ifdef HAVE_TPETRA_DEBUG 00773 // Validate the fast-path diagonal against the slow-path diagonal. 00774 vector_type D_copy (A_->getRowMap ()); 00775 A_->getLocalDiagCopy (D_copy); 00776 D_copy.update (STS::one (), *Diagonal_, -STS::one ()); 00777 const magnitude_type err = D_copy.normInf (); 00778 // The two diagonals should be exactly the same, so their 00779 // difference should be exactly zero. 00780 TEUCHOS_TEST_FOR_EXCEPTION( 00781 err != STM::zero(), std::logic_error, "Ifpack2::Relaxation::compute: " 00782 "\"fast-path\" diagonal computation failed. \\|D1 - D2\\|_inf = " 00783 << err << "."); 00784 #endif // HAVE_TPETRA_DEBUG 00785 } 00786 } 00787 00788 // If we're checking the computed inverse diagonal, then keep a 00789 // copy of the original diagonal entries for later comparison. 00790 RCP<vector_type> origDiag; 00791 if (checkDiagEntries_) { 00792 origDiag = rcp (new vector_type (A_->getRowMap ())); 00793 *origDiag = *Diagonal_; 00794 } 00795 00796 // "Host view" means that if the Node type is a GPU Node, the 00797 // ArrayRCP points to host memory, not device memory. It will 00798 // write back to device memory (Diagonal_) at end of scope. 00799 ArrayRCP<scalar_type> diagHostView = Diagonal_->get1dViewNonConst (); 00800 00801 // The view below is only valid as long as diagHostView is within 00802 // scope. 
We extract a raw pointer in release mode because we 00803 // don't trust older versions of compilers (like GCC 4.4.x or 00804 // Intel < 13) to optimize away ArrayView::operator[]. 00805 #ifdef HAVE_IFPACK2_DEBUG 00806 ArrayView<scalar_type> diag = diagHostView (); 00807 #else 00808 scalar_type* KOKKOSCLASSIC_RESTRICT const diag = diagHostView.getRawPtr (); 00809 #endif // HAVE_IFPACK2_DEBUG 00810 00811 const size_t numMyRows = A_->getNodeNumRows (); 00812 00813 // Setup for L1 Methods. 00814 // Here we add half the value of the off-processor entries in the row, 00815 // but only if diagonal isn't sufficiently large. 00816 // 00817 // This follows from Equation (6.5) in: Baker, Falgout, Kolev and 00818 // Yang. "Multigrid Smoothers for Ultraparallel Computing." SIAM 00819 // J. Sci. Comput., Vol. 33, No. 5. (2011), pp. 2864-2887. 00820 if (DoL1Method_ && IsParallel_) { 00821 const scalar_type two = one + one; 00822 const size_t maxLength = A_->getNodeMaxNumRowEntries (); 00823 Array<local_ordinal_type> indices (maxLength); 00824 Array<scalar_type> values (maxLength); 00825 size_t numEntries; 00826 00827 for (size_t i = 0; i < numMyRows; ++i) { 00828 A_->getLocalRowCopy (i, indices (), values (), numEntries); 00829 magnitude_type diagonal_boost = STM::zero (); 00830 for (size_t k = 0 ; k < numEntries ; ++k) { 00831 if (static_cast<size_t> (indices[k]) > numMyRows) { 00832 diagonal_boost += STS::magnitude (values[k] / two); 00833 } 00834 } 00835 if (STS::magnitude (diag[i]) < L1Eta_ * diagonal_boost) { 00836 diag[i] += diagonal_boost; 00837 } 00838 } 00839 00840 } 00841 00842 // 00843 // Invert the diagonal entries of the matrix (not in place). 00844 // 00845 00846 // Precompute some quantities for "fixing" zero or tiny diagonal 00847 // entries. We'll only use them if this "fixing" is enabled. 00848 // 00849 // SmallTraits covers for the lack of eps() in 00850 // Teuchos::ScalarTraits when its template parameter is not a 00851 // floating-point type. 
(Ifpack2 sometimes gets instantiated for 00852 // integer Scalar types.) 00853 const scalar_type oneOverMinDiagVal = (MinDiagonalValue_ == zero) ? 00854 one / SmallTraits<scalar_type>::eps () : 00855 one / MinDiagonalValue_; 00856 // It's helpful not to have to recompute this magnitude each time. 00857 const magnitude_type minDiagValMag = STS::magnitude (MinDiagonalValue_); 00858 00859 if (checkDiagEntries_) { 00860 // 00861 // Check diagonal elements, replace zero elements with the minimum 00862 // diagonal value, and store their inverses. Count the number of 00863 // "small" and zero diagonal entries while we're at it. 00864 // 00865 size_t numSmallDiagEntries = 0; // "small" includes zero 00866 size_t numZeroDiagEntries = 0; // # zero diagonal entries 00867 size_t numNegDiagEntries = 0; // # negative (real parts of) diagonal entries 00868 00869 // As we go, keep track of the diagonal entries with the least and 00870 // greatest magnitude. We could use the trick of starting the min 00871 // with +Inf and the max with -Inf, but that doesn't work if 00872 // scalar_type is a built-in integer type. Thus, we have to start 00873 // by reading the first diagonal entry redundantly. 00874 // scalar_type minMagDiagEntry = zero; 00875 // scalar_type maxMagDiagEntry = zero; 00876 magnitude_type minMagDiagEntryMag = STM::zero (); 00877 magnitude_type maxMagDiagEntryMag = STM::zero (); 00878 if (numMyRows > 0) { 00879 const scalar_type d_0 = diag[0]; 00880 const magnitude_type d_0_mag = STS::magnitude (d_0); 00881 // minMagDiagEntry = d_0; 00882 // maxMagDiagEntry = d_0; 00883 minMagDiagEntryMag = d_0_mag; 00884 maxMagDiagEntryMag = d_0_mag; 00885 } 00886 00887 // Go through all the diagonal entries. Compute counts of 00888 // small-magnitude, zero, and negative-real-part entries. Invert 00889 // the diagonal entries that aren't too small. 
For those that are 00890 // too small in magnitude, replace them with 1/MinDiagonalValue_ 00891 // (or 1/eps if MinDiagonalValue_ happens to be zero). 00892 for (size_t i = 0 ; i < numMyRows; ++i) { 00893 const scalar_type d_i = diag[i]; 00894 const magnitude_type d_i_mag = STS::magnitude (d_i); 00895 const magnitude_type d_i_real = STS::real (d_i); 00896 00897 // We can't compare complex numbers, but we can compare their 00898 // real parts. 00899 if (d_i_real < STM::zero ()) { 00900 ++numNegDiagEntries; 00901 } 00902 if (d_i_mag < minMagDiagEntryMag) { 00903 // minMagDiagEntry = d_i; 00904 minMagDiagEntryMag = d_i_mag; 00905 } 00906 if (d_i_mag > maxMagDiagEntryMag) { 00907 // maxMagDiagEntry = d_i; 00908 maxMagDiagEntryMag = d_i_mag; 00909 } 00910 00911 if (fixTinyDiagEntries_) { 00912 // <= not <, in case minDiagValMag is zero. 00913 if (d_i_mag <= minDiagValMag) { 00914 ++numSmallDiagEntries; 00915 if (d_i_mag == STM::zero ()) { 00916 ++numZeroDiagEntries; 00917 } 00918 diag[i] = oneOverMinDiagVal; 00919 } else { 00920 diag[i] = one / d_i; 00921 } 00922 } 00923 else { // Don't fix zero or tiny diagonal entries. 00924 // <= not <, in case minDiagValMag is zero. 00925 if (d_i_mag <= minDiagValMag) { 00926 ++numSmallDiagEntries; 00927 if (d_i_mag == STM::zero ()) { 00928 ++numZeroDiagEntries; 00929 } 00930 } 00931 diag[i] = one / d_i; 00932 } 00933 } 00934 00935 // We're done computing the inverse diagonal, so invalidate the view. 00936 // This ensures that the operations below, that use methods on Vector, 00937 // produce correct results even on a GPU Node. 00938 diagHostView = Teuchos::null; 00939 00940 // Count floating-point operations of computing the inverse diagonal. 00941 // 00942 // FIXME (mfh 30 Mar 2013) Shouldn't counts be global, not local? 
00943 if (STS::isComplex) { // magnitude: at least 3 flops per diagonal entry 00944 ComputeFlops_ += 4.0 * numMyRows; 00945 } else { 00946 ComputeFlops_ += numMyRows; 00947 } 00948 00949 // Collect global data about the diagonal entries. Only do this 00950 // (which involves O(1) all-reduces) if the user asked us to do 00951 // the extra work. 00952 // 00953 // FIXME (mfh 28 Mar 2013) This is wrong if some processes have 00954 // zero rows. Fixing this requires one of two options: 00955 // 00956 // 1. Use a custom reduction operation that ignores processes 00957 // with zero diagonal entries. 00958 // 2. Split the communicator, compute all-reduces using the 00959 // subcommunicator over processes that have a nonzero number 00960 // of diagonal entries, and then broadcast from one of those 00961 // processes (if there is one) to the processes in the other 00962 // subcommunicator. 00963 const Comm<int>& comm = * (A_->getRowMap ()->getComm ()); 00964 00965 // Compute global min and max magnitude of entries. 00966 Array<magnitude_type> localVals (2); 00967 localVals[0] = minMagDiagEntryMag; 00968 // (- (min (- x))) is the same as (max x). 
00969 localVals[1] = -maxMagDiagEntryMag; 00970 Array<magnitude_type> globalVals (2); 00971 reduceAll<int, magnitude_type> (comm, REDUCE_MIN, 2, 00972 localVals.getRawPtr (), 00973 globalVals.getRawPtr ()); 00974 globalMinMagDiagEntryMag_ = globalVals[0]; 00975 globalMaxMagDiagEntryMag_ = -globalVals[1]; 00976 00977 Array<size_t> localCounts (3); 00978 localCounts[0] = numSmallDiagEntries; 00979 localCounts[1] = numZeroDiagEntries; 00980 localCounts[2] = numNegDiagEntries; 00981 Array<size_t> globalCounts (3); 00982 reduceAll<int, size_t> (comm, REDUCE_SUM, 3, 00983 localCounts.getRawPtr (), 00984 globalCounts.getRawPtr ()); 00985 globalNumSmallDiagEntries_ = globalCounts[0]; 00986 globalNumZeroDiagEntries_ = globalCounts[1]; 00987 globalNumNegDiagEntries_ = globalCounts[2]; 00988 00989 // Forestall "set but not used" compiler warnings. 00990 // (void) minMagDiagEntry; 00991 // (void) maxMagDiagEntry; 00992 00993 // Compute and save the difference between the computed inverse 00994 // diagonal, and the original diagonal's inverse. 00995 RCP<vector_type> diff = rcp (new vector_type (A_->getRowMap ())); 00996 diff->reciprocal (*origDiag); 00997 diff->update (-one, *Diagonal_, one); 00998 globalDiagNormDiff_ = diff->norm2 (); 00999 } 01000 else { // don't check diagonal elements 01001 if (fixTinyDiagEntries_) { 01002 // Go through all the diagonal entries. Invert those that 01003 // aren't too small in magnitude. For those that are too 01004 // small in magnitude, replace them with oneOverMinDiagVal. 01005 for (size_t i = 0 ; i < numMyRows; ++i) { 01006 const scalar_type d_i = diag[i]; 01007 const magnitude_type d_i_mag = STS::magnitude (d_i); 01008 01009 // <= not <, in case minDiagValMag is zero. 
        if (d_i_mag <= minDiagValMag) {
          diag[i] = oneOverMinDiagVal;
        } else {
          diag[i] = one / d_i;
        }
      }
    }
    else { // don't fix tiny or zero diagonal entries
      for (size_t i = 0 ; i < numMyRows; ++i) {
        diag[i] = one / diag[i];
      }
    }
    if (STS::isComplex) { // magnitude: at least 3 flops per diagonal entry
      ComputeFlops_ += 4.0 * numMyRows;
    } else {
      ComputeFlops_ += numMyRows;
    }
  }

  if (IsParallel_ && (PrecType_ == Ifpack2::Details::GS ||
                      PrecType_ == Ifpack2::Details::SGS)) {
    Importer_ = A_->getGraph ()->getImporter ();
    // mfh 21 Mar 2013: The Import object may be null, but in that
    // case, the domain and column Maps are the same and we don't
    // need to Import anyway.
  }
  } // end TimeMonitor scope

  ComputeTime_ += Time_->totalElapsedTime ();
  ++NumCompute_;
  IsComputed_ = true;
}


// Apply damped Jacobi sweeps: Y := Y + DampingFactor_ * D^{-1} (X - A*Y).
// Dispatches to the BlockCrsMatrix-specific kernel when A_ is a
// BlockCrsMatrix; otherwise uses Vector::elementWiseMultiply with the
// precomputed inverse diagonal Diagonal_.
template<class MatrixType>
void
Relaxation<MatrixType>::
ApplyInverseJacobi (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
                    Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
{
  using Teuchos::as;
  typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
    global_ordinal_type, node_type> MV;

  if (hasBlockCrsMatrix_) {
    ApplyInverseJacobi_BlockCrsMatrix (X, Y);
    return;
  }

  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
  const double numVectors = as<double> (X.getNumVectors ());
  if (ZeroStartingSolution_) {
    // For the first Jacobi sweep, if we are allowed to assume that
    // the initial guess is zero, then Jacobi is just diagonal
    // scaling.  (A_ij * x_j = 0 for i != j, since x_j = 0.)
    // Compute it as Y(i,j) = DampingFactor_ * X(i,j) * D(i).
    Y.elementWiseMultiply (DampingFactor_, *Diagonal_, X, STS::zero ());

    // Count (global) floating-point operations.  Ifpack2 represents
    // this as a floating-point number rather than an integer, so that
    // overflow (for a very large number of calls, or a very large
    // problem) is approximate instead of catastrophic.
    double flopUpdate = 0.0;
    if (DampingFactor_ == STS::one ()) {
      // Y(i,j) = X(i,j) * D(i): one multiply for each entry of Y.
      flopUpdate = numGlobalRows * numVectors;
    } else {
      // Y(i,j) = DampingFactor_ * X(i,j) * D(i):
      // Two multiplies per entry of Y.
      flopUpdate = 2.0 * numGlobalRows * numVectors;
    }
    ApplyFlops_ += flopUpdate;
    if (NumSweeps_ == 1) {
      return;
    }
  }
  // If we were allowed to assume that the starting guess was zero,
  // then we have already done the first sweep above.
  const int startSweep = ZeroStartingSolution_ ? 1 : 0;
  // We don't need to initialize the result MV, since the sparse
  // mat-vec will clobber its contents anyway.
  MV A_times_Y (Y.getMap (), as<size_t>(numVectors), false);
  for (int j = startSweep; j < NumSweeps_; ++j) {
    // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
    applyMat (Y, A_times_Y);
    A_times_Y.update (STS::one (), X, -STS::one ());
    Y.elementWiseMultiply (DampingFactor_, *Diagonal_, A_times_Y, STS::one ());
  }

  // For each column of output, for each pass over the matrix:
  //
  // - One + and one * for each matrix entry
  // - One / and one + for each row of the matrix
  // - If the damping factor is not one: one * for each row of the
  //   matrix.  (It's not fair to count this if the damping factor is
  //   one, since the implementation could skip it.  Whether it does
  //   or not is the implementation's choice.)

  // Floating-point operations due to the damping factor, per matrix
  // row, per direction, per columm of output.
  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
  const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
  ApplyFlops_ += as<double> (NumSweeps_ - startSweep) * numVectors *
    (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
}

// Damped Jacobi for the BlockCrsMatrix case.  Each "diagonal entry" is
// a dense block that was factored in compute(); each sweep solves with
// that factored block instead of multiplying by a scalar inverse.
template<class MatrixType>
void
Relaxation<MatrixType>::
ApplyInverseJacobi_BlockCrsMatrix (const Tpetra::MultiVector<scalar_type, local_ordinal_type,
                                     global_ordinal_type, node_type>& X,
                                   Tpetra::MultiVector<scalar_type, local_ordinal_type,
                                     global_ordinal_type,node_type>& Y) const
{
  typedef Tpetra::Experimental::BlockMultiVector<scalar_type,
    local_ordinal_type, global_ordinal_type,node_type> BMV;
  typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
    global_ordinal_type, node_type> MV;
  typedef typename block_crs_matrix_type::little_block_type little_block_type;
  typedef typename block_crs_matrix_type::little_vec_type little_vec_type;

  if (ZeroStartingSolution_) {
    Y.putScalar (STS::zero ());
  }

  const size_t numVectors = X.getNumVectors ();

  // Caller (ApplyInverseJacobi) only reaches this method when
  // hasBlockCrsMatrix_ is true, so the cast is expected to succeed.
  const block_crs_matrix_type* blockMat =
    dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr ());

  const local_ordinal_type blockSize = blockMat->getBlockSize ();
  BMV A_times_Y_Block (* (blockMat->getRowMap ()), * (Y.getMap ()),
                       blockSize, numVectors);
  MV A_times_Y = A_times_Y_Block.getMultiVectorView();
  BMV yBlock (Y, * (blockMat->getRowMap ()), blockSize);
  for (int j = 0; j < NumSweeps_; ++j) {
    blockMat->apply (Y, A_times_Y);
    A_times_Y.update (STS::one (), X, -STS::one ());
    const size_t numRows = blockMat->getNodeNumRows ();
    for (size_t i = 0; i < numVectors; ++i) {
      for (size_t k = 0; k < numRows; ++k) {
        // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
        little_block_type factorizedBlockDiag = BlockDiagonal_->getLocalBlock (k, k);
        little_vec_type xloc = A_times_Y_Block.getLocalBlock (k, i);
        little_vec_type yloc = yBlock.getLocalBlock (k, i);
        // NOTE(review): the pivot array is indexed by the vector index
        // i, not the block-row index k.  If the pivots are stored one
        // set per block row, this looks wrong whenever
        // numVectors != numRows -- confirm against the factorization
        // performed in compute().
        factorizedBlockDiag.solve (xloc, &blockDiagonalFactorizationPivots[i*blockSize]);
        yloc.update (DampingFactor_, xloc);
      }
    }
  }
}

// Gauss-Seidel dispatch: choose the fastest kernel available for the
// actual run-time type of A_.
template<class MatrixType>
void
Relaxation<MatrixType>::
ApplyInverseGS (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
                Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
{
  typedef Relaxation<MatrixType> this_type;
  // The CrsMatrix version is faster, because it can access the sparse
  // matrix data directly, rather than by copying out each row's data
  // in turn.  Thus, we check whether the RowMatrix is really a
  // CrsMatrix.
  //
  // FIXME (mfh 07 Jul 2013) See note on crs_matrix_type typedef
  // declaration in Ifpack2_Relaxation_decl.hpp header file.  The code
  // will still be correct if the cast fails, but it will use an
  // unoptimized kernel.
  const block_crs_matrix_type* blockCrsMat =
    dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr ());
  const crs_matrix_type* crsMat =
    dynamic_cast<const crs_matrix_type*> (A_.getRawPtr ());
  if (blockCrsMat != NULL) {
    const_cast<this_type*> (this)->ApplyInverseGS_BlockCrsMatrix (*blockCrsMat, X, Y);
  } else if (crsMat != NULL) {
    ApplyInverseGS_CrsMatrix (*crsMat, X, Y);
  } else {
    ApplyInverseGS_RowMatrix (X, Y);
  }
}


// Generic (slow-path) Gauss-Seidel for an arbitrary Tpetra::RowMatrix.
// Copies out one row at a time via getLocalRowCopy.  Supports forward
// or backward sweeps, optional local reordering (localSmoothingIndices_),
// and an optimized 1-D path when both X and Y have constant stride.
template<class MatrixType>
void
Relaxation<MatrixType>::
ApplyInverseGS_RowMatrix (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
                          Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
{
  using Teuchos::Array;
  using Teuchos::ArrayRCP;
  using Teuchos::ArrayView;
  using Teuchos::as;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;

  // Tpetra's GS implementation for CrsMatrix handles zeroing out the
  // starting multivector itself.  The generic RowMatrix version here
  // does not, so we have to zero out Y here.
  if (ZeroStartingSolution_) {
    Y.putScalar (STS::zero ());
  }

  const size_t NumVectors = X.getNumVectors ();
  const size_t maxLength = A_->getNodeMaxNumRowEntries ();
  Array<local_ordinal_type> Indices (maxLength);
  Array<scalar_type> Values (maxLength);

  // Local smoothing stuff: optionally iterate over a user-supplied
  // (possibly reordered, possibly strict subset) list of local rows.
  const size_t numMyRows = A_->getNodeNumRows();
  const local_ordinal_type* rowInd = 0;
  size_t numActive = numMyRows;
  bool do_local = ! localSmoothingIndices_.is_null ();
  if (do_local) {
    rowInd = localSmoothingIndices_.getRawPtr ();
    numActive = localSmoothingIndices_.size ();
  }

  // Y2 holds Y's data on the column Map, so off-process entries are
  // available during the sweep.
  RCP<MV> Y2;
  if (IsParallel_) {
    if (Importer_.is_null ()) { // domain and column Maps are the same.
      // We will copy Y into Y2 below, so no need to fill with zeros here.
      Y2 = rcp (new MV (Y.getMap (), NumVectors, false));
    } else {
      // FIXME (mfh 21 Mar 2013) We probably don't need to fill with
      // zeros here, since we are doing an Import into Y2 below
      // anyway.  However, it doesn't hurt correctness.
      Y2 = rcp (new MV (Importer_->getTargetMap (), NumVectors));
    }
  }
  else {
    Y2 = rcpFromRef (Y);
  }

  // Diagonal (already inverted in compute(), so we multiply below).
  ArrayRCP<const scalar_type> d_rcp = Diagonal_->get1dView ();
  ArrayView<const scalar_type> d_ptr = d_rcp();

  // Constant stride check
  bool constant_stride = X.isConstantStride() && Y2->isConstantStride();

  if (constant_stride) {
    // extract 1D RCPs
    size_t x_stride = X.getStride();
    size_t y2_stride = Y2->getStride();
    ArrayRCP<scalar_type> y2_rcp = Y2->get1dViewNonConst();
    ArrayRCP<const scalar_type> x_rcp = X.get1dView();
    ArrayView<scalar_type> y2_ptr = y2_rcp();
    ArrayView<const scalar_type> x_ptr = x_rcp();
    Array<scalar_type> dtemp(NumVectors,STS::zero());

    for (int j = 0; j < NumSweeps_; ++j) {
      // data exchange is here, once per sweep
      if (IsParallel_) {
        if (Importer_.is_null ()) {
          *Y2 = Y; // just copy, since domain and column Maps are the same
        } else {
          Y2->doImport (Y, *Importer_, Tpetra::INSERT);
        }
      }

      if (! DoBackwardGS_) { // Forward sweep
        for (size_t ii = 0; ii < numActive; ++ii) {
          local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
          size_t NumEntries;
          A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
          dtemp.assign(NumVectors,STS::zero());

          // dtemp[m] accumulates (A*Y2)(i,m), including the diagonal term.
          for (size_t k = 0; k < NumEntries; ++k) {
            const local_ordinal_type col = Indices[k];
            for (size_t m = 0; m < NumVectors; ++m) {
              dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
            }
          }

          for (size_t m = 0; m < NumVectors; ++m) {
            y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
          }
        }
      }
      else { // Backward sweep
        // ptrdiff_t is the same size as size_t, but is signed.  Being
        // signed is important so that i >= 0 is not trivially true.
        for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
          local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
          size_t NumEntries;
          A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
          dtemp.assign (NumVectors, STS::zero ());

          for (size_t k = 0; k < NumEntries; ++k) {
            const local_ordinal_type col = Indices[k];
            for (size_t m = 0; m < NumVectors; ++m) {
              dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
            }
          }

          for (size_t m = 0; m < NumVectors; ++m) {
            y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
          }
        }
      }
      // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
      if (IsParallel_) {
        Tpetra::deep_copy (Y, *Y2);
      }
    }
  }
  else {
    // extract 2D RCPS
    ArrayRCP<ArrayRCP<scalar_type> > y2_ptr = Y2->get2dViewNonConst ();
    ArrayRCP<ArrayRCP<const scalar_type> > x_ptr = X.get2dView ();

    for (int j = 0; j < NumSweeps_; ++j) {
      // data exchange is here, once per sweep
      if (IsParallel_) {
        if (Importer_.is_null ()) {
          *Y2 = Y; // just copy, since domain and column Maps are the same
        } else {
          Y2->doImport (Y, *Importer_, Tpetra::INSERT);
        }
      }

      if (! DoBackwardGS_) { // Forward sweep
        for (size_t ii = 0; ii < numActive; ++ii) {
          local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
          size_t NumEntries;
          A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);

          for (size_t m = 0; m < NumVectors; ++m) {
            scalar_type dtemp = STS::zero ();
            ArrayView<const scalar_type> x_local = (x_ptr())[m]();
            ArrayView<scalar_type> y2_local = (y2_ptr())[m]();

            for (size_t k = 0; k < NumEntries; ++k) {
              const local_ordinal_type col = Indices[k];
              dtemp += Values[k] * y2_local[col];
            }
            y2_local[i] += DampingFactor_ * d_ptr[i] * (x_local[i] - dtemp);
          }
        }
      }
      else { // Backward sweep
        // ptrdiff_t is the same size as size_t, but is signed.  Being
        // signed is important so that i >= 0 is not trivially true.
        for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
          local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);

          size_t NumEntries;
          A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);

          for (size_t m = 0; m < NumVectors; ++m) {
            scalar_type dtemp = STS::zero ();
            ArrayView<const scalar_type> x_local = (x_ptr())[m]();
            ArrayView<scalar_type> y2_local = (y2_ptr())[m]();

            for (size_t k = 0; k < NumEntries; ++k) {
              const local_ordinal_type col = Indices[k];
              dtemp += Values[k] * y2_local[col];
            }
            y2_local[i] += DampingFactor_ * d_ptr[i] * (x_local[i] - dtemp);
          }
        }
      }

      // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
      if (IsParallel_) {
        Tpetra::deep_copy (Y, *Y2);
      }
    }
  }


  // See flop count discussion in implementation of ApplyInverseGS_CrsMatrix().
  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
  const double numVectors = as<double> (X.getNumVectors ());
  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
  ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
    (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
}


// Fast-path Gauss-Seidel: delegate to Tpetra's (reordered)gaussSeidelCopy.
template<class MatrixType>
void
Relaxation<MatrixType>::
ApplyInverseGS_CrsMatrix (const crs_matrix_type& A,
                          const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
                          Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
{
  using Teuchos::as;
  const Tpetra::ESweepDirection direction =
    DoBackwardGS_ ?
Tpetra::Backward : Tpetra::Forward;
  if(localSmoothingIndices_.is_null())
    A.gaussSeidelCopy (Y, X, *Diagonal_, DampingFactor_, direction,
                       NumSweeps_, ZeroStartingSolution_);
  else
    A.reorderedGaussSeidelCopy (Y, X, *Diagonal_, localSmoothingIndices_(), DampingFactor_, direction,
                                NumSweeps_, ZeroStartingSolution_);

  // For each column of output, for each sweep over the matrix:
  //
  // - One + and one * for each matrix entry
  // - One / and one + for each row of the matrix
  // - If the damping factor is not one: one * for each row of the
  //   matrix.  (It's not fair to count this if the damping factor is
  //   one, since the implementation could skip it.  Whether it does
  //   or not is the implementation's choice.)

  // Floating-point operations due to the damping factor, per matrix
  // row, per direction, per columm of output.
  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
  const double numVectors = as<double> (X.getNumVectors ());
  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
  ApplyFlops_ += NumSweeps_ * numVectors *
    (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
}

// Gauss-Seidel for BlockCrsMatrix: each sweep imports the current Y
// onto the column Map (when needed) and runs Tpetra's localGaussSeidel
// with the factored block diagonal.  Non-const because it may (re)build
// the cached column-Map block multivector yBlockColumnPointMap_.
template<class MatrixType>
void
Relaxation<MatrixType>::
ApplyInverseGS_BlockCrsMatrix (const block_crs_matrix_type& A,
                               const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
                               Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
{
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  typedef Tpetra::Experimental::BlockMultiVector<scalar_type,
    local_ordinal_type, global_ordinal_type, node_type> BMV;
  typedef Tpetra::MultiVector<scalar_type,
    local_ordinal_type, global_ordinal_type, node_type> MV;

  //FIXME: (tcf) 8/21/2014 -- may be problematic for multiple right hand sides
  //
  // NOTE (mfh 12 Sep 2014) I don't think it should be a problem for
  // multiple right-hand sides, unless the input or output MultiVector
  // does not have constant stride.  We should check for that case
  // here, in case it doesn't work in localGaussSeidel (which is
  // entirely possible).
  BMV yBlock (Y, * (A.getGraph ()->getDomainMap ()), A.getBlockSize ());
  const BMV xBlock (X, * (A.getColMap ()), A.getBlockSize ());

  bool performImport = false;
  RCP<BMV> yBlockCol;
  if (Importer_.is_null ()) {
    yBlockCol = rcpFromRef (yBlock);
  }
  else {
    // Reuse the cached column-Map block multivector if it still fits;
    // otherwise rebuild it with the current shape.
    if (yBlockColumnPointMap_.is_null () ||
        yBlockColumnPointMap_->getNumVectors () != yBlock.getNumVectors () ||
        yBlockColumnPointMap_->getBlockSize () != yBlock.getBlockSize ()) {
      yBlockColumnPointMap_ =
        rcp (new BMV (* (A.getColMap ()), A.getBlockSize (),
                      static_cast<local_ordinal_type> (yBlock.getNumVectors ())));
    }
    yBlockCol = yBlockColumnPointMap_;
    performImport = true;
  }

  if (ZeroStartingSolution_) {
    yBlockCol->putScalar (STS::zero ());
  }
  else if (performImport) {
    yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT);
  }

  const Tpetra::ESweepDirection direction =
    DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;

  for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
    // The first sweep's Import (if any) already happened above.
    if (performImport && sweep > 0) {
      yBlockCol->doImport(yBlock, *Importer_, Tpetra::INSERT);
    }
    A.localGaussSeidel (xBlock, *yBlockCol, *BlockDiagonal_,
                        &blockDiagonalFactorizationPivots[0],
                        DampingFactor_, direction);
    if (performImport) {
      // Copy the domain-Map portion of the column-Map vector back to Y.
      RCP<const MV> yBlockColPointDomain =
        yBlockCol->getMultiVectorView ().offsetView (A.getDomainMap (), 0);
      Tpetra::deep_copy (Y, *yBlockColPointDomain);
    }
  }
}


// Symmetric Gauss-Seidel dispatch: choose the fastest kernel available
// for the actual run-time type of A_.
template<class MatrixType>
void
Relaxation<MatrixType>::
ApplyInverseSGS (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
                 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
{
  typedef Relaxation<MatrixType> this_type;
  // The CrsMatrix version is faster, because it can access the sparse
  // matrix data directly, rather than by copying out each row's data
  // in turn.  Thus, we check whether the RowMatrix is really a
  // CrsMatrix.
  //
  // FIXME (mfh 07 Jul 2013) See note on crs_matrix_type typedef
  // declaration in Ifpack2_Relaxation_decl.hpp header file.  The code
  // will still be correct if the cast fails, but it will use an
  // unoptimized kernel.
  const block_crs_matrix_type* blockCrsMat = dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr());
  // NOTE(review): "&(*A_)" is just another spelling of A_.getRawPtr();
  // ApplyInverseGS uses getRawPtr() for both casts.
  const crs_matrix_type* crsMat = dynamic_cast<const crs_matrix_type*> (&(*A_));
  if (blockCrsMat != NULL) {
    const_cast<this_type*> (this)->ApplyInverseSGS_BlockCrsMatrix(*blockCrsMat, X, Y);
  }
  else if (crsMat != NULL) {
    ApplyInverseSGS_CrsMatrix (*crsMat, X, Y);
  } else {
    ApplyInverseSGS_RowMatrix (X, Y);
  }
}


// Generic (slow-path) symmetric Gauss-Seidel for an arbitrary
// Tpetra::RowMatrix: each sweep is a forward pass followed by a
// backward pass over the (optionally reordered) local rows.
template<class MatrixType>
void
Relaxation<MatrixType>::
ApplyInverseSGS_RowMatrix (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
                           Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
{
  using Teuchos::Array;
  using Teuchos::ArrayRCP;
  using Teuchos::ArrayView;
  using Teuchos::as;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;

  // Tpetra's GS implementation for CrsMatrix handles zeroing out the
  // starting multivector itself.  The generic RowMatrix version here
  // does not, so we have to zero out Y here.
01544 if (ZeroStartingSolution_) { 01545 Y.putScalar (STS::zero ()); 01546 } 01547 01548 const size_t NumVectors = X.getNumVectors (); 01549 const size_t maxLength = A_->getNodeMaxNumRowEntries (); 01550 Array<local_ordinal_type> Indices (maxLength); 01551 Array<scalar_type> Values (maxLength); 01552 01553 // Local smoothing stuff 01554 const size_t numMyRows = A_->getNodeNumRows(); 01555 const local_ordinal_type * rowInd = 0; 01556 size_t numActive = numMyRows; 01557 bool do_local = localSmoothingIndices_.is_null(); 01558 if(do_local) { 01559 rowInd = localSmoothingIndices_.getRawPtr(); 01560 numActive = localSmoothingIndices_.size(); 01561 } 01562 01563 01564 RCP<MV> Y2; 01565 if (IsParallel_) { 01566 if (Importer_.is_null ()) { // domain and column Maps are the same. 01567 // We will copy Y into Y2 below, so no need to fill with zeros here. 01568 Y2 = rcp (new MV (Y.getMap (), NumVectors, false)); 01569 } else { 01570 // FIXME (mfh 21 Mar 2013) We probably don't need to fill with 01571 // zeros here, since we are doing an Import into Y2 below 01572 // anyway. However, it doesn't hurt correctness. 
01573 Y2 = rcp (new MV (Importer_->getTargetMap (), NumVectors)); 01574 } 01575 } 01576 else { 01577 Y2 = rcpFromRef (Y); 01578 } 01579 01580 // Diagonal 01581 ArrayRCP<const scalar_type> d_rcp = Diagonal_->get1dView(); 01582 ArrayView<const scalar_type> d_ptr = d_rcp(); 01583 01584 // Constant stride check 01585 bool constant_stride = X.isConstantStride() && Y2->isConstantStride(); 01586 01587 if(constant_stride) { 01588 // extract 1D RCPs 01589 size_t x_stride = X.getStride(); 01590 size_t y2_stride = Y2->getStride(); 01591 ArrayRCP<scalar_type> y2_rcp = Y2->get1dViewNonConst(); 01592 ArrayRCP<const scalar_type> x_rcp = X.get1dView(); 01593 ArrayView<scalar_type> y2_ptr = y2_rcp(); 01594 ArrayView<const scalar_type> x_ptr = x_rcp(); 01595 Array<scalar_type> dtemp(NumVectors,STS::zero()); 01596 for (int iter = 0; iter < NumSweeps_; ++iter) { 01597 // only one data exchange per sweep 01598 if (IsParallel_) { 01599 if (Importer_.is_null ()) { 01600 // just copy, since domain and column Maps are the same 01601 Tpetra::deep_copy (*Y2, Y); 01602 } else { 01603 Y2->doImport (Y, *Importer_, Tpetra::INSERT); 01604 } 01605 } 01606 01607 for (int j = 0; j < NumSweeps_; j++) { 01608 // data exchange is here, once per sweep 01609 if (IsParallel_) { 01610 if (Importer_.is_null ()) { 01611 // just copy, since domain and column Maps are the same 01612 Tpetra::deep_copy (*Y2, Y); 01613 } else { 01614 Y2->doImport (Y, *Importer_, Tpetra::INSERT); 01615 } 01616 } 01617 01618 for (size_t ii = 0; ii < numActive; ++ii) { 01619 local_ordinal_type i = as<local_ordinal_type>(do_local ? 
rowInd[ii] : ii); 01620 size_t NumEntries; 01621 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries); 01622 dtemp.assign(NumVectors,STS::zero()); 01623 01624 for (size_t k = 0; k < NumEntries; ++k) { 01625 const local_ordinal_type col = Indices[k]; 01626 for (size_t m = 0; m < NumVectors; ++m) { 01627 dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m]; 01628 } 01629 } 01630 01631 for (size_t m = 0; m < NumVectors; ++m) { 01632 y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]); 01633 } 01634 } 01635 01636 // ptrdiff_t is the same size as size_t, but is signed. Being 01637 // signed is important so that i >= 0 is not trivially true. 01638 for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) { 01639 local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii); 01640 size_t NumEntries; 01641 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries); 01642 dtemp.assign(NumVectors,STS::zero()); 01643 01644 for (size_t k = 0; k < NumEntries; ++k) { 01645 const local_ordinal_type col = Indices[k]; 01646 for (size_t m = 0; m < NumVectors; ++m) { 01647 dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m]; 01648 } 01649 } 01650 01651 for (size_t m = 0; m < NumVectors; ++m) { 01652 y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]); 01653 } 01654 } 01655 } 01656 01657 // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map. 
01658 if (IsParallel_) { 01659 Tpetra::deep_copy (Y, *Y2); 01660 } 01661 } 01662 } 01663 else { 01664 // extract 2D RCPs 01665 ArrayRCP<ArrayRCP<scalar_type> > y2_ptr = Y2->get2dViewNonConst (); 01666 ArrayRCP<ArrayRCP<const scalar_type> > x_ptr = X.get2dView (); 01667 01668 for (int iter = 0; iter < NumSweeps_; ++iter) { 01669 // only one data exchange per sweep 01670 if (IsParallel_) { 01671 if (Importer_.is_null ()) { 01672 // just copy, since domain and column Maps are the same 01673 Tpetra::deep_copy (*Y2, Y); 01674 } else { 01675 Y2->doImport (Y, *Importer_, Tpetra::INSERT); 01676 } 01677 } 01678 01679 for (size_t ii = 0; ii < numActive; ++ii) { 01680 local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii); 01681 const scalar_type diag = d_ptr[i]; 01682 size_t NumEntries; 01683 A_->getLocalRowCopy (as<local_ordinal_type> (i), Indices (), Values (), NumEntries); 01684 01685 for (size_t m = 0; m < NumVectors; ++m) { 01686 scalar_type dtemp = STS::zero (); 01687 ArrayView<const scalar_type> x_local = (x_ptr())[m](); 01688 ArrayView<scalar_type> y2_local = (y2_ptr())[m](); 01689 01690 for (size_t k = 0; k < NumEntries; ++k) { 01691 const local_ordinal_type col = Indices[k]; 01692 dtemp += Values[k] * y2_local[col]; 01693 } 01694 y2_local[i] += DampingFactor_ * (x_local[i] - dtemp) * diag; 01695 } 01696 } 01697 01698 // ptrdiff_t is the same size as size_t, but is signed. Being 01699 // signed is important so that i >= 0 is not trivially true. 01700 for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) { 01701 local_ordinal_type i = as<local_ordinal_type>(do_local ? 
rowInd[ii] : ii); 01702 const scalar_type diag = d_ptr[i]; 01703 size_t NumEntries; 01704 A_->getLocalRowCopy (as<local_ordinal_type> (i), Indices (), Values (), NumEntries); 01705 01706 for (size_t m = 0; m < NumVectors; ++m) { 01707 scalar_type dtemp = STS::zero (); 01708 ArrayView<const scalar_type> x_local = (x_ptr())[m](); 01709 ArrayView<scalar_type> y2_local = (y2_ptr())[m](); 01710 01711 for (size_t k = 0; k < NumEntries; ++k) { 01712 const local_ordinal_type col = Indices[k]; 01713 dtemp += Values[k] * y2_local[col]; 01714 } 01715 y2_local[i] += DampingFactor_ * (x_local[i] - dtemp) * diag; 01716 } 01717 } 01718 01719 // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map. 01720 if (IsParallel_) { 01721 Tpetra::deep_copy (Y, *Y2); 01722 } 01723 } 01724 } 01725 01726 // See flop count discussion in implementation of ApplyInverseSGS_CrsMatrix(). 01727 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0; 01728 const double numVectors = as<double> (X.getNumVectors ()); 01729 const double numGlobalRows = as<double> (A_->getGlobalNumRows ()); 01730 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ()); 01731 ApplyFlops_ += 2.0 * NumSweeps_ * numVectors * 01732 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops); 01733 } 01734 01735 01736 template<class MatrixType> 01737 void 01738 Relaxation<MatrixType>:: 01739 ApplyInverseSGS_CrsMatrix (const crs_matrix_type& A, 01740 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X, 01741 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const 01742 { 01743 using Teuchos::as; 01744 const Tpetra::ESweepDirection direction = Tpetra::Symmetric; 01745 if (localSmoothingIndices_.is_null ()) { 01746 A.gaussSeidelCopy (Y, X, *Diagonal_, DampingFactor_, direction, 01747 NumSweeps_, ZeroStartingSolution_); 01748 } 01749 else { 01750 A.reorderedGaussSeidelCopy (Y, X, *Diagonal_, 
localSmoothingIndices_ (), 01751 DampingFactor_, direction, 01752 NumSweeps_, ZeroStartingSolution_); 01753 } 01754 01755 // For each column of output, for each sweep over the matrix: 01756 // 01757 // - One + and one * for each matrix entry 01758 // - One / and one + for each row of the matrix 01759 // - If the damping factor is not one: one * for each row of the 01760 // matrix. (It's not fair to count this if the damping factor is 01761 // one, since the implementation could skip it. Whether it does 01762 // or not is the implementation's choice.) 01763 // 01764 // Each sweep of symmetric Gauss-Seidel / SOR counts as two sweeps, 01765 // one forward and one backward. 01766 01767 // Floating-point operations due to the damping factor, per matrix 01768 // row, per direction, per columm of output. 01769 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0; 01770 const double numVectors = as<double> (X.getNumVectors ()); 01771 const double numGlobalRows = as<double> (A_->getGlobalNumRows ()); 01772 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ()); 01773 ApplyFlops_ += 2.0 * NumSweeps_ * numVectors * 01774 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops); 01775 } 01776 01777 template<class MatrixType> 01778 void 01779 Relaxation<MatrixType>:: 01780 ApplyInverseSGS_BlockCrsMatrix (const block_crs_matrix_type& A, 01781 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X, 01782 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) 01783 { 01784 using Teuchos::RCP; 01785 using Teuchos::rcp; 01786 using Teuchos::rcpFromRef; 01787 typedef Tpetra::Experimental::BlockMultiVector<scalar_type, 01788 local_ordinal_type, global_ordinal_type, node_type> BMV; 01789 typedef Tpetra::MultiVector<scalar_type, 01790 local_ordinal_type, global_ordinal_type, node_type> MV; 01791 01792 //FIXME: (tcf) 8/21/2014 -- may be problematic for multiple right hand sides 
01793 // 01794 // NOTE (mfh 12 Sep 2014) I don't think it should be a problem for 01795 // multiple right-hand sides, unless the input or output MultiVector 01796 // does not have constant stride. We should check for that case 01797 // here, in case it doesn't work in localGaussSeidel (which is 01798 // entirely possible). 01799 BMV yBlock (Y, * (A.getGraph ()->getDomainMap ()), A.getBlockSize ()); 01800 const BMV xBlock (X, * (A.getColMap ()), A.getBlockSize ()); 01801 01802 bool performImport = false; 01803 RCP<BMV> yBlockCol; 01804 if (Importer_.is_null ()) { 01805 yBlockCol = Teuchos::rcpFromRef (yBlock); 01806 } 01807 else { 01808 if (yBlockColumnPointMap_.is_null () || 01809 yBlockColumnPointMap_->getNumVectors () != yBlock.getNumVectors () || 01810 yBlockColumnPointMap_->getBlockSize () != yBlock.getBlockSize ()) { 01811 yBlockColumnPointMap_ = 01812 rcp (new BMV (* (A.getColMap ()), A.getBlockSize (), 01813 static_cast<local_ordinal_type> (yBlock.getNumVectors ()))); 01814 } 01815 yBlockCol = yBlockColumnPointMap_; 01816 performImport = true; 01817 } 01818 01819 if (ZeroStartingSolution_) { 01820 yBlockCol->putScalar (STS::zero ()); 01821 } 01822 else if (performImport) { 01823 yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT); 01824 } 01825 01826 // FIXME (mfh 12 Sep 2014) Shouldn't this come from the user's parameter? 
01827 const Tpetra::ESweepDirection direction = Tpetra::Symmetric; 01828 01829 for (int sweep = 0; sweep < NumSweeps_; ++sweep) { 01830 if (performImport && sweep > 0) { 01831 yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT); 01832 } 01833 A.localGaussSeidel (xBlock, *yBlockCol, *BlockDiagonal_, 01834 &blockDiagonalFactorizationPivots[0], 01835 DampingFactor_, direction); 01836 if (performImport) { 01837 RCP<const MV> yBlockColPointDomain = 01838 yBlockCol->getMultiVectorView ().offsetView (A.getDomainMap (), 0); 01839 MV yBlockView = yBlock.getMultiVectorView (); 01840 Tpetra::deep_copy (yBlockView, *yBlockColPointDomain); 01841 } 01842 } 01843 } 01844 01845 01846 template<class MatrixType> 01847 std::string Relaxation<MatrixType>::description () const 01848 { 01849 std::ostringstream os; 01850 01851 // Output is a valid YAML dictionary in flow style. If you don't 01852 // like everything on a single line, you should call describe() 01853 // instead. 01854 os << "\"Ifpack2::Relaxation\": {"; 01855 01856 os << "Initialized: " << (isInitialized () ? "true" : "false") << ", " 01857 << "Computed: " << (isComputed () ? "true" : "false") << ", "; 01858 01859 // It's useful to print this instance's relaxation method (Jacobi, 01860 // Gauss-Seidel, or symmetric Gauss-Seidel). If you want more info 01861 // than that, call describe() instead. 
01862 os << "Type: "; 01863 if (PrecType_ == Ifpack2::Details::JACOBI) { 01864 os << "Jacobi"; 01865 } else if (PrecType_ == Ifpack2::Details::GS) { 01866 os << "Gauss-Seidel"; 01867 } else if (PrecType_ == Ifpack2::Details::SGS) { 01868 os << "Symmetric Gauss-Seidel"; 01869 } else { 01870 os << "INVALID"; 01871 } 01872 01873 os << ", " << "sweeps: " << NumSweeps_ << ", " 01874 << "damping factor: " << DampingFactor_ << ", "; 01875 if (DoL1Method_) { 01876 os << "use l1: " << DoL1Method_ << ", " 01877 << "l1 eta: " << L1Eta_ << ", "; 01878 } 01879 01880 if (A_.is_null ()) { 01881 os << "Matrix: null"; 01882 } 01883 else { 01884 os << "Global matrix dimensions: [" 01885 << A_->getGlobalNumRows () << ", " << A_->getGlobalNumCols () << "]" 01886 << ", Global nnz: " << A_->getGlobalNumEntries(); 01887 } 01888 01889 os << "}"; 01890 return os.str (); 01891 } 01892 01893 01894 template<class MatrixType> 01895 void 01896 Relaxation<MatrixType>:: 01897 describe (Teuchos::FancyOStream &out, 01898 const Teuchos::EVerbosityLevel verbLevel) const 01899 { 01900 using Teuchos::OSTab; 01901 using Teuchos::TypeNameTraits; 01902 using Teuchos::VERB_DEFAULT; 01903 using Teuchos::VERB_NONE; 01904 using Teuchos::VERB_LOW; 01905 using Teuchos::VERB_MEDIUM; 01906 using Teuchos::VERB_HIGH; 01907 using Teuchos::VERB_EXTREME; 01908 using std::endl; 01909 01910 const Teuchos::EVerbosityLevel vl = 01911 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel; 01912 01913 const int myRank = this->getComm ()->getRank (); 01914 01915 // none: print nothing 01916 // low: print O(1) info from Proc 0 01917 // medium: 01918 // high: 01919 // extreme: 01920 01921 if (vl != VERB_NONE && myRank == 0) { 01922 // Describable interface asks each implementation to start with a tab. 01923 OSTab tab1 (out); 01924 // Output is valid YAML; hence the quotes, to protect the colons. 
01925 out << "\"Ifpack2::Relaxation\":" << endl; 01926 OSTab tab2 (out); 01927 out << "MatrixType: \"" << TypeNameTraits<MatrixType>::name () << "\"" 01928 << endl; 01929 if (this->getObjectLabel () != "") { 01930 out << "Label: " << this->getObjectLabel () << endl; 01931 } 01932 out << "Initialized: " << (isInitialized () ? "true" : "false") << endl 01933 << "Computed: " << (isComputed () ? "true" : "false") << endl 01934 << "Parameters: " << endl; 01935 { 01936 OSTab tab3 (out); 01937 out << "\"relaxation: type\": "; 01938 if (PrecType_ == Ifpack2::Details::JACOBI) { 01939 out << "Jacobi"; 01940 } else if (PrecType_ == Ifpack2::Details::GS) { 01941 out << "Gauss-Seidel"; 01942 } else if (PrecType_ == Ifpack2::Details::SGS) { 01943 out << "Symmetric Gauss-Seidel"; 01944 } else { 01945 out << "INVALID"; 01946 } 01947 // We quote these parameter names because they contain colons. 01948 // YAML uses the colon to distinguish key from value. 01949 out << endl 01950 << "\"relaxation: sweeps\": " << NumSweeps_ << endl 01951 << "\"relaxation: damping factor\": " << DampingFactor_ << endl 01952 << "\"relaxation: min diagonal value\": " << MinDiagonalValue_ << endl 01953 << "\"relaxation: zero starting solution\": " << ZeroStartingSolution_ << endl 01954 << "\"relaxation: backward mode\": " << DoBackwardGS_ << endl 01955 << "\"relaxation: use l1\": " << DoL1Method_ << endl 01956 << "\"relaxation: l1 eta\": " << L1Eta_ << endl; 01957 } 01958 out << "Computed quantities:" << endl; 01959 { 01960 OSTab tab3 (out); 01961 out << "Condition number estimate: " << Condest_ << endl 01962 << "Global number of rows: " << A_->getGlobalNumRows () << endl 01963 << "Global number of columns: " << A_->getGlobalNumCols () << endl; 01964 } 01965 if (checkDiagEntries_ && isComputed ()) { 01966 out << "Properties of input diagonal entries:" << endl; 01967 { 01968 OSTab tab3 (out); 01969 out << "Magnitude of minimum-magnitude diagonal entry: " 01970 << globalMinMagDiagEntryMag_ << endl 01971 << 
"Magnitude of maximum-magnitude diagonal entry: " 01972 << globalMaxMagDiagEntryMag_ << endl 01973 << "Number of diagonal entries with small magnitude: " 01974 << globalNumSmallDiagEntries_ << endl 01975 << "Number of zero diagonal entries: " 01976 << globalNumZeroDiagEntries_ << endl 01977 << "Number of diagonal entries with negative real part: " 01978 << globalNumNegDiagEntries_ << endl 01979 << "Abs 2-norm diff between computed and actual inverse " 01980 << "diagonal: " << globalDiagNormDiff_ << endl; 01981 } 01982 } 01983 if (isComputed ()) { 01984 out << "Saved diagonal offsets: " 01985 << (savedDiagOffsets_ ? "true" : "false") << endl; 01986 } 01987 out << "Call counts and total times (in seconds): " << endl; 01988 { 01989 OSTab tab3 (out); 01990 out << "initialize: " << endl; 01991 { 01992 OSTab tab4 (out); 01993 out << "Call count: " << NumInitialize_ << endl; 01994 out << "Total time: " << InitializeTime_ << endl; 01995 } 01996 out << "compute: " << endl; 01997 { 01998 OSTab tab4 (out); 01999 out << "Call count: " << NumCompute_ << endl; 02000 out << "Total time: " << ComputeTime_ << endl; 02001 } 02002 out << "apply: " << endl; 02003 { 02004 OSTab tab4 (out); 02005 out << "Call count: " << NumApply_ << endl; 02006 out << "Total time: " << ApplyTime_ << endl; 02007 } 02008 } 02009 } 02010 } 02011 02012 } // namespace Ifpack2 02013 02014 #define IFPACK2_RELAXATION_INSTANT(S,LO,GO,N) \ 02015 template class Ifpack2::Relaxation< Tpetra::CrsMatrix<S, LO, GO, N> >; \ 02016 template class Ifpack2::Relaxation< Tpetra::RowMatrix<S, LO, GO, N> >; 02017 02018 #endif // IFPACK2_RELAXATION_DEF_HPP
1.7.6.1