Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends
Ifpack2_Relaxation_def.hpp
00001 /*@HEADER
00002 // ***********************************************************************
00003 //
00004 //       Ifpack2: Tempated Object-Oriented Algebraic Preconditioner Package
00005 //                 Copyright (2009) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ***********************************************************************
00040 //@HEADER
00041 */
00042 
00043 #ifndef IFPACK2_RELAXATION_DEF_HPP
00044 #define IFPACK2_RELAXATION_DEF_HPP
00045 
00046 #include "Ifpack2_Relaxation_decl.hpp"
00047 #include "Teuchos_StandardParameterEntryValidators.hpp"
00048 #include <Teuchos_TimeMonitor.hpp>
00049 #include <Tpetra_ConfigDefs.hpp>
00050 #include <Tpetra_CrsMatrix.hpp>
00051 #include <Tpetra_Experimental_BlockCrsMatrix.hpp>
00052 
00053 // mfh 28 Mar 2013: Uncomment out these three lines to compute
00054 // statistics on diagonal entries in compute().
00055 // #ifndef IFPACK2_RELAXATION_COMPUTE_DIAGONAL_STATS
00056 // #  define IFPACK2_RELAXATION_COMPUTE_DIAGONAL_STATS 1
00057 // #endif // IFPACK2_RELAXATION_COMPUTE_DIAGONAL_STATS
00058 
00059 namespace {
00060   // Validate that a given int is nonnegative.
00061   class NonnegativeIntValidator : public Teuchos::ParameterEntryValidator {
00062   public:
00063     // Constructor (does nothing).
00064     NonnegativeIntValidator () {}
00065 
00066     // ParameterEntryValidator wants this method.
00067     Teuchos::ParameterEntryValidator::ValidStringsList validStringValues () const {
00068       return Teuchos::null;
00069     }
00070 
00071     // Actually validate the parameter's value.
00072     void
00073     validate (const Teuchos::ParameterEntry& entry,
00074               const std::string& paramName,
00075               const std::string& sublistName) const
00076     {
00077       using std::endl;
00078       Teuchos::any anyVal = entry.getAny (true);
00079       const std::string entryName = entry.getAny (false).typeName ();
00080 
00081       TEUCHOS_TEST_FOR_EXCEPTION(
00082         anyVal.type () != typeid (int),
00083         Teuchos::Exceptions::InvalidParameterType,
00084         "Parameter \"" << paramName << "\" in sublist \"" << sublistName
00085         << "\" has the wrong type." << endl << "Parameter: " << paramName
00086         << endl << "Type specified: " << entryName << endl
00087         << "Type required: int" << endl);
00088 
00089       const int val = Teuchos::any_cast<int> (anyVal);
00090       TEUCHOS_TEST_FOR_EXCEPTION(
00091         val < 0, Teuchos::Exceptions::InvalidParameterValue,
00092         "Parameter \"" << paramName << "\" in sublist \"" << sublistName
00093         << "\" is negative." << endl << "Parameter: " << paramName
00094         << endl << "Value specified: " << val << endl
00095         << "Required range: [0, INT_MAX]" << endl);
00096     }
00097 
00098     // ParameterEntryValidator wants this method.
00099     const std::string getXMLTypeName () const {
00100       return "NonnegativeIntValidator";
00101     }
00102 
00103     // ParameterEntryValidator wants this method.
00104     void
00105     printDoc (const std::string& docString,
00106               std::ostream &out) const
00107     {
00108       Teuchos::StrUtils::printLines (out, "# ", docString);
00109       out << "#\tValidator Used: " << std::endl;
00110       out << "#\t\tNonnegativeIntValidator" << std::endl;
00111     }
00112   };
00113 
00114   // A way to get a small positive number (eps() for floating-point
00115   // types, or 1 for integer types) when Teuchos::ScalarTraits doesn't
00116   // define it (for example, for integer values).
00117   template<class Scalar, const bool isOrdinal=Teuchos::ScalarTraits<Scalar>::isOrdinal>
00118   class SmallTraits {
00119   public:
00120     // Return eps if Scalar is a floating-point type, else return 1.
00121     static const Scalar eps ();
00122   };
00123 
00124   // Partial specialization for when Scalar is not a floating-point type.
00125   template<class Scalar>
00126   class SmallTraits<Scalar, true> {
00127   public:
00128     static const Scalar eps () {
00129       return Teuchos::ScalarTraits<Scalar>::one ();
00130     }
00131   };
00132 
00133   // Partial specialization for when Scalar is a floating-point type.
00134   template<class Scalar>
00135   class SmallTraits<Scalar, false> {
00136   public:
00137     static const Scalar eps () {
00138       return Teuchos::ScalarTraits<Scalar>::eps ();
00139     }
00140   };
00141 } // namespace (anonymous)
00142 
00143 namespace Ifpack2 {
00144 
00145 template<class MatrixType>
00146 void Relaxation<MatrixType>::
00147 setMatrix (const Teuchos::RCP<const row_matrix_type>& A)
00148 {
00149   if (A.getRawPtr () != A_.getRawPtr ()) { // it's a different matrix
00150     Importer_ = Teuchos::null;
00151     Diagonal_ = Teuchos::null; // ??? what if this comes from the user???
00152     isInitialized_ = false;
00153     IsComputed_ = false;
00154     diagOffsets_ = Teuchos::null;
00155     savedDiagOffsets_ = false;
00156     hasBlockCrsMatrix_ = false;
00157     if (! A.is_null ()) {
00158       IsParallel_ = (A->getRowMap ()->getComm ()->getSize () > 1);
00159     }
00160     A_ = A;
00161   }
00162 }
00163 
00164 
00165 template<class MatrixType>
00166 Relaxation<MatrixType>::
00167 Relaxation (const Teuchos::RCP<const row_matrix_type>& A)
00168 : A_ (A),
00169   Time_ (Teuchos::rcp (new Teuchos::Time ("Ifpack2::Relaxation"))),
00170   NumSweeps_ (1),
00171   PrecType_ (Ifpack2::Details::JACOBI),
00172   DampingFactor_ (STS::one ()),
00173   IsParallel_ ((A.is_null () || A->getRowMap ().is_null () || A->getRowMap ()->getComm ().is_null ()) ?
00174                false : // a reasonable default if there's no communicator
00175                A->getRowMap ()->getComm ()->getSize () > 1),
00176   ZeroStartingSolution_ (true),
00177   DoBackwardGS_ (false),
00178   DoL1Method_ (false),
00179   L1Eta_ (Teuchos::as<magnitude_type> (1.5)),
00180   MinDiagonalValue_ (STS::zero ()),
00181   fixTinyDiagEntries_ (false),
00182   checkDiagEntries_ (false),
00183   Condest_ (-STM::one ()),
00184   isInitialized_ (false),
00185   IsComputed_ (false),
00186   NumInitialize_ (0),
00187   NumCompute_ (0),
00188   NumApply_ (0),
00189   InitializeTime_ (0.0), // Times are double anyway, so no need for ScalarTraits.
00190   ComputeTime_ (0.0),
00191   ApplyTime_ (0.0),
00192   ComputeFlops_ (0.0),
00193   ApplyFlops_ (0.0),
00194   globalMinMagDiagEntryMag_ (STM::zero ()),
00195   globalMaxMagDiagEntryMag_ (STM::zero ()),
00196   globalNumSmallDiagEntries_ (0),
00197   globalNumZeroDiagEntries_ (0),
00198   globalNumNegDiagEntries_ (0),
00199   globalDiagNormDiff_(Teuchos::ScalarTraits<magnitude_type>::zero()),
00200   savedDiagOffsets_ (false),
00201   hasBlockCrsMatrix_ (false)
00202 {
00203   this->setObjectLabel ("Ifpack2::Relaxation");
00204 }
00205 
00206 //==========================================================================
00207 template<class MatrixType>
00208 Relaxation<MatrixType>::~Relaxation() {
00209 }
00210 
00211 template<class MatrixType>
00212 Teuchos::RCP<const Teuchos::ParameterList>
00213 Relaxation<MatrixType>::getValidParameters () const
00214 {
00215   using Teuchos::Array;
00216   using Teuchos::ParameterList;
00217   using Teuchos::parameterList;
00218   using Teuchos::RCP;
00219   using Teuchos::rcp;
00220   using Teuchos::rcp_const_cast;
00221   using Teuchos::rcp_implicit_cast;
00222   using Teuchos::setStringToIntegralParameter;
00223   typedef Teuchos::ParameterEntryValidator PEV;
00224 
00225   if (validParams_.is_null ()) {
00226     RCP<ParameterList> pl = parameterList ("Ifpack2::Relaxation");
00227 
00228     // Set a validator that automatically converts from the valid
00229     // string options to their enum values.
00230     Array<std::string> precTypes (3);
00231     precTypes[0] = "Jacobi";
00232     precTypes[1] = "Gauss-Seidel";
00233     precTypes[2] = "Symmetric Gauss-Seidel";
00234     Array<Details::RelaxationType> precTypeEnums (3);
00235     precTypeEnums[0] = Details::JACOBI;
00236     precTypeEnums[1] = Details::GS;
00237     precTypeEnums[2] = Details::SGS;
00238     const std::string defaultPrecType ("Jacobi");
00239     setStringToIntegralParameter<Details::RelaxationType> ("relaxation: type",
00240       defaultPrecType, "Relaxation method", precTypes (), precTypeEnums (),
00241       pl.getRawPtr ());
00242 
00243     const int numSweeps = 1;
00244     RCP<PEV> numSweepsValidator =
00245       rcp_implicit_cast<PEV> (rcp (new NonnegativeIntValidator));
00246     pl->set ("relaxation: sweeps", numSweeps, "Number of relaxation sweeps",
00247              rcp_const_cast<const PEV> (numSweepsValidator));
00248 
00249     const scalar_type dampingFactor = STS::one ();
00250     pl->set ("relaxation: damping factor", dampingFactor);
00251 
00252     const bool zeroStartingSolution = true;
00253     pl->set ("relaxation: zero starting solution", zeroStartingSolution);
00254 
00255     const bool doBackwardGS = false;
00256     pl->set ("relaxation: backward mode", doBackwardGS);
00257 
00258     const bool doL1Method = false;
00259     pl->set ("relaxation: use l1", doL1Method);
00260 
00261     const magnitude_type l1eta = (STM::one() + STM::one() + STM::one()) /
00262       (STM::one() + STM::one()); // 1.5
00263     pl->set ("relaxation: l1 eta", l1eta);
00264 
00265     const scalar_type minDiagonalValue = STS::zero ();
00266     pl->set ("relaxation: min diagonal value", minDiagonalValue);
00267 
00268     const bool fixTinyDiagEntries = false;
00269     pl->set ("relaxation: fix tiny diagonal entries", fixTinyDiagEntries);
00270 
00271     const bool checkDiagEntries = false;
00272     pl->set ("relaxation: check diagonal entries", checkDiagEntries);
00273 
00274     Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = Teuchos::null;
00275     pl->set("relaxation: local smoothing indices", localSmoothingIndices);
00276 
00277     validParams_ = rcp_const_cast<const ParameterList> (pl);
00278   }
00279   return validParams_;
00280 }
00281 
00282 
00283 template<class MatrixType>
00284 void Relaxation<MatrixType>::setParametersImpl (Teuchos::ParameterList& pl)
00285 {
00286   using Teuchos::getIntegralValue;
00287   using Teuchos::ParameterList;
00288   using Teuchos::RCP;
00289   typedef scalar_type ST; // just to make code below shorter
00290 
00291   pl.validateParametersAndSetDefaults (* getValidParameters ());
00292 
00293   const Details::RelaxationType precType =
00294     getIntegralValue<Details::RelaxationType> (pl, "relaxation: type");
00295   const int numSweeps = pl.get<int> ("relaxation: sweeps");
00296   const ST dampingFactor = pl.get<ST> ("relaxation: damping factor");
00297   const bool zeroStartSol = pl.get<bool> ("relaxation: zero starting solution");
00298   const bool doBackwardGS = pl.get<bool> ("relaxation: backward mode");
00299   const bool doL1Method = pl.get<bool> ("relaxation: use l1");
00300   const magnitude_type l1Eta = pl.get<magnitude_type> ("relaxation: l1 eta");
00301   const ST minDiagonalValue = pl.get<ST> ("relaxation: min diagonal value");
00302   const bool fixTinyDiagEntries = pl.get<bool> ("relaxation: fix tiny diagonal entries");
00303   const bool checkDiagEntries = pl.get<bool> ("relaxation: check diagonal entries");
00304   Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = pl.get<Teuchos::ArrayRCP<local_ordinal_type> >("relaxation: local smoothing indices");
00305 
00306 
00307   // "Commit" the changes, now that we've validated everything.
00308   PrecType_              = precType;
00309   NumSweeps_             = numSweeps;
00310   DampingFactor_         = dampingFactor;
00311   ZeroStartingSolution_  = zeroStartSol;
00312   DoBackwardGS_          = doBackwardGS;
00313   DoL1Method_            = doL1Method;
00314   L1Eta_                 = l1Eta;
00315   MinDiagonalValue_      = minDiagonalValue;
00316   fixTinyDiagEntries_    = fixTinyDiagEntries;
00317   checkDiagEntries_      = checkDiagEntries;
00318   localSmoothingIndices_ = localSmoothingIndices;
00319 }
00320 
00321 
00322 template<class MatrixType>
00323 void Relaxation<MatrixType>::setParameters (const Teuchos::ParameterList& pl)
00324 {
00325   // FIXME (aprokop 18 Oct 2013) Casting away const is bad here.
00326   // but otherwise, we will get [unused] in pl
00327   this->setParametersImpl(const_cast<Teuchos::ParameterList&>(pl));
00328 }
00329 
00330 
00331 template<class MatrixType>
00332 Teuchos::RCP<const Teuchos::Comm<int> >
00333 Relaxation<MatrixType>::getComm() const {
00334   TEUCHOS_TEST_FOR_EXCEPTION(
00335     A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getComm: "
00336     "The input matrix A is null.  Please call setMatrix() with a nonnull "
00337     "input matrix before calling this method.");
00338   return A_->getRowMap ()->getComm ();
00339 }
00340 
00341 
00342 template<class MatrixType>
00343 Teuchos::RCP<const typename Relaxation<MatrixType>::row_matrix_type>
00344 Relaxation<MatrixType>::getMatrix () const {
00345   return A_;
00346 }
00347 
00348 
00349 template<class MatrixType>
00350 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
00351                                typename MatrixType::global_ordinal_type,
00352                                typename MatrixType::node_type> >
00353 Relaxation<MatrixType>::getDomainMap () const {
00354   TEUCHOS_TEST_FOR_EXCEPTION(
00355     A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getDomainMap: "
00356     "The input matrix A is null.  Please call setMatrix() with a nonnull "
00357     "input matrix before calling this method.");
00358   return A_->getDomainMap ();
00359 }
00360 
00361 
00362 template<class MatrixType>
00363 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
00364                                typename MatrixType::global_ordinal_type,
00365                                typename MatrixType::node_type> >
00366 Relaxation<MatrixType>::getRangeMap () const {
00367   TEUCHOS_TEST_FOR_EXCEPTION(
00368     A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getRangeMap: "
00369     "The input matrix A is null.  Please call setMatrix() with a nonnull "
00370     "input matrix before calling this method.");
00371   return A_->getRangeMap ();
00372 }
00373 
00374 
00375 template<class MatrixType>
00376 bool Relaxation<MatrixType>::hasTransposeApply () const {
00377   return true;
00378 }
00379 
00380 
00381 template<class MatrixType>
00382 int Relaxation<MatrixType>::getNumInitialize() const {
00383   return(NumInitialize_);
00384 }
00385 
00386 
00387 template<class MatrixType>
00388 int Relaxation<MatrixType>::getNumCompute() const {
00389   return(NumCompute_);
00390 }
00391 
00392 
00393 template<class MatrixType>
00394 int Relaxation<MatrixType>::getNumApply() const {
00395   return(NumApply_);
00396 }
00397 
00398 
00399 template<class MatrixType>
00400 double Relaxation<MatrixType>::getInitializeTime() const {
00401   return(InitializeTime_);
00402 }
00403 
00404 
00405 template<class MatrixType>
00406 double Relaxation<MatrixType>::getComputeTime() const {
00407   return(ComputeTime_);
00408 }
00409 
00410 
00411 template<class MatrixType>
00412 double Relaxation<MatrixType>::getApplyTime() const {
00413   return(ApplyTime_);
00414 }
00415 
00416 
00417 template<class MatrixType>
00418 double Relaxation<MatrixType>::getComputeFlops() const {
00419   return(ComputeFlops_);
00420 }
00421 
00422 
00423 template<class MatrixType>
00424 double Relaxation<MatrixType>::getApplyFlops() const {
00425   return(ApplyFlops_);
00426 }
00427 
00428 
00429 template<class MatrixType>
00430 typename Teuchos::ScalarTraits<typename MatrixType::scalar_type>::magnitudeType
00431 Relaxation<MatrixType>::getCondEst () const
00432 {
00433   return Condest_;
00434 }
00435 
00436 
00437 template<class MatrixType>
00438 typename Teuchos::ScalarTraits<typename MatrixType::scalar_type>::magnitudeType
00439 Relaxation<MatrixType>::
00440 computeCondEst (CondestType CT,
00441                 typename MatrixType::local_ordinal_type MaxIters,
00442                 magnitude_type Tol,
00443                 const Teuchos::Ptr<const row_matrix_type>& matrix)
00444 {
00445   if (! isComputed ()) { // cannot compute right now
00446     return -Teuchos::ScalarTraits<magnitude_type>::one ();
00447   }
00448   // always compute it. Call Condest() with no parameters to get
00449   // the previous estimate.
00450   Condest_ = Ifpack2::Condest (*this, CT, MaxIters, Tol, matrix);
00451   return Condest_;
00452 }
00453 
00454 
00455 template<class MatrixType>
00456 void
00457 Relaxation<MatrixType>::
00458 apply (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
00459        Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
00460        Teuchos::ETransp mode,
00461        scalar_type alpha,
00462        scalar_type beta) const
00463 {
00464   using Teuchos::as;
00465   using Teuchos::RCP;
00466   using Teuchos::rcp;
00467   using Teuchos::rcpFromRef;
00468   typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
00469                               global_ordinal_type, node_type> MV;
00470   TEUCHOS_TEST_FOR_EXCEPTION(
00471     A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::apply: "
00472     "The input matrix A is null.  Please call setMatrix() with a nonnull "
00473     "input matrix, then call compute(), before calling this method.");
00474   TEUCHOS_TEST_FOR_EXCEPTION(
00475     ! isComputed (),
00476     std::runtime_error,
00477     "Ifpack2::Relaxation::apply: You must call compute() on this Ifpack2 "
00478     "preconditioner instance before you may call apply().  You may call "
00479     "isComputed() to find out if compute() has been called already.");
00480   TEUCHOS_TEST_FOR_EXCEPTION(
00481     X.getNumVectors() != Y.getNumVectors(),
00482     std::runtime_error,
00483     "Ifpack2::Relaxation::apply: X and Y have different numbers of columns.  "
00484     "X has " << X.getNumVectors() << " columns, but Y has "
00485     << Y.getNumVectors() << " columns.");
00486   TEUCHOS_TEST_FOR_EXCEPTION(
00487     beta != STS::zero (), std::logic_error,
00488     "Ifpack2::Relaxation::apply: beta = " << beta << " != 0 case not "
00489     "implemented.");
00490   {
00491     // Reset the timer each time, since Relaxation uses the same Time
00492     // object to track times for different methods.
00493     Teuchos::TimeMonitor timeMon (*Time_, true);
00494 
00495     // Special case: alpha == 0.
00496     if (alpha == STS::zero ()) {
00497       // No floating-point operations, so no need to update a count.
00498       Y.putScalar (STS::zero ());
00499     }
00500     else {
00501       // If X and Y alias one another, then we need to create an
00502       // auxiliary vector, Xcopy (a deep copy of X).
00503       RCP<const MV> Xcopy;
00504       // FIXME (mfh 12 Sep 2014) This test for aliasing is both
00505       // incomplete, and a dependence on KokkosClassic.
00506       if (X.getLocalMV ().getValues () == Y.getLocalMV ().getValues ()) {
00507         Xcopy = rcp (new MV (X, Teuchos::Copy));
00508       }
00509       else {
00510         Xcopy = rcpFromRef (X);
00511       }
00512 
00513       // Each of the following methods updates the flop count itself.
00514       // All implementations handle zeroing out the starting solution
00515       // (if necessary) themselves.
00516       switch (PrecType_) {
00517       case Ifpack2::Details::JACOBI:
00518         ApplyInverseJacobi(*Xcopy,Y);
00519         break;
00520       case Ifpack2::Details::GS:
00521         ApplyInverseGS(*Xcopy,Y);
00522         break;
00523       case Ifpack2::Details::SGS:
00524         ApplyInverseSGS(*Xcopy,Y);
00525         break;
00526       default:
00527         TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
00528           "Ifpack2::Relaxation::apply: Invalid preconditioner type enum value "
00529           << PrecType_ << ".  Please report this bug to the Ifpack2 developers.");
00530       }
00531       if (alpha != STS::one ()) {
00532         Y.scale (alpha);
00533         const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
00534         const double numVectors = as<double> (Y.getNumVectors ());
00535         ApplyFlops_ += numGlobalRows * numVectors;
00536       }
00537     }
00538   }
00539   ApplyTime_ += Time_->totalElapsedTime ();
00540   ++NumApply_;
00541 }
00542 
00543 
00544 template<class MatrixType>
00545 void
00546 Relaxation<MatrixType>::
00547 applyMat (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
00548           Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
00549           Teuchos::ETransp mode) const
00550 {
00551   TEUCHOS_TEST_FOR_EXCEPTION(
00552     A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::applyMat: "
00553     "The input matrix A is null.  Please call setMatrix() with a nonnull "
00554     "input matrix, then call compute(), before calling this method.");
00555   TEUCHOS_TEST_FOR_EXCEPTION(
00556     ! isComputed (), std::runtime_error, "Ifpack2::Relaxation::applyMat: "
00557     "isComputed() must be true before you may call applyMat().  "
00558     "Please call compute() before calling this method.");
00559   TEUCHOS_TEST_FOR_EXCEPTION(
00560     X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
00561     "Ifpack2::Relaxation::applyMat: X.getNumVectors() = " << X.getNumVectors ()
00562     << " != Y.getNumVectors() = " << Y.getNumVectors () << ".");
00563   A_->apply (X, Y, mode);
00564 }
00565 
00566 
00567 template<class MatrixType>
00568 void Relaxation<MatrixType>::initialize ()
00569 {
00570   TEUCHOS_TEST_FOR_EXCEPTION(
00571     A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::initialize: "
00572     "The input matrix A is null.  Please call setMatrix() with a nonnull "
00573     "input matrix before calling this method.");
00574 
00575   if (A_.is_null ()) {
00576     hasBlockCrsMatrix_ = false;
00577   }
00578   else { // A_ is not null
00579     Teuchos::RCP<const block_crs_matrix_type> A_bcrs =
00580       Teuchos::rcp_dynamic_cast<const block_crs_matrix_type> (A_);
00581     if (A_bcrs.is_null ()) {
00582       hasBlockCrsMatrix_ = false;
00583     }
00584     else { // A_ is a block_crs_matrix_type
00585       hasBlockCrsMatrix_ = true;
00586     }
00587   }
00588 
00589   // Initialization for Relaxation is trivial, so we say it takes zero time.
00590   //InitializeTime_ += Time_->totalElapsedTime ();
00591   ++NumInitialize_;
00592   isInitialized_ = true;
00593 }
00594 
00595 template<class MatrixType>
00596 void Relaxation<MatrixType>::computeBlockCrs ()
00597 {
00598   using Teuchos::Array;
00599   using Teuchos::ArrayRCP;
00600   using Teuchos::ArrayView;
00601   using Teuchos::as;
00602   using Teuchos::Comm;
00603   using Teuchos::RCP;
00604   using Teuchos::rcp;
00605   using Teuchos::REDUCE_MAX;
00606   using Teuchos::REDUCE_MIN;
00607   using Teuchos::REDUCE_SUM;
00608   using Teuchos::rcp_dynamic_cast;
00609   using Teuchos::reduceAll;
00610 
00611   {
00612     // Reset the timer each time, since Relaxation uses the same Time
00613     // object to track times for different methods.
00614     Teuchos::TimeMonitor timeMon (*Time_, true);
00615 
00616     TEUCHOS_TEST_FOR_EXCEPTION(
00617       A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::"
00618       "computeBlockCrs: The input matrix A is null.  Please call setMatrix() "
00619       "with a nonnull input matrix, then call initialize(), before calling "
00620       "this method.");
00621     const block_crs_matrix_type* blockCrsA =
00622       dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr ());
00623     TEUCHOS_TEST_FOR_EXCEPTION(
00624       blockCrsA == NULL, std::logic_error, "Ifpack2::Relaxation::"
00625       "computeBlockCrs: A_ is not a BlockCrsMatrix, but it should be if we "
00626       "got this far.  Please report this bug to the Ifpack2 developers.");
00627 
00628     const scalar_type one = STS::one ();
00629 
00630     // Reset state.
00631     IsComputed_ = false;
00632     Condest_ = -STM::one ();
00633 
00634     const local_ordinal_type blockSize = blockCrsA->getBlockSize ();
00635 
00636     if (! savedDiagOffsets_) {
00637       BlockDiagonal_ = Teuchos::null;
00638       BlockDiagonal_ =
00639         rcp (new block_crs_matrix_type (* (blockCrsA->getDiagonalGraph ()),
00640                                         blockSize));
00641       blockCrsA->getLocalDiagOffsets (diagOffsets_);
00642       savedDiagOffsets_ = true;
00643     }
00644     blockCrsA->getLocalDiagCopy (*BlockDiagonal_, diagOffsets_ ());
00645 
00646     const size_t numMyRows = A_->getNodeNumRows ();
00647 
00648     if (DoL1Method_ && IsParallel_) {
00649       const scalar_type two = one + one;
00650       const size_t maxLength = A_->getNodeMaxNumRowEntries ();
00651       Array<local_ordinal_type> indices (maxLength);
00652       Array<scalar_type> values (maxLength * blockSize * blockSize);
00653       size_t numEntries = 0;
00654 
00655       for (size_t i = 0; i < numMyRows; ++i) {
00656         blockCrsA->getLocalRowCopy (i, indices (), values (), numEntries);
00657         scalar_type* diagBlock = BlockDiagonal_->getLocalBlock (i,i).getRawPtr ();
00658         for (local_ordinal_type subRow = 0; subRow < blockSize; ++subRow) {
00659           magnitude_type diagonal_boost = STM::zero ();
00660           for (size_t k = 0 ; k < numEntries ; ++k) {
00661             if (static_cast<size_t> (indices[k]) > numMyRows) {
00662               const size_t offset = blockSize*blockSize*k + subRow*blockSize;
00663               for (local_ordinal_type subCol = 0; subCol < blockSize; ++subCol) {
00664                 diagonal_boost += STS::magnitude (values[offset+subCol] / two);
00665               }
00666             }
00667           }
00668           if (STS::magnitude (diagBlock[subRow*(blockSize+1)]) < L1Eta_ * diagonal_boost) {
00669             diagBlock[subRow*(blockSize+1)] += diagonal_boost;
00670           }
00671         }
00672       }
00673     }
00674 
00675     blockDiagonalFactorizationPivots.resize (numMyRows * blockSize);
00676     int info;
00677     for (size_t i = 0 ; i < numMyRows; ++i) {
00678       typename block_crs_matrix_type::little_block_type diagBlock =
00679         BlockDiagonal_->getLocalBlock (i, i);
00680       diagBlock.factorize (&blockDiagonalFactorizationPivots[i*blockSize], info);
00681     }
00682 
00683     Importer_ = A_->getGraph ()->getImporter ();
00684   } // end TimeMonitor scope
00685 
00686   ComputeTime_ += Time_->totalElapsedTime ();
00687   ++NumCompute_;
00688   IsComputed_ = true;
00689 }
00690 
00691 template<class MatrixType>
00692 void Relaxation<MatrixType>::compute ()
00693 {
00694   using Teuchos::Array;
00695   using Teuchos::ArrayRCP;
00696   using Teuchos::ArrayView;
00697   using Teuchos::as;
00698   using Teuchos::Comm;
00699   using Teuchos::RCP;
00700   using Teuchos::rcp;
00701   using Teuchos::REDUCE_MAX;
00702   using Teuchos::REDUCE_MIN;
00703   using Teuchos::REDUCE_SUM;
00704   using Teuchos::rcp_dynamic_cast;
00705   using Teuchos::reduceAll;
00706   typedef Tpetra::Vector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> vector_type;
00707   const scalar_type zero = STS::zero ();
00708   const scalar_type one = STS::one ();
00709 
00710   // We don't count initialization in compute() time.
00711   if (! isInitialized ()) {
00712     initialize ();
00713   }
00714 
00715   if (hasBlockCrsMatrix_) {
00716     computeBlockCrs ();
00717     return;
00718   }
00719 
00720   {
00721     // Reset the timer each time, since Relaxation uses the same Time
00722     // object to track times for different methods.
00723     Teuchos::TimeMonitor timeMon (*Time_, true);
00724 
00725     TEUCHOS_TEST_FOR_EXCEPTION(
00726       A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::compute: "
00727       "The input matrix A is null.  Please call setMatrix() with a nonnull "
00728       "input matrix, then call initialize(), before calling this method.");
00729 
00730     // Reset state.
00731     IsComputed_ = false;
00732     Condest_ = -STM::one ();
00733 
00734     TEUCHOS_TEST_FOR_EXCEPTION(
00735       NumSweeps_ < 0, std::logic_error,
00736       "Ifpack2::Relaxation::compute: NumSweeps_ = " << NumSweeps_ << " < 0.  "
00737       "Please report this bug to the Ifpack2 developers.");
00738 
00739     Diagonal_ = rcp (new vector_type (A_->getRowMap ()));
00740 
00741     TEUCHOS_TEST_FOR_EXCEPTION(
00742       Diagonal_.is_null (), std::logic_error,
00743       "Ifpack2::Relaxation::compute: Vector of diagonal entries has not been "
00744       "created yet.  Please report this bug to the Ifpack2 developers.");
00745 
00746     // Extract the diagonal entries.  The CrsMatrix static graph
00747     // version is faster for subsequent calls to compute(), since it
00748     // caches the diagonal offsets.
00749     //
00750     // isStaticGraph() == true means that the matrix was created with
00751     // a const graph.  The only requirement is that the structure of
00752     // the matrix never changes, so isStaticGraph() == true is a bit
00753     // more conservative than we need.  However, Tpetra doesn't (as of
00754     // 05 Apr 2013) have a way to tell if the graph hasn't changed
00755     // since the last time we used it.
00756     {
00757       // NOTE (mfh 07 Jul 2013): We must cast here to CrsMatrix
00758       // instead of MatrixType, because isStaticGraph is a CrsMatrix
00759       // method (not inherited from RowMatrix's interface).  It's
00760       // perfectly valid to do relaxation on a RowMatrix which is not
00761       // a CrsMatrix.
00762       const crs_matrix_type* crsMat =
00763         dynamic_cast<const crs_matrix_type*> (A_.getRawPtr ());
00764       if (crsMat == NULL || ! crsMat->isStaticGraph ()) {
00765         A_->getLocalDiagCopy (*Diagonal_); // slow path
00766       } else {
00767         if (! savedDiagOffsets_) { // we haven't precomputed offsets
00768           crsMat->getLocalDiagOffsets (diagOffsets_);
00769           savedDiagOffsets_ = true;
00770         }
00771         crsMat->getLocalDiagCopy (*Diagonal_, diagOffsets_ ());
00772 #ifdef HAVE_TPETRA_DEBUG
00773         // Validate the fast-path diagonal against the slow-path diagonal.
00774         vector_type D_copy (A_->getRowMap ());
00775         A_->getLocalDiagCopy (D_copy);
00776         D_copy.update (STS::one (), *Diagonal_, -STS::one ());
00777         const magnitude_type err = D_copy.normInf ();
00778         // The two diagonals should be exactly the same, so their
00779         // difference should be exactly zero.
00780         TEUCHOS_TEST_FOR_EXCEPTION(
00781           err != STM::zero(), std::logic_error, "Ifpack2::Relaxation::compute: "
00782           "\"fast-path\" diagonal computation failed.  \\|D1 - D2\\|_inf = "
00783           << err << ".");
00784 #endif // HAVE_TPETRA_DEBUG
00785       }
00786     }
00787 
00788     // If we're checking the computed inverse diagonal, then keep a
00789     // copy of the original diagonal entries for later comparison.
00790     RCP<vector_type> origDiag;
00791     if (checkDiagEntries_) {
00792       origDiag = rcp (new vector_type (A_->getRowMap ()));
00793       *origDiag = *Diagonal_;
00794     }
00795 
00796     // "Host view" means that if the Node type is a GPU Node, the
00797     // ArrayRCP points to host memory, not device memory.  It will
00798     // write back to device memory (Diagonal_) at end of scope.
00799     ArrayRCP<scalar_type> diagHostView = Diagonal_->get1dViewNonConst ();
00800 
00801     // The view below is only valid as long as diagHostView is within
00802     // scope.  We extract a raw pointer in release mode because we
00803     // don't trust older versions of compilers (like GCC 4.4.x or
00804     // Intel < 13) to optimize away ArrayView::operator[].
00805 #ifdef HAVE_IFPACK2_DEBUG
00806     ArrayView<scalar_type> diag = diagHostView ();
00807 #else
00808     scalar_type* KOKKOSCLASSIC_RESTRICT const diag = diagHostView.getRawPtr ();
00809 #endif // HAVE_IFPACK2_DEBUG
00810 
00811     const size_t numMyRows = A_->getNodeNumRows ();
00812 
00813     // Setup for L1 Methods.
00814     // Here we add half the value of the off-processor entries in the row,
00815     // but only if diagonal isn't sufficiently large.
00816     //
00817     // This follows from Equation (6.5) in: Baker, Falgout, Kolev and
00818     // Yang.  "Multigrid Smoothers for Ultraparallel Computing."  SIAM
00819     // J. Sci. Comput., Vol. 33, No. 5. (2011), pp. 2864-2887.
00820     if (DoL1Method_ && IsParallel_) {
00821       const scalar_type two = one + one;
00822       const size_t maxLength = A_->getNodeMaxNumRowEntries ();
00823       Array<local_ordinal_type> indices (maxLength);
00824       Array<scalar_type> values (maxLength);
00825       size_t numEntries;
00826 
00827       for (size_t i = 0; i < numMyRows; ++i) {
00828         A_->getLocalRowCopy (i, indices (), values (), numEntries);
00829         magnitude_type diagonal_boost = STM::zero ();
00830         for (size_t k = 0 ; k < numEntries ; ++k) {
00831           if (static_cast<size_t> (indices[k]) > numMyRows) {
00832             diagonal_boost += STS::magnitude (values[k] / two);
00833           }
00834         }
00835         if (STS::magnitude (diag[i]) < L1Eta_ * diagonal_boost) {
00836           diag[i] += diagonal_boost;
00837         }
00838       }
00839 
00840     }
00841 
00842     //
00843     // Invert the diagonal entries of the matrix (not in place).
00844     //
00845 
00846     // Precompute some quantities for "fixing" zero or tiny diagonal
00847     // entries.  We'll only use them if this "fixing" is enabled.
00848     //
00849     // SmallTraits covers for the lack of eps() in
00850     // Teuchos::ScalarTraits when its template parameter is not a
00851     // floating-point type.  (Ifpack2 sometimes gets instantiated for
00852     // integer Scalar types.)
00853     const scalar_type oneOverMinDiagVal = (MinDiagonalValue_ == zero) ?
00854       one / SmallTraits<scalar_type>::eps () :
00855       one / MinDiagonalValue_;
00856     // It's helpful not to have to recompute this magnitude each time.
00857     const magnitude_type minDiagValMag = STS::magnitude (MinDiagonalValue_);
00858 
00859     if (checkDiagEntries_) {
00860       //
00861       // Check diagonal elements, replace zero elements with the minimum
00862       // diagonal value, and store their inverses.  Count the number of
00863       // "small" and zero diagonal entries while we're at it.
00864       //
00865       size_t numSmallDiagEntries = 0; // "small" includes zero
00866       size_t numZeroDiagEntries = 0; // # zero diagonal entries
00867       size_t numNegDiagEntries = 0; // # negative (real parts of) diagonal entries
00868 
00869       // As we go, keep track of the diagonal entries with the least and
00870       // greatest magnitude.  We could use the trick of starting the min
00871       // with +Inf and the max with -Inf, but that doesn't work if
00872       // scalar_type is a built-in integer type.  Thus, we have to start
00873       // by reading the first diagonal entry redundantly.
00874       // scalar_type minMagDiagEntry = zero;
00875       // scalar_type maxMagDiagEntry = zero;
00876       magnitude_type minMagDiagEntryMag = STM::zero ();
00877       magnitude_type maxMagDiagEntryMag = STM::zero ();
00878       if (numMyRows > 0) {
00879         const scalar_type d_0 = diag[0];
00880         const magnitude_type d_0_mag = STS::magnitude (d_0);
00881         // minMagDiagEntry = d_0;
00882         // maxMagDiagEntry = d_0;
00883         minMagDiagEntryMag = d_0_mag;
00884         maxMagDiagEntryMag = d_0_mag;
00885       }
00886 
00887       // Go through all the diagonal entries.  Compute counts of
00888       // small-magnitude, zero, and negative-real-part entries.  Invert
00889       // the diagonal entries that aren't too small.  For those that are
00890       // too small in magnitude, replace them with 1/MinDiagonalValue_
00891       // (or 1/eps if MinDiagonalValue_ happens to be zero).
00892       for (size_t i = 0 ; i < numMyRows; ++i) {
00893         const scalar_type d_i = diag[i];
00894         const magnitude_type d_i_mag = STS::magnitude (d_i);
00895         const magnitude_type d_i_real = STS::real (d_i);
00896 
00897         // We can't compare complex numbers, but we can compare their
00898         // real parts.
00899         if (d_i_real < STM::zero ()) {
00900           ++numNegDiagEntries;
00901         }
00902         if (d_i_mag < minMagDiagEntryMag) {
00903           // minMagDiagEntry = d_i;
00904           minMagDiagEntryMag = d_i_mag;
00905         }
00906         if (d_i_mag > maxMagDiagEntryMag) {
00907           // maxMagDiagEntry = d_i;
00908           maxMagDiagEntryMag = d_i_mag;
00909         }
00910 
00911         if (fixTinyDiagEntries_) {
00912           // <= not <, in case minDiagValMag is zero.
00913           if (d_i_mag <= minDiagValMag) {
00914             ++numSmallDiagEntries;
00915             if (d_i_mag == STM::zero ()) {
00916               ++numZeroDiagEntries;
00917             }
00918             diag[i] = oneOverMinDiagVal;
00919           } else {
00920             diag[i] = one / d_i;
00921           }
00922         }
00923         else { // Don't fix zero or tiny diagonal entries.
00924           // <= not <, in case minDiagValMag is zero.
00925           if (d_i_mag <= minDiagValMag) {
00926             ++numSmallDiagEntries;
00927             if (d_i_mag == STM::zero ()) {
00928               ++numZeroDiagEntries;
00929             }
00930           }
00931           diag[i] = one / d_i;
00932         }
00933       }
00934 
00935       // We're done computing the inverse diagonal, so invalidate the view.
00936       // This ensures that the operations below, that use methods on Vector,
00937       // produce correct results even on a GPU Node.
00938       diagHostView = Teuchos::null;
00939 
00940       // Count floating-point operations of computing the inverse diagonal.
00941       //
00942       // FIXME (mfh 30 Mar 2013) Shouldn't counts be global, not local?
00943       if (STS::isComplex) { // magnitude: at least 3 flops per diagonal entry
00944         ComputeFlops_ += 4.0 * numMyRows;
00945       } else {
00946         ComputeFlops_ += numMyRows;
00947       }
00948 
00949       // Collect global data about the diagonal entries.  Only do this
00950       // (which involves O(1) all-reduces) if the user asked us to do
00951       // the extra work.
00952       //
00953       // FIXME (mfh 28 Mar 2013) This is wrong if some processes have
00954       // zero rows.  Fixing this requires one of two options:
00955       //
00956       // 1. Use a custom reduction operation that ignores processes
00957       //    with zero diagonal entries.
00958       // 2. Split the communicator, compute all-reduces using the
00959       //    subcommunicator over processes that have a nonzero number
00960       //    of diagonal entries, and then broadcast from one of those
00961       //    processes (if there is one) to the processes in the other
00962       //    subcommunicator.
00963       const Comm<int>& comm = * (A_->getRowMap ()->getComm ());
00964 
00965       // Compute global min and max magnitude of entries.
00966       Array<magnitude_type> localVals (2);
00967       localVals[0] = minMagDiagEntryMag;
00968       // (- (min (- x))) is the same as (max x).
00969       localVals[1] = -maxMagDiagEntryMag;
00970       Array<magnitude_type> globalVals (2);
00971       reduceAll<int, magnitude_type> (comm, REDUCE_MIN, 2,
00972                                       localVals.getRawPtr (),
00973                                       globalVals.getRawPtr ());
00974       globalMinMagDiagEntryMag_ = globalVals[0];
00975       globalMaxMagDiagEntryMag_ = -globalVals[1];
00976 
00977       Array<size_t> localCounts (3);
00978       localCounts[0] = numSmallDiagEntries;
00979       localCounts[1] = numZeroDiagEntries;
00980       localCounts[2] = numNegDiagEntries;
00981       Array<size_t> globalCounts (3);
00982       reduceAll<int, size_t> (comm, REDUCE_SUM, 3,
00983                               localCounts.getRawPtr (),
00984                               globalCounts.getRawPtr ());
00985       globalNumSmallDiagEntries_ = globalCounts[0];
00986       globalNumZeroDiagEntries_ = globalCounts[1];
00987       globalNumNegDiagEntries_ = globalCounts[2];
00988 
00989       // Forestall "set but not used" compiler warnings.
00990       // (void) minMagDiagEntry;
00991       // (void) maxMagDiagEntry;
00992 
00993       // Compute and save the difference between the computed inverse
00994       // diagonal, and the original diagonal's inverse.
00995       RCP<vector_type> diff = rcp (new vector_type (A_->getRowMap ()));
00996       diff->reciprocal (*origDiag);
00997       diff->update (-one, *Diagonal_, one);
00998       globalDiagNormDiff_ = diff->norm2 ();
00999     }
01000     else { // don't check diagonal elements
01001       if (fixTinyDiagEntries_) {
01002         // Go through all the diagonal entries.  Invert those that
01003         // aren't too small in magnitude.  For those that are too
01004         // small in magnitude, replace them with oneOverMinDiagVal.
01005         for (size_t i = 0 ; i < numMyRows; ++i) {
01006           const scalar_type d_i = diag[i];
01007           const magnitude_type d_i_mag = STS::magnitude (d_i);
01008 
01009           // <= not <, in case minDiagValMag is zero.
01010           if (d_i_mag <= minDiagValMag) {
01011             diag[i] = oneOverMinDiagVal;
01012           } else {
01013             diag[i] = one / d_i;
01014           }
01015         }
01016       }
01017       else { // don't fix tiny or zero diagonal entries
01018         for (size_t i = 0 ; i < numMyRows; ++i) {
01019           diag[i] = one / diag[i];
01020         }
01021       }
01022       if (STS::isComplex) { // magnitude: at least 3 flops per diagonal entry
01023         ComputeFlops_ += 4.0 * numMyRows;
01024       } else {
01025         ComputeFlops_ += numMyRows;
01026       }
01027     }
01028 
01029     if (IsParallel_ && (PrecType_ == Ifpack2::Details::GS ||
01030                         PrecType_ == Ifpack2::Details::SGS)) {
01031       Importer_ = A_->getGraph ()->getImporter ();
01032       // mfh 21 Mar 2013: The Import object may be null, but in that
01033       // case, the domain and column Maps are the same and we don't
01034       // need to Import anyway.
01035     }
01036   } // end TimeMonitor scope
01037 
01038   ComputeTime_ += Time_->totalElapsedTime ();
01039   ++NumCompute_;
01040   IsComputed_ = true;
01041 }
01042 
01043 
01044 template<class MatrixType>
01045 void
01046 Relaxation<MatrixType>::
01047 ApplyInverseJacobi (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
01048                     Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
01049 {
01050   using Teuchos::as;
01051   typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
01052     global_ordinal_type, node_type> MV;
01053 
01054   if (hasBlockCrsMatrix_) {
01055     ApplyInverseJacobi_BlockCrsMatrix (X, Y);
01056     return;
01057   }
01058 
01059   const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
01060   const double numVectors = as<double> (X.getNumVectors ());
01061   if (ZeroStartingSolution_) {
01062     // For the first Jacobi sweep, if we are allowed to assume that
01063     // the initial guess is zero, then Jacobi is just diagonal
01064     // scaling.  (A_ij * x_j = 0 for i != j, since x_j = 0.)
01065     // Compute it as Y(i,j) = DampingFactor_ * X(i,j) * D(i).
01066     Y.elementWiseMultiply (DampingFactor_, *Diagonal_, X, STS::zero ());
01067 
01068     // Count (global) floating-point operations.  Ifpack2 represents
01069     // this as a floating-point number rather than an integer, so that
01070     // overflow (for a very large number of calls, or a very large
01071     // problem) is approximate instead of catastrophic.
01072     double flopUpdate = 0.0;
01073     if (DampingFactor_ == STS::one ()) {
01074       // Y(i,j) = X(i,j) * D(i): one multiply for each entry of Y.
01075       flopUpdate = numGlobalRows * numVectors;
01076     } else {
01077       // Y(i,j) = DampingFactor_ * X(i,j) * D(i):
01078       // Two multiplies per entry of Y.
01079       flopUpdate = 2.0 * numGlobalRows * numVectors;
01080     }
01081     ApplyFlops_ += flopUpdate;
01082     if (NumSweeps_ == 1) {
01083       return;
01084     }
01085   }
01086   // If we were allowed to assume that the starting guess was zero,
01087   // then we have already done the first sweep above.
01088   const int startSweep = ZeroStartingSolution_ ? 1 : 0;
01089   // We don't need to initialize the result MV, since the sparse
01090   // mat-vec will clobber its contents anyway.
01091   MV A_times_Y (Y.getMap (), as<size_t>(numVectors), false);
01092   for (int j = startSweep; j < NumSweeps_; ++j) {
01093     // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
01094     applyMat (Y, A_times_Y);
01095     A_times_Y.update (STS::one (), X, -STS::one ());
01096     Y.elementWiseMultiply (DampingFactor_, *Diagonal_, A_times_Y, STS::one ());
01097   }
01098 
01099   // For each column of output, for each pass over the matrix:
01100   //
01101   // - One + and one * for each matrix entry
01102   // - One / and one + for each row of the matrix
01103   // - If the damping factor is not one: one * for each row of the
01104   //   matrix.  (It's not fair to count this if the damping factor is
01105   //   one, since the implementation could skip it.  Whether it does
01106   //   or not is the implementation's choice.)
01107 
01108   // Floating-point operations due to the damping factor, per matrix
01109   // row, per direction, per columm of output.
01110   const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
01111   const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
01112   ApplyFlops_ += as<double> (NumSweeps_ - startSweep) * numVectors *
01113     (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
01114 }
01115 
01116 template<class MatrixType>
01117 void
01118 Relaxation<MatrixType>::
01119 ApplyInverseJacobi_BlockCrsMatrix (const Tpetra::MultiVector<scalar_type, local_ordinal_type,
01120                                                              global_ordinal_type, node_type>& X,
01121                                    Tpetra::MultiVector<scalar_type, local_ordinal_type,
01122                                                        global_ordinal_type,node_type>& Y) const
01123 {
01124   typedef Tpetra::Experimental::BlockMultiVector<scalar_type,
01125     local_ordinal_type, global_ordinal_type,node_type> BMV;
01126   typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
01127     global_ordinal_type, node_type> MV;
01128   typedef typename block_crs_matrix_type::little_block_type little_block_type;
01129   typedef typename block_crs_matrix_type::little_vec_type little_vec_type;
01130 
01131   if (ZeroStartingSolution_) {
01132     Y.putScalar (STS::zero ());
01133   }
01134 
01135   const size_t numVectors = X.getNumVectors ();
01136 
01137   const block_crs_matrix_type* blockMat =
01138     dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr ());
01139 
01140   const local_ordinal_type blockSize = blockMat->getBlockSize ();
01141   BMV A_times_Y_Block (* (blockMat->getRowMap ()), * (Y.getMap ()),
01142                        blockSize, numVectors);
01143   MV A_times_Y = A_times_Y_Block.getMultiVectorView();
01144   BMV yBlock (Y, * (blockMat->getRowMap ()), blockSize);
01145   for (int j = 0; j < NumSweeps_; ++j) {
01146     blockMat->apply (Y, A_times_Y);
01147     A_times_Y.update (STS::one (), X, -STS::one ());
01148     const size_t numRows = blockMat->getNodeNumRows ();
01149     for (size_t i = 0; i < numVectors; ++i) {
01150       for (size_t k = 0; k < numRows; ++k) {
01151         // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
01152         little_block_type factorizedBlockDiag = BlockDiagonal_->getLocalBlock (k, k);
01153         little_vec_type xloc = A_times_Y_Block.getLocalBlock (k, i);
01154         little_vec_type yloc = yBlock.getLocalBlock (k, i);
01155         factorizedBlockDiag.solve (xloc, &blockDiagonalFactorizationPivots[i*blockSize]);
01156         yloc.update (DampingFactor_, xloc);
01157       }
01158     }
01159   }
01160 }
01161 
01162 template<class MatrixType>
01163 void
01164 Relaxation<MatrixType>::
01165 ApplyInverseGS (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
01166                 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
01167 {
01168   typedef Relaxation<MatrixType> this_type;
01169   // The CrsMatrix version is faster, because it can access the sparse
01170   // matrix data directly, rather than by copying out each row's data
01171   // in turn.  Thus, we check whether the RowMatrix is really a
01172   // CrsMatrix.
01173   //
01174   // FIXME (mfh 07 Jul 2013) See note on crs_matrix_type typedef
01175   // declaration in Ifpack2_Relaxation_decl.hpp header file.  The code
01176   // will still be correct if the cast fails, but it will use an
01177   // unoptimized kernel.
01178   const block_crs_matrix_type* blockCrsMat =
01179     dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr ());
01180   const crs_matrix_type* crsMat =
01181     dynamic_cast<const crs_matrix_type*> (A_.getRawPtr ());
01182   if (blockCrsMat != NULL)  {
01183     const_cast<this_type*> (this)->ApplyInverseGS_BlockCrsMatrix (*blockCrsMat, X, Y);
01184   } else if (crsMat != NULL) {
01185     ApplyInverseGS_CrsMatrix (*crsMat, X, Y);
01186   } else {
01187     ApplyInverseGS_RowMatrix (X, Y);
01188   }
01189 }
01190 
01191 
01192 template<class MatrixType>
01193 void
01194 Relaxation<MatrixType>::
01195 ApplyInverseGS_RowMatrix (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
01196                           Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
01197 {
01198   using Teuchos::Array;
01199   using Teuchos::ArrayRCP;
01200   using Teuchos::ArrayView;
01201   using Teuchos::as;
01202   using Teuchos::RCP;
01203   using Teuchos::rcp;
01204   using Teuchos::rcpFromRef;
01205   typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
01206 
01207   // Tpetra's GS implementation for CrsMatrix handles zeroing out the
01208   // starting multivector itself.  The generic RowMatrix version here
01209   // does not, so we have to zero out Y here.
01210   if (ZeroStartingSolution_) {
01211     Y.putScalar (STS::zero ());
01212   }
01213 
01214   const size_t NumVectors = X.getNumVectors ();
01215   const size_t maxLength = A_->getNodeMaxNumRowEntries ();
01216   Array<local_ordinal_type> Indices (maxLength);
01217   Array<scalar_type> Values (maxLength);
01218 
01219   // Local smoothing stuff
01220   const size_t numMyRows = A_->getNodeNumRows();
01221   const local_ordinal_type* rowInd  = 0;
01222   size_t numActive = numMyRows;
01223   bool do_local = ! localSmoothingIndices_.is_null ();
01224   if (do_local) {
01225     rowInd = localSmoothingIndices_.getRawPtr ();
01226     numActive = localSmoothingIndices_.size ();
01227   }
01228 
01229   RCP<MV> Y2;
01230   if (IsParallel_) {
01231     if (Importer_.is_null ()) { // domain and column Maps are the same.
01232       // We will copy Y into Y2 below, so no need to fill with zeros here.
01233       Y2 = rcp (new MV (Y.getMap (), NumVectors, false));
01234     } else {
01235       // FIXME (mfh 21 Mar 2013) We probably don't need to fill with
01236       // zeros here, since we are doing an Import into Y2 below
01237       // anyway.  However, it doesn't hurt correctness.
01238       Y2 = rcp (new MV (Importer_->getTargetMap (), NumVectors));
01239     }
01240   }
01241   else {
01242     Y2 = rcpFromRef (Y);
01243   }
01244 
01245   // Diagonal
01246   ArrayRCP<const scalar_type> d_rcp = Diagonal_->get1dView ();
01247   ArrayView<const scalar_type> d_ptr = d_rcp();
01248 
01249   // Constant stride check
01250   bool constant_stride = X.isConstantStride() && Y2->isConstantStride();
01251 
01252   if (constant_stride) {
01253     // extract 1D RCPs
01254     size_t                    x_stride = X.getStride();
01255     size_t                   y2_stride = Y2->getStride();
01256     ArrayRCP<scalar_type>       y2_rcp = Y2->get1dViewNonConst();
01257     ArrayRCP<const scalar_type>  x_rcp = X.get1dView();
01258     ArrayView<scalar_type>      y2_ptr = y2_rcp();
01259     ArrayView<const scalar_type> x_ptr = x_rcp();
01260     Array<scalar_type> dtemp(NumVectors,STS::zero());
01261 
01262     for (int j = 0; j < NumSweeps_; ++j) {
01263       // data exchange is here, once per sweep
01264       if (IsParallel_) {
01265         if (Importer_.is_null ()) {
01266           *Y2 = Y; // just copy, since domain and column Maps are the same
01267         } else {
01268           Y2->doImport (Y, *Importer_, Tpetra::INSERT);
01269         }
01270       }
01271 
01272       if (! DoBackwardGS_) { // Forward sweep
01273         for (size_t ii = 0; ii < numActive; ++ii) {
01274           local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
01275           size_t NumEntries;
01276           A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
01277           dtemp.assign(NumVectors,STS::zero());
01278 
01279           for (size_t k = 0; k < NumEntries; ++k) {
01280             const local_ordinal_type col = Indices[k];
01281             for (size_t m = 0; m < NumVectors; ++m) {
01282               dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
01283             }
01284           }
01285 
01286           for (size_t m = 0; m < NumVectors; ++m) {
01287             y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
01288           }
01289         }
01290       }
01291       else { // Backward sweep
01292         // ptrdiff_t is the same size as size_t, but is signed.  Being
01293         // signed is important so that i >= 0 is not trivially true.
01294         for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
01295           local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
01296           size_t NumEntries;
01297           A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
01298           dtemp.assign (NumVectors, STS::zero ());
01299 
01300           for (size_t k = 0; k < NumEntries; ++k) {
01301             const local_ordinal_type col = Indices[k];
01302             for (size_t m = 0; m < NumVectors; ++m) {
01303               dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
01304             }
01305           }
01306 
01307           for (size_t m = 0; m < NumVectors; ++m) {
01308             y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
01309           }
01310         }
01311       }
01312       // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
01313       if (IsParallel_) {
01314         Tpetra::deep_copy (Y, *Y2);
01315       }
01316     }
01317   }
01318   else {
01319     // extract 2D RCPS
01320     ArrayRCP<ArrayRCP<scalar_type> > y2_ptr = Y2->get2dViewNonConst ();
01321     ArrayRCP<ArrayRCP<const scalar_type> > x_ptr = X.get2dView ();
01322 
01323     for (int j = 0; j < NumSweeps_; ++j) {
01324       // data exchange is here, once per sweep
01325       if (IsParallel_) {
01326         if (Importer_.is_null ()) {
01327           *Y2 = Y; // just copy, since domain and column Maps are the same
01328         } else {
01329           Y2->doImport (Y, *Importer_, Tpetra::INSERT);
01330         }
01331       }
01332 
01333       if (! DoBackwardGS_) { // Forward sweep
01334         for (size_t ii = 0; ii < numActive; ++ii) {
01335           local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
01336           size_t NumEntries;
01337           A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
01338 
01339           for (size_t m = 0; m < NumVectors; ++m) {
01340             scalar_type dtemp = STS::zero ();
01341             ArrayView<const scalar_type> x_local = (x_ptr())[m]();
01342             ArrayView<scalar_type>      y2_local = (y2_ptr())[m]();
01343 
01344             for (size_t k = 0; k < NumEntries; ++k) {
01345               const local_ordinal_type col = Indices[k];
01346               dtemp += Values[k] * y2_local[col];
01347             }
01348             y2_local[i] += DampingFactor_ * d_ptr[i] * (x_local[i] - dtemp);
01349           }
01350         }
01351       }
01352       else { // Backward sweep
01353         // ptrdiff_t is the same size as size_t, but is signed.  Being
01354         // signed is important so that i >= 0 is not trivially true.
01355         for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
01356           local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
01357 
01358           size_t NumEntries;
01359           A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
01360 
01361           for (size_t m = 0; m < NumVectors; ++m) {
01362             scalar_type dtemp = STS::zero ();
01363             ArrayView<const scalar_type> x_local = (x_ptr())[m]();
01364             ArrayView<scalar_type>      y2_local = (y2_ptr())[m]();
01365 
01366             for (size_t k = 0; k < NumEntries; ++k) {
01367               const local_ordinal_type col = Indices[k];
01368               dtemp += Values[k] * y2_local[col];
01369             }
01370             y2_local[i] += DampingFactor_ * d_ptr[i] * (x_local[i] - dtemp);
01371           }
01372         }
01373       }
01374 
01375       // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
01376       if (IsParallel_) {
01377         Tpetra::deep_copy (Y, *Y2);
01378       }
01379     }
01380   }
01381 
01382 
01383   // See flop count discussion in implementation of ApplyInverseGS_CrsMatrix().
01384   const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
01385   const double numVectors = as<double> (X.getNumVectors ());
01386   const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
01387   const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
01388   ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
01389     (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
01390 }
01391 
01392 
01393 template<class MatrixType>
01394 void
01395 Relaxation<MatrixType>::
01396 ApplyInverseGS_CrsMatrix (const crs_matrix_type& A,
01397                           const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
01398                           Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
01399 {
01400   using Teuchos::as;
01401   const Tpetra::ESweepDirection direction =
01402     DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
01403   if(localSmoothingIndices_.is_null())
01404     A.gaussSeidelCopy (Y, X, *Diagonal_, DampingFactor_, direction,
01405                        NumSweeps_, ZeroStartingSolution_);
01406   else
01407     A.reorderedGaussSeidelCopy (Y, X, *Diagonal_, localSmoothingIndices_(), DampingFactor_, direction,
01408                                 NumSweeps_, ZeroStartingSolution_);
01409 
01410   // For each column of output, for each sweep over the matrix:
01411   //
01412   // - One + and one * for each matrix entry
01413   // - One / and one + for each row of the matrix
01414   // - If the damping factor is not one: one * for each row of the
01415   //   matrix.  (It's not fair to count this if the damping factor is
01416   //   one, since the implementation could skip it.  Whether it does
01417   //   or not is the implementation's choice.)
01418 
01419   // Floating-point operations due to the damping factor, per matrix
01420   // row, per direction, per columm of output.
01421   const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
01422   const double numVectors = as<double> (X.getNumVectors ());
01423   const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
01424   const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
01425   ApplyFlops_ += NumSweeps_ * numVectors *
01426     (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
01427 }
01428 
01429 template<class MatrixType>
01430 void
01431 Relaxation<MatrixType>::
01432 ApplyInverseGS_BlockCrsMatrix (const block_crs_matrix_type& A,
01433                                const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
01434                                Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
01435 {
01436   using Teuchos::RCP;
01437   using Teuchos::rcp;
01438   using Teuchos::rcpFromRef;
01439   typedef Tpetra::Experimental::BlockMultiVector<scalar_type,
01440     local_ordinal_type, global_ordinal_type, node_type> BMV;
01441   typedef Tpetra::MultiVector<scalar_type,
01442     local_ordinal_type, global_ordinal_type, node_type> MV;
01443 
01444   //FIXME: (tcf) 8/21/2014 -- may be problematic for multiple right hand sides
01445   //
01446   // NOTE (mfh 12 Sep 2014) I don't think it should be a problem for
01447   // multiple right-hand sides, unless the input or output MultiVector
01448   // does not have constant stride.  We should check for that case
01449   // here, in case it doesn't work in localGaussSeidel (which is
01450   // entirely possible).
01451   BMV yBlock (Y, * (A.getGraph ()->getDomainMap ()), A.getBlockSize ());
01452   const BMV xBlock (X, * (A.getColMap ()), A.getBlockSize ());
01453 
01454   bool performImport = false;
01455   RCP<BMV> yBlockCol;
01456   if (Importer_.is_null ()) {
01457     yBlockCol = rcpFromRef (yBlock);
01458   }
01459   else {
01460     if (yBlockColumnPointMap_.is_null () ||
01461         yBlockColumnPointMap_->getNumVectors () != yBlock.getNumVectors () ||
01462         yBlockColumnPointMap_->getBlockSize () != yBlock.getBlockSize ()) {
01463       yBlockColumnPointMap_ =
01464         rcp (new BMV (* (A.getColMap ()), A.getBlockSize (),
01465                       static_cast<local_ordinal_type> (yBlock.getNumVectors ())));
01466     }
01467     yBlockCol = yBlockColumnPointMap_;
01468     performImport = true;
01469   }
01470 
01471   if (ZeroStartingSolution_) {
01472     yBlockCol->putScalar (STS::zero ());
01473   }
01474   else if (performImport) {
01475     yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT);
01476   }
01477 
01478   const Tpetra::ESweepDirection direction =
01479     DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
01480 
01481   for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
01482     if (performImport && sweep > 0) {
01483       yBlockCol->doImport(yBlock, *Importer_, Tpetra::INSERT);
01484     }
01485     A.localGaussSeidel (xBlock, *yBlockCol, *BlockDiagonal_,
01486                         &blockDiagonalFactorizationPivots[0],
01487                         DampingFactor_, direction);
01488     if (performImport) {
01489       RCP<const MV> yBlockColPointDomain =
01490         yBlockCol->getMultiVectorView ().offsetView (A.getDomainMap (), 0);
01491       Tpetra::deep_copy (Y, *yBlockColPointDomain);
01492     }
01493   }
01494 }
01495 
01496 
01497 template<class MatrixType>
01498 void
01499 Relaxation<MatrixType>::
01500 ApplyInverseSGS (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
01501                  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
01502 {
01503   typedef Relaxation<MatrixType> this_type;
01504   // The CrsMatrix version is faster, because it can access the sparse
01505   // matrix data directly, rather than by copying out each row's data
01506   // in turn.  Thus, we check whether the RowMatrix is really a
01507   // CrsMatrix.
01508   //
01509   // FIXME (mfh 07 Jul 2013) See note on crs_matrix_type typedef
01510   // declaration in Ifpack2_Relaxation_decl.hpp header file.  The code
01511   // will still be correct if the cast fails, but it will use an
01512   // unoptimized kernel.
01513   const block_crs_matrix_type* blockCrsMat = dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr());
01514   const crs_matrix_type* crsMat = dynamic_cast<const crs_matrix_type*> (&(*A_));
01515   if (blockCrsMat != NULL)  {
01516     const_cast<this_type*> (this)->ApplyInverseSGS_BlockCrsMatrix(*blockCrsMat, X, Y);
01517   }
01518   else if (crsMat != NULL) {
01519     ApplyInverseSGS_CrsMatrix (*crsMat, X, Y);
01520   } else {
01521     ApplyInverseSGS_RowMatrix (X, Y);
01522   }
01523 }
01524 
01525 
01526 template<class MatrixType>
01527 void
01528 Relaxation<MatrixType>::
01529 ApplyInverseSGS_RowMatrix (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
01530                            Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
01531 {
01532   using Teuchos::Array;
01533   using Teuchos::ArrayRCP;
01534   using Teuchos::ArrayView;
01535   using Teuchos::as;
01536   using Teuchos::RCP;
01537   using Teuchos::rcp;
01538   using Teuchos::rcpFromRef;
01539   typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
01540 
01541   // Tpetra's GS implementation for CrsMatrix handles zeroing out the
01542   // starting multivector itself.  The generic RowMatrix version here
01543   // does not, so we have to zero out Y here.
01544   if (ZeroStartingSolution_) {
01545     Y.putScalar (STS::zero ());
01546   }
01547 
01548   const size_t NumVectors = X.getNumVectors ();
01549   const size_t maxLength = A_->getNodeMaxNumRowEntries ();
01550   Array<local_ordinal_type> Indices (maxLength);
01551   Array<scalar_type> Values (maxLength);
01552 
01553   // Local smoothing stuff
01554   const size_t numMyRows             = A_->getNodeNumRows();
01555   const local_ordinal_type * rowInd  = 0;
01556   size_t numActive                   = numMyRows;
01557   bool do_local = localSmoothingIndices_.is_null();
01558   if(do_local) {
01559     rowInd    = localSmoothingIndices_.getRawPtr();
01560     numActive = localSmoothingIndices_.size();
01561   }
01562 
01563 
01564   RCP<MV> Y2;
01565   if (IsParallel_) {
01566     if (Importer_.is_null ()) { // domain and column Maps are the same.
01567       // We will copy Y into Y2 below, so no need to fill with zeros here.
01568       Y2 = rcp (new MV (Y.getMap (), NumVectors, false));
01569     } else {
01570       // FIXME (mfh 21 Mar 2013) We probably don't need to fill with
01571       // zeros here, since we are doing an Import into Y2 below
01572       // anyway.  However, it doesn't hurt correctness.
01573       Y2 = rcp (new MV (Importer_->getTargetMap (), NumVectors));
01574     }
01575   }
01576   else {
01577     Y2 = rcpFromRef (Y);
01578   }
01579 
01580   // Diagonal
01581   ArrayRCP<const scalar_type>  d_rcp = Diagonal_->get1dView();
01582   ArrayView<const scalar_type> d_ptr = d_rcp();
01583 
01584   // Constant stride check
01585   bool constant_stride = X.isConstantStride() && Y2->isConstantStride();
01586 
01587   if(constant_stride) {
01588     // extract 1D RCPs
01589     size_t                    x_stride = X.getStride();
01590     size_t                   y2_stride = Y2->getStride();
01591     ArrayRCP<scalar_type>       y2_rcp = Y2->get1dViewNonConst();
01592     ArrayRCP<const scalar_type>  x_rcp = X.get1dView();
01593     ArrayView<scalar_type>      y2_ptr = y2_rcp();
01594     ArrayView<const scalar_type> x_ptr = x_rcp();
01595     Array<scalar_type> dtemp(NumVectors,STS::zero());
01596     for (int iter = 0; iter < NumSweeps_; ++iter) {
01597       // only one data exchange per sweep
01598       if (IsParallel_) {
01599         if (Importer_.is_null ()) {
01600           // just copy, since domain and column Maps are the same
01601           Tpetra::deep_copy (*Y2, Y);
01602         } else {
01603           Y2->doImport (Y, *Importer_, Tpetra::INSERT);
01604         }
01605       }
01606 
01607       for (int j = 0; j < NumSweeps_; j++) {
01608         // data exchange is here, once per sweep
01609         if (IsParallel_) {
01610           if (Importer_.is_null ()) {
01611             // just copy, since domain and column Maps are the same
01612             Tpetra::deep_copy (*Y2, Y);
01613           } else {
01614             Y2->doImport (Y, *Importer_, Tpetra::INSERT);
01615           }
01616         }
01617 
01618         for (size_t ii = 0; ii < numActive; ++ii) {
01619           local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
01620           size_t NumEntries;
01621           A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
01622           dtemp.assign(NumVectors,STS::zero());
01623 
01624           for (size_t k = 0; k < NumEntries; ++k) {
01625             const local_ordinal_type col = Indices[k];
01626             for (size_t m = 0; m < NumVectors; ++m) {
01627               dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
01628             }
01629           }
01630 
01631           for (size_t m = 0; m < NumVectors; ++m) {
01632             y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
01633           }
01634         }
01635 
01636         // ptrdiff_t is the same size as size_t, but is signed.  Being
01637         // signed is important so that i >= 0 is not trivially true.
01638         for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
01639           local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
01640           size_t NumEntries;
01641           A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
01642           dtemp.assign(NumVectors,STS::zero());
01643 
01644           for (size_t k = 0; k < NumEntries; ++k) {
01645             const local_ordinal_type col = Indices[k];
01646             for (size_t m = 0; m < NumVectors; ++m) {
01647               dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
01648             }
01649           }
01650 
01651           for (size_t m = 0; m < NumVectors; ++m) {
01652             y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
01653           }
01654         }
01655       }
01656 
01657       // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
01658       if (IsParallel_) {
01659         Tpetra::deep_copy (Y, *Y2);
01660       }
01661     }
01662   }
01663   else {
01664     // extract 2D RCPs
01665     ArrayRCP<ArrayRCP<scalar_type> > y2_ptr = Y2->get2dViewNonConst ();
01666     ArrayRCP<ArrayRCP<const scalar_type> > x_ptr =  X.get2dView ();
01667 
01668     for (int iter = 0; iter < NumSweeps_; ++iter) {
01669       // only one data exchange per sweep
01670       if (IsParallel_) {
01671         if (Importer_.is_null ()) {
01672           // just copy, since domain and column Maps are the same
01673           Tpetra::deep_copy (*Y2, Y);
01674         } else {
01675           Y2->doImport (Y, *Importer_, Tpetra::INSERT);
01676         }
01677       }
01678 
01679       for (size_t ii = 0; ii < numActive; ++ii) {
01680         local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
01681         const scalar_type diag = d_ptr[i];
01682         size_t NumEntries;
01683         A_->getLocalRowCopy (as<local_ordinal_type> (i), Indices (), Values (), NumEntries);
01684 
01685         for (size_t m = 0; m < NumVectors; ++m) {
01686           scalar_type dtemp = STS::zero ();
01687           ArrayView<const scalar_type> x_local = (x_ptr())[m]();
01688           ArrayView<scalar_type>      y2_local = (y2_ptr())[m]();
01689 
01690           for (size_t k = 0; k < NumEntries; ++k) {
01691             const local_ordinal_type col = Indices[k];
01692             dtemp += Values[k] * y2_local[col];
01693           }
01694           y2_local[i] += DampingFactor_ * (x_local[i] - dtemp) * diag;
01695         }
01696       }
01697 
01698       // ptrdiff_t is the same size as size_t, but is signed.  Being
01699       // signed is important so that i >= 0 is not trivially true.
01700       for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
01701         local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
01702         const scalar_type diag = d_ptr[i];
01703         size_t NumEntries;
01704         A_->getLocalRowCopy (as<local_ordinal_type> (i), Indices (), Values (), NumEntries);
01705 
01706         for (size_t m = 0; m < NumVectors; ++m) {
01707           scalar_type dtemp = STS::zero ();
01708           ArrayView<const scalar_type> x_local = (x_ptr())[m]();
01709           ArrayView<scalar_type>      y2_local = (y2_ptr())[m]();
01710 
01711           for (size_t k = 0; k < NumEntries; ++k) {
01712             const local_ordinal_type col = Indices[k];
01713             dtemp += Values[k] * y2_local[col];
01714           }
01715           y2_local[i] += DampingFactor_ * (x_local[i] - dtemp) * diag;
01716         }
01717       }
01718 
01719       // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
01720       if (IsParallel_) {
01721         Tpetra::deep_copy (Y, *Y2);
01722       }
01723     }
01724   }
01725 
01726   // See flop count discussion in implementation of ApplyInverseSGS_CrsMatrix().
01727   const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
01728   const double numVectors = as<double> (X.getNumVectors ());
01729   const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
01730   const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
01731   ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
01732     (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
01733 }
01734 
01735 
01736 template<class MatrixType>
01737 void
01738 Relaxation<MatrixType>::
01739 ApplyInverseSGS_CrsMatrix (const crs_matrix_type& A,
01740                            const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
01741                            Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
01742 {
01743   using Teuchos::as;
01744   const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
01745   if (localSmoothingIndices_.is_null ()) {
01746     A.gaussSeidelCopy (Y, X, *Diagonal_, DampingFactor_, direction,
01747                        NumSweeps_, ZeroStartingSolution_);
01748   }
01749   else {
01750     A.reorderedGaussSeidelCopy (Y, X, *Diagonal_, localSmoothingIndices_ (),
01751                                 DampingFactor_, direction,
01752                                 NumSweeps_, ZeroStartingSolution_);
01753   }
01754 
01755   // For each column of output, for each sweep over the matrix:
01756   //
01757   // - One + and one * for each matrix entry
01758   // - One / and one + for each row of the matrix
01759   // - If the damping factor is not one: one * for each row of the
01760   //   matrix.  (It's not fair to count this if the damping factor is
01761   //   one, since the implementation could skip it.  Whether it does
01762   //   or not is the implementation's choice.)
01763   //
01764   // Each sweep of symmetric Gauss-Seidel / SOR counts as two sweeps,
01765   // one forward and one backward.
01766 
01767   // Floating-point operations due to the damping factor, per matrix
01768   // row, per direction, per columm of output.
01769   const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
01770   const double numVectors = as<double> (X.getNumVectors ());
01771   const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
01772   const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
01773   ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
01774     (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
01775 }
01776 
01777 template<class MatrixType>
01778 void
01779 Relaxation<MatrixType>::
01780 ApplyInverseSGS_BlockCrsMatrix (const block_crs_matrix_type& A,
01781                                 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
01782                                 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
01783 {
01784   using Teuchos::RCP;
01785   using Teuchos::rcp;
01786   using Teuchos::rcpFromRef;
01787   typedef Tpetra::Experimental::BlockMultiVector<scalar_type,
01788     local_ordinal_type, global_ordinal_type, node_type> BMV;
01789   typedef Tpetra::MultiVector<scalar_type,
01790     local_ordinal_type, global_ordinal_type, node_type> MV;
01791 
01792   //FIXME: (tcf) 8/21/2014 -- may be problematic for multiple right hand sides
01793   //
01794   // NOTE (mfh 12 Sep 2014) I don't think it should be a problem for
01795   // multiple right-hand sides, unless the input or output MultiVector
01796   // does not have constant stride.  We should check for that case
01797   // here, in case it doesn't work in localGaussSeidel (which is
01798   // entirely possible).
01799   BMV yBlock (Y, * (A.getGraph ()->getDomainMap ()), A.getBlockSize ());
01800   const BMV xBlock (X, * (A.getColMap ()), A.getBlockSize ());
01801 
01802   bool performImport = false;
01803   RCP<BMV> yBlockCol;
01804   if (Importer_.is_null ()) {
01805     yBlockCol = Teuchos::rcpFromRef (yBlock);
01806   }
01807   else {
01808     if (yBlockColumnPointMap_.is_null () ||
01809         yBlockColumnPointMap_->getNumVectors () != yBlock.getNumVectors () ||
01810         yBlockColumnPointMap_->getBlockSize () != yBlock.getBlockSize ()) {
01811       yBlockColumnPointMap_ =
01812         rcp (new BMV (* (A.getColMap ()), A.getBlockSize (),
01813                       static_cast<local_ordinal_type> (yBlock.getNumVectors ())));
01814     }
01815     yBlockCol = yBlockColumnPointMap_;
01816     performImport = true;
01817   }
01818 
01819   if (ZeroStartingSolution_) {
01820     yBlockCol->putScalar (STS::zero ());
01821   }
01822   else if (performImport) {
01823     yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT);
01824   }
01825 
01826   // FIXME (mfh 12 Sep 2014) Shouldn't this come from the user's parameter?
01827   const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
01828 
01829   for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
01830     if (performImport && sweep > 0) {
01831       yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT);
01832     }
01833     A.localGaussSeidel (xBlock, *yBlockCol, *BlockDiagonal_,
01834                         &blockDiagonalFactorizationPivots[0],
01835                         DampingFactor_, direction);
01836     if (performImport) {
01837       RCP<const MV> yBlockColPointDomain =
01838         yBlockCol->getMultiVectorView ().offsetView (A.getDomainMap (), 0);
01839       MV yBlockView = yBlock.getMultiVectorView ();
01840       Tpetra::deep_copy (yBlockView, *yBlockColPointDomain);
01841     }
01842   }
01843 }
01844 
01845 
01846 template<class MatrixType>
01847 std::string Relaxation<MatrixType>::description () const
01848 {
01849   std::ostringstream os;
01850 
01851   // Output is a valid YAML dictionary in flow style.  If you don't
01852   // like everything on a single line, you should call describe()
01853   // instead.
01854   os << "\"Ifpack2::Relaxation\": {";
01855 
01856   os << "Initialized: " << (isInitialized () ? "true" : "false") << ", "
01857      << "Computed: " << (isComputed () ? "true" : "false") << ", ";
01858 
01859   // It's useful to print this instance's relaxation method (Jacobi,
01860   // Gauss-Seidel, or symmetric Gauss-Seidel).  If you want more info
01861   // than that, call describe() instead.
01862   os << "Type: ";
01863   if (PrecType_ == Ifpack2::Details::JACOBI) {
01864     os << "Jacobi";
01865   } else if (PrecType_ == Ifpack2::Details::GS) {
01866     os << "Gauss-Seidel";
01867   } else if (PrecType_ == Ifpack2::Details::SGS) {
01868     os << "Symmetric Gauss-Seidel";
01869   } else {
01870     os << "INVALID";
01871   }
01872 
01873   os  << ", " << "sweeps: " << NumSweeps_ << ", "
01874       << "damping factor: " << DampingFactor_ << ", ";
01875   if (DoL1Method_) {
01876     os << "use l1: " << DoL1Method_ << ", "
01877        << "l1 eta: " << L1Eta_ << ", ";
01878   }
01879 
01880   if (A_.is_null ()) {
01881     os << "Matrix: null";
01882   }
01883   else {
01884     os << "Global matrix dimensions: ["
01885        << A_->getGlobalNumRows () << ", " << A_->getGlobalNumCols () << "]"
01886        << ", Global nnz: " << A_->getGlobalNumEntries();
01887   }
01888 
01889   os << "}";
01890   return os.str ();
01891 }
01892 
01893 
01894 template<class MatrixType>
01895 void
01896 Relaxation<MatrixType>::
01897 describe (Teuchos::FancyOStream &out,
01898           const Teuchos::EVerbosityLevel verbLevel) const
01899 {
01900   using Teuchos::OSTab;
01901   using Teuchos::TypeNameTraits;
01902   using Teuchos::VERB_DEFAULT;
01903   using Teuchos::VERB_NONE;
01904   using Teuchos::VERB_LOW;
01905   using Teuchos::VERB_MEDIUM;
01906   using Teuchos::VERB_HIGH;
01907   using Teuchos::VERB_EXTREME;
01908   using std::endl;
01909 
01910   const Teuchos::EVerbosityLevel vl =
01911     (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
01912 
01913   const int myRank = this->getComm ()->getRank ();
01914 
01915   //    none: print nothing
01916   //     low: print O(1) info from Proc 0
01917   //  medium:
01918   //    high:
01919   // extreme:
01920 
01921   if (vl != VERB_NONE && myRank == 0) {
01922     // Describable interface asks each implementation to start with a tab.
01923     OSTab tab1 (out);
01924     // Output is valid YAML; hence the quotes, to protect the colons.
01925     out << "\"Ifpack2::Relaxation\":" << endl;
01926     OSTab tab2 (out);
01927     out << "MatrixType: \"" << TypeNameTraits<MatrixType>::name () << "\""
01928         << endl;
01929     if (this->getObjectLabel () != "") {
01930       out << "Label: " << this->getObjectLabel () << endl;
01931     }
01932     out << "Initialized: " << (isInitialized () ? "true" : "false") << endl
01933         << "Computed: " << (isComputed () ? "true" : "false") << endl
01934         << "Parameters: " << endl;
01935     {
01936       OSTab tab3 (out);
01937       out << "\"relaxation: type\": ";
01938       if (PrecType_ == Ifpack2::Details::JACOBI) {
01939         out << "Jacobi";
01940       } else if (PrecType_ == Ifpack2::Details::GS) {
01941         out << "Gauss-Seidel";
01942       } else if (PrecType_ == Ifpack2::Details::SGS) {
01943         out << "Symmetric Gauss-Seidel";
01944       } else {
01945         out << "INVALID";
01946       }
01947       // We quote these parameter names because they contain colons.
01948       // YAML uses the colon to distinguish key from value.
01949       out << endl
01950           << "\"relaxation: sweeps\": " << NumSweeps_ << endl
01951           << "\"relaxation: damping factor\": " << DampingFactor_ << endl
01952           << "\"relaxation: min diagonal value\": " << MinDiagonalValue_ << endl
01953           << "\"relaxation: zero starting solution\": " << ZeroStartingSolution_ << endl
01954           << "\"relaxation: backward mode\": " << DoBackwardGS_ << endl
01955           << "\"relaxation: use l1\": " << DoL1Method_ << endl
01956           << "\"relaxation: l1 eta\": " << L1Eta_ << endl;
01957     }
01958     out << "Computed quantities:" << endl;
01959     {
01960       OSTab tab3 (out);
01961       out << "Condition number estimate: " << Condest_ << endl
01962           << "Global number of rows: " << A_->getGlobalNumRows () << endl
01963           << "Global number of columns: " << A_->getGlobalNumCols () << endl;
01964     }
01965     if (checkDiagEntries_ && isComputed ()) {
01966       out << "Properties of input diagonal entries:" << endl;
01967       {
01968         OSTab tab3 (out);
01969         out << "Magnitude of minimum-magnitude diagonal entry: "
01970             << globalMinMagDiagEntryMag_ << endl
01971             << "Magnitude of maximum-magnitude diagonal entry: "
01972             << globalMaxMagDiagEntryMag_ << endl
01973             << "Number of diagonal entries with small magnitude: "
01974             << globalNumSmallDiagEntries_ << endl
01975             << "Number of zero diagonal entries: "
01976             << globalNumZeroDiagEntries_ << endl
01977             << "Number of diagonal entries with negative real part: "
01978             << globalNumNegDiagEntries_ << endl
01979             << "Abs 2-norm diff between computed and actual inverse "
01980             << "diagonal: " << globalDiagNormDiff_ << endl;
01981       }
01982     }
01983     if (isComputed ()) {
01984       out << "Saved diagonal offsets: "
01985           << (savedDiagOffsets_ ? "true" : "false") << endl;
01986     }
01987     out << "Call counts and total times (in seconds): " << endl;
01988     {
01989       OSTab tab3 (out);
01990       out << "initialize: " << endl;
01991       {
01992         OSTab tab4 (out);
01993         out << "Call count: " << NumInitialize_ << endl;
01994         out << "Total time: " << InitializeTime_ << endl;
01995       }
01996       out << "compute: " << endl;
01997       {
01998         OSTab tab4 (out);
01999         out << "Call count: " << NumCompute_ << endl;
02000         out << "Total time: " << ComputeTime_ << endl;
02001       }
02002       out << "apply: " << endl;
02003       {
02004         OSTab tab4 (out);
02005         out << "Call count: " << NumApply_ << endl;
02006         out << "Total time: " << ApplyTime_ << endl;
02007       }
02008     }
02009   }
02010 }
02011 
02012 } // namespace Ifpack2
02013 
02014 #define IFPACK2_RELAXATION_INSTANT(S,LO,GO,N)                            \
02015   template class Ifpack2::Relaxation< Tpetra::CrsMatrix<S, LO, GO, N> >; \
02016   template class Ifpack2::Relaxation< Tpetra::RowMatrix<S, LO, GO, N> >;
02017 
02018 #endif // IFPACK2_RELAXATION_DEF_HPP
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends