IFPACK  Development
 All Classes Files Functions Variables Enumerations Friends
Ifpack_PointRelaxation.cpp
00001 /*@HEADER
00002 // ***********************************************************************
00003 //
00004 //       Ifpack: Object-Oriented Algebraic Preconditioner Package
00005 //                 Copyright (2002) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ***********************************************************************
00040 //@HEADER
00041 */
00042 
00043 #include "Ifpack_ConfigDefs.h"
00044 #include <iomanip>
00045 #include <cmath>
00046 #include "Epetra_Operator.h"
00047 #include "Epetra_CrsMatrix.h"
00048 #include "Epetra_VbrMatrix.h"
00049 #include "Epetra_Comm.h"
00050 #include "Epetra_Map.h"
00051 #include "Epetra_MultiVector.h"
00052 #include "Ifpack_Preconditioner.h"
00053 #include "Ifpack_PointRelaxation.h"
00054 #include "Ifpack_Utils.h"
00055 #include "Ifpack_Condest.h"
00056 
00057 static const int IFPACK_JACOBI = 0;
00058 static const int IFPACK_GS = 1;
00059 static const int IFPACK_SGS = 2;
00060 
00061 //==============================================================================
00062 Ifpack_PointRelaxation::
00063 Ifpack_PointRelaxation(const Epetra_RowMatrix* Matrix_in) :
00064   IsInitialized_(false),
00065   IsComputed_(false),
00066   NumInitialize_(0),
00067   NumCompute_(0),
00068   NumApplyInverse_(0),
00069   InitializeTime_(0.0),
00070   ComputeTime_(0.0),
00071   ApplyInverseTime_(0.0),
00072   ComputeFlops_(0.0),
00073   ApplyInverseFlops_(0.0),
00074   NumSweeps_(1),
00075   DampingFactor_(1.0),
00076   UseTranspose_(false),
00077   Condest_(-1.0),
00078   ComputeCondest_(false),
00079   PrecType_(IFPACK_JACOBI),
00080   MinDiagonalValue_(0.0),
00081   NumMyRows_(0),
00082   NumMyNonzeros_(0),
00083   NumGlobalRows_(0),
00084   NumGlobalNonzeros_(0),
00085   Matrix_(Teuchos::rcp(Matrix_in,false)),
00086   IsParallel_(false),
00087   ZeroStartingSolution_(true),
00088   DoBackwardGS_(false),
00089   DoL1Method_(false),
00090   L1Eta_(1.5),
00091   NumLocalSmoothingIndices_(Matrix_in->NumMyRows()),
00092   LocalSmoothingIndices_(0)
00093 {
00094 }
00095 
00096 //==============================================================================
00097 int Ifpack_PointRelaxation::SetParameters(Teuchos::ParameterList& List)
00098 {
00099 
00100   string PT;
00101   if (PrecType_ == IFPACK_JACOBI)
00102     PT = "Jacobi";
00103   else if (PrecType_ == IFPACK_GS)
00104     PT = "Gauss-Seidel";
00105   else if (PrecType_ == IFPACK_SGS)
00106     PT = "symmetric Gauss-Seidel";
00107 
00108   PT = List.get("relaxation: type", PT);
00109 
00110   if (PT == "Jacobi")
00111     PrecType_ = IFPACK_JACOBI;
00112   else if (PT == "Gauss-Seidel")
00113     PrecType_ = IFPACK_GS;
00114   else if (PT == "symmetric Gauss-Seidel")
00115     PrecType_ = IFPACK_SGS;
00116   else {
00117     IFPACK_CHK_ERR(-2);
00118   }
00119 
00120   NumSweeps_            = List.get("relaxation: sweeps",NumSweeps_);
00121   DampingFactor_        = List.get("relaxation: damping factor",
00122                                    DampingFactor_);
00123   MinDiagonalValue_     = List.get("relaxation: min diagonal value",
00124                                    MinDiagonalValue_);
00125   ZeroStartingSolution_ = List.get("relaxation: zero starting solution",
00126                                    ZeroStartingSolution_);
00127 
00128   DoBackwardGS_         = List.get("relaxation: backward mode",DoBackwardGS_);
00129 
00130   DoL1Method_           = List.get("relaxation: use l1",DoL1Method_);
00131 
00132   L1Eta_                = List.get("relaxation: l1 eta",L1Eta_);
00133 
00134 
00135   // This (partial) reordering would require a very different implementation of Jacobi than we have no, so we're not
00136   // going to do it.
00137   NumLocalSmoothingIndices_= List.get("relaxation: number of local smoothing indices",NumLocalSmoothingIndices_);
00138   LocalSmoothingIndices_   = List.get("relaxation: local smoothing indices",LocalSmoothingIndices_);
00139   if(PrecType_ == IFPACK_JACOBI && LocalSmoothingIndices_) {
00140     NumLocalSmoothingIndices_=NumMyRows_;
00141     LocalSmoothingIndices_=0;
00142     if(!Comm().MyPID()) cout<<"Ifpack_PointRelaxation: WARNING: Reordered/Local Smoothing not implemented for Jacobi.  Defaulting to regular Jacobi"<<endl;
00143   }
00144 
00145   SetLabel();
00146 
00147   return(0);
00148 }
00149 
00150 //==============================================================================
00151 const Epetra_Comm& Ifpack_PointRelaxation::Comm() const
00152 {
00153   return(Matrix_->Comm());
00154 }
00155 
00156 //==============================================================================
00157 const Epetra_Map& Ifpack_PointRelaxation::OperatorDomainMap() const
00158 {
00159   return(Matrix_->OperatorDomainMap());
00160 }
00161 
00162 //==============================================================================
00163 const Epetra_Map& Ifpack_PointRelaxation::OperatorRangeMap() const
00164 {
00165   return(Matrix_->OperatorRangeMap());
00166 }
00167 
00168 //==============================================================================
00169 int Ifpack_PointRelaxation::Initialize()
00170 {
00171   IsInitialized_ = false;
00172 
00173   if (Matrix_ == Teuchos::null)
00174     IFPACK_CHK_ERR(-2);
00175 
00176   if (Time_ == Teuchos::null)
00177     Time_ = Teuchos::rcp( new Epetra_Time(Comm()) );
00178 
00179   if (Matrix().NumGlobalRows64() != Matrix().NumGlobalCols64())
00180     IFPACK_CHK_ERR(-2); // only square matrices
00181 
00182   NumMyRows_ = Matrix_->NumMyRows();
00183   NumMyNonzeros_ = Matrix_->NumMyNonzeros();
00184   NumGlobalRows_ = Matrix_->NumGlobalRows64();
00185   NumGlobalNonzeros_ = Matrix_->NumGlobalNonzeros64();
00186 
00187   if (Comm().NumProc() != 1)
00188     IsParallel_ = true;
00189   else
00190     IsParallel_ = false;
00191 
00192   ++NumInitialize_;
00193   InitializeTime_ += Time_->ElapsedTime();
00194   IsInitialized_ = true;
00195   return(0);
00196 }
00197 
00198 //==============================================================================
00199 int Ifpack_PointRelaxation::Compute()
00200 {
00201   int ierr = 0;
00202   if (!IsInitialized())
00203     IFPACK_CHK_ERR(Initialize());
00204 
00205   Time_->ResetStartTime();
00206 
00207   // reset values
00208   IsComputed_ = false;
00209   Condest_ = -1.0;
00210 
00211   if (NumSweeps_ == 0) ierr = 1; // Warning: no sweeps performed.
00212   if (NumSweeps_ < 0)
00213     IFPACK_CHK_ERR(-2); // at least one application
00214 
00215   // NOTE: RowMatrixRowMap doesn't work correctly for Epetra_VbrMatrix
00216   const Epetra_VbrMatrix * VbrMat = dynamic_cast<const Epetra_VbrMatrix*>(&Matrix());
00217   if(VbrMat)  Diagonal_ = Teuchos::rcp( new Epetra_Vector(VbrMat->RowMap()) );
00218   else Diagonal_ = Teuchos::rcp( new Epetra_Vector(Matrix().RowMatrixRowMap()) );
00219 
00220   if (Diagonal_ == Teuchos::null)
00221     IFPACK_CHK_ERR(-5);
00222 
00223   IFPACK_CHK_ERR(Matrix().ExtractDiagonalCopy(*Diagonal_));
00224 
00225   // Setup for L1 Methods.
00226   // Here we add half the value of the off-processor entries in the row,
00227   // but only if diagonal isn't sufficiently large.
00228   //
00229   // Note: This is only done in the slower-but-more-general "RowMatrix" mode.
00230   //
00231   // This follows from Equation (6.5) in:
00232   // Baker, Falgout, Kolev and Yang.  Multigrid Smoothers for Ultraparallel Computing.
00233   // SIAM J. Sci. Comput., Vol. 33, No. 5. (2011), pp. 2864--2887.
00234   if(DoL1Method_ && IsParallel_) {
00235     int maxLength = Matrix().MaxNumEntries();
00236     std::vector<int> Indices(maxLength);
00237     std::vector<double> Values(maxLength);
00238     int NumEntries;
00239 
00240     for (int i = 0 ; i < NumMyRows_ ; ++i) {
00241       IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, maxLength,NumEntries,
00242                            &Values[0], &Indices[0]));
00243       double diagonal_boost=0.0;
00244       for (int k = 0 ; k < NumEntries ; ++k)
00245     if(Indices[k] > NumMyRows_)
00246       diagonal_boost+=std::abs(Values[k]/2.0);
00247       if ((*Diagonal_)[i] < L1Eta_*diagonal_boost)
00248     (*Diagonal_)[i]+=diagonal_boost;
00249     }
00250   }
00251 
00252   // check diagonal elements, store the inverses, and verify that
00253   // no zeros are around. If an element is zero, then by default
00254   // its inverse is zero as well (that is, the row is ignored).
00255   for (int i = 0 ; i < NumMyRows_ ; ++i) {
00256     double& diag = (*Diagonal_)[i];
00257     if (IFPACK_ABS(diag) < MinDiagonalValue_)
00258       diag = MinDiagonalValue_;
00259     if (diag != 0.0)
00260       diag = 1.0 / diag;
00261   }
00262 #ifdef IFPACK_FLOPCOUNTERS
00263   ComputeFlops_ += NumMyRows_;
00264 #endif
00265 
00266 #if 0
00267   // some methods require the inverse of the diagonal, compute it
00268   // now and store it in `Diagonal_'
00269   if ((PrecType_ == IFPACK_JACOBI) || (PrecType_ == IFPACK_GS)) {
00270     Diagonal_->Reciprocal(*Diagonal_);
00271     // update flops
00272     ComputeFlops_ += NumMyRows_;
00273   }
00274 #endif
00275 
00276   // We need to import data from external processors. Here I create an
00277   // Epetra_Import object because I cannot assume that Matrix_ has one.
00278   // This is a bit of waste of resources (but the code is more robust).
00279   // Note that I am doing some strange stuff to set the components of Y
00280   // from Y2 (to save some time).
00281   //
00282   if (IsParallel_ && ((PrecType_ == IFPACK_GS) || (PrecType_ == IFPACK_SGS))) {
00283     Importer_ = Teuchos::rcp( new Epetra_Import(Matrix().RowMatrixColMap(),
00284                                   Matrix().RowMatrixRowMap()) );
00285     if (Importer_ == Teuchos::null) IFPACK_CHK_ERR(-5);
00286   }
00287 
00288   ++NumCompute_;
00289   ComputeTime_ += Time_->ElapsedTime();
00290   IsComputed_ = true;
00291 
00292   IFPACK_CHK_ERR(ierr);
00293   return(0);
00294 }
00295 
00296 //==============================================================================
00297 ostream& Ifpack_PointRelaxation::Print(ostream & os) const
00298 {
00299 
00300   double MyMinVal, MyMaxVal;
00301   double MinVal, MaxVal;
00302 
00303   if (IsComputed_) {
00304     Diagonal_->MinValue(&MyMinVal);
00305     Diagonal_->MaxValue(&MyMaxVal);
00306     Comm().MinAll(&MyMinVal,&MinVal,1);
00307     Comm().MinAll(&MyMaxVal,&MaxVal,1);
00308   }
00309 
00310   if (!Comm().MyPID()) {
00311     os << endl;
00312     os << "================================================================================" << endl;
00313     os << "Ifpack_PointRelaxation" << endl;
00314     os << "Sweeps         = " << NumSweeps_ << endl;
00315     os << "damping factor = " << DampingFactor_ << endl;
00316     if (PrecType_ == IFPACK_JACOBI)
00317       os << "Type           = Jacobi" << endl;
00318     else if (PrecType_ == IFPACK_GS)
00319       os << "Type           = Gauss-Seidel" << endl;
00320     else if (PrecType_ == IFPACK_SGS)
00321       os << "Type           = symmetric Gauss-Seidel" << endl;
00322     if (DoBackwardGS_)
00323       os << "Using backward mode (GS only)" << endl;
00324     if (ZeroStartingSolution_)
00325       os << "Using zero starting solution" << endl;
00326     else
00327       os << "Using input starting solution" << endl;
00328     os << "Condition number estimate = " << Condest() << endl;
00329     os << "Global number of rows            = " << Matrix_->NumGlobalRows64() << endl;
00330     if (IsComputed_) {
00331       os << "Minimum value on stored diagonal = " << MinVal << endl;
00332       os << "Maximum value on stored diagonal = " << MaxVal << endl;
00333     }
00334     os << endl;
00335     os << "Phase           # calls   Total Time (s)       Total MFlops     MFlops/s" << endl;
00336     os << "-----           -------   --------------       ------------     --------" << endl;
00337     os << "Initialize()    "   << std::setw(5) << NumInitialize_
00338        << "  " << std::setw(15) << InitializeTime_
00339        << "              0.0              0.0" << endl;
00340     os << "Compute()       "   << std::setw(5) << NumCompute_
00341        << "  " << std::setw(15) << ComputeTime_
00342        << "  " << std::setw(15) << 1.0e-6 * ComputeFlops_;
00343     if (ComputeTime_ != 0.0)
00344       os << "  " << std::setw(15) << 1.0e-6 * ComputeFlops_ / ComputeTime_ << endl;
00345     else
00346       os << "  " << std::setw(15) << 0.0 << endl;
00347     os << "ApplyInverse()  "   << std::setw(5) << NumApplyInverse_
00348        << "  " << std::setw(15) << ApplyInverseTime_
00349        << "  " << std::setw(15) << 1.0e-6 * ApplyInverseFlops_;
00350     if (ApplyInverseTime_ != 0.0)
00351       os << "  " << std::setw(15) << 1.0e-6 * ApplyInverseFlops_ / ApplyInverseTime_ << endl;
00352     else
00353       os << "  " << std::setw(15) << 0.0 << endl;
00354     os << "================================================================================" << endl;
00355     os << endl;
00356   }
00357 
00358   return(os);
00359 }
00360 
00361 //==============================================================================
00362 double Ifpack_PointRelaxation::
00363 Condest(const Ifpack_CondestType CT,
00364         const int MaxIters, const double Tol,
00365     Epetra_RowMatrix* Matrix_in)
00366 {
00367   if (!IsComputed()) // cannot compute right now
00368     return(-1.0);
00369 
00370   // always computes it. Call Condest() with no parameters to get
00371   // the previous estimate.
00372   Condest_ = Ifpack_Condest(*this, CT, MaxIters, Tol, Matrix_in);
00373 
00374   return(Condest_);
00375 }
00376 
00377 //==============================================================================
00378 void Ifpack_PointRelaxation::SetLabel()
00379 {
00380   string PT;
00381   if (PrecType_ == IFPACK_JACOBI)
00382     PT = "Jacobi";
00383   else if (PrecType_ == IFPACK_GS){
00384     PT = "GS";
00385     if(DoBackwardGS_)
00386       PT = "Backward " + PT;
00387   }
00388   else if (PrecType_ == IFPACK_SGS)
00389     PT = "SGS";
00390 
00391   if(NumLocalSmoothingIndices_!=NumMyRows_) PT="Local " + PT;
00392   else if(LocalSmoothingIndices_) PT="Reordered " + PT;
00393 
00394   Label_ = "IFPACK (" + PT + ", sweeps=" + Ifpack_toString(NumSweeps_)
00395     + ", damping=" + Ifpack_toString(DampingFactor_) + ")";
00396 }
00397 
00398 //==============================================================================
00399 // Note that Ifpack_PointRelaxation and Jacobi is much faster than
00400 // Ifpack_AdditiveSchwarz<Ifpack_PointRelaxation> (because of the
00401 // way the matrix-vector produce is performed).
00402 //
00403 // Another ML-related observation is that the starting solution (in Y)
00404 // is NOT supposed to be zero. This may slow down the application of just
00405 // one sweep of Jacobi.
00406 //
00407 int Ifpack_PointRelaxation::
00408 ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00409 {
00410   if (!IsComputed())
00411     IFPACK_CHK_ERR(-3);
00412 
00413   if (X.NumVectors() != Y.NumVectors())
00414     IFPACK_CHK_ERR(-2);
00415 
00416   Time_->ResetStartTime();
00417 
00418   // AztecOO gives X and Y pointing to the same memory location,
00419   // need to create an auxiliary vector, Xcopy
00420   Teuchos::RefCountPtr< const Epetra_MultiVector > Xcopy;
00421   if (X.Pointers()[0] == Y.Pointers()[0])
00422     Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) );
00423   else
00424     Xcopy = Teuchos::rcp( &X, false );
00425 
00426   // Flops are updated in each of the following.
00427   switch (PrecType_) {
00428   case IFPACK_JACOBI:
00429     IFPACK_CHK_ERR(ApplyInverseJacobi(*Xcopy,Y));
00430     break;
00431   case IFPACK_GS:
00432     IFPACK_CHK_ERR(ApplyInverseGS(*Xcopy,Y));
00433     break;
00434   case IFPACK_SGS:
00435     IFPACK_CHK_ERR(ApplyInverseSGS(*Xcopy,Y));
00436     break;
00437   default:
00438     IFPACK_CHK_ERR(-1); // something wrong
00439   }
00440 
00441   ++NumApplyInverse_;
00442   ApplyInverseTime_ += Time_->ElapsedTime();
00443   return(0);
00444 }
00445 
00446 //==============================================================================
00447 // This preconditioner can be much slower than AztecOO and ML versions
00448 // if the matrix-vector product requires all ExtractMyRowCopy()
00449 // (as done through Ifpack_AdditiveSchwarz).
00450 int Ifpack_PointRelaxation::
00451 ApplyInverseJacobi(const Epetra_MultiVector& RHS, Epetra_MultiVector& LHS) const
00452 {
00453   int NumVectors = LHS.NumVectors();
00454 
00455   int startIter = 0;
00456   if (NumSweeps_ > 0 && ZeroStartingSolution_) {
00457     // When we have a zero initial guess, we can skip the first
00458     // matrix apply call and zero initialization
00459     for (int v = 0; v < NumVectors; v++)
00460       IFPACK_CHK_ERR(LHS(v)->Multiply(DampingFactor_, *(RHS(v)), *Diagonal_, 0.0));
00461 
00462     startIter = 1;
00463   }
00464 
00465   bool zeroOut = false;
00466   Epetra_MultiVector A_times_LHS(LHS.Map(), NumVectors, zeroOut);
00467   for (int j = startIter; j < NumSweeps_ ; j++) {
00468     IFPACK_CHK_ERR(Apply(LHS, A_times_LHS));
00469     IFPACK_CHK_ERR(A_times_LHS.Update(1.0,RHS,-1.0));
00470     for (int v = 0 ; v < NumVectors ; ++v)
00471       IFPACK_CHK_ERR(LHS(v)->Multiply(DampingFactor_, *(A_times_LHS(v)),
00472                                      *Diagonal_, 1.0));
00473 
00474   }
00475 
00476   // Flops:
00477   // - matrix vector              (2 * NumGlobalNonzeros_)
00478   // - update                     (2 * NumGlobalRows_)
00479   // - Multiply:
00480   //   - DampingFactor            (NumGlobalRows_)
00481   //   - Diagonal                 (NumGlobalRows_)
00482   //   - A + B                    (NumGlobalRows_)
00483   //   - 1.0                      (NumGlobalRows_)
00484 #ifdef IFPACK_FLOPCOUNTERS
00485   ApplyInverseFlops_ += NumVectors * (6 * NumGlobalRows_ + 2 * NumGlobalNonzeros_);
00486 #endif
00487 
00488   return(0);
00489 }
00490 
00491 //==============================================================================
00492 int Ifpack_PointRelaxation::
00493 ApplyInverseGS(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00494 {
00495   if (ZeroStartingSolution_)
00496       Y.PutScalar(0.0);
00497 
00498   const Epetra_CrsMatrix* CrsMatrix = dynamic_cast<const Epetra_CrsMatrix*>(&*Matrix_);
00499   // try to pick the best option; performances may be improved
00500   // if several sweeps are used.
00501   if (CrsMatrix != 0)
00502   {
00503     if (CrsMatrix->StorageOptimized() && LocalSmoothingIndices_)
00504       return(ApplyInverseGS_LocalFastCrsMatrix(CrsMatrix, X, Y));
00505     else if (CrsMatrix->StorageOptimized())
00506       return(ApplyInverseGS_FastCrsMatrix(CrsMatrix, X, Y));
00507     else
00508       return(ApplyInverseGS_CrsMatrix(CrsMatrix, X, Y));
00509   }
00510   else
00511     return(ApplyInverseGS_RowMatrix(X, Y));
00512 } //ApplyInverseGS()
00513 
00514 
00515 
00516 //==============================================================================
00517 int Ifpack_PointRelaxation::
00518 ApplyInverseGS_RowMatrix(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00519 {
00520   int NumVectors = X.NumVectors();
00521 
00522   int Length = Matrix().MaxNumEntries();
00523   std::vector<int> Indices(Length);
00524   std::vector<double> Values(Length);
00525 
00526   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00527   if (IsParallel_)
00528     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00529   else
00530     Y2 = Teuchos::rcp( &Y, false );
00531 
00532   // extract views (for nicer and faster code)
00533   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00534   X.ExtractView(&x_ptr);
00535   Y.ExtractView(&y_ptr);
00536   Y2->ExtractView(&y2_ptr);
00537   Diagonal_->ExtractView(&d_ptr);
00538 
00539   for (int j = 0; j < NumSweeps_ ; j++) {
00540 
00541     // data exchange is here, once per sweep
00542     if (IsParallel_)
00543       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00544 
00545     // FIXME: do I really need this code below?
00546     if (NumVectors == 1) {
00547 
00548       double* y0_ptr = y_ptr[0];
00549       double* y20_ptr = y2_ptr[0];
00550       double* x0_ptr = x_ptr[0];
00551 
00552       if(!DoBackwardGS_){
00553         /* Forward Mode */
00554     for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
00555       int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
00556 
00557           int NumEntries;
00558           int col;
00559           IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
00560                                                    &Values[0], &Indices[0]));
00561 
00562           double dtemp = 0.0;
00563           for (int k = 0 ; k < NumEntries ; ++k) {
00564 
00565             col = Indices[k];
00566             dtemp += Values[k] * y20_ptr[col];
00567           }
00568 
00569           y20_ptr[i] += DampingFactor_ * d_ptr[i] * (x0_ptr[i] - dtemp);
00570         }
00571       }
00572       else {
00573         /* Backward Mode */
00574     for (int ii = NumLocalSmoothingIndices_  - 1 ; ii > -1 ; --ii) {
00575       int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
00576 
00577           int NumEntries;
00578           int col;
00579       (void) col; // Forestall compiler warning for unused variable.
00580           IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
00581                                                    &Values[0], &Indices[0]));
00582           double dtemp = 0.0;
00583           for (int k = 0 ; k < NumEntries ; ++k) {
00584 
00585             col = Indices[k];
00586             dtemp += Values[k] * y20_ptr[i];
00587           }
00588 
00589           y20_ptr[i] += DampingFactor_ * d_ptr[i] * (x0_ptr[i] - dtemp);
00590         }
00591       }
00592 
00593       // using Export() sounded quite expensive
00594       if (IsParallel_)
00595         for (int i = 0 ; i < NumMyRows_ ; ++i)
00596           y0_ptr[i] = y20_ptr[i];
00597 
00598     }
00599     else {
00600       if(!DoBackwardGS_){
00601         /* Forward Mode */
00602         for (int i = 0 ; i < NumMyRows_ ; ++i) {
00603 
00604           int NumEntries;
00605           int col;
00606           IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
00607                                                    &Values[0], &Indices[0]));
00608 
00609           for (int m = 0 ; m < NumVectors ; ++m) {
00610 
00611             double dtemp = 0.0;
00612             for (int k = 0 ; k < NumEntries ; ++k) {
00613 
00614               col = Indices[k];
00615               dtemp += Values[k] * y2_ptr[m][col];
00616             }
00617 
00618             y2_ptr[m][i] += DampingFactor_ * d_ptr[i] * (x_ptr[m][i] - dtemp);
00619           }
00620         }
00621       }
00622       else {
00623         /* Backward Mode */
00624         for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
00625           int NumEntries;
00626           int col;
00627           IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
00628                                                    &Values[0], &Indices[0]));
00629 
00630           for (int m = 0 ; m < NumVectors ; ++m) {
00631 
00632             double dtemp = 0.0;
00633             for (int k = 0 ; k < NumEntries ; ++k) {
00634 
00635               col = Indices[k];
00636               dtemp += Values[k] * y2_ptr[m][col];
00637             }
00638 
00639             y2_ptr[m][i] += DampingFactor_ * d_ptr[i] * (x_ptr[m][i] - dtemp);
00640 
00641           }
00642         }
00643       }
00644 
00645       // using Export() sounded quite expensive
00646       if (IsParallel_)
00647         for (int m = 0 ; m < NumVectors ; ++m)
00648       for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
00649         int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
00650         y_ptr[m][i] = y2_ptr[m][i];
00651       }
00652     }
00653   }
00654 
00655 #ifdef IFPACK_FLOPCOUNTERS
00656   ApplyInverseFlops_ += NumVectors * (4 * NumGlobalRows_ + 2 * NumGlobalNonzeros_);
00657 #endif
00658 
00659   return(0);
00660 } //ApplyInverseGS_RowMatrix()
00661 
00662 //==============================================================================
00663 int Ifpack_PointRelaxation::
00664 ApplyInverseGS_CrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00665 {
00666   int NumVectors = X.NumVectors();
00667 
00668   int* Indices;
00669   double* Values;
00670 
00671   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00672   if (IsParallel_) {
00673     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00674   }
00675   else
00676     Y2 = Teuchos::rcp( &Y, false );
00677 
00678   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00679   X.ExtractView(&x_ptr);
00680   Y.ExtractView(&y_ptr);
00681   Y2->ExtractView(&y2_ptr);
00682   Diagonal_->ExtractView(&d_ptr);
00683 
00684   for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
00685 
00686     // only one data exchange per sweep
00687     if (IsParallel_)
00688       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00689 
00690     if(!DoBackwardGS_){
00691       /* Forward Mode */
00692       for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
00693     int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
00694 
00695         int NumEntries;
00696         int col;
00697         double diag = d_ptr[i];
00698 
00699         IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
00700 
00701         for (int m = 0 ; m < NumVectors ; ++m) {
00702 
00703           double dtemp = 0.0;
00704 
00705           for (int k = 0; k < NumEntries; ++k) {
00706 
00707             col = Indices[k];
00708             dtemp += Values[k] * y2_ptr[m][col];
00709           }
00710 
00711           y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00712         }
00713       }
00714     }
00715     else {
00716       /* Backward Mode */
00717       for (int ii = NumLocalSmoothingIndices_  - 1 ; ii > -1 ; --ii) {
00718     int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
00719 
00720         int NumEntries;
00721         int col;
00722         double diag = d_ptr[i];
00723 
00724         IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
00725 
00726         for (int m = 0 ; m < NumVectors ; ++m) {
00727 
00728           double dtemp = 0.0;
00729           for (int k = 0; k < NumEntries; ++k) {
00730 
00731             col = Indices[k];
00732             dtemp += Values[k] * y2_ptr[m][col];
00733           }
00734 
00735           y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00736 
00737         }
00738       }
00739     }
00740 
00741     if (IsParallel_)
00742       for (int m = 0 ; m < NumVectors ; ++m)
00743     for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
00744       int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
00745           y_ptr[m][i] = y2_ptr[m][i];
00746     }
00747   }
00748 
00749 #ifdef IFPACK_FLOPCOUNTERS
00750   ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
00751 #endif
00752   return(0);
00753 } //ApplyInverseGS_CrsMatrix()
00754 
00755 //
00756 //==============================================================================
00757 // ApplyInverseGS_FastCrsMatrix() requires Epetra_CrsMatrix + OptimizeStorage()
00758 
00759 int Ifpack_PointRelaxation::
00760 ApplyInverseGS_FastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00761 {
00762   int* IndexOffset;
00763   int* Indices;
00764   double* Values;
00765   IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values));
00766 
00767   int NumVectors = X.NumVectors();
00768 
00769   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00770   if (IsParallel_) {
00771     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00772   }
00773   else
00774     Y2 = Teuchos::rcp( &Y, false );
00775 
00776   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00777   X.ExtractView(&x_ptr);
00778   Y.ExtractView(&y_ptr);
00779   Y2->ExtractView(&y2_ptr);
00780   Diagonal_->ExtractView(&d_ptr);
00781 
00782   for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
00783 
00784     // only one data exchange per sweep
00785     if (IsParallel_)
00786       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00787 
00788     if(!DoBackwardGS_){
00789       /* Forward Mode */
00790       for (int i = 0 ; i < NumMyRows_ ; ++i) {
00791 
00792         int col;
00793         double diag = d_ptr[i];
00794 
00795         for (int m = 0 ; m < NumVectors ; ++m) {
00796 
00797           double dtemp = 0.0;
00798 
00799           for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
00800 
00801             col = Indices[k];
00802             dtemp += Values[k] * y2_ptr[m][col];
00803           }
00804 
00805           y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00806         }
00807       }
00808     }
00809     else {
00810       /* Backward Mode */
00811       for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
00812 
00813         int col;
00814         double diag = d_ptr[i];
00815 
00816         for (int m = 0 ; m < NumVectors ; ++m) {
00817 
00818           double dtemp = 0.0;
00819           for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
00820 
00821             col = Indices[k];
00822             dtemp += Values[k] * y2_ptr[m][col];
00823           }
00824 
00825           y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00826 
00827         }
00828       }
00829     }
00830 
00831 
00832     if (IsParallel_)
00833       for (int m = 0 ; m < NumVectors ; ++m)
00834         for (int i = 0 ; i < NumMyRows_ ; ++i)
00835           y_ptr[m][i] = y2_ptr[m][i];
00836   }
00837 
00838 #ifdef IFPACK_FLOPCOUNTERS
00839   ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
00840 #endif
00841   return(0);
00842 } //ApplyInverseGS_FastCrsMatrix()
00843 
00844 
00845 //
00846 //==============================================================================
00847 // ApplyInverseGS_LocalFastCrsMatrix() requires Epetra_CrsMatrix + OptimizeStorage() + LocalSmoothingIndices
00848 
00849 int Ifpack_PointRelaxation::
00850 ApplyInverseGS_LocalFastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00851 {
00852   int* IndexOffset;
00853   int* Indices;
00854   double* Values;
00855   IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values));
00856 
00857   int NumVectors = X.NumVectors();
00858 
00859   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00860   if (IsParallel_) {
00861     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00862   }
00863   else
00864     Y2 = Teuchos::rcp( &Y, false );
00865 
00866   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00867   X.ExtractView(&x_ptr);
00868   Y.ExtractView(&y_ptr);
00869   Y2->ExtractView(&y2_ptr);
00870   Diagonal_->ExtractView(&d_ptr);
00871 
00872   for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
00873 
00874     // only one data exchange per sweep
00875     if (IsParallel_)
00876       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00877 
00878     if(!DoBackwardGS_){
00879       /* Forward Mode */
00880       for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
00881     int i=LocalSmoothingIndices_[ii];
00882 
00883         int col;
00884         double diag = d_ptr[i];
00885 
00886         for (int m = 0 ; m < NumVectors ; ++m) {
00887 
00888           double dtemp = 0.0;
00889 
00890           for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
00891 
00892             col = Indices[k];
00893             dtemp += Values[k] * y2_ptr[m][col];
00894           }
00895 
00896           y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00897         }
00898       }
00899     }
00900     else {
00901       /* Backward Mode */
00902       for (int ii = NumLocalSmoothingIndices_  - 1 ; ii > -1 ; --ii) {
00903     int i=LocalSmoothingIndices_[ii];
00904 
00905         int col;
00906         double diag = d_ptr[i];
00907 
00908         for (int m = 0 ; m < NumVectors ; ++m) {
00909 
00910           double dtemp = 0.0;
00911           for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
00912 
00913             col = Indices[k];
00914             dtemp += Values[k] * y2_ptr[m][col];
00915           }
00916 
00917           y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00918 
00919         }
00920       }
00921     }
00922 
00923 
00924     if (IsParallel_)
00925       for (int m = 0 ; m < NumVectors ; ++m)
00926         for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
00927       int i=LocalSmoothingIndices_[ii];
00928           y_ptr[m][i] = y2_ptr[m][i];
00929     }
00930   }
00931 
00932 #ifdef IFPACK_FLOPCOUNTERS
00933   ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
00934 #endif
00935   return(0);
00936 } //ApplyInverseGS_LocalFastCrsMatrix()
00937 
00938 
00939 //==============================================================================
00940 int Ifpack_PointRelaxation::
00941 ApplyInverseSGS(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00942 {
00943   if (ZeroStartingSolution_)
00944     Y.PutScalar(0.0);
00945 
00946   const Epetra_CrsMatrix* CrsMatrix = dynamic_cast<const Epetra_CrsMatrix*>(&*Matrix_);
00947   // try to pick the best option; performances may be improved
00948   // if several sweeps are used.
00949   if (CrsMatrix != 0)
00950   {
00951     if (CrsMatrix->StorageOptimized() && LocalSmoothingIndices_)
00952       return(ApplyInverseSGS_LocalFastCrsMatrix(CrsMatrix, X, Y));
00953     else if (CrsMatrix->StorageOptimized())
00954       return(ApplyInverseSGS_FastCrsMatrix(CrsMatrix, X, Y));
00955     else
00956       return(ApplyInverseSGS_CrsMatrix(CrsMatrix, X, Y));
00957   }
00958   else
00959     return(ApplyInverseSGS_RowMatrix(X, Y));
00960 }
00961 
00962 //==============================================================================
00963 int Ifpack_PointRelaxation::
00964 ApplyInverseSGS_RowMatrix(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00965 {
00966   int NumVectors = X.NumVectors();
00967   int Length = Matrix().MaxNumEntries();
00968   std::vector<int> Indices(Length);
00969   std::vector<double> Values(Length);
00970 
00971   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00972   if (IsParallel_) {
00973     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00974   }
00975   else
00976     Y2 = Teuchos::rcp( &Y, false );
00977 
00978   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00979   X.ExtractView(&x_ptr);
00980   Y.ExtractView(&y_ptr);
00981   Y2->ExtractView(&y2_ptr);
00982   Diagonal_->ExtractView(&d_ptr);
00983 
00984   for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
00985 
00986     // only one data exchange per sweep
00987     if (IsParallel_)
00988       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00989 
00990     for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
00991       int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
00992 
00993       int NumEntries;
00994       int col;
00995       double diag = d_ptr[i];
00996 
00997       IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
00998                                                &Values[0], &Indices[0]));
00999 
01000       for (int m = 0 ; m < NumVectors ; ++m) {
01001 
01002         double dtemp = 0.0;
01003 
01004         for (int k = 0 ; k < NumEntries ; ++k) {
01005 
01006           col = Indices[k];
01007           dtemp += Values[k] * y2_ptr[m][col];
01008         }
01009 
01010         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
01011       }
01012     }
01013     for (int ii = NumLocalSmoothingIndices_  - 1 ; ii > -1 ; --ii) {
01014       int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
01015 
01016       int NumEntries;
01017       int col;
01018       double diag = d_ptr[i];
01019 
01020       IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
01021                                                &Values[0], &Indices[0]));
01022 
01023       for (int m = 0 ; m < NumVectors ; ++m) {
01024 
01025         double dtemp = 0.0;
01026         for (int k = 0 ; k < NumEntries ; ++k) {
01027 
01028           col = Indices[k];
01029           dtemp += Values[k] * y2_ptr[m][col];
01030         }
01031 
01032         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
01033 
01034       }
01035     }
01036 
01037     if (IsParallel_)
01038       for (int m = 0 ; m < NumVectors ; ++m)
01039     for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
01040       int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
01041           y_ptr[m][i] = y2_ptr[m][i];
01042     }
01043   }
01044 
01045 #ifdef IFPACK_FLOPCOUNTERS
01046   ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
01047 #endif
01048   return(0);
01049 }
01050 
01051 //==============================================================================
01052 int Ifpack_PointRelaxation::
01053 ApplyInverseSGS_CrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
01054 {
01055   int NumVectors = X.NumVectors();
01056 
01057   int* Indices;
01058   double* Values;
01059 
01060   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
01061   if (IsParallel_) {
01062     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
01063   }
01064   else
01065     Y2 = Teuchos::rcp( &Y, false );
01066 
01067   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
01068   X.ExtractView(&x_ptr);
01069   Y.ExtractView(&y_ptr);
01070   Y2->ExtractView(&y2_ptr);
01071   Diagonal_->ExtractView(&d_ptr);
01072 
01073   for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
01074 
01075     // only one data exchange per sweep
01076     if (IsParallel_)
01077       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
01078 
01079     for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
01080       int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
01081 
01082       int NumEntries;
01083       int col;
01084       double diag = d_ptr[i];
01085 
01086       IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
01087 
01088       for (int m = 0 ; m < NumVectors ; ++m) {
01089 
01090         double dtemp = 0.0;
01091 
01092         for (int k = 0; k < NumEntries; ++k) {
01093 
01094           col = Indices[k];
01095           dtemp += Values[k] * y2_ptr[m][col];
01096         }
01097 
01098         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
01099       }
01100     }
01101     for (int ii = NumLocalSmoothingIndices_  - 1 ; ii > -1 ; --ii) {
01102       int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
01103 
01104       int NumEntries;
01105       int col;
01106       double diag = d_ptr[i];
01107 
01108       IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
01109 
01110       for (int m = 0 ; m < NumVectors ; ++m) {
01111 
01112         double dtemp = 0.0;
01113         for (int k = 0; k < NumEntries; ++k) {
01114 
01115           col = Indices[k];
01116           dtemp += Values[k] * y2_ptr[m][col];
01117         }
01118 
01119         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
01120 
01121       }
01122     }
01123 
01124     if (IsParallel_)
01125       for (int m = 0 ; m < NumVectors ; ++m)
01126     for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
01127       int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
01128           y_ptr[m][i] = y2_ptr[m][i];
01129     }
01130   }
01131 
01132 #ifdef IFPACK_FLOPCOUNTERS
01133   ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
01134 #endif
01135   return(0);
01136 }
01137 
01138 //==============================================================================
01139 // this requires Epetra_CrsMatrix + OptimizeStorage()
01140 int Ifpack_PointRelaxation::
01141 ApplyInverseSGS_FastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
01142 {
01143   int* IndexOffset;
01144   int* Indices;
01145   double* Values;
01146   IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values));
01147 
01148   int NumVectors = X.NumVectors();
01149 
01150   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
01151   if (IsParallel_) {
01152     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
01153   }
01154   else
01155     Y2 = Teuchos::rcp( &Y, false );
01156 
01157   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
01158   X.ExtractView(&x_ptr);
01159   Y.ExtractView(&y_ptr);
01160   Y2->ExtractView(&y2_ptr);
01161   Diagonal_->ExtractView(&d_ptr);
01162 
01163   for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
01164 
01165     // only one data exchange per sweep
01166     if (IsParallel_)
01167       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
01168 
01169     for (int i = 0 ; i < NumMyRows_ ; ++i) {
01170 
01171       int col;
01172       double diag = d_ptr[i];
01173 
01174       // no need to extract row i
01175       //IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
01176 
01177       for (int m = 0 ; m < NumVectors ; ++m) {
01178 
01179         double dtemp = 0.0;
01180 
01181         for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
01182 
01183           col = Indices[k];
01184           dtemp += Values[k] * y2_ptr[m][col];
01185         }
01186 
01187         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
01188       }
01189     }
01190 
01191     for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
01192 
01193       int col;
01194       double diag = d_ptr[i];
01195 
01196       // no need to extract row i
01197       //IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
01198 
01199       for (int m = 0 ; m < NumVectors ; ++m) {
01200 
01201         double dtemp = 0.0;
01202         for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
01203 
01204           col = Indices[k];
01205           dtemp += Values[k] * y2_ptr[m][col];
01206         }
01207 
01208         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
01209 
01210       }
01211     }
01212 
01213     if (IsParallel_)
01214       for (int m = 0 ; m < NumVectors ; ++m)
01215         for (int i = 0 ; i < NumMyRows_ ; ++i)
01216           y_ptr[m][i] = y2_ptr[m][i];
01217   }
01218 
01219 #ifdef IFPACK_FLOPCOUNTERS
01220   ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
01221 #endif
01222   return(0);
01223 }
01224 
01225 
01226 //==============================================================================
01227 // this requires Epetra_CrsMatrix + OptimizeStorage() + LocalSmoothingIndices_
01228 int Ifpack_PointRelaxation::
01229 ApplyInverseSGS_LocalFastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
01230 {
01231   int* IndexOffset;
01232   int* Indices;
01233   double* Values;
01234   IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values));
01235 
01236   int NumVectors = X.NumVectors();
01237 
01238   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
01239   if (IsParallel_) {
01240     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
01241   }
01242   else
01243     Y2 = Teuchos::rcp( &Y, false );
01244 
01245   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
01246   X.ExtractView(&x_ptr);
01247   Y.ExtractView(&y_ptr);
01248   Y2->ExtractView(&y2_ptr);
01249   Diagonal_->ExtractView(&d_ptr);
01250 
01251   for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
01252 
01253     // only one data exchange per sweep
01254     if (IsParallel_)
01255       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
01256 
01257     for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
01258       int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
01259 
01260       int col;
01261       double diag = d_ptr[i];
01262 
01263       // no need to extract row i
01264       //IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
01265 
01266       for (int m = 0 ; m < NumVectors ; ++m) {
01267 
01268         double dtemp = 0.0;
01269 
01270         for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
01271 
01272           col = Indices[k];
01273           dtemp += Values[k] * y2_ptr[m][col];
01274         }
01275 
01276         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
01277       }
01278     }
01279 
01280     for (int ii = NumLocalSmoothingIndices_  - 1 ; ii > -1 ; --ii) {
01281       int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
01282 
01283       int col;
01284       double diag = d_ptr[i];
01285 
01286       // no need to extract row i
01287       //IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
01288 
01289       for (int m = 0 ; m < NumVectors ; ++m) {
01290 
01291         double dtemp = 0.0;
01292         for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
01293 
01294           col = Indices[k];
01295           dtemp += Values[k] * y2_ptr[m][col];
01296         }
01297 
01298         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
01299 
01300       }
01301     }
01302 
01303     if (IsParallel_)
01304       for (int m = 0 ; m < NumVectors ; ++m)
01305     for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
01306       int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
01307           y_ptr[m][i] = y2_ptr[m][i];
01308     }
01309   }
01310 
01311 #ifdef IFPACK_FLOPCOUNTERS
01312   ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
01313 #endif
01314   return(0);
01315 }
 All Classes Files Functions Variables Enumerations Friends