00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030 #include "Ifpack_ConfigDefs.h"
00031 #include "Ifpack_Preconditioner.h"
00032 #include "Ifpack_ILUT.h"
00033 #include "Ifpack_Condest.h"
00034 #include "Ifpack_Utils.h"
00035 #include "Ifpack_HashTable.h"
00036 #include "Epetra_SerialComm.h"
00037 #include "Epetra_Comm.h"
00038 #include "Epetra_Map.h"
00039 #include "Epetra_RowMatrix.h"
00040 #include "Epetra_CrsMatrix.h"
00041 #include "Epetra_Vector.h"
00042 #include "Epetra_MultiVector.h"
00043 #include "Epetra_Util.h"
00044 #include "Teuchos_ParameterList.hpp"
00045 #include "Teuchos_RefCountPtr.hpp"
00046 #include <functional>
00047
00048 using namespace Teuchos;
00049
00050
00051
00052 Ifpack_ILUT::Ifpack_ILUT(const Epetra_RowMatrix* A) :
00053 A_(*A),
00054 Comm_(A->Comm()),
00055 Condest_(-1.0),
00056 Relax_(0.),
00057 Athresh_(0.0),
00058 Rthresh_(1.0),
00059 LevelOfFill_(1.0),
00060 DropTolerance_(1e-12),
00061 IsInitialized_(false),
00062 IsComputed_(false),
00063 UseTranspose_(false),
00064 NumMyRows_(-1),
00065 NumInitialize_(0),
00066 NumCompute_(0),
00067 NumApplyInverse_(0),
00068 InitializeTime_(0.0),
00069 ComputeTime_(0.0),
00070 ApplyInverseTime_(0.0),
00071 ComputeFlops_(0.0),
00072 ApplyInverseFlops_(0.0),
00073 Time_(Comm()),
00074 GlobalNonzeros_(0)
00075 {
00076
00077 }
00078
00079
00080 Ifpack_ILUT::~Ifpack_ILUT()
00081 {
00082 Destroy();
00083 }
00084
00085
00086 void Ifpack_ILUT::Destroy()
00087 {
00088 IsInitialized_ = false;
00089 IsComputed_ = false;
00090 }
00091
00092
00093 int Ifpack_ILUT::SetParameters(Teuchos::ParameterList& List)
00094 {
00095 try
00096 {
00097 LevelOfFill_ = List.get<double>("fact: ilut level-of-fill", LevelOfFill());
00098 if (LevelOfFill_ <= 0.0)
00099 IFPACK_CHK_ERR(-2);
00100
00101 Athresh_ = List.get<double>("fact: absolute threshold", Athresh_);
00102 Rthresh_ = List.get<double>("fact: relative threshold", Rthresh_);
00103 Relax_ = List.get<double>("fact: relax value", Relax_);
00104 DropTolerance_ = List.get<double>("fact: drop tolerance", DropTolerance_);
00105
00106 Label_ = "IFPACK ILUT (fill=" + Ifpack_toString(LevelOfFill())
00107 + ", relax=" + Ifpack_toString(RelaxValue())
00108 + ", athr=" + Ifpack_toString(AbsoluteThreshold())
00109 + ", rthr=" + Ifpack_toString(RelativeThreshold())
00110 + ", droptol=" + Ifpack_toString(DropTolerance())
00111 + ")";
00112 return(0);
00113 }
00114 catch (...)
00115 {
00116 cerr << "Caught an exception while parsing the parameter list" << endl;
00117 cerr << "This typically means that a parameter was set with the" << endl;
00118 cerr << "wrong type (for example, int instead of double). " << endl;
00119 cerr << "please check the documentation for the type required by each parameer." << endl;
00120 IFPACK_CHK_ERR(-1);
00121 }
00122
00123 return(0);
00124 }
00125
00126
00127 int Ifpack_ILUT::Initialize()
00128 {
00129
00130 Destroy();
00131
00132 Time_.ResetStartTime();
00133
00134
00135 if (Comm().NumProc() == 1 && Matrix().NumMyRows() != Matrix().NumMyCols())
00136 IFPACK_CHK_ERR(-2);
00137
00138 NumMyRows_ = Matrix().NumMyRows();
00139
00140
00141 IsInitialized_ = true;
00142 ++NumInitialize_;
00143 InitializeTime_ += Time_.ElapsedTime();
00144
00145 return(0);
00146 }
00147
00148
00149 class Ifpack_AbsComp
00150 {
00151 public:
00152 inline bool operator()(const double& x, const double& y)
00153 {
00154 return(IFPACK_ABS(x) > IFPACK_ABS(y));
00155 }
00156 };
00157
00158
00159
00160
00161
00162 int Ifpack_ILUT::Compute()
00163 {
00164 if (!IsInitialized())
00165 IFPACK_CHK_ERR(Initialize());
00166
00167 Time_.ResetStartTime();
00168 IsComputed_ = false;
00169
00170 NumMyRows_ = A_.NumMyRows();
00171 int Length = A_.MaxNumEntries();
00172 vector<int> RowIndicesL(Length);
00173 vector<double> RowValuesL(Length);
00174 vector<int> RowIndicesU(Length);
00175 vector<double> RowValuesU(Length);
00176 bool distributed = (Comm().NumProc() > 1)?true:false;
00177
00178 if (distributed)
00179 {
00180 SerialComm_ = rcp(new Epetra_SerialComm);
00181 SerialMap_ = rcp(new Epetra_Map(NumMyRows_, 0, *SerialComm_));
00182 assert (SerialComm_.get() != 0);
00183 assert (SerialMap_.get() != 0);
00184 }
00185 else
00186 SerialMap_ = rcp(const_cast<Epetra_Map*>(&A_.RowMatrixRowMap()), false);
00187
00188 int RowNnzU;
00189
00190 L_ = rcp(new Epetra_CrsMatrix(Copy, *SerialMap_, 0));
00191 U_ = rcp(new Epetra_CrsMatrix(Copy, *SerialMap_, 0));
00192
00193 if ((L_.get() == 0) || (U_.get() == 0))
00194 IFPACK_CHK_ERR(-5);
00195
00196
00197 IFPACK_CHK_ERR(A_.ExtractMyRowCopy(0,Length,RowNnzU,
00198 &RowValuesU[0],&RowIndicesU[0]));
00199
00200 if (distributed)
00201 {
00202 int count = 0;
00203 for (int i = 0 ;i < RowNnzU ; ++i)
00204 {
00205 if (RowIndicesU[i] < NumMyRows_){
00206 RowIndicesU[count] = RowIndicesU[i];
00207 RowValuesU[count] = RowValuesU[i];
00208 ++count;
00209 }
00210 else
00211 continue;
00212 }
00213 RowNnzU = count;
00214 }
00215
00216
00217 for (int i = 0 ;i < RowNnzU ; ++i) {
00218 if (RowIndicesU[i] == 0) {
00219 double& v = RowValuesU[i];
00220 v = AbsoluteThreshold() * EPETRA_SGN(v) + RelativeThreshold() * v;
00221 break;
00222 }
00223 }
00224
00225 IFPACK_CHK_ERR(U_->InsertGlobalValues(0,RowNnzU,&(RowValuesU[0]),
00226 &(RowIndicesU[0])));
00227
00228 RowValuesU[0] = 1.0;
00229 RowIndicesU[0] = 0;
00230 IFPACK_CHK_ERR(L_->InsertGlobalValues(0,1,&(RowValuesU[0]),
00231 &(RowIndicesU[0])));
00232
00233 int max_keys = (int) 10 * A_.MaxNumEntries() * LevelOfFill() ;
00234 Ifpack_HashTable SingleRowU(max_keys , 1);
00235 Ifpack_HashTable SingleRowL(max_keys , 1);
00236
00237 int hash_size = SingleRowU.getRecommendedHashSize(max_keys) ;
00238 vector<int> keys; keys.reserve(hash_size * 10);
00239 vector<double> values; values.reserve(hash_size * 10);
00240 vector<double> AbsRow; AbsRow.reserve(hash_size * 10);
00241
00242
00243
00244
00245
00246 #ifdef IFPACK_FLOPCOUNTERS
00247 double this_proc_flops = 0.0;
00248 #endif
00249
00250 for (int row_i = 1 ; row_i < NumMyRows_ ; ++row_i)
00251 {
00252
00253 IFPACK_CHK_ERR(A_.ExtractMyRowCopy(row_i,Length,RowNnzU,
00254 &RowValuesU[0],&RowIndicesU[0]));
00255
00256 if (distributed)
00257 {
00258 int count = 0;
00259 for (int i = 0 ;i < RowNnzU ; ++i)
00260 {
00261 if (RowIndicesU[i] < NumMyRows_){
00262 RowIndicesU[count] = RowIndicesU[i];
00263 RowValuesU[count] = RowValuesU[i];
00264 ++count;
00265 }
00266 else
00267 continue;
00268 }
00269 RowNnzU = count;
00270 }
00271
00272 int NnzLower = 0;
00273 int NnzUpper = 0;
00274
00275 for (int i = 0 ;i < RowNnzU ; ++i) {
00276 if (RowIndicesU[i] < row_i)
00277 NnzLower++;
00278 else if (RowIndicesU[i] == row_i) {
00279
00280 NnzUpper++;
00281 double& v = RowValuesU[i];
00282 v = AbsoluteThreshold() * EPETRA_SGN(v) + RelativeThreshold() * v;
00283 }
00284 else
00285 NnzUpper++;
00286 }
00287
00288 int FillL = (int)(LevelOfFill() * NnzLower);
00289 int FillU = (int)(LevelOfFill() * NnzUpper);
00290 if (FillL == 0) FillL = 1;
00291 if (FillU == 0) FillU = 1;
00292
00293
00294 SingleRowU.reset();
00295
00296 for (int i = 0 ; i < RowNnzU ; ++i) {
00297 SingleRowU.set(RowIndicesU[i], RowValuesU[i]);
00298 }
00299
00300
00301 SingleRowL.reset();
00302
00303 int start_col = NumMyRows_;
00304 for (int i = 0 ; i < RowNnzU ; ++i)
00305 start_col = EPETRA_MIN(start_col, RowIndicesU[i]);
00306
00307 #ifdef IFPACK_FLOPCOUNTERS
00308 int flops = 0;
00309 #endif
00310
00311 for (int col_k = start_col ; col_k < row_i ; ++col_k) {
00312
00313 int* ColIndicesK;
00314 double* ColValuesK;
00315 int ColNnzK;
00316
00317 double xxx = SingleRowU.get(col_k);
00318
00319
00320 if (IFPACK_ABS(xxx) > DropTolerance()) {
00321 IFPACK_CHK_ERR(U_->ExtractGlobalRowView(col_k, ColNnzK, ColValuesK,
00322 ColIndicesK));
00323
00324
00325 double DiagonalValueK = 0.0;
00326 for (int i = 0 ; i < ColNnzK ; ++i) {
00327 if (ColIndicesK[i] == col_k) {
00328 DiagonalValueK = ColValuesK[i];
00329 break;
00330 }
00331 }
00332
00333
00334 if (DiagonalValueK == 0.0)
00335 DiagonalValueK = AbsoluteThreshold();
00336
00337 double yyy = xxx / DiagonalValueK ;
00338 SingleRowL.set(col_k, yyy);
00339 #ifdef IFPACK_FLOPCOUNTERS
00340 ++flops;
00341 #endif
00342
00343 for (int j = 0 ; yyy != 0.0 && j < ColNnzK ; ++j)
00344 {
00345 int col_j = ColIndicesK[j];
00346
00347 if (col_j < col_k) continue;
00348
00349 SingleRowU.set(col_j, -yyy * ColValuesK[j], true);
00350 #ifdef IFPACK_FLOPCOUNTERS
00351 flops += 2;
00352 #endif
00353 }
00354 }
00355 }
00356
00357 #ifdef IFPACK_FLOPCOUNTERS
00358 this_proc_flops += 1.0 * flops;
00359 #endif
00360
00361 double cutoff = DropTolerance();
00362 double DiscardedElements = 0.0;
00363 int count;
00364
00365
00366 count = 0;
00367 int sizeL = SingleRowL.getNumEntries();
00368 keys.resize(sizeL);
00369 values.resize(sizeL);
00370
00371 AbsRow.resize(sizeL);
00372
00373 SingleRowL.arrayify(
00374 keys.size() ? &keys[0] : 0,
00375 values.size() ? &values[0] : 0
00376 );
00377 for (int i = 0; i < sizeL; ++i)
00378 if (IFPACK_ABS(values[i]) > DropTolerance()) {
00379 AbsRow[count++] = IFPACK_ABS(values[i]);
00380 }
00381
00382 if (count > FillL) {
00383 nth_element(AbsRow.begin(), AbsRow.begin() + FillL, AbsRow.begin() + count,
00384 greater<double>());
00385 cutoff = AbsRow[FillL];
00386 }
00387
00388 for (int i = 0; i < sizeL; ++i) {
00389 if (IFPACK_ABS(values[i]) >= cutoff) {
00390 IFPACK_CHK_ERR(L_->InsertGlobalValues(row_i,1, &values[i], (int*)&keys[i]));
00391 }
00392 else
00393 DiscardedElements += values[i];
00394 }
00395
00396
00397
00398 double dtmp = 1.0;
00399 IFPACK_CHK_ERR(L_->InsertGlobalValues(row_i,1, &dtmp, &row_i));
00400
00401
00402 count = 0;
00403 int sizeU = SingleRowU.getNumEntries();
00404 AbsRow.resize(sizeU + 1);
00405 keys.resize(sizeU + 1);
00406 values.resize(sizeU + 1);
00407
00408 SingleRowU.arrayify(&keys[0], &values[0]);
00409
00410 for (int i = 0; i < sizeU; ++i)
00411 if (keys[i] >= row_i && IFPACK_ABS(values[i]) > DropTolerance())
00412 {
00413 AbsRow[count++] = IFPACK_ABS(values[i]);
00414 }
00415
00416 if (count > FillU) {
00417 nth_element(AbsRow.begin(), AbsRow.begin() + FillU, AbsRow.begin() + count,
00418 greater<double>());
00419 cutoff = AbsRow[FillU];
00420 }
00421
00422
00423 for (int i = 0; i < sizeU; ++i)
00424 {
00425 int col = keys[i];
00426 double val = values[i];
00427
00428 if (col >= row_i) {
00429 if (IFPACK_ABS(val) >= cutoff || row_i == col) {
00430 IFPACK_CHK_ERR(U_->InsertGlobalValues(row_i,1, &val, &col));
00431 }
00432 else
00433 DiscardedElements += val;
00434 }
00435 }
00436
00437
00438 if (RelaxValue() != 0.0) {
00439 DiscardedElements *= RelaxValue();
00440 IFPACK_CHK_ERR(U_->InsertGlobalValues(row_i,1, &DiscardedElements,
00441 &row_i));
00442 }
00443 }
00444
00445 #ifdef IFPACK_FLOPCOUNTERS
00446 double tf;
00447 Comm().SumAll(&this_proc_flops, &tf, 1);
00448 ComputeFlops_ += tf;
00449 #endif
00450
00451 IFPACK_CHK_ERR(L_->FillComplete());
00452 IFPACK_CHK_ERR(U_->FillComplete());
00453
00454 #if 0
00455
00456 Epetra_Vector LHS(A_.RowMatrixRowMap());
00457 Epetra_Vector RHS1(A_.RowMatrixRowMap());
00458 Epetra_Vector RHS2(A_.RowMatrixRowMap());
00459 Epetra_Vector RHS3(A_.RowMatrixRowMap());
00460 LHS.Random();
00461
00462 cout << "A = " << A_.NumGlobalNonzeros() << endl;
00463 cout << "L = " << L_->NumGlobalNonzeros() << endl;
00464 cout << "U = " << U_->NumGlobalNonzeros() << endl;
00465
00466 A_.Multiply(false,LHS,RHS1);
00467 U_->Multiply(false,LHS,RHS2);
00468 L_->Multiply(false,RHS2,RHS3);
00469
00470 RHS1.Update(-1.0, RHS3, 1.0);
00471 double Norm;
00472 RHS1.Norm2(&Norm);
00473 #endif
00474
00475 int MyNonzeros = L_->NumGlobalNonzeros() + U_->NumGlobalNonzeros();
00476 Comm().SumAll(&MyNonzeros, &GlobalNonzeros_, 1);
00477
00478 IsComputed_ = true;
00479
00480 ++NumCompute_;
00481 ComputeTime_ += Time_.ElapsedTime();
00482
00483 return(0);
00484
00485 }
00486
00487
00488 int Ifpack_ILUT::ApplyInverse(const Epetra_MultiVector& X,
00489 Epetra_MultiVector& Y) const
00490 {
00491 if (!IsComputed())
00492 IFPACK_CHK_ERR(-2);
00493
00494 if (X.NumVectors() != Y.NumVectors())
00495 IFPACK_CHK_ERR(-3);
00496
00497 Time_.ResetStartTime();
00498
00499
00500
00501
00502
00503
00504
00505 Teuchos::RefCountPtr<const Epetra_MultiVector> Xcopy;
00506 if (X.Pointers()[0] == Y.Pointers()[0])
00507 Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) );
00508 else
00509 Xcopy = Teuchos::rcp( &X, false );
00510
00511 if (!UseTranspose_)
00512 {
00513
00514 IFPACK_CHK_ERR(L_->Solve(false,false,false,*Xcopy,Y));
00515 IFPACK_CHK_ERR(U_->Solve(true,false,false,Y,Y));
00516 }
00517 else
00518 {
00519
00520 IFPACK_CHK_ERR(U_->Solve(true,true,false,*Xcopy,Y));
00521 IFPACK_CHK_ERR(L_->Solve(false,true,false,Y,Y));
00522 }
00523
00524 ++NumApplyInverse_;
00525 #ifdef IFPACK_FLOPCOUNTERS
00526 ApplyInverseFlops_ += X.NumVectors() * 2 * GlobalNonzeros_;
00527 #endif
00528 ApplyInverseTime_ += Time_.ElapsedTime();
00529
00530 return(0);
00531
00532 }
00533
00534
00535 int Ifpack_ILUT::Apply(const Epetra_MultiVector& X,
00536 Epetra_MultiVector& Y) const
00537 {
00538 return(-98);
00539 }
00540
00541
00542 double Ifpack_ILUT::Condest(const Ifpack_CondestType CT,
00543 const int MaxIters, const double Tol,
00544 Epetra_RowMatrix* Matrix_in)
00545 {
00546 if (!IsComputed())
00547 return(-1.0);
00548
00549
00550 if (Condest_ == -1.0)
00551 Condest_ = Ifpack_Condest(*this, CT, MaxIters, Tol, Matrix_in);
00552
00553 return(Condest_);
00554 }
00555
00556
00557 std::ostream&
00558 Ifpack_ILUT::Print(std::ostream& os) const
00559 {
00560 if (!Comm().MyPID()) {
00561 os << endl;
00562 os << "================================================================================" << endl;
00563 os << "Ifpack_ILUT: " << Label() << endl << endl;
00564 os << "Level-of-fill = " << LevelOfFill() << endl;
00565 os << "Absolute threshold = " << AbsoluteThreshold() << endl;
00566 os << "Relative threshold = " << RelativeThreshold() << endl;
00567 os << "Relax value = " << RelaxValue() << endl;
00568 os << "Condition number estimate = " << Condest() << endl;
00569 os << "Global number of rows = " << A_.NumGlobalRows() << endl;
00570 if (IsComputed_) {
00571 os << "Number of nonzeros in A = " << A_.NumGlobalNonzeros() << endl;
00572 os << "Number of nonzeros in L + U = " << NumGlobalNonzeros()
00573 << " ( = " << 100.0 * NumGlobalNonzeros() / A_.NumGlobalNonzeros()
00574 << " % of A)" << endl;
00575 os << "nonzeros / rows = "
00576 << 1.0 * NumGlobalNonzeros() / U_->NumGlobalRows() << endl;
00577 }
00578 os << endl;
00579 os << "Phase # calls Total Time (s) Total MFlops MFlops/s" << endl;
00580 os << "----- ------- -------------- ------------ --------" << endl;
00581 os << "Initialize() " << std::setw(5) << NumInitialize()
00582 << " " << std::setw(15) << InitializeTime()
00583 << " 0.0 0.0" << endl;
00584 os << "Compute() " << std::setw(5) << NumCompute()
00585 << " " << std::setw(15) << ComputeTime()
00586 << " " << std::setw(15) << 1.0e-6 * ComputeFlops();
00587 if (ComputeTime() != 0.0)
00588 os << " " << std::setw(15) << 1.0e-6 * ComputeFlops() / ComputeTime() << endl;
00589 else
00590 os << " " << std::setw(15) << 0.0 << endl;
00591 os << "ApplyInverse() " << std::setw(5) << NumApplyInverse()
00592 << " " << std::setw(15) << ApplyInverseTime()
00593 << " " << std::setw(15) << 1.0e-6 * ApplyInverseFlops();
00594 if (ApplyInverseTime() != 0.0)
00595 os << " " << std::setw(15) << 1.0e-6 * ApplyInverseFlops() / ApplyInverseTime() << endl;
00596 else
00597 os << " " << std::setw(15) << 0.0 << endl;
00598 os << "================================================================================" << endl;
00599 os << endl;
00600 }
00601
00602 return(os);
00603 }