|
Ifpack2 Templated Preconditioning Package
Version 1.0
|
00001 /*@HEADER 00002 // *********************************************************************** 00003 // 00004 // Ifpack2: Tempated Object-Oriented Algebraic Preconditioner Package 00005 // Copyright (2009) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00038 // 00039 // *********************************************************************** 00040 //@HEADER 00041 */ 00042 00043 #ifndef IFPACK2_ILUT_DEF_HPP 00044 #define IFPACK2_ILUT_DEF_HPP 00045 00046 // disable clang warnings 00047 #ifdef __clang__ 00048 #pragma clang system_header 00049 #endif 00050 00051 #include <Ifpack2_Heap.hpp> 00052 #include <Ifpack2_Condest.hpp> 00053 #include <Ifpack2_LocalFilter.hpp> 00054 #include <Ifpack2_Parameters.hpp> 00055 #include <Tpetra_CrsMatrix_def.hpp> 00056 00057 #include <Teuchos_Time.hpp> 00058 #include <Teuchos_TypeNameTraits.hpp> 00059 00060 00061 namespace Ifpack2 { 00062 00063 namespace { 00064 00089 template<class ScalarType> 00090 typename Teuchos::ScalarTraits<ScalarType>::magnitudeType 00091 ilutDefaultDropTolerance () { 00092 using Teuchos::as; 00093 typedef Teuchos::ScalarTraits<ScalarType> STS; 00094 typedef typename STS::magnitudeType magnitude_type; 00095 typedef Teuchos::ScalarTraits<magnitude_type> STM; 00096 00097 // 1/2. Hopefully this can be represented in magnitude_type. 00098 const magnitude_type oneHalf = STM::one() / (STM::one() + STM::one()); 00099 00100 // The min ensures that in case magnitude_type has very low 00101 // precision, we'll at least get some value strictly less than 00102 // one. 00103 return std::min (as<magnitude_type> (1000) * STS::magnitude (STS::eps ()), oneHalf); 00104 } 00105 00106 // Full specialization for ScalarType = double. 00107 // This specialization preserves ILUT's previous default behavior. 00108 template<> 00109 Teuchos::ScalarTraits<double>::magnitudeType 00110 ilutDefaultDropTolerance<double> () { 00111 return 1e-12; 00112 } 00113 00114 } // namespace (anonymous) 00115 00116 00117 template <class MatrixType> 00118 ILUT<MatrixType>::ILUT (const Teuchos::RCP<const row_matrix_type>& A) : 00119 A_ (A), 00120 Athresh_ (Teuchos::ScalarTraits<magnitude_type>::zero ()), 00121 Rthresh_ (Teuchos::ScalarTraits<magnitude_type>::one ()), 00122 RelaxValue_ (Teuchos::ScalarTraits<magnitude_type>::zero ()), 00123 LevelOfFill_ (1), 00124 DropTolerance_ (ilutDefaultDropTolerance<scalar_type> ()), 00125 Condest_ (-Teuchos::ScalarTraits<magnitude_type>::one ()), 00126 InitializeTime_ (0.0), 00127 ComputeTime_ (0.0), 00128 ApplyTime_ (0.0), 00129 NumInitialize_ (0), 00130 NumCompute_ (0), 00131 NumApply_ (0), 00132 IsInitialized_ (false), 00133 IsComputed_ (false) 00134 {} 00135 00136 template <class MatrixType> 00137 ILUT<MatrixType>::~ILUT() 00138 {} 00139 00140 template <class MatrixType> 00141 void ILUT<MatrixType>::setParameters (const Teuchos::ParameterList& params) 00142 { 00143 using Teuchos::as; 00144 using Teuchos::Exceptions::InvalidParameterName; 00145 using Teuchos::Exceptions::InvalidParameterType; 00146 00147 // Default values of the various parameters. 00148 int fillLevel = 1; 00149 magnitude_type absThresh = STM::zero (); 00150 magnitude_type relThresh = STM::one (); 00151 magnitude_type relaxValue = STM::zero (); 00152 magnitude_type dropTol = ilutDefaultDropTolerance<scalar_type> (); 00153 00154 bool gotFillLevel = false; 00155 try { 00156 // Try getting the fill level as an int. 00157 fillLevel = params.get<int> ("fact: ilut level-of-fill"); 00158 gotFillLevel = true; 00159 } 00160 catch (InvalidParameterName&) { 00161 // We didn't really get it, but this tells us to stop looking. 00162 gotFillLevel = true; 00163 } 00164 catch (InvalidParameterType&) { 00165 // The name is right, but the type is wrong; try different types. 00166 // We don't have to check InvalidParameterName again, since we 00167 // checked it above, and it has nothing to do with the type. 00168 } 00169 00170 if (! gotFillLevel) { 00171 // Try magnitude_type, for backwards compatibility. 00172 try { 00173 fillLevel = as<int> (params.get<magnitude_type> ("fact: ilut level-of-fill")); 00174 } 00175 catch (InvalidParameterType&) {} 00176 } 00177 if (! gotFillLevel) { 00178 // Try double, for backwards compatibility. 00179 try { 00180 fillLevel = as<int> (params.get<double> ("fact: ilut level-of-fill")); 00181 } 00182 catch (InvalidParameterType&) {} 00183 } 00184 // If none of the above attempts succeed, accept the default value. 00185 00186 TEUCHOS_TEST_FOR_EXCEPTION( 00187 fillLevel <= 0, std::runtime_error, 00188 "Ifpack2::ILUT: The \"fact: ilut level-of-fill\" parameter must be " 00189 "strictly greater than zero, but you specified a value of " << fillLevel 00190 << ". Remember that for ILUT, the fill level p means something different " 00191 "than it does for ILU(k). ILU(0) produces factors with the same sparsity " 00192 "structure as the input matrix A; ILUT with p = 0 always produces a " 00193 "diagonal matrix, and is thus probably not what you want."); 00194 00195 try { 00196 absThresh = params.get<magnitude_type> ("fact: absolute threshold"); 00197 } 00198 catch (InvalidParameterType&) { 00199 // Try double, for backwards compatibility. 00200 // The cast from double to magnitude_type must succeed. 00201 absThresh = as<magnitude_type> (params.get<double> ("fact: absolute threshold")); 00202 } 00203 catch (InvalidParameterName&) { 00204 // Accept the default value. 00205 } 00206 00207 try { 00208 relThresh = params.get<magnitude_type> ("fact: relative threshold"); 00209 } 00210 catch (InvalidParameterType&) { 00211 // Try double, for backwards compatibility. 00212 // The cast from double to magnitude_type must succeed. 00213 relThresh = as<magnitude_type> (params.get<double> ("fact: relative threshold")); 00214 } 00215 catch (InvalidParameterName&) { 00216 // Accept the default value. 00217 } 00218 00219 try { 00220 relaxValue = params.get<magnitude_type> ("fact: relax value"); 00221 } 00222 catch (InvalidParameterType&) { 00223 // Try double, for backwards compatibility. 00224 // The cast from double to magnitude_type must succeed. 00225 relaxValue = as<magnitude_type> (params.get<double> ("fact: relax value")); 00226 } 00227 catch (InvalidParameterName&) { 00228 // Accept the default value. 00229 } 00230 00231 try { 00232 dropTol = params.get<magnitude_type> ("fact: drop tolerance"); 00233 } 00234 catch (InvalidParameterType&) { 00235 // Try double, for backwards compatibility. 00236 // The cast from double to magnitude_type must succeed. 00237 dropTol = as<magnitude_type> (params.get<double> ("fact: drop tolerance")); 00238 } 00239 catch (InvalidParameterName&) { 00240 // Accept the default value. 00241 } 00242 00243 // "Commit" the values only after validating all of them. This 00244 // ensures that there are no side effects if this routine throws an 00245 // exception. 00246 00247 // mfh 28 Nov 2012: The previous code would not assign Athresh_, 00248 // Rthresh_, RelaxValue_, or DropTolerance_ if the read-in value was 00249 // -1. I don't know if keeping this behavior is correct, but I'll 00250 // keep it just so as not to change previous behavior. 00251 00252 LevelOfFill_ = fillLevel; 00253 if (absThresh != -STM::one ()) { 00254 Athresh_ = absThresh; 00255 } 00256 if (relThresh != -STM::one ()) { 00257 Rthresh_ = relThresh; 00258 } 00259 if (relaxValue != -STM::one ()) { 00260 RelaxValue_ = relaxValue; 00261 } 00262 if (dropTol != -STM::one ()) { 00263 DropTolerance_ = dropTol; 00264 } 00265 } 00266 00267 00268 template <class MatrixType> 00269 Teuchos::RCP<const Teuchos::Comm<int> > 00270 ILUT<MatrixType>::getComm () const { 00271 TEUCHOS_TEST_FOR_EXCEPTION( 00272 A_.is_null (), std::runtime_error, "Ifpack2::ILUT::getComm: " 00273 "The matrix is null. Please call setMatrix() with a nonnull input " 00274 "before calling this method."); 00275 return A_->getComm (); 00276 } 00277 00278 00279 template <class MatrixType> 00280 Teuchos::RCP<const typename ILUT<MatrixType>::row_matrix_type> 00281 ILUT<MatrixType>::getMatrix () const { 00282 return A_; 00283 } 00284 00285 00286 template <class MatrixType> 00287 Teuchos::RCP<const typename ILUT<MatrixType>::map_type> 00288 ILUT<MatrixType>::getDomainMap () const 00289 { 00290 TEUCHOS_TEST_FOR_EXCEPTION( 00291 A_.is_null (), std::runtime_error, "Ifpack2::ILUT::getDomainMap: " 00292 "The matrix is null. Please call setMatrix() with a nonnull input " 00293 "before calling this method."); 00294 return A_->getDomainMap (); 00295 } 00296 00297 00298 template <class MatrixType> 00299 Teuchos::RCP<const typename ILUT<MatrixType>::map_type> 00300 ILUT<MatrixType>::getRangeMap () const 00301 { 00302 TEUCHOS_TEST_FOR_EXCEPTION( 00303 A_.is_null (), std::runtime_error, "Ifpack2::ILUT::getRangeMap: " 00304 "The matrix is null. Please call setMatrix() with a nonnull input " 00305 "before calling this method."); 00306 return A_->getRangeMap (); 00307 } 00308 00309 00310 template <class MatrixType> 00311 bool ILUT<MatrixType>::hasTransposeApply () const { 00312 return true; 00313 } 00314 00315 00316 template <class MatrixType> 00317 int ILUT<MatrixType>::getNumInitialize () const { 00318 return NumInitialize_; 00319 } 00320 00321 00322 template <class MatrixType> 00323 int ILUT<MatrixType>::getNumCompute () const { 00324 return NumCompute_; 00325 } 00326 00327 00328 template <class MatrixType> 00329 int ILUT<MatrixType>::getNumApply () const { 00330 return NumApply_; 00331 } 00332 00333 00334 template <class MatrixType> 00335 double ILUT<MatrixType>::getInitializeTime () const { 00336 return InitializeTime_; 00337 } 00338 00339 00340 template<class MatrixType> 00341 double ILUT<MatrixType>::getComputeTime () const { 00342 return ComputeTime_; 00343 } 00344 00345 00346 template<class MatrixType> 00347 double ILUT<MatrixType>::getApplyTime () const { 00348 return ApplyTime_; 00349 } 00350 00351 00352 template<class MatrixType> 00353 global_size_t ILUT<MatrixType>::getGlobalNumEntries () const { 00354 return L_->getGlobalNumEntries () + U_->getGlobalNumEntries (); 00355 } 00356 00357 00358 template<class MatrixType> 00359 size_t ILUT<MatrixType>::getNodeNumEntries () const { 00360 return L_->getNodeNumEntries () + U_->getNodeNumEntries (); 00361 } 00362 00363 00364 template<class MatrixType> 00365 typename ILUT<MatrixType>::magnitude_type 00366 ILUT<MatrixType>:: 00367 computeCondEst (CondestType CT, 00368 local_ordinal_type MaxIters, 00369 magnitude_type Tol, 00370 const Teuchos::Ptr<const row_matrix_type>& matrix) 00371 { 00372 if (! isComputed ()) { 00373 return -STM::one (); 00374 } 00375 // NOTE: this is computing the *local* condest 00376 if (Condest_ == -STM::one ()) { 00377 Condest_ = Ifpack2::Condest(*this, CT, MaxIters, Tol, matrix); 00378 } 00379 return Condest_; 00380 } 00381 00382 00383 template<class MatrixType> 00384 void ILUT<MatrixType>::setMatrix (const Teuchos::RCP<const row_matrix_type>& A) 00385 { 00386 if (A.getRawPtr () != A_.getRawPtr ()) { 00387 // Check in serial or one-process mode if the matrix is square. 00388 TEUCHOS_TEST_FOR_EXCEPTION( 00389 ! A.is_null () && A->getComm ()->getSize () == 1 && 00390 A->getNodeNumRows () != A->getNodeNumCols (), 00391 std::runtime_error, "Ifpack2::ILUT::setMatrix: If A's communicator only " 00392 "contains one process, then A must be square. Instead, you provided a " 00393 "matrix A with " << A->getNodeNumRows () << " rows and " 00394 << A->getNodeNumCols () << " columns."); 00395 00396 // It's legal for A to be null; in that case, you may not call 00397 // initialize() until calling setMatrix() with a nonnull input. 00398 // Regardless, setting the matrix invalidates any previous 00399 // factorization. 00400 IsInitialized_ = false; 00401 IsComputed_ = false; 00402 A_local_ = Teuchos::null; 00403 L_ = Teuchos::null; 00404 U_ = Teuchos::null; 00405 Condest_ = -Teuchos::ScalarTraits<magnitude_type>::one(); 00406 A_ = A; 00407 } 00408 } 00409 00410 00411 template<class MatrixType> 00412 void ILUT<MatrixType>::initialize () 00413 { 00414 Teuchos::Time timer ("ILUT::initialize"); 00415 { 00416 Teuchos::TimeMonitor timeMon (timer); 00417 00418 // Check that the matrix is nonnull. 00419 TEUCHOS_TEST_FOR_EXCEPTION( 00420 A_.is_null (), std::runtime_error, "Ifpack2::ILUT::initialize: " 00421 "The matrix to precondition is null. Please call setMatrix() with a " 00422 "nonnull input before calling this method."); 00423 00424 // Clear any previous computations. 00425 IsInitialized_ = false; 00426 IsComputed_ = false; 00427 A_local_ = Teuchos::null; 00428 L_ = Teuchos::null; 00429 U_ = Teuchos::null; 00430 00431 A_local_ = makeLocalFilter (A_); // Compute the local filter. 00432 00433 IsInitialized_ = true; 00434 ++NumInitialize_; 00435 } 00436 InitializeTime_ += timer.totalElapsedTime (); 00437 } 00438 00439 00440 template<typename ScalarType> 00441 typename Teuchos::ScalarTraits<ScalarType>::magnitudeType 00442 scalar_mag (const ScalarType& s) 00443 { 00444 return Teuchos::ScalarTraits<ScalarType>::magnitude(s); 00445 } 00446 00447 00448 template<class MatrixType> 00449 void ILUT<MatrixType>::compute () 00450 { 00451 using Teuchos::Array; 00452 using Teuchos::ArrayRCP; 00453 using Teuchos::ArrayView; 00454 using Teuchos::as; 00455 using Teuchos::rcp; 00456 using Teuchos::reduceAll; 00457 00458 //-------------------------------------------------------------------------- 00459 // Ifpack2::ILUT is a translation of the Aztec ILUT implementation. The Aztec 00460 // ILUT implementation was written by Ray Tuminaro. 00461 // 00462 // This isn't an exact translation of the Aztec ILUT algorithm, for the 00463 // following reasons: 00464 // 1. Minor differences result from the fact that Aztec factors a MSR format 00465 // matrix in place, while the code below factors an input CrsMatrix which 00466 // remains untouched and stores the resulting factors in separate L and U 00467 // CrsMatrix objects. 00468 // Also, the Aztec code begins by shifting the matrix pointers back 00469 // by one, and the pointer contents back by one, and then using 1-based 00470 // Fortran-style indexing in the algorithm. This Ifpack2 code uses C-style 00471 // 0-based indexing throughout. 00472 // 2. Aztec stores the inverse of the diagonal of U. This Ifpack2 code 00473 // stores the non-inverted diagonal in U. 00474 // The triangular solves (in Ifpack2::ILUT::apply()) are performed by 00475 // calling the Tpetra::CrsMatrix::solve method on the L and U objects, and 00476 // this requires U to contain the non-inverted diagonal. 00477 // 00478 // ABW. 00479 //-------------------------------------------------------------------------- 00480 00481 // Don't count initialization in the compute() time. 00482 if (! isInitialized ()) { 00483 initialize (); 00484 } 00485 00486 Teuchos::Time timer ("ILUT::compute"); 00487 { // Timer scope for timing compute() 00488 Teuchos::TimeMonitor timeMon (timer, true); 00489 const scalar_type zero = STS::zero (); 00490 const scalar_type one = STS::one (); 00491 00492 const local_ordinal_type myNumRows = A_local_->getNodeNumRows (); 00493 L_ = rcp (new crs_matrix_type (A_local_->getRowMap (), A_local_->getColMap (), 0)); 00494 U_ = rcp (new crs_matrix_type (A_local_->getRowMap (), A_local_->getColMap (), 0)); 00495 00496 // CGB: note, this caching approach may not be necessary anymore 00497 // We will store ArrayView objects that are views of the rows of U, so that 00498 // we don't have to repeatedly retrieve the view for each row. These will 00499 // be populated row by row as the factorization proceeds. 00500 Array<ArrayView<const local_ordinal_type> > Uindices (myNumRows); 00501 Array<ArrayView<const scalar_type> > Ucoefs (myNumRows); 00502 00503 // If this macro is defined, files containing the L and U factors 00504 // will be written. DON'T CHECK IN THE CODE WITH THIS MACRO ENABLED!!! 00505 // #define IFPACK2_WRITE_FACTORS 00506 #ifdef IFPACK2_WRITE_FACTORS 00507 std::ofstream ofsL("L.tif.mtx", std::ios::out); 00508 std::ofstream ofsU("U.tif.mtx", std::ios::out); 00509 #endif 00510 00511 // The code here uses double for fill calculations, even though 00512 // the fill level is actually an integer. The point is to avoid 00513 // rounding and overflow for integer calculations. If int is <= 00514 // 32 bits, it can never overflow double, so this cast is always 00515 // OK as long as int has <= 32 bits. 00516 00517 // Calculate how much fill will be allowed in addition to the 00518 // space that corresponds to the input matrix entries. 00519 double local_nnz = static_cast<double> (A_local_->getNodeNumEntries ()); 00520 double fill; 00521 { 00522 const double fillLevel = as<double> (getLevelOfFill ()); 00523 fill = ((fillLevel - 1) * local_nnz) / (2 * myNumRows); 00524 } 00525 00526 // std::ceil gives the smallest integer larger than the argument. 00527 // this may give a slightly different result than Aztec's fill value in 00528 // some cases. 00529 double fill_ceil=std::ceil(fill); 00530 00531 // Similarly to Aztec, we will allow the same amount of fill for each 00532 // row, half in L and half in U. 00533 size_type fillL = static_cast<size_type>(fill_ceil); 00534 size_type fillU = static_cast<size_type>(fill_ceil); 00535 00536 Array<scalar_type> InvDiagU (myNumRows, zero); 00537 00538 Array<local_ordinal_type> tmp_idx; 00539 Array<scalar_type> tmpv; 00540 00541 enum { UNUSED, ORIG, FILL }; 00542 local_ordinal_type max_col = myNumRows; 00543 00544 Array<int> pattern(max_col, UNUSED); 00545 Array<scalar_type> cur_row(max_col, zero); 00546 Array<magnitude_type> unorm(max_col); 00547 magnitude_type rownorm; 00548 Array<local_ordinal_type> L_cols_heap; 00549 Array<local_ordinal_type> U_cols; 00550 Array<local_ordinal_type> L_vals_heap; 00551 Array<local_ordinal_type> U_vals_heap; 00552 00553 // A comparison object which will be used to create 'heaps' of indices 00554 // that are ordered according to the corresponding values in the 00555 // 'cur_row' array. 00556 greater_indirect<scalar_type,local_ordinal_type> vals_comp(cur_row); 00557 00558 // =================== // 00559 // start factorization // 00560 // =================== // 00561 00562 ArrayRCP<local_ordinal_type> ColIndicesARCP; 00563 ArrayRCP<scalar_type> ColValuesARCP; 00564 if (! A_local_->supportsRowViews ()) { 00565 const size_t maxnz = A_local_->getNodeMaxNumRowEntries (); 00566 ColIndicesARCP.resize (maxnz); 00567 ColValuesARCP.resize (maxnz); 00568 } 00569 00570 for (local_ordinal_type row_i = 0 ; row_i < myNumRows ; ++row_i) { 00571 ArrayView<const local_ordinal_type> ColIndicesA; 00572 ArrayView<const scalar_type> ColValuesA; 00573 size_t RowNnz; 00574 00575 if (A_local_->supportsRowViews ()) { 00576 A_local_->getLocalRowView (row_i, ColIndicesA, ColValuesA); 00577 RowNnz = ColIndicesA.size (); 00578 } 00579 else { 00580 A_local_->getLocalRowCopy (row_i, ColIndicesARCP (), ColValuesARCP (), RowNnz); 00581 ColIndicesA = ColIndicesARCP (0, RowNnz); 00582 ColValuesA = ColValuesARCP (0, RowNnz); 00583 } 00584 00585 // Always include the diagonal in the U factor. The value should get 00586 // set in the next loop below. 00587 U_cols.push_back(row_i); 00588 cur_row[row_i] = zero; 00589 pattern[row_i] = ORIG; 00590 00591 size_type L_cols_heaplen = 0; 00592 rownorm = STM::zero (); 00593 for (size_t i = 0; i < RowNnz; ++i) { 00594 if (ColIndicesA[i] < myNumRows) { 00595 if (ColIndicesA[i] < row_i) { 00596 add_to_heap(ColIndicesA[i], L_cols_heap, L_cols_heaplen); 00597 } 00598 else if (ColIndicesA[i] > row_i) { 00599 U_cols.push_back(ColIndicesA[i]); 00600 } 00601 00602 cur_row[ColIndicesA[i]] = ColValuesA[i]; 00603 pattern[ColIndicesA[i]] = ORIG; 00604 rownorm += scalar_mag(ColValuesA[i]); 00605 } 00606 } 00607 00608 // Alter the diagonal according to the absolute-threshold and 00609 // relative-threshold values. If not set, those values default 00610 // to zero and one respectively. 00611 const magnitude_type rthresh = getRelativeThreshold(); 00612 const scalar_type& v = cur_row[row_i]; 00613 cur_row[row_i] = as<scalar_type> (getAbsoluteThreshold() * IFPACK2_SGN(v)) + rthresh*v; 00614 00615 size_type orig_U_len = U_cols.size(); 00616 RowNnz = L_cols_heap.size() + orig_U_len; 00617 rownorm = getDropTolerance() * rownorm/RowNnz; 00618 00619 // The following while loop corresponds to the 'L30' goto's in Aztec. 00620 size_type L_vals_heaplen = 0; 00621 while (L_cols_heaplen > 0) { 00622 local_ordinal_type row_k = L_cols_heap.front(); 00623 00624 scalar_type multiplier = cur_row[row_k] * InvDiagU[row_k]; 00625 cur_row[row_k] = multiplier; 00626 magnitude_type mag_mult = scalar_mag(multiplier); 00627 if (mag_mult*unorm[row_k] < rownorm) { 00628 pattern[row_k] = UNUSED; 00629 rm_heap_root(L_cols_heap, L_cols_heaplen); 00630 continue; 00631 } 00632 if (pattern[row_k] != ORIG) { 00633 if (L_vals_heaplen < fillL) { 00634 add_to_heap(row_k, L_vals_heap, L_vals_heaplen, vals_comp); 00635 } 00636 else if (L_vals_heaplen==0 || 00637 mag_mult < scalar_mag(cur_row[L_vals_heap.front()])) { 00638 pattern[row_k] = UNUSED; 00639 rm_heap_root(L_cols_heap, L_cols_heaplen); 00640 continue; 00641 } 00642 else { 00643 pattern[L_vals_heap.front()] = UNUSED; 00644 rm_heap_root(L_vals_heap, L_vals_heaplen, vals_comp); 00645 add_to_heap(row_k, L_vals_heap, L_vals_heaplen, vals_comp); 00646 } 00647 } 00648 00649 /* Reduce current row */ 00650 00651 ArrayView<const local_ordinal_type>& ColIndicesU = Uindices[row_k]; 00652 ArrayView<const scalar_type>& ColValuesU = Ucoefs[row_k]; 00653 size_type ColNnzU = ColIndicesU.size(); 00654 00655 for(size_type j=0; j<ColNnzU; ++j) { 00656 if (ColIndicesU[j] > row_k) { 00657 scalar_type tmp = multiplier * ColValuesU[j]; 00658 local_ordinal_type col_j = ColIndicesU[j]; 00659 if (pattern[col_j] != UNUSED) { 00660 cur_row[col_j] -= tmp; 00661 } 00662 else if (scalar_mag(tmp) > rownorm) { 00663 cur_row[col_j] = -tmp; 00664 pattern[col_j] = FILL; 00665 if (col_j > row_i) { 00666 U_cols.push_back(col_j); 00667 } 00668 else { 00669 add_to_heap(col_j, L_cols_heap, L_cols_heaplen); 00670 } 00671 } 00672 } 00673 } 00674 00675 rm_heap_root(L_cols_heap, L_cols_heaplen); 00676 }//end of while(L_cols_heaplen) loop 00677 00678 00679 // Put indices and values for L into arrays and then into the L_ matrix. 00680 00681 // first, the original entries from the L section of A: 00682 for (size_type i = 0; i < ColIndicesA.size (); ++i) { 00683 if (ColIndicesA[i] < row_i) { 00684 tmp_idx.push_back(ColIndicesA[i]); 00685 tmpv.push_back(cur_row[ColIndicesA[i]]); 00686 pattern[ColIndicesA[i]] = UNUSED; 00687 } 00688 } 00689 00690 // next, the L entries resulting from fill: 00691 for (size_type j = 0; j < L_vals_heaplen; ++j) { 00692 tmp_idx.push_back(L_vals_heap[j]); 00693 tmpv.push_back(cur_row[L_vals_heap[j]]); 00694 pattern[L_vals_heap[j]] = UNUSED; 00695 } 00696 00697 // L has a one on the diagonal, but we don't explicitly store it. 00698 // If we don't store it, then the Tpetra/Kokkos kernel which performs 00699 // the triangular solve can assume a unit diagonal, take a short-cut 00700 // and perform faster. 00701 00702 L_->insertLocalValues (row_i, tmp_idx (), tmpv ()); 00703 #ifdef IFPACK2_WRITE_FACTORS 00704 for (size_type ii = 0; ii < tmp_idx.size (); ++ii) { 00705 ofsL << row_i << " " << tmp_idx[ii] << " " << tmpv[ii] << std::endl; 00706 } 00707 #endif 00708 00709 tmp_idx.clear(); 00710 tmpv.clear(); 00711 00712 // Pick out the diagonal element, store its reciprocal. 00713 if (cur_row[row_i] == zero) { 00714 std::cerr << "Ifpack2::ILUT::Compute: zero pivot encountered! Replacing with rownorm and continuing...(You may need to set the parameter 'fact: absolute threshold'.)" << std::endl; 00715 cur_row[row_i] = rownorm; 00716 } 00717 InvDiagU[row_i] = one / cur_row[row_i]; 00718 00719 // Non-inverted diagonal is stored for U: 00720 tmp_idx.push_back(row_i); 00721 tmpv.push_back(cur_row[row_i]); 00722 unorm[row_i] = scalar_mag(cur_row[row_i]); 00723 pattern[row_i] = UNUSED; 00724 00725 // Now put indices and values for U into arrays and then into the U_ matrix. 00726 // The first entry in U_cols is the diagonal, which we just handled, so we'll 00727 // start our loop at j=1. 00728 00729 size_type U_vals_heaplen = 0; 00730 for(size_type j=1; j<U_cols.size(); ++j) { 00731 local_ordinal_type col = U_cols[j]; 00732 if (pattern[col] != ORIG) { 00733 if (U_vals_heaplen < fillU) { 00734 add_to_heap(col, U_vals_heap, U_vals_heaplen, vals_comp); 00735 } 00736 else if (U_vals_heaplen!=0 && scalar_mag(cur_row[col]) > 00737 scalar_mag(cur_row[U_vals_heap.front()])) { 00738 rm_heap_root(U_vals_heap, U_vals_heaplen, vals_comp); 00739 add_to_heap(col, U_vals_heap, U_vals_heaplen, vals_comp); 00740 } 00741 } 00742 else { 00743 tmp_idx.push_back(col); 00744 tmpv.push_back(cur_row[col]); 00745 unorm[row_i] += scalar_mag(cur_row[col]); 00746 } 00747 pattern[col] = UNUSED; 00748 } 00749 00750 for(size_type j=0; j<U_vals_heaplen; ++j) { 00751 tmp_idx.push_back(U_vals_heap[j]); 00752 tmpv.push_back(cur_row[U_vals_heap[j]]); 00753 unorm[row_i] += scalar_mag(cur_row[U_vals_heap[j]]); 00754 } 00755 00756 unorm[row_i] /= (orig_U_len + U_vals_heaplen); 00757 00758 U_->insertLocalValues(row_i, tmp_idx(), tmpv() ); 00759 #ifdef IFPACK2_WRITE_FACTORS 00760 for(int ii=0; ii<tmp_idx.size(); ++ii) { 00761 ofsU <<row_i<< " " <<tmp_idx[ii]<< " " <<tmpv[ii]<< std::endl; 00762 } 00763 #endif 00764 tmp_idx.clear(); 00765 tmpv.clear(); 00766 00767 U_->getLocalRowView(row_i, Uindices[row_i], Ucoefs[row_i] ); 00768 00769 L_cols_heap.clear(); 00770 U_cols.clear(); 00771 L_vals_heap.clear(); 00772 U_vals_heap.clear(); 00773 } // end of for(row_i) loop 00774 00775 // FIXME (mfh 03 Apr 2013) Do we need to supply a domain and range Map? 00776 L_->fillComplete(); 00777 U_->fillComplete(); 00778 } 00779 ComputeTime_ += timer.totalElapsedTime (); 00780 IsComputed_ = true; 00781 ++NumCompute_; 00782 } 00783 00784 00785 template <class MatrixType> 00786 void ILUT<MatrixType>:: 00787 apply (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X, 00788 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y, 00789 Teuchos::ETransp mode, 00790 scalar_type alpha, 00791 scalar_type beta) const 00792 { 00793 using Teuchos::RCP; 00794 using Teuchos::rcp; 00795 using Teuchos::rcpFromRef; 00796 typedef Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type> MV; 00797 00798 Teuchos::Time timer ("ILUT::apply"); 00799 { // Timer scope for timing apply() 00800 Teuchos::TimeMonitor timeMon (timer, true); 00801 00802 TEUCHOS_TEST_FOR_EXCEPTION( 00803 ! isComputed (), std::runtime_error, 00804 "Ifpack2::ILUT::apply: You must call compute() to compute the incomplete " 00805 "factorization, before calling apply()."); 00806 00807 TEUCHOS_TEST_FOR_EXCEPTION( 00808 X.getNumVectors() != Y.getNumVectors(), std::runtime_error, 00809 "Ifpack2::ILUT::apply: X and Y must have the same number of columns. " 00810 "X has " << X.getNumVectors () << " columns, but Y has " 00811 << Y.getNumVectors () << " columns."); 00812 00813 if (alpha == Teuchos::ScalarTraits<scalar_type>::zero ()) { 00814 // alpha == 0, so we don't need to apply the operator. 00815 // 00816 // The special case for beta == 0 ensures that if Y contains Inf 00817 // or NaN values, we replace them with 0 (following BLAS 00818 // convention), rather than multiplying them by 0 to get NaN. 00819 if (beta == Teuchos::ScalarTraits<scalar_type>::zero ()) { 00820 Y.putScalar (beta); 00821 } else { 00822 Y.scale (beta); 00823 } 00824 return; 00825 } 00826 00827 // If beta != 0, create a temporary multivector Y_temp to hold the 00828 // contents of alpha*M^{-1}*X. Otherwise, alias Y_temp to Y. 00829 RCP<MV> Y_temp; 00830 if (beta == Teuchos::ScalarTraits<scalar_type>::zero ()) { 00831 Y_temp = rcpFromRef (Y); 00832 } else { 00833 Y_temp = rcp (new MV (Y.getMap (), Y.getNumVectors ())); 00834 } 00835 00836 // If X and Y are pointing to the same memory location, create an 00837 // auxiliary vector, X_temp, so that we don't clobber the input 00838 // when computing the output. Otherwise, alias X_temp to X. 00839 RCP<const MV> X_temp; 00840 if (X.getLocalMV ().getValues () == Y.getLocalMV ().getValues ()) { 00841 X_temp = rcp (new MV (X, Teuchos::Copy)); 00842 } else { 00843 X_temp = rcpFromRef (X); 00844 } 00845 00846 // Create a temporary multivector Y_mid to hold the intermediate 00847 // between the L and U (or U and L, for the transpose or conjugate 00848 // transpose case) solves. 00849 RCP<MV> Y_mid = rcp (new MV (Y.getMap (), Y.getNumVectors ())); 00850 00851 if (mode == Teuchos::NO_TRANS) { // Solve L U Y = X 00852 L_->template localSolve<scalar_type, scalar_type> (*X_temp, *Y_mid, mode); 00853 // FIXME (mfh 20 Aug 2013) Is it OK to use Y_temp for both the 00854 // input and the output? 00855 U_->template localSolve<scalar_type, scalar_type> (*Y_mid, *Y_temp, mode); 00856 } 00857 else { // Solve U^* L^* Y = X 00858 U_->template localSolve<scalar_type, scalar_type> (*X_temp, *Y_mid, mode); 00859 // FIXME (mfh 20 Aug 2013) Is it OK to use Y_temp for both the 00860 // input and the output? 00861 L_->template localSolve<scalar_type, scalar_type> (*Y_mid, *Y_temp, mode); 00862 } 00863 00864 if (beta == Teuchos::ScalarTraits<scalar_type>::zero ()) { 00865 Y.scale (alpha); 00866 } else { // beta != 0 00867 Y.update (alpha, *Y_temp, beta); 00868 } 00869 } 00870 ++NumApply_; 00871 ApplyTime_ += timer.totalElapsedTime (); 00872 } 00873 00874 00875 template <class MatrixType> 00876 std::string ILUT<MatrixType>::description () const 00877 { 00878 std::ostringstream os; 00879 00880 // Output is a valid YAML dictionary in flow style. If you don't 00881 // like everything on a single line, you should call describe() 00882 // instead. 00883 os << "\"Ifpack2::ILUT\": {"; 00884 os << "Initialized: " << (isInitialized () ? "true" : "false") << ", " 00885 << "Computed: " << (isComputed () ? "true" : "false") << ", "; 00886 00887 os << "Level-of-fill: " << getLevelOfFill() << ", " 00888 << "absolute threshold: " << getAbsoluteThreshold() << ", " 00889 << "relative threshold: " << getRelativeThreshold() << ", " 00890 << "relaxation value: " << getRelaxValue() << ", "; 00891 00892 if (A_.is_null ()) { 00893 os << "Matrix: null"; 00894 } 00895 else { 00896 os << "Global matrix dimensions: [" 00897 << A_->getGlobalNumRows () << ", " << A_->getGlobalNumCols () << "]" 00898 << ", Global nnz: " << A_->getGlobalNumEntries(); 00899 } 00900 00901 os << "}"; 00902 return os.str (); 00903 } 00904 00905 00906 template <class MatrixType> 00907 void 00908 ILUT<MatrixType>:: 00909 describe (Teuchos::FancyOStream& out, 00910 const Teuchos::EVerbosityLevel verbLevel) const 00911 { 00912 using Teuchos::Comm; 00913 using Teuchos::OSTab; 00914 using Teuchos::RCP; 00915 using Teuchos::TypeNameTraits; 00916 using std::endl; 00917 using Teuchos::VERB_DEFAULT; 00918 using Teuchos::VERB_NONE; 00919 using Teuchos::VERB_LOW; 00920 using Teuchos::VERB_MEDIUM; 00921 using Teuchos::VERB_HIGH; 00922 using Teuchos::VERB_EXTREME; 00923 00924 const Teuchos::EVerbosityLevel vl = 00925 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel; 00926 OSTab tab0 (out); 00927 00928 if (vl > VERB_NONE) { 00929 out << "\"Ifpack2::ILUT\":" << endl; 00930 OSTab tab1 (out); 00931 out << "MatrixType: " << TypeNameTraits<MatrixType>::name () << endl; 00932 if (this->getObjectLabel () != "") { 00933 out << "Label: \"" << this->getObjectLabel () << "\"" << endl; 00934 } 00935 out << "Initialized: " << (isInitialized () ? "true" : "false") 00936 << endl 00937 << "Computed: " << (isComputed () ? "true" : "false") 00938 << endl 00939 << "Level of fill: " << getLevelOfFill () << endl 00940 << "Absolute threshold: " << getAbsoluteThreshold () << endl 00941 << "Relative threshold: " << getRelativeThreshold () << endl 00942 << "Relax value: " << getRelaxValue () << endl; 00943 00944 if (isComputed () && vl >= VERB_HIGH) { 00945 const double fillFraction = 00946 (double) getGlobalNumEntries () / (double) A_->getGlobalNumEntries (); 00947 const double nnzToRows = 00948 (double) getGlobalNumEntries () / (double) U_->getGlobalNumRows (); 00949 00950 out << "Dimensions of L: [" << L_->getGlobalNumRows () << ", " 00951 << L_->getGlobalNumRows () << "]" << endl 00952 << "Dimensions of U: [" << U_->getGlobalNumRows () << ", " 00953 << U_->getGlobalNumRows () << "]" << endl 00954 << "Number of nonzeros in factors: " << getGlobalNumEntries () << endl 00955 << "Fill fraction of factors over A: " << fillFraction << endl 00956 << "Ratio of nonzeros to rows: " << nnzToRows << endl; 00957 } 00958 00959 out << "Number of initialize calls: " << getNumInitialize () << endl 00960 << "Number of compute calls: " << getNumCompute () << endl 00961 << "Number of apply calls: " << getNumApply () << endl 00962 << "Total time in seconds for initialize: " << getInitializeTime () << endl 00963 << "Total time in seconds for compute: " << getComputeTime () << endl 00964 << "Total time in seconds for apply: " << getApplyTime () << endl; 00965 00966 out << "Local matrix:" << endl; 00967 A_local_->describe (out, vl); 00968 } 00969 } 00970 00971 template <class MatrixType> 00972 Teuchos::RCP<const typename ILUT<MatrixType>::row_matrix_type> 00973 ILUT<MatrixType>::makeLocalFilter (const Teuchos::RCP<const row_matrix_type>& A) 00974 { 00975 if (A->getComm ()->getSize () > 1) { 00976 return Teuchos::rcp (new LocalFilter<MatrixType> (A)); 00977 } else { 00978 return A; 00979 } 00980 } 00981 00982 }//namespace Ifpack2 00983 00984 00985 // FIXME (mfh 16 Sep 2014) We should really only use RowMatrix here! 00986 // There's no need to instantiate for CrsMatrix too. All Ifpack2 00987 // preconditioners can and should do dynamic casts if they need a type 00988 // more specific than RowMatrix. 00989 00990 #define IFPACK2_ILUT_INSTANT(S,LO,GO,N) \ 00991 template class Ifpack2::ILUT< Tpetra::RowMatrix<S, LO, GO, N> >; \ 00992 template class Ifpack2::ILUT< Tpetra::CrsMatrix<S, LO, GO, N> >; 00993 00994 #endif /* IFPACK2_ILUT_DEF_HPP */
1.7.6.1