|
Anasazi
Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Anasazi: Block Eigensolvers Package 00005 // Copyright (2004) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // This library is free software; you can redistribute it and/or modify 00011 // it under the terms of the GNU Lesser General Public License as 00012 // published by the Free Software Foundation; either version 2.1 of the 00013 // License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, but 00016 // WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 00023 // USA 00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00025 // 00026 // *********************************************************************** 00027 // @HEADER 00028 00029 00034 /* 00035 LOBPCG contains local storage of up to 10*blockSize_ vectors, representing 10 entities 00036 X,H,P,R 00037 KX,KH,KP (product of K and the above) 00038 MX,MH,MP (product of M and the above, not allocated if we don't have an M matrix) 00039 If full orthogonalization is enabled, one extra multivector of blockSize_ vectors is required to 00040 compute the local update of X and P. 00041 00042 A solver is bound to an eigenproblem at declaration. 00043 Other solver parameters (e.g., block size, auxiliary vectors) can be changed dynamically. 00044 00045 The orthogonalization manager is used to project away from the auxiliary vectors. 00046 If full orthogonalization is enabled, the orthogonalization manager is also used to construct an M orthonormal basis. 00047 The orthogonalization manager is subclass of MatOrthoManager, which LOBPCG assumes to be defined by the M inner product. 00048 LOBPCG will not work correctly if the orthomanager uses a different inner product. 00049 */ 00050 00051 00052 #ifndef ANASAZI_LOBPCG_HPP 00053 #define ANASAZI_LOBPCG_HPP 00054 00055 #include "AnasaziTypes.hpp" 00056 00057 #include "AnasaziEigensolver.hpp" 00058 #include "AnasaziMultiVecTraits.hpp" 00059 #include "AnasaziOperatorTraits.hpp" 00060 #include "Teuchos_ScalarTraits.hpp" 00061 00062 #include "AnasaziMatOrthoManager.hpp" 00063 #include "AnasaziSolverUtils.hpp" 00064 00065 #include "Teuchos_LAPACK.hpp" 00066 #include "Teuchos_BLAS.hpp" 00067 #include "Teuchos_SerialDenseMatrix.hpp" 00068 #include "Teuchos_ParameterList.hpp" 00069 #include "Teuchos_TimeMonitor.hpp" 00070 00091 namespace Anasazi { 00092 00094 00095 00100 template <class ScalarType, class MultiVector> 00101 struct LOBPCGState { 00103 Teuchos::RCP<const MultiVector> V; 00105 Teuchos::RCP<const MultiVector> KV; 00107 Teuchos::RCP<const MultiVector> MV; 00108 00110 Teuchos::RCP<const MultiVector> X; 00112 Teuchos::RCP<const MultiVector> KX; 00114 Teuchos::RCP<const MultiVector> MX; 00115 00117 Teuchos::RCP<const MultiVector> P; 00119 Teuchos::RCP<const MultiVector> KP; 00121 Teuchos::RCP<const MultiVector> MP; 00122 00127 Teuchos::RCP<const MultiVector> H; 00129 Teuchos::RCP<const MultiVector> KH; 00131 Teuchos::RCP<const MultiVector> MH; 00132 00134 Teuchos::RCP<const MultiVector> R; 00135 00137 Teuchos::RCP<const std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > T; 00138 00139 LOBPCGState() : 00140 V(Teuchos::null),KV(Teuchos::null),MV(Teuchos::null), 00141 X(Teuchos::null),KX(Teuchos::null),MX(Teuchos::null), 00142 P(Teuchos::null),KP(Teuchos::null),MP(Teuchos::null), 00143 H(Teuchos::null),KH(Teuchos::null),MH(Teuchos::null), 00144 R(Teuchos::null),T(Teuchos::null) {}; 00145 }; 00146 00148 00150 00151 00165 class LOBPCGRitzFailure : public AnasaziError {public: 00166 LOBPCGRitzFailure(const std::string& what_arg) : AnasaziError(what_arg) 00167 {}}; 00168 00181 class LOBPCGInitFailure : public AnasaziError {public: 00182 LOBPCGInitFailure(const std::string& what_arg) : AnasaziError(what_arg) 00183 {}}; 00184 00201 class LOBPCGOrthoFailure : public AnasaziError {public: 00202 LOBPCGOrthoFailure(const std::string& what_arg) : AnasaziError(what_arg) 00203 {}}; 00204 00206 00207 00208 template <class ScalarType, class MV, class OP> 00209 class LOBPCG : public Eigensolver<ScalarType,MV,OP> { 00210 public: 00211 00213 00214 00222 LOBPCG( const Teuchos::RCP<Eigenproblem<ScalarType,MV,OP> > &problem, 00223 const Teuchos::RCP<SortManager<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > &sorter, 00224 const Teuchos::RCP<OutputManager<ScalarType> > &printer, 00225 const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > &tester, 00226 const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> > &ortho, 00227 Teuchos::ParameterList ¶ms 00228 ); 00229 00231 virtual ~LOBPCG() {}; 00232 00234 00236 00237 00264 void iterate(); 00265 00291 void initialize(LOBPCGState<ScalarType,MV> newstate); 00292 00296 void initialize(); 00297 00317 bool isInitialized() const; 00318 00329 LOBPCGState<ScalarType,MV> getState() const; 00330 00332 00334 00335 00337 int getNumIters() const; 00338 00340 void resetNumIters(); 00341 00349 Teuchos::RCP<const MV> getRitzVectors(); 00350 00356 std::vector<Value<ScalarType> > getRitzValues(); 00357 00365 std::vector<int> getRitzIndex(); 00366 00367 00373 std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getResNorms(); 00374 00375 00381 std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getRes2Norms(); 00382 00383 00391 std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getRitzRes2Norms(); 00392 00393 00402 int getCurSubspaceDim() const; 00403 00407 int getMaxSubspaceDim() const; 00408 00410 00412 00413 00415 void setStatusTest(Teuchos::RCP<StatusTest<ScalarType,MV,OP> > test); 00416 00418 Teuchos::RCP<StatusTest<ScalarType,MV,OP> > getStatusTest() const; 00419 00421 const Eigenproblem<ScalarType,MV,OP>& getProblem() const; 00422 00423 00432 void setBlockSize(int blockSize); 00433 00434 00436 int getBlockSize() const; 00437 00438 00450 void setAuxVecs(const Teuchos::Array<Teuchos::RCP<const MV> > &auxvecs); 00451 00453 Teuchos::Array<Teuchos::RCP<const MV> > getAuxVecs() const; 00454 00456 00458 00459 00465 void setFullOrtho(bool fullOrtho); 00466 00468 bool getFullOrtho() const; 00469 00471 bool hasP(); 00472 00474 00476 00477 00479 void currentStatus(std::ostream &os); 00480 00482 00483 private: 00484 // 00485 // 00486 // 00487 void setupViews(); 00488 // 00489 // Convenience typedefs 00490 // 00491 typedef SolverUtils<ScalarType,MV,OP> Utils; 00492 typedef MultiVecTraits<ScalarType,MV> MVT; 00493 typedef MultiVecTraitsExt<ScalarType,MV> MVText; 00494 typedef OperatorTraits<ScalarType,MV,OP> OPT; 00495 typedef Teuchos::ScalarTraits<ScalarType> SCT; 00496 typedef typename SCT::magnitudeType MagnitudeType; 00497 const MagnitudeType ONE; 00498 const MagnitudeType ZERO; 00499 const MagnitudeType NANVAL; 00500 // 00501 // Internal structs 00502 // 00503 struct CheckList { 00504 bool checkX, checkMX, checkKX; 00505 bool checkH, checkMH; 00506 bool checkP, checkMP, checkKP; 00507 bool checkR, checkQ; 00508 CheckList() : checkX(false),checkMX(false),checkKX(false), 00509 checkH(false),checkMH(false), 00510 checkP(false),checkMP(false),checkKP(false), 00511 checkR(false),checkQ(false) {}; 00512 }; 00513 // 00514 // Internal methods 00515 // 00516 std::string accuracyCheck(const CheckList &chk, const std::string &where) const; 00517 // 00518 // Classes inputed through constructor that define the eigenproblem to be solved. 00519 // 00520 const Teuchos::RCP<Eigenproblem<ScalarType,MV,OP> > problem_; 00521 const Teuchos::RCP<SortManager<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > sm_; 00522 const Teuchos::RCP<OutputManager<ScalarType> > om_; 00523 Teuchos::RCP<StatusTest<ScalarType,MV,OP> > tester_; 00524 const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> > orthman_; 00525 // 00526 // Information obtained from the eigenproblem 00527 // 00528 Teuchos::RCP<const OP> Op_; 00529 Teuchos::RCP<const OP> MOp_; 00530 Teuchos::RCP<const OP> Prec_; 00531 bool hasM_; 00532 // 00533 // Internal timers 00534 // 00535 Teuchos::RCP<Teuchos::Time> timerOp_, timerMOp_, timerPrec_, 00536 timerSort_, 00537 timerLocalProj_, timerDS_, 00538 timerLocalUpdate_, timerCompRes_, 00539 timerOrtho_, timerInit_; 00540 // 00541 // Counters 00542 // 00543 // Number of operator applications 00544 int count_ApplyOp_, count_ApplyM_, count_ApplyPrec_; 00545 00546 // 00547 // Algorithmic parameters. 00548 // 00549 // blockSize_ is the solver block size 00550 int blockSize_; 00551 // 00552 // fullOrtho_ dictates whether the orthogonalization procedures specified by Hetmaniuk and Lehoucq should 00553 // be activated (see citations at the top of this file) 00554 bool fullOrtho_; 00555 00556 // 00557 // Current solver state 00558 // 00559 // initialized_ specifies that the basis vectors have been initialized and the iterate() routine 00560 // is capable of running; _initialize is controlled by the initialize() member method 00561 // For the implications of the state of initialized_, please see documentation for initialize() 00562 bool initialized_; 00563 // 00564 // nevLocal_ reflects how much of the current basis is valid (0 <= nevLocal_ <= 3*blockSize_) 00565 // this tells us how many of the values in theta_ are valid Ritz values 00566 int nevLocal_; 00567 // 00568 // hasP_ tells us whether there is valid data in P (and KP,MP) 00569 bool hasP_; 00570 // 00571 // State Multivecs 00572 // V_, KV_ MV_ and R_ are primary pointers to allocated multivectors 00573 Teuchos::RCP<MV> V_, KV_, MV_, R_; 00574 // the rest are multivector views into V_, KV_ and MV_ 00575 Teuchos::RCP<MV> X_, KX_, MX_, 00576 H_, KH_, MH_, 00577 P_, KP_, MP_; 00578 00579 // 00580 // if fullOrtho_ == true, then we must produce the following on every iteration: 00581 // [newX newP] = [X H P] [CX;CP] 00582 // the structure of [CX;CP] when using full orthogonalization does not allow us to 00583 // do this in situ, and R_ does not have enough storage for newX and newP. therefore, 00584 // we must allocate additional storage for this. 00585 // otherwise, when not using full orthogonalization, the structure 00586 // [newX newP] = [X H P] [CX1 0 ] 00587 // [CX2 CP2] allows us to work using only R as work space 00588 // [CX3 CP3] 00589 Teuchos::RCP<MV> tmpmvec_; 00590 // 00591 // auxiliary vectors 00592 Teuchos::Array<Teuchos::RCP<const MV> > auxVecs_; 00593 int numAuxVecs_; 00594 // 00595 // Number of iterations that have been performed. 00596 int iter_; 00597 // 00598 // Current eigenvalues, residual norms 00599 std::vector<MagnitudeType> theta_, Rnorms_, R2norms_; 00600 // 00601 // are the residual norms current with the residual? 00602 bool Rnorms_current_, R2norms_current_; 00603 00604 }; 00605 00606 00607 00608 00610 // Constructor 00611 template <class ScalarType, class MV, class OP> 00612 LOBPCG<ScalarType,MV,OP>::LOBPCG( 00613 const Teuchos::RCP<Eigenproblem<ScalarType,MV,OP> > &problem, 00614 const Teuchos::RCP<SortManager<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > &sorter, 00615 const Teuchos::RCP<OutputManager<ScalarType> > &printer, 00616 const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > &tester, 00617 const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> > &ortho, 00618 Teuchos::ParameterList ¶ms 00619 ) : 00620 ONE(Teuchos::ScalarTraits<MagnitudeType>::one()), 00621 ZERO(Teuchos::ScalarTraits<MagnitudeType>::zero()), 00622 NANVAL(Teuchos::ScalarTraits<MagnitudeType>::nan()), 00623 // problem, tools 00624 problem_(problem), 00625 sm_(sorter), 00626 om_(printer), 00627 tester_(tester), 00628 orthman_(ortho), 00629 // timers, counters 00630 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 00631 timerOp_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Operation Op*x")), 00632 timerMOp_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Operation M*x")), 00633 timerPrec_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Operation Prec*x")), 00634 timerSort_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Sorting eigenvalues")), 00635 timerLocalProj_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Local projection")), 00636 timerDS_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Direct solve")), 00637 timerLocalUpdate_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Local update")), 00638 timerCompRes_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Computing residuals")), 00639 timerOrtho_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Orthogonalization")), 00640 timerInit_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Initialization")), 00641 #endif 00642 count_ApplyOp_(0), 00643 count_ApplyM_(0), 00644 count_ApplyPrec_(0), 00645 // internal data 00646 blockSize_(0), 00647 fullOrtho_(params.get("Full Ortho", true)), 00648 initialized_(false), 00649 nevLocal_(0), 00650 hasP_(false), 00651 auxVecs_( Teuchos::Array<Teuchos::RCP<const MV> >(0) ), 00652 numAuxVecs_(0), 00653 iter_(0), 00654 Rnorms_current_(false), 00655 R2norms_current_(false) 00656 { 00657 TEUCHOS_TEST_FOR_EXCEPTION(problem_ == Teuchos::null,std::invalid_argument, 00658 "Anasazi::LOBPCG::constructor: user passed null problem pointer."); 00659 TEUCHOS_TEST_FOR_EXCEPTION(sm_ == Teuchos::null,std::invalid_argument, 00660 "Anasazi::LOBPCG::constructor: user passed null sort manager pointer."); 00661 TEUCHOS_TEST_FOR_EXCEPTION(om_ == Teuchos::null,std::invalid_argument, 00662 "Anasazi::LOBPCG::constructor: user passed null output manager pointer."); 00663 TEUCHOS_TEST_FOR_EXCEPTION(tester_ == Teuchos::null,std::invalid_argument, 00664 "Anasazi::LOBPCG::constructor: user passed null status test pointer."); 00665 TEUCHOS_TEST_FOR_EXCEPTION(orthman_ == Teuchos::null,std::invalid_argument, 00666 "Anasazi::LOBPCG::constructor: user passed null orthogonalization manager pointer."); 00667 TEUCHOS_TEST_FOR_EXCEPTION(problem_->isProblemSet() == false, std::invalid_argument, 00668 "Anasazi::LOBPCG::constructor: problem is not set."); 00669 TEUCHOS_TEST_FOR_EXCEPTION(problem_->isHermitian() == false, std::invalid_argument, 00670 "Anasazi::LOBPCG::constructor: problem is not Hermitian; LOBPCG requires Hermitian problem."); 00671 00672 // get the problem operators 00673 Op_ = problem_->getOperator(); 00674 TEUCHOS_TEST_FOR_EXCEPTION(Op_ == Teuchos::null, std::invalid_argument, 00675 "Anasazi::LOBPCG::constructor: problem provides no operator."); 00676 MOp_ = problem_->getM(); 00677 Prec_ = problem_->getPrec(); 00678 hasM_ = (MOp_ != Teuchos::null); 00679 00680 // set the block size and allocate data 00681 int bs = params.get("Block Size", problem_->getNEV()); 00682 setBlockSize(bs); 00683 } 00684 00685 00687 // Set the block size and make necessary adjustments. 00688 template <class ScalarType, class MV, class OP> 00689 void LOBPCG<ScalarType,MV,OP>::setBlockSize (int newBS) 00690 { 00691 // time spent here counts towards timerInit_ 00692 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 00693 Teuchos::TimeMonitor lcltimer( *timerInit_ ); 00694 #endif 00695 00696 // This routine only allocates space; it doesn't not perform any computation 00697 // if size is decreased, take the first newBS vectors of all and leave state as is 00698 // otherwise, grow/allocate space and set solver to unitialized 00699 00700 Teuchos::RCP<const MV> tmp; 00701 // grab some Multivector to Clone 00702 // in practice, getInitVec() should always provide this, but it is possible to use a 00703 // Eigenproblem with nothing in getInitVec() by manually initializing with initialize(); 00704 // in case of that strange scenario, we will try to Clone from R_ because it is smaller 00705 // than V_, and we don't want to keep V_ around longer than necessary 00706 if (blockSize_ > 0) { 00707 tmp = R_; 00708 } 00709 else { 00710 tmp = problem_->getInitVec(); 00711 TEUCHOS_TEST_FOR_EXCEPTION(tmp == Teuchos::null,std::logic_error, 00712 "Anasazi::LOBPCG::setBlockSize(): eigenproblem did not specify initial vectors to clone from."); 00713 } 00714 00715 TEUCHOS_TEST_FOR_EXCEPTION(newBS <= 0 || static_cast<ptrdiff_t>(newBS) > MVText::GetGlobalLength(*tmp), 00716 std::invalid_argument, "Anasazi::LOBPCG::setBlockSize(): block size must be strictly positive."); 00717 if (newBS == blockSize_) { 00718 // do nothing 00719 return; 00720 } 00721 else if (newBS < blockSize_ && initialized_) { 00722 // 00723 // shrink vectors 00724 00725 // release views so we can modify the bases 00726 X_ = Teuchos::null; 00727 KX_ = Teuchos::null; 00728 MX_ = Teuchos::null; 00729 H_ = Teuchos::null; 00730 KH_ = Teuchos::null; 00731 MH_ = Teuchos::null; 00732 P_ = Teuchos::null; 00733 KP_ = Teuchos::null; 00734 MP_ = Teuchos::null; 00735 00736 // make new indices vectors 00737 std::vector<int> newind(newBS), oldind(newBS); 00738 for (int i=0; i<newBS; i++) { 00739 newind[i] = i; 00740 oldind[i] = i; 00741 } 00742 00743 Teuchos::RCP<MV> newV, newMV, newKV, newR; 00744 Teuchos::RCP<const MV> src; 00745 // allocate R and newV 00746 newR = MVT::Clone(*tmp,newBS); 00747 newV = MVT::Clone(*tmp,newBS*3); 00748 newKV = MVT::Clone(*tmp,newBS*3); 00749 if (hasM_) { 00750 newMV = MVT::Clone(*tmp,newBS*3); 00751 } 00752 00753 // 00754 // if we are initialized, we want to pull the data from V_ into newV: 00755 // bs | bs | bs 00756 // newV = [newX | **** |newP ] 00757 // newKV = [newKX| **** |newKP] 00758 // newMV = [newMX| **** |newMP] 00759 // where 00760 // oldbs | oldbs | oldbs 00761 // V_ = [newX *** | ******* | newP ***] 00762 // KV_ = [newKX *** | ******* | newKP ***] 00763 // MV_ = [newMX *** | ******* | newMP ***] 00764 // 00765 // we don't care to copy the data corresponding to H 00766 // we will not copy the M data if !hasM_, because it doesn't exist 00767 // 00768 00769 // these are shrink operations which preserve their data 00770 theta_.resize(3*newBS); 00771 Rnorms_.resize(newBS); 00772 R2norms_.resize(newBS); 00773 00774 // copy residual vectors: oldind,newind currently contains [0,...,newBS-1] 00775 src = MVT::CloneView(*R_,newind); 00776 MVT::SetBlock(*src,newind,*newR); 00777 // free old memory and point to new memory 00778 R_ = newR; 00779 00780 // copy in order: newX newKX newMX, then newP newKP newMP 00781 // for X: [0,bs-1] <-- [0,bs-1] 00782 src = MVT::CloneView(*V_,oldind); 00783 MVT::SetBlock(*src,newind,*newV); 00784 src = MVT::CloneView(*KV_,oldind); 00785 MVT::SetBlock(*src,newind,*newKV); 00786 if (hasM_) { 00787 src = MVT::CloneView(*MV_,oldind); 00788 MVT::SetBlock(*src,newind,*newMV); 00789 } 00790 // for P: [2*bs, 3*bs-1] <-- [2*oldbs, 2*oldbs+bs-1] 00791 for (int i=0; i<newBS; i++) { 00792 newind[i] += 2*newBS; 00793 oldind[i] += 2*blockSize_; 00794 } 00795 src = MVT::CloneView(*V_,oldind); 00796 MVT::SetBlock(*src,newind,*newV); 00797 src = MVT::CloneView(*KV_,oldind); 00798 MVT::SetBlock(*src,newind,*newKV); 00799 if (hasM_) { 00800 src = MVT::CloneView(*MV_,oldind); 00801 MVT::SetBlock(*src,newind,*newMV); 00802 } 00803 00804 // release temp view 00805 src = Teuchos::null; 00806 00807 // release old allocations and point at new memory 00808 V_ = newV; 00809 KV_ = newKV; 00810 if (hasM_) { 00811 MV_ = newMV; 00812 } 00813 else { 00814 MV_ = V_; 00815 } 00816 } 00817 else { 00818 // newBS > blockSize_ or not initialized 00819 // this is also the scenario for our initial call to setBlockSize(), in the constructor 00820 initialized_ = false; 00821 hasP_ = false; 00822 00823 // release views 00824 X_ = Teuchos::null; 00825 KX_ = Teuchos::null; 00826 MX_ = Teuchos::null; 00827 H_ = Teuchos::null; 00828 KH_ = Teuchos::null; 00829 MH_ = Teuchos::null; 00830 P_ = Teuchos::null; 00831 KP_ = Teuchos::null; 00832 MP_ = Teuchos::null; 00833 00834 // free allocated storage 00835 R_ = Teuchos::null; 00836 V_ = Teuchos::null; 00837 00838 // allocate scalar vectors 00839 theta_.resize(3*newBS,NANVAL); 00840 Rnorms_.resize(newBS,NANVAL); 00841 R2norms_.resize(newBS,NANVAL); 00842 00843 // clone multivectors off of tmp 00844 R_ = MVT::Clone(*tmp,newBS); 00845 V_ = MVT::Clone(*tmp,newBS*3); 00846 KV_ = MVT::Clone(*tmp,newBS*3); 00847 if (hasM_) { 00848 MV_ = MVT::Clone(*tmp,newBS*3); 00849 } 00850 else { 00851 MV_ = V_; 00852 } 00853 } 00854 00855 // allocate tmp space 00856 tmpmvec_ = Teuchos::null; 00857 if (fullOrtho_) { 00858 tmpmvec_ = MVT::Clone(*tmp,newBS); 00859 } 00860 00861 // set new block size 00862 blockSize_ = newBS; 00863 00864 // setup new views 00865 setupViews(); 00866 } 00867 00868 00870 // Setup views into V,KV,MV 00871 template <class ScalarType, class MV, class OP> 00872 void LOBPCG<ScalarType,MV,OP>::setupViews() 00873 { 00874 std::vector<int> ind(blockSize_); 00875 00876 for (int i=0; i<blockSize_; i++) { 00877 ind[i] = i; 00878 } 00879 X_ = MVT::CloneViewNonConst(*V_,ind); 00880 KX_ = MVT::CloneViewNonConst(*KV_,ind); 00881 if (hasM_) { 00882 MX_ = MVT::CloneViewNonConst(*MV_,ind); 00883 } 00884 else { 00885 MX_ = X_; 00886 } 00887 00888 for (int i=0; i<blockSize_; i++) { 00889 ind[i] += blockSize_; 00890 } 00891 H_ = MVT::CloneViewNonConst(*V_,ind); 00892 KH_ = MVT::CloneViewNonConst(*KV_,ind); 00893 if (hasM_) { 00894 MH_ = MVT::CloneViewNonConst(*MV_,ind); 00895 } 00896 else { 00897 MH_ = H_; 00898 } 00899 00900 for (int i=0; i<blockSize_; i++) { 00901 ind[i] += blockSize_; 00902 } 00903 P_ = MVT::CloneViewNonConst(*V_,ind); 00904 KP_ = MVT::CloneViewNonConst(*KV_,ind); 00905 if (hasM_) { 00906 MP_ = MVT::CloneViewNonConst(*MV_,ind); 00907 } 00908 else { 00909 MP_ = P_; 00910 } 00911 } 00912 00913 00915 // Set the auxiliary vectors 00916 template <class ScalarType, class MV, class OP> 00917 void LOBPCG<ScalarType,MV,OP>::setAuxVecs(const Teuchos::Array<Teuchos::RCP<const MV> > &auxvecs) { 00918 typedef typename Teuchos::Array<Teuchos::RCP<const MV> >::iterator tarcpmv; 00919 00920 // set new auxiliary vectors 00921 auxVecs_ = auxvecs; 00922 00923 numAuxVecs_ = 0; 00924 for (tarcpmv i=auxVecs_.begin(); i != auxVecs_.end(); i++) { 00925 numAuxVecs_ += MVT::GetNumberVecs(**i); 00926 } 00927 00928 // If the solver has been initialized, X and P are not necessarily orthogonal to new auxiliary vectors 00929 if (numAuxVecs_ > 0 && initialized_) { 00930 initialized_ = false; 00931 hasP_ = false; 00932 } 00933 00934 if (om_->isVerbosity( Debug ) ) { 00935 // Check almost everything here 00936 CheckList chk; 00937 chk.checkQ = true; 00938 om_->print( Debug, accuracyCheck(chk, ": in setAuxVecs()") ); 00939 } 00940 } 00941 00942 00944 /* Initialize the state of the solver 00945 * 00946 * POST-CONDITIONS: 00947 * 00948 * initialized_ == true 00949 * X is orthonormal, orthogonal to auxVecs_ 00950 * KX = Op*X 00951 * MX = M*X if hasM_ 00952 * theta_ contains Ritz values of X 00953 * R = KX - MX*diag(theta_) 00954 * if hasP() == true, 00955 * P orthogonal to auxVecs_ 00956 * if fullOrtho_ == true, 00957 * P orthonormal and orthogonal to X 00958 * KP = Op*P 00959 * MP = M*P 00960 */ 00961 template <class ScalarType, class MV, class OP> 00962 void LOBPCG<ScalarType,MV,OP>::initialize(LOBPCGState<ScalarType,MV> newstate) 00963 { 00964 // NOTE: memory has been allocated by setBlockSize(). Use SetBlock below; do not Clone 00965 // NOTE: Overall time spent in this routine is counted to timerInit_; portions will also be counted towards other primitives 00966 00967 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 00968 Teuchos::TimeMonitor inittimer( *timerInit_ ); 00969 #endif 00970 00971 std::vector<int> bsind(blockSize_); 00972 for (int i=0; i<blockSize_; i++) bsind[i] = i; 00973 00974 // in LOBPCG, X (the subspace iterate) is primary 00975 // the order of dependence follows like so. 00976 // --init-> X 00977 // --op apply-> MX,KX 00978 // --ritz analysis-> theta 00979 // --optional-> P,MP,KP 00980 // 00981 // if the user specifies all data for a level, we will accept it. 00982 // otherwise, we will generate the whole level, and all subsequent levels. 00983 // 00984 // the data members are ordered based on dependence, and the levels are 00985 // partitioned according to the amount of work required to produce the 00986 // items in a level. 00987 // 00988 // inconsitent multivectors widths and lengths will not be tolerated, and 00989 // will be treated with exceptions. 00990 00991 // set up X, KX, MX: get them from "state" if user specified them 00992 00993 //---------------------------------------- 00994 // set up X, MX, KX 00995 //---------------------------------------- 00996 if (newstate.X != Teuchos::null) { 00997 TEUCHOS_TEST_FOR_EXCEPTION( MVText::GetGlobalLength(*newstate.X) != MVText::GetGlobalLength(*X_), 00998 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.X not correct." ); 00999 // newstate.X must have blockSize_ vectors; any more will be ignored 01000 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.X) < blockSize_, 01001 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.X must have at least block size vectors."); 01002 01003 // put X data in X_ 01004 MVT::SetBlock(*newstate.X,bsind,*X_); 01005 01006 // put MX data in MX_ 01007 if (hasM_) { 01008 if (newstate.MX != Teuchos::null) { 01009 TEUCHOS_TEST_FOR_EXCEPTION( MVText::GetGlobalLength(*newstate.MX) != MVText::GetGlobalLength(*MX_), 01010 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.MX not correct." ); 01011 // newstate.MX must have blockSize_ vectors; any more will be ignored 01012 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.MX) < blockSize_, 01013 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.MX must have at least block size vectors."); 01014 MVT::SetBlock(*newstate.MX,bsind,*MX_); 01015 } 01016 else { 01017 // user didn't specify MX, compute it 01018 { 01019 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01020 Teuchos::TimeMonitor lcltimer( *timerMOp_ ); 01021 #endif 01022 OPT::Apply(*MOp_,*X_,*MX_); 01023 count_ApplyM_ += blockSize_; 01024 } 01025 // we generated MX; we will generate R as well 01026 newstate.R = Teuchos::null; 01027 } 01028 } 01029 01030 // put data in KX 01031 if (newstate.KX != Teuchos::null) { 01032 TEUCHOS_TEST_FOR_EXCEPTION( MVText::GetGlobalLength(*newstate.KX) != MVText::GetGlobalLength(*KX_), 01033 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.KX not correct." ); 01034 // newstate.KX must have blockSize_ vectors; any more will be ignored 01035 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.KX) < blockSize_, 01036 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.KX must have at least block size vectors."); 01037 MVT::SetBlock(*newstate.KX,bsind,*KX_); 01038 } 01039 else { 01040 // user didn't specify KX, compute it 01041 { 01042 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01043 Teuchos::TimeMonitor lcltimer( *timerOp_ ); 01044 #endif 01045 OPT::Apply(*Op_,*X_,*KX_); 01046 count_ApplyOp_ += blockSize_; 01047 } 01048 // we generated KX; we will generate R as well 01049 newstate.R = Teuchos::null; 01050 } 01051 } 01052 else { 01053 // user did not specify X 01054 // we will initialize X, compute KX and MX, and compute R 01055 // 01056 // clear state so we won't use any data from it below 01057 newstate.P = Teuchos::null; 01058 newstate.KP = Teuchos::null; 01059 newstate.MP = Teuchos::null; 01060 newstate.R = Teuchos::null; 01061 newstate.T = Teuchos::null; 01062 01063 // generate a basis and projectAndNormalize 01064 Teuchos::RCP<const MV> ivec = problem_->getInitVec(); 01065 TEUCHOS_TEST_FOR_EXCEPTION(ivec == Teuchos::null,std::logic_error, 01066 "Anasazi::LOBPCG::initialize(): Eigenproblem did not specify initial vectors to clone from."); 01067 01068 int initSize = MVT::GetNumberVecs(*ivec); 01069 if (initSize > blockSize_) { 01070 // we need only the first blockSize_ vectors from ivec; get a view of them 01071 initSize = blockSize_; 01072 std::vector<int> ind(blockSize_); 01073 for (int i=0; i<blockSize_; i++) ind[i] = i; 01074 ivec = MVT::CloneView(*ivec,ind); 01075 } 01076 01077 // assign ivec to first part of X_ 01078 if (initSize > 0) { 01079 std::vector<int> ind(initSize); 01080 for (int i=0; i<initSize; i++) ind[i] = i; 01081 MVT::SetBlock(*ivec,ind,*X_); 01082 } 01083 // fill the rest of X_ with random 01084 if (blockSize_ > initSize) { 01085 std::vector<int> ind(blockSize_ - initSize); 01086 for (int i=0; i<blockSize_ - initSize; i++) ind[i] = initSize + i; 01087 Teuchos::RCP<MV> rX = MVT::CloneViewNonConst(*X_,ind); 01088 MVT::MvRandom(*rX); 01089 rX = Teuchos::null; 01090 } 01091 01092 // put data in MX 01093 if (hasM_) { 01094 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01095 Teuchos::TimeMonitor lcltimer( *timerMOp_ ); 01096 #endif 01097 OPT::Apply(*MOp_,*X_,*MX_); 01098 count_ApplyM_ += blockSize_; 01099 } 01100 01101 // remove auxVecs from X_ and normalize it 01102 if (numAuxVecs_ > 0) { 01103 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01104 Teuchos::TimeMonitor lcltimer( *timerOrtho_ ); 01105 #endif 01106 Teuchos::Array<Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > > dummy; 01107 int rank = orthman_->projectAndNormalizeMat(*X_,auxVecs_,dummy,Teuchos::null,MX_); 01108 TEUCHOS_TEST_FOR_EXCEPTION(rank != blockSize_, LOBPCGInitFailure, 01109 "Anasazi::LOBPCG::initialize(): Couldn't generate initial basis of full rank."); 01110 } 01111 else { 01112 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01113 Teuchos::TimeMonitor lcltimer( *timerOrtho_ ); 01114 #endif 01115 int rank = orthman_->normalizeMat(*X_,Teuchos::null,MX_); 01116 TEUCHOS_TEST_FOR_EXCEPTION(rank != blockSize_, LOBPCGInitFailure, 01117 "Anasazi::LOBPCG::initialize(): Couldn't generate initial basis of full rank."); 01118 } 01119 01120 // put data in KX 01121 { 01122 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01123 Teuchos::TimeMonitor lcltimer( *timerOp_ ); 01124 #endif 01125 OPT::Apply(*Op_,*X_,*KX_); 01126 count_ApplyOp_ += blockSize_; 01127 } 01128 } // end if (newstate.X != Teuchos::null) 01129 01130 01131 //---------------------------------------- 01132 // set up Ritz values 01133 //---------------------------------------- 01134 theta_.resize(3*blockSize_,NANVAL); 01135 if (newstate.T != Teuchos::null) { 01136 TEUCHOS_TEST_FOR_EXCEPTION( (signed int)(newstate.T->size()) < blockSize_, 01137 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.T must contain at least block size Ritz values."); 01138 for (int i=0; i<blockSize_; i++) { 01139 theta_[i] = (*newstate.T)[i]; 01140 } 01141 nevLocal_ = blockSize_; 01142 } 01143 else { 01144 // get ritz vecs/vals 01145 Teuchos::SerialDenseMatrix<int,ScalarType> KK(blockSize_,blockSize_), 01146 MM(blockSize_,blockSize_), 01147 S(blockSize_,blockSize_); 01148 { 01149 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01150 Teuchos::TimeMonitor lcltimer( *timerLocalProj_ ); 01151 #endif 01152 // project K 01153 MVT::MvTransMv(ONE,*X_,*KX_,KK); 01154 // project M 01155 MVT::MvTransMv(ONE,*X_,*MX_,MM); 01156 nevLocal_ = blockSize_; 01157 } 01158 01159 // solve the projected problem 01160 { 01161 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01162 Teuchos::TimeMonitor lcltimer( *timerDS_ ); 01163 #endif 01164 Utils::directSolver(blockSize_, KK, Teuchos::rcpFromRef(MM), S, theta_, nevLocal_, 1); 01165 TEUCHOS_TEST_FOR_EXCEPTION(nevLocal_ != blockSize_,LOBPCGInitFailure, 01166 "Anasazi::LOBPCG::initialize(): Initial Ritz analysis did not produce enough Ritz pairs to initialize algorithm."); 01167 } 01168 01169 // We only have blockSize_ ritz pairs, ergo we do not need to select. 01170 // However, we still require them to be ordered correctly 01171 { 01172 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01173 Teuchos::TimeMonitor lcltimer( *timerSort_ ); 01174 #endif 01175 01176 std::vector<int> order(blockSize_); 01177 // 01178 // sort the first blockSize_ values in theta_ 01179 sm_->sort(theta_, Teuchos::rcpFromRef(order), blockSize_); // don't catch exception 01180 // 01181 // apply the same ordering to the primitive ritz vectors 01182 Utils::permuteVectors(order,S); 01183 } 01184 01185 // update the solution, use R for storage 01186 { 01187 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01188 Teuchos::TimeMonitor lcltimer( *timerLocalUpdate_ ); 01189 #endif 01190 // X <- X*S 01191 MVT::MvAddMv( ONE, *X_, ZERO, *X_, *R_ ); 01192 MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *X_ ); 01193 // KX <- KX*S 01194 MVT::MvAddMv( ONE, *KX_, ZERO, *KX_, *R_ ); 01195 MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *KX_ ); 01196 if (hasM_) { 01197 // MX <- MX*S 01198 MVT::MvAddMv( ONE, *MX_, ZERO, *MX_, *R_ ); 01199 MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *MX_ ); 01200 } 01201 } 01202 } 01203 01204 //---------------------------------------- 01205 // compute R 01206 //---------------------------------------- 01207 if (newstate.R != Teuchos::null) { 01208 TEUCHOS_TEST_FOR_EXCEPTION( MVText::GetGlobalLength(*newstate.R) != MVText::GetGlobalLength(*R_), 01209 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.R not correct." ); 01210 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.R) < blockSize_, 01211 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.R must have blockSize number of vectors." ); 01212 MVT::SetBlock(*newstate.R,bsind,*R_); 01213 } 01214 else { 01215 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01216 Teuchos::TimeMonitor lcltimer( *timerCompRes_ ); 01217 #endif 01218 // form R <- KX - MX*T 01219 MVT::MvAddMv(ZERO,*KX_,ONE,*KX_,*R_); 01220 Teuchos::SerialDenseMatrix<int,ScalarType> T(blockSize_,blockSize_); 01221 for (int i=0; i<blockSize_; i++) T(i,i) = theta_[i]; 01222 MVT::MvTimesMatAddMv(-ONE,*MX_,T,ONE,*R_); 01223 } 01224 01225 // R has been updated; mark the norms as out-of-date 01226 Rnorms_current_ = false; 01227 R2norms_current_ = false; 01228 01229 // put data in P,KP,MP: P is not used to set theta 01230 if (newstate.P != Teuchos::null) { 01231 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.P) < blockSize_ , 01232 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.P must have blockSize number of vectors." ); 01233 TEUCHOS_TEST_FOR_EXCEPTION( MVText::GetGlobalLength(*newstate.P) != MVText::GetGlobalLength(*P_), 01234 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.P not correct." ); 01235 hasP_ = true; 01236 01237 // set P_ 01238 MVT::SetBlock(*newstate.P,bsind,*P_); 01239 01240 // set/compute KP_ 01241 if (newstate.KP != Teuchos::null) { 01242 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.KP) < blockSize_, 01243 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.KP must have blockSize number of vectors." ); 01244 TEUCHOS_TEST_FOR_EXCEPTION( MVText::GetGlobalLength(*newstate.KP) != MVText::GetGlobalLength(*KP_), 01245 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.KP not correct." ); 01246 MVT::SetBlock(*newstate.KP,bsind,*KP_); 01247 } 01248 else { 01249 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01250 Teuchos::TimeMonitor lcltimer( *timerOp_ ); 01251 #endif 01252 OPT::Apply(*Op_,*P_,*KP_); 01253 count_ApplyOp_ += blockSize_; 01254 } 01255 01256 // set/compute MP_ 01257 if (hasM_) { 01258 if (newstate.MP != Teuchos::null) { 01259 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.MP) < blockSize_, 01260 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.MP must have blockSize number of vectors." ); 01261 TEUCHOS_TEST_FOR_EXCEPTION( MVText::GetGlobalLength(*newstate.MP) != MVText::GetGlobalLength(*MP_), 01262 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.MP not correct." ); 01263 MVT::SetBlock(*newstate.MP,bsind,*MP_); 01264 } 01265 else { 01266 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01267 Teuchos::TimeMonitor lcltimer( *timerMOp_ ); 01268 #endif 01269 OPT::Apply(*MOp_,*P_,*MP_); 01270 count_ApplyM_ += blockSize_; 01271 } 01272 } 01273 } 01274 else { 01275 hasP_ = false; 01276 } 01277 01278 // finally, we are initialized 01279 initialized_ = true; 01280 01281 if (om_->isVerbosity( Debug ) ) { 01282 // Check almost everything here 01283 CheckList chk; 01284 chk.checkX = true; 01285 chk.checkKX = true; 01286 chk.checkMX = true; 01287 chk.checkP = true; 01288 chk.checkKP = true; 01289 chk.checkMP = true; 01290 chk.checkR = true; 01291 chk.checkQ = true; 01292 om_->print( Debug, accuracyCheck(chk, ": after initialize()") ); 01293 } 01294 01295 } 01296 01297 template <class ScalarType, class MV, class OP> 01298 void LOBPCG<ScalarType,MV,OP>::initialize() 01299 { 01300 LOBPCGState<ScalarType,MV> empty; 01301 initialize(empty); 01302 } 01303 01304 01306 // Instruct the solver to use full orthogonalization 01307 template <class ScalarType, class MV, class OP> 01308 void LOBPCG<ScalarType,MV,OP>::setFullOrtho (bool fullOrtho) 01309 { 01310 if ( fullOrtho_ == true || initialized_ == false || fullOrtho == fullOrtho_ ) { 01311 // state is already orthogonalized or solver is not initialized 01312 fullOrtho_ = fullOrtho; 01313 } 01314 else { 01315 // solver is initialized, state is not fully orthogonalized, and user has requested full orthogonalization 01316 // ergo, we must throw away data in P 01317 fullOrtho_ = true; 01318 hasP_ = false; 01319 } 01320 01321 // the user has called setFullOrtho, so the class has been instantiated 01322 // ergo, the data has already been allocated, i.e., setBlockSize() has been called 01323 // if it is already allocated, it should be the proper size 01324 if (fullOrtho_ && tmpmvec_ == Teuchos::null) { 01325 // allocated the workspace 01326 tmpmvec_ = MVT::Clone(*X_,blockSize_); 01327 } 01328 else if (fullOrtho_==false) { 01329 // free the workspace 01330 tmpmvec_ = Teuchos::null; 01331 } 01332 } 01333 01334 01336 // Perform LOBPCG iterations until the StatusTest tells us to stop. 01337 template <class ScalarType, class MV, class OP> 01338 void LOBPCG<ScalarType,MV,OP>::iterate () 01339 { 01340 // 01341 // Allocate/initialize data structures 01342 // 01343 if (initialized_ == false) { 01344 initialize(); 01345 } 01346 01347 // 01348 // Miscellaneous definitions 01349 const int oneBlock = blockSize_; 01350 const int twoBlocks = 2*blockSize_; 01351 const int threeBlocks = 3*blockSize_; 01352 01353 std::vector<int> indblock1(blockSize_), indblock2(blockSize_), indblock3(blockSize_); 01354 for (int i=0; i<blockSize_; i++) { 01355 indblock1[i] = i; 01356 indblock2[i] = i + blockSize_; 01357 indblock3[i] = i + 2*blockSize_; 01358 } 01359 01360 // 01361 // Define dense projected/local matrices 01362 // KK = Local stiffness matrix (size: 3*blockSize_ x 3*blockSize_) 01363 // MM = Local mass matrix (size: 3*blockSize_ x 3*blockSize_) 01364 // S = Local eigenvectors (size: 3*blockSize_ x 3*blockSize_) 01365 Teuchos::SerialDenseMatrix<int,ScalarType> KK( threeBlocks, threeBlocks ), 01366 MM( threeBlocks, threeBlocks ), 01367 S( threeBlocks, threeBlocks ); 01368 01369 while (tester_->checkStatus(this) != Passed) { 01370 01371 // Print information on current status 01372 if (om_->isVerbosity(Debug)) { 01373 currentStatus( om_->stream(Debug) ); 01374 } 01375 else if (om_->isVerbosity(IterationDetails)) { 01376 currentStatus( om_->stream(IterationDetails) ); 01377 } 01378 01379 // increment iteration counter 01380 iter_++; 01381 01382 // Apply the preconditioner on the residuals: H <- Prec*R 01383 if (Prec_ != Teuchos::null) { 01384 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01385 Teuchos::TimeMonitor lcltimer( *timerPrec_ ); 01386 #endif 01387 OPT::Apply( *Prec_, *R_, *H_ ); // don't catch the exception 01388 count_ApplyPrec_ += blockSize_; 01389 } 01390 else { 01391 MVT::MvAddMv(ONE,*R_,ZERO,*R_,*H_); 01392 } 01393 01394 // Apply the mass matrix on H 01395 if (hasM_) { 01396 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01397 Teuchos::TimeMonitor lcltimer( *timerMOp_ ); 01398 #endif 01399 OPT::Apply( *MOp_, *H_, *MH_); // don't catch the exception 01400 count_ApplyM_ += blockSize_; 01401 } 01402 01403 // orthogonalize H against the auxiliary vectors 01404 // optionally: orthogonalize H against X and P ([X P] is already orthonormal) 01405 Teuchos::Array<Teuchos::RCP<const MV> > Q; 01406 Q = auxVecs_; 01407 if (fullOrtho_) { 01408 // X and P are not contiguous, so there is not much point in putting them under 01409 // a single multivector view 01410 Q.push_back(X_); 01411 if (hasP_) { 01412 Q.push_back(P_); 01413 } 01414 } 01415 { 01416 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01417 Teuchos::TimeMonitor lcltimer( *timerOrtho_ ); 01418 #endif 01419 Teuchos::Array<Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > > dummyC = 01420 Teuchos::tuple<Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > >(Teuchos::null); 01421 int rank = orthman_->projectAndNormalizeMat(*H_,Q,dummyC,Teuchos::null,MH_); 01422 // our views are currently in place; it is safe to throw an exception 01423 TEUCHOS_TEST_FOR_EXCEPTION(rank != blockSize_,LOBPCGOrthoFailure, 01424 "Anasazi::LOBPCG::iterate(): unable to compute orthonormal basis for H."); 01425 } 01426 01427 if (om_->isVerbosity( Debug ) ) { 01428 CheckList chk; 01429 chk.checkH = true; 01430 chk.checkMH = true; 01431 om_->print( Debug, accuracyCheck(chk, ": after ortho H") ); 01432 } 01433 else if (om_->isVerbosity( OrthoDetails ) ) { 01434 CheckList chk; 01435 chk.checkH = true; 01436 chk.checkMH = true; 01437 om_->print( OrthoDetails, accuracyCheck(chk,": after ortho H") ); 01438 } 01439 01440 // Apply the stiffness matrix to H 01441 { 01442 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01443 Teuchos::TimeMonitor lcltimer( *timerOp_ ); 01444 #endif 01445 OPT::Apply( *Op_, *H_, *KH_); // don't catch the exception 01446 count_ApplyOp_ += blockSize_; 01447 } 01448 01449 if (hasP_) { 01450 nevLocal_ = threeBlocks; 01451 } 01452 else { 01453 nevLocal_ = twoBlocks; 01454 } 01455 01456 // 01457 // we need bases: [X H P] and [H P] (only need the latter if fullOrtho == false) 01458 // we need to perform the following operations: 01459 // X' [KX KH KP] 01460 // X' [MX MH MP] 01461 // H' [KH KP] 01462 // H' [MH MP] 01463 // P' [KP] 01464 // P' [MP] 01465 // [X H P] CX 01466 // [X H P] CP if fullOrtho 01467 // [H P] CP if !fullOrtho 01468 // 01469 // since M[X H P] is potentially the same memory as [X H P], and 01470 // because we are not allowed to have overlapping non-const views of 01471 // a multivector, we will now abandon our non-const views in favor of 01472 // const views 01473 // 01474 X_ = Teuchos::null; 01475 KX_ = Teuchos::null; 01476 MX_ = Teuchos::null; 01477 H_ = Teuchos::null; 01478 KH_ = Teuchos::null; 01479 MH_ = Teuchos::null; 01480 P_ = Teuchos::null; 01481 KP_ = Teuchos::null; 01482 MP_ = Teuchos::null; 01483 Teuchos::RCP<const MV> cX, cH, cXHP, cHP, cK_XHP, cK_HP, cM_XHP, cM_HP, cP, cK_P, cM_P; 01484 { 01485 cX = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indblock1); 01486 cH = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indblock2); 01487 01488 std::vector<int> indXHP(nevLocal_); 01489 for (int i=0; i<nevLocal_; i++) { 01490 indXHP[i] = i; 01491 } 01492 cXHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indXHP); 01493 cK_XHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(KV_),indXHP); 01494 if (hasM_) { 01495 cM_XHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(MV_),indXHP); 01496 } 01497 else { 01498 cM_XHP = cXHP; 01499 } 01500 01501 std::vector<int> indHP(nevLocal_-blockSize_); 01502 for (int i=blockSize_; i<nevLocal_; i++) { 01503 indHP[i-blockSize_] = i; 01504 } 01505 cHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indHP); 01506 cK_HP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(KV_),indHP); 01507 if (hasM_) { 01508 cM_HP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(MV_),indHP); 01509 } 01510 else { 01511 cM_HP = cHP; 01512 } 01513 01514 if (nevLocal_ == threeBlocks) { 01515 cP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indblock3); 01516 cK_P = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(KV_),indblock3); 01517 if (hasM_) { 01518 cM_P = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(MV_),indblock3); 01519 } 01520 else { 01521 cM_P = cP; 01522 } 01523 } 01524 } 01525 01526 // 01527 //---------------------------------------- 01528 // Form "local" mass and stiffness matrices 01529 //---------------------------------------- 01530 { 01531 // We will form only the block upper triangular part of 01532 // [X H P]' K [X H P] and [X H P]' M [X H P] 01533 // Get the necessary views into KK and MM: 01534 // [--K1--] [--M1--] 01535 // KK = [ -K2-] MM = [ -M2-] 01536 // [ K3] [ M3] 01537 // 01538 // It is okay to declare a zero-area view of a Teuchos::SerialDenseMatrix 01539 // 01540 Teuchos::SerialDenseMatrix<int,ScalarType> 01541 K1(Teuchos::View,KK,blockSize_,nevLocal_ ,0*blockSize_,0*blockSize_), 01542 K2(Teuchos::View,KK,blockSize_,nevLocal_-1*blockSize_,1*blockSize_,1*blockSize_), 01543 K3(Teuchos::View,KK,blockSize_,nevLocal_-2*blockSize_,2*blockSize_,2*blockSize_), 01544 M1(Teuchos::View,MM,blockSize_,nevLocal_ ,0*blockSize_,0*blockSize_), 01545 M2(Teuchos::View,MM,blockSize_,nevLocal_-1*blockSize_,1*blockSize_,1*blockSize_), 01546 M3(Teuchos::View,MM,blockSize_,nevLocal_-2*blockSize_,2*blockSize_,2*blockSize_); 01547 { 01548 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01549 Teuchos::TimeMonitor lcltimer( *timerLocalProj_ ); 01550 #endif 01551 MVT::MvTransMv( ONE, *cX, *cK_XHP, K1 ); 01552 MVT::MvTransMv( ONE, *cX, *cM_XHP, M1 ); 01553 MVT::MvTransMv( ONE, *cH, *cK_HP , K2 ); 01554 MVT::MvTransMv( ONE, *cH, *cM_HP , M2 ); 01555 if (nevLocal_ == threeBlocks) { 01556 MVT::MvTransMv( ONE, *cP, *cK_P, K3 ); 01557 MVT::MvTransMv( ONE, *cP, *cM_P, M3 ); 01558 } 01559 } 01560 } 01561 // below, we only need bases [X H P] and [H P] and friends 01562 // furthermore, we only need [H P] and friends if fullOrtho == false 01563 // clear the others now 01564 cX = Teuchos::null; 01565 cH = Teuchos::null; 01566 cP = Teuchos::null; 01567 cK_P = Teuchos::null; 01568 cM_P = Teuchos::null; 01569 if (fullOrtho_ == true) { 01570 cHP = Teuchos::null; 01571 cK_HP = Teuchos::null; 01572 cM_HP = Teuchos::null; 01573 } 01574 01575 // 01576 //--------------------------------------------------- 01577 // Perform a spectral decomposition of (KK,MM) 01578 //--------------------------------------------------- 01579 // 01580 // Get pointers to relevant part of KK, MM and S for Rayleigh-Ritz analysis 01581 Teuchos::SerialDenseMatrix<int,ScalarType> lclKK(Teuchos::View,KK,nevLocal_,nevLocal_), 01582 lclMM(Teuchos::View,MM,nevLocal_,nevLocal_), 01583 lclS(Teuchos::View, S,nevLocal_,nevLocal_); 01584 { 01585 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01586 Teuchos::TimeMonitor lcltimer( *timerDS_ ); 01587 #endif 01588 int localSize = nevLocal_; 01589 Utils::directSolver(localSize, lclKK, Teuchos::rcpFromRef(lclMM), lclS, theta_, nevLocal_, 0); 01590 // localSize tells directSolver() how big KK,MM are 01591 // however, directSolver() may choose to use only the principle submatrices of KK,MM 01592 // because of loss of MM-orthogonality in the projected eigenvectors 01593 // nevLocal_ tells us how much it used, telling us the effective localSize 01594 // (i.e., how much of KK,MM used by directSolver) 01595 // we will not tolerate any indefiniteness, and will throw an exception if it was 01596 // detected by directSolver 01597 // 01598 if (nevLocal_ != localSize) { 01599 // before throwing the exception, and thereby leaving iterate(), setup the views again 01600 // first, clear the const views 01601 cXHP = Teuchos::null; 01602 cK_XHP = Teuchos::null; 01603 cM_XHP = Teuchos::null; 01604 cHP = Teuchos::null; 01605 cK_HP = Teuchos::null; 01606 cM_HP = Teuchos::null; 01607 setupViews(); 01608 } 01609 TEUCHOS_TEST_FOR_EXCEPTION(nevLocal_ != localSize, LOBPCGRitzFailure, 01610 "Anasazi::LOBPCG::iterate(): indefiniteness detected in projected mass matrix." ); 01611 } 01612 01613 // 01614 //--------------------------------------------------- 01615 // Sort the ritz values using the sort manager 01616 //--------------------------------------------------- 01617 Teuchos::LAPACK<int,ScalarType> lapack; 01618 Teuchos::BLAS<int,ScalarType> blas; 01619 { 01620 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01621 Teuchos::TimeMonitor lcltimer( *timerSort_ ); 01622 #endif 01623 01624 std::vector<int> order(nevLocal_); 01625 // 01626 // Sort the first nevLocal_ values in theta_ 01627 sm_->sort(theta_, Teuchos::rcpFromRef(order), nevLocal_); // don't catch exception 01628 // 01629 // Sort the primitive ritz vectors 01630 Utils::permuteVectors(order,lclS); 01631 } 01632 01633 // 01634 //---------------------------------------- 01635 // Compute coefficients for X and P under [X H P] 01636 //---------------------------------------- 01637 // Before computing X,P, optionally perform orthogonalization per Hetmaniuk,Lehoucq paper 01638 // CX will be the coefficients of [X,H,P] for new X, CP for new P 01639 // The paper suggests orthogonalizing CP against CX and orthonormalizing CP, w.r.t. MM 01640 // Here, we will also orthonormalize CX. 01641 // This is accomplished using the Cholesky factorization of [CX CP]^H lclMM [CX CP] 01642 Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > CX, CP; 01643 if (fullOrtho_) { 01644 // build orthonormal basis for ( 0 ) that is MM orthogonal to ( S11 ) 01645 // ( S21 ) ( S21 ) 01646 // ( S31 ) ( S31 ) 01647 // Do this using Cholesky factorization of ( S11 0 ) 01648 // ( S21 S21 ) 01649 // ( S31 S31 ) 01650 // ( S11 0 ) 01651 // Build C = ( S21 S21 ) 01652 // ( S31 S31 ) 01653 Teuchos::SerialDenseMatrix<int,ScalarType> C(nevLocal_,twoBlocks), 01654 MMC(nevLocal_,twoBlocks), 01655 CMMC(twoBlocks ,twoBlocks); 01656 01657 // first block of rows: ( S11 0 ) 01658 for (int j=0; j<oneBlock; j++) { 01659 for (int i=0; i<oneBlock; i++) { 01660 // CX 01661 C(i,j) = lclS(i,j); 01662 // CP 01663 C(i,j+oneBlock) = ZERO; 01664 } 01665 } 01666 // second block of rows: (S21 S21) 01667 for (int j=0; j<oneBlock; j++) { 01668 for (int i=oneBlock; i<twoBlocks; i++) { 01669 // CX 01670 C(i,j) = lclS(i,j); 01671 // CP 01672 C(i,j+oneBlock) = lclS(i,j); 01673 } 01674 } 01675 // third block of rows 01676 if (nevLocal_ == threeBlocks) { 01677 for (int j=0; j<oneBlock; j++) { 01678 for (int i=twoBlocks; i<threeBlocks; i++) { 01679 // CX 01680 C(i,j) = lclS(i,j); 01681 // CP 01682 C(i,j+oneBlock) = lclS(i,j); 01683 } 01684 } 01685 } 01686 01687 // compute MMC = lclMM*C 01688 { 01689 int teuchosret; 01690 teuchosret = MMC.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,C,ZERO); 01691 TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error, 01692 "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply"); 01693 // compute CMMC = C^H*MMC == C^H*lclMM*C 01694 teuchosret = CMMC.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,C,MMC,ZERO); 01695 TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error, 01696 "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply"); 01697 } 01698 01699 // compute R (cholesky) of CMMC 01700 { 01701 int info; 01702 lapack.POTRF('U',twoBlocks,CMMC.values(),CMMC.stride(),&info); 01703 // our views ARE NOT currently in place; we must reestablish them before throwing an exception 01704 if (info != 0) { 01705 // Check symmetry of H'*K*H, this might be the first place a non-Hermitian operator will cause a problem. 01706 Teuchos::SerialDenseMatrix<int,ScalarType> K22(Teuchos::View,KK,blockSize_,blockSize_,1*blockSize_,1*blockSize_); 01707 Teuchos::SerialDenseMatrix<int,ScalarType> K22_trans( K22, Teuchos::CONJ_TRANS ); 01708 K22_trans -= K22; 01709 MagnitudeType symNorm = K22_trans.normOne(); 01710 MagnitudeType tol = SCT::magnitude(SCT::squareroot(SCT::eps())); 01711 01712 cXHP = Teuchos::null; 01713 cHP = Teuchos::null; 01714 cK_XHP = Teuchos::null; 01715 cK_HP = Teuchos::null; 01716 cM_XHP = Teuchos::null; 01717 cM_HP = Teuchos::null; 01718 setupViews(); 01719 01720 std::string errMsg = "Anasazi::LOBPCG::iterate(): Cholesky factorization failed during full orthogonalization."; 01721 if ( symNorm < tol ) 01722 { 01723 TEUCHOS_TEST_FOR_EXCEPTION(info != 0, LOBPCGOrthoFailure, errMsg ); 01724 } 01725 else 01726 { 01727 errMsg += " Check the operator A (or K), it may not be Hermitian!"; 01728 TEUCHOS_TEST_FOR_EXCEPTION(info != 0, LOBPCGOrthoFailure, errMsg ); 01729 } 01730 } 01731 } 01732 // compute C = C inv(R) 01733 blas.TRSM(Teuchos::RIGHT_SIDE,Teuchos::UPPER_TRI,Teuchos::NO_TRANS,Teuchos::NON_UNIT_DIAG, 01734 nevLocal_,twoBlocks,ONE,CMMC.values(),CMMC.stride(),C.values(),C.stride()); 01735 // put C(:,0:oneBlock-1) into CX 01736 CX = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,C,nevLocal_,oneBlock,0,0) ); 01737 // put C(:,oneBlock:twoBlocks-1) into CP 01738 CP = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,C,nevLocal_,oneBlock,0,oneBlock) ); 01739 01740 // check the results 01741 if (om_->isVerbosity( Debug ) ) { 01742 Teuchos::SerialDenseMatrix<int,ScalarType> tmp1(nevLocal_,oneBlock), 01743 tmp2(oneBlock,oneBlock); 01744 MagnitudeType tmp; 01745 int teuchosret; 01746 std::stringstream os; 01747 os.precision(2); 01748 os.setf(std::ios::scientific, std::ios::floatfield); 01749 01750 os << " Checking Full Ortho: iteration " << iter_ << std::endl; 01751 01752 // check CX^T MM CX == I 01753 // compute tmp1 = MM*CX 01754 teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,*CX,ZERO); 01755 TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error, 01756 "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply"); 01757 // compute tmp2 = CX^H*tmp1 == CX^H*MM*CX 01758 teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,*CX,tmp1,ZERO); 01759 TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error, 01760 "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply"); 01761 // subtrace tmp2 - I == CX^H * MM * CX - I 01762 for (int i=0; i<oneBlock; i++) tmp2(i,i) -= ONE; 01763 tmp = tmp2.normFrobenius(); 01764 os << " >> Error in CX^H MM CX == I : " << tmp << std::endl; 01765 01766 // check CP^T MM CP == I 01767 // compute tmp1 = MM*CP 01768 teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,*CP,ZERO); 01769 TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error, 01770 "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply"); 01771 // compute tmp2 = CP^H*tmp1 == CP^H*MM*CP 01772 teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,*CP,tmp1,ZERO); 01773 TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error, 01774 "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply"); 01775 // subtrace tmp2 - I == CP^H * MM * CP - I 01776 for (int i=0; i<oneBlock; i++) tmp2(i,i) -= ONE; 01777 tmp = tmp2.normFrobenius(); 01778 os << " >> Error in CP^H MM CP == I : " << tmp << std::endl; 01779 01780 // check CX^T MM CP == 0 01781 // compute tmp1 = MM*CP 01782 teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,*CP,ZERO); 01783 TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply"); 01784 // compute tmp2 = CX^H*tmp1 == CX^H*MM*CP 01785 teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,*CX,tmp1,ZERO); 01786 TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply"); 01787 // subtrace tmp2 == CX^H * MM * CP 01788 tmp = tmp2.normFrobenius(); 01789 os << " >> Error in CX^H MM CP == 0 : " << tmp << std::endl; 01790 01791 os << std::endl; 01792 om_->print(Debug,os.str()); 01793 } 01794 } 01795 else { 01796 // [S11 ... ...] 01797 // S = [S21 ... ...] 01798 // [S31 ... ...] 01799 // 01800 // CX = [S11] 01801 // [S21] 01802 // [S31] -> X = [X H P] CX 01803 // 01804 // CP = [S21] -> P = [H P] CP 01805 // [S31] 01806 // 01807 CX = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,lclS,nevLocal_ ,oneBlock,0 ,0) ); 01808 CP = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,lclS,nevLocal_-oneBlock,oneBlock,oneBlock,0) ); 01809 } 01810 01811 // 01812 //---------------------------------------- 01813 // Compute new X and new P 01814 //---------------------------------------- 01815 // Note: Use R as a temporary work space and (if full ortho) tmpMV as well 01816 { 01817 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01818 Teuchos::TimeMonitor lcltimer( *timerLocalUpdate_ ); 01819 #endif 01820 01821 // if full ortho, then CX and CP are dense 01822 // we multiply [X H P]*CX into tmpMV 01823 // [X H P]*CP into R 01824 // then put V(:,firstblock) <- tmpMV 01825 // V(:,thirdblock) <- R 01826 // 01827 // if no full ortho, then [H P]*CP doesn't reference first block (X) 01828 // of V, so that we can modify it before computing P 01829 // so we multiply [X H P]*CX into R 01830 // V(:,firstblock) <- R 01831 // multiply [H P]*CP into R 01832 // V(:,thirdblock) <- R 01833 // 01834 // mutatis mutandis for K[XP] and M[XP] 01835 // 01836 // use SetBlock to do the assignments into V_ 01837 // 01838 // in either case, views are only allowed to be overlapping 01839 // if they are const, and it should be assume that SetBlock 01840 // creates a view of the associated part 01841 // 01842 // we have from above const-pointers to [KM]XHP, [KM]HP and (if hasP) [KM]P 01843 // 01844 if (fullOrtho_) { 01845 // X,P 01846 MVT::MvTimesMatAddMv(ONE,*cXHP,*CX,ZERO,*tmpmvec_); 01847 MVT::MvTimesMatAddMv(ONE,*cXHP,*CP,ZERO,*R_); 01848 cXHP = Teuchos::null; 01849 MVT::SetBlock(*tmpmvec_,indblock1,*V_); 01850 MVT::SetBlock(*R_ ,indblock3,*V_); 01851 // KX,KP 01852 MVT::MvTimesMatAddMv(ONE,*cK_XHP,*CX,ZERO,*tmpmvec_); 01853 MVT::MvTimesMatAddMv(ONE,*cK_XHP,*CP,ZERO,*R_); 01854 cK_XHP = Teuchos::null; 01855 MVT::SetBlock(*tmpmvec_,indblock1,*KV_); 01856 MVT::SetBlock(*R_ ,indblock3,*KV_); 01857 // MX,MP 01858 if (hasM_) { 01859 MVT::MvTimesMatAddMv(ONE,*cM_XHP,*CX,ZERO,*tmpmvec_); 01860 MVT::MvTimesMatAddMv(ONE,*cM_XHP,*CP,ZERO,*R_); 01861 cM_XHP = Teuchos::null; 01862 MVT::SetBlock(*tmpmvec_,indblock1,*MV_); 01863 MVT::SetBlock(*R_ ,indblock3,*MV_); 01864 } 01865 else { 01866 cM_XHP = Teuchos::null; 01867 } 01868 } 01869 else { 01870 // X,P 01871 MVT::MvTimesMatAddMv(ONE,*cXHP,*CX,ZERO,*R_); 01872 cXHP = Teuchos::null; 01873 MVT::SetBlock(*R_,indblock1,*V_); 01874 MVT::MvTimesMatAddMv(ONE,*cHP,*CP,ZERO,*R_); 01875 cHP = Teuchos::null; 01876 MVT::SetBlock(*R_,indblock3,*V_); 01877 // KX,KP 01878 MVT::MvTimesMatAddMv(ONE,*cK_XHP,*CX,ZERO,*R_); 01879 cK_XHP = Teuchos::null; 01880 MVT::SetBlock(*R_,indblock1,*KV_); 01881 MVT::MvTimesMatAddMv(ONE,*cK_HP,*CP,ZERO,*R_); 01882 cK_HP = Teuchos::null; 01883 MVT::SetBlock(*R_,indblock3,*KV_); 01884 // MX,MP 01885 if (hasM_) { 01886 MVT::MvTimesMatAddMv(ONE,*cM_XHP,*CX,ZERO,*R_); 01887 cM_XHP = Teuchos::null; 01888 MVT::SetBlock(*R_,indblock1,*MV_); 01889 MVT::MvTimesMatAddMv(ONE,*cM_HP,*CP,ZERO,*R_); 01890 cM_HP = Teuchos::null; 01891 MVT::SetBlock(*R_,indblock3,*MV_); 01892 } 01893 else { 01894 cM_XHP = Teuchos::null; 01895 cM_HP = Teuchos::null; 01896 } 01897 } 01898 } // end timing block 01899 // done with coefficient matrices 01900 CX = Teuchos::null; 01901 CP = Teuchos::null; 01902 01903 // 01904 // we now have a P direction 01905 hasP_ = true; 01906 01907 // debugging check: all of our const views should have been cleared by now 01908 // if not, we have a logic error above 01909 TEUCHOS_TEST_FOR_EXCEPTION( cXHP != Teuchos::null || cK_XHP != Teuchos::null || cM_XHP != Teuchos::null 01910 || cHP != Teuchos::null || cK_HP != Teuchos::null || cM_HP != Teuchos::null 01911 || cP != Teuchos::null || cK_P != Teuchos::null || cM_P != Teuchos::null, 01912 std::logic_error, 01913 "Anasazi::BlockKrylovSchur::iterate(): const views were not all cleared! Something went wrong!" ); 01914 01915 // 01916 // recreate our const MV views of X,H,P and friends 01917 setupViews(); 01918 01919 // 01920 // Compute the new residuals, explicitly 01921 { 01922 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01923 Teuchos::TimeMonitor lcltimer( *timerCompRes_ ); 01924 #endif 01925 MVT::MvAddMv( ONE, *KX_, ZERO, *KX_, *R_ ); 01926 Teuchos::SerialDenseMatrix<int,ScalarType> T( blockSize_, blockSize_ ); 01927 for (int i = 0; i < blockSize_; i++) { 01928 T(i,i) = theta_[i]; 01929 } 01930 MVT::MvTimesMatAddMv( -ONE, *MX_, T, ONE, *R_ ); 01931 } 01932 01933 // R has been updated; mark the norms as out-of-date 01934 Rnorms_current_ = false; 01935 R2norms_current_ = false; 01936 01937 // When required, monitor some orthogonalities 01938 if (om_->isVerbosity( Debug ) ) { 01939 // Check almost everything here 01940 CheckList chk; 01941 chk.checkX = true; 01942 chk.checkKX = true; 01943 chk.checkMX = true; 01944 chk.checkP = true; 01945 chk.checkKP = true; 01946 chk.checkMP = true; 01947 chk.checkR = true; 01948 om_->print( Debug, accuracyCheck(chk, ": after local update") ); 01949 } 01950 else if (om_->isVerbosity( OrthoDetails )) { 01951 CheckList chk; 01952 chk.checkX = true; 01953 chk.checkP = true; 01954 chk.checkR = true; 01955 om_->print( OrthoDetails, accuracyCheck(chk, ": after local update") ); 01956 } 01957 } // end while (statusTest == false) 01958 } 01959 01960 01962 // compute/return residual M-norms 01963 template <class ScalarType, class MV, class OP> 01964 std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> 01965 LOBPCG<ScalarType,MV,OP>::getResNorms() { 01966 if (Rnorms_current_ == false) { 01967 // Update the residual norms 01968 orthman_->norm(*R_,Rnorms_); 01969 Rnorms_current_ = true; 01970 } 01971 return Rnorms_; 01972 } 01973 01974 01976 // compute/return residual 2-norms 01977 template <class ScalarType, class MV, class OP> 01978 std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> 01979 LOBPCG<ScalarType,MV,OP>::getRes2Norms() { 01980 if (R2norms_current_ == false) { 01981 // Update the residual 2-norms 01982 MVT::MvNorm(*R_,R2norms_); 01983 R2norms_current_ = true; 01984 } 01985 return R2norms_; 01986 } 01987 01988 01989 01990 01992 // Check accuracy, orthogonality, and other debugging stuff 01993 // 01994 // bools specify which tests we want to run (instead of running more than we actually care about) 01995 // 01996 // we don't bother checking the following because they are computed explicitly: 01997 // H == Prec*R 01998 // KH == K*H 01999 // 02000 // 02001 // checkX : X orthonormal 02002 // orthogonal to auxvecs 02003 // checkMX: check MX == M*X 02004 // checkKX: check KX == K*X 02005 // checkP : if fullortho P orthonormal and orthogonal to X 02006 // orthogonal to auxvecs 02007 // checkMP: check MP == M*P 02008 // checkKP: check KP == K*P 02009 // checkH : if fullortho H orthonormal and orthogonal to X and P 02010 // orthogonal to auxvecs 02011 // checkMH: check MH == M*H 02012 // checkR : check R orthogonal to X 02013 // checkQ : check that auxiliary vectors are actually orthonormal 02014 // 02015 // TODO: 02016 // add checkTheta 02017 // 02018 template <class ScalarType, class MV, class OP> 02019 std::string LOBPCG<ScalarType,MV,OP>::accuracyCheck( const CheckList &chk, const std::string &where ) const 02020 { 02021 using std::endl; 02022 02023 std::stringstream os; 02024 os.precision(2); 02025 os.setf(std::ios::scientific, std::ios::floatfield); 02026 MagnitudeType tmp; 02027 02028 os << " Debugging checks: iteration " << iter_ << where << endl; 02029 02030 // X and friends 02031 if (chk.checkX && initialized_) { 02032 tmp = orthman_->orthonormError(*X_); 02033 os << " >> Error in X^H M X == I : " << tmp << endl; 02034 for (Array_size_type i=0; i<auxVecs_.size(); i++) { 02035 tmp = orthman_->orthogError(*X_,*auxVecs_[i]); 02036 os << " >> Error in X^H M Q[" << i << "] == 0 : " << tmp << endl; 02037 } 02038 } 02039 if (chk.checkMX && hasM_ && initialized_) { 02040 tmp = Utils::errorEquality(*X_, *MX_, MOp_); 02041 os << " >> Error in MX == M*X : " << tmp << endl; 02042 } 02043 if (chk.checkKX && initialized_) { 02044 tmp = Utils::errorEquality(*X_, *KX_, Op_); 02045 os << " >> Error in KX == K*X : " << tmp << endl; 02046 } 02047 02048 // P and friends 02049 if (chk.checkP && hasP_ && initialized_) { 02050 if (fullOrtho_) { 02051 tmp = orthman_->orthonormError(*P_); 02052 os << " >> Error in P^H M P == I : " << tmp << endl; 02053 tmp = orthman_->orthogError(*P_,*X_); 02054 os << " >> Error in P^H M X == 0 : " << tmp << endl; 02055 } 02056 for (Array_size_type i=0; i<auxVecs_.size(); i++) { 02057 tmp = orthman_->orthogError(*P_,*auxVecs_[i]); 02058 os << " >> Error in P^H M Q[" << i << "] == 0 : " << tmp << endl; 02059 } 02060 } 02061 if (chk.checkMP && hasM_ && hasP_ && initialized_) { 02062 tmp = Utils::errorEquality(*P_, *MP_, MOp_); 02063 os << " >> Error in MP == M*P : " << tmp << endl; 02064 } 02065 if (chk.checkKP && hasP_ && initialized_) { 02066 tmp = Utils::errorEquality(*P_, *KP_, Op_); 02067 os << " >> Error in KP == K*P : " << tmp << endl; 02068 } 02069 02070 // H and friends 02071 if (chk.checkH && initialized_) { 02072 if (fullOrtho_) { 02073 tmp = orthman_->orthonormError(*H_); 02074 os << " >> Error in H^H M H == I : " << tmp << endl; 02075 tmp = orthman_->orthogError(*H_,*X_); 02076 os << " >> Error in H^H M X == 0 : " << tmp << endl; 02077 if (hasP_) { 02078 tmp = orthman_->orthogError(*H_,*P_); 02079 os << " >> Error in H^H M P == 0 : " << tmp << endl; 02080 } 02081 } 02082 for (Array_size_type i=0; i<auxVecs_.size(); i++) { 02083 tmp = orthman_->orthogError(*H_,*auxVecs_[i]); 02084 os << " >> Error in H^H M Q[" << i << "] == 0 : " << tmp << endl; 02085 } 02086 } 02087 if (chk.checkMH && hasM_ && initialized_) { 02088 tmp = Utils::errorEquality(*H_, *MH_, MOp_); 02089 os << " >> Error in MH == M*H : " << tmp << endl; 02090 } 02091 02092 // R: this is not M-orthogonality, but standard euclidean orthogonality 02093 if (chk.checkR && initialized_) { 02094 Teuchos::SerialDenseMatrix<int,ScalarType> xTx(blockSize_,blockSize_); 02095 MVT::MvTransMv(ONE,*X_,*R_,xTx); 02096 tmp = xTx.normFrobenius(); 02097 MVT::MvTransMv(ONE,*R_,*R_,xTx); 02098 double normR = xTx.normFrobenius(); 02099 os << " >> RelError in X^H R == 0: " << tmp/normR << endl; 02100 } 02101 02102 // Q 02103 if (chk.checkQ) { 02104 for (Array_size_type i=0; i<auxVecs_.size(); i++) { 02105 tmp = orthman_->orthonormError(*auxVecs_[i]); 02106 os << " >> Error in Q[" << i << "]^H M Q[" << i << "] == I : " << tmp << endl; 02107 for (Array_size_type j=i+1; j<auxVecs_.size(); j++) { 02108 tmp = orthman_->orthogError(*auxVecs_[i],*auxVecs_[j]); 02109 os << " >> Error in Q[" << i << "]^H M Q[" << j << "] == 0 : " << tmp << endl; 02110 } 02111 } 02112 } 02113 02114 os << endl; 02115 02116 return os.str(); 02117 } 02118 02119 02121 // Print the current status of the solver 02122 template <class ScalarType, class MV, class OP> 02123 void 02124 LOBPCG<ScalarType,MV,OP>::currentStatus(std::ostream &os) 02125 { 02126 using std::endl; 02127 02128 os.setf(std::ios::scientific, std::ios::floatfield); 02129 os.precision(6); 02130 os <<endl; 02131 os <<"================================================================================" << endl; 02132 os << endl; 02133 os <<" LOBPCG Solver Status" << endl; 02134 os << endl; 02135 os <<"The solver is "<<(initialized_ ? "initialized." : "not initialized.") << endl; 02136 os <<"The number of iterations performed is " << iter_ << endl; 02137 os <<"The current block size is " << blockSize_ << endl; 02138 os <<"The number of auxiliary vectors is " << numAuxVecs_ << endl; 02139 os <<"The number of operations Op*x is " << count_ApplyOp_ << endl; 02140 os <<"The number of operations M*x is " << count_ApplyM_ << endl; 02141 os <<"The number of operations Prec*x is " << count_ApplyPrec_ << endl; 02142 02143 os.setf(std::ios_base::right, std::ios_base::adjustfield); 02144 02145 if (initialized_) { 02146 os << endl; 02147 os <<"CURRENT EIGENVALUE ESTIMATES "<<endl; 02148 os << std::setw(20) << "Eigenvalue" 02149 << std::setw(20) << "Residual(M)" 02150 << std::setw(20) << "Residual(2)" 02151 << endl; 02152 os <<"--------------------------------------------------------------------------------"<<endl; 02153 for (int i=0; i<blockSize_; i++) { 02154 os << std::setw(20) << theta_[i]; 02155 if (Rnorms_current_) os << std::setw(20) << Rnorms_[i]; 02156 else os << std::setw(20) << "not current"; 02157 if (R2norms_current_) os << std::setw(20) << R2norms_[i]; 02158 else os << std::setw(20) << "not current"; 02159 os << endl; 02160 } 02161 } 02162 os <<"================================================================================" << endl; 02163 os << endl; 02164 } 02165 02167 // are we initialized or not? 02168 template <class ScalarType, class MV, class OP> 02169 bool LOBPCG<ScalarType,MV,OP>::isInitialized() const { 02170 return initialized_; 02171 } 02172 02173 02175 // is P valid or not? 02176 template <class ScalarType, class MV, class OP> 02177 bool LOBPCG<ScalarType,MV,OP>::hasP() { 02178 return hasP_; 02179 } 02180 02182 // is full orthogonalization enabled or not? 02183 template <class ScalarType, class MV, class OP> 02184 bool LOBPCG<ScalarType,MV,OP>::getFullOrtho() const { 02185 return(fullOrtho_); 02186 } 02187 02188 02190 // return the current auxilliary vectors 02191 template <class ScalarType, class MV, class OP> 02192 Teuchos::Array<Teuchos::RCP<const MV> > LOBPCG<ScalarType,MV,OP>::getAuxVecs() const { 02193 return auxVecs_; 02194 } 02195 02197 // return the current block size 02198 template <class ScalarType, class MV, class OP> 02199 int LOBPCG<ScalarType,MV,OP>::getBlockSize() const { 02200 return(blockSize_); 02201 } 02202 02204 // return the current eigenproblem 02205 template <class ScalarType, class MV, class OP> 02206 const Eigenproblem<ScalarType,MV,OP>& LOBPCG<ScalarType,MV,OP>::getProblem() const { 02207 return(*problem_); 02208 } 02209 02210 02212 // return the max subspace dimension 02213 template <class ScalarType, class MV, class OP> 02214 int LOBPCG<ScalarType,MV,OP>::getMaxSubspaceDim() const { 02215 return 3*blockSize_; 02216 } 02217 02219 // return the current subspace dimension 02220 template <class ScalarType, class MV, class OP> 02221 int LOBPCG<ScalarType,MV,OP>::getCurSubspaceDim() const { 02222 if (!initialized_) return 0; 02223 return nevLocal_; 02224 } 02225 02226 02228 // return the current ritz residual norms 02229 template <class ScalarType, class MV, class OP> 02230 std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> 02231 LOBPCG<ScalarType,MV,OP>::getRitzRes2Norms() 02232 { 02233 return this->getRes2Norms(); 02234 } 02235 02236 02238 // return the current compression indices 02239 template <class ScalarType, class MV, class OP> 02240 std::vector<int> LOBPCG<ScalarType,MV,OP>::getRitzIndex() { 02241 std::vector<int> ret(nevLocal_,0); 02242 return ret; 02243 } 02244 02245 02247 // return the current ritz values 02248 template <class ScalarType, class MV, class OP> 02249 std::vector<Value<ScalarType> > LOBPCG<ScalarType,MV,OP>::getRitzValues() { 02250 std::vector<Value<ScalarType> > ret(nevLocal_); 02251 for (int i=0; i<nevLocal_; i++) { 02252 ret[i].realpart = theta_[i]; 02253 ret[i].imagpart = ZERO; 02254 } 02255 return ret; 02256 } 02257 02259 // Set a new StatusTest for the solver. 02260 template <class ScalarType, class MV, class OP> 02261 void LOBPCG<ScalarType,MV,OP>::setStatusTest(Teuchos::RCP<StatusTest<ScalarType,MV,OP> > test) { 02262 TEUCHOS_TEST_FOR_EXCEPTION(test == Teuchos::null,std::invalid_argument, 02263 "Anasazi::LOBPCG::setStatusTest() was passed a null StatusTest."); 02264 tester_ = test; 02265 } 02266 02268 // Get the current StatusTest used by the solver. 02269 template <class ScalarType, class MV, class OP> 02270 Teuchos::RCP<StatusTest<ScalarType,MV,OP> > LOBPCG<ScalarType,MV,OP>::getStatusTest() const { 02271 return tester_; 02272 } 02273 02275 // return the current ritz vectors 02276 template <class ScalarType, class MV, class OP> 02277 Teuchos::RCP<const MV> LOBPCG<ScalarType,MV,OP>::getRitzVectors() { 02278 return X_; 02279 } 02280 02281 02283 // reset the iteration counter 02284 template <class ScalarType, class MV, class OP> 02285 void LOBPCG<ScalarType,MV,OP>::resetNumIters() { 02286 iter_=0; 02287 } 02288 02289 02291 // return the number of iterations 02292 template <class ScalarType, class MV, class OP> 02293 int LOBPCG<ScalarType,MV,OP>::getNumIters() const { 02294 return(iter_); 02295 } 02296 02297 02299 // return the state 02300 template <class ScalarType, class MV, class OP> 02301 LOBPCGState<ScalarType,MV> LOBPCG<ScalarType,MV,OP>::getState() const { 02302 LOBPCGState<ScalarType,MV> state; 02303 state.V = V_; 02304 state.KV = KV_; 02305 state.X = X_; 02306 state.KX = KX_; 02307 state.P = P_; 02308 state.KP = KP_; 02309 state.H = H_; 02310 state.KH = KH_; 02311 state.R = R_; 02312 state.T = Teuchos::rcp(new std::vector<MagnitudeType>(theta_)); 02313 if (hasM_) { 02314 state.MV = MV_; 02315 state.MX = MX_; 02316 state.MP = MP_; 02317 state.MH = MH_; 02318 } 02319 else { 02320 state.MX = Teuchos::null; 02321 state.MP = Teuchos::null; 02322 state.MH = Teuchos::null; 02323 } 02324 return state; 02325 } 02326 02327 } // end Anasazi namespace 02328 02329 #endif // ANASAZI_LOBPCG_HPP
1.7.6.1