|
Anasazi
Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Anasazi: Block Eigensolvers Package 00005 // Copyright (2004) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // This library is free software; you can redistribute it and/or modify 00011 // it under the terms of the GNU Lesser General Public License as 00012 // published by the Free Software Foundation; either version 2.1 of the 00013 // License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, but 00016 // WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00023 // USA 00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00025 // 00026 // *********************************************************************** 00027 // @HEADER 00028 00029 00034 /* 00035 LOBPCG contains local storage of up to 10*blockSize_ vectors, representing 10 entities 00036 X,H,P,R 00037 KX,KH,KP (product of K and the above) 00038 MX,MH,MP (product of M and the above, not allocated if we don't have an M matrix) 00039 If full orthogonalization is enabled, one extra multivector of blockSize_ vectors is required to 00040 compute the local update of X and P. 00041 00042 A solver is bound to an eigenproblem at declaration. 00043 Other solver parameters (e.g., block size, auxiliary vectors) can be changed dynamically. 00044 00045 The orthogonalization manager is used to project away from the auxiliary vectors. 00046 If full orthogonalization is enabled, the orthogonalization manager is also used to construct an M orthonormal basis. 00047 The orthogonalization manager is subclass of MatOrthoManager, which LOBPCG assumes to be defined by the M inner product. 00048 LOBPCG will not work correctly if the orthomanager uses a different inner product. 00049 */ 00050 00051 00052 #ifndef ANASAZI_LOBPCG_HPP 00053 #define ANASAZI_LOBPCG_HPP 00054 00055 #include "AnasaziTypes.hpp" 00056 00057 #include "AnasaziEigensolver.hpp" 00058 #include "AnasaziMultiVecTraits.hpp" 00059 #include "AnasaziOperatorTraits.hpp" 00060 #include "Teuchos_ScalarTraits.hpp" 00061 00062 #include "AnasaziMatOrthoManager.hpp" 00063 #include "AnasaziSolverUtils.hpp" 00064 00065 #include "Teuchos_LAPACK.hpp" 00066 #include "Teuchos_BLAS.hpp" 00067 #include "Teuchos_SerialDenseMatrix.hpp" 00068 #include "Teuchos_ParameterList.hpp" 00069 #include "Teuchos_TimeMonitor.hpp" 00070 00091 namespace Anasazi { 00092 00094 00095 00100 template <class ScalarType, class MultiVector> 00101 struct LOBPCGState { 00103 Teuchos::RCP<const MultiVector> V; 00105 Teuchos::RCP<const MultiVector> KV; 00107 Teuchos::RCP<const MultiVector> MV; 00108 00110 Teuchos::RCP<const MultiVector> X; 00112 Teuchos::RCP<const MultiVector> KX; 00114 Teuchos::RCP<const MultiVector> MX; 00115 00117 Teuchos::RCP<const MultiVector> P; 00119 Teuchos::RCP<const MultiVector> KP; 00121 Teuchos::RCP<const MultiVector> MP; 00122 00127 Teuchos::RCP<const MultiVector> H; 00129 Teuchos::RCP<const MultiVector> KH; 00131 Teuchos::RCP<const MultiVector> MH; 00132 00134 Teuchos::RCP<const MultiVector> R; 00135 00137 Teuchos::RCP<const std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > T; 00138 00139 LOBPCGState() : 00140 V(Teuchos::null),KV(Teuchos::null),MV(Teuchos::null), 00141 X(Teuchos::null),KX(Teuchos::null),MX(Teuchos::null), 00142 P(Teuchos::null),KP(Teuchos::null),MP(Teuchos::null), 00143 H(Teuchos::null),KH(Teuchos::null),MH(Teuchos::null), 00144 R(Teuchos::null),T(Teuchos::null) {}; 00145 }; 00146 00148 00150 00151 00165 class LOBPCGRitzFailure : public AnasaziError {public: 00166 LOBPCGRitzFailure(const std::string& what_arg) : AnasaziError(what_arg) 00167 {}}; 00168 00181 class LOBPCGInitFailure : public AnasaziError {public: 00182 LOBPCGInitFailure(const std::string& what_arg) : AnasaziError(what_arg) 00183 {}}; 00184 00201 class LOBPCGOrthoFailure : public AnasaziError {public: 00202 LOBPCGOrthoFailure(const std::string& what_arg) : AnasaziError(what_arg) 00203 {}}; 00204 00206 00207 00208 template <class ScalarType, class MV, class OP> 00209 class LOBPCG : public Eigensolver<ScalarType,MV,OP> { 00210 public: 00211 00213 00214 00222 LOBPCG( const Teuchos::RCP<Eigenproblem<ScalarType,MV,OP> > &problem, 00223 const Teuchos::RCP<SortManager<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > &sorter, 00224 const Teuchos::RCP<OutputManager<ScalarType> > &printer, 00225 const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > &tester, 00226 const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> > &ortho, 00227 Teuchos::ParameterList ¶ms 00228 ); 00229 00231 virtual ~LOBPCG() {}; 00232 00234 00236 00237 00264 void iterate(); 00265 00291 void initialize(LOBPCGState<ScalarType,MV> newstate); 00292 00296 void initialize(); 00297 00317 bool isInitialized() const; 00318 00329 LOBPCGState<ScalarType,MV> getState() const; 00330 00332 00334 00335 00337 int getNumIters() const; 00338 00340 void resetNumIters(); 00341 00349 Teuchos::RCP<const MV> getRitzVectors(); 00350 00356 std::vector<Value<ScalarType> > getRitzValues(); 00357 00365 std::vector<int> getRitzIndex(); 00366 00367 00373 std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getResNorms(); 00374 00375 00381 std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getRes2Norms(); 00382 00383 00391 std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getRitzRes2Norms(); 00392 00393 00402 int getCurSubspaceDim() const; 00403 00407 int getMaxSubspaceDim() const; 00408 00410 00412 00413 00415 void setStatusTest(Teuchos::RCP<StatusTest<ScalarType,MV,OP> > test); 00416 00418 Teuchos::RCP<StatusTest<ScalarType,MV,OP> > getStatusTest() const; 00419 00421 const Eigenproblem<ScalarType,MV,OP>& getProblem() const; 00422 00423 00432 void setBlockSize(int blockSize); 00433 00434 00436 int getBlockSize() const; 00437 00438 00450 void setAuxVecs(const Teuchos::Array<Teuchos::RCP<const MV> > &auxvecs); 00451 00453 Teuchos::Array<Teuchos::RCP<const MV> > getAuxVecs() const; 00454 00456 00458 00459 00465 void setFullOrtho(bool fullOrtho); 00466 00468 bool getFullOrtho() const; 00469 00471 bool hasP(); 00472 00474 00476 00477 00479 void currentStatus(std::ostream &os); 00480 00482 00483 private: 00484 // 00485 // 00486 // 00487 void setupViews(); 00488 // 00489 // Convenience typedefs 00490 // 00491 typedef SolverUtils<ScalarType,MV,OP> Utils; 00492 typedef MultiVecTraits<ScalarType,MV> MVT; 00493 typedef OperatorTraits<ScalarType,MV,OP> OPT; 00494 typedef Teuchos::ScalarTraits<ScalarType> SCT; 00495 typedef typename SCT::magnitudeType MagnitudeType; 00496 const MagnitudeType ONE; 00497 const MagnitudeType ZERO; 00498 const MagnitudeType NANVAL; 00499 // 00500 // Internal structs 00501 // 00502 struct CheckList { 00503 bool checkX, checkMX, checkKX; 00504 bool checkH, checkMH; 00505 bool checkP, checkMP, checkKP; 00506 bool checkR, checkQ; 00507 CheckList() : checkX(false),checkMX(false),checkKX(false), 00508 checkH(false),checkMH(false), 00509 checkP(false),checkMP(false),checkKP(false), 00510 checkR(false),checkQ(false) {}; 00511 }; 00512 // 00513 // Internal methods 00514 // 00515 std::string accuracyCheck(const CheckList &chk, const std::string &where) const; 00516 // 00517 // Classes inputed through constructor that define the eigenproblem to be solved. 00518 // 00519 const Teuchos::RCP<Eigenproblem<ScalarType,MV,OP> > problem_; 00520 const Teuchos::RCP<SortManager<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > sm_; 00521 const Teuchos::RCP<OutputManager<ScalarType> > om_; 00522 Teuchos::RCP<StatusTest<ScalarType,MV,OP> > tester_; 00523 const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> > orthman_; 00524 // 00525 // Information obtained from the eigenproblem 00526 // 00527 Teuchos::RCP<const OP> Op_; 00528 Teuchos::RCP<const OP> MOp_; 00529 Teuchos::RCP<const OP> Prec_; 00530 bool hasM_; 00531 // 00532 // Internal timers 00533 // 00534 Teuchos::RCP<Teuchos::Time> timerOp_, timerMOp_, timerPrec_, 00535 timerSort_, 00536 timerLocalProj_, timerDS_, 00537 timerLocalUpdate_, timerCompRes_, 00538 timerOrtho_, timerInit_; 00539 // 00540 // Counters 00541 // 00542 // Number of operator applications 00543 int count_ApplyOp_, count_ApplyM_, count_ApplyPrec_; 00544 00545 // 00546 // Algorithmic parameters. 00547 // 00548 // blockSize_ is the solver block size 00549 int blockSize_; 00550 // 00551 // fullOrtho_ dictates whether the orthogonalization procedures specified by Hetmaniuk and Lehoucq should 00552 // be activated (see citations at the top of this file) 00553 bool fullOrtho_; 00554 00555 // 00556 // Current solver state 00557 // 00558 // initialized_ specifies that the basis vectors have been initialized and the iterate() routine 00559 // is capable of running; _initialize is controlled by the initialize() member method 00560 // For the implications of the state of initialized_, please see documentation for initialize() 00561 bool initialized_; 00562 // 00563 // nevLocal_ reflects how much of the current basis is valid (0 <= nevLocal_ <= 3*blockSize_) 00564 // this tells us how many of the values in theta_ are valid Ritz values 00565 int nevLocal_; 00566 // 00567 // hasP_ tells us whether there is valid data in P (and KP,MP) 00568 bool hasP_; 00569 // 00570 // State Multivecs 00571 // V_, KV_ MV_ and R_ are primary pointers to allocated multivectors 00572 Teuchos::RCP<MV> V_, KV_, MV_, R_; 00573 // the rest are multivector views into V_, KV_ and MV_ 00574 Teuchos::RCP<MV> X_, KX_, MX_, 00575 H_, KH_, MH_, 00576 P_, KP_, MP_; 00577 00578 // 00579 // if fullOrtho_ == true, then we must produce the following on every iteration: 00580 // [newX newP] = [X H P] [CX;CP] 00581 // the structure of [CX;CP] when using full orthogonalization does not allow us to 00582 // do this in situ, and R_ does not have enough storage for newX and newP. therefore, 00583 // we must allocate additional storage for this. 00584 // otherwise, when not using full orthogonalization, the structure 00585 // [newX newP] = [X H P] [CX1 0 ] 00586 // [CX2 CP2] allows us to work using only R as work space 00587 // [CX3 CP3] 00588 Teuchos::RCP<MV> tmpmvec_; 00589 // 00590 // auxiliary vectors 00591 Teuchos::Array<Teuchos::RCP<const MV> > auxVecs_; 00592 int numAuxVecs_; 00593 // 00594 // Number of iterations that have been performed. 00595 int iter_; 00596 // 00597 // Current eigenvalues, residual norms 00598 std::vector<MagnitudeType> theta_, Rnorms_, R2norms_; 00599 // 00600 // are the residual norms current with the residual? 00601 bool Rnorms_current_, R2norms_current_; 00602 00603 }; 00604 00605 00606 00607 00609 // Constructor 00610 template <class ScalarType, class MV, class OP> 00611 LOBPCG<ScalarType,MV,OP>::LOBPCG( 00612 const Teuchos::RCP<Eigenproblem<ScalarType,MV,OP> > &problem, 00613 const Teuchos::RCP<SortManager<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > &sorter, 00614 const Teuchos::RCP<OutputManager<ScalarType> > &printer, 00615 const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > &tester, 00616 const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> > &ortho, 00617 Teuchos::ParameterList ¶ms 00618 ) : 00619 ONE(Teuchos::ScalarTraits<MagnitudeType>::one()), 00620 ZERO(Teuchos::ScalarTraits<MagnitudeType>::zero()), 00621 NANVAL(Teuchos::ScalarTraits<MagnitudeType>::nan()), 00622 // problem, tools 00623 problem_(problem), 00624 sm_(sorter), 00625 om_(printer), 00626 tester_(tester), 00627 orthman_(ortho), 00628 // timers, counters 00629 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 00630 timerOp_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Operation Op*x")), 00631 timerMOp_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Operation M*x")), 00632 timerPrec_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Operation Prec*x")), 00633 timerSort_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Sorting eigenvalues")), 00634 timerLocalProj_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Local projection")), 00635 timerDS_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Direct solve")), 00636 timerLocalUpdate_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Local update")), 00637 timerCompRes_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Computing residuals")), 00638 timerOrtho_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Orthogonalization")), 00639 timerInit_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Initialization")), 00640 #endif 00641 count_ApplyOp_(0), 00642 count_ApplyM_(0), 00643 count_ApplyPrec_(0), 00644 // internal data 00645 blockSize_(0), 00646 fullOrtho_(params.get("Full Ortho", true)), 00647 initialized_(false), 00648 nevLocal_(0), 00649 hasP_(false), 00650 auxVecs_( Teuchos::Array<Teuchos::RCP<const MV> >(0) ), 00651 numAuxVecs_(0), 00652 iter_(0), 00653 Rnorms_current_(false), 00654 R2norms_current_(false) 00655 { 00656 TEUCHOS_TEST_FOR_EXCEPTION(problem_ == Teuchos::null,std::invalid_argument, 00657 "Anasazi::LOBPCG::constructor: user passed null problem pointer."); 00658 TEUCHOS_TEST_FOR_EXCEPTION(sm_ == Teuchos::null,std::invalid_argument, 00659 "Anasazi::LOBPCG::constructor: user passed null sort manager pointer."); 00660 TEUCHOS_TEST_FOR_EXCEPTION(om_ == Teuchos::null,std::invalid_argument, 00661 "Anasazi::LOBPCG::constructor: user passed null output manager pointer."); 00662 TEUCHOS_TEST_FOR_EXCEPTION(tester_ == Teuchos::null,std::invalid_argument, 00663 "Anasazi::LOBPCG::constructor: user passed null status test pointer."); 00664 TEUCHOS_TEST_FOR_EXCEPTION(orthman_ == Teuchos::null,std::invalid_argument, 00665 "Anasazi::LOBPCG::constructor: user passed null orthogonalization manager pointer."); 00666 TEUCHOS_TEST_FOR_EXCEPTION(problem_->isProblemSet() == false, std::invalid_argument, 00667 "Anasazi::LOBPCG::constructor: problem is not set."); 00668 TEUCHOS_TEST_FOR_EXCEPTION(problem_->isHermitian() == false, std::invalid_argument, 00669 "Anasazi::LOBPCG::constructor: problem is not Hermitian; LOBPCG requires Hermitian problem."); 00670 00671 // get the problem operators 00672 Op_ = problem_->getOperator(); 00673 TEUCHOS_TEST_FOR_EXCEPTION(Op_ == Teuchos::null, std::invalid_argument, 00674 "Anasazi::LOBPCG::constructor: problem provides no operator."); 00675 MOp_ = problem_->getM(); 00676 Prec_ = problem_->getPrec(); 00677 hasM_ = (MOp_ != Teuchos::null); 00678 00679 // set the block size and allocate data 00680 int bs = params.get("Block Size", problem_->getNEV()); 00681 setBlockSize(bs); 00682 } 00683 00684 00686 // Set the block size and make necessary adjustments. 00687 template <class ScalarType, class MV, class OP> 00688 void LOBPCG<ScalarType,MV,OP>::setBlockSize (int newBS) 00689 { 00690 // time spent here counts towards timerInit_ 00691 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 00692 Teuchos::TimeMonitor lcltimer( *timerInit_ ); 00693 #endif 00694 00695 // This routine only allocates space; it doesn't not perform any computation 00696 // if size is decreased, take the first newBS vectors of all and leave state as is 00697 // otherwise, grow/allocate space and set solver to unitialized 00698 00699 Teuchos::RCP<const MV> tmp; 00700 // grab some Multivector to Clone 00701 // in practice, getInitVec() should always provide this, but it is possible to use a 00702 // Eigenproblem with nothing in getInitVec() by manually initializing with initialize(); 00703 // in case of that strange scenario, we will try to Clone from R_ because it is smaller 00704 // than V_, and we don't want to keep V_ around longer than necessary 00705 if (blockSize_ > 0) { 00706 tmp = R_; 00707 } 00708 else { 00709 tmp = problem_->getInitVec(); 00710 TEUCHOS_TEST_FOR_EXCEPTION(tmp == Teuchos::null,std::logic_error, 00711 "Anasazi::LOBPCG::setBlockSize(): eigenproblem did not specify initial vectors to clone from."); 00712 } 00713 00714 TEUCHOS_TEST_FOR_EXCEPTION(newBS <= 0 || newBS > MVT::GetVecLength(*tmp), std::invalid_argument, "Anasazi::LOBPCG::setBlockSize(): block size must be strictly positive."); 00715 if (newBS == blockSize_) { 00716 // do nothing 00717 return; 00718 } 00719 else if (newBS < blockSize_ && initialized_) { 00720 // 00721 // shrink vectors 00722 00723 // release views so we can modify the bases 00724 X_ = Teuchos::null; 00725 KX_ = Teuchos::null; 00726 MX_ = Teuchos::null; 00727 H_ = Teuchos::null; 00728 KH_ = Teuchos::null; 00729 MH_ = Teuchos::null; 00730 P_ = Teuchos::null; 00731 KP_ = Teuchos::null; 00732 MP_ = Teuchos::null; 00733 00734 // make new indices vectors 00735 std::vector<int> newind(newBS), oldind(newBS); 00736 for (int i=0; i<newBS; i++) { 00737 newind[i] = i; 00738 oldind[i] = i; 00739 } 00740 00741 Teuchos::RCP<MV> newV, newMV, newKV, newR; 00742 Teuchos::RCP<const MV> src; 00743 // allocate R and newV 00744 newR = MVT::Clone(*tmp,newBS); 00745 newV = MVT::Clone(*tmp,newBS*3); 00746 newKV = MVT::Clone(*tmp,newBS*3); 00747 if (hasM_) { 00748 newMV = MVT::Clone(*tmp,newBS*3); 00749 } 00750 00751 // 00752 // if we are initialized, we want to pull the data from V_ into newV: 00753 // bs | bs | bs 00754 // newV = [newX | **** |newP ] 00755 // newKV = [newKX| **** |newKP] 00756 // newMV = [newMX| **** |newMP] 00757 // where 00758 // oldbs | oldbs | oldbs 00759 // V_ = [newX *** | ******* | newP ***] 00760 // KV_ = [newKX *** | ******* | newKP ***] 00761 // MV_ = [newMX *** | ******* | newMP ***] 00762 // 00763 // we don't care to copy the data corresponding to H 00764 // we will not copy the M data if !hasM_, because it doesn't exist 00765 // 00766 00767 // these are shrink operations which preserve their data 00768 theta_.resize(3*newBS); 00769 Rnorms_.resize(newBS); 00770 R2norms_.resize(newBS); 00771 00772 // copy residual vectors: oldind,newind currently contains [0,...,newBS-1] 00773 src = MVT::CloneView(*R_,newind); 00774 MVT::SetBlock(*src,newind,*newR); 00775 // free old memory and point to new memory 00776 R_ = newR; 00777 00778 // copy in order: newX newKX newMX, then newP newKP newMP 00779 // for X: [0,bs-1] <-- [0,bs-1] 00780 src = MVT::CloneView(*V_,oldind); 00781 MVT::SetBlock(*src,newind,*newV); 00782 src = MVT::CloneView(*KV_,oldind); 00783 MVT::SetBlock(*src,newind,*newKV); 00784 if (hasM_) { 00785 src = MVT::CloneView(*MV_,oldind); 00786 MVT::SetBlock(*src,newind,*newMV); 00787 } 00788 // for P: [2*bs, 3*bs-1] <-- [2*oldbs, 2*oldbs+bs-1] 00789 for (int i=0; i<newBS; i++) { 00790 newind[i] += 2*newBS; 00791 oldind[i] += 2*blockSize_; 00792 } 00793 src = MVT::CloneView(*V_,oldind); 00794 MVT::SetBlock(*src,newind,*newV); 00795 src = MVT::CloneView(*KV_,oldind); 00796 MVT::SetBlock(*src,newind,*newKV); 00797 if (hasM_) { 00798 src = MVT::CloneView(*MV_,oldind); 00799 MVT::SetBlock(*src,newind,*newMV); 00800 } 00801 00802 // release temp view 00803 src = Teuchos::null; 00804 00805 // release old allocations and point at new memory 00806 V_ = newV; 00807 KV_ = newKV; 00808 if (hasM_) { 00809 MV_ = newMV; 00810 } 00811 else { 00812 MV_ = V_; 00813 } 00814 } 00815 else { 00816 // newBS > blockSize_ or not initialized 00817 // this is also the scenario for our initial call to setBlockSize(), in the constructor 00818 initialized_ = false; 00819 hasP_ = false; 00820 00821 // release views 00822 X_ = Teuchos::null; 00823 KX_ = Teuchos::null; 00824 MX_ = Teuchos::null; 00825 H_ = Teuchos::null; 00826 KH_ = Teuchos::null; 00827 MH_ = Teuchos::null; 00828 P_ = Teuchos::null; 00829 KP_ = Teuchos::null; 00830 MP_ = Teuchos::null; 00831 00832 // free allocated storage 00833 R_ = Teuchos::null; 00834 V_ = Teuchos::null; 00835 00836 // allocate scalar vectors 00837 theta_.resize(3*newBS,NANVAL); 00838 Rnorms_.resize(newBS,NANVAL); 00839 R2norms_.resize(newBS,NANVAL); 00840 00841 // clone multivectors off of tmp 00842 R_ = MVT::Clone(*tmp,newBS); 00843 V_ = MVT::Clone(*tmp,newBS*3); 00844 KV_ = MVT::Clone(*tmp,newBS*3); 00845 if (hasM_) { 00846 MV_ = MVT::Clone(*tmp,newBS*3); 00847 } 00848 else { 00849 MV_ = V_; 00850 } 00851 } 00852 00853 // allocate tmp space 00854 tmpmvec_ = Teuchos::null; 00855 if (fullOrtho_) { 00856 tmpmvec_ = MVT::Clone(*tmp,newBS); 00857 } 00858 00859 // set new block size 00860 blockSize_ = newBS; 00861 00862 // setup new views 00863 setupViews(); 00864 } 00865 00866 00868 // Setup views into V,KV,MV 00869 template <class ScalarType, class MV, class OP> 00870 void LOBPCG<ScalarType,MV,OP>::setupViews() 00871 { 00872 std::vector<int> ind(blockSize_); 00873 00874 for (int i=0; i<blockSize_; i++) { 00875 ind[i] = i; 00876 } 00877 X_ = MVT::CloneViewNonConst(*V_,ind); 00878 KX_ = MVT::CloneViewNonConst(*KV_,ind); 00879 if (hasM_) { 00880 MX_ = MVT::CloneViewNonConst(*MV_,ind); 00881 } 00882 else { 00883 MX_ = X_; 00884 } 00885 00886 for (int i=0; i<blockSize_; i++) { 00887 ind[i] += blockSize_; 00888 } 00889 H_ = MVT::CloneViewNonConst(*V_,ind); 00890 KH_ = MVT::CloneViewNonConst(*KV_,ind); 00891 if (hasM_) { 00892 MH_ = MVT::CloneViewNonConst(*MV_,ind); 00893 } 00894 else { 00895 MH_ = H_; 00896 } 00897 00898 for (int i=0; i<blockSize_; i++) { 00899 ind[i] += blockSize_; 00900 } 00901 P_ = MVT::CloneViewNonConst(*V_,ind); 00902 KP_ = MVT::CloneViewNonConst(*KV_,ind); 00903 if (hasM_) { 00904 MP_ = MVT::CloneViewNonConst(*MV_,ind); 00905 } 00906 else { 00907 MP_ = P_; 00908 } 00909 } 00910 00911 00913 // Set the auxiliary vectors 00914 template <class ScalarType, class MV, class OP> 00915 void LOBPCG<ScalarType,MV,OP>::setAuxVecs(const Teuchos::Array<Teuchos::RCP<const MV> > &auxvecs) { 00916 typedef typename Teuchos::Array<Teuchos::RCP<const MV> >::iterator tarcpmv; 00917 00918 // set new auxiliary vectors 00919 auxVecs_ = auxvecs; 00920 00921 numAuxVecs_ = 0; 00922 for (tarcpmv i=auxVecs_.begin(); i != auxVecs_.end(); i++) { 00923 numAuxVecs_ += MVT::GetNumberVecs(**i); 00924 } 00925 00926 // If the solver has been initialized, X and P are not necessarily orthogonal to new auxiliary vectors 00927 if (numAuxVecs_ > 0 && initialized_) { 00928 initialized_ = false; 00929 hasP_ = false; 00930 } 00931 00932 if (om_->isVerbosity( Debug ) ) { 00933 // Check almost everything here 00934 CheckList chk; 00935 chk.checkQ = true; 00936 om_->print( Debug, accuracyCheck(chk, ": in setAuxVecs()") ); 00937 } 00938 } 00939 00940 00942 /* Initialize the state of the solver 00943 * 00944 * POST-CONDITIONS: 00945 * 00946 * initialized_ == true 00947 * X is orthonormal, orthogonal to auxVecs_ 00948 * KX = Op*X 00949 * MX = M*X if hasM_ 00950 * theta_ contains Ritz values of X 00951 * R = KX - MX*diag(theta_) 00952 * if hasP() == true, 00953 * P orthogonal to auxVecs_ 00954 * if fullOrtho_ == true, 00955 * P orthonormal and orthogonal to X 00956 * KP = Op*P 00957 * MP = M*P 00958 */ 00959 template <class ScalarType, class MV, class OP> 00960 void LOBPCG<ScalarType,MV,OP>::initialize(LOBPCGState<ScalarType,MV> newstate) 00961 { 00962 // NOTE: memory has been allocated by setBlockSize(). Use SetBlock below; do not Clone 00963 // NOTE: Overall time spent in this routine is counted to timerInit_; portions will also be counted towards other primitives 00964 00965 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 00966 Teuchos::TimeMonitor inittimer( *timerInit_ ); 00967 #endif 00968 00969 std::vector<int> bsind(blockSize_); 00970 for (int i=0; i<blockSize_; i++) bsind[i] = i; 00971 00972 // in LOBPCG, X (the subspace iterate) is primary 00973 // the order of dependence follows like so. 00974 // --init-> X 00975 // --op apply-> MX,KX 00976 // --ritz analysis-> theta 00977 // --optional-> P,MP,KP 00978 // 00979 // if the user specifies all data for a level, we will accept it. 00980 // otherwise, we will generate the whole level, and all subsequent levels. 00981 // 00982 // the data members are ordered based on dependence, and the levels are 00983 // partitioned according to the amount of work required to produce the 00984 // items in a level. 00985 // 00986 // inconsitent multivectors widths and lengths will not be tolerated, and 00987 // will be treated with exceptions. 00988 00989 // set up X, KX, MX: get them from "state" if user specified them 00990 00991 //---------------------------------------- 00992 // set up X, MX, KX 00993 //---------------------------------------- 00994 if (newstate.X != Teuchos::null) { 00995 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.X) != MVT::GetVecLength(*X_), 00996 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.X not correct." ); 00997 // newstate.X must have blockSize_ vectors; any more will be ignored 00998 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.X) < blockSize_, 00999 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.X must have at least block size vectors."); 01000 01001 // put X data in X_ 01002 MVT::SetBlock(*newstate.X,bsind,*X_); 01003 01004 // put MX data in MX_ 01005 if (hasM_) { 01006 if (newstate.MX != Teuchos::null) { 01007 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.MX) != MVT::GetVecLength(*MX_), 01008 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.MX not correct." ); 01009 // newstate.MX must have blockSize_ vectors; any more will be ignored 01010 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.MX) < blockSize_, 01011 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.MX must have at least block size vectors."); 01012 MVT::SetBlock(*newstate.MX,bsind,*MX_); 01013 } 01014 else { 01015 // user didn't specify MX, compute it 01016 { 01017 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01018 Teuchos::TimeMonitor lcltimer( *timerMOp_ ); 01019 #endif 01020 OPT::Apply(*MOp_,*X_,*MX_); 01021 count_ApplyM_ += blockSize_; 01022 } 01023 // we generated MX; we will generate R as well 01024 newstate.R = Teuchos::null; 01025 } 01026 } 01027 01028 // put data in KX 01029 if (newstate.KX != Teuchos::null) { 01030 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.KX) != MVT::GetVecLength(*KX_), 01031 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.KX not correct." ); 01032 // newstate.KX must have blockSize_ vectors; any more will be ignored 01033 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.KX) < blockSize_, 01034 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.KX must have at least block size vectors."); 01035 MVT::SetBlock(*newstate.KX,bsind,*KX_); 01036 } 01037 else { 01038 // user didn't specify KX, compute it 01039 { 01040 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01041 Teuchos::TimeMonitor lcltimer( *timerOp_ ); 01042 #endif 01043 OPT::Apply(*Op_,*X_,*KX_); 01044 count_ApplyOp_ += blockSize_; 01045 } 01046 // we generated KX; we will generate R as well 01047 newstate.R = Teuchos::null; 01048 } 01049 } 01050 else { 01051 // user did not specify X 01052 // we will initialize X, compute KX and MX, and compute R 01053 // 01054 // clear state so we won't use any data from it below 01055 newstate.P = Teuchos::null; 01056 newstate.KP = Teuchos::null; 01057 newstate.MP = Teuchos::null; 01058 newstate.R = Teuchos::null; 01059 newstate.T = Teuchos::null; 01060 01061 // generate a basis and projectAndNormalize 01062 Teuchos::RCP<const MV> ivec = problem_->getInitVec(); 01063 TEUCHOS_TEST_FOR_EXCEPTION(ivec == Teuchos::null,std::logic_error, 01064 "Anasazi::LOBPCG::initialize(): Eigenproblem did not specify initial vectors to clone from."); 01065 01066 int initSize = MVT::GetNumberVecs(*ivec); 01067 if (initSize > blockSize_) { 01068 // we need only the first blockSize_ vectors from ivec; get a view of them 01069 initSize = blockSize_; 01070 std::vector<int> ind(blockSize_); 01071 for (int i=0; i<blockSize_; i++) ind[i] = i; 01072 ivec = MVT::CloneView(*ivec,ind); 01073 } 01074 01075 // assign ivec to first part of X_ 01076 if (initSize > 0) { 01077 std::vector<int> ind(initSize); 01078 for (int i=0; i<initSize; i++) ind[i] = i; 01079 MVT::SetBlock(*ivec,ind,*X_); 01080 } 01081 // fill the rest of X_ with random 01082 if (blockSize_ > initSize) { 01083 std::vector<int> ind(blockSize_ - initSize); 01084 for (int i=0; i<blockSize_ - initSize; i++) ind[i] = initSize + i; 01085 Teuchos::RCP<MV> rX = MVT::CloneViewNonConst(*X_,ind); 01086 MVT::MvRandom(*rX); 01087 rX = Teuchos::null; 01088 } 01089 01090 // put data in MX 01091 if (hasM_) { 01092 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01093 Teuchos::TimeMonitor lcltimer( *timerMOp_ ); 01094 #endif 01095 OPT::Apply(*MOp_,*X_,*MX_); 01096 count_ApplyM_ += blockSize_; 01097 } 01098 01099 // remove auxVecs from X_ and normalize it 01100 if (numAuxVecs_ > 0) { 01101 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01102 Teuchos::TimeMonitor lcltimer( *timerOrtho_ ); 01103 #endif 01104 Teuchos::Array<Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > > dummy; 01105 int rank = orthman_->projectAndNormalizeMat(*X_,auxVecs_,dummy,Teuchos::null,MX_); 01106 TEUCHOS_TEST_FOR_EXCEPTION(rank != blockSize_, LOBPCGInitFailure, 01107 "Anasazi::LOBPCG::initialize(): Couldn't generate initial basis of full rank."); 01108 } 01109 else { 01110 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01111 Teuchos::TimeMonitor lcltimer( *timerOrtho_ ); 01112 #endif 01113 int rank = orthman_->normalizeMat(*X_,Teuchos::null,MX_); 01114 TEUCHOS_TEST_FOR_EXCEPTION(rank != blockSize_, LOBPCGInitFailure, 01115 "Anasazi::LOBPCG::initialize(): Couldn't generate initial basis of full rank."); 01116 } 01117 01118 // put data in KX 01119 { 01120 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01121 Teuchos::TimeMonitor lcltimer( *timerOp_ ); 01122 #endif 01123 OPT::Apply(*Op_,*X_,*KX_); 01124 count_ApplyOp_ += blockSize_; 01125 } 01126 } // end if (newstate.X != Teuchos::null) 01127 01128 01129 //---------------------------------------- 01130 // set up Ritz values 01131 //---------------------------------------- 01132 theta_.resize(3*blockSize_,NANVAL); 01133 if (newstate.T != Teuchos::null) { 01134 TEUCHOS_TEST_FOR_EXCEPTION( (signed int)(newstate.T->size()) < blockSize_, 01135 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.T must contain at least block size Ritz values."); 01136 for (int i=0; i<blockSize_; i++) { 01137 theta_[i] = (*newstate.T)[i]; 01138 } 01139 nevLocal_ = blockSize_; 01140 } 01141 else { 01142 // get ritz vecs/vals 01143 Teuchos::SerialDenseMatrix<int,ScalarType> KK(blockSize_,blockSize_), 01144 MM(blockSize_,blockSize_), 01145 S(blockSize_,blockSize_); 01146 { 01147 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01148 Teuchos::TimeMonitor lcltimer( *timerLocalProj_ ); 01149 #endif 01150 // project K 01151 MVT::MvTransMv(ONE,*X_,*KX_,KK); 01152 // project M 01153 MVT::MvTransMv(ONE,*X_,*MX_,MM); 01154 nevLocal_ = blockSize_; 01155 } 01156 01157 // solve the projected problem 01158 { 01159 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01160 Teuchos::TimeMonitor lcltimer( *timerDS_ ); 01161 #endif 01162 Utils::directSolver(blockSize_, KK, Teuchos::rcpFromRef(MM), S, theta_, nevLocal_, 1); 01163 TEUCHOS_TEST_FOR_EXCEPTION(nevLocal_ != blockSize_,LOBPCGInitFailure, 01164 "Anasazi::LOBPCG::initialize(): Initial Ritz analysis did not produce enough Ritz pairs to initialize algorithm."); 01165 } 01166 01167 // We only have blockSize_ ritz pairs, ergo we do not need to select. 01168 // However, we still require them to be ordered correctly 01169 { 01170 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01171 Teuchos::TimeMonitor lcltimer( *timerSort_ ); 01172 #endif 01173 01174 std::vector<int> order(blockSize_); 01175 // 01176 // sort the first blockSize_ values in theta_ 01177 sm_->sort(theta_, Teuchos::rcpFromRef(order), blockSize_); // don't catch exception 01178 // 01179 // apply the same ordering to the primitive ritz vectors 01180 Utils::permuteVectors(order,S); 01181 } 01182 01183 // update the solution, use R for storage 01184 { 01185 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01186 Teuchos::TimeMonitor lcltimer( *timerLocalUpdate_ ); 01187 #endif 01188 // X <- X*S 01189 MVT::MvAddMv( ONE, *X_, ZERO, *X_, *R_ ); 01190 MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *X_ ); 01191 // KX <- KX*S 01192 MVT::MvAddMv( ONE, *KX_, ZERO, *KX_, *R_ ); 01193 MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *KX_ ); 01194 if (hasM_) { 01195 // MX <- MX*S 01196 MVT::MvAddMv( ONE, *MX_, ZERO, *MX_, *R_ ); 01197 MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *MX_ ); 01198 } 01199 } 01200 } 01201 01202 //---------------------------------------- 01203 // compute R 01204 //---------------------------------------- 01205 if (newstate.R != Teuchos::null) { 01206 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.R) != MVT::GetVecLength(*R_), 01207 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.R not correct." ); 01208 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.R) < blockSize_, 01209 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.R must have blockSize number of vectors." ); 01210 MVT::SetBlock(*newstate.R,bsind,*R_); 01211 } 01212 else { 01213 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01214 Teuchos::TimeMonitor lcltimer( *timerCompRes_ ); 01215 #endif 01216 // form R <- KX - MX*T 01217 MVT::MvAddMv(ZERO,*KX_,ONE,*KX_,*R_); 01218 Teuchos::SerialDenseMatrix<int,ScalarType> T(blockSize_,blockSize_); 01219 for (int i=0; i<blockSize_; i++) T(i,i) = theta_[i]; 01220 MVT::MvTimesMatAddMv(-ONE,*MX_,T,ONE,*R_); 01221 } 01222 01223 // R has been updated; mark the norms as out-of-date 01224 Rnorms_current_ = false; 01225 R2norms_current_ = false; 01226 01227 // put data in P,KP,MP: P is not used to set theta 01228 if (newstate.P != Teuchos::null) { 01229 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.P) < blockSize_ , 01230 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.P must have blockSize number of vectors." ); 01231 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.P) != MVT::GetVecLength(*P_), 01232 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.P not correct." ); 01233 hasP_ = true; 01234 01235 // set P_ 01236 MVT::SetBlock(*newstate.P,bsind,*P_); 01237 01238 // set/compute KP_ 01239 if (newstate.KP != Teuchos::null) { 01240 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.KP) < blockSize_, 01241 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.KP must have blockSize number of vectors." ); 01242 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.KP) != MVT::GetVecLength(*KP_), 01243 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.KP not correct." ); 01244 MVT::SetBlock(*newstate.KP,bsind,*KP_); 01245 } 01246 else { 01247 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01248 Teuchos::TimeMonitor lcltimer( *timerOp_ ); 01249 #endif 01250 OPT::Apply(*Op_,*P_,*KP_); 01251 count_ApplyOp_ += blockSize_; 01252 } 01253 01254 // set/compute MP_ 01255 if (hasM_) { 01256 if (newstate.MP != Teuchos::null) { 01257 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.MP) < blockSize_, 01258 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.MP must have blockSize number of vectors." ); 01259 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.MP) != MVT::GetVecLength(*MP_), 01260 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.MP not correct." ); 01261 MVT::SetBlock(*newstate.MP,bsind,*MP_); 01262 } 01263 else { 01264 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01265 Teuchos::TimeMonitor lcltimer( *timerMOp_ ); 01266 #endif 01267 OPT::Apply(*MOp_,*P_,*MP_); 01268 count_ApplyM_ += blockSize_; 01269 } 01270 } 01271 } 01272 else { 01273 hasP_ = false; 01274 } 01275 01276 // finally, we are initialized 01277 initialized_ = true; 01278 01279 if (om_->isVerbosity( Debug ) ) { 01280 // Check almost everything here 01281 CheckList chk; 01282 chk.checkX = true; 01283 chk.checkKX = true; 01284 chk.checkMX = true; 01285 chk.checkP = true; 01286 chk.checkKP = true; 01287 chk.checkMP = true; 01288 chk.checkR = true; 01289 chk.checkQ = true; 01290 om_->print( Debug, accuracyCheck(chk, ": after initialize()") ); 01291 } 01292 01293 } 01294 01295 template <class ScalarType, class MV, class OP> 01296 void LOBPCG<ScalarType,MV,OP>::initialize() 01297 { 01298 LOBPCGState<ScalarType,MV> empty; 01299 initialize(empty); 01300 } 01301 01302 01304 // Instruct the solver to use full orthogonalization 01305 template <class ScalarType, class MV, class OP> 01306 void LOBPCG<ScalarType,MV,OP>::setFullOrtho (bool fullOrtho) 01307 { 01308 if ( fullOrtho_ == true || initialized_ == false || fullOrtho == fullOrtho_ ) { 01309 // state is already orthogonalized or solver is not initialized 01310 fullOrtho_ = fullOrtho; 01311 } 01312 else { 01313 // solver is initialized, state is not fully orthogonalized, and user has requested full orthogonalization 01314 // ergo, we must throw away data in P 01315 fullOrtho_ = true; 01316 hasP_ = false; 01317 } 01318 01319 // the user has called setFullOrtho, so the class has been instantiated 01320 // ergo, the data has already been allocated, i.e., setBlockSize() has been called 01321 // if it is already allocated, it should be the proper size 01322 if (fullOrtho_ && tmpmvec_ == Teuchos::null) { 01323 // allocated the workspace 01324 tmpmvec_ = MVT::Clone(*X_,blockSize_); 01325 } 01326 else if (fullOrtho_==false) { 01327 // free the workspace 01328 tmpmvec_ = Teuchos::null; 01329 } 01330 } 01331 01332 01334 // Perform LOBPCG iterations until the StatusTest tells us to stop. 01335 template <class ScalarType, class MV, class OP> 01336 void LOBPCG<ScalarType,MV,OP>::iterate () 01337 { 01338 // 01339 // Allocate/initialize data structures 01340 // 01341 if (initialized_ == false) { 01342 initialize(); 01343 } 01344 01345 // 01346 // Miscellaneous definitions 01347 const int oneBlock = blockSize_; 01348 const int twoBlocks = 2*blockSize_; 01349 const int threeBlocks = 3*blockSize_; 01350 01351 std::vector<int> indblock1(blockSize_), indblock2(blockSize_), indblock3(blockSize_); 01352 for (int i=0; i<blockSize_; i++) { 01353 indblock1[i] = i; 01354 indblock2[i] = i + blockSize_; 01355 indblock3[i] = i + 2*blockSize_; 01356 } 01357 01358 // 01359 // Define dense projected/local matrices 01360 // KK = Local stiffness matrix (size: 3*blockSize_ x 3*blockSize_) 01361 // MM = Local mass matrix (size: 3*blockSize_ x 3*blockSize_) 01362 // S = Local eigenvectors (size: 3*blockSize_ x 3*blockSize_) 01363 Teuchos::SerialDenseMatrix<int,ScalarType> KK( threeBlocks, threeBlocks ), 01364 MM( threeBlocks, threeBlocks ), 01365 S( threeBlocks, threeBlocks ); 01366 01367 while (tester_->checkStatus(this) != Passed) { 01368 01369 // Print information on current status 01370 if (om_->isVerbosity(Debug)) { 01371 currentStatus( om_->stream(Debug) ); 01372 } 01373 else if (om_->isVerbosity(IterationDetails)) { 01374 currentStatus( om_->stream(IterationDetails) ); 01375 } 01376 01377 // increment iteration counter 01378 iter_++; 01379 01380 // Apply the preconditioner on the residuals: H <- Prec*R 01381 if (Prec_ != Teuchos::null) { 01382 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01383 Teuchos::TimeMonitor lcltimer( *timerPrec_ ); 01384 #endif 01385 OPT::Apply( *Prec_, *R_, *H_ ); // don't catch the exception 01386 count_ApplyPrec_ += blockSize_; 01387 } 01388 else { 01389 MVT::MvAddMv(ONE,*R_,ZERO,*R_,*H_); 01390 } 01391 01392 // Apply the mass matrix on H 01393 if (hasM_) { 01394 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01395 Teuchos::TimeMonitor lcltimer( *timerMOp_ ); 01396 #endif 01397 OPT::Apply( *MOp_, *H_, *MH_); // don't catch the exception 01398 count_ApplyM_ += blockSize_; 01399 } 01400 01401 // orthogonalize H against the auxiliary vectors 01402 // optionally: orthogonalize H against X and P ([X P] is already orthonormal) 01403 Teuchos::Array<Teuchos::RCP<const MV> > Q; 01404 Q = auxVecs_; 01405 if (fullOrtho_) { 01406 // X and P are not contiguous, so there is not much point in putting them under 01407 // a single multivector view 01408 Q.push_back(X_); 01409 if (hasP_) { 01410 Q.push_back(P_); 01411 } 01412 } 01413 { 01414 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01415 Teuchos::TimeMonitor lcltimer( *timerOrtho_ ); 01416 #endif 01417 Teuchos::Array<Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > > dummyC = 01418 Teuchos::tuple<Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > >(Teuchos::null); 01419 int rank = orthman_->projectAndNormalizeMat(*H_,Q,dummyC,Teuchos::null,MH_); 01420 // our views are currently in place; it is safe to throw an exception 01421 TEUCHOS_TEST_FOR_EXCEPTION(rank != blockSize_,LOBPCGOrthoFailure, 01422 "Anasazi::LOBPCG::iterate(): unable to compute orthonormal basis for H."); 01423 } 01424 01425 if (om_->isVerbosity( Debug ) ) { 01426 CheckList chk; 01427 chk.checkH = true; 01428 chk.checkMH = true; 01429 om_->print( Debug, accuracyCheck(chk, ": after ortho H") ); 01430 } 01431 else if (om_->isVerbosity( OrthoDetails ) ) { 01432 CheckList chk; 01433 chk.checkH = true; 01434 chk.checkMH = true; 01435 om_->print( OrthoDetails, accuracyCheck(chk,": after ortho H") ); 01436 } 01437 01438 // Apply the stiffness matrix to H 01439 { 01440 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01441 Teuchos::TimeMonitor lcltimer( *timerOp_ ); 01442 #endif 01443 OPT::Apply( *Op_, *H_, *KH_); // don't catch the exception 01444 count_ApplyOp_ += blockSize_; 01445 } 01446 01447 if (hasP_) { 01448 nevLocal_ = threeBlocks; 01449 } 01450 else { 01451 nevLocal_ = twoBlocks; 01452 } 01453 01454 // 01455 // we need bases: [X H P] and [H P] (only need the latter if fullOrtho == false) 01456 // we need to perform the following operations: 01457 // X' [KX KH KP] 01458 // X' [MX MH MP] 01459 // H' [KH KP] 01460 // H' [MH MP] 01461 // P' [KP] 01462 // P' [MP] 01463 // [X H P] CX 01464 // [X H P] CP if fullOrtho 01465 // [H P] CP if !fullOrtho 01466 // 01467 // since M[X H P] is potentially the same memory as [X H P], and 01468 // because we are not allowed to have overlapping non-const views of 01469 // a multivector, we will now abandon our non-const views in favor of 01470 // const views 01471 // 01472 X_ = Teuchos::null; 01473 KX_ = Teuchos::null; 01474 MX_ = Teuchos::null; 01475 H_ = Teuchos::null; 01476 KH_ = Teuchos::null; 01477 MH_ = Teuchos::null; 01478 P_ = Teuchos::null; 01479 KP_ = Teuchos::null; 01480 MP_ = Teuchos::null; 01481 Teuchos::RCP<const MV> cX, cH, cXHP, cHP, cK_XHP, cK_HP, cM_XHP, cM_HP, cP, cK_P, cM_P; 01482 { 01483 cX = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indblock1); 01484 cH = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indblock2); 01485 01486 std::vector<int> indXHP(nevLocal_); 01487 for (int i=0; i<nevLocal_; i++) { 01488 indXHP[i] = i; 01489 } 01490 cXHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indXHP); 01491 cK_XHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(KV_),indXHP); 01492 if (hasM_) { 01493 cM_XHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(MV_),indXHP); 01494 } 01495 else { 01496 cM_XHP = cXHP; 01497 } 01498 01499 std::vector<int> indHP(nevLocal_-blockSize_); 01500 for (int i=blockSize_; i<nevLocal_; i++) { 01501 indHP[i-blockSize_] = i; 01502 } 01503 cHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indHP); 01504 cK_HP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(KV_),indHP); 01505 if (hasM_) { 01506 cM_HP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(MV_),indHP); 01507 } 01508 else { 01509 cM_HP = cHP; 01510 } 01511 01512 if (nevLocal_ == threeBlocks) { 01513 cP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indblock3); 01514 cK_P = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(KV_),indblock3); 01515 if (hasM_) { 01516 cM_P = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(MV_),indblock3); 01517 } 01518 else { 01519 cM_P = cP; 01520 } 01521 } 01522 } 01523 01524 // 01525 //---------------------------------------- 01526 // Form "local" mass and stiffness matrices 01527 //---------------------------------------- 01528 { 01529 // We will form only the block upper triangular part of 01530 // [X H P]' K [X H P] and [X H P]' M [X H P] 01531 // Get the necessary views into KK and MM: 01532 // [--K1--] [--M1--] 01533 // KK = [ -K2-] MM = [ -M2-] 01534 // [ K3] [ M3] 01535 // 01536 // It is okay to declare a zero-area view of a Teuchos::SerialDenseMatrix 01537 // 01538 Teuchos::SerialDenseMatrix<int,ScalarType> 01539 K1(Teuchos::View,KK,blockSize_,nevLocal_ ,0*blockSize_,0*blockSize_), 01540 K2(Teuchos::View,KK,blockSize_,nevLocal_-1*blockSize_,1*blockSize_,1*blockSize_), 01541 K3(Teuchos::View,KK,blockSize_,nevLocal_-2*blockSize_,2*blockSize_,2*blockSize_), 01542 M1(Teuchos::View,MM,blockSize_,nevLocal_ ,0*blockSize_,0*blockSize_), 01543 M2(Teuchos::View,MM,blockSize_,nevLocal_-1*blockSize_,1*blockSize_,1*blockSize_), 01544 M3(Teuchos::View,MM,blockSize_,nevLocal_-2*blockSize_,2*blockSize_,2*blockSize_); 01545 { 01546 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01547 Teuchos::TimeMonitor lcltimer( *timerLocalProj_ ); 01548 #endif 01549 MVT::MvTransMv( ONE, *cX, *cK_XHP, K1 ); 01550 MVT::MvTransMv( ONE, *cX, *cM_XHP, M1 ); 01551 MVT::MvTransMv( ONE, *cH, *cK_HP , K2 ); 01552 MVT::MvTransMv( ONE, *cH, *cM_HP , M2 ); 01553 if (nevLocal_ == threeBlocks) { 01554 MVT::MvTransMv( ONE, *cP, *cK_P, K3 ); 01555 MVT::MvTransMv( ONE, *cP, *cM_P, M3 ); 01556 } 01557 } 01558 } 01559 // below, we only need bases [X H P] and [H P] and friends 01560 // furthermore, we only need [H P] and friends if fullOrtho == false 01561 // clear the others now 01562 cX = Teuchos::null; 01563 cH = Teuchos::null; 01564 cP = Teuchos::null; 01565 cK_P = Teuchos::null; 01566 cM_P = Teuchos::null; 01567 if (fullOrtho_ == true) { 01568 cHP = Teuchos::null; 01569 cK_HP = Teuchos::null; 01570 cM_HP = Teuchos::null; 01571 } 01572 01573 // 01574 //--------------------------------------------------- 01575 // Perform a spectral decomposition of (KK,MM) 01576 //--------------------------------------------------- 01577 // 01578 // Get pointers to relevant part of KK, MM and S for Rayleigh-Ritz analysis 01579 Teuchos::SerialDenseMatrix<int,ScalarType> lclKK(Teuchos::View,KK,nevLocal_,nevLocal_), 01580 lclMM(Teuchos::View,MM,nevLocal_,nevLocal_), 01581 lclS(Teuchos::View, S,nevLocal_,nevLocal_); 01582 { 01583 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01584 Teuchos::TimeMonitor lcltimer( *timerDS_ ); 01585 #endif 01586 int localSize = nevLocal_; 01587 Utils::directSolver(localSize, lclKK, Teuchos::rcpFromRef(lclMM), lclS, theta_, nevLocal_, 0); 01588 // localSize tells directSolver() how big KK,MM are 01589 // however, directSolver() may choose to use only the principle submatrices of KK,MM 01590 // because of loss of MM-orthogonality in the projected eigenvectors 01591 // nevLocal_ tells us how much it used, telling us the effective localSize 01592 // (i.e., how much of KK,MM used by directSolver) 01593 // we will not tolerate any indefiniteness, and will throw an exception if it was 01594 // detected by directSolver 01595 // 01596 if (nevLocal_ != localSize) { 01597 // before throwing the exception, and thereby leaving iterate(), setup the views again 01598 // first, clear the const views 01599 cXHP = Teuchos::null; 01600 cK_XHP = Teuchos::null; 01601 cM_XHP = Teuchos::null; 01602 cHP = Teuchos::null; 01603 cK_HP = Teuchos::null; 01604 cM_HP = Teuchos::null; 01605 setupViews(); 01606 } 01607 TEUCHOS_TEST_FOR_EXCEPTION(nevLocal_ != localSize, LOBPCGRitzFailure, 01608 "Anasazi::LOBPCG::iterate(): indefiniteness detected in projected mass matrix." ); 01609 } 01610 01611 // 01612 //--------------------------------------------------- 01613 // Sort the ritz values using the sort manager 01614 //--------------------------------------------------- 01615 Teuchos::LAPACK<int,ScalarType> lapack; 01616 Teuchos::BLAS<int,ScalarType> blas; 01617 { 01618 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01619 Teuchos::TimeMonitor lcltimer( *timerSort_ ); 01620 #endif 01621 01622 std::vector<int> order(nevLocal_); 01623 // 01624 // Sort the first nevLocal_ values in theta_ 01625 sm_->sort(theta_, Teuchos::rcpFromRef(order), nevLocal_); // don't catch exception 01626 // 01627 // Sort the primitive ritz vectors 01628 Utils::permuteVectors(order,lclS); 01629 } 01630 01631 // 01632 //---------------------------------------- 01633 // Compute coefficients for X and P under [X H P] 01634 //---------------------------------------- 01635 // Before computing X,P, optionally perform orthogonalization per Hetmaniuk,Lehoucq paper 01636 // CX will be the coefficients of [X,H,P] for new X, CP for new P 01637 // The paper suggests orthogonalizing CP against CX and orthonormalizing CP, w.r.t. MM 01638 // Here, we will also orthonormalize CX. 01639 // This is accomplished using the Cholesky factorization of [CX CP]^H lclMM [CX CP] 01640 Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > CX, CP; 01641 if (fullOrtho_) { 01642 // build orthonormal basis for ( 0 ) that is MM orthogonal to ( S11 ) 01643 // ( S21 ) ( S21 ) 01644 // ( S31 ) ( S31 ) 01645 // Do this using Cholesky factorization of ( S11 0 ) 01646 // ( S21 S21 ) 01647 // ( S31 S31 ) 01648 // ( S11 0 ) 01649 // Build C = ( S21 S21 ) 01650 // ( S31 S31 ) 01651 Teuchos::SerialDenseMatrix<int,ScalarType> C(nevLocal_,twoBlocks), 01652 MMC(nevLocal_,twoBlocks), 01653 CMMC(twoBlocks ,twoBlocks); 01654 01655 // first block of rows: ( S11 0 ) 01656 for (int j=0; j<oneBlock; j++) { 01657 for (int i=0; i<oneBlock; i++) { 01658 // CX 01659 C(i,j) = lclS(i,j); 01660 // CP 01661 C(i,j+oneBlock) = ZERO; 01662 } 01663 } 01664 // second block of rows: (S21 S21) 01665 for (int j=0; j<oneBlock; j++) { 01666 for (int i=oneBlock; i<twoBlocks; i++) { 01667 // CX 01668 C(i,j) = lclS(i,j); 01669 // CP 01670 C(i,j+oneBlock) = lclS(i,j); 01671 } 01672 } 01673 // third block of rows 01674 if (nevLocal_ == threeBlocks) { 01675 for (int j=0; j<oneBlock; j++) { 01676 for (int i=twoBlocks; i<threeBlocks; i++) { 01677 // CX 01678 C(i,j) = lclS(i,j); 01679 // CP 01680 C(i,j+oneBlock) = lclS(i,j); 01681 } 01682 } 01683 } 01684 01685 // compute MMC = lclMM*C 01686 { 01687 int teuchosret; 01688 teuchosret = MMC.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,C,ZERO); 01689 TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error, 01690 "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply"); 01691 // compute CMMC = C^H*MMC == C^H*lclMM*C 01692 teuchosret = CMMC.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,C,MMC,ZERO); 01693 TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error, 01694 "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply"); 01695 } 01696 01697 // compute R (cholesky) of CMMC 01698 { 01699 int info; 01700 lapack.POTRF('U',twoBlocks,CMMC.values(),CMMC.stride(),&info); 01701 // our views ARE NOT currently in place; we must reestablish them before throwing an exception 01702 if (info != 0) { 01703 // Check symmetry of H'*K*H, this might be the first place a non-Hermitian operator will cause a problem. 01704 Teuchos::SerialDenseMatrix<int,ScalarType> K22(Teuchos::View,KK,blockSize_,blockSize_,1*blockSize_,1*blockSize_); 01705 Teuchos::SerialDenseMatrix<int,ScalarType> K22_trans( K22, Teuchos::CONJ_TRANS ); 01706 K22_trans -= K22; 01707 MagnitudeType symNorm = K22_trans.normOne(); 01708 MagnitudeType tol = SCT::magnitude(SCT::squareroot(SCT::eps())); 01709 01710 cXHP = Teuchos::null; 01711 cHP = Teuchos::null; 01712 cK_XHP = Teuchos::null; 01713 cK_HP = Teuchos::null; 01714 cM_XHP = Teuchos::null; 01715 cM_HP = Teuchos::null; 01716 setupViews(); 01717 01718 std::string errMsg = "Anasazi::LOBPCG::iterate(): Cholesky factorization failed during full orthogonalization."; 01719 if ( symNorm < tol ) 01720 { 01721 TEUCHOS_TEST_FOR_EXCEPTION(info != 0, LOBPCGOrthoFailure, errMsg ); 01722 } 01723 else 01724 { 01725 errMsg += " Check the operator A (or K), it may not be Hermitian!"; 01726 TEUCHOS_TEST_FOR_EXCEPTION(info != 0, LOBPCGOrthoFailure, errMsg ); 01727 } 01728 } 01729 } 01730 // compute C = C inv(R) 01731 blas.TRSM(Teuchos::RIGHT_SIDE,Teuchos::UPPER_TRI,Teuchos::NO_TRANS,Teuchos::NON_UNIT_DIAG, 01732 nevLocal_,twoBlocks,ONE,CMMC.values(),CMMC.stride(),C.values(),C.stride()); 01733 // put C(:,0:oneBlock-1) into CX 01734 CX = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,C,nevLocal_,oneBlock,0,0) ); 01735 // put C(:,oneBlock:twoBlocks-1) into CP 01736 CP = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,C,nevLocal_,oneBlock,0,oneBlock) ); 01737 01738 // check the results 01739 if (om_->isVerbosity( Debug ) ) { 01740 Teuchos::SerialDenseMatrix<int,ScalarType> tmp1(nevLocal_,oneBlock), 01741 tmp2(oneBlock,oneBlock); 01742 MagnitudeType tmp; 01743 int teuchosret; 01744 std::stringstream os; 01745 os.precision(2); 01746 os.setf(std::ios::scientific, std::ios::floatfield); 01747 01748 os << " Checking Full Ortho: iteration " << iter_ << std::endl; 01749 01750 // check CX^T MM CX == I 01751 // compute tmp1 = MM*CX 01752 teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,*CX,ZERO); 01753 TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error, 01754 "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply"); 01755 // compute tmp2 = CX^H*tmp1 == CX^H*MM*CX 01756 teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,*CX,tmp1,ZERO); 01757 TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error, 01758 "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply"); 01759 // subtrace tmp2 - I == CX^H * MM * CX - I 01760 for (int i=0; i<oneBlock; i++) tmp2(i,i) -= ONE; 01761 tmp = tmp2.normFrobenius(); 01762 os << " >> Error in CX^H MM CX == I : " << tmp << std::endl; 01763 01764 // check CP^T MM CP == I 01765 // compute tmp1 = MM*CP 01766 teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,*CP,ZERO); 01767 TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error, 01768 "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply"); 01769 // compute tmp2 = CP^H*tmp1 == CP^H*MM*CP 01770 teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,*CP,tmp1,ZERO); 01771 TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error, 01772 "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply"); 01773 // subtrace tmp2 - I == CP^H * MM * CP - I 01774 for (int i=0; i<oneBlock; i++) tmp2(i,i) -= ONE; 01775 tmp = tmp2.normFrobenius(); 01776 os << " >> Error in CP^H MM CP == I : " << tmp << std::endl; 01777 01778 // check CX^T MM CP == 0 01779 // compute tmp1 = MM*CP 01780 teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,*CP,ZERO); 01781 TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply"); 01782 // compute tmp2 = CX^H*tmp1 == CX^H*MM*CP 01783 teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,*CX,tmp1,ZERO); 01784 TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply"); 01785 // subtrace tmp2 == CX^H * MM * CP 01786 tmp = tmp2.normFrobenius(); 01787 os << " >> Error in CX^H MM CP == 0 : " << tmp << std::endl; 01788 01789 os << std::endl; 01790 om_->print(Debug,os.str()); 01791 } 01792 } 01793 else { 01794 // [S11 ... ...] 01795 // S = [S21 ... ...] 01796 // [S31 ... ...] 01797 // 01798 // CX = [S11] 01799 // [S21] 01800 // [S31] -> X = [X H P] CX 01801 // 01802 // CP = [S21] -> P = [H P] CP 01803 // [S31] 01804 // 01805 CX = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,lclS,nevLocal_ ,oneBlock,0 ,0) ); 01806 CP = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,lclS,nevLocal_-oneBlock,oneBlock,oneBlock,0) ); 01807 } 01808 01809 // 01810 //---------------------------------------- 01811 // Compute new X and new P 01812 //---------------------------------------- 01813 // Note: Use R as a temporary work space and (if full ortho) tmpMV as well 01814 { 01815 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01816 Teuchos::TimeMonitor lcltimer( *timerLocalUpdate_ ); 01817 #endif 01818 01819 // if full ortho, then CX and CP are dense 01820 // we multiply [X H P]*CX into tmpMV 01821 // [X H P]*CP into R 01822 // then put V(:,firstblock) <- tmpMV 01823 // V(:,thirdblock) <- R 01824 // 01825 // if no full ortho, then [H P]*CP doesn't reference first block (X) 01826 // of V, so that we can modify it before computing P 01827 // so we multiply [X H P]*CX into R 01828 // V(:,firstblock) <- R 01829 // multiply [H P]*CP into R 01830 // V(:,thirdblock) <- R 01831 // 01832 // mutatis mutandis for K[XP] and M[XP] 01833 // 01834 // use SetBlock to do the assignments into V_ 01835 // 01836 // in either case, views are only allowed to be overlapping 01837 // if they are const, and it should be assume that SetBlock 01838 // creates a view of the associated part 01839 // 01840 // we have from above const-pointers to [KM]XHP, [KM]HP and (if hasP) [KM]P 01841 // 01842 if (fullOrtho_) { 01843 // X,P 01844 MVT::MvTimesMatAddMv(ONE,*cXHP,*CX,ZERO,*tmpmvec_); 01845 MVT::MvTimesMatAddMv(ONE,*cXHP,*CP,ZERO,*R_); 01846 cXHP = Teuchos::null; 01847 MVT::SetBlock(*tmpmvec_,indblock1,*V_); 01848 MVT::SetBlock(*R_ ,indblock3,*V_); 01849 // KX,KP 01850 MVT::MvTimesMatAddMv(ONE,*cK_XHP,*CX,ZERO,*tmpmvec_); 01851 MVT::MvTimesMatAddMv(ONE,*cK_XHP,*CP,ZERO,*R_); 01852 cK_XHP = Teuchos::null; 01853 MVT::SetBlock(*tmpmvec_,indblock1,*KV_); 01854 MVT::SetBlock(*R_ ,indblock3,*KV_); 01855 // MX,MP 01856 if (hasM_) { 01857 MVT::MvTimesMatAddMv(ONE,*cM_XHP,*CX,ZERO,*tmpmvec_); 01858 MVT::MvTimesMatAddMv(ONE,*cM_XHP,*CP,ZERO,*R_); 01859 cM_XHP = Teuchos::null; 01860 MVT::SetBlock(*tmpmvec_,indblock1,*MV_); 01861 MVT::SetBlock(*R_ ,indblock3,*MV_); 01862 } 01863 else { 01864 cM_XHP = Teuchos::null; 01865 } 01866 } 01867 else { 01868 // X,P 01869 MVT::MvTimesMatAddMv(ONE,*cXHP,*CX,ZERO,*R_); 01870 cXHP = Teuchos::null; 01871 MVT::SetBlock(*R_,indblock1,*V_); 01872 MVT::MvTimesMatAddMv(ONE,*cHP,*CP,ZERO,*R_); 01873 cHP = Teuchos::null; 01874 MVT::SetBlock(*R_,indblock3,*V_); 01875 // KX,KP 01876 MVT::MvTimesMatAddMv(ONE,*cK_XHP,*CX,ZERO,*R_); 01877 cK_XHP = Teuchos::null; 01878 MVT::SetBlock(*R_,indblock1,*KV_); 01879 MVT::MvTimesMatAddMv(ONE,*cK_HP,*CP,ZERO,*R_); 01880 cK_HP = Teuchos::null; 01881 MVT::SetBlock(*R_,indblock3,*KV_); 01882 // MX,MP 01883 if (hasM_) { 01884 MVT::MvTimesMatAddMv(ONE,*cM_XHP,*CX,ZERO,*R_); 01885 cM_XHP = Teuchos::null; 01886 MVT::SetBlock(*R_,indblock1,*MV_); 01887 MVT::MvTimesMatAddMv(ONE,*cM_HP,*CP,ZERO,*R_); 01888 cM_HP = Teuchos::null; 01889 MVT::SetBlock(*R_,indblock3,*MV_); 01890 } 01891 else { 01892 cM_XHP = Teuchos::null; 01893 cM_HP = Teuchos::null; 01894 } 01895 } 01896 } // end timing block 01897 // done with coefficient matrices 01898 CX = Teuchos::null; 01899 CP = Teuchos::null; 01900 01901 // 01902 // we now have a P direction 01903 hasP_ = true; 01904 01905 // debugging check: all of our const views should have been cleared by now 01906 // if not, we have a logic error above 01907 TEUCHOS_TEST_FOR_EXCEPTION( cXHP != Teuchos::null || cK_XHP != Teuchos::null || cM_XHP != Teuchos::null 01908 || cHP != Teuchos::null || cK_HP != Teuchos::null || cM_HP != Teuchos::null 01909 || cP != Teuchos::null || cK_P != Teuchos::null || cM_P != Teuchos::null, 01910 std::logic_error, 01911 "Anasazi::BlockKrylovSchur::iterate(): const views were not all cleared! Something went wrong!" ); 01912 01913 // 01914 // recreate our const MV views of X,H,P and friends 01915 setupViews(); 01916 01917 // 01918 // Compute the new residuals, explicitly 01919 { 01920 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR 01921 Teuchos::TimeMonitor lcltimer( *timerCompRes_ ); 01922 #endif 01923 MVT::MvAddMv( ONE, *KX_, ZERO, *KX_, *R_ ); 01924 Teuchos::SerialDenseMatrix<int,ScalarType> T( blockSize_, blockSize_ ); 01925 for (int i = 0; i < blockSize_; i++) { 01926 T(i,i) = theta_[i]; 01927 } 01928 MVT::MvTimesMatAddMv( -ONE, *MX_, T, ONE, *R_ ); 01929 } 01930 01931 // R has been updated; mark the norms as out-of-date 01932 Rnorms_current_ = false; 01933 R2norms_current_ = false; 01934 01935 // When required, monitor some orthogonalities 01936 if (om_->isVerbosity( Debug ) ) { 01937 // Check almost everything here 01938 CheckList chk; 01939 chk.checkX = true; 01940 chk.checkKX = true; 01941 chk.checkMX = true; 01942 chk.checkP = true; 01943 chk.checkKP = true; 01944 chk.checkMP = true; 01945 chk.checkR = true; 01946 om_->print( Debug, accuracyCheck(chk, ": after local update") ); 01947 } 01948 else if (om_->isVerbosity( OrthoDetails )) { 01949 CheckList chk; 01950 chk.checkX = true; 01951 chk.checkP = true; 01952 chk.checkR = true; 01953 om_->print( OrthoDetails, accuracyCheck(chk, ": after local update") ); 01954 } 01955 } // end while (statusTest == false) 01956 } 01957 01958 01960 // compute/return residual M-norms 01961 template <class ScalarType, class MV, class OP> 01962 std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> 01963 LOBPCG<ScalarType,MV,OP>::getResNorms() { 01964 if (Rnorms_current_ == false) { 01965 // Update the residual norms 01966 orthman_->norm(*R_,Rnorms_); 01967 Rnorms_current_ = true; 01968 } 01969 return Rnorms_; 01970 } 01971 01972 01974 // compute/return residual 2-norms 01975 template <class ScalarType, class MV, class OP> 01976 std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> 01977 LOBPCG<ScalarType,MV,OP>::getRes2Norms() { 01978 if (R2norms_current_ == false) { 01979 // Update the residual 2-norms 01980 MVT::MvNorm(*R_,R2norms_); 01981 R2norms_current_ = true; 01982 } 01983 return R2norms_; 01984 } 01985 01986 01987 01988 01990 // Check accuracy, orthogonality, and other debugging stuff 01991 // 01992 // bools specify which tests we want to run (instead of running more than we actually care about) 01993 // 01994 // we don't bother checking the following because they are computed explicitly: 01995 // H == Prec*R 01996 // KH == K*H 01997 // 01998 // 01999 // checkX : X orthonormal 02000 // orthogonal to auxvecs 02001 // checkMX: check MX == M*X 02002 // checkKX: check KX == K*X 02003 // checkP : if fullortho P orthonormal and orthogonal to X 02004 // orthogonal to auxvecs 02005 // checkMP: check MP == M*P 02006 // checkKP: check KP == K*P 02007 // checkH : if fullortho H orthonormal and orthogonal to X and P 02008 // orthogonal to auxvecs 02009 // checkMH: check MH == M*H 02010 // checkR : check R orthogonal to X 02011 // checkQ : check that auxiliary vectors are actually orthonormal 02012 // 02013 // TODO: 02014 // add checkTheta 02015 // 02016 template <class ScalarType, class MV, class OP> 02017 std::string LOBPCG<ScalarType,MV,OP>::accuracyCheck( const CheckList &chk, const std::string &where ) const 02018 { 02019 using std::endl; 02020 02021 std::stringstream os; 02022 os.precision(2); 02023 os.setf(std::ios::scientific, std::ios::floatfield); 02024 MagnitudeType tmp; 02025 02026 os << " Debugging checks: iteration " << iter_ << where << endl; 02027 02028 // X and friends 02029 if (chk.checkX && initialized_) { 02030 tmp = orthman_->orthonormError(*X_); 02031 os << " >> Error in X^H M X == I : " << tmp << endl; 02032 for (Array_size_type i=0; i<auxVecs_.size(); i++) { 02033 tmp = orthman_->orthogError(*X_,*auxVecs_[i]); 02034 os << " >> Error in X^H M Q[" << i << "] == 0 : " << tmp << endl; 02035 } 02036 } 02037 if (chk.checkMX && hasM_ && initialized_) { 02038 tmp = Utils::errorEquality(*X_, *MX_, MOp_); 02039 os << " >> Error in MX == M*X : " << tmp << endl; 02040 } 02041 if (chk.checkKX && initialized_) { 02042 tmp = Utils::errorEquality(*X_, *KX_, Op_); 02043 os << " >> Error in KX == K*X : " << tmp << endl; 02044 } 02045 02046 // P and friends 02047 if (chk.checkP && hasP_ && initialized_) { 02048 if (fullOrtho_) { 02049 tmp = orthman_->orthonormError(*P_); 02050 os << " >> Error in P^H M P == I : " << tmp << endl; 02051 tmp = orthman_->orthogError(*P_,*X_); 02052 os << " >> Error in P^H M X == 0 : " << tmp << endl; 02053 } 02054 for (Array_size_type i=0; i<auxVecs_.size(); i++) { 02055 tmp = orthman_->orthogError(*P_,*auxVecs_[i]); 02056 os << " >> Error in P^H M Q[" << i << "] == 0 : " << tmp << endl; 02057 } 02058 } 02059 if (chk.checkMP && hasM_ && hasP_ && initialized_) { 02060 tmp = Utils::errorEquality(*P_, *MP_, MOp_); 02061 os << " >> Error in MP == M*P : " << tmp << endl; 02062 } 02063 if (chk.checkKP && hasP_ && initialized_) { 02064 tmp = Utils::errorEquality(*P_, *KP_, Op_); 02065 os << " >> Error in KP == K*P : " << tmp << endl; 02066 } 02067 02068 // H and friends 02069 if (chk.checkH && initialized_) { 02070 if (fullOrtho_) { 02071 tmp = orthman_->orthonormError(*H_); 02072 os << " >> Error in H^H M H == I : " << tmp << endl; 02073 tmp = orthman_->orthogError(*H_,*X_); 02074 os << " >> Error in H^H M X == 0 : " << tmp << endl; 02075 if (hasP_) { 02076 tmp = orthman_->orthogError(*H_,*P_); 02077 os << " >> Error in H^H M P == 0 : " << tmp << endl; 02078 } 02079 } 02080 for (Array_size_type i=0; i<auxVecs_.size(); i++) { 02081 tmp = orthman_->orthogError(*H_,*auxVecs_[i]); 02082 os << " >> Error in H^H M Q[" << i << "] == 0 : " << tmp << endl; 02083 } 02084 } 02085 if (chk.checkMH && hasM_ && initialized_) { 02086 tmp = Utils::errorEquality(*H_, *MH_, MOp_); 02087 os << " >> Error in MH == M*H : " << tmp << endl; 02088 } 02089 02090 // R: this is not M-orthogonality, but standard euclidean orthogonality 02091 if (chk.checkR && initialized_) { 02092 Teuchos::SerialDenseMatrix<int,ScalarType> xTx(blockSize_,blockSize_); 02093 MVT::MvTransMv(ONE,*X_,*R_,xTx); 02094 tmp = xTx.normFrobenius(); 02095 MVT::MvTransMv(ONE,*R_,*R_,xTx); 02096 double normR = xTx.normFrobenius(); 02097 os << " >> RelError in X^H R == 0: " << tmp/normR << endl; 02098 } 02099 02100 // Q 02101 if (chk.checkQ) { 02102 for (Array_size_type i=0; i<auxVecs_.size(); i++) { 02103 tmp = orthman_->orthonormError(*auxVecs_[i]); 02104 os << " >> Error in Q[" << i << "]^H M Q[" << i << "] == I : " << tmp << endl; 02105 for (Array_size_type j=i+1; j<auxVecs_.size(); j++) { 02106 tmp = orthman_->orthogError(*auxVecs_[i],*auxVecs_[j]); 02107 os << " >> Error in Q[" << i << "]^H M Q[" << j << "] == 0 : " << tmp << endl; 02108 } 02109 } 02110 } 02111 02112 os << endl; 02113 02114 return os.str(); 02115 } 02116 02117 02119 // Print the current status of the solver 02120 template <class ScalarType, class MV, class OP> 02121 void 02122 LOBPCG<ScalarType,MV,OP>::currentStatus(std::ostream &os) 02123 { 02124 using std::endl; 02125 02126 os.setf(std::ios::scientific, std::ios::floatfield); 02127 os.precision(6); 02128 os <<endl; 02129 os <<"================================================================================" << endl; 02130 os << endl; 02131 os <<" LOBPCG Solver Status" << endl; 02132 os << endl; 02133 os <<"The solver is "<<(initialized_ ? "initialized." : "not initialized.") << endl; 02134 os <<"The number of iterations performed is " << iter_ << endl; 02135 os <<"The current block size is " << blockSize_ << endl; 02136 os <<"The number of auxiliary vectors is " << numAuxVecs_ << endl; 02137 os <<"The number of operations Op*x is " << count_ApplyOp_ << endl; 02138 os <<"The number of operations M*x is " << count_ApplyM_ << endl; 02139 os <<"The number of operations Prec*x is " << count_ApplyPrec_ << endl; 02140 02141 os.setf(std::ios_base::right, std::ios_base::adjustfield); 02142 02143 if (initialized_) { 02144 os << endl; 02145 os <<"CURRENT EIGENVALUE ESTIMATES "<<endl; 02146 os << std::setw(20) << "Eigenvalue" 02147 << std::setw(20) << "Residual(M)" 02148 << std::setw(20) << "Residual(2)" 02149 << endl; 02150 os <<"--------------------------------------------------------------------------------"<<endl; 02151 for (int i=0; i<blockSize_; i++) { 02152 os << std::setw(20) << theta_[i]; 02153 if (Rnorms_current_) os << std::setw(20) << Rnorms_[i]; 02154 else os << std::setw(20) << "not current"; 02155 if (R2norms_current_) os << std::setw(20) << R2norms_[i]; 02156 else os << std::setw(20) << "not current"; 02157 os << endl; 02158 } 02159 } 02160 os <<"================================================================================" << endl; 02161 os << endl; 02162 } 02163 02165 // are we initialized or not? 02166 template <class ScalarType, class MV, class OP> 02167 bool LOBPCG<ScalarType,MV,OP>::isInitialized() const { 02168 return initialized_; 02169 } 02170 02171 02173 // is P valid or not? 02174 template <class ScalarType, class MV, class OP> 02175 bool LOBPCG<ScalarType,MV,OP>::hasP() { 02176 return hasP_; 02177 } 02178 02180 // is full orthogonalization enabled or not? 02181 template <class ScalarType, class MV, class OP> 02182 bool LOBPCG<ScalarType,MV,OP>::getFullOrtho() const { 02183 return(fullOrtho_); 02184 } 02185 02186 02188 // return the current auxilliary vectors 02189 template <class ScalarType, class MV, class OP> 02190 Teuchos::Array<Teuchos::RCP<const MV> > LOBPCG<ScalarType,MV,OP>::getAuxVecs() const { 02191 return auxVecs_; 02192 } 02193 02195 // return the current block size 02196 template <class ScalarType, class MV, class OP> 02197 int LOBPCG<ScalarType,MV,OP>::getBlockSize() const { 02198 return(blockSize_); 02199 } 02200 02202 // return the current eigenproblem 02203 template <class ScalarType, class MV, class OP> 02204 const Eigenproblem<ScalarType,MV,OP>& LOBPCG<ScalarType,MV,OP>::getProblem() const { 02205 return(*problem_); 02206 } 02207 02208 02210 // return the max subspace dimension 02211 template <class ScalarType, class MV, class OP> 02212 int LOBPCG<ScalarType,MV,OP>::getMaxSubspaceDim() const { 02213 return 3*blockSize_; 02214 } 02215 02217 // return the current subspace dimension 02218 template <class ScalarType, class MV, class OP> 02219 int LOBPCG<ScalarType,MV,OP>::getCurSubspaceDim() const { 02220 if (!initialized_) return 0; 02221 return nevLocal_; 02222 } 02223 02224 02226 // return the current ritz residual norms 02227 template <class ScalarType, class MV, class OP> 02228 std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> 02229 LOBPCG<ScalarType,MV,OP>::getRitzRes2Norms() 02230 { 02231 return this->getRes2Norms(); 02232 } 02233 02234 02236 // return the current compression indices 02237 template <class ScalarType, class MV, class OP> 02238 std::vector<int> LOBPCG<ScalarType,MV,OP>::getRitzIndex() { 02239 std::vector<int> ret(nevLocal_,0); 02240 return ret; 02241 } 02242 02243 02245 // return the current ritz values 02246 template <class ScalarType, class MV, class OP> 02247 std::vector<Value<ScalarType> > LOBPCG<ScalarType,MV,OP>::getRitzValues() { 02248 std::vector<Value<ScalarType> > ret(nevLocal_); 02249 for (int i=0; i<nevLocal_; i++) { 02250 ret[i].realpart = theta_[i]; 02251 ret[i].imagpart = ZERO; 02252 } 02253 return ret; 02254 } 02255 02257 // Set a new StatusTest for the solver. 02258 template <class ScalarType, class MV, class OP> 02259 void LOBPCG<ScalarType,MV,OP>::setStatusTest(Teuchos::RCP<StatusTest<ScalarType,MV,OP> > test) { 02260 TEUCHOS_TEST_FOR_EXCEPTION(test == Teuchos::null,std::invalid_argument, 02261 "Anasazi::LOBPCG::setStatusTest() was passed a null StatusTest."); 02262 tester_ = test; 02263 } 02264 02266 // Get the current StatusTest used by the solver. 02267 template <class ScalarType, class MV, class OP> 02268 Teuchos::RCP<StatusTest<ScalarType,MV,OP> > LOBPCG<ScalarType,MV,OP>::getStatusTest() const { 02269 return tester_; 02270 } 02271 02273 // return the current ritz vectors 02274 template <class ScalarType, class MV, class OP> 02275 Teuchos::RCP<const MV> LOBPCG<ScalarType,MV,OP>::getRitzVectors() { 02276 return X_; 02277 } 02278 02279 02281 // reset the iteration counter 02282 template <class ScalarType, class MV, class OP> 02283 void LOBPCG<ScalarType,MV,OP>::resetNumIters() { 02284 iter_=0; 02285 } 02286 02287 02289 // return the number of iterations 02290 template <class ScalarType, class MV, class OP> 02291 int LOBPCG<ScalarType,MV,OP>::getNumIters() const { 02292 return(iter_); 02293 } 02294 02295 02297 // return the state 02298 template <class ScalarType, class MV, class OP> 02299 LOBPCGState<ScalarType,MV> LOBPCG<ScalarType,MV,OP>::getState() const { 02300 LOBPCGState<ScalarType,MV> state; 02301 state.V = V_; 02302 state.KV = KV_; 02303 state.X = X_; 02304 state.KX = KX_; 02305 state.P = P_; 02306 state.KP = KP_; 02307 state.H = H_; 02308 state.KH = KH_; 02309 state.R = R_; 02310 state.T = Teuchos::rcp(new std::vector<MagnitudeType>(theta_)); 02311 if (hasM_) { 02312 state.MV = MV_; 02313 state.MX = MX_; 02314 state.MP = MP_; 02315 state.MH = MH_; 02316 } 02317 else { 02318 state.MX = Teuchos::null; 02319 state.MP = Teuchos::null; 02320 state.MH = Teuchos::null; 02321 } 02322 return state; 02323 } 02324 02325 } // end Anasazi namespace 02326 02327 #endif // ANASAZI_LOBPCG_HPP
1.7.6.1