|
Belos
Version of the Day
|
00001 //@HEADER 00002 // ************************************************************************ 00003 // 00004 // Belos: Block Linear Solvers Package 00005 // Copyright 2004 Sandia Corporation 00006 // 00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, 00008 // the U.S. Government retains certain rights in this software. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00038 // 00039 // ************************************************************************ 00040 //@HEADER 00041 00042 #ifndef BELOS_BLOCK_GCRODR_ITER_HPP 00043 #define BELOS_BLOCK_GCRODR_ITER_HPP 00044 00045 00050 #include "BelosConfigDefs.hpp" 00051 #include "BelosTypes.hpp" 00052 00053 #include "BelosLinearProblem.hpp" 00054 #include "BelosMatOrthoManager.hpp" 00055 #include "BelosOutputManager.hpp" 00056 #include "BelosStatusTest.hpp" 00057 #include "BelosOperatorTraits.hpp" 00058 #include "BelosMultiVecTraits.hpp" 00059 00060 #include "Teuchos_BLAS.hpp" 00061 #include "Teuchos_SerialDenseMatrix.hpp" 00062 #include "Teuchos_SerialDenseVector.hpp" 00063 #include "Teuchos_ScalarTraits.hpp" 00064 #include "Teuchos_ParameterList.hpp" 00065 #include "Teuchos_TimeMonitor.hpp" 00066 00067 // MLP 00068 #include <unistd.h> 00069 00084 namespace Belos{ 00085 00087 00088 00094 template <class ScalarType, class MV> 00095 struct BlockGCRODRIterState { 00101 int curDim; 00102 00104 Teuchos::RCP<MV> V; 00105 00107 Teuchos::RCP<MV> U, C; 00108 00114 Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > H; 00115 00118 Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > B; 00119 00120 BlockGCRODRIterState() : curDim(0), V(Teuchos::null), 00121 U(Teuchos::null), C(Teuchos::null), 00122 H(Teuchos::null), B(Teuchos::null) 00123 {} 00124 00125 }; 00126 00128 00129 00130 00132 00133 00146 class BlockGCRODRIterInitFailure : public BelosError { 00147 public: 00148 BlockGCRODRIterInitFailure(const std::string& what_arg) : BelosError(what_arg) {} 00149 }; 00150 00158 class BlockGCRODRIterOrthoFailure : public BelosError { 00159 public: 00160 BlockGCRODRIterOrthoFailure(const std::string& what_arg) : BelosError(what_arg) {} 00161 }; 00162 00164 00165 00166 template<class ScalarType, class MV, class OP> 00167 class BlockGCRODRIter : virtual public Iteration<ScalarType,MV,OP> { 00168 public: 00169 00170 // 00171 //Convenience typedefs 00172 // 00173 typedef MultiVecTraits<ScalarType,MV> MVT; 00174 typedef OperatorTraits<ScalarType,MV,OP> OPT; 00175 typedef Teuchos::ScalarTraits<ScalarType> SCT; 00176 typedef typename SCT::magnitudeType MagnitudeType; 00177 typedef Teuchos::SerialDenseMatrix<int,ScalarType> SDM; 00178 typedef Teuchos::SerialDenseVector<int,ScalarType> SDV; 00179 00181 00182 00191 BlockGCRODRIter( const Teuchos::RCP<LinearProblem<ScalarType,MV,OP> > &problem, 00192 const Teuchos::RCP<OutputManager<ScalarType> > &printer, 00193 const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > &tester, 00194 const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> > &ortho, 00195 Teuchos::ParameterList ¶ms ); 00196 00198 virtual ~BlockGCRODRIter() {}; 00200 00202 00203 00225 void iterate(); 00226 00249 void initialize() { 00250 BlockGCRODRIterState<ScalarType,MV> empty; 00251 initialize(empty); 00252 } 00253 00257 void initialize(BlockGCRODRIterState<ScalarType,MV> newstate); 00258 00266 BlockGCRODRIterState<ScalarType,MV> getState() const{ 00267 BlockGCRODRIterState<ScalarType,MV> state; 00268 state.curDim = curDim_; 00269 state.V = V_; 00270 state.U = U_; 00271 state.C = C_; 00272 state.H = H_; 00273 state.B = B_; 00274 return state; 00275 } 00277 00279 00280 00282 bool isInitialized(){ return initialized_;}; 00283 00285 int getNumIters() const { return iter_; }; 00286 00288 void resetNumIters( int iter = 0 ) { iter_ = iter; }; 00289 00292 Teuchos::RCP<const MV> getNativeResiduals( std::vector<MagnitudeType> *norms ) const; 00293 00295 00300 Teuchos::RCP<MV> getCurrentUpdate() const; 00301 00302 00304 00305 00307 00308 00309 00311 const LinearProblem<ScalarType,MV,OP>& getProblem() const { return *lp_; }; 00312 00314 int getNumBlocks() const { return numBlocks_; } 00315 00317 int getBlockSize() const { return blockSize_; }; 00318 00320 int getCurSubspaceDim() const { 00321 if (!initialized_) return 0; 00322 return curDim_; 00323 }; 00324 00326 int getMaxSubspaceDim() const { return numBlocks_*blockSize_; }; 00327 00329 int getRecycledBlocks() const { return recycledBlocks_; }; 00330 00332 00333 00335 00336 00337 void updateLSQR( int dim = -1); 00338 00340 void setBlockSize(int blockSize){ blockSize_ = blockSize; } 00341 00343 void setRecycledBlocks(int recycledBlocks) { setSize( recycledBlocks, numBlocks_ ); }; 00344 00346 void setNumBlocks(int numBlocks) { setSize( recycledBlocks_, numBlocks ); }; 00347 00349 void setSize( int recycledBlocks, int numBlocks ) { 00350 // only call resize if size changed 00351 if ( (recycledBlocks_ != recycledBlocks) || (numBlocks_ != numBlocks) ) { 00352 recycledBlocks_ = recycledBlocks; 00353 numBlocks_ = numBlocks; 00354 cs_.sizeUninitialized( numBlocks_ ); 00355 sn_.sizeUninitialized( numBlocks_ ); 00356 Z_.shapeUninitialized( numBlocks_*blockSize_, blockSize_ ); 00357 } 00358 } 00359 00361 00362 private: 00363 00364 // 00365 // Internal methods 00366 // 00367 00368 00369 00370 //Classes inputed through constructor that define the linear problem to be solved 00371 // 00372 const Teuchos::RCP<LinearProblem<ScalarType,MV,OP> > lp_; 00373 const Teuchos::RCP<OutputManager<ScalarType> > om_; 00374 const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > stest_; 00375 const Teuchos::RCP<OrthoManager<ScalarType,MV> > ortho_; 00376 00377 // 00378 //Algorithmic Parameters 00379 // 00380 00381 //numBlocks_ is the size of the allocated space for the Krylov basis, in blocks. 00382 //blockSize_ is the number of columns in each block Krylov vector. 00383 int numBlocks_, blockSize_; 00384 00385 //boolean vector indicating which right-hand sides we care about 00386 //when we are testing residual norms. THIS IS NOT IMPLEMENTED. RIGHT NOW JUST 00387 //SELECTS ALL RIGHT HANDS SIDES FOR NORM COMPUTATION. 00388 std::vector<bool> trueRHSIndices_; 00389 00390 // recycledBlocks_ is the size of the allocated space for the recycled subspace, in vectors. 00391 int recycledBlocks_; 00392 00393 //Storage for QR factorization of the least squares system if using plane rotations. 00394 SDV sn_; 00395 Teuchos::SerialDenseVector<int,MagnitudeType> cs_; 00396 00397 //Storage for QR factorization of the least squares system if using Householder reflections 00398 //Per block Krylov vector, we actually construct a 2*blockSize_ by 2*blockSize_ matrix which 00399 //is the product of all Householder transformations for that block. This has been shown to yield 00400 //speed ups without losing accuracy because we can apply previous Householder transformations 00401 //with BLAS3 operations. 00402 std::vector< SDM >House_; 00403 SDV beta_; 00404 00405 // 00406 //Current Solver State 00407 // 00408 //initialized_ specifies that the basis vectors have been initialized and the iterate() routine 00409 //is capable of running; _initialize is controlled by the initialize() member method 00410 //For the implications of the state of initialized_, please see documentation for initialize() 00411 bool initialized_; 00412 00413 // Current subspace dimension, number of iterations performed, and number of iterations performed in this cycle. 00414 int curDim_, iter_, lclIter_; 00415 00416 // 00417 // Recycled Krylov Method Storage 00418 // 00419 00420 00422 Teuchos::RCP<MV> V_; 00423 00425 Teuchos::RCP<MV> U_, C_; 00426 00427 00429 00430 00434 Teuchos::RCP<SDM > H_; 00435 00439 Teuchos::RCP<SDM > B_; 00440 00447 Teuchos::RCP<SDM> R_; 00448 00450 SDM Z_; 00451 00453 00454 // File stream variables to use Mike Parks' Matlab output codes. 00455 std::ofstream ofs; 00456 char filename[30]; 00457 00458 };//End BlockGCRODRIter Class Definition 00459 00461 //Constructor. 00462 template<class ScalarType, class MV, class OP> 00463 BlockGCRODRIter<ScalarType,MV,OP>::BlockGCRODRIter(const Teuchos::RCP<LinearProblem<ScalarType,MV,OP> > &problem, 00464 const Teuchos::RCP<OutputManager<ScalarType> > &printer, 00465 const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > &tester, 00466 const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> > &ortho, 00467 Teuchos::ParameterList ¶ms ):lp_(problem), 00468 om_(printer), stest_(tester), ortho_(ortho) { 00469 numBlocks_ = 0; 00470 blockSize_ = 0; 00471 recycledBlocks_ = 0; 00472 initialized_ = false; 00473 curDim_ = 0; 00474 iter_ = 0; 00475 lclIter_ = 0; 00476 V_ = Teuchos::null; 00477 U_ = Teuchos::null; 00478 C_ = Teuchos::null; 00479 H_ = Teuchos::null; 00480 B_ = Teuchos::null; 00481 R_ = Teuchos::null; 00482 // Get the maximum number of blocks allowed for this Krylov subspace 00483 TEUCHOS_TEST_FOR_EXCEPTION(!params.isParameter("Num Blocks"), std::invalid_argument, "Belos::BlockGCRODRIter::constructor: mandatory parameter \"Num Blocks\" is not specified."); 00484 int nb = Teuchos::getParameter<int>(params, "Num Blocks"); 00485 00486 TEUCHOS_TEST_FOR_EXCEPTION(!params.isParameter("Recycled Blocks"), std::invalid_argument,"Belos::BlockGCRODRIter::constructor: mandatory parameter \"Recycled Blocks\" is not specified."); 00487 int rb = Teuchos::getParameter<int>(params, "Recycled Blocks"); 00488 00489 TEUCHOS_TEST_FOR_EXCEPTION(nb <= 0, std::invalid_argument, "Belos::BlockGCRODRIter() was passed a non-positive argument for \"Num Blocks\"."); 00490 TEUCHOS_TEST_FOR_EXCEPTION(rb >= nb, std::invalid_argument, "Belos::BlockGCRODRIter() the number of recycled blocks is larger than the allowable subspace."); 00491 00492 00493 int bs = Teuchos::getParameter<int>(params, "Block Size"); 00494 00495 TEUCHOS_TEST_FOR_EXCEPTION(bs <= 0, std::invalid_argument, "Belos::BlockGCRODRIter() the block size was passed a non-postitive argument."); 00496 00497 00498 numBlocks_ = nb; 00499 recycledBlocks_ = rb; 00500 blockSize_ = bs; 00501 00502 //INITIALIZE ENTRIES OF trueRHSIndices_ TO CHECK EVERY NORM FOR NOW. LATER, THE USER 00503 //SHOULD SPECIFY WHICH RIGHT HAND SIDES ARE IMPORTANT FOR CONVERGENCE TESTING 00504 trueRHSIndices_.resize(blockSize_); 00505 int i; 00506 for(i=0; i<blockSize_; i++){ 00507 trueRHSIndices_[i] = true; 00508 } 00509 00510 //THIS MAKES SPACE FOR GIVENS ROTATIONS BUT IN REALITY WE NEED TO DO TESTING ON BLOCK SIZE 00511 //AND CHOOSE BETWEEN GIVENS ROTATIONS AND HOUSEHOLDER TRANSFORMATIONS. 00512 cs_.sizeUninitialized( numBlocks_+1 ); 00513 sn_.sizeUninitialized( numBlocks_+1 ); 00514 Z_.shapeUninitialized( (numBlocks_+1)*blockSize_,blockSize_ ); 00515 00516 House_.resize(numBlocks_); 00517 00518 for(i=0; i<numBlocks_;i++){ 00519 House_[i].shapeUninitialized(2*blockSize_, 2*blockSize_); 00520 } 00521 }//End Constructor Definition 00522 00524 // Iterate until the status test informs us we should stop. 00525 template <class ScalarType, class MV, class OP> 00526 void BlockGCRODRIter<ScalarType,MV,OP>::iterate() { 00527 TEUCHOS_TEST_FOR_EXCEPTION( initialized_ == false, BlockGCRODRIterInitFailure,"Belos::BlockGCRODRIter::iterate(): GCRODRIter class not initialized." ); 00528 00529 // MLP 00530 //sleep(1); 00531 //std::cout << "Calling setSize" << std::endl; 00532 // Force call to setsize to ensure internal storage is correct dimension 00533 setSize( recycledBlocks_, numBlocks_ ); 00534 00535 Teuchos::RCP<MV> Vnext; 00536 Teuchos::RCP<const MV> Vprev; 00537 std::vector<int> curind(blockSize_); 00538 00539 // z_ must be zeroed out in order to compute Givens rotations correctly 00540 Z_.putScalar(0.0); 00541 00542 // Orthonormalize the new V_0 00543 for(int i = 0; i<blockSize_; i++){curind[i] = i;}; 00544 00545 // MLP 00546 //sleep(1); 00547 //std::cout << "Calling normalize" << std::endl; 00548 Vnext = MVT::CloneViewNonConst(*V_,curind); 00549 //Orthonormalize Initial Columns 00550 //Store orthogonalization coefficients in Z0 00551 Teuchos::RCP<SDM > Z0 = 00552 Teuchos::rcp( new SDM(blockSize_,blockSize_) ); 00553 int rank = ortho_->normalize(*Vnext,Z0); 00554 00555 // MLP 00556 //sleep(1); 00557 //std::cout << "Assigning Z" << std::endl; 00558 TEUCHOS_TEST_FOR_EXCEPTION(rank != blockSize_,BlockGCRODRIterOrthoFailure, "Belos::BlockGCRODRIter::iterate(): couldn't generate basis of full rank at the initial step."); 00559 // Copy Z0 into the leading blockSize_ by blockSize_ block of Z_ 00560 Teuchos::RCP<SDM > Z_block = Teuchos::rcp( new SDM(Teuchos::View, Z_, blockSize_,blockSize_) ); 00561 Z_block->assign(*Z0); 00562 00563 std::vector<int> prevind(blockSize_*(numBlocks_ + 1)); 00564 00566 // iterate until the status test tells us to stop. 00567 // 00568 // also break if the basis is full 00569 // 00570 while( (stest_->checkStatus(this) != Passed) && (curDim_+blockSize_-1) < (numBlocks_*blockSize_)) { 00571 lclIter_++; 00572 iter_++; 00573 //KMS 00574 //std::cout << "Iter=" << iter_ << std::endl << "lclIter=" << lclIter_ << std::endl; 00575 00576 int HFirstCol = curDim_-blockSize_;//First column of H we need view of 00577 int HLastCol = HFirstCol + blockSize_-1 ;//last column of H we need a view of 00578 int HLastOrthRow = HLastCol;//Last row of H we will put orthog coefficients in 00579 int HFirstNormRow = HLastOrthRow + 1;//First row of H where normalization matrix goes 00580 //KMS 00581 //std::cout << "curDim_ = " << curDim_ << ", HFirstCol = " << HFirstCol << ", HLastCol = " << HLastCol <<", HLastOrthRow = " << HLastOrthRow << ", HFirstNormRow = " << HFirstNormRow << std::endl; 00582 // Get next basis indices 00583 for(int i = 0; i< blockSize_; i++){ 00584 curind[i] = curDim_ + i; 00585 } 00586 Vnext = MVT::CloneViewNonConst(*V_,curind); 00587 00588 //Get a view of the previous block Krylov vector. 00589 //This is used for orthogonalization and for computing V^H K H 00590 // Get next basis indices 00591 for(int i = 0; i< blockSize_; i++){ 00592 curind[blockSize_ - 1 - i] = curDim_ - i - 1; 00593 } 00594 Vprev = MVT::CloneView(*V_,curind); 00595 // Compute the next vector in the Krylov basis: Vnext = Op*Vprev 00596 lp_->apply(*Vprev,*Vnext); 00597 Vprev = Teuchos::null; 00598 00599 //First, remove the recycled subspace (C) from Vnext and put coefficients in B. 00600 00601 //Get a view of the matrix B and put the pointer into an array 00602 //Put a pointer to C in another array 00603 Teuchos::Array<Teuchos::RCP<const MV> > C(1, C_); 00604 00605 Teuchos::RCP<SDM > 00606 subB = Teuchos::rcp( new SDM ( Teuchos::View,*B_,recycledBlocks_,blockSize_,0, HFirstCol ) ); 00607 00608 Teuchos::Array<Teuchos::RCP<SDM > > AsubB; 00609 AsubB.append( subB ); 00610 // Project out the recycled subspace. 00611 ortho_->project( *Vnext, AsubB, C ); 00612 //Now, remove block Krylov Subspace from Vnext and store coefficients in H_ and R_ 00613 00614 // Get a view of all the previous vectors 00615 prevind.resize(curDim_); 00616 for (int i=0; i<curDim_; i++) { prevind[i] = i; } 00617 Vprev = MVT::CloneView(*V_,prevind); 00618 Teuchos::Array<Teuchos::RCP<const MV> > AVprev(1, Vprev); 00619 00620 // Get a view of the part of the Hessenberg matrix needed to hold the ortho coeffs. 00621 Teuchos::RCP<SDM> subH = Teuchos::rcp( new SDM ( Teuchos::View,*H_,curDim_,blockSize_,0,HFirstCol ) ); 00622 Teuchos::Array<Teuchos::RCP<SDM > > AsubH; 00623 AsubH.append( subH ); 00624 // Get a view of the part of the Hessenberg matrix needed to hold the norm coeffs. 00625 Teuchos::RCP<SDM > subR = Teuchos::rcp( new SDM ( Teuchos::View,*H_,blockSize_,blockSize_,HFirstNormRow,HFirstCol ) ); 00626 // Project out the previous Krylov vectors and normalize the next vector. 00627 int rank = ortho_->projectAndNormalize(*Vnext,AsubH,subR,AVprev); 00628 00629 // Copy over the coefficients to R just in case we run into an error. 00630 SDM subR2( Teuchos::View,*R_,(lclIter_+1)*blockSize_,blockSize_,0,HFirstCol); 00631 SDM subH2( Teuchos::View,*H_,(lclIter_+1)*blockSize_,blockSize_,0,HFirstCol); 00632 subR2.assign(subH2); 00633 00634 TEUCHOS_TEST_FOR_EXCEPTION(rank != blockSize_,BlockGCRODRIterOrthoFailure, "Belos::BlockGCRODRIter::iterate(): couldn't generate basis of full rank."); 00635 00636 // Update the QR factorization of the upper Hessenberg matrix 00637 updateLSQR(); 00638 00639 // Update basis dim 00640 curDim_ = curDim_ + blockSize_; 00641 00642 00643 00644 }//end while(stest_->checkStatus(this) ~= Passed && curDim_+1 <= numBlocks_*blockSize_) 00645 00646 }//end iterate() defintition 00647 00649 //Initialize this iteration object. 00650 template <class ScalarType, class MV, class OP> 00651 void BlockGCRODRIter<ScalarType,MV,OP>::initialize(BlockGCRODRIterState<ScalarType,MV> newstate) { 00652 if (newstate.V != Teuchos::null && newstate.H != Teuchos::null) { 00653 curDim_ = newstate.curDim; 00654 V_ = newstate.V; 00655 U_ = newstate.U; 00656 C_ = newstate.C; 00657 H_ = newstate.H; 00658 B_ = newstate.B; 00659 lclIter_ = 0;//resets the count of local iterations for the new cycle 00660 R_ = Teuchos::rcp(new SDM(H_->numRows(), H_->numCols() )); //R_ should look like H but point to separate memory 00661 00662 //All Householder product matrices start out as identity matrices. 00663 //We construct an identity from which to copy. 00664 SDM Identity(2*blockSize_, 2*blockSize_); 00665 for(int i=0;i<2*blockSize_; i++){ 00666 Identity[i][i] = 1; 00667 } 00668 for(int i=0; i<numBlocks_;i++){ 00669 House_[i].assign(Identity); 00670 } 00671 } 00672 else { 00673 TEUCHOS_TEST_FOR_EXCEPTION(newstate.V == Teuchos::null,std::invalid_argument,"Belos::GCRODRIter::initialize(): BlockGCRODRIterState does not have V initialized."); 00674 TEUCHOS_TEST_FOR_EXCEPTION(newstate.H == Teuchos::null,std::invalid_argument,"Belos::GCRODRIter::initialize(): BlockGCRODRIterState does not have H initialized."); 00675 } 00676 // the solver is initialized 00677 initialized_ = true; 00678 }//end initialize() defintition 00679 00681 //Get the native residuals stored in this iteration. 00682 //This function will only compute the native residuals for 00683 //right-hand sides we are interested in, as dictated by 00684 //std::vector<int> trueRHSIndices_ (THIS IS NOT YET IMPLEMENTED. JUST GETS ALL RESIDUALS) 00685 //A norm of -1 is entered for all residuals about which we do not care. 00686 template <class ScalarType, class MV, class OP> 00687 Teuchos::RCP<const MV> 00688 BlockGCRODRIter<ScalarType,MV,OP>::getNativeResiduals( std::vector<MagnitudeType> *norms ) const 00689 { 00690 // 00691 // NOTE: Make sure the incoming std::vector is the correct size! 00692 // 00693 if (norms != NULL) { 00694 if (static_cast<int> (norms->size()) < blockSize_) { 00695 norms->resize( blockSize_ ); 00696 } 00697 Teuchos::BLAS<int,ScalarType> blas; 00698 for (int j=0; j<blockSize_; j++) { 00699 if(trueRHSIndices_[j]){ 00700 (*norms)[j] = blas.NRM2( blockSize_, &Z_(curDim_-blockSize_+j, j), 1); 00701 } 00702 else{ 00703 (*norms)[j] = -1; 00704 } 00705 } 00706 return Teuchos::null; 00707 } else { // norms is NULL 00708 // FIXME If norms is NULL, return residual vectors. 00709 return Teuchos::null; 00710 } 00711 }//end getNativeResiduals() definition 00712 00714 //Get the current update from this subspace. 00715 template <class ScalarType, class MV, class OP> 00716 Teuchos::RCP<MV> BlockGCRODRIter<ScalarType,MV,OP>::getCurrentUpdate() const { 00717 // 00718 // If this is the first iteration of the Arnoldi factorization, 00719 // there is no update, so return Teuchos::null. 00720 // 00721 Teuchos::RCP<MV> currentUpdate = Teuchos::null; 00722 //KMS if(curDim_==0) { 00723 if(curDim_<=blockSize_) { 00724 return currentUpdate; 00725 } 00726 else{ 00727 const ScalarType one = Teuchos::ScalarTraits<ScalarType>::one(); 00728 const ScalarType zero = Teuchos::ScalarTraits<ScalarType>::zero(); 00729 Teuchos::BLAS<int,ScalarType> blas; 00730 currentUpdate = MVT::Clone( *V_, blockSize_ ); 00731 // 00732 // Make a view and then copy the RHS of the least squares problem. DON'T OVERWRITE IT! 00733 // 00734 SDM Y( Teuchos::Copy, Z_, curDim_-blockSize_, blockSize_ ); 00735 Teuchos::RCP<SDM> Rtmp = Teuchos::rcp(new SDM(Teuchos::View, *R_, curDim_, curDim_-blockSize_)); 00736 //KMS 00737 //sleep(1); 00738 //std::cout << "Before TRSM" << std::endl; 00739 //sleep(1); 00740 //std::cout << "The size of Rtmp is " << Rtmp -> numRows() << " by " << Rtmp -> numCols() << std::endl; 00741 //std::cout << "The size of Y is " << Y.numRows() << " by " << Y.numCols() << std::endl; 00742 //std::cout << "blockSize_ = " << blockSize_ << std::endl; 00743 //std::cout << "curDim_ = " << curDim_ << std::endl; 00744 //std::cout << "curDim_ - blockSize_ = " << curDim_ - blockSize_ << std::endl; 00745 // 00746 // Solve the least squares problem. 00747 // Observe that in calling TRSM, we use the value 00748 // curDim_ -blockSize_. This is because curDim_ has 00749 // already been incremented but the proper size of R is still 00750 // based on the previous value. 00751 // 00752 blas.TRSM( Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 00753 Teuchos::NON_UNIT_DIAG, curDim_-blockSize_, blockSize_, one, 00754 Rtmp->values(), Rtmp->stride(), Y.values(), Y.stride() ); 00755 //KMS 00756 //sleep(1); 00757 //std::cout << "After TRSM" << std::endl; 00758 // 00759 // Compute the current update from the Krylov basis; V(:,1:curDim_)*y. 00760 // 00761 std::vector<int> index(curDim_-blockSize_); 00762 for ( int i=0; i<curDim_-blockSize_; i++ ) index[i] = i; 00763 Teuchos::RCP<const MV> Vjp1 = MVT::CloneView( *V_, index ); 00764 MVT::MvTimesMatAddMv( one, *Vjp1, Y, zero, *currentUpdate ); 00765 00766 00767 00768 // 00769 // Add in portion of update from recycled subspace U; U(:,1:recycledBlocks_)*B*y. 00770 // 00771 if (U_ != Teuchos::null) { 00772 SDM z(recycledBlocks_,blockSize_); 00773 SDM subB( Teuchos::View, *B_, recycledBlocks_, curDim_-blockSize_ ); 00774 z.multiply( Teuchos::NO_TRANS, Teuchos::NO_TRANS, one, subB, Y, zero ); 00775 00776 //std::cout << (*U_).MyLength() << " " << (*U_).NumVectors() << " " << subB.numRows() << " " << subB.numCols() << " " << Y.numRows() << " " << Y.numCols()<< " " << curDim_ << " " << recycledBlocks_; 00777 MVT::MvTimesMatAddMv( -one, *U_, z, one, *currentUpdate ); 00778 } 00779 } 00780 00781 00782 00783 return currentUpdate; 00784 }//end getCurrentUpdate() definition 00785 00786 template<class ScalarType, class MV, class OP> 00787 void BlockGCRODRIter<ScalarType,MV,OP>::updateLSQR( int dim ) { 00788 00789 int i; 00790 const ScalarType zero = Teuchos::ScalarTraits<ScalarType>::zero(); 00791 const ScalarType one = Teuchos::ScalarTraits<ScalarType>::one(); 00792 00793 // Get correct dimension based on input "dim" 00794 // Remember that ortho failures result in an exit before updateLSQR() is called. 00795 // Therefore, it is possible that dim == curDim_. 00796 int curDim = curDim_; 00797 if ( (dim >= curDim_) && (dim < getMaxSubspaceDim()) ){ 00798 curDim = dim; 00799 } 00800 00801 Teuchos::BLAS<int, ScalarType> blas; 00802 00803 if(blockSize_ == 1){//if only one right-hand side then use Givens rotations 00804 // 00805 // Apply previous rotations and compute new rotations to reduce upper-Hessenberg 00806 // system to upper-triangular form. 00807 // 00808 // QR factorization of Least-Squares system with Givens rotations 00809 // 00810 for (i=0; i<curDim-1; i++) { 00811 // 00812 // Apply previous Givens rotations to new column of Hessenberg matrix 00813 // 00814 blas.ROT( 1, &(*R_)(i,curDim-1), 1, &(*R_)(i+1, curDim-1), 1, &cs_[i], &sn_[i] ); 00815 } 00816 // 00817 // Calculate new Givens rotation 00818 // 00819 blas.ROTG( &(*R_)(curDim-1,curDim-1), &(*R_)(curDim,curDim-1), &cs_[curDim-1], &sn_[curDim-1] ); 00820 (*R_)(curDim,curDim-1) = zero; 00821 // 00822 // Update RHS w/ new transformation 00823 // 00824 blas.ROT( 1, &Z_(curDim-1,0), 1, &Z_(curDim,0), 1, &cs_[curDim-1], &sn_[curDim-1] ); 00825 } 00826 else{//if multiple right-hand sides then use Householder transormations 00827 // 00828 //apply previous reflections and compute new reflections to reduce upper-Hessenberg 00829 //system to upper-triagular form. 00830 00831 //In Matlab, applying the reflection to a matrix M would look like 00832 // M_refl = M - 2*v_refl*(v_refl'*M)/(norm(v_refl)^2) 00833 00834 //In order to take advantage of BLAS while applying reflections to a matrix, we 00835 //perform it in a two step process 00836 //1. workvec = M'*v_refl {using BLAS.GEMV()} 00837 //2. M_refl = M_refl - 2*v_refl*workvec'/(norm(v_refl)^2) {using BLAS.GER()} 00838 00839 Teuchos::RCP< SDM > workmatrix = Teuchos::null;//matrix of column vectors onto which we apply the reflection 00840 Teuchos::RCP< SDV > workvec = Teuchos::null;//where we store the result of the first step of the 2-step reflection process 00841 Teuchos::RCP<SDV> v_refl = Teuchos::null;//the reflection vector 00842 int R_colStart = curDim_-blockSize_; 00843 Teuchos::RCP< SDM >Rblock = Teuchos::null; 00844 00845 // 00846 //Apply previous reflections 00847 // 00848 for(i=0; i<lclIter_-1; i++){ 00849 int R_rowStart = i*blockSize_; 00850 //get a view of the part of R_ effected by these reflections. 00851 Teuchos::RCP< SDM > RblockCopy = rcp(new SDM (Teuchos::Copy, *R_, 2*blockSize_,blockSize_, R_rowStart, R_colStart)); 00852 Teuchos::RCP< SDM > RblockView = rcp(new SDM (Teuchos::View, *R_, 2*blockSize_,blockSize_, R_rowStart, R_colStart)); 00853 blas.GEMM(Teuchos::NO_TRANS,Teuchos::NO_TRANS, 2*blockSize_,blockSize_,2*blockSize_,one,House_[i].values(),House_[i].stride(), RblockCopy->values(),RblockCopy -> stride(), zero, RblockView->values(),RblockView -> stride()); 00854 00855 } 00856 00857 00858 //Get a view of last 2*blockSize entries of entire block to 00859 //generate new reflections. 00860 Rblock = rcp(new SDM (Teuchos::View, *R_, 2*blockSize_,blockSize_, curDim_-blockSize_, curDim_-blockSize_)); 00861 00862 //Calculate and apply the new reflections 00863 for(i=0; i<blockSize_; i++){ 00864 // 00865 //Calculating Reflection 00866 // 00867 int curcol = (lclIter_ - 1)*blockSize_ + i;//current column of R_ 00868 int lclCurcol = i;//current column of Rblock 00869 ScalarType signDiag = (*R_)(curcol,curcol) / Teuchos::ScalarTraits<ScalarType>::magnitude((*R_)(curcol,curcol)); 00870 00871 // Norm of the vector to be reflected. 00872 // BLAS returns a ScalarType, but it really should be a magnitude. 00873 ScalarType nvs = blas.NRM2(blockSize_+1,&((*R_)[curcol][curcol]),1); 00874 ScalarType alpha = -signDiag*nvs; 00875 00876 //norm of reflection vector which is just the vector being reflected 00877 //i.e. v = R_(curcol:curcol+blockSize_,curcol)) 00878 //v_refl = v - alpha*e1 00879 //norm(v_refl) = norm(v) + alpha^2 - 2*v*alpha 00880 //store in v_refl 00881 00882 // Beware, nvs should have a zero imaginary part (since 00883 // it is a norm of a vector), but it may not due to rounding 00884 // error. 00885 //nvs = nvs + alpha*alpha - 2*(*R_)(curcol,curcol)*alpha; 00886 //(*R_)(curcol,curcol) -= alpha; 00887 00888 //Copy relevant values of the current column of R_ into the reflection vector 00889 //Modify first entry 00890 //Take norm of reflection vector 00891 //Square the norm 00892 v_refl = rcp(new SDV(Teuchos::Copy, &((*R_)(curcol,curcol)), blockSize_ + 1 )); 00893 (*v_refl)[0] -= alpha; 00894 nvs = blas.NRM2(blockSize_+1,v_refl -> values(),1); 00895 nvs *= nvs; 00896 00897 // 00898 //Apply new reflector to: 00899 //1. To subsequent columns of R_ in the current block 00900 //2. To House[iter_] to store product of reflections for this column 00901 //3. To the least-squares right-hand side. 00902 //4. The current column 00903 // 00904 // 00905 00906 00907 // 00908 //1. 00909 // 00910 if(i < blockSize_ - 1){//only do this when there are subsquent columns in the block to apply to 00911 workvec = Teuchos::rcp(new SDV(blockSize_ - i -1)); 00912 //workvec = Teuchos::rcp(new SDV(2*blockSize_)); 00913 workmatrix = Teuchos::rcp(new SDM (Teuchos::View, *Rblock, blockSize_+1, blockSize_ - i -1, lclCurcol, lclCurcol +1 ) ); 00914 blas.GEMV(Teuchos::TRANS, workmatrix->numRows(), workmatrix->numCols(), one, workmatrix->values(), workmatrix->stride(), v_refl->values(), 1, zero, workvec->values(), 1); 00915 blas.GER(workmatrix->numRows(),workmatrix->numCols(), -2.0*one/nvs, v_refl->values(),1,workvec->values(),1,workmatrix->values(),workmatrix->stride()); 00916 } 00917 00918 00919 // 00920 //2. 00921 // 00922 workvec = Teuchos::rcp(new SDV(2*blockSize_)); 00923 workmatrix = Teuchos::rcp(new SDM (Teuchos::View, House_[lclIter_ -1], blockSize_+1, 2*blockSize_, i, 0 ) ); 00924 blas.GEMV(Teuchos::TRANS,workmatrix->numRows(),workmatrix->numCols(),one,workmatrix->values(),workmatrix->stride(), v_refl->values(), 1,zero,workvec->values(),1); 00925 blas.GER(workmatrix->numRows(),workmatrix->numCols(), -2.0*one/nvs, v_refl -> values(),1,workvec->values(),1,workmatrix->values(),(*workmatrix).stride()); 00926 00927 // 00928 //3. 00929 // 00930 workvec = Teuchos::rcp(new SDV(blockSize_)); 00931 workmatrix = Teuchos::rcp(new SDM (Teuchos::View, Z_, blockSize_+1, blockSize_, curcol, 0 ) ); 00932 blas.GEMV(Teuchos::TRANS, workmatrix->numRows(), workmatrix->numCols(), one, workmatrix-> values(), workmatrix->stride(), v_refl -> values(), 1, zero, workvec->values(), 1); 00933 blas.GER((*workmatrix).numRows(),(*workmatrix).numCols(), -2.0*one/nvs,v_refl -> values(), 1,&((*workvec)[0]),1,(*workmatrix)[0],(*workmatrix).stride()); 00934 00935 // 00936 //4. 00937 // 00938 (*R_)[curcol][curcol] = alpha; 00939 for(int ii=1; ii<= blockSize_; ii++){ 00940 (*R_)[curcol][curcol+ii] = 0; 00941 } 00942 } 00943 00944 } 00945 00946 } // end updateLSQR() 00947 00948 00949 }//End Belos Namespace 00950 00951 #endif /* BELOS_BLOCK_GCRODR_ITER_HPP */
1.7.6.1