|
MoochoPack : Framework for Large-Scale Optimization Algorithms
Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Moocho: Multi-functional Object-Oriented arCHitecture for Optimization 00005 // Copyright (2003) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Roscoe A. Bartlett (rabartl@sandia.gov) 00038 // 00039 // *********************************************************************** 00040 // @HEADER 00041 00042 #include <limits> 00043 00044 #include "MoochoPack_BFGSUpdate_Strategy.hpp" 00045 #include "MoochoPack_Exceptions.hpp" 00046 #include "AbstractLinAlgPack_TestMatrixSymSecant.hpp" 00047 #include "AbstractLinAlgPack_MatrixSymSecant.hpp" 00048 #include "AbstractLinAlgPack_MatrixSymOp.hpp" 00049 #include "AbstractLinAlgPack_MatrixOpOut.hpp" 00050 #include "AbstractLinAlgPack_VectorSpace.hpp" 00051 #include "AbstractLinAlgPack_VectorMutable.hpp" 00052 #include "AbstractLinAlgPack_VectorOut.hpp" 00053 #include "AbstractLinAlgPack_VectorStdOps.hpp" 00054 #include "AbstractLinAlgPack_LinAlgOpPack.hpp" 00055 #include "Teuchos_dyn_cast.hpp" 00056 #include "Teuchos_Assert.hpp" 00057 00058 namespace MoochoPack { 00059 00060 BFGSUpdate_Strategy::BFGSUpdate_Strategy( 00061 bool rescale_init_identity 00062 ,bool use_dampening 00063 ,ESecantTesting secant_testing 00064 ,value_type secant_warning_tol 00065 ,value_type secant_error_tol 00066 ) 00067 :rescale_init_identity_(rescale_init_identity) 00068 ,use_dampening_(use_dampening) 00069 ,secant_testing_(secant_testing) 00070 ,secant_warning_tol_(secant_warning_tol) 00071 ,secant_error_tol_(secant_error_tol) 00072 {} 00073 00074 void BFGSUpdate_Strategy::perform_update( 00075 VectorMutable *s_bfgs 00076 ,VectorMutable *y_bfgs 00077 ,bool first_update 00078 ,std::ostream &out 00079 ,EJournalOutputLevel olevel 00080 ,bool check_results 00081 ,MatrixSymOp *B 00082 ,QuasiNewtonStats *quasi_newton_stats 00083 ) 00084 { 00085 namespace rcp = MemMngPack; 00086 using Teuchos::dyn_cast; 00087 using AbstractLinAlgPack::dot; 00088 using AbstractLinAlgPack::Vt_S; 00089 using AbstractLinAlgPack::Vp_StV; 00090 using LinAlgOpPack::Vp_V; 00091 using LinAlgOpPack::V_StV; 00092 using LinAlgOpPack::V_VpV; 00093 using LinAlgOpPack::V_VmV; 00094 using LinAlgOpPack::V_MtV; 00095 00096 const value_type 00097 NOT_CALCULATED = std::numeric_limits<value_type>::max(); 00098 value_type 00099 sTy = NOT_CALCULATED, 00100 yTy = NOT_CALCULATED; 00101 bool used_dampening = false; 00102 00103 // Make sure the update is defined! 00104 if( sTy == NOT_CALCULATED ) 00105 sTy = dot( *s_bfgs, *y_bfgs ); 00106 if( sTy == 0 ) { 00107 if( (int)olevel >= (int)PRINT_ALGORITHM_STEPS ) { 00108 out << "\n(y'*s) == 0, skipping the update ...\n"; 00109 } 00110 quasi_newton_stats->set_updated_stats( QuasiNewtonStats::INDEF_SKIPED ); 00111 return; 00112 } 00113 00114 MatrixSymSecant 00115 &B_updatable = dyn_cast<MatrixSymSecant>(*B); 00116 00117 // ///////////////////////////////////////////////////////////// 00118 // Consider rescaling the initial identity hessian before 00119 // the update. 00120 // 00121 // This was taken from Nocedal & Wright, 1999, p. 200 00122 // 00123 // Bo = (y'*y)/(y'*s) * I 00124 // 00125 if( first_update && rescale_init_identity() ) { 00126 if( sTy == NOT_CALCULATED ) 00127 sTy = dot( *s_bfgs, *y_bfgs ); 00128 if( yTy == NOT_CALCULATED ) 00129 yTy = dot( *y_bfgs, *y_bfgs ); 00130 const value_type 00131 Iscale = yTy/sTy; 00132 const value_type 00133 Iscale_too_small = 1e-5; // ToDo: Make this adjustable 00134 if( Iscale >= Iscale_too_small ) { 00135 if( (int)olevel >= (int)PRINT_ALGORITHM_STEPS ) { 00136 out << "\nRescaling the initial identity matrix before the update as:\n" 00137 << "Iscale = (y'*y)/(y'*s) = ("<<yTy<<")/("<<sTy<<") = "<<Iscale<<"\n" 00138 << "B = Iscale * eye(n-r) ...\n"; 00139 } 00140 B_updatable.init_identity( y_bfgs->space(), Iscale ); 00141 if( (int)olevel >= (int)PRINT_ITERATION_QUANTITIES ) { 00142 out << "\nB after rescaling = \n" << *B; 00143 } 00144 } 00145 else { 00146 if( (int)olevel >= (int)PRINT_ALGORITHM_STEPS ) { 00147 out << "\nSkipping rescaling of the initial identity matrix since:\n" 00148 << "Iscale = (y'*y)/(y'*s) = ("<<yTy<<")/("<<sTy<<") = "<<Iscale 00149 << " < Iscale_too_small = " << Iscale_too_small << std::endl; 00150 } 00151 } 00152 } 00153 00154 // //////////////////////////////////////////////////// 00155 // Modify the s_bfgs and y_bfgs vectors for dampening? 00156 VectorSpace::vec_mut_ptr_t 00157 Bs = Teuchos::null; 00158 if( use_dampening() ) { 00159 if( (int)olevel >= (int)PRINT_ALGORITHM_STEPS ) 00160 { 00161 out 00162 << "\nConsidering the dampened update ...\n"; 00163 } 00164 // Bs = Bm1 * s_bfgs 00165 Bs = y_bfgs->space().create_member(); 00166 V_MtV( Bs.get(), *B, BLAS_Cpp::no_trans, *s_bfgs ); 00167 // sTy = s_bfgs' * y_bfgs 00168 if( sTy == NOT_CALCULATED ) 00169 sTy = dot( *s_bfgs, *y_bfgs ); 00170 // sTBs = s_bfgs' * Bs 00171 const value_type sTBs = dot( *s_bfgs, *Bs ); 00172 // Select dampening parameter theta 00173 const value_type 00174 theta = ( sTy >= 0.2 * sTBs ) 00175 ? 1.0 00176 : (0.8 * sTBs ) / ( sTBs - sTy ); 00177 if( (int)olevel >= (int)PRINT_ALGORITHM_STEPS ) 00178 { 00179 out 00180 << "\ns_bfgs'*y_bfgs = " << sTy 00181 << ( theta == 1.0 ? " >= " : " < " ) 00182 << " s_bfgs' * B * s_bfgs = " << sTBs << std::endl; 00183 } 00184 if( theta == 1.0 ) { 00185 if( (int)olevel >= (int)PRINT_ALGORITHM_STEPS ) 00186 { 00187 out 00188 << "Perform the undamped update ...\n"; 00189 } 00190 } 00191 else { 00192 // y_bfgs = theta*y_bfgs + (1-theta)*B*s_bfgs 00193 Vt_S( y_bfgs, theta ); 00194 Vp_StV( y_bfgs, (1.0-theta), *Bs ); 00195 00196 if( (int)olevel >= (int)PRINT_ALGORITHM_STEPS ) 00197 { 00198 out 00199 << "Dampen the update ...\n" 00200 << "theta = " << theta << std::endl 00201 << "y_bfgs = theta*y_bfgs + (1-theta)*B*s_bfgs ...\n" 00202 << "||y_bfgs||inf = " << y_bfgs->norm_inf() << std::endl; 00203 } 00204 00205 if( (int)olevel >= (int)PRINT_VECTORS ) 00206 { 00207 out 00208 << "y_bfgs =\n" << *y_bfgs; 00209 } 00210 00211 used_dampening = true; 00212 } 00213 } 00214 00215 // Perform the update if it is defined (s_bfgs' * y_bfgs > 0.0) 00216 00217 VectorSpace::vec_mut_ptr_t 00218 s_bfgs_save = Teuchos::null, 00219 y_bfgs_save = Teuchos::null; 00220 if( check_results ) { 00221 // Save s and y since they may be overwritten in the update. 00222 s_bfgs_save = s_bfgs->clone(); 00223 y_bfgs_save = y_bfgs->clone(); 00224 } 00225 try { 00226 B_updatable.secant_update( 00227 s_bfgs 00228 ,y_bfgs 00229 ,Bs.get() 00230 ); 00231 } 00232 catch( const MatrixSymSecant::UpdateFailedException& excpt ) { 00233 if( (int)olevel >= (int)PRINT_BASIC_ALGORITHM_INFO ) { 00234 out << "\nThe factorization of B failed in a major way! Rethrow!\n"; 00235 } 00236 throw; 00237 } 00238 catch( const MatrixSymSecant::UpdateSkippedException& excpt ) { 00239 if( (int)olevel >= (int)PRINT_BASIC_ALGORITHM_INFO ) { 00240 out << excpt.what() << std::endl 00241 << "\nSkipping BFGS update. B = B ...\n"; 00242 } 00243 quasi_newton_stats->set_updated_stats( 00244 QuasiNewtonStats::INDEF_SKIPED ); 00245 return; 00246 } 00247 00248 if( (int)olevel >= (int)PRINT_ITERATION_QUANTITIES ) { 00249 out << "\nB after the BFGS update = \n" << *B; 00250 } 00251 00252 if( ( check_results && secant_testing() == SECANT_TEST_DEFAULT ) 00253 || secant_testing() == SECANT_TEST_ALWAYS ) 00254 { 00255 const bool result = 00256 AbstractLinAlgPack::TestMatrixSymSecant( 00257 *B, *s_bfgs_save, *y_bfgs_save 00258 , secant_warning_tol(), secant_error_tol() 00259 , (int)olevel >= (int)PRINT_VECTORS 00260 , (int)olevel > (int)PRINT_NOTHING ? &out : NULL 00261 , (int)olevel >= (int)PRINT_ALGORITHM_STEPS 00262 ); 00263 if( !result ) { 00264 const char 00265 msg[] = "Error, the secant property for the BFGS update failed\n" 00266 "Stopping the algorithm ...\n"; 00267 out << msg; 00268 TEUCHOS_TEST_FOR_EXCEPTION( 00269 true, TestFailed 00270 ," BFGSUpdate_Strategy::perform_update(...) : " << msg ); 00271 } 00272 } 00273 00274 quasi_newton_stats->set_updated_stats( 00275 used_dampening 00276 ? QuasiNewtonStats::DAMPENED_UPDATED 00277 : QuasiNewtonStats::UPDATED ); 00278 } 00279 00280 void BFGSUpdate_Strategy::print_step( std::ostream& out, const std::string& L ) const 00281 { 00282 out 00283 << L << "if use_dampening == true then\n" 00284 << L << " if s'*y >= 0.2 * s'*B*s then\n" 00285 << L << " theta = 1.0\n" 00286 << L << " else\n" 00287 << L << " theta = 0.8*s'*B*s / (s'*B*s - s'*y)\n" 00288 << L << " end\n" 00289 << L << " y = theta*y + (1-theta)*B*s\n" 00290 << L << "end\n" 00291 << L << "if first_update && rescale_init_identity and y'*s is sufficently positive then\n" 00292 << L << " B = |(y'*y)/(y'*s)| * eye(size(s))\n" 00293 << L << "end\n" 00294 << L << "if s'*y is sufficently positive then\n" 00295 << L << " *** Peform BFGS update\n" 00296 << L << " (B, s, y ) -> B (through MatrixSymSecantUpdate interface)\n" 00297 << L << " if ( check_results && secant_testing == SECANT_TEST_DEFAULT )\n" 00298 << L << " or ( secant_testing == SECANT_TEST_ALWAYS ) then\n" 00299 << L << " if B*s != y then\n" 00300 << L << " *** The secant condition does not check out\n" 00301 << L << " Throw TestFailed!\n" 00302 << L << " end\n" 00303 << L << " end\n" 00304 << L << "end\n" 00305 ; 00306 } 00307 00308 } // end namespace MoochoPack
1.7.6.1