mcpele
1.0.0
The Monte Carlo Python Energy Landscape Explorer
|
00001 #include "pele/lbfgs.h" 00002 #include <memory> 00003 00004 using std::cout; 00005 00006 namespace pele { 00007 00008 LBFGS::LBFGS( std::shared_ptr<pele::BasePotential> potential, const pele::Array<double> x0, 00009 double tol, int M) 00010 : GradientOptimizer(potential, x0, tol), 00011 M_(M), 00012 max_f_rise_(1e-4), 00013 use_relative_f_(false), 00014 rho_(M_), 00015 H0_(0.1), 00016 k_(0) 00017 { 00018 // set the precision of the printing 00019 cout << std::setprecision(12); 00020 00021 // allocate space for s_ and y_ 00022 for (int i = 0; i < M_; ++i){ 00023 s_.push_back(Array<double>(x_.size())); 00024 y_.push_back(Array<double>(x_.size())); 00025 } 00026 } 00027 00031 void LBFGS::one_iteration() 00032 { 00033 if (!func_initialized_) 00034 initialize_func_gradient(); 00035 00036 // make a copy of the position and gradient 00037 Array<double> xold(x_.copy()); 00038 Array<double> gold(g_.copy()); 00039 00040 // get the stepsize and direction from the LBFGS algorithm 00041 Array<double> step(x_.size()); 00042 compute_lbfgs_step(step); 00043 00044 // reduce the stepsize if necessary 00045 double stepsize = backtracking_linesearch(step); 00046 00047 // update the LBFGS memeory 00048 update_memory(xold, gold, x_, g_); 00049 00050 // print some status information 00051 if ((iprint_ > 0) && (iter_number_ % iprint_ == 0)){ 00052 cout << "lbgs: " << iter_number_ 00053 << " E " << f_ 00054 << " rms " << rms_ 00055 << " nfev " << nfev_ 00056 << " stepsize " << stepsize << "\n"; 00057 } 00058 iter_number_ += 1; 00059 } 00060 00061 void LBFGS::update_memory( 00062 Array<double> xold, 00063 Array<double> gold, 00064 Array<double> xnew, 00065 Array<double> gnew) 00066 { 00067 // update the lbfgs memory 00068 // This updates s_, y_, rho_, and H0_, and k_ 00069 int klocal = k_ % M_; 00070 for (size_t j2 = 0; j2 < x_.size(); ++j2){ 00071 y_[klocal][j2] = gnew[j2] - gold[j2]; 00072 s_[klocal][j2] = xnew[j2] - xold[j2]; 00073 } 00074 00075 double ys = dot(y_[klocal], s_[klocal]); 00076 if (ys == 0.) { 00077 if (verbosity_ > 0) { 00078 cout << "warning: resetting YS to 1.\n"; 00079 } 00080 ys = 1.; 00081 } 00082 00083 rho_[klocal] = 1. / ys; 00084 00085 double yy = dot(y_[klocal], y_[klocal]); 00086 if (yy == 0.) { 00087 if (verbosity_ > 0) { 00088 cout << "warning: resetting YY to 1.\n"; 00089 } 00090 yy = 1.; 00091 } 00092 H0_ = ys / yy; 00093 00094 // increment k 00095 k_ += 1; 00096 } 00097 00098 void LBFGS::compute_lbfgs_step(Array<double> step) 00099 { 00100 if (k_ == 0){ 00101 // take a conservative first step 00102 double gnorm = norm(g_); 00103 if (gnorm > 1.) gnorm = 1. / gnorm; 00104 for (size_t j2 = 0; j2 < x_.size(); ++j2){ 00105 step[j2] = -gnorm * H0_ * g_[j2]; 00106 } 00107 return; 00108 } 00109 00110 // copy the gradient into step 00111 step.assign(g_); 00112 00113 int jmin = std::max(0, k_ - M_); 00114 int jmax = k_; 00115 int i; 00116 double beta; 00117 Array<double> alpha(M_); 00118 00119 // loop backwards through the memory 00120 for (int j = jmax - 1; j >= jmin; --j){ 00121 i = j % M_; 00122 //cout << " i " << i << " j " << j << "\n"; 00123 alpha[i] = rho_[i] * dot(s_[i], step); 00124 for (size_t j2 = 0; j2 < step.size(); ++j2){ 00125 step[j2] -= alpha[i] * y_[i][j2]; 00126 } 00127 } 00128 00129 // scale the step size by H0 00130 step *= H0_; 00131 00132 // loop forwards through the memory 00133 for (int j = jmin; j < jmax; ++j){ 00134 i = j % M_; 00135 //cout << " i " << i << " j " << j << "\n"; 00136 beta = rho_[i] * dot(y_[i], step); 00137 for (size_t j2 = 0; j2 < step.size(); ++j2){ 00138 step[j2] += s_[i][j2] * (alpha[i] - beta); 00139 } 00140 } 00141 00142 // invert the step to point downhill 00143 step *= -1; 00144 } 00145 00146 double LBFGS::backtracking_linesearch(Array<double> step) 00147 { 00148 Array<double> xnew(x_.size()); 00149 Array<double> gnew(x_.size()); 00150 double fnew; 00151 00152 // if the step is pointing uphill, invert it 00153 if (dot(step, g_) > 0.){ 00154 if (verbosity_ > 1) { 00155 cout << "warning: step direction was uphill. inverting\n"; 00156 } 00157 for (size_t j2 = 0; j2 < step.size(); ++j2){ 00158 step[j2] *= -1; 00159 } 00160 } 00161 00162 double factor = 1.; 00163 double stepsize = norm(step); 00164 00165 // make sure the step is no larger than maxstep_ 00166 if (factor * stepsize > maxstep_){ 00167 factor = maxstep_ / stepsize; 00168 } 00169 00170 int nred; 00171 int nred_max = 10; 00172 for (nred = 0; nred < nred_max; ++nred){ 00173 for (size_t j2 = 0; j2 < xnew.size(); ++j2){ 00174 xnew[j2] = x_[j2] + factor * step[j2]; 00175 } 00176 compute_func_gradient(xnew, fnew, gnew); 00177 00178 double df = fnew - f_; 00179 if (use_relative_f_) { 00180 double fabs = 1e-100; 00181 if (f_ != 0) { 00182 fabs = abs(f_); 00183 } 00184 df /= fabs; 00185 } 00186 if (df < max_f_rise_){ 00187 break; 00188 } 00189 else { 00190 factor /= 10.; 00191 if (verbosity_ > 2) { 00192 cout 00193 << "energy increased by " << df 00194 << " to " << fnew 00195 << " from " << f_ 00196 << " reducing step size to " << factor * stepsize 00197 << " H0 " << H0_ << "\n"; 00198 } 00199 } 00200 } 00201 00202 if (nred >= nred_max){ 00203 // possibly raise an error here 00204 if (verbosity_ > 0) { 00205 cout << "warning: the line search backtracked too many times\n"; 00206 } 00207 } 00208 00209 x_.assign(xnew); 00210 g_.assign(gnew); 00211 f_ = fnew; 00212 rms_ = norm(gnew) / sqrt(gnew.size()); 00213 return stepsize * factor; 00214 } 00215 00216 void LBFGS::reset(pele::Array<double> &x0) 00217 { 00218 if (x0.size() != x_.size()){ 00219 throw std::invalid_argument("The number of degrees of freedom (x0.size()) cannot change when calling reset()"); 00220 } 00221 k_ = 0; 00222 iter_number_ = 0; 00223 nfev_ = 0; 00224 x_.assign(x0); 00225 initialize_func_gradient(); 00226 } 00227 00228 00229 }