pele: /home/js850/projects/pele/source/lbfgs.cpp Source File

00001 #include "pele/lbfgs.h"
00002 #include <memory>
00003 
00004 using std::cout;
00005 
00006 namespace pele {
00007 
00008 LBFGS::LBFGS( std::shared_ptr<pele::BasePotential> potential, const pele::Array<double> x0,
00009         double tol, int M)
00010     : GradientOptimizer(potential, x0, tol),
00011       M_(M),
00012       max_f_rise_(1e-4),
00013       use_relative_f_(false),
00014       rho_(M_),
00015       H0_(0.1),
00016       k_(0)
00017 {
00018     // set the precision of the printing
00019     cout << std::setprecision(12);
00020 
00021     // allocate space for s_ and y_
00022     for (int i = 0; i < M_; ++i){
00023         s_.push_back(Array<double>(x_.size()));
00024         y_.push_back(Array<double>(x_.size()));
00025     }
00026 }
00027 
00031 void LBFGS::one_iteration()
00032 {
00033     if (!func_initialized_)
00034         initialize_func_gradient();
00035 
00036     // make a copy of the position and gradient
00037     Array<double> xold(x_.copy());
00038     Array<double> gold(g_.copy());
00039 
00040     // get the stepsize and direction from the LBFGS algorithm
00041     Array<double> step(x_.size());
00042     compute_lbfgs_step(step);
00043 
00044     // reduce the stepsize if necessary
00045     double stepsize = backtracking_linesearch(step);
00046 
00047     // update the LBFGS memeory
00048     update_memory(xold, gold, x_, g_);
00049 
00050     // print some status information
00051     if ((iprint_ > 0) && (iter_number_ % iprint_ == 0)){
00052         cout << "lbgs: " << iter_number_
00053             << " E " << f_
00054             << " rms " << rms_
00055             << " nfev " << nfev_
00056             << " stepsize " << stepsize << "\n";
00057     }
00058     iter_number_ += 1;
00059 }
00060 
00061 void LBFGS::update_memory(
00062         Array<double> xold,
00063         Array<double> gold,
00064         Array<double> xnew,
00065         Array<double> gnew)
00066 {
00067     // update the lbfgs memory
00068     // This updates s_, y_, rho_, and H0_, and k_
00069     int klocal = k_ % M_;
00070     for (size_t j2 = 0; j2 < x_.size(); ++j2){
00071         y_[klocal][j2] = gnew[j2] - gold[j2];
00072         s_[klocal][j2] = xnew[j2] - xold[j2];
00073     }
00074 
00075     double ys = dot(y_[klocal], s_[klocal]);
00076     if (ys == 0.) {
00077         if (verbosity_ > 0) {
00078             cout << "warning: resetting YS to 1.\n";
00079         }
00080         ys = 1.;
00081     }
00082 
00083     rho_[klocal] = 1. / ys;
00084 
00085     double yy = dot(y_[klocal], y_[klocal]);
00086     if (yy == 0.) {
00087         if (verbosity_ > 0) {
00088             cout << "warning: resetting YY to 1.\n";
00089         }
00090         yy = 1.;
00091     }
00092     H0_ = ys / yy;
00093 
00094     // increment k
00095     k_ += 1;
00096 }
00097 
00098 void LBFGS::compute_lbfgs_step(Array<double> step)
00099 {
00100     if (k_ == 0){
00101         // take a conservative first step
00102         double gnorm = norm(g_);
00103         if (gnorm > 1.) gnorm = 1. / gnorm;
00104         for (size_t j2 = 0; j2 < x_.size(); ++j2){
00105             step[j2] = -gnorm * H0_ * g_[j2];
00106         }
00107         return;
00108     }
00109 
00110     // copy the gradient into step
00111     step.assign(g_);
00112 
00113     int jmin = std::max(0, k_ - M_);
00114     int jmax = k_;
00115     int i;
00116     double beta;
00117     Array<double> alpha(M_);
00118 
00119     // loop backwards through the memory
00120     for (int j = jmax - 1; j >= jmin; --j){
00121         i = j % M_;
00122         //cout << "    i " << i << " j " << j << "\n";
00123         alpha[i] = rho_[i] * dot(s_[i], step);
00124         for (size_t j2 = 0; j2 < step.size(); ++j2){
00125             step[j2] -= alpha[i] * y_[i][j2];
00126         }
00127     }
00128 
00129     // scale the step size by H0
00130     step *= H0_;
00131 
00132     // loop forwards through the memory
00133     for (int j = jmin; j < jmax; ++j){
00134         i = j % M_;
00135         //cout << "    i " << i << " j " << j << "\n";
00136         beta = rho_[i] * dot(y_[i], step);
00137         for (size_t j2 = 0; j2 < step.size(); ++j2){
00138             step[j2] += s_[i][j2] * (alpha[i] - beta);
00139         }
00140     }
00141 
00142     // invert the step to point downhill
00143     step *= -1;
00144 }
00145 
00146 double LBFGS::backtracking_linesearch(Array<double> step)
00147 {
00148     Array<double> xnew(x_.size());
00149     Array<double> gnew(x_.size());
00150     double fnew;
00151 
00152     // if the step is pointing uphill, invert it
00153     if (dot(step, g_) > 0.){
00154         if (verbosity_ > 1) {
00155             cout << "warning: step direction was uphill.  inverting\n";
00156         }
00157         for (size_t j2 = 0; j2 < step.size(); ++j2){
00158             step[j2] *= -1;
00159         }
00160     }
00161 
00162     double factor = 1.;
00163     double stepsize = norm(step);
00164 
00165     // make sure the step is no larger than maxstep_
00166     if (factor * stepsize > maxstep_){
00167         factor = maxstep_ / stepsize;
00168     }
00169 
00170     int nred;
00171     int nred_max = 10;
00172     for (nred = 0; nred < nred_max; ++nred){
00173         for (size_t j2 = 0; j2 < xnew.size(); ++j2){
00174             xnew[j2] = x_[j2] + factor * step[j2];
00175         }
00176         compute_func_gradient(xnew, fnew, gnew);
00177 
00178         double df = fnew - f_;
00179         if (use_relative_f_) {
00180             double fabs = 1e-100; 
00181             if (f_ != 0) {
00182                 fabs = abs(f_);
00183             }
00184             df /= fabs;
00185         }
00186         if (df < max_f_rise_){
00187             break;
00188         }
00189         else {
00190             factor /= 10.;
00191             if (verbosity_ > 2) {
00192                 cout
00193                     << "energy increased by " << df
00194                     << " to " << fnew
00195                     << " from " << f_
00196                     << " reducing step size to " << factor * stepsize
00197                     << " H0 " << H0_ << "\n";
00198             }
00199         }
00200     }
00201 
00202     if (nred >= nred_max){
00203         // possibly raise an error here
00204         if (verbosity_ > 0) {
00205             cout << "warning: the line search backtracked too many times\n";
00206         }
00207     }
00208 
00209     x_.assign(xnew);
00210     g_.assign(gnew);
00211     f_ = fnew;
00212     rms_ = norm(gnew) / sqrt(gnew.size());
00213     return stepsize * factor;
00214 }
00215 
00216 void LBFGS::reset(pele::Array<double> &x0)
00217 {
00218     if (x0.size() != x_.size()){
00219         throw std::invalid_argument("The number of degrees of freedom (x0.size()) cannot change when calling reset()");
00220     }
00221     k_ = 0;
00222     iter_number_ = 0;
00223     nfev_ = 0;
00224     x_.assign(x0);
00225     initialize_func_gradient();
00226 }
00227 
00228 
00229 }