
Commit fbfb31d

Merge pull request #57 from arvigj/adam
Add ADAM optimizer
2 parents e9bb718 + 49edbec · commit fbfb31d

File tree

5 files changed: +211 −0 lines changed

non-linear-solver-spec.json

Lines changed: 83 additions & 0 deletions
@@ -15,6 +15,8 @@
     "LBFGS",
     "LBFGSB",
     "Newton",
+    "ADAM",
+    "StochasticADAM",
     "StochasticGradientDescent",
     "box_constraints",
     "advanced"
@@ -29,6 +31,8 @@
     "Newton",
     "DenseNewton",
     "GradientDescent",
+    "ADAM",
+    "StochasticADAM",
     "StochasticGradientDescent",
     "L-BFGS",
     "BFGS",
@@ -166,6 +170,85 @@
         "type": "bool",
         "doc": "Use PSD as fallback using second order solvers (i.e., Newton's method)."
     },
+    {
+        "pointer": "/ADAM",
+        "default": null,
+        "type": "object",
+        "optional": [
+            "alpha",
+            "beta_1",
+            "beta_2",
+            "epsilon"
+        ],
+        "doc": "Options for ADAM."
+    },
+    {
+        "pointer": "/ADAM/alpha",
+        "default": 0.001,
+        "type": "float",
+        "doc": "Parameter alpha for ADAM."
+    },
+    {
+        "pointer": "/ADAM/beta_1",
+        "default": 0.9,
+        "type": "float",
+        "doc": "Parameter beta_1 for ADAM."
+    },
+    {
+        "pointer": "/ADAM/beta_2",
+        "default": 0.999,
+        "type": "float",
+        "doc": "Parameter beta_2 for ADAM."
+    },
+    {
+        "pointer": "/ADAM/epsilon",
+        "default": 1e-8,
+        "type": "float",
+        "doc": "Parameter epsilon for ADAM."
+    },
+    {
+        "pointer": "/StochasticADAM",
+        "default": null,
+        "type": "object",
+        "optional": [
+            "alpha",
+            "beta_1",
+            "beta_2",
+            "epsilon",
+            "erase_component_probability"
+        ],
+        "doc": "Options for StochasticADAM."
+    },
+    {
+        "pointer": "/StochasticADAM/alpha",
+        "default": 0.001,
+        "type": "float",
+        "doc": "Parameter alpha for StochasticADAM."
+    },
+    {
+        "pointer": "/StochasticADAM/beta_1",
+        "default": 0.9,
+        "type": "float",
+        "doc": "Parameter beta_1 for StochasticADAM."
+    },
+    {
+        "pointer": "/StochasticADAM/beta_2",
+        "default": 0.999,
+        "type": "float",
+        "doc": "Parameter beta_2 for StochasticADAM."
+    },
+    {
+        "pointer": "/StochasticADAM/epsilon",
+        "default": 1e-8,
+        "type": "float",
+        "doc": "Parameter epsilon for StochasticADAM."
+    },
+    {
+        "pointer": "/StochasticADAM/erase_component_probability",
+        "default": 0.3,
+        "type": "float",
+        "doc": "Probability of erasing a component of the gradient in StochasticADAM."
+    },
     {
         "pointer": "/StochasticGradientDescent",
         "default": null,

src/polysolve/nonlinear/Solver.cpp

Lines changed: 14 additions & 0 deletions
@@ -5,6 +5,7 @@
 
 #include "descent_strategies/BFGS.hpp"
 #include "descent_strategies/Newton.hpp"
+#include "descent_strategies/ADAM.hpp"
 #include "descent_strategies/GradientDescent.hpp"
 #include "descent_strategies/LBFGS.hpp"
 
@@ -85,6 +86,17 @@ namespace polysolve::nonlinear
             solver->add_strategy(std::make_unique<LBFGS>(
                 solver_params, characteristic_length, logger));
         }
+        else if (solver_name == "ADAM" || solver_name == "adam")
+        {
+            solver->add_strategy(std::make_unique<ADAM>(
+                solver_params, false, characteristic_length, logger));
+        }
+        else if (solver_name == "StochasticADAM" || solver_name == "stochastic_adam")
+        {
+            solver->add_strategy(std::make_unique<ADAM>(
+                solver_params, true, characteristic_length, logger));
+        }
+
         else if (solver_name == "StochasticGradientDescent" || solver_name == "stochastic_gradient_descent")
        {
             solver->add_strategy(std::make_unique<GradientDescent>(
@@ -109,6 +121,8 @@ namespace polysolve::nonlinear
         return {"BFGS",
                 "DenseNewton",
                 "Newton",
+                "ADAM",
+                "StochasticADAM",
                 "GradientDescent",
                 "StochasticGradientDescent",
                 "L-BFGS"};
src/polysolve/nonlinear/descent_strategies/ADAM.cpp

Lines changed: 67 additions & 0 deletions
@@ -0,0 +1,67 @@
+// ADAM from "ADAM: A METHOD FOR STOCHASTIC OPTIMIZATION"
+
+#include "ADAM.hpp"
+
+namespace polysolve::nonlinear
+{
+
+    ADAM::ADAM(const json &solver_params,
+               const bool is_stochastic,
+               const double characteristic_length,
+               spdlog::logger &logger)
+        : Superclass(solver_params, characteristic_length, logger), is_stochastic_(is_stochastic)
+    {
+        std::string param_name = is_stochastic ? "StochasticADAM" : "ADAM";
+        alpha_ = solver_params[param_name]["alpha"];
+        beta_1_ = solver_params[param_name]["beta_1"];
+        beta_2_ = solver_params[param_name]["beta_2"];
+        epsilon_ = solver_params[param_name]["epsilon"];
+        if (is_stochastic)
+            erase_component_probability_ = solver_params["StochasticADAM"]["erase_component_probability"];
+    }
+
+    void ADAM::reset(const int ndof)
+    {
+        Superclass::reset(ndof);
+        m_prev_ = Eigen::VectorXd::Zero(ndof);
+        v_prev_ = Eigen::VectorXd::Zero(ndof);
+        t_ = 0;
+    }
+
+    bool ADAM::compute_update_direction(
+        Problem &objFunc,
+        const TVector &x,
+        const TVector &grad,
+        TVector &direction)
+    {
+        if (m_prev_.size() == 0)
+            m_prev_ = Eigen::VectorXd::Zero(x.size());
+        if (v_prev_.size() == 0)
+            v_prev_ = Eigen::VectorXd::Zero(x.size());
+
+        TVector grad_modified = grad;
+
+        if (is_stochastic_)
+        {
+            // Zero each gradient component independently with probability
+            // erase_component_probability_ (mask entries are uniform in [0, 1]).
+            Eigen::VectorXd mask = (Eigen::VectorXd::Random(direction.size()).array() + 1.) / 2.;
+            for (int i = 0; i < direction.size(); ++i)
+                grad_modified(i) *= (mask(i) < erase_component_probability_) ? 0. : 1.;
+        }
+
+        // Count steps from t = 1 so the bias-correction denominators below are nonzero.
+        ++t_;
+
+        TVector m = (beta_1_ * m_prev_) + ((1 - beta_1_) * grad_modified);
+        TVector v = beta_2_ * v_prev_;
+        for (int i = 0; i < v.size(); ++i)
+            v(i) += (1 - beta_2_) * grad_modified(i) * grad_modified(i);
+
+        // Persist the uncorrected moment estimates for the next iteration.
+        m_prev_ = m;
+        v_prev_ = v;
+
+        m = m.array() / (1 - pow(beta_1_, t_));
+        v = v.array() / (1 - pow(beta_2_, t_));
+
+        direction = -alpha_ * m;
+        for (int i = 0; i < v.size(); ++i)
+            direction(i) /= sqrt(v(i) + epsilon_);
+
+        return true;
+    }
+} // namespace polysolve::nonlinear
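For reference, compute_update_direction follows the Adam recurrences of Kingma & Ba (2014). With gradient g_t at step t = 1, 2, ..., in LaTeX:

m_t = \beta_1 m_{t-1} + (1 - \beta_1)\, g_t
v_t = \beta_2 v_{t-1} + (1 - \beta_2)\, g_t^2
\hat{m}_t = \frac{m_t}{1 - \beta_1^t}, \qquad \hat{v}_t = \frac{v_t}{1 - \beta_2^t}
d_t = -\alpha\, \frac{\hat{m}_t}{\sqrt{\hat{v}_t + \epsilon}}

Note that epsilon sits inside the square root here, a common benign variant of the paper's \sqrt{\hat{v}_t} + \epsilon, and that the stochastic mode simply zeroes each component of g_t independently with probability erase_component_probability_ before the recurrences run.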
src/polysolve/nonlinear/descent_strategies/ADAM.hpp

Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,45 @@
+#pragma once
+
+#include "DescentStrategy.hpp"
+#include <polysolve/Utils.hpp>
+
+#include <polysolve/linear/Solver.hpp>
+
+namespace polysolve::nonlinear
+{
+    class ADAM : public DescentStrategy
+    {
+    public:
+        using Superclass = DescentStrategy;
+
+        ADAM(const json &solver_params,
+             const bool is_stochastic,
+             const double characteristic_length,
+             spdlog::logger &logger);
+
+        std::string name() const override { return is_stochastic_ ? "StochasticADAM" : "ADAM"; }
+
+        void reset(const int ndof) override;
+
+        virtual bool compute_update_direction(
+            Problem &objFunc,
+            const TVector &x,
+            const TVector &grad,
+            TVector &direction) override;
+
+        bool is_direction_descent() override { return false; }
+
+    private:
+        TVector m_prev_; // first-moment estimate from the previous step
+        TVector v_prev_; // second-moment estimate from the previous step
+
+        double beta_1_, beta_2_; // exponential decay rates of the moment estimates
+        double alpha_;           // step size
+
+        int t_ = 0;      // iteration counter
+        double epsilon_; // numerical safeguard in the denominator
+
+        bool is_stochastic_;
+        double erase_component_probability_ = 0; // used by StochasticADAM only
+    };
+} // namespace polysolve::nonlinear
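As a quick sanity check of the update rule, the following self-contained sketch (Eigen only; polysolve's Problem and TVector types are replaced by plain Eigen vectors, so it is independent of the class above) applies the same recurrences to minimize f(x) = x^T x, whose gradient is 2x; the iterate should end up near zero.

#include <Eigen/Dense>
#include <cmath>
#include <iostream>

int main()
{
    const double alpha = 0.001, beta_1 = 0.9, beta_2 = 0.999, eps = 1e-8;
    Eigen::VectorXd x = Eigen::VectorXd::Constant(2, 1.0);
    Eigen::VectorXd m = Eigen::VectorXd::Zero(2);
    Eigen::VectorXd v = Eigen::VectorXd::Zero(2);

    for (int t = 1; t <= 5000; ++t)
    {
        const Eigen::VectorXd g = 2.0 * x; // gradient of ||x||^2
        m = beta_1 * m + (1 - beta_1) * g;
        v = (beta_2 * v.array() + (1 - beta_2) * g.array().square()).matrix();
        const Eigen::VectorXd m_hat = m / (1 - std::pow(beta_1, t));
        const Eigen::VectorXd v_hat = v / (1 - std::pow(beta_2, t));
        x -= alpha * (m_hat.array() / (v_hat.array() + eps).sqrt()).matrix();
    }

    std::cout << "x after 5000 Adam steps: " << x.transpose() << "\n"; // ~0
    return 0;
}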

src/polysolve/nonlinear/descent_strategies/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
@@ -6,6 +6,8 @@ set(SOURCES
     BFGS.hpp
     GradientDescent.cpp
     GradientDescent.hpp
+    ADAM.cpp
+    ADAM.hpp
     Newton.hpp
     Newton.cpp
 )
