Skip to content

Commit 05775a6

Browse files
author
Teseo Schneider
committed
Merge branch 'main' into custom-fallback
2 parents 60d0dc5 + fbfb31d commit 05775a6

File tree

5 files changed

+268
-0
lines changed

5 files changed

+268
-0
lines changed

non-linear-solver-spec.json

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
"L-BFGS",
1616
"L-BFGS-B",
1717
"Newton",
18+
"ADAM",
19+
"StochasticADAM",
1820
"StochasticGradientDescent",
1921
"box_constraints",
2022
"advanced"
@@ -29,6 +31,8 @@
2931
"Newton",
3032
"DenseNewton",
3133
"GradientDescent",
34+
"ADAM",
35+
"StochasticADAM",
3236
"StochasticGradientDescent",
3337
"L-BFGS",
3438
"BFGS",
@@ -166,6 +170,85 @@
166170
"type": "bool",
167171
"doc": "Use PSD as fallback using second order solvers (i.e., Newton's method)."
168172
},
173+
{
174+
"pointer": "/ADAM",
175+
"default": null,
176+
"type": "object",
177+
"optional": [
178+
"alpha",
179+
"beta_1",
180+
"beta_2",
181+
"epsilon"
182+
],
183+
"doc": "Options for ADAM."
184+
},
185+
{
186+
"pointer": "/ADAM/alpha",
187+
"default": 0.001,
188+
"type": "float",
189+
"doc": "Parameter alpha for ADAM."
190+
},
191+
{
192+
"pointer": "/ADAM/beta_1",
193+
"default": 0.9,
194+
"type": "float",
195+
"doc": "Parameter beta_1 for ADAM."
196+
},
197+
{
198+
"pointer": "/ADAM/beta_2",
199+
"default": 0.999,
200+
"type": "float",
201+
"doc": "Parameter beta_2 for ADAM."
202+
},
203+
{
204+
"pointer": "/ADAM/epsilon",
205+
"default": 1e-8,
206+
"type": "float",
207+
"doc": "Parameter epsilon for ADAM."
208+
},
209+
{
210+
"pointer": "/StochasticADAM",
211+
"default": null,
212+
"type": "object",
213+
"optional": [
214+
"alpha",
215+
"beta_1",
216+
"beta_2",
217+
"epsilon",
218+
"erase_component_probability"
219+
],
220+
"doc": "Options for StochasticADAM."
221+
},
222+
{
223+
"pointer": "/StochasticADAM/alpha",
224+
"default": 0.001,
225+
"type": "float",
226+
"doc": "Parameter alpha for StochasticADAM."
227+
},
228+
{
229+
"pointer": "/StochasticADAM/beta_1",
230+
"default": 0.9,
231+
"type": "float",
232+
"doc": "Parameter beta_1 for StochasticADAM."
233+
},
234+
{
235+
"pointer": "/StochasticADAM/beta_2",
236+
"default": 0.999,
237+
"type": "float",
238+
"doc": "Parameter beta_2 for StochasticADAM."
239+
},
240+
{
241+
"pointer": "/StochasticADAM/epsilon",
242+
"default": 1e-8,
243+
"type": "float",
244+
"doc": "Parameter epsilon for StochasticADAM."
245+
},
246+
{
247+
"pointer": "/StochasticADAM/erase_component_probability",
248+
"default": 0.3,
249+
"type": "float",
250+
"doc": "Probability of erasing a component of the gradient for StochasticADAM."
251+
},
169252
{
170253
"pointer": "/StochasticGradientDescent",
171254
"default": null,
@@ -306,6 +389,37 @@
306389
],
307390
"doc": "Options for BFGS."
308391
},
392+
{
393+
"pointer": "/solver/*",
394+
"type": "object",
395+
"type_name": "ADAM",
396+
"required": [
397+
"type"
398+
],
399+
"optional": [
400+
"alpha",
401+
"beta_1",
402+
"beta_2",
403+
"epsilon"
404+
],
405+
"doc": "Options for ADAM."
406+
},
407+
{
408+
"pointer": "/solver/*",
409+
"type": "object",
410+
"type_name": "StochasticADAM",
411+
"required": [
412+
"type"
413+
],
414+
"optional": [
415+
"alpha",
416+
"beta_1",
417+
"beta_2",
418+
"epsilon",
419+
"erase_component_probability"
420+
],
421+
"doc": "Options for StochasticADAM."
422+
},
309423
{
310424
"pointer": "/solver/*/type",
311425
"type": "string",
@@ -318,6 +432,8 @@
318432
"DenseRegularizedNewton",
319433
"GradientDescent",
320434
"StochasticGradientDescent",
435+
"ADAM",
436+
"StochasticADAM",
321437
"L-BFGS",
322438
"BFGS"
323439
],
@@ -359,6 +475,30 @@
359475
"type": "int",
360476
"doc": "The number of corrections to approximate the inverse Hessian matrix."
361477
},
478+
{
479+
"pointer": "/solver/*/alpha",
480+
"default": 0.001,
481+
"type": "float",
482+
"doc": "Parameter alpha for ADAM."
483+
},
484+
{
485+
"pointer": "/solver/*/beta_1",
486+
"default": 0.9,
487+
"type": "float",
488+
"doc": "Parameter beta_1 for ADAM."
489+
},
490+
{
491+
"pointer": "/solver/*/beta_2",
492+
"default": 0.999,
493+
"type": "float",
494+
"doc": "Parameter beta_2 for ADAM."
495+
},
496+
{
497+
"pointer": "/solver/*/epsilon",
498+
"default": 1e-8,
499+
"type": "float",
500+
"doc": "Parameter epsilon for ADAM."
501+
},
362502
{
363503
"pointer": "/line_search",
364504
"default": null,

src/polysolve/nonlinear/Solver.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
#include "descent_strategies/BFGS.hpp"
77
#include "descent_strategies/Newton.hpp"
8+
#include "descent_strategies/ADAM.hpp"
89
#include "descent_strategies/GradientDescent.hpp"
910
#include "descent_strategies/LBFGS.hpp"
1011

@@ -77,6 +78,15 @@ namespace polysolve::nonlinear
7778
{
7879
return std::make_shared<GradientDescent>(solver_params, false, characteristic_length, logger);
7980
}
81+
82+
else if (solver_name == "ADAM" || solver_name == "adam")
83+
{
84+
return std::make_shared<ADAM>(solver_params, false, characteristic_length, logger);
85+
}
86+
else if (solver_name == "StochasticADAM" || solver_name == "stochastic_adam")
87+
{
88+
return std::make_shared<ADAM>(solver_params, true, characteristic_length, logger);
89+
}
8090
else
8191
throw std::runtime_error("Unrecognized solver type: " + solver_name);
8292
}
@@ -166,6 +176,8 @@ namespace polysolve::nonlinear
166176
return {"BFGS",
167177
"DenseNewton",
168178
"Newton",
179+
"ADAM",
180+
"StochasticADAM",
169181
"GradientDescent",
170182
"StochasticGradientDescent",
171183
"L-BFGS"};
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
// ADAM from "ADAM: A METHOD FOR STOCHASTIC OPTIMIZATION"
2+
3+
#include "ADAM.hpp"
4+
5+
#include <polysolve/Utils.hpp>
6+
7+
namespace polysolve::nonlinear
8+
{
9+
10+
// Reads the ADAM hyper-parameters from the solver settings. The same class
// implements both the deterministic and the stochastic variant; the flag
// selects which parameter section ("ADAM" vs "StochasticADAM") is read.
ADAM::ADAM(const json &solver_params,
           const bool is_stochastic,
           const double characteristic_length,
           spdlog::logger &logger)
    : Superclass(solver_params, characteristic_length, logger), is_stochastic_(is_stochastic)
{
    const std::string section = is_stochastic ? "StochasticADAM" : "ADAM";

    alpha_ = extract_param(section, "alpha", solver_params);
    beta_1_ = extract_param(section, "beta_1", solver_params);
    beta_2_ = extract_param(section, "beta_2", solver_params);
    epsilon_ = extract_param(section, "epsilon", solver_params);

    // Only the stochastic variant randomly drops gradient components.
    if (is_stochastic_)
        erase_component_probability_ = extract_param(section, "erase_component_probability", solver_params);
}
24+
25+
void ADAM::reset(const int ndof)
26+
{
27+
Superclass::reset(ndof);
28+
m_prev_ = Eigen::VectorXd::Zero(ndof);
29+
v_prev_ = Eigen::VectorXd::Zero(ndof);
30+
t_ = 0;
31+
}
32+
33+
// Computes the (negative) ADAM step from the current gradient, following
// Algorithm 1 of Kingma & Ba, "Adam: A Method for Stochastic Optimization":
// exponential moving averages of the gradient (m) and of its element-wise
// square (v) are bias-corrected and combined into
//     direction = -alpha * m_hat / sqrt(v_hat + epsilon).
// (epsilon is kept inside the sqrt, as in the original code; the paper places
// it outside — both are common variants.)
//
// Fixes over the previous version:
//  - t_ is incremented BEFORE the bias correction; previously the first call
//    divided by (1 - beta^0) == 0.
//  - m_prev_ / v_prev_ are written back each call; previously the moving
//    averages never accumulated across iterations.
//  - the stochastic mask is sized by the gradient, not by `direction`, which
//    may be empty/stale on the first call.
//
// Always returns true (the step is always usable, though not necessarily a
// descent direction).
bool ADAM::compute_update_direction(
    Problem &objFunc,
    const TVector &x,
    const TVector &grad,
    TVector &direction)
{
    // Lazily (re)initialize the accumulators if reset() was never called or
    // the number of unknowns changed.
    if (m_prev_.size() != x.size())
        m_prev_ = Eigen::VectorXd::Zero(x.size());
    if (v_prev_.size() != x.size())
        v_prev_ = Eigen::VectorXd::Zero(x.size());

    TVector grad_modified = grad;

    if (is_stochastic_)
    {
        // Zero each gradient component independently with probability
        // erase_component_probability_ (mask entries are uniform in [0, 1)).
        const Eigen::VectorXd mask = (Eigen::VectorXd::Random(grad_modified.size()).array() + 1.) / 2.;
        for (int i = 0; i < grad_modified.size(); ++i)
            if (mask(i) < erase_component_probability_)
                grad_modified(i) = 0.;
    }

    // Update the persisted exponential moving averages in place.
    m_prev_ = beta_1_ * m_prev_ + (1 - beta_1_) * grad_modified;
    for (int i = 0; i < v_prev_.size(); ++i)
        v_prev_(i) = beta_2_ * v_prev_(i) + (1 - beta_2_) * grad_modified(i) * grad_modified(i);

    ++t_; // advance first: 1 - beta^t must be nonzero in the bias correction

    const TVector m_hat = m_prev_ / (1 - pow(beta_1_, t_));
    const TVector v_hat = v_prev_ / (1 - pow(beta_2_, t_));

    direction = -alpha_ * m_hat;
    for (int i = 0; i < v_hat.size(); ++i)
        direction(i) /= sqrt(v_hat(i) + epsilon_);

    return true;
}
69+
} // namespace polysolve::nonlinear
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
#pragma once
2+
3+
#include "DescentStrategy.hpp"
4+
#include <polysolve/Utils.hpp>
5+
6+
#include <polysolve/linear/Solver.hpp>
7+
8+
namespace polysolve::nonlinear
9+
{
10+
class ADAM : public DescentStrategy
11+
{
12+
public:
13+
using Superclass = DescentStrategy;
14+
15+
ADAM(const json &solver_params,
16+
const bool is_stochastic,
17+
const double characteristic_length,
18+
spdlog::logger &logger);
19+
20+
std::string name() const override { return is_stochastic_ ? "StochasticADAM" : "ADAM"; }
21+
22+
void reset(const int ndof) override;
23+
24+
virtual bool compute_update_direction(
25+
Problem &objFunc,
26+
const TVector &x,
27+
const TVector &grad,
28+
TVector &direction) override;
29+
30+
bool is_direction_descent() override { return false; }
31+
32+
private:
33+
TVector m_prev_;
34+
TVector v_prev_;
35+
36+
double beta_1_, beta_2_;
37+
double alpha_;
38+
39+
int t_ = 0;
40+
double epsilon_;
41+
42+
bool is_stochastic_;
43+
double erase_component_probability_ = 0;
44+
};
45+
} // namespace polysolve::nonlinear

src/polysolve/nonlinear/descent_strategies/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ set(SOURCES
66
BFGS.hpp
77
GradientDescent.cpp
88
GradientDescent.hpp
9+
ADAM.cpp
10+
ADAM.hpp
911
Newton.hpp
1012
Newton.cpp
1113
)

0 commit comments

Comments
 (0)