|
15 | 15 | "L-BFGS", |
16 | 16 | "L-BFGS-B", |
17 | 17 | "Newton", |
| 18 | + "ADAM", |
| 19 | + "StochasticADAM", |
18 | 20 | "StochasticGradientDescent", |
19 | 21 | "box_constraints", |
20 | 22 | "advanced" |
|
29 | 31 | "Newton", |
30 | 32 | "DenseNewton", |
31 | 33 | "GradientDescent", |
| 34 | + "ADAM", |
| 35 | + "StochasticADAM", |
32 | 36 | "StochasticGradientDescent", |
33 | 37 | "L-BFGS", |
34 | 38 | "BFGS", |
|
166 | 170 | "type": "bool", |
167 | 171 | "doc": "Use PSD as fallback using second order solvers (i.e., Newton's method)." |
168 | 172 | }, |
| 173 | + { |
| 174 | + "pointer": "/ADAM", |
| 175 | + "default": null, |
| 176 | + "type": "object", |
| 177 | + "optional": [ |
| 178 | + "alpha", |
| 179 | + "beta_1", |
| 180 | + "beta_2", |
| 181 | + "epsilon" |
| 182 | + ], |
| 183 | + "doc": "Options for ADAM." |
| 184 | + }, |
| 185 | + { |
| 186 | + "pointer": "/ADAM/alpha", |
| 187 | + "default": 0.001, |
| 188 | + "type": "float", |
| 189 | + "doc": "Parameter alpha for ADAM." |
| 190 | + }, |
| 191 | + { |
| 192 | + "pointer": "/ADAM/beta_1", |
| 193 | + "default": 0.9, |
| 194 | + "type": "float", |
| 195 | + "doc": "Parameter beta_1 for ADAM." |
| 196 | + }, |
| 197 | + { |
| 198 | + "pointer": "/ADAM/beta_2", |
| 199 | + "default": 0.999, |
| 200 | + "type": "float", |
| 201 | + "doc": "Parameter beta_2 for ADAM." |
| 202 | + }, |
| 203 | + { |
| 204 | + "pointer": "/ADAM/epsilon", |
| 205 | + "default": 1e-8, |
| 206 | + "type": "float", |
| 207 | + "doc": "Parameter epsilon for ADAM." |
| 208 | + }, |
| 209 | + { |
| 210 | + "pointer": "/StochasticADAM", |
| 211 | + "default": null, |
| 212 | + "type": "object", |
| 213 | + "optional": [ |
| 214 | + "alpha", |
| 215 | + "beta_1", |
| 216 | + "beta_2", |
| 217 | + "epsilon", |
| 218 | + "erase_component_probability" |
| 219 | + ], |
| 220 | + "doc": "Options for StochasticADAM." |
| 221 | + }, |
| 222 | + { |
| 223 | + "pointer": "/StochasticADAM/alpha", |
| 224 | + "default": 0.001, |
| 225 | + "type": "float", |
| 226 | + "doc": "Parameter alpha for StochasticADAM." |
| 227 | + }, |
| 228 | + { |
| 229 | + "pointer": "/StochasticADAM/beta_1", |
| 230 | + "default": 0.9, |
| 231 | + "type": "float", |
| 232 | + "doc": "Parameter beta_1 for StochasticADAM." |
| 233 | + }, |
| 234 | + { |
| 235 | + "pointer": "/StochasticADAM/beta_2", |
| 236 | + "default": 0.999, |
| 237 | + "type": "float", |
| 238 | + "doc": "Parameter beta_2 for StochasticADAM." |
| 239 | + }, |
| 240 | + { |
| 241 | + "pointer": "/StochasticADAM/epsilon", |
| 242 | + "default": 1e-8, |
| 243 | + "type": "float", |
| 244 | + "doc": "Parameter epsilon for StochasticADAM." |
| 245 | + }, |
| 246 | + { |
| 247 | + "pointer": "/StochasticADAM/erase_component_probability", |
| 248 | + "default": 0.3, |
| 249 | + "type": "float", |
| 250 | + "doc": "Probability of erasing a component of the gradient for StochasticADAM." |
| 251 | + }, |
169 | 252 | { |
170 | 253 | "pointer": "/StochasticGradientDescent", |
171 | 254 | "default": null, |
|
306 | 389 | ], |
307 | 390 | "doc": "Options for BFGS." |
308 | 391 | }, |
| 392 | + { |
| 393 | + "pointer": "/solver/*", |
| 394 | + "type": "object", |
| 395 | + "type_name": "ADAM", |
| 396 | + "required": [ |
| 397 | + "type" |
| 398 | + ], |
| 399 | + "optional": [ |
| 400 | + "alpha", |
| 401 | + "beta_1", |
| 402 | + "beta_2", |
| 403 | + "epsilon" |
| 404 | + ], |
| 405 | + "doc": "Options for ADAM." |
| 406 | + }, |
| 407 | + { |
| 408 | + "pointer": "/solver/*", |
| 409 | + "type": "object", |
| 410 | + "type_name": "StochasticADAM", |
| 411 | + "required": [ |
| 412 | + "type" |
| 413 | + ], |
| 414 | + "optional": [ |
| 415 | + "alpha", |
| 416 | + "beta_1", |
| 417 | + "beta_2", |
| 418 | + "epsilon", |
| 419 | + "erase_component_probability" |
| 420 | + ], |
| 421 | + "doc": "Options for StochasticADAM." |
| 422 | + }, |
309 | 423 | { |
310 | 424 | "pointer": "/solver/*/type", |
311 | 425 | "type": "string", |
|
318 | 432 | "DenseRegularizedNewton", |
319 | 433 | "GradientDescent", |
320 | 434 | "StochasticGradientDescent", |
| 435 | + "ADAM", |
| 436 | + "StochasticADAM", |
321 | 437 | "L-BFGS", |
322 | 438 | "BFGS" |
323 | 439 | ], |
|
359 | 475 | "type": "int", |
360 | 476 | "doc": "The number of corrections to approximate the inverse Hessian matrix." |
361 | 477 | }, |
| 478 | + { |
| 479 | + "pointer": "/solver/*/alpha", |
| 480 | + "default": 0.001, |
| 481 | + "type": "float", |
| 482 | + "doc": "Parameter alpha for ADAM and StochasticADAM." |
| 483 | + }, |
| 484 | + { |
| 485 | + "pointer": "/solver/*/beta_1", |
| 486 | + "default": 0.9, |
| 487 | + "type": "float", |
| 488 | + "doc": "Parameter beta_1 for ADAM and StochasticADAM." |
| 489 | + }, |
| 490 | + { |
| 491 | + "pointer": "/solver/*/beta_2", |
| 492 | + "default": 0.999, |
| 493 | + "type": "float", |
| 494 | + "doc": "Parameter beta_2 for ADAM and StochasticADAM." |
| 495 | + }, |
| 496 | + { |
| 497 | + "pointer": "/solver/*/epsilon", |
| 498 | + "default": 1e-8, |
| 499 | + "type": "float", |
| 500 | + "doc": "Parameter epsilon for ADAM and StochasticADAM." |
| 501 | + }, |
362 | 502 | { |
363 | 503 | "pointer": "/line_search", |
364 | 504 | "default": null, |
|
0 commit comments