|
310 | 310 | (Adan, {'lr': 5e-1, 'max_grad_norm': 1.0}, 5), |
311 | 311 | (Adan, {'lr': 5e-1, 'weight_decay': 1e-3, 'use_gc': True}, 5), |
312 | 312 | (Adan, {'lr': 5e-1, 'weight_decay': 1e-3, 'weight_decouple': True}, 5), |
313 | | - (DAdaptAdaGrad, {'lr': 2.0, 'weight_decay': 1e-3}, 50), |
314 | | - (DAdaptAdaGrad, {'lr': 2.0, 'weight_decay': 1e-3, 'momentum': 0.1}, 50), |
315 | | - (DAdaptAdam, {'lr': 2.0, 'weight_decay': 1e-3}, 25), |
316 | | - (DAdaptAdam, {'lr': 1.0, 'weight_decay': 1e-3, 'weight_decouple': True}, 50), |
317 | | - (DAdaptSGD, {'lr': 2.0, 'weight_decay': 1e-2}, 25), |
318 | | - (DAdaptSGD, {'lr': 2.0, 'momentum': 0.9, 'weight_decay': 1e-3}, 25), |
319 | | - (DAdaptAdan, {'lr': 1.0, 'weight_decay': 1e-2}, 25), |
320 | | - (DAdaptAdan, {'lr': 1.0, 'weight_decay': 1e-2, 'weight_decouple': True}, 50), |
321 | | - (AdamS, {'lr': 1.0, 'weight_decay': 1e-3}, 10), |
322 | | - (AdamS, {'lr': 1.0, 'weight_decay': 1e-3, 'ams_bound': True}, 20), |
| 313 | + (DAdaptAdaGrad, {'lr': 2e0, 'weight_decay': 1e-3}, 50), |
| 314 | + (DAdaptAdaGrad, {'lr': 2e0, 'weight_decay': 1e-3, 'momentum': 0.1}, 50), |
| 315 | + (DAdaptAdam, {'lr': 5e2, 'weight_decay': 1e-3}, 25), |
| 316 | + (DAdaptSGD, {'lr': 2e0, 'weight_decay': 1e-3}, 25), |
| 317 | + (DAdaptAdan, {'lr': 1e0, 'weight_decay': 1e-2}, 25), |
| 318 | + (DAdaptAdan, {'lr': 1e0, 'weight_decay': 1e-2, 'weight_decouple': True}, 50), |
| 319 | + (AdamS, {'lr': 1e0, 'weight_decay': 1e-3}, 10), |
| 320 | + (AdamS, {'lr': 1e0, 'weight_decay': 1e-3, 'ams_bound': True}, 20), |
323 | 321 | (AdaFactor, {'lr': 7.5e-1, 'weight_decay': 1e-3, 'scale_parameter': False}, 100), |
324 | 322 | (AdaFactor, {'lr': 7.5e-1, 'weight_decay': 1e-3, 'ams_bound': True}, 125), |
325 | 323 | (Apollo, {'lr': 5e-1, 'weight_decay': 1e-3}, 10), |
|
0 commit comments