|
310 | 310 | (Adan, {'lr': 5e-1, 'max_grad_norm': 1.0}, 5), |
311 | 311 | (Adan, {'lr': 5e-1, 'weight_decay': 1e-3, 'use_gc': True}, 5), |
312 | 312 | (Adan, {'lr': 5e-1, 'weight_decay': 1e-3, 'weight_decouple': True}, 5), |
313 | | - (DAdaptAdaGrad, {'lr': 2e0, 'weight_decay': 1e-3}, 50), |
314 | | - (DAdaptAdaGrad, {'lr': 2e0, 'weight_decay': 1e-3, 'momentum': 0.1}, 50), |
315 | | - (DAdaptAdam, {'lr': 5e2, 'weight_decay': 1e-3}, 25), |
| 313 | + (DAdaptAdaGrad, {'lr': 3e0, 'weight_decay': 1e-3}, 30), |
| 314 | + (DAdaptAdaGrad, {'lr': 5e0, 'weight_decay': 1e-3, 'momentum': 0.1}, 20), |
| 315 | + (DAdaptAdam, {'lr': 5e4, 'weight_decay': 1e-1}, 10), |
316 | 316 | (DAdaptSGD, {'lr': 2e0, 'weight_decay': 1e-3}, 25), |
317 | | - (DAdaptAdan, {'lr': 1e0, 'weight_decay': 1e-2}, 25), |
318 | | - (DAdaptAdan, {'lr': 1e0, 'weight_decay': 1e-2, 'weight_decouple': True}, 50), |
| 317 | + (DAdaptAdan, {'lr': 2e0, 'weight_decay': 1e-3}, 20), |
319 | 318 | (AdamS, {'lr': 1e0, 'weight_decay': 1e-3}, 10), |
320 | 319 | (AdamS, {'lr': 1e0, 'weight_decay': 1e-3, 'ams_bound': True}, 20), |
321 | 320 | (AdaFactor, {'lr': 7.5e-1, 'weight_decay': 1e-3, 'scale_parameter': False}, 100), |
|
0 commit comments