|
375 | 375 | (AdamS, {'lr': 1e0, 'weight_decay': 1e-3, 'ams_bound': True}, 20), |
376 | 376 | (AdaFactor, {'lr': 1e1, 'weight_decay': 1e-3, 'scale_parameter': False}, 100), |
377 | 377 | (AdaFactor, {'lr': 1e1, 'weight_decay': 1e-3, 'ams_bound': True}, 120), |
| 378 | + (AdaFactor, {'lr': 1e1, 'weight_decay': 1e-3, 'cautious': True}, 70), |
378 | 379 | (AdaFactor, {'lr': 1e1, 'betas': (None, 0.999), 'weight_decay': 1e-3}, 40), |
379 | 380 | (Apollo, {'lr': 5e-1, 'weight_decay': 1e-3}, 10), |
380 | 381 | (Apollo, {'lr': 5e-1, 'weight_decay': 1e-3, 'rebound': 'belief'}, 10), |
|
383 | 384 | (Lion, {'lr': 5e-1, 'weight_decay': 1e-3}, 5), |
384 | 385 | (Lion, {'lr': 5e-1, 'weight_decay': 1e-3, 'weight_decouple': False}, 5), |
385 | 386 | (Lion, {'lr': 5e-1, 'weight_decay': 1e-3, 'use_gc': True}, 10), |
| 387 | + (Lion, {'lr': 5e-1, 'weight_decay': 1e-3, 'cautious': True}, 5), |
386 | 388 | (AliG, {'max_lr': 5e-1, 'momentum': 0.9}, 5), |
387 | 389 | (AliG, {'max_lr': 5e-1, 'momentum': 0.9, 'adjusted_momentum': True}, 5), |
388 | 390 | (SM3, {'lr': 5e-1, 'momentum': 0.9, 'beta': 0.9}, 5), |
|
469 | 471 | {'lr': 5e-1, 'weight_decay': 1e-3, 'rank': 2, 'scale': 1.0, 'update_proj_gap': 2, 'projection_type': 'full'}, |
470 | 472 | 5, |
471 | 473 | ), |
| 474 | + ( |
| 475 | + GaLore, |
| 476 | + {'lr': 1e0, 'weight_decay': 1e-3, 'rank': 2, 'scale': 1.0, 'update_proj_gap': 1, 'projection_type': 'random'}, |
| 477 | + 5, |
| 478 | + ), |
472 | 479 | (Adalite, {'lr': 1e0, 'weight_decay': 1e-3}, 5), |
473 | 480 | (ScheduleFreeSGD, {'lr': 1e0, 'weight_decay': 1e-3}, 5), |
474 | 481 | (ScheduleFreeAdamW, {'lr': 1e0, 'weight_decay': 1e-3}, 5), |
|
478 | 485 | (Kate, {'lr': 5e-2}, 10), |
479 | 486 | (StableAdamW, {'lr': 1e0}, 5), |
480 | 487 | (AdamG, {'lr': 1e0}, 20), |
481 | | - (AdEMAMix, {'lr': 1e0}, 5), |
482 | | - (AdEMAMix, {'lr': 1e0, 't_alpha_beta3': 5}, 5), |
| 488 | + (AdEMAMix, {'lr': 1e0}, 3), |
| 489 | + (AdEMAMix, {'lr': 1e0, 't_alpha_beta3': 5}, 3), |
| 490 | + (AdEMAMix, {'lr': 1e0, 'cautious': True}, 2), |
483 | 491 | ( |
484 | 492 | SOAP, |
485 | 493 | {'lr': 1e0, 'shampoo_beta': 0.95, 'precondition_frequency': 1, 'merge_dims': False, 'precondition_1d': True}, |
|
0 commit comments