|
22 | 22 | DiffRGrad, |
23 | 23 | Lamb, |
24 | 24 | Nero, |
| 25 | + NovoGrad, |
25 | 26 | RAdam, |
26 | 27 | RaLamb, |
27 | 28 | Ranger, |
|
68 | 69 | 'dadaptadam', |
69 | 70 | 'adams', |
70 | 71 | 'adafactor', |
| 72 | + 'novograd', |
71 | 73 | ] |
72 | 74 |
|
73 | 75 | VALID_LR_SCHEDULER_NAMES: List[str] = [ |
|
158 | 160 | (Apollo, {'lr': 5e-1, 'weight_decay': 1e-3}, 10), |
159 | 161 | (Apollo, {'lr': 5e-1, 'weight_decay': 1e-3, 'rebound': 'belief'}, 10), |
160 | 162 | (Apollo, {'lr': 5e-1, 'weight_decay': 1e-3, 'weight_decay_type': 'stable', 'warmup_steps': 0}, 50), |
| 163 | + (NovoGrad, {'lr': 5e-1, 'weight_decay': 1e-3, 'grad_averaging': True}, 50), |
161 | 164 | ] |
162 | 165 | ADAMD_SUPPORTED_OPTIMIZERS: List[Tuple[Any, Dict[str, Union[float, bool, int]], int]] = [ |
163 | 166 | (build_lookahead, {'lr': 5e-1, 'weight_decay': 1e-3, 'adamd_debias_term': True}, 10), |
|
172 | 175 | (Ranger21, {'lr': 5e-1, 'weight_decay': 1e-3, 'adamd_debias_term': True, 'num_iterations': 200}, 200), |
173 | 176 | (AdaPNM, {'lr': 3e-1, 'weight_decay': 1e-3, 'adamd_debias_term': True}, 50), |
174 | 177 | (AdamS, {'lr': 2e1, 'weight_decay': 1e-3, 'adamd_debias_term': True}, 50), |
| 178 | + (NovoGrad, {'lr': 5e-1, 'weight_decay': 1e-3, 'adamd_debias_term': True}, 50), |
175 | 179 | ] |