|
9 | 9 | AdaBound, |
10 | 10 | Adai, |
11 | 11 | AdamP, |
| 12 | + AdamS, |
12 | 13 | Adan, |
13 | 14 | AdaPNM, |
14 | 15 | DAdaptAdaGrad, |
|
79 | 80 | 'dadaptadagrad', |
80 | 81 | 'dadaptadam', |
81 | 82 | 'dadaptsgd', |
| 83 | + 'adams', |
82 | 84 | ] |
83 | 85 | INVALID_OPTIMIZER_NAMES: List[str] = [ |
84 | 86 | 'asam', |
|
105 | 107 | 'adai', |
106 | 108 | 'shampoo', |
107 | 109 | 'dadaptadam', |
| 110 | + 'adams', |
108 | 111 | ] |
109 | 112 |
|
110 | 113 | VALID_LR_SCHEDULER_NAMES: List[str] = [ |
|
188 | 191 | (DAdaptAdam, {'lr': 1.0, 'weight_decay': 1e-2, 'weight_decouple': True}, 50), |
189 | 192 | (DAdaptSGD, {'lr': 1.0, 'weight_decay': 1e-2}, 50), |
190 | 193 | (DAdaptSGD, {'lr': 1.0, 'momentum': 0.9, 'weight_decay': 1e-3}, 50), |
| 194 | + (AdamS, {'lr': 1.0, 'weight_decay': 1e-3}, 50), |
| 195 | + (AdamS, {'lr': 1.0, 'weight_decay': 1e-3, 'amsgrad': True}, 50), |
191 | 196 | ] |
192 | 197 | ADAMD_SUPPORTED_OPTIMIZERS: List[Tuple[Any, Dict[str, Union[float, bool, int]], int]] = [ |
193 | 198 | (build_lookahead, {'lr': 5e-1, 'weight_decay': 1e-3, 'adamd_debias_term': True}, 10), |
|
202 | 207 | (Ranger, {'lr': 5e-1, 'weight_decay': 1e-3, 'adamd_debias_term': True}, 100), |
203 | 208 | (Ranger21, {'lr': 5e-1, 'weight_decay': 1e-3, 'adamd_debias_term': True, 'num_iterations': 200}, 200), |
204 | 209 | (AdaPNM, {'lr': 3e-1, 'weight_decay': 1e-3, 'adamd_debias_term': True}, 50), |
| 210 | + (AdamS, {'lr': 1e-1, 'weight_decay': 1e-3, 'adamd_debias_term': True}, 50), |
205 | 211 | ] |
0 commit comments