|
9 | 9 | AdaBound, |
10 | 10 | Adai, |
11 | 11 | AdamP, |
| 12 | + AdamS, |
12 | 13 | Adan, |
13 | 14 | AdaPNM, |
14 | 15 | DAdaptAdaGrad, |
|
55 | 56 | 'pnm', |
56 | 57 | 'dadaptadam', |
57 | 58 | 'dadaptsgd', |
| 59 | + 'adams', |
58 | 60 | ] |
59 | 61 | VALID_OPTIMIZER_NAMES: List[str] = [ |
60 | 62 | 'adamp', |
|
79 | 81 | 'dadaptadagrad', |
80 | 82 | 'dadaptadam', |
81 | 83 | 'dadaptsgd', |
| 84 | + 'adams', |
82 | 85 | ] |
83 | 86 | INVALID_OPTIMIZER_NAMES: List[str] = [ |
84 | 87 | 'asam', |
|
105 | 108 | 'adai', |
106 | 109 | 'shampoo', |
107 | 110 | 'dadaptadam', |
| 111 | + 'adams', |
108 | 112 | ] |
109 | 113 |
|
110 | 114 | VALID_LR_SCHEDULER_NAMES: List[str] = [ |
|
135 | 139 | (Adai, {'lr': 1e-1, 'weight_decay': 0.0, 'dampening': 0.9}, 150), |
136 | 140 | (Adai, {'lr': 1e-1, 'weight_decay': 1e-4, 'weight_decouple': False}, 150), |
137 | 141 | (Adai, {'lr': 1e-1, 'weight_decay': 1e-4, 'weight_decouple': True}, 150), |
| 142 | + (Adai, {'lr': 1e-1, 'weight_decay': 1e-4, 'weight_decouple': False, 'use_stable_weight_decay': True}, 150), |
| 143 | + (Adai, {'lr': 1e-1, 'weight_decay': 1e-4, 'weight_decouple': True, 'use_stable_weight_decay': True}, 150), |
138 | 144 | (AdamP, {'lr': 5e-1, 'weight_decay': 1e-3}, 10), |
139 | 145 | (AdamP, {'lr': 5e-1, 'weight_decay': 1e-3, 'use_gc': True}, 10), |
140 | 146 | (AdamP, {'lr': 5e-1, 'weight_decay': 1e-3, 'nesterov': True}, 10), |
|
188 | 194 | (DAdaptAdam, {'lr': 1.0, 'weight_decay': 1e-2, 'weight_decouple': True}, 50), |
189 | 195 | (DAdaptSGD, {'lr': 1.0, 'weight_decay': 1e-2}, 50), |
190 | 196 | (DAdaptSGD, {'lr': 1.0, 'momentum': 0.9, 'weight_decay': 1e-3}, 50), |
| 197 | + (AdamS, {'lr': 1.0, 'weight_decay': 1e-3}, 50), |
| 198 | + (AdamS, {'lr': 1.0, 'weight_decay': 1e-3, 'amsgrad': True}, 50), |
191 | 199 | ] |
192 | 200 | ADAMD_SUPPORTED_OPTIMIZERS: List[Tuple[Any, Dict[str, Union[float, bool, int]], int]] = [ |
193 | 201 | (build_lookahead, {'lr': 5e-1, 'weight_decay': 1e-3, 'adamd_debias_term': True}, 10), |
194 | 202 | (AdaBelief, {'lr': 5e-1, 'weight_decay': 1e-3, 'adamd_debias_term': True}, 50), |
195 | 203 | (AdaBound, {'lr': 5e-1, 'gamma': 0.1, 'weight_decay': 1e-3, 'adamd_debias_term': True}, 100), |
196 | | - (AdaBound, {'lr': 1e-2, 'gamma': 0.1, 'weight_decay': 1e-3, 'amsbound': True, 'adamd_debias_term': True}, 100), |
197 | 204 | (AdamP, {'lr': 5e-1, 'weight_decay': 1e-3, 'adamd_debias_term': True}, 10), |
198 | 205 | (DiffGrad, {'lr': 5e-1, 'weight_decay': 1e-3, 'adamd_debias_term': True}, 10), |
199 | 206 | (DiffRGrad, {'lr': 1e-1, 'weight_decay': 1e-3, 'adamd_debias_term': True}, 100), |
|
202 | 209 | (Ranger, {'lr': 5e-1, 'weight_decay': 1e-3, 'adamd_debias_term': True}, 100), |
203 | 210 | (Ranger21, {'lr': 5e-1, 'weight_decay': 1e-3, 'adamd_debias_term': True, 'num_iterations': 200}, 200), |
204 | 211 | (AdaPNM, {'lr': 3e-1, 'weight_decay': 1e-3, 'adamd_debias_term': True}, 50), |
| 212 | + (AdamS, {'lr': 2e1, 'weight_decay': 1e-3, 'adamd_debias_term': True}, 50), |
205 | 213 | ] |
0 commit comments