|
133 | 133 | (AdaBelief, {'lr': 5e-1, 'weight_decay': 1e-3, 'weight_decouple': False}, 10), |
134 | 134 | (AdaBelief, {'lr': 5e-1, 'weight_decay': 1e-3, 'fixed_decay': True}, 10), |
135 | 135 | (AdaBelief, {'lr': 5e-1, 'weight_decay': 1e-3, 'rectify': False}, 10), |
136 | | - (AdaBound, {'lr': 5e-1, 'gamma': 0.1, 'weight_decay': 1e-3}, 100), |
137 | | - (AdaBound, {'lr': 5e-1, 'gamma': 0.1, 'weight_decay': 1e-3, 'fixed_decay': True}, 100), |
138 | | - (AdaBound, {'lr': 5e-1, 'gamma': 0.1, 'weight_decay': 1e-3, 'weight_decouple': False}, 100), |
139 | | - (AdaBound, {'lr': 5e-1, 'gamma': 0.1, 'weight_decay': 1e-3, 'amsbound': True}, 100), |
140 | | - (Adai, {'lr': 1e-1, 'weight_decay': 0.0}, 150), |
141 | | - (Adai, {'lr': 1e-1, 'weight_decay': 0.0, 'use_gc': True}, 150), |
142 | | - (Adai, {'lr': 1e-1, 'weight_decay': 0.0, 'dampening': 0.9}, 150), |
143 | | - (Adai, {'lr': 1e-1, 'weight_decay': 1e-4, 'weight_decouple': False}, 100), |
144 | | - (Adai, {'lr': 1e-1, 'weight_decay': 1e-4, 'weight_decouple': True}, 100), |
145 | | - (Adai, {'lr': 1e-1, 'weight_decay': 1e-4, 'weight_decouple': False, 'use_stable_weight_decay': True}, 100), |
146 | | - (Adai, {'lr': 1e-1, 'weight_decay': 1e-4, 'weight_decouple': True, 'use_stable_weight_decay': True}, 100), |
| 136 | + (AdaBound, {'lr': 5e-1, 'gamma': 0.1, 'weight_decay': 1e-3}, 75), |
| 137 | + (AdaBound, {'lr': 5e-1, 'gamma': 0.1, 'weight_decay': 1e-3, 'fixed_decay': True}, 75), |
| 138 | + (AdaBound, {'lr': 5e-1, 'gamma': 0.1, 'weight_decay': 1e-3, 'weight_decouple': False}, 75), |
| 139 | + (AdaBound, {'lr': 5e-1, 'gamma': 0.1, 'weight_decay': 1e-3, 'amsbound': True}, 75), |
| 140 | + (Adai, {'lr': 2e-1, 'weight_decay': 0.0}, 50), |
| 141 | + (Adai, {'lr': 2e-1, 'weight_decay': 0.0, 'use_gc': True}, 75), |
| 142 | + (Adai, {'lr': 2e-1, 'weight_decay': 0.0, 'dampening': 0.9}, 50), |
| 143 | + (Adai, {'lr': 1e-1, 'weight_decay': 1e-4, 'weight_decouple': False}, 50), |
| 144 | + (Adai, {'lr': 1e-1, 'weight_decay': 1e-4, 'weight_decouple': True}, 50), |
| 145 | + (Adai, {'lr': 1e-1, 'weight_decay': 1e-4, 'weight_decouple': False, 'use_stable_weight_decay': True}, 50), |
| 146 | + (Adai, {'lr': 1e-1, 'weight_decay': 1e-4, 'weight_decouple': True, 'use_stable_weight_decay': True}, 50), |
147 | 147 | (AdamP, {'lr': 5e-1, 'weight_decay': 1e-3}, 10), |
148 | 148 | (AdamP, {'lr': 5e-1, 'weight_decay': 1e-3, 'use_gc': True}, 10), |
149 | 149 | (AdamP, {'lr': 5e-1, 'weight_decay': 1e-3, 'nesterov': True}, 10), |
|
156 | 156 | (Lamb, {'lr': 1e-1, 'weight_decay': 1e-3, 'pre_norm': True, 'eps': 1e-8}, 100), |
157 | 157 | (LARS, {'lr': 1e-1, 'weight_decay': 1e-3}, 100), |
158 | 158 | (LARS, {'lr': 1e-1, 'nesterov': True}, 100), |
159 | | - (RaLamb, {'lr': 1e-1, 'weight_decay': 1e-3}, 100), |
160 | | - (RaLamb, {'lr': 1e-2, 'weight_decay': 1e-3, 'pre_norm': True}, 100), |
161 | | - (RaLamb, {'lr': 1e-2, 'weight_decay': 1e-3, 'degenerated_to_sgd': True}, 100), |
162 | | - (MADGRAD, {'lr': 1e-2, 'weight_decay': 1e-3}, 100), |
163 | | - (MADGRAD, {'lr': 1e-2, 'weight_decay': 1e-3, 'eps': 0.0}, 100), |
164 | | - (MADGRAD, {'lr': 1e-2, 'weight_decay': 1e-3, 'momentum': 0.0}, 100), |
165 | | - (MADGRAD, {'lr': 1e-2, 'weight_decay': 1e-3, 'decouple_decay': True}, 100), |
166 | | - (RAdam, {'lr': 1e-1, 'weight_decay': 1e-3}, 100), |
167 | | - (RAdam, {'lr': 1e-1, 'weight_decay': 1e-3, 'degenerated_to_sgd': True}, 100), |
| 159 | + (RaLamb, {'lr': 1e-1, 'weight_decay': 1e-3}, 50), |
| 160 | + (RaLamb, {'lr': 1e-1, 'weight_decay': 1e-3, 'pre_norm': True}, 50), |
| 161 | + (RaLamb, {'lr': 1e-1, 'weight_decay': 1e-3, 'degenerated_to_sgd': True}, 50), |
| 162 | + (MADGRAD, {'lr': 1e-2, 'weight_decay': 1e-3}, 50), |
| 163 | + (MADGRAD, {'lr': 1e-2, 'weight_decay': 1e-3, 'eps': 0.0}, 50), |
| 164 | + (MADGRAD, {'lr': 1e-2, 'weight_decay': 1e-3, 'momentum': 0.0}, 50), |
| 165 | + (MADGRAD, {'lr': 1e-2, 'weight_decay': 1e-3, 'decouple_decay': True}, 50), |
| 166 | + (RAdam, {'lr': 1e-1, 'weight_decay': 1e-3}, 50), |
| 167 | + (RAdam, {'lr': 1e-1, 'weight_decay': 1e-3, 'degenerated_to_sgd': True}, 50), |
168 | 168 | (SGDP, {'lr': 5e-2, 'weight_decay': 1e-4}, 50), |
169 | 169 | (SGDP, {'lr': 5e-2, 'weight_decay': 1e-4, 'nesterov': True}, 50), |
170 | | - (Ranger, {'lr': 5e-1, 'weight_decay': 1e-3}, 200), |
| 170 | + (Ranger, {'lr': 5e-1, 'weight_decay': 1e-3}, 150), |
171 | 171 | (Ranger21, {'lr': 5e-1, 'weight_decay': 1e-3, 'num_iterations': 500}, 200), |
172 | 172 | (Shampoo, {'lr': 5e-1, 'weight_decay': 1e-3, 'momentum': 0.1}, 10), |
173 | 173 | (ScalableShampoo, {'lr': 1e-1, 'weight_decay': 1e-3, 'graft_type': 0}, 10), |
|
188 | 188 | (AdaPNM, {'lr': 3e-1, 'weight_decay': 1e-3, 'amsgrad': False}, 50), |
189 | 189 | (Nero, {'lr': 5e-1}, 50), |
190 | 190 | (Nero, {'lr': 5e-1, 'constraints': False}, 50), |
191 | | - (Adan, {'lr': 5e-1}, 100), |
192 | | - (Adan, {'lr': 5e-1, 'max_grad_norm': 1.0}, 100), |
193 | | - (Adan, {'lr': 5e-1, 'weight_decay': 1e-3, 'use_gc': True}, 150), |
194 | | - (Adan, {'lr': 1e-1, 'weight_decay': 1e-3, 'use_gc': True, 'weight_decouple': True}, 100), |
195 | | - (DAdaptAdaGrad, {'lr': 1.0, 'weight_decay': 1e-2}, 150), |
196 | | - (DAdaptAdaGrad, {'lr': 1.0, 'weight_decay': 1e-2, 'momentum': 0.1}, 150), |
| 191 | + (Adan, {'lr': 5e-1}, 75), |
| 192 | + (Adan, {'lr': 5e-1, 'max_grad_norm': 1.0}, 75), |
| 193 | + (Adan, {'lr': 5e-1, 'weight_decay': 1e-3, 'use_gc': True}, 100), |
| 194 | + (Adan, {'lr': 5e-1, 'weight_decay': 1e-3, 'use_gc': True, 'weight_decouple': True}, 75), |
| 195 | + (DAdaptAdaGrad, {'lr': 1.0, 'weight_decay': 1e-3}, 150), |
| 196 | + (DAdaptAdaGrad, {'lr': 1.0, 'weight_decay': 1e-3, 'momentum': 0.1}, 150), |
197 | 197 | (DAdaptAdam, {'lr': 1.0, 'weight_decay': 1e-2}, 50), |
198 | 198 | (DAdaptAdam, {'lr': 1.0, 'weight_decay': 1e-2, 'weight_decouple': True}, 50), |
199 | 199 | (DAdaptSGD, {'lr': 1.0, 'weight_decay': 1e-2}, 30), |
|
0 commit comments