|
93 | 93 | (AdaBelief, {'lr': 5e-1, 'weight_decay': 1e-3, 'weight_decouple': False}, 10), |
94 | 94 | (AdaBelief, {'lr': 5e-1, 'weight_decay': 1e-3, 'fixed_decay': True}, 10), |
95 | 95 | (AdaBelief, {'lr': 5e-1, 'weight_decay': 1e-3, 'rectify': False}, 10), |
96 | | - (AdaBound, {'lr': 5e-1, 'gamma': 0.1, 'weight_decay': 1e-3}, 75), |
97 | | - (AdaBound, {'lr': 5e-1, 'gamma': 0.1, 'weight_decay': 1e-3, 'fixed_decay': True}, 75), |
98 | | - (AdaBound, {'lr': 5e-1, 'gamma': 0.1, 'weight_decay': 1e-3, 'weight_decouple': False}, 75), |
99 | | - (AdaBound, {'lr': 5e-1, 'gamma': 0.1, 'weight_decay': 1e-3, 'amsbound': True}, 75), |
100 | | - (Adai, {'lr': 2e-1, 'weight_decay': 0.0}, 25), |
101 | | - (Adai, {'lr': 2e-1, 'weight_decay': 0.0, 'use_gc': True}, 75), |
102 | | - (Adai, {'lr': 2e-1, 'weight_decay': 0.0, 'dampening': 0.9}, 25), |
103 | | - (Adai, {'lr': 2e-1, 'weight_decay': 1e-4, 'weight_decouple': False}, 25), |
104 | | - (Adai, {'lr': 2e-1, 'weight_decay': 1e-4, 'weight_decouple': True}, 25), |
105 | | - (Adai, {'lr': 2e-1, 'weight_decay': 1e-4, 'weight_decouple': False, 'stable_weight_decay': True}, 25), |
106 | | - (Adai, {'lr': 2e-1, 'weight_decay': 1e-4, 'weight_decouple': True, 'stable_weight_decay': True}, 25), |
| 96 | + (AdaBound, {'lr': 5e-1, 'gamma': 0.1, 'weight_decay': 1e-3}, 50), |
| 97 | + (AdaBound, {'lr': 5e-1, 'gamma': 0.1, 'weight_decay': 1e-3, 'fixed_decay': True}, 50), |
| 98 | + (AdaBound, {'lr': 5e-1, 'gamma': 0.1, 'weight_decay': 1e-3, 'weight_decouple': False}, 50), |
| 99 | + (AdaBound, {'lr': 5e-1, 'gamma': 0.1, 'weight_decay': 1e-3, 'amsbound': True}, 50), |
| 100 | + (Adai, {'lr': 5e-1, 'weight_decay': 0.0}, 5), |
| 101 | + (Adai, {'lr': 5e-1, 'weight_decay': 0.0, 'use_gc': True}, 50), |
| 102 | + (Adai, {'lr': 5e-1, 'weight_decay': 0.0, 'dampening': 0.9}, 5), |
| 103 | + (Adai, {'lr': 5e-1, 'weight_decay': 1e-4, 'weight_decouple': False}, 5), |
| 104 | + (Adai, {'lr': 5e-1, 'weight_decay': 1e-4, 'weight_decouple': True}, 5), |
| 105 | + (Adai, {'lr': 5e-1, 'weight_decay': 1e-4, 'weight_decouple': False, 'stable_weight_decay': True}, 5), |
| 106 | + (Adai, {'lr': 5e-1, 'weight_decay': 1e-4, 'weight_decouple': True, 'stable_weight_decay': True}, 5), |
107 | 107 | (AdamP, {'lr': 5e-1, 'weight_decay': 1e-3}, 5), |
108 | 108 | (AdamP, {'lr': 5e-1, 'weight_decay': 1e-3, 'use_gc': True}, 10), |
109 | 109 | (AdamP, {'lr': 5e-1, 'weight_decay': 1e-3, 'nesterov': True}, 5), |
110 | 110 | (DiffGrad, {'lr': 5e-2, 'weight_decay': 1e-3}, 10), |
111 | 111 | (DiffGrad, {'lr': 5e-2, 'weight_decay': 1e-3, 'amsgrad': True}, 10), |
112 | 112 | (DiffGrad, {'lr': 5e-1, 'weight_decay': 1e-3, 'rectify': True}, 20), |
113 | | - (Lamb, {'lr': 5e-1, 'weight_decay': 1e-3}, 25), |
114 | | - (Lamb, {'lr': 5e-1, 'weight_decay': 1e-3, 'pre_norm': True, 'max_grad_norm': 0.0}, 25), |
115 | | - (Lamb, {'lr': 5e-1, 'weight_decay': 1e-3, 'grad_averaging': False}, 25), |
| 113 | + (Lamb, {'lr': 5e-1, 'weight_decay': 1e-3}, 20), |
| 114 | + (Lamb, {'lr': 5e-1, 'weight_decay': 1e-3, 'pre_norm': True, 'max_grad_norm': 0.0}, 20), |
| 115 | + (Lamb, {'lr': 5e-1, 'weight_decay': 1e-3, 'grad_averaging': False}, 20), |
116 | 116 | (Lamb, {'lr': 5e-1, 'weight_decay': 1e-3, 'adam': True, 'eps': 1e-8}, 10), |
117 | | - (Lamb, {'lr': 5e-1, 'weight_decay': 1e-3, 'pre_norm': True, 'eps': 1e-8}, 25), |
| 117 | + (Lamb, {'lr': 5e-1, 'weight_decay': 1e-3, 'pre_norm': True, 'eps': 1e-8}, 20), |
118 | 118 | (Lamb, {'lr': 5e-1, 'weight_decay': 1e-3, 'rectify': True, 'degenerated_to_sgd': True}, 10), |
119 | | - (LARS, {'lr': 5e-1, 'weight_decay': 1e-3}, 25), |
120 | | - (LARS, {'lr': 5e-1, 'nesterov': True}, 25), |
121 | | - (MADGRAD, {'lr': 5e-2, 'weight_decay': 1e-3}, 25), |
122 | | - (MADGRAD, {'lr': 5e-2, 'weight_decay': 1e-3, 'eps': 0.0}, 25), |
123 | | - (MADGRAD, {'lr': 1e-2, 'weight_decay': 1e-3, 'momentum': 0.0}, 25), |
124 | | - (MADGRAD, {'lr': 5e-2, 'weight_decay': 1e-3, 'decouple_decay': True}, 25), |
| 119 | + (LARS, {'lr': 5e-1, 'weight_decay': 1e-3}, 20), |
| 120 | + (LARS, {'lr': 5e-1, 'nesterov': True}, 20), |
| 121 | + (MADGRAD, {'lr': 5e-2, 'weight_decay': 1e-3}, 20), |
| 122 | + (MADGRAD, {'lr': 5e-2, 'weight_decay': 1e-3, 'eps': 0.0}, 20), |
| 123 | + (MADGRAD, {'lr': 1e-2, 'weight_decay': 1e-3, 'momentum': 0.0}, 20), |
| 124 | + (MADGRAD, {'lr': 5e-2, 'weight_decay': 1e-3, 'decouple_decay': True}, 20), |
125 | 125 | (RAdam, {'lr': 5e-1, 'weight_decay': 1e-3}, 20), |
126 | 126 | (RAdam, {'lr': 5e-1, 'weight_decay': 1e-3, 'degenerated_to_sgd': True}, 10), |
127 | 127 | (SGDP, {'lr': 5e-1, 'weight_decay': 1e-4}, 10), |
|
286 | 286 | (Adan, {'lr': 5e-1, 'weight_decay': 1e-3, 'weight_decouple': True}, 5), |
287 | 287 | (DAdaptAdaGrad, {'lr': 2.0, 'weight_decay': 1e-3}, 50), |
288 | 288 | (DAdaptAdaGrad, {'lr': 2.0, 'weight_decay': 1e-3, 'momentum': 0.1}, 50), |
289 | | - (DAdaptAdam, {'lr': 2.0, 'weight_decay': 1e-3}, 50), |
| 289 | + (DAdaptAdam, {'lr': 2.0, 'weight_decay': 1e-3}, 25), |
290 | 290 | (DAdaptAdam, {'lr': 1.0, 'weight_decay': 1e-3, 'weight_decouple': True}, 50), |
291 | 291 | (DAdaptSGD, {'lr': 2.0, 'weight_decay': 1e-2}, 25), |
292 | 292 | (DAdaptSGD, {'lr': 2.0, 'momentum': 0.9, 'weight_decay': 1e-3}, 25), |
293 | 293 | (DAdaptAdan, {'lr': 1.0, 'weight_decay': 1e-2}, 25), |
294 | 294 | (DAdaptAdan, {'lr': 1.0, 'weight_decay': 1e-2, 'weight_decouple': True}, 50), |
295 | | - (AdamS, {'lr': 1.0, 'weight_decay': 1e-3}, 20), |
| 295 | + (AdamS, {'lr': 1.0, 'weight_decay': 1e-3}, 10), |
296 | 296 | (AdamS, {'lr': 1.0, 'weight_decay': 1e-3, 'amsgrad': True}, 20), |
297 | | - (AdaFactor, {'lr': 5e-1, 'weight_decay': 1e-3, 'scale_parameter': False}, 100), |
| 297 | + (AdaFactor, {'lr': 7.5e-1, 'weight_decay': 1e-3, 'scale_parameter': False}, 100), |
298 | 298 | (AdaFactor, {'lr': 7.5e-1, 'weight_decay': 1e-3, 'amsgrad': True}, 125), |
299 | 299 | (Apollo, {'lr': 5e-1, 'weight_decay': 1e-3}, 10), |
300 | 300 | (Apollo, {'lr': 5e-1, 'weight_decay': 1e-3, 'rebound': 'belief'}, 10), |
|
0 commit comments