     (Lamb, {'lr': 1e-1, 'weight_decay': 1e-3, 'adam': True, 'eps': 1e-8}, 500),
     (Lamb, {'lr': 1e-1, 'weight_decay': 1e-3, 'pre_norm': True, 'eps': 1e-8}, 500),
     (LARS, {'lr': 1e-1, 'weight_decay': 1e-3}, 500),
-    (RaLamb, {'lr': 2e-1, 'weight_decay': 1e-3}, 200),
+    (RaLamb, {'lr': 1e-1, 'weight_decay': 1e-3}, 200),
     (RaLamb, {'lr': 5e-1, 'weight_decay': 1e-3, 'pre_norm': True}, 500),
     # (RaLamb, {'lr': 1e-1, 'weight_decay': 1e-3, 'degenerated_to_sgd': True}, 200),
     (MADGRAD, {'lr': 1e-2, 'weight_decay': 1e-3}, 500),
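
For context, each entry above appears to be an (optimizer_class, constructor_kwargs, iterations) tuple handed to test_f32_optimizers through the optimizer_fp32_config fixture. A minimal sketch of one common way such a parametrized fixture could be wired up; the fixture body, the OPTIMIZER_FP32_CONFIGS name, and the import path are assumptions, not the repository's actual code:

import pytest

from pytorch_optimizer import LARS, Lamb  # assumed import path

# hypothetical stand-in for the full list the hunk above belongs to
OPTIMIZER_FP32_CONFIGS = [
    (Lamb, {'lr': 1e-1, 'weight_decay': 1e-3, 'adam': True, 'eps': 1e-8}, 500),
    (LARS, {'lr': 1e-1, 'weight_decay': 1e-3}, 500),
]

@pytest.fixture(params=OPTIMIZER_FP32_CONFIGS)
def optimizer_fp32_config(request):
    # each test invocation receives one (optimizer_class, kwargs, iterations) tuple
    return request.param
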
@@ -101,6 +101,11 @@ def test_f32_optimizers(optimizer_fp32_config):
     (x_data, y_data), model, loss_fn = build_environment()

     optimizer_class, config, iterations = optimizer_fp32_config
+
+    optimizer_name: str = optimizer_class.__name__
+    if optimizer_name == 'Nero' and 'constraints' not in config:
+        return True
+
     optimizer = optimizer_class(model.parameters(), **config)

     init_loss, loss = np.inf, np.inf
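
The added guard bails out of Nero configurations that lack the 'constraints' option by returning True early. Returning a non-None value from a test triggers a warning on recent pytest versions, so an explicit skip is the more idiomatic route; a hedged sketch of that alternative (the skip reason is an assumption about the intent):

import pytest

def test_f32_optimizers(optimizer_fp32_config):
    optimizer_class, config, iterations = optimizer_fp32_config

    # assumption: the intent is to skip, rather than silently pass,
    # Nero runs that were parametrized without the 'constraints' kwarg
    if optimizer_class.__name__ == 'Nero' and 'constraints' not in config:
        pytest.skip('Nero without the constraints option is not covered here')

    ...  # rest of the test body unchanged
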