 import numpy as np
-import pytest
+import time
 from sklearn.datasets import make_regression
 from sklearn.linear_model import ElasticNet
 from sklearn.model_selection import GridSearchCV, KFold
 from skglm.datafits import Quadratic
 from skglm.penalties import L1_plus_L2
 from skglm.solvers import AndersonCD
 from skglm.cv import GeneralizedLinearEstimatorCV
+import pytest


-@pytest.mark.parametrize("seed", [0, 42])
-def test_elasticnet_cv_matches_sklearn(seed):
+@pytest.mark.parametrize("n_samples,n_features,noise",
+                         [(100, 10, 0.1), (100, 500, 0.2), (100, 500, 0.3)])
+def test_elasticnet_cv_matches_sklearn(n_samples, n_features, noise):
     """Test GeneralizedLinearEstimatorCV matches sklearn GridSearchCV for ElasticNet."""
-    X, y = make_regression(n_samples=100, n_features=20, noise=0.1, random_state=seed)
+    seed = 42
+    X, y = make_regression(n_samples=n_samples,
+                           n_features=n_features, noise=noise, random_state=seed)

-    alphas = np.array([0.001, 0.01, 0.1, 1.0])
+    n = X.shape[0]
+    # Scale the grid to the data: alpha_max = max|X^T y| / n is the smallest
+    # penalty at which the pure-L1 (Lasso) solution is identically zero.
+    alpha_max = np.max(np.abs(X.T @ y)) / n
+    alphas = alpha_max * np.array([1, 0.1, 0.01, 0.001])
     l1_ratios = np.array([0.2, 0.5, 0.8])
     cv = KFold(n_splits=5, shuffle=True, random_state=seed)

+    start_time = time.time()
     sklearn_model = GridSearchCV(
         ElasticNet(max_iter=10000, tol=1e-8),
         {'alpha': alphas, 'l1_ratio': l1_ratios},
         cv=cv, scoring='neg_mean_squared_error', n_jobs=1
     ).fit(X, y)
+    sklearn_time = time.time() - start_time

+    start_time = time.time()
+    # The (0.1, 0.5) in L1_plus_L2 are only initial (alpha, l1_ratio) values;
+    # the CV search presumably sweeps the alphas/l1_ratios grids passed below.
     skglm_model = GeneralizedLinearEstimatorCV(
         Quadratic(), L1_plus_L2(0.1, 0.5), AndersonCD(max_iter=10000, tol=1e-8),
         alphas=alphas, l1_ratio=l1_ratios, cv=5, random_state=seed, n_jobs=1
     ).fit(X, y)
+    skglm_time = time.time() - start_time

-    assert sklearn_model.best_params_['alpha'] == skglm_model.alpha_
-    assert sklearn_model.best_params_['l1_ratio'] == skglm_model.l1_ratio_
-    np.testing.assert_allclose(sklearn_model.best_estimator_.coef_,
-                               skglm_model.coef_.ravel(), rtol=1e-4, atol=1e-6)
-    np.testing.assert_allclose(sklearn_model.best_estimator_.intercept_,
-                               skglm_model.intercept_, rtol=1e-4, atol=1e-6)
+    print(f"\nTest case: {n_samples} samples, {n_features} features, noise={noise}")
+    print(f"Timing comparison (seed={seed}):")
+    print(f"sklearn: {sklearn_time:.2f}s")
+    print(f"skglm: {skglm_time:.2f}s")
+    print(f"speedup: {sklearn_time/skglm_time:.1f}x")
+
+    try:
+        assert sklearn_model.best_params_['alpha'] == skglm_model.alpha_
+        assert sklearn_model.best_params_['l1_ratio'] == skglm_model.l1_ratio_
+        np.testing.assert_allclose(sklearn_model.best_estimator_.coef_,
+                                   skglm_model.coef_.ravel(), rtol=1e-4, atol=1e-6)
+        np.testing.assert_allclose(sklearn_model.best_estimator_.intercept_,
+                                   skglm_model.intercept_, rtol=1e-4, atol=1e-6)
+    except AssertionError:
+        print("\nBest parameters:")
+        print(f"sklearn: alpha={sklearn_model.best_params_['alpha']}, "
+              f"l1_ratio={sklearn_model.best_params_['l1_ratio']}")
+        print(f"skglm: alpha={skglm_model.alpha_}, l1_ratio={skglm_model.l1_ratio_}")
+        raise


 if __name__ == "__main__":
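
Note on the comparison being made: sklearn's documented ElasticNet objective is 1/(2 * n_samples) * ||y - Xw||^2 + alpha * l1_ratio * ||w||_1 + 0.5 * alpha * (1 - l1_ratio) * ||w||_2^2, and the test's premise is that skglm's Quadratic datafit plus the L1_plus_L2 penalty reproduces the same parametrization. A minimal standalone sketch for checking that both fitted models reach the same objective value (the helper name elastic_net_objective is made up here; coef_/intercept_ are the attributes used in the test above):

    import numpy as np

    def elastic_net_objective(X, y, coef, intercept, alpha, l1_ratio):
        """Value of sklearn's documented ElasticNet objective at (coef, intercept)."""
        n = X.shape[0]
        resid = y - X @ coef - intercept
        return (resid @ resid / (2 * n)
                + alpha * l1_ratio * np.abs(coef).sum()
                + 0.5 * alpha * (1 - l1_ratio) * (coef @ coef))

Evaluating this at each model's selected hyperparameters localizes any mismatch to the solver rather than the grid search. Note also that pytest captures stdout by default, so the timing printouts above only appear when the test is run with pytest -s.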