
Commit 0ab3844

first try at CV for GLE, regression task still not working well
1 parent 8e27664 commit 0ab3844

File tree: 8 files changed (+755, -40 lines)


examples/plot_CV.py

Lines changed: 198 additions & 0 deletions
@@ -0,0 +1,198 @@
"""
Cross-validation for Generalized Linear Models
==============================================

This example demonstrates how to use cross-validation to select the optimal
regularization parameters for different types of generalized linear models.

We cover:
1. Lasso regression (L1 penalty)
2. Elastic Net regression (L1 + L2 penalty)
3. Logistic regression with L1 penalty
4. Logistic regression with Elastic Net penalty


Understanding Cross-Validation
------------------------------
Cross-validation (CV) is a technique to evaluate how well a model will perform
on unseen data. In this example, we use K-fold CV (K=5 by default) to:
1. Split the data into K folds
2. Train the model K times, each time using K-1 folds for training
3. Evaluate the model on the remaining fold
4. Average the results to get a robust estimate of model performance

The Process
-----------
For each model type, we:
1. Generate synthetic data (or use real data)
2. Split it into training and test sets
3. Use CV to find the best regularization parameters
4. Train the final model with the best parameters
5. Evaluate on the test set

References
----------
[1] scikit-learn. (n.d.). Cross-validation: evaluating estimator performance.
    https://scikit-learn.org/stable/modules/cross_validation.html
"""

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression, make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, accuracy_score
from sklearn.preprocessing import StandardScaler

from skglm.estimators import GeneralizedLinearEstimator
from skglm.datafits import Quadratic, Logistic
from skglm.penalties import L1, L1_plus_L2
from skglm.solvers import AndersonCD

# Set random seed for reproducibility
np.random.seed(42)

# 1. Lasso Regression Example
# ---------------------------
print("1. Lasso Regression Example")
print("-" * 30)

# Generate synthetic data
X, y = make_regression(n_samples=100, n_features=20, noise=0.1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Create and fit Lasso with CV
lasso = GeneralizedLinearEstimator(
    datafit=Quadratic(),
    penalty=L1(alpha=1.0),
    solver=AndersonCD()
)
lasso.cross_validate(X_train, y_train, alphas='auto', cv=5,
                     scoring='neg_mean_squared_error')

# Evaluate on test set
y_pred = lasso.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Best alpha: {lasso.best_alpha_:.3f}")
print(f"Test MSE: {mse:.3f}")

# Plot CV scores
plt.figure(figsize=(10, 6))
plt.semilogx(lasso.alphas_, lasso.cv_scores_[None].mean(axis=0))
plt.axvline(lasso.best_alpha_, color='r', linestyle='--',
            label=f'Best alpha: {lasso.best_alpha_:.3f}')
plt.xlabel('Alpha')
plt.ylabel('CV Score')
plt.title('Lasso CV Scores')
plt.legend()
plt.grid(True)

# 2. Elastic Net Regression Example
# ---------------------------------
print("\n2. Elastic Net Regression Example")
print("-" * 30)

# Create and fit Elastic Net with CV
enet = GeneralizedLinearEstimator(
    datafit=Quadratic(),
    penalty=L1_plus_L2(alpha=1.0, l1_ratio=0.5),
    solver=AndersonCD()
)
enet.cross_validate(X_train, y_train, alphas='auto',
                    l1_ratios=[0.1, 0.5, 0.9], cv=5, scoring='neg_mean_squared_error')

# Evaluate on test set
y_pred = enet.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Best alpha: {enet.best_alpha_:.3f}")
print(f"Best l1_ratio: {enet.best_l1_ratio_:.3f}")
print(f"Test MSE: {mse:.3f}")

# Plot CV scores for different l1_ratios
plt.figure(figsize=(10, 6))
for ratio in enet.cv_scores_:
    plt.semilogx(enet.alphas_, enet.cv_scores_[ratio].mean(axis=0),
                 label=f'l1_ratio={ratio}')
plt.axvline(enet.best_alpha_, color='r', linestyle='--',
            label=f'Best alpha: {enet.best_alpha_:.3f}')
plt.xlabel('Alpha')
plt.ylabel('CV Score')
plt.title('Elastic Net CV Scores')
plt.legend()
plt.grid(True)

# 3. Logistic Regression with L1 Penalty
# --------------------------------------
print("\n3. Logistic Regression with L1 Penalty")
print("-" * 30)

# Generate synthetic classification data
X, y = make_classification(n_samples=100, n_features=20, n_classes=2)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Create and fit Logistic Regression with CV
logreg = GeneralizedLinearEstimator(
    datafit=Logistic(),
    penalty=L1(alpha=1.0),
    solver=AndersonCD()
)
logreg.cross_validate(X_train, y_train, alphas='auto', cv=5)

# Evaluate on test set
y_pred = logreg.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Best alpha: {logreg.best_alpha_:.3f}")
print(f"Test Accuracy: {accuracy:.3f}")

# Plot CV scores
plt.figure(figsize=(10, 6))
plt.semilogx(logreg.alphas_, logreg.cv_scores_[None].mean(axis=0))
plt.axvline(logreg.best_alpha_, color='r', linestyle='--',
            label=f'Best alpha: {logreg.best_alpha_:.3f}')
plt.xlabel('Alpha')
plt.ylabel('CV Score')
plt.title('Logistic Regression CV Scores')
plt.legend()
plt.grid(True)

# 4. Logistic Regression with Elastic Net Penalty
# -----------------------------------------------
print("\n4. Logistic Regression with Elastic Net Penalty")
print("-" * 30)

# Create and fit Logistic Regression with Elastic Net penalty
logreg_enet = GeneralizedLinearEstimator(
    datafit=Logistic(),
    penalty=L1_plus_L2(alpha=1.0, l1_ratio=0.5),
    solver=AndersonCD()
)
logreg_enet.cross_validate(X_train, y_train, alphas='auto',
                           l1_ratios=[0.1, 0.5, 0.9], cv=5)

# Evaluate on test set
y_pred = logreg_enet.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Best alpha: {logreg_enet.best_alpha_:.3f}")
print(f"Best l1_ratio: {logreg_enet.best_l1_ratio_:.3f}")
print(f"Test Accuracy: {accuracy:.3f}")

# Plot CV scores for different l1_ratios
plt.figure(figsize=(10, 6))
for ratio in logreg_enet.cv_scores_:
    plt.semilogx(logreg_enet.alphas_, logreg_enet.cv_scores_[ratio].mean(axis=0),
                 label=f'l1_ratio={ratio}')
plt.axvline(logreg_enet.best_alpha_, color='r', linestyle='--',
            label=f'Best alpha: {logreg_enet.best_alpha_:.3f}')
plt.xlabel('Alpha')
plt.ylabel('CV Score')
plt.title('Logistic Regression with Elastic Net CV Scores')
plt.legend()
plt.grid(True)

plt.show()
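Note that the cross_validate method, the alphas='auto' grid, and the best_alpha_, alphas_, and cv_scores_ attributes used in this example are introduced by this commit and, per the commit message, are not yet fully working for the regression task. As a point of reference, the same 5-fold selection can be written by hand against the already-released skglm and scikit-learn APIs; the sketch below does that for the Lasso case. The alpha grid (np.geomspace(1e-3, 1e1, 20)) and the shuffled KFold are illustrative assumptions, not library defaults.

# Minimal sketch of the K-fold selection loop the example relies on, using
# only existing APIs (KFold, fit, predict); grid and fold count are assumptions.
import numpy as np
from sklearn.datasets import make_regression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold

from skglm.datafits import Quadratic
from skglm.estimators import GeneralizedLinearEstimator
from skglm.penalties import L1
from skglm.solvers import AndersonCD

X, y = make_regression(n_samples=100, n_features=20, noise=0.1, random_state=0)
alphas = np.geomspace(1e-3, 1e1, num=20)          # candidate regularization strengths
kf = KFold(n_splits=5, shuffle=True, random_state=0)

mean_cv_mse = []
for alpha in alphas:
    fold_mse = []
    for train_idx, val_idx in kf.split(X):
        model = GeneralizedLinearEstimator(
            datafit=Quadratic(), penalty=L1(alpha=alpha), solver=AndersonCD())
        model.fit(X[train_idx], y[train_idx])
        fold_mse.append(mean_squared_error(y[val_idx], model.predict(X[val_idx])))
    mean_cv_mse.append(np.mean(fold_mse))          # average validation error over folds

best_alpha = alphas[int(np.argmin(mean_cv_mse))]
print(f"best alpha by 5-fold CV: {best_alpha:.4f}")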

skglm/datafits/base.py

Lines changed: 61 additions & 1 deletion
@@ -1,4 +1,3 @@
-
 class BaseDatafit:
     """Base class for datafits."""
 
@@ -20,6 +19,39 @@ def params_to_dict(self):
             The parameters to instantiate an object of the class.
         """
 
+    def get_params(self, deep=True):
+        """Get parameters for this datafit.
+
+        Parameters
+        ----------
+        deep : bool, default=True
+            If True, will return the parameters for this datafit and
+            contained subobjects that are datafits.
+
+        Returns
+        -------
+        params : dict
+            Parameter names mapped to their values.
+        """
+        return self.params_to_dict()
+
+    def set_params(self, **params):
+        """Set the parameters of this datafit.
+
+        Parameters
+        ----------
+        **params : dict
+            Datafit parameters.
+
+        Returns
+        -------
+        self : object
+            Returns self.
+        """
+        for key, value in params.items():
+            setattr(self, key, value)
+        return self
+
     def initialize(self, X, y):
         """Pre-computations before fitting on X and y.
 
@@ -70,6 +102,34 @@ def value(self, y, w, Xw):
             The datafit value at vector w.
         """
 
+    def gradient_scalar(self, X, y, w, Xw):
+        """Compute gradient of datafit w.r.t scalar w."""
+
+    def gradient(self, X, y, w, Xw):
+        """Compute gradient of datafit w.r.t w."""
+
+    def gradient_scalar_sparse(self, data, indptr, indices, y, n_samples, w, Xw):
+        """Compute gradient of datafit w.r.t scalar w for sparse X."""
+
+    def gradient_sparse(self, data, indptr, indices, y, n_samples, w, Xw):
+        """Compute gradient of datafit w.r.t w for sparse X."""
+
+    def gradient_at_zero(self, X, y):
+        """Compute gradient at w=0 for cross-validation support.
+
+        Parameters
+        ----------
+        X : array-like, shape (n_samples, n_features)
+            Training data.
+        y : array-like, shape (n_samples,)
+            Target values.
+
+        Returns
+        -------
+        gradient : array-like, shape (n_features,)
+            Gradient at w=0.
+        """
+
 
 class BaseMultitaskDatafit:
     """Base class for multitask datafits."""

skglm/datafits/single_task.py

Lines changed: 56 additions & 0 deletions
@@ -38,6 +38,23 @@ def get_spec(self):
     def params_to_dict(self):
         return dict()
 
+    def gradient_at_zero(self, X, y):
+        """Compute gradient at w=0 for cross-validation support.
+
+        Parameters
+        ----------
+        X : array-like, shape (n_samples, n_features)
+            Training data.
+        y : array-like, shape (n_samples,)
+            Target values.
+
+        Returns
+        -------
+        grad : array, shape (n_features,)
+            Gradient at w=0.
+        """
+        return -X.T @ y / len(y)
+
     def get_lipschitz(self, X, y):
         n_features = X.shape[1]
 
@@ -95,6 +112,17 @@ def gradient_scalar_sparse(self, X_data, X_indptr, X_indices, y, Xw, j):
     def gradient(self, X, y, Xw):
         return X.T @ (Xw - y) / len(y)
 
+    def gradient_sparse(self, X_data, X_indptr, X_indices, y, Xw):
+        """Compute gradient of datafit w.r.t w for sparse X."""
+        n_features = X_indptr.shape[0] - 1
+        out = np.zeros(n_features, dtype=X_data.dtype)
+        raw_grad = self.raw_grad(y, Xw)
+
+        for j in range(n_features):
+            out[j] = _sparse_xj_dot(X_data, X_indptr, X_indices, j, raw_grad)
+
+        return out
+
     def raw_grad(self, y, Xw):
         """Compute gradient of datafit w.r.t ``Xw``."""
         return (Xw - y) / len(y)
@@ -217,6 +245,17 @@ def gradient_scalar_sparse(self, X_data, X_indptr, X_indices, y, Xw, j):
     def gradient(self, X, y, Xw):
         return X.T @ (self.sample_weights * (Xw - y)) / self.sample_weights.sum()
 
+    def gradient_sparse(self, X_data, X_indptr, X_indices, y, Xw):
+        """Compute gradient of datafit w.r.t w for sparse X."""
+        n_features = X_indptr.shape[0] - 1
+        out = np.zeros(n_features, dtype=X_data.dtype)
+        raw_grad = self.raw_grad(y, Xw)
+
+        for j in range(n_features):
+            out[j] = _sparse_xj_dot(X_data, X_indptr, X_indices, j, raw_grad)
+
+        return out
+
     def raw_grad(self, y, Xw):
         return (self.sample_weights * (Xw - y)) / self.sample_weights.sum()
 
@@ -303,6 +342,23 @@ def get_spec(self):
     def params_to_dict(self):
         return dict()
 
+    def gradient_at_zero(self, X, y):
+        """Compute gradient at w=0 for cross-validation support.
+
+        Parameters
+        ----------
+        X : array-like, shape (n_samples, n_features)
+            Training data.
+        y : array-like, shape (n_samples,)
+            Target values.
+
+        Returns
+        -------
+        grad : array, shape (n_features,)
+            Gradient at w=0.
+        """
+        return -X.T @ y / (2 * len(y))
+
     def raw_grad(self, y, Xw):
         """Compute gradient of datafit w.r.t ``Xw``."""
         return -y / (1 + np.exp(y * Xw)) / len(y)
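The two closed forms above follow directly from the raw gradients: at w = 0 we have Xw = 0, so the Quadratic raw gradient (Xw - y) / n reduces to -y / n, and the Logistic raw gradient -y / (1 + exp(y * Xw)) / n reduces to -y / (2n) because 1 + exp(0) = 2, which is where the extra factor of 2 in the Logistic case comes from. The snippet below checks both identities with plain NumPy on random data; it is a sanity check written for this note, not code from the commit.

# Sanity check (plain NumPy): gradient_at_zero matches X.T @ raw_grad at w = 0.
import numpy as np

rng = np.random.default_rng(0)
n_samples, n_features = 50, 8
X = rng.standard_normal((n_samples, n_features))
y_reg = rng.standard_normal(n_samples)             # regression targets
y_clf = rng.choice([-1.0, 1.0], size=n_samples)    # +/-1 labels for Logistic
Xw = np.zeros(n_samples)                           # w = 0  =>  Xw = 0

# Quadratic: raw_grad = (Xw - y) / n  ->  gradient at zero is -X.T @ y / n
quad_grad = X.T @ ((Xw - y_reg) / n_samples)
assert np.allclose(quad_grad, -X.T @ y_reg / n_samples)

# Logistic: raw_grad = -y / (1 + exp(y * Xw)) / n  ->  -X.T @ y / (2 * n)
log_grad = X.T @ (-y_clf / (1 + np.exp(y_clf * Xw)) / n_samples)
assert np.allclose(log_grad, -X.T @ y_clf / (2 * n_samples))
print("gradient_at_zero formulas check out")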
