"""
Cross-validation for Generalized Linear Models
==============================================

This example demonstrates how to use cross-validation to select the optimal
regularization parameters for several types of generalized linear models.

We cover:

1. Lasso regression (L1 penalty)
2. Elastic Net regression (L1 + L2 penalty)
3. Logistic regression with an L1 penalty
4. Logistic regression with an Elastic Net penalty

Understanding Cross-Validation
------------------------------
Cross-validation (CV) is a technique to estimate how well a model will perform
on unseen data [1]. In this example, we use K-fold CV (K=5 by default) to:

1. Split the data into K folds
2. Train the model K times, each time using K-1 folds for training
3. Evaluate the model on the held-out fold
4. Average the results to obtain a robust estimate of model performance

A minimal sketch of these splitting mechanics appears right after the setup
code below.

The Process
-----------
For each model type, we:

1. Generate synthetic data
2. Split it into training and test sets
3. Use CV to find the best regularization parameters
4. Train the final model with the best parameters
5. Evaluate it on the test set

References
----------
[1] scikit-learn. Cross-validation: evaluating estimator performance.
    https://scikit-learn.org/stable/modules/cross_validation.html
"""

import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_classification, make_regression
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from skglm.datafits import Logistic, Quadratic
from skglm.estimators import GeneralizedLinearEstimator
from skglm.penalties import L1, L1_plus_L2
from skglm.solvers import AndersonCD

# Set random seed for reproducibility
np.random.seed(42)
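
# To make the K-fold procedure described in the docstring concrete, here is a
# minimal sketch of the splitting mechanics on a toy array. This is for
# illustration only; the CV routines used below handle the splitting internally.
from sklearn.model_selection import KFold

toy = np.arange(10)
kf = KFold(n_splits=5, shuffle=True, random_state=0)
for fold, (train_idx, val_idx) in enumerate(kf.split(toy)):
    print(f"Fold {fold}: train={train_idx}, validation={val_idx}")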

# 1. Lasso Regression Example
# ---------------------------
print("1. Lasso Regression Example")
print("-" * 30)

# Generate synthetic data
X, y = make_regression(n_samples=100, n_features=20, noise=0.1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Create and fit Lasso with CV; after the search, the model is refit on the
# full training set with the best alpha found, and used for prediction below
lasso = GeneralizedLinearEstimator(
    datafit=Quadratic(),
    penalty=L1(alpha=1.0),
    solver=AndersonCD(),
)
lasso.cross_validate(X_train, y_train, alphas='auto', cv=5,
                     scoring='neg_mean_squared_error')

# Evaluate on the test set
y_pred = lasso.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Best alpha: {lasso.best_alpha_:.3f}")
print(f"Test MSE: {mse:.3f}")
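
# Conceptually, the search above scores each candidate alpha with K-fold CV and
# keeps the best one. Below is a hand-rolled sketch relying only on the
# estimator's scikit-learn compatibility; the alpha grid is a hypothetical
# stand-in for the one derived internally by alphas='auto'.
from sklearn.model_selection import cross_val_score

alphas = np.logspace(-3, 1, 10)
mean_scores = [
    cross_val_score(
        GeneralizedLinearEstimator(
            datafit=Quadratic(), penalty=L1(alpha=a), solver=AndersonCD()),
        X_train, y_train, cv=5, scoring='neg_mean_squared_error',
    ).mean()
    for a in alphas
]
print(f"Hand-rolled best alpha: {alphas[int(np.argmax(mean_scores))]:.3f}")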

# Plot CV scores; cv_scores_ is keyed by l1_ratio, and a pure-L1 search stores
# its per-fold scores under the key None
plt.figure(figsize=(10, 6))
plt.semilogx(lasso.alphas_, lasso.cv_scores_[None].mean(axis=0))
plt.axvline(lasso.best_alpha_, color='r', linestyle='--',
            label=f'Best alpha: {lasso.best_alpha_:.3f}')
plt.xlabel('Alpha')
plt.ylabel('CV Score')
plt.title('Lasso CV Scores')
plt.legend()
plt.grid(True)

# 2. Elastic Net Regression Example
# ---------------------------------
print("\n2. Elastic Net Regression Example")
print("-" * 30)

# Create and fit Elastic Net with CV
enet = GeneralizedLinearEstimator(
    datafit=Quadratic(),
    penalty=L1_plus_L2(alpha=1.0, l1_ratio=0.5),
    solver=AndersonCD(),
)
enet.cross_validate(X_train, y_train, alphas='auto',
                    l1_ratios=[0.1, 0.5, 0.9], cv=5,
                    scoring='neg_mean_squared_error')

# Evaluate on the test set
y_pred = enet.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Best alpha: {enet.best_alpha_:.3f}")
print(f"Best l1_ratio: {enet.best_l1_ratio_:.3f}")
print(f"Test MSE: {mse:.3f}")
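
# The L1 component drives coefficients exactly to zero, while the L2 component
# tends to keep groups of correlated features in the model. A quick sparsity
# check on the two fitted models (a sketch, assuming both expose a fitted
# `coef_` attribute):
print(f"Non-zero coefficients -- Lasso: {np.sum(lasso.coef_ != 0)}, "
      f"Elastic Net: {np.sum(enet.coef_ != 0)}")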

# Plot CV scores for different l1_ratios
plt.figure(figsize=(10, 6))
for ratio in enet.cv_scores_:
    plt.semilogx(enet.alphas_, enet.cv_scores_[ratio].mean(axis=0),
                 label=f'l1_ratio={ratio}')
plt.axvline(enet.best_alpha_, color='r', linestyle='--',
            label=f'Best alpha: {enet.best_alpha_:.3f}')
plt.xlabel('Alpha')
plt.ylabel('CV Score')
plt.title('Elastic Net CV Scores')
plt.legend()
plt.grid(True)

# 3. Logistic Regression with L1 Penalty
# --------------------------------------
print("\n3. Logistic Regression with L1 Penalty")
print("-" * 30)

# Generate synthetic classification data
X, y = make_classification(n_samples=100, n_features=20, n_classes=2)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Create and fit Logistic Regression with CV
logreg = GeneralizedLinearEstimator(
    datafit=Logistic(),
    penalty=L1(alpha=1.0),
    solver=AndersonCD(),
)
logreg.cross_validate(X_train, y_train, alphas='auto', cv=5)

# Evaluate on the test set
y_pred = logreg.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Best alpha: {logreg.best_alpha_:.3f}")
print(f"Test Accuracy: {accuracy:.3f}")
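
# As a cross-check, the selected alpha can be mapped onto scikit-learn's
# LogisticRegression parameterization. This sketch assumes skglm's logistic
# datafit averages the loss over samples, giving C = 1 / (n_samples * alpha).
from sklearn.linear_model import LogisticRegression

sk_logreg = LogisticRegression(penalty='l1', solver='liblinear',
                               C=1 / (X_train.shape[0] * logreg.best_alpha_))
sk_logreg.fit(X_train, y_train)
print(f"scikit-learn L1 logistic test accuracy: "
      f"{accuracy_score(y_test, sk_logreg.predict(X_test)):.3f}")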

# Plot CV scores
plt.figure(figsize=(10, 6))
plt.semilogx(logreg.alphas_, logreg.cv_scores_[None].mean(axis=0))
plt.axvline(logreg.best_alpha_, color='r', linestyle='--',
            label=f'Best alpha: {logreg.best_alpha_:.3f}')
plt.xlabel('Alpha')
plt.ylabel('CV Score')
plt.title('Logistic Regression CV Scores')
plt.legend()
plt.grid(True)

# 4. Logistic Regression with Elastic Net Penalty
# -----------------------------------------------
print("\n4. Logistic Regression with Elastic Net Penalty")
print("-" * 30)

# Create and fit Logistic Regression with Elastic Net penalty
logreg_enet = GeneralizedLinearEstimator(
    datafit=Logistic(),
    penalty=L1_plus_L2(alpha=1.0, l1_ratio=0.5),
    solver=AndersonCD(),
)
logreg_enet.cross_validate(X_train, y_train, alphas='auto',
                           l1_ratios=[0.1, 0.5, 0.9], cv=5)

# Evaluate on the test set
y_pred = logreg_enet.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Best alpha: {logreg_enet.best_alpha_:.3f}")
print(f"Best l1_ratio: {logreg_enet.best_l1_ratio_:.3f}")
print(f"Test Accuracy: {accuracy:.3f}")
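
# Sanity-check the selected (alpha, l1_ratio) pair with an independent CV run.
# A sketch relying only on the estimator's scikit-learn compatibility:
from sklearn.model_selection import cross_val_score

check = GeneralizedLinearEstimator(
    datafit=Logistic(),
    penalty=L1_plus_L2(alpha=logreg_enet.best_alpha_,
                       l1_ratio=logreg_enet.best_l1_ratio_),
    solver=AndersonCD(),
)
cv_acc = cross_val_score(check, X_train, y_train, cv=5, scoring='accuracy')
print(f"5-fold CV accuracy at the selected parameters: {cv_acc.mean():.3f}")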

# Plot CV scores for different l1_ratios
plt.figure(figsize=(10, 6))
for ratio in logreg_enet.cv_scores_:
    plt.semilogx(logreg_enet.alphas_, logreg_enet.cv_scores_[ratio].mean(axis=0),
                 label=f'l1_ratio={ratio}')
plt.axvline(logreg_enet.best_alpha_, color='r', linestyle='--',
            label=f'Best alpha: {logreg_enet.best_alpha_:.3f}')
plt.xlabel('Alpha')
plt.ylabel('CV Score')
plt.title('Logistic Regression with Elastic Net CV Scores')
plt.legend()
plt.grid(True)

plt.show()