scikit-learn-contrib · PascalCarrivain · Dec 14, 2023 · Dec 14, 2023 · Dec 15, 2023 · Dec 15, 2023
diff --git a/.DS_Store b/.DS_Store
diff --git a/skglm/.DS_Store b/skglm/.DS_Store
diff --git a/skglm/experimental/.DS_Store b/skglm/experimental/.DS_Store
diff --git a/skglm/experimental/sqrt_lasso.py b/skglm/experimental/sqrt_lasso.py
@@ -100,10 +100,14 @@ class SqrtLasso(LinearModel, RegressorMixin):
 
     verbose : bool, default False
         Amount of verbosity. 0/False is silent.
+
+    fit_intercept : bool, default True
+        Whether to fit an intercept. If False, ``intercept_`` is set to 0.
     """
 
     def __init__(self, alpha=1., max_iter=100, max_pn_iter=100, p0=10,
-                 tol=1e-4, verbose=0):
+                 tol=1e-4, verbose=0, fit_intercept=True):
+
         super().__init__()
         self.alpha = alpha
         self.max_iter = max_iter
@@ -112,6 +116,7 @@ def __init__(self, alpha=1., max_iter=100, max_pn_iter=100, p0=10,
         self.p0 = p0
         self.tol = tol
         self.verbose = verbose
+        self.fit_intercept = fit_intercept
 
     def fit(self, X, y):
         """Fit the model according to the given training data.
@@ -130,8 +135,23 @@ def fit(self, X, y):
         self :
             Fitted estimator.
         """
-        self.coef_ = self.path(X, y, alphas=[self.alpha])[1][0]
-        self.intercept_ = 0.  # TODO handle fit_intercept
+        # center the data when fitting an intercept; it is recovered from the means
+        if self.fit_intercept:
+            X_mean = X.mean(axis=0)
+            y_mean = y.mean()
+            X_centered = X - X_mean
+            y_centered = y - y_mean
+        else:
+            X_centered = X
+            y_centered = y
+
+        self.coef_ = self.path(X_centered, y_centered, alphas=[self.alpha])[1][0]
+
+        if self.fit_intercept:
+            self.intercept_ = y_mean - X_mean @ self.coef_[:-1]
+            self.coef_ = self.coef_[:-1]
+        else:
+            self.intercept_ = 0.
         return self
 
     def path(self, X, y, alphas=None, eps=1e-3, n_alphas=10):
@@ -168,7 +188,7 @@ def path(self, X, y, alphas=None, eps=1e-3, n_alphas=10):
         if not hasattr(self, "solver_"):
             self.solver_ = ProxNewton(
                 tol=self.tol, max_iter=self.max_iter, verbose=self.verbose,
-                fit_intercept=False)
+                fit_intercept=self.fit_intercept)
         # build path
         if alphas is None:
             alpha_max = norm(X.T @ y, ord=np.inf) / (np.sqrt(len(y)) * norm(y))
@@ -181,7 +201,7 @@ def path(self, X, y, alphas=None, eps=1e-3, n_alphas=10):
         sqrt_quadratic = SqrtQuadratic()
         l1_penalty = L1(1.)  # alpha is set along the path
 
-        coefs = np.zeros((n_alphas, n_features))
+        coefs = np.zeros((n_alphas, n_features + self.fit_intercept))
 
         for i in range(n_alphas):
             if self.verbose:
@@ -192,12 +212,14 @@ def path(self, X, y, alphas=None, eps=1e-3, n_alphas=10):
 
             l1_penalty.alpha = alphas[i]
             # no warm start for the first alpha
-            coef_init = coefs[i].copy() if i else np.zeros(n_features)
+            coef_init = coefs[i].copy() if i else np.zeros(n_features
+                                                           + self.fit_intercept)
 
             try:
                 coef, _, _ = self.solver_.solve(
                     X, y, sqrt_quadratic, l1_penalty,
-                    w_init=coef_init, Xw_init=X @ coef_init)
+                    w_init=coef_init, Xw_init=X @ coef_init[:-1] + coef_init[-1]
+                    if self.fit_intercept else X @ coef_init)
                 coefs[i] = coef
             except ValueError as val_exception:
                 # make sure to catch residual error
@@ -208,7 +230,8 @@ def path(self, X, y, alphas=None, eps=1e-3, n_alphas=10):
                 # save coef despite not converging
                 # coef_init holds a ref to coef
                 coef = coef_init
-                res_norm = norm(y - X @ coef)
+                X_coef = X @ coef[:-1] + coef[-1] if self.fit_intercept else X @ coef
+                res_norm = norm(y - X_coef)
                 warnings.warn(
                     f"Small residuals prevented the solver from converging "
                     f"at alpha={alphas[i]:.2e} (residuals' norm: {res_norm:.4e}). "

diff --git a/skglm/experimental/tests/test_sqrt_lasso.py b/skglm/experimental/tests/test_sqrt_lasso.py
@@ -7,6 +7,7 @@
 from skglm.experimental.sqrt_lasso import (SqrtLasso, SqrtQuadratic,
                                            _chambolle_pock_sqrt)
 from skglm.experimental.pdcd_ws import PDCD_WS
+from skglm import Lasso
 
 
 def test_alpha_max():
@@ -16,7 +17,10 @@ def test_alpha_max():
 
     sqrt_lasso = SqrtLasso(alpha=alpha_max).fit(X, y)
 
-    np.testing.assert_equal(sqrt_lasso.coef_, 0)
+    if sqrt_lasso.fit_intercept:
+        np.testing.assert_equal(sqrt_lasso.coef_[:-1], 0)
+    else:
+        np.testing.assert_equal(sqrt_lasso.coef_, 0)
-    if sqrt_lasso.fit_intercept:
-        np.testing.assert_equal(sqrt_lasso.coef_[:-1], 0)
-    else:
-        np.testing.assert_equal(sqrt_lasso.coef_, 0)
+    np.testing.assert_equal(sqrt_lasso.coef_[:n_features], 0)
-    if sqrt_lasso.fit_intercept:
-        np.testing.assert_equal(sqrt_lasso.coef_[:-1], 0)
-    else:
-        np.testing.assert_equal(sqrt_lasso.coef_, 0)
+    np.testing.assert_equal(sqrt_lasso.coef_[:n_features], 0)
 
 
 def test_vs_statsmodels():
@@ -31,7 +35,7 @@ def test_vs_statsmodels():
     n_alphas = 3
     alphas = alpha_max * np.geomspace(1, 1e-2, n_alphas+1)[1:]
 
-    sqrt_lasso = SqrtLasso(tol=1e-9)
+    sqrt_lasso = SqrtLasso(tol=1e-9, fit_intercept=False)
     coefs_skglm = sqrt_lasso.path(X, y, alphas)[1]
 
     coefs_statsmodels = np.zeros((len(alphas), n_features))
@@ -54,7 +58,7 @@ def test_prox_newton_cp():
 
     alpha_max = norm(X.T @ y, ord=np.inf) / norm(y)
     alpha = alpha_max / 10
-    clf = SqrtLasso(alpha=alpha, tol=1e-12).fit(X, y)
+    clf = SqrtLasso(alpha=alpha, fit_intercept=False, tol=1e-12).fit(X, y)
     w, _, _ = _chambolle_pock_sqrt(X, y, alpha, max_iter=1000)
     np.testing.assert_allclose(clf.coef_, w)
 
@@ -73,9 +77,56 @@ def test_PDCD_WS(with_dual_init):
     penalty = L1(alpha)
 
     w = PDCD_WS(dual_init=dual_init).solve(X, y, datafit, penalty)[0]
-    clf = SqrtLasso(alpha=alpha, tol=1e-12).fit(X, y)
+
+    clf = SqrtLasso(alpha=alpha, fit_intercept=False, tol=1e-12).fit(X, y)
+
     np.testing.assert_allclose(clf.coef_, w, atol=1e-6)
 
 
+def test_sqrt_lasso_with_intercept():
+    np.random.seed(0)
+    X = np.random.randn(10, 20)
+    y = np.random.randn(10)
+    y += 1
+
+    n = len(y)
+    alpha_max = norm(X.T @ y, ord=np.inf) / n
+    alpha = alpha_max / 10
+
+    # Fit standard Lasso with intercept
+    lass = Lasso(alpha=alpha, fit_intercept=True, tol=1e-8).fit(X, y)
+    w_lass = lass.coef_
+    assert norm(w_lass) > 0
+
+    scal = n / norm(y - lass.predict(X))
+
+    # Fit SqrtLasso with intercept
+    sqrt = SqrtLasso(alpha=alpha * scal, fit_intercept=True, tol=1e-8).fit(X, y)
+
+    # Make sure intercept was learned
+    assert abs(sqrt.intercept_) > 1e-6
+
+    y_pred = sqrt.predict(X)
+    assert y_pred.shape == y.shape
+
+    # Check that coef_ and intercept_ are handled separately
+    assert sqrt.coef_.shape == (20,)
+    assert np.isscalar(sqrt.intercept_)
+
+    # Confirm prediction matches manual computation
+    manual_pred = X @ sqrt.coef_ + sqrt.intercept_
+    np.testing.assert_allclose(manual_pred, y_pred, rtol=1e-6)
+
+    np.testing.assert_allclose(
+        sqrt.intercept_, y.mean() - X.mean(axis=0) @ sqrt.coef_, rtol=1e-6
+    )
+
+    sqrt_no_intercept = SqrtLasso(
+        alpha=alpha * scal, fit_intercept=False, tol=1e-8).fit(X, y)
+    assert np.isscalar(sqrt_no_intercept.intercept_)
+    np.testing.assert_allclose(sqrt_no_intercept.predict(
+        X), X @ sqrt_no_intercept.coef_ + sqrt_no_intercept.intercept_)
+
+
 if __name__ == '__main__':
     pass