ENH add support for intercept in SqrtLasso (#298)

PascalCarrivain · Badr-MOUFAD · mathurinm · web-flow · commit d87638c4ee4f · 2025-04-23T14:55:21.000+02:00
Co-authored-by: Badr-MOUFAD <badr.moufad@emines.um6p.ma>
Co-authored-by: mathurinm <mathurin.massias@gmail.com>
Co-authored-by: floriankozikowski <florian.kozikowski@polytechnique.edu>
diff --git a/doc/changes/0.4.rst b/doc/changes/0.4.rst
@@ -1,6 +1,6 @@
 .. _changes_0_4:
 
-Version 0.4 (2023/04/08)
+Version 0.4 (2025/04/08)
 -------------------------
 - Add :ref:`GroupLasso Estimator <skglm.GroupLasso>` (PR: :gh:`228`)
 - Add support and tutorial for positive coefficients to :ref:`Group Lasso Penalty <skglm.penalties.WeightedGroupL2>` (PR: :gh:`221`)
diff --git a/doc/changes/0.5.rst b/doc/changes/0.5.rst
@@ -2,3 +2,4 @@
 
 Version 0.5 (in progress)
 -------------------------
+- Add support for fitting an intercept in :ref:`SqrtLasso <skglm.experimental.sqrt_lasso.SqrtLasso>` (PR: :gh:`298`)
diff --git a/skglm/experimental/sqrt_lasso.py b/skglm/experimental/sqrt_lasso.py
@@ -100,10 +100,13 @@ class SqrtLasso(LinearModel, RegressorMixin):
 
     verbose : bool, default False
         Amount of verbosity. 0/False is silent.
+
+    fit_intercept : bool, default True
+        Whether or not to fit an intercept.
     """
 
     def __init__(self, alpha=1., max_iter=100, max_pn_iter=100, p0=10,
-                 tol=1e-4, verbose=0):
+                 tol=1e-4, verbose=0, fit_intercept=True):
         super().__init__()
         self.alpha = alpha
         self.max_iter = max_iter
@@ -112,6 +115,7 @@ def __init__(self, alpha=1., max_iter=100, max_pn_iter=100, p0=10,
         self.p0 = p0
         self.tol = tol
         self.verbose = verbose
+        self.fit_intercept = fit_intercept
 
     def fit(self, X, y):
         """Fit the model according to the given training data.
@@ -131,7 +135,11 @@ def fit(self, X, y):
             Fitted estimator.
         """
         self.coef_ = self.path(X, y, alphas=[self.alpha])[1][0]
-        self.intercept_ = 0.  # TODO handle fit_intercept
+        if self.fit_intercept:
+            self.intercept_ = self.coef_[-1]
+            self.coef_ = self.coef_[:-1]
+        else:
+            self.intercept_ = 0.
         return self
 
     def path(self, X, y, alphas=None, eps=1e-3, n_alphas=10):
@@ -168,7 +176,7 @@ def path(self, X, y, alphas=None, eps=1e-3, n_alphas=10):
         if not hasattr(self, "solver_"):
             self.solver_ = ProxNewton(
                 tol=self.tol, max_iter=self.max_iter, verbose=self.verbose,
-                fit_intercept=False)
+                fit_intercept=self.fit_intercept)
         # build path
         if alphas is None:
             alpha_max = norm(X.T @ y, ord=np.inf) / (np.sqrt(len(y)) * norm(y))
@@ -181,7 +189,7 @@ def path(self, X, y, alphas=None, eps=1e-3, n_alphas=10):
         sqrt_quadratic = SqrtQuadratic()
         l1_penalty = L1(1.)  # alpha is set along the path
 
-        coefs = np.zeros((n_alphas, n_features))
+        coefs = np.zeros((n_alphas, n_features + self.fit_intercept))
 
         for i in range(n_alphas):
             if self.verbose:
@@ -192,12 +200,14 @@ def path(self, X, y, alphas=None, eps=1e-3, n_alphas=10):
 
             l1_penalty.alpha = alphas[i]
             # no warm start for the first alpha
-            coef_init = coefs[i].copy() if i else np.zeros(n_features)
+            coef_init = coefs[i].copy() if i else np.zeros(n_features
+                                                           + self.fit_intercept)
 
             try:
                 coef, _, _ = self.solver_.solve(
                     X, y, sqrt_quadratic, l1_penalty,
-                    w_init=coef_init, Xw_init=X @ coef_init)
+                    w_init=coef_init, Xw_init=X @ coef_init[:-1] + coef_init[-1]
+                    if self.fit_intercept else X @ coef_init)
                 coefs[i] = coef
             except ValueError as val_exception:
                 # make sure to catch residual error
@@ -208,7 +218,8 @@ def path(self, X, y, alphas=None, eps=1e-3, n_alphas=10):
                 # save coef despite not converging
                 # coef_init holds a ref to coef
                 coef = coef_init
-                res_norm = norm(y - X @ coef)
+                X_coef = X @ coef[:-1] + coef[-1] if self.fit_intercept else X @ coef
+                res_norm = norm(y - X_coef)
                 warnings.warn(
                     f"Small residuals prevented the solver from converging "
                     f"at alpha={alphas[i]:.2e} (residuals' norm: {res_norm:.4e}). "
diff --git a/skglm/experimental/tests/test_sqrt_lasso.py b/skglm/experimental/tests/test_sqrt_lasso.py
@@ -7,6 +7,7 @@
 from skglm.experimental.sqrt_lasso import (SqrtLasso, SqrtQuadratic,
                                            _chambolle_pock_sqrt)
 from skglm.experimental.pdcd_ws import PDCD_WS
+from skglm import Lasso
 
 
 def test_alpha_max():
@@ -16,7 +17,10 @@ def test_alpha_max():
 
     sqrt_lasso = SqrtLasso(alpha=alpha_max).fit(X, y)
 
-    np.testing.assert_equal(sqrt_lasso.coef_, 0)
+    if sqrt_lasso.fit_intercept:
+        np.testing.assert_equal(sqrt_lasso.coef_[:-1], 0)
+    else:
+        np.testing.assert_equal(sqrt_lasso.coef_, 0)
 
 
 def test_vs_statsmodels():
@@ -31,7 +35,7 @@ def test_vs_statsmodels():
     n_alphas = 3
     alphas = alpha_max * np.geomspace(1, 1e-2, n_alphas+1)[1:]
 
-    sqrt_lasso = SqrtLasso(tol=1e-9)
+    sqrt_lasso = SqrtLasso(tol=1e-9, fit_intercept=False)
     coefs_skglm = sqrt_lasso.path(X, y, alphas)[1]
 
     coefs_statsmodels = np.zeros((len(alphas), n_features))
@@ -54,7 +58,7 @@ def test_prox_newton_cp():
 
     alpha_max = norm(X.T @ y, ord=np.inf) / norm(y)
     alpha = alpha_max / 10
-    clf = SqrtLasso(alpha=alpha, tol=1e-12).fit(X, y)
+    clf = SqrtLasso(alpha=alpha, fit_intercept=False, tol=1e-12).fit(X, y)
     w, _, _ = _chambolle_pock_sqrt(X, y, alpha, max_iter=1000)
     np.testing.assert_allclose(clf.coef_, w)
 
@@ -73,9 +77,26 @@ def test_PDCD_WS(with_dual_init):
     penalty = L1(alpha)
 
     w = PDCD_WS(dual_init=dual_init).solve(X, y, datafit, penalty)[0]
-    clf = SqrtLasso(alpha=alpha, tol=1e-12).fit(X, y)
+    clf = SqrtLasso(alpha=alpha, fit_intercept=False, tol=1e-12).fit(X, y)
     np.testing.assert_allclose(clf.coef_, w, atol=1e-6)
 
 
+@pytest.mark.parametrize("fit_intercept", [True, False])
+def test_lasso_sqrt_lasso_equivalence(fit_intercept):
+    n_samples, n_features = 50, 10
+    X, y, _ = make_correlated_data(n_samples, n_features, random_state=0)
+
+    alpha_max = norm(X.T @ y, ord=np.inf) / norm(y)
+    alpha = alpha_max / 10
+
+    lasso = Lasso(alpha=alpha, fit_intercept=fit_intercept, tol=1e-8).fit(X, y)
+
+    scal = n_samples / norm(y - lasso.predict(X))
+    sqrt = SqrtLasso(
+        alpha=alpha * scal, fit_intercept=fit_intercept, tol=1e-8).fit(X, y)
+
+    np.testing.assert_allclose(sqrt.coef_, lasso.coef_, rtol=1e-6)
+
+
 if __name__ == '__main__':
     pass

@@ -2,3 +2,4 @@
 
 Version 0.5 (in progress)
 -------------------------
+- Add support for fitting an intercept in :ref:`SqrtLasso <skglm.experimental.sqrt_lasso.SqrtLasso>` (PR: :gh:`298`)