
Commit d80c9aa

ENH - add support for sparse dataset in LBFGS (#173)

1 parent f6f0875

File tree

3 files changed: +42 additions, -8 deletions

- skglm/datafits/single_task.py
- skglm/solvers/lbfgs.py
- skglm/tests/test_lbfgs_solver.py

skglm/datafits/single_task.py

Lines changed: 22 additions & 0 deletions

```diff
@@ -5,6 +5,7 @@
 
 from skglm.datafits.base import BaseDatafit
 from skglm.utils.sparse_ops import spectral_norm
+from skglm.solvers.prox_newton import _sparse_xj_dot
 
 
 class Quadratic(BaseDatafit):
@@ -185,6 +186,16 @@ def gradient_scalar(self, X, y, w, Xw, j):
     def gradient(self, X, y, Xw):
         return X.T @ self.raw_grad(y, Xw)
 
+    def gradient_sparse(self, X_data, X_indptr, X_indices, y, Xw):
+        n_features = X_indptr.shape[0] - 1
+        out = np.zeros(n_features, dtype=X_data.dtype)
+        raw_grad = self.raw_grad(y, Xw)
+
+        for j in range(n_features):
+            out[j] = _sparse_xj_dot(X_data, X_indptr, X_indices, j, raw_grad)
+
+        return out
+
     def full_grad_sparse(
             self, X_data, X_indptr, X_indices, y, Xw):
         n_features = X_indptr.shape[0] - 1
@@ -654,6 +665,17 @@ def gradient(self, X, y, Xw):
         """Compute gradient of the datafit."""
         return X.T @ self.raw_grad(y, Xw)
 
+    def gradient_sparse(self, X_data, X_indptr, X_indices, y, Xw):
+        """Compute gradient of the datafit in case ``X`` is sparse."""
+        n_features = X_indptr.shape[0] - 1
+        out = np.zeros(n_features, dtype=X_data.dtype)
+        raw_grad = self.raw_grad(y, Xw)
+
+        for j in range(n_features):
+            out[j] = _sparse_xj_dot(X_data, X_indptr, X_indices, j, raw_grad)
+
+        return out
+
     def initialize(self, X, y):
         """Initialize the datafit attributes."""
         tm, s = y
```
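The new `gradient_sparse` methods reuse `_sparse_xj_dot` from the Prox-Newton solver. Its actual implementation lives in `skglm/solvers/prox_newton.py`; as a rough sketch (an illustration, not the library's code), the CSC column dot product it performs amounts to:

```python
def sparse_xj_dot(X_data, X_indptr, X_indices, j, other):
    # Dot product between column j of a CSC matrix and a dense vector:
    # X_indptr[j]:X_indptr[j + 1] delimits the non-zero entries of column j.
    res = 0.0
    for idx in range(X_indptr[j], X_indptr[j + 1]):
        res += X_data[idx] * other[X_indices[idx]]
    return res
```

Looping this over all columns gives `gradient_sparse` the product `X.T @ raw_grad` without ever materializing a dense `X`.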

skglm/solvers/lbfgs.py

Lines changed: 15 additions & 6 deletions

```diff
@@ -4,6 +4,7 @@
 import numpy as np
 import scipy.optimize
 from numpy.linalg import norm
+from scipy.sparse import issparse
 
 from skglm.solvers import BaseSolver
 
@@ -33,27 +34,34 @@ def __init__(self, max_iter=50, tol=1e-4, verbose=False):
 
     def solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None):
 
-        def objective_function(w):
+        def objective(w):
             Xw = X @ w
             datafit_value = datafit.value(y, w, Xw)
             penalty_value = penalty.value(w)
 
             return datafit_value + penalty_value
 
-        def jacobian_function(w):
+        def d_jac(w):
             Xw = X @ w
             datafit_grad = datafit.gradient(X, y, Xw)
             penalty_grad = penalty.gradient(w)
 
             return datafit_grad + penalty_grad
 
+        def s_jac(w):
+            Xw = X @ w
+            datafit_grad = datafit.gradient_sparse(X.data, X.indptr, X.indices, y, Xw)
+            penalty_grad = penalty.gradient(w)
+
+            return datafit_grad + penalty_grad
+
         def callback_post_iter(w_k):
             # save p_obj
-            p_obj = objective_function(w_k)
+            p_obj = objective(w_k)
             p_objs_out.append(p_obj)
 
             if self.verbose:
-                grad = jacobian_function(w_k)
+                grad = jac(w_k)
                 stop_crit = norm(grad)
 
                 it = len(p_objs_out)
@@ -64,11 +72,12 @@ def callback_post_iter(w_k):
 
         n_features = X.shape[1]
         w = np.zeros(n_features) if w_init is None else w_init
+        jac = s_jac if issparse(X) else d_jac
        p_objs_out = []
 
         result = scipy.optimize.minimize(
-            fun=objective_function,
-            jac=jacobian_function,
+            fun=objective,
+            jac=jac,
             x0=w,
             method="L-BFGS-B",
             options=dict(
```
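The dispatch is a one-liner, `jac = s_jac if issparse(X) else d_jac`, so `scipy.optimize.minimize` receives whichever gradient closure matches the input type. Below is a standalone sketch of the same pattern, using a plain quadratic objective `||Xw - y||^2 / (2 n)` in place of skglm's datafit/penalty objects; everything in it is illustrative, not skglm API:

```python
import numpy as np
import scipy.optimize
from scipy import sparse
from scipy.sparse import issparse


def lbfgs_fit(X, y):
    """Minimize ||Xw - y||^2 / (2 n) with L-BFGS-B, dispatching on sparsity."""
    n_samples, n_features = X.shape

    def objective(w):
        r = X @ w - y
        return r @ r / (2 * n_samples)

    def d_jac(w):
        # dense gradient: X.T @ residual / n
        return X.T @ (X @ w - y) / n_samples

    def s_jac(w):
        # same value, computed column by column from the CSC buffers,
        # mirroring gradient_sparse above
        r = X @ w - y
        out = np.zeros(n_features)
        for j in range(n_features):
            for idx in range(X.indptr[j], X.indptr[j + 1]):
                out[j] += X.data[idx] * r[X.indices[idx]]
        return out / n_samples

    jac = s_jac if issparse(X) else d_jac
    result = scipy.optimize.minimize(
        objective, np.zeros(n_features), jac=jac, method="L-BFGS-B")
    return result.x


X = sparse.random(100, 50, density=0.5, format="csc", random_state=0)
y = np.random.default_rng(0).standard_normal(100)
w_hat = lbfgs_fit(X, y)
```

Feeding `X.toarray()` through the same function exercises the dense branch and should return the same minimizer up to solver tolerance.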

skglm/tests/test_lbfgs_solver.py

Lines changed: 5 additions & 2 deletions

```diff
@@ -12,12 +12,15 @@
 from skglm.utils.data import make_correlated_data, make_dummy_survival_data
 
 
-def test_lbfgs_L2_logreg():
+@pytest.mark.parametrize("X_sparse", [True, False])
+def test_lbfgs_L2_logreg(X_sparse):
     reg = 1.
+    X_density = 1. if not X_sparse else 0.5
     n_samples, n_features = 100, 50
 
     X, y, _ = make_correlated_data(
-        n_samples, n_features, random_state=0)
+        n_samples, n_features, random_state=0, X_density=X_density,
+    )
     y = np.sign(y)
 
     # fit L-BFGS
```
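The parametrized test now covers both branches: with `X_density=0.5`, `make_correlated_data` produces a sparse design matrix, so the solver picks `s_jac`. As a quick standalone sanity check that the per-column CSC gradient matches its dense counterpart (assuming the quadratic raw gradient `(Xw - y) / n_samples`; names here are illustrative):

```python
import numpy as np
from scipy import sparse

rng = np.random.default_rng(0)
X = sparse.random(20, 10, density=0.5, format="csc", random_state=0)
y, w = rng.standard_normal(20), rng.standard_normal(10)

# assumed raw gradient of the quadratic datafit: (Xw - y) / n_samples
raw_grad = (X @ w - y) / X.shape[0]

# dense path: plain matrix-vector product with the transpose
dense_grad = X.toarray().T @ raw_grad

# sparse path: one CSC column dot product per feature, as in gradient_sparse
sparse_grad = np.array([
    sum(X.data[idx] * raw_grad[X.indices[idx]]
        for idx in range(X.indptr[j], X.indptr[j + 1]))
    for j in range(X.shape[1])
])

np.testing.assert_allclose(dense_grad, sparse_grad)
```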
