Skip to content

Commit 415c1e9

Browse files
committed
fix cox model
1 parent 0facfd7 commit 415c1e9

File tree

3 files changed

+31
-17
lines changed

3 files changed

+31
-17
lines changed

skglm/solvers/base.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,9 @@ def custom_checks(self, X, y, datafit, penalty):
9494
"""
9595
pass
9696

97-
def solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None,
98-
*, run_checks=True):
97+
def solve(
98+
self, X, y, datafit, penalty, w_init=None, Xw_init=None, *, run_checks=True
99+
):
99100
"""Solve the optimization problem after validating its compatibility.
100101
101102
A proxy of ``_solve`` method that implicitly ensures the compatibility
@@ -108,7 +109,8 @@ def solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None,
108109
"""
109110
if "jitclass" in str(type(datafit)):
110111
warnings.warn(
111-
"Do not pass a compiled datafit, compilation is done inside solver now")
112+
"Do not pass a compiled datafit, compilation is done inside solver now"
113+
)
112114
else:
113115
if datafit is not None:
114116
datafit = compiled_clone(datafit, to_float32=X.dtype == np.float32)

skglm/solvers/lbfgs.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,13 @@ def __init__(self, max_iter=50, tol=1e-4, verbose=False):
3838

3939
def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None):
4040

41+
# TODO: to be isolated in a separate method
42+
is_sparse = issparse(X)
43+
if is_sparse:
44+
datafit.initialize_sparse(X.data, X.indptr, X.indices, y)
45+
else:
46+
datafit.initialize(X, y)
47+
4148
def objective(w):
4249
Xw = X @ w
4350
datafit_value = datafit.value(y, w, Xw)
@@ -70,8 +77,7 @@ def callback_post_iter(w_k):
7077

7178
it = len(p_objs_out)
7279
print(
73-
f"Iteration {it}: {p_obj:.10f}, "
74-
f"stopping crit: {stop_crit:.2e}"
80+
f"Iteration {it}: {p_obj:.10f}, " f"stopping crit: {stop_crit:.2e}"
7581
)
7682

7783
n_features = X.shape[1]
@@ -87,7 +93,7 @@ def callback_post_iter(w_k):
8793
options=dict(
8894
maxiter=self.max_iter,
8995
gtol=self.tol,
90-
ftol=0. # set ftol=0. to control convergence using only gtol
96+
ftol=0.0, # set ftol=0. to control convergence using only gtol
9197
),
9298
callback=callback_post_iter,
9399
)
@@ -97,7 +103,7 @@ def callback_post_iter(w_k):
97103
f"`LBFGS` did not converge for tol={self.tol:.3e} "
98104
f"and max_iter={self.max_iter}.\n"
99105
"Consider increasing `max_iter` and/or `tol`.",
100-
category=ConvergenceWarning
106+
category=ConvergenceWarning,
101107
)
102108

103109
w = result.x
@@ -110,7 +116,8 @@ def callback_post_iter(w_k):
110116
def custom_checks(self, X, y, datafit, penalty):
111117
# check datafit support sparse data
112118
check_attrs(
113-
datafit, solver=self,
119+
datafit,
120+
solver=self,
114121
required_attr=self._datafit_required_attr,
115-
support_sparse=issparse(X)
122+
support_sparse=issparse(X),
116123
)

skglm/tests/test_lbfgs_solver.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,15 @@
1313

1414
@pytest.mark.parametrize("X_sparse", [True, False])
1515
def test_lbfgs_L2_logreg(X_sparse):
16-
reg = 1.
17-
X_density = 1. if not X_sparse else 0.5
16+
reg = 1.0
17+
X_density = 1.0 if not X_sparse else 0.5
1818
n_samples, n_features = 100, 50
1919

2020
X, y, _ = make_correlated_data(
21-
n_samples, n_features, random_state=0, X_density=X_density,
21+
n_samples,
22+
n_features,
23+
random_state=0,
24+
X_density=X_density,
2225
)
2326
y = np.sign(y)
2427

@@ -29,7 +32,7 @@ def test_lbfgs_L2_logreg(X_sparse):
2932

3033
# fit scikit learn
3134
estimator = LogisticRegression(
32-
penalty='l2',
35+
penalty="l2",
3336
C=1 / (n_samples * reg),
3437
fit_intercept=False,
3538
tol=1e-12,
@@ -48,24 +51,26 @@ def test_L2_Cox(use_efron):
4851
"Run `pip install lifelines`"
4952
)
5053

51-
alpha = 10.
54+
alpha = 10.0
5255
n_samples, n_features = 100, 50
5356

5457
X, y = make_dummy_survival_data(
55-
n_samples, n_features, normalize=True,
56-
with_ties=use_efron, random_state=0)
58+
n_samples, n_features, normalize=True, with_ties=use_efron, random_state=0
59+
)
5760

5861
datafit = Cox(use_efron)
5962
penalty = L2(alpha)
6063

64+
# XXX: initialize is needed here although it is done in LBFGS
65+
# as the datafit is used to evaluate the objective
6166
datafit.initialize(X, y)
6267
w, *_ = LBFGS().solve(X, y, datafit, penalty)
6368

6469
# fit lifeline estimator
6570
stacked_y_X = np.hstack((y, X))
6671
df = pd.DataFrame(stacked_y_X)
6772

68-
estimator = CoxPHFitter(penalizer=alpha, l1_ratio=0.).fit(
73+
estimator = CoxPHFitter(penalizer=alpha, l1_ratio=0.0).fit(
6974
df, duration_col=0, event_col=1
7075
)
7176
w_ll = estimator.params_.values

0 commit comments

Comments
 (0)