Commit 10fa99c

ENH add global_lipschitz to Cox datafit (#180)
1 parent e1a27e1 commit 10fa99c

File tree: 3 files changed, +110 -5 lines

skglm/datafits/single_task.py
skglm/solvers/fista.py
skglm/tests/test_estimators.py

skglm/datafits/single_task.py

Lines changed: 15 additions & 0 deletions
@@ -599,6 +599,7 @@ def get_spec(self):
             ('use_efron', bool_),
             ('T_indptr', int64[:]), ('T_indices', int64[:]),
             ('H_indptr', int64[:]), ('H_indices', int64[:]),
+            ('global_lipschitz', float64),
         )

     def params_to_dict(self):
@@ -693,6 +694,20 @@ def initialize_sparse(self, X_data, X_indptr, X_indices, y):
         # small hack to avoid repetitive code: pass in X_data as only its dtype is used
         self.initialize(X_data, y)

+    def init_global_lipschitz(self, X, y):
+        s = y[:, 1]
+
+        n_samples = X.shape[0]
+        self.global_lipschitz = s.sum() * norm(X, ord=2) ** 2 / n_samples
+
+    def init_global_lipschitz_sparse(self, X_data, X_indptr, X_indices, y):
+        s = y[:, 1]
+
+        n_samples = s.shape[0]
+        norm_X = spectral_norm(X_data, X_indptr, X_indices, n_samples)
+
+        self.global_lipschitz = s.sum() * norm_X ** 2 / n_samples
+
     def _B_dot_vec(self, vec):
         # compute `B @ vec` in O(n) instead of O(n^2)
         out = np.zeros_like(vec)
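
The two new methods give the Cox datafit a global Lipschitz constant for its gradient: with s = y[:, 1] the vector of event indicators, the constant is s.sum() * ||X||_2^2 / n_samples, using the dense spectral norm or its sparse counterpart on the CSC arrays. (Roughly, each per-event term of the raw Hessian has spectral norm at most one, so composing with X gives this bound.) Below is a minimal usage sketch, not part of the commit; it assumes compiled_clone is importable from skglm.utils.jit_compilation and that make_dummy_survival_data keeps the signature used in the tests further down. It simply checks that the stored attribute matches the closed-form expression.

    # hedged sketch: verify the stored constant against the closed-form bound
    import numpy as np
    from numpy.linalg import norm

    from skglm.datafits import Cox
    from skglm.utils.data import make_dummy_survival_data
    from skglm.utils.jit_compilation import compiled_clone  # assumed location

    n_samples, n_features = 100, 10
    X, y = make_dummy_survival_data(n_samples, n_features, random_state=0)

    datafit = compiled_clone(Cox(use_efron=False))
    datafit.initialize(X, y)
    datafit.init_global_lipschitz(X, y)

    # sum(s) * ||X||_2^2 / n_samples, with s = y[:, 1] the event indicators
    expected = y[:, 1].sum() * norm(X, ord=2) ** 2 / n_samples
    np.testing.assert_allclose(datafit.global_lipschitz, expected)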

skglm/solvers/fista.py

Lines changed: 11 additions & 3 deletions
@@ -63,11 +63,19 @@ def solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None):
             t_old = t_new
             t_new = (1 + np.sqrt(1 + 4 * t_old ** 2)) / 2
             w_old = w.copy()
+
             if X_is_sparse:
-                grad = construct_grad_sparse(
-                    X.data, X.indptr, X.indices, y, z, X @ z, datafit, all_features)
+                if hasattr(datafit, "gradient_sparse"):
+                    grad = datafit.gradient_sparse(
+                        X.data, X.indptr, X.indices, y, X @ z)
+                else:
+                    grad = construct_grad_sparse(
+                        X.data, X.indptr, X.indices, y, z, X @ z, datafit, all_features)
             else:
-                grad = construct_grad(X, y, z, X @ z, datafit, all_features)
+                if hasattr(datafit, "gradient"):
+                    grad = datafit.gradient(X, y, X @ z)
+                else:
+                    grad = construct_grad(X, y, z, X @ z, datafit, all_features)

             step = 1 / lipschitz
             z -= step * grad
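
The solver change is a small dispatch: when the datafit exposes a full-batch gradient (gradient, or gradient_sparse for CSC input), FISTA calls it directly and skips the generic feature-by-feature construct_grad helpers. Any datafit can opt into this fast path by implementing the two methods with the signatures used above; the hedged illustration below uses a made-up quadratic-like datafit, not an skglm class.

    # hedged illustration: the two hooks FISTA now looks for on a datafit
    import numpy as np

    class QuadraticLike:
        # toy datafit: gradient of 0.5/n * ||y - Xw||^2 with respect to w

        def gradient(self, X, y, Xw):
            # dense full-batch gradient, called as datafit.gradient(X, y, X @ z)
            return X.T @ (Xw - y) / len(y)

        def gradient_sparse(self, X_data, X_indptr, X_indices, y, Xw):
            # same quantity from CSC arrays, one column at a time
            n_samples = len(y)
            raw = (Xw - y) / n_samples
            grad = np.zeros(len(X_indptr) - 1)
            for j in range(len(grad)):
                sl = slice(X_indptr[j], X_indptr[j + 1])
                grad[j] = X_data[sl] @ raw[X_indices[sl]]
            return grad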

skglm/tests/test_estimators.py

Lines changed: 84 additions & 2 deletions
@@ -14,15 +14,16 @@
 from sklearn.svm import LinearSVC as LinearSVC_sklearn
 from sklearn.utils.estimator_checks import check_estimator

+import scipy.optimize
 from scipy.sparse import csc_matrix, issparse

 from skglm.utils.data import make_correlated_data, make_dummy_survival_data
 from skglm.estimators import (
     GeneralizedLinearEstimator, Lasso, MultiTaskLasso, WeightedLasso, ElasticNet,
     MCPRegression, SparseLogisticRegression, LinearSVC)
 from skglm.datafits import Logistic, Quadratic, QuadraticSVC, QuadraticMultiTask, Cox
-from skglm.penalties import L1, IndicatorBox, L1_plus_L2, MCPenalty, WeightedL1
-from skglm.solvers import AndersonCD
+from skglm.penalties import L1, IndicatorBox, L1_plus_L2, MCPenalty, WeightedL1, SLOPE
+from skglm.solvers import AndersonCD, FISTA

 import pandas as pd
 from skglm.solvers import ProxNewton
@@ -326,6 +327,87 @@ def test_Cox_sk_compatibility():
     check_estimator(CoxEstimator())


+@pytest.mark.parametrize("use_efron, issparse", product([True, False], repeat=2))
+def test_equivalence_cox_SLOPE_cox_L1(use_efron, issparse):
+    # this only tests the case of SLOPE equivalent to L1 (equal alphas)
+    reg = 1e-2
+    n_samples, n_features = 100, 10
+    X_density = 1. if not issparse else 0.2
+
+    X, y = make_dummy_survival_data(
+        n_samples, n_features, with_ties=use_efron, X_density=X_density,
+        random_state=0)
+
+    # init datafit
+    datafit = compiled_clone(Cox(use_efron))
+
+    if not issparse:
+        datafit.initialize(X, y)
+    else:
+        datafit.initialize_sparse(X.data, X.indptr, X.indices, y)
+
+    # compute alpha_max
+    grad_0 = datafit.raw_grad(y, np.zeros(n_samples))
+    alpha_max = np.linalg.norm(X.T @ grad_0, ord=np.inf)
+
+    # init penalty
+    alpha = reg * alpha_max
+    alphas = alpha * np.ones(n_features)
+    penalty = compiled_clone(SLOPE(alphas))
+
+    solver = FISTA(opt_strategy="fixpoint", max_iter=10_000, tol=1e-9)
+
+    w, *_ = solver.solve(X, y, datafit, penalty)
+
+    method = 'efron' if use_efron else 'breslow'
+    estimator = CoxEstimator(alpha, l1_ratio=1., method=method, tol=1e-9).fit(X, y)
+
+    np.testing.assert_allclose(w, estimator.coef_, atol=1e-6)
+
+
+@pytest.mark.parametrize("use_efron", [True, False])
+def test_cox_SLOPE(use_efron):
+    reg = 1e-2
+    n_samples, n_features = 100, 10
+
+    X, y = make_dummy_survival_data(
+        n_samples, n_features, with_ties=use_efron, random_state=0)
+
+    # init datafit
+    datafit = compiled_clone(Cox(use_efron))
+    datafit.initialize(X, y)
+
+    # compute alpha_max
+    grad_0 = datafit.raw_grad(y, np.zeros(n_samples))
+    alpha_ref = np.linalg.norm(X.T @ grad_0, ord=np.inf)
+
+    # init penalty
+    alpha = reg * alpha_ref
+    alphas = alpha / np.arange(n_features + 1)[1:]
+    penalty = compiled_clone(SLOPE(alphas))
+
+    solver = FISTA(opt_strategy="fixpoint", max_iter=10_000, tol=1e-9)
+
+    w, *_ = solver.solve(X, y, datafit, penalty)
+
+    result = scipy.optimize.minimize(
+        fun=lambda w: datafit.value(y, w, X @ w) + penalty.value(w),
+        x0=np.zeros(n_features),
+        method="SLSQP",
+        options=dict(
+            ftol=1e-9,
+            maxiter=10_000,
+        ),
+    )
+    w_sp = result.x
+
+    # check both methods yield the same objective
+    np.testing.assert_allclose(
+        datafit.value(y, w, X @ w) + penalty.value(w),
+        datafit.value(y, w_sp, X @ w_sp) + penalty.value(w_sp)
+    )
+
+
 # Test if GeneralizedLinearEstimator returns the correct coefficients
 @pytest.mark.parametrize("Datafit, Penalty, Estimator, pen_args", [
     (Quadratic, L1, Lasso, [alpha]),
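
The first new test leans on the fact that a SLOPE penalty with all regularization levels equal collapses to the plain L1 penalty, so the FISTA solution can be compared against the L1-penalized CoxEstimator; the second checks that FISTA with a decreasing SLOPE sequence reaches the same objective value as a scipy SLSQP solve of the composite problem. The reduction behind the first test is easy to check on its own; a small sketch, assuming SLOPE(alphas) and L1(alpha) expose the usual skglm value(w) penalty interface:

    # hedged sketch: equal alphas make the SLOPE penalty coincide with L1
    import numpy as np
    from skglm.penalties import L1, SLOPE

    alpha = 1e-2
    w = np.array([0.5, -2.0, 0.0, 1.5])

    np.testing.assert_allclose(
        SLOPE(alpha * np.ones(w.shape[0])).value(w),  # sum_j alpha_j * |w|_(j)
        L1(alpha).value(w),                           # alpha * ||w||_1
    )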
