ENH - Add LogisticGroup datafit (#94)

Badr-MOUFAD · web-flow · commit 238ce8aabdf1 · 2022-10-18T12:57:59.000+02:00
diff --git a/doc/api.rst b/doc/api.rst
@@ -55,6 +55,7 @@ Datafits
 
    Huber
    Logistic
+   LogisticGroup
    Quadratic
    QuadraticGroup
    QuadraticSVC
diff --git a/skglm/datafits/__init__.py b/skglm/datafits/__init__.py
@@ -1,12 +1,12 @@
 from .base import BaseDatafit, BaseMultitaskDatafit
 from .single_task import Quadratic, QuadraticSVC, Logistic, Huber, Poisson
 from .multi_task import QuadraticMultiTask
-from .group import QuadraticGroup
+from .group import QuadraticGroup, LogisticGroup
 
 
 __all__ = [
     BaseDatafit, BaseMultitaskDatafit,
     Quadratic, QuadraticSVC, Logistic, Huber, Poisson,
     QuadraticMultiTask,
-    QuadraticGroup
+    QuadraticGroup, LogisticGroup
 ]
diff --git a/skglm/datafits/group.py b/skglm/datafits/group.py
@@ -3,6 +3,7 @@
 from numba import int32, float64
 
 from skglm.datafits.base import BaseDatafit
+from skglm.datafits.single_task import Logistic
 
 
 class QuadraticGroup(BaseDatafit):
@@ -71,3 +72,63 @@ def gradient_scalar(self, X, y, w, Xw, j):
 
     def intercept_update_step(self, y, Xw):
         return np.mean(Xw - y)
+
+
+class LogisticGroup(Logistic):
+    r"""Logistic datafit used with group penalties.
+
+    The datafit reads::
+
+        (1 / n_samples) * \sum_i log(1 + exp(-y_i * Xw_i))
+
+    Attributes
+    ----------
+    grp_indices : array, shape (n_features,)
+        The group indices stacked contiguously
+        ([grp1_indices, grp2_indices, ...]).
+
+    grp_ptr : array, shape (n_groups + 1,)
+        The group pointers such that two consecutive elements delimit
+        the indices of a group in ``grp_indices``.
+
+    lipschitz : array, shape (n_groups,)
+        The Lipschitz constant of each group, computed by ``initialize``.
+    """
+
+    def __init__(self, grp_ptr, grp_indices):
+        self.grp_ptr, self.grp_indices = grp_ptr, grp_indices
+
+    def get_spec(self):
+        spec = (  # (attribute name, numba type) pairs
+            ('grp_ptr', int32[:]),
+            ('grp_indices', int32[:]),
+            ('lipschitz', float64[:])  # assigned in ``initialize``
+        )
+        return spec
+
+    def params_to_dict(self):
+        return dict(grp_ptr=self.grp_ptr,  # constructor arguments only
+                    grp_indices=self.grp_indices)
+
+    def initialize(self, X, y):
+        grp_ptr, grp_indices = self.grp_ptr, self.grp_indices
+        n_groups = len(grp_ptr) - 1  # grp_ptr holds n_groups + 1 pointers
+
+        lipschitz = np.zeros(n_groups)
+        for g in range(n_groups):
+            grp_g_indices = grp_indices[grp_ptr[g]: grp_ptr[g+1]]  # group g columns
+            X_g = X[:, grp_g_indices]
+            lipschitz[g] = norm(X_g, ord=2) ** 2 / (4 * len(y))
+
+        self.lipschitz = lipschitz
+
+    def gradient_g(self, X, y, w, Xw, g):
+        grp_ptr, grp_indices = self.grp_ptr, self.grp_indices
+        grp_g_indices = grp_indices[grp_ptr[g]: grp_ptr[g+1]]  # group g columns
+        raw_grad_val = self.raw_grad(y, Xw)  # not defined here; from the parent class
+
+        grad_g = np.zeros(len(grp_g_indices))  # one entry per feature of group g
+        for idx, j in enumerate(grp_g_indices):
+            grad_g[idx] = X[:, j] @ raw_grad_val  # column j dotted with raw gradient
+
+        return grad_g
diff --git a/skglm/tests/test_group.py b/skglm/tests/test_group.py
@@ -5,12 +5,13 @@
 from skglm.penalties import L1
 from skglm.datafits import Quadratic
 from skglm.penalties.block_separable import WeightedGroupL2
-from skglm.datafits.group import QuadraticGroup
+from skglm.datafits.group import QuadraticGroup, LogisticGroup
 from skglm.solvers import GroupBCD
 from skglm.utils import (
     _alpha_max_group_lasso, grp_converter, make_correlated_data, compiled_clone,
     AndersonAcceleration)
 from celer import GroupLasso, Lasso
+from sklearn.linear_model import LogisticRegression
 
 
 def _generate_random_grp(n_groups, n_features, shuffle=True):
@@ -160,6 +161,61 @@ def test_intercept_grouplasso():
     np.testing.assert_allclose(model.intercept_, w[-1], atol=1e-5)
 
 
+@pytest.mark.parametrize("rho", [1e-1, 1e-2])
+def test_equivalence_logreg(rho):
+    n_samples, n_features = 30, 50
+    rng = np.random.RandomState(1123)
+    X, y, _ = make_correlated_data(n_samples, n_features, random_state=rng)
+    y = np.sign(y)  # keep only the sign of the targets
+
+    grp_indices, grp_ptr = grp_converter(1, n_features)  # presumably singleton groups
+    weights = np.ones(n_features)  # unit weight for every group
+    alpha_max = norm(X.T @ y, ord=np.inf) / (2 * n_samples)
+    alpha = rho * alpha_max / 10.
+
+    group_logistic = LogisticGroup(grp_ptr=grp_ptr, grp_indices=grp_indices)
+    group_penalty = WeightedGroupL2(
+        alpha=alpha, grp_ptr=grp_ptr,
+        grp_indices=grp_indices, weights=weights)
+
+    group_logistic = compiled_clone(group_logistic, to_float32=X.dtype == np.float32)
+    group_penalty = compiled_clone(group_penalty)
+    w = GroupBCD(tol=1e-12).solve(X, y, group_logistic, group_penalty)[0]
+
+    sk_logreg = LogisticRegression(penalty='l1', C=1/(n_samples * alpha),
+                                   fit_intercept=False, tol=1e-12, solver='liblinear')
+    sk_logreg.fit(X, y)  # reference: sklearn L1-penalized logistic regression
+
+    np.testing.assert_allclose(sk_logreg.coef_.flatten(), w, atol=1e-6, rtol=1e-5)
+
+
+@pytest.mark.parametrize("n_groups, rho", [[15, 1e-1], [25, 1e-2]])
+def test_group_logreg(n_groups, rho):
+    n_samples, n_features, shuffle = 30, 60, True
+    random_state = 123
+    rng = np.random.RandomState(random_state)
+
+    X, y, _ = make_correlated_data(n_samples, n_features, random_state=rng)
+    y = np.sign(y)  # keep only the sign of the targets
+
+    rng.seed(random_state)  # reset the generator to its initial seed
+    weights = np.abs(rng.randn(n_groups))  # non-negative weight per group
+    grp_indices, grp_ptr, _ = _generate_random_grp(n_groups, n_features, shuffle)
+
+    alpha_max = _alpha_max_group_lasso(X, y, grp_indices, grp_ptr, weights)
+    alpha = rho * alpha_max
+
+    # skglm: only the solver's returned stop_crit is checked below
+    group_logistic = LogisticGroup(grp_ptr=grp_ptr, grp_indices=grp_indices)
+    group_penalty = WeightedGroupL2(alpha, weights, grp_ptr, grp_indices)
+
+    group_logistic = compiled_clone(group_logistic, to_float32=X.dtype == np.float32)
+    group_penalty = compiled_clone(group_penalty)
+    stop_crit = GroupBCD(tol=1e-12).solve(X, y, group_logistic, group_penalty)[2]
+
+    np.testing.assert_array_less(stop_crit, 1e-12)
+
+
 def test_anderson_acceleration():
     # VAR: w = rho * w + 1 with |rho| < 1
     # converges to w_star = 1 / (1 - rho)