Add SCAD and BlockSCAD penalties

PABannier · Klopfe · mathurinm · web-flow · commit 8a581b01c4a8 · 2022-07-15T20:06:50.000+02:00
Co-authored-by: Klopfe <quentin.klopfenstein@u-bourgogne.fr>
Co-authored-by: mathurinm <mathurin.massias@gmail.com>
Co-authored-by: mathurinm <mathurinm@users.noreply.github.com>
Co-authored-by: QB3 <quentin.bertrand@inria.fr>
diff --git a/examples/plot_pen_prox.py b/examples/plot_pen_prox.py
@@ -9,17 +9,21 @@
 import numpy as np
 import matplotlib.pyplot as plt
 
-from skglm.penalties import WeightedL1, L1, L1_plus_L2, MCPenalty
+from skglm.penalties import WeightedL1, L1, L1_plus_L2, MCPenalty, SCAD, L0_5, L2_3
 
 
-penalties = [WeightedL1(alpha=1, weights=np.array([2.])),
-             L1(alpha=1),
-             L1_plus_L2(alpha=1, l1_ratio=0.7),
-             MCPenalty(alpha=1, gamma=3.),
-             ]
+penalties = [
+    WeightedL1(alpha=1, weights=np.array([2.])),
+    L1(alpha=1),
+    L1_plus_L2(alpha=1, l1_ratio=0.7),
+    MCPenalty(alpha=1, gamma=3.),
+    SCAD(alpha=1, gamma=3.),
+    L0_5(alpha=1),
+    L2_3(alpha=1),
+]
 
 
-x_range = np.linspace(-5, 5, num=300)
+x_range = np.linspace(-4, 4, num=300)
 
 fig, axarr = plt.subplots(1, 2, figsize=(8, 3), constrained_layout=True)
 
@@ -32,7 +36,6 @@
                   label=pen.__class__.__name__)
 
 axarr[0].legend()
-axarr[1].legend()
 axarr[0].set_title("Penalty value")
 axarr[1].set_title("Proximal operator of penalty")
 plt.show(block=False)
diff --git a/examples/plot_sparse_recovery.py b/examples/plot_sparse_recovery.py
@@ -18,7 +18,7 @@
 from skglm.utils import make_correlated_data
 from skglm.solvers import cd_solver_path
 from skglm.datafits import Quadratic
-from skglm.penalties import L1, MCPenalty, L0_5, L2_3
+from skglm.penalties import L1, MCPenalty, L0_5, L2_3, SCAD
 
 cmap = plt.get_cmap('tab10')
 
@@ -51,14 +51,16 @@
 penalties = {}
 penalties['lasso'] = L1(alpha=1)
 penalties['mcp'] = MCPenalty(alpha=1, gamma=3)
+penalties['scad'] = SCAD(alpha=1, gamma=3)
 penalties['l05'] = L0_5(alpha=1)
 penalties['l23'] = L2_3(alpha=1)
 
 colors = {}
 colors['lasso'] = cmap(0)
 colors['mcp'] = cmap(1)
-colors['l05'] = cmap(2)
-colors['l23'] = cmap(3)
+colors['scad'] = cmap(2)
+colors['l05'] = cmap(3)
+colors['l23'] = cmap(4)
 
 f1 = {}
 estimation_error = {}
@@ -83,12 +85,14 @@
 
 name_estimators = {'lasso': "Lasso"}
 name_estimators['mcp'] = r"MCP, $\gamma=%s$" % 3
+name_estimators['scad'] = r"SCAD, $\gamma=%s$" % 3
 name_estimators['l05'] = r"$\ell_{1/2}$"
 name_estimators['l23'] = r"$\ell_{2/3}$"
 
 
 plt.close('all')
-fig, axarr = plt.subplots(2, 1, sharex=True, sharey=False, figsize=[8.2, 5.7])
+fig, axarr = plt.subplots(2, 1, sharex=True, sharey=False, figsize=[
+                          6.3, 3.7], constrained_layout=True)
 
 for idx, estimator in enumerate(penalties.keys()):
 
@@ -127,5 +131,6 @@
     axarr[1].set_ylabel("pred. RMSE left-out")
     axarr[0].legend(
         bbox_to_anchor=(0, 1.02, 1, 0.2), loc="lower left",
-        mode="expand", borderaxespad=0, ncol=4)
-    plt.show(block=False)
+        mode="expand", borderaxespad=0, ncol=1)
+
+plt.show(block=False)
diff --git a/skglm/penalties/__init__.py b/skglm/penalties/__init__.py
@@ -1,7 +1,9 @@
 from .base import BasePenalty  # noqa F401
 
 from .separable import (  # noqa F401
-    L1_plus_L2, L0_5, L1, L2_3, MCPenalty, WeightedL1, IndicatorBox, BasePenalty
+    L1_plus_L2, L0_5, L1, L2_3, MCPenalty, SCAD, WeightedL1, IndicatorBox, BasePenalty
 )
 
-from .block_separable import L2_05, L2_1, BlockMCPenalty, WeightedGroupL2  # noqa F401
+from .block_separable import ( # noqa F401
+    L2_05, L2_1, BlockMCPenalty, BlockSCAD, WeightedGroupL2
+)
diff --git a/skglm/penalties/block_separable.py b/skglm/penalties/block_separable.py
@@ -6,7 +6,8 @@
 from numba.types import bool_
 
 from skglm.penalties.base import BasePenalty
-from skglm.utils import BST, prox_block_2_05
+from skglm.utils import (
+    BST, prox_block_2_05, prox_SCAD, value_SCAD, prox_MCP, value_MCP)
 
 
 spec_L21 = [
@@ -117,21 +118,13 @@ def __init__(self, alpha, gamma):
     def value(self, W):
         """Compute the value of BlockMCP at W."""
         norm_rows = np.sqrt(np.sum(W ** 2, axis=1))
-        s0 = norm_rows < self.gamma * self.alpha
-        value = np.full_like(norm_rows, self.gamma * self.alpha ** 2 / 2.)
-        value[s0] = self.alpha * norm_rows[s0] - norm_rows[s0]**2 / (2 * self.gamma)
-        return np.sum(value)
+        return value_MCP(norm_rows, self.alpha, self.gamma)
 
     def prox_1feat(self, value, stepsize, j):
         """Compute the proximal operator of BlockMCP."""
-        tau = self.alpha * stepsize
-        g = self.gamma / stepsize
-        norm_value = norm(value)
-        if norm_value <= tau:
-            return np.zeros_like(value)
-        if norm_value > g * tau:
-            return value
-        return (1 - tau / norm_value) * value / (1. - 1./g)
+        norm_rows = norm(value)
+        prox = prox_MCP(norm_rows, stepsize, self.alpha, self.gamma)
+        return prox * value / norm_rows
 
     def subdiff_distance(self, W, grad, ws):
         """Compute distance of negative gradient to the subdifferential at W."""
@@ -156,6 +149,68 @@ def is_penalized(self, n_features):
         return np.ones(n_features, bool_)
 
 
+spec_BlockSCAD = [
+    ('alpha', float64),
+    ('gamma', float64),
+]
+
+
+@jitclass(spec_BlockSCAD)
+class BlockSCAD(BasePenalty):
+    """Block Smoothly Clipped Absolute Deviation.
+
+    Notes
+    -----
+    With W_j the j-th row of W, the penalty is:
+        pen(||W_j||) = alpha * ||W_j||               if ||W_j|| <= alpha
+                       (2 * gamma * alpha * ||W_j|| - ||W_j|| ** 2 - alpha ** 2) \
+                           / (2 * (gamma - 1))       if alpha < ||W_j|| < alpha * gamma
+                       alpha ** 2 * (gamma + 1) / 2  if ||W_j|| >= gamma * alpha
+        value = sum_{j=1}^{n_features} pen(||W_j||)
+    """
+
+    def __init__(self, alpha, gamma):
+        self.alpha = alpha
+        self.gamma = gamma
+
+    def value(self, W):
+        """Compute the value of the SCAD penalty at W."""
+        norm_rows = np.sqrt(np.sum(W ** 2, axis=1))
+        return value_SCAD(norm_rows, self.alpha, self.gamma)
+
+    def prox_1feat(self, value, stepsize, j):
+        """Compute the proximal operator of BlockSCAD."""
+        norm_value = norm(value)
+        prox = prox_SCAD(norm_value, stepsize, self.alpha, self.gamma)
+        return prox * value / norm_value
+
+    def subdiff_distance(self, W, grad, ws):
+        """Compute distance of negative gradient to the subdifferential at W."""
+        subdiff_dist = np.zeros_like(ws, dtype=grad.dtype)
+        for idx, j in enumerate(ws):
+            norm_Wj = norm(W[j])
+            if not np.any(W[j]):
+                # distance of -grad_j to alpha * unit_ball
+                subdiff_dist[idx] = max(0, norm(grad[idx]) - self.alpha)
+            elif norm_Wj <= self.alpha:
+                # distance of -grad_j to alpha * W[j] / ||W[j]||
+                subdiff_dist[idx] = norm(grad[idx] + self.alpha * W[j] / norm_Wj)
+            elif norm_Wj <= self.gamma * self.alpha:
+                # distance of -grad_j to (alpha * gamma - ||W[j]||)
+                # / ((gamma - 1) * ||W[j]||) * W[j]
+                subdiff_dist[idx] = norm(grad[idx] + (
+                    (self.alpha * self.gamma - norm_Wj) / (norm_Wj * (self.gamma - 1))
+                ) * W[j])
+            else:
+                # distance of -grad_j to 0
+                subdiff_dist[idx] = norm(grad[idx])
+        return subdiff_dist
+
+    def is_penalized(self, n_features):
+        """Return a binary mask with the penalized features."""
+        return np.ones(n_features, bool_)
+
+
 spec_WeightedGroupL2 = [
     ('alpha', float64),
     ('weights', float64[:]),
diff --git a/skglm/penalties/separable.py b/skglm/penalties/separable.py
@@ -4,7 +4,8 @@
 from numba.types import bool_
 
 from skglm.penalties.base import BasePenalty
-from skglm.utils import ST, box_proj, prox_05, prox_2_3
+from skglm.utils import (
+    ST, box_proj, prox_05, prox_2_3, prox_SCAD, value_SCAD, prox_MCP, value_MCP)
 
 
 spec_L1 = [
@@ -174,7 +175,7 @@ class MCPenalty(BasePenalty):
     With x >= 0
     pen(x) =
     alpha * x - x^2 / (2 * gamma) if x =< gamma * alpha
-    gamma * alpha 2 / 2           if x > gamma * alpha
+    gamma * alpha^2 / 2           if x > gamma * alpha
     value = sum_{j=1}^{n_features} pen(abs(w_j))
     """
 
@@ -183,21 +184,11 @@ def __init__(self, alpha, gamma):
         self.gamma = gamma
 
     def value(self, w):
-        """Compute the value of MCP."""
-        s0 = np.abs(w) < self.gamma * self.alpha
-        value = np.full_like(w, self.gamma * self.alpha ** 2 / 2.)
-        value[s0] = self.alpha * np.abs(w[s0]) - w[s0]**2 / (2 * self.gamma)
-        return np.sum(value)
+        return value_MCP(w, self.alpha, self.gamma)
 
     def prox_1d(self, value, stepsize, j):
         """Compute the proximal operator of MCP."""
-        tau = self.alpha * stepsize
-        g = self.gamma / stepsize  # what does g stand for ?
-        if np.abs(value) <= tau:
-            return 0.
-        if np.abs(value) > g * tau:
-            return value
-        return np.sign(value) * (np.abs(value) - tau) / (1. - 1./g)
+        return prox_MCP(value, stepsize, self.alpha, self.gamma)
 
     def subdiff_distance(self, w, grad, ws):
         """Compute distance of negative gradient to the subdifferential at w."""
@@ -207,10 +198,9 @@ def subdiff_distance(self, w, grad, ws):
                 # distance of -grad to alpha * [-1, 1]
                 subdiff_dist[idx] = max(0, np.abs(grad[idx]) - self.alpha)
             elif np.abs(w[j]) < self.alpha * self.gamma:
-                # distance of -grad_j to (alpha - abs(w[j])/gamma) * sign(w[j])
+                # distance of -grad_j to (alpha * sign(w[j]) - w[j] / gamma)
                 subdiff_dist[idx] = np.abs(
-                    grad[idx] + self.alpha * np.sign(w[j])
-                    - w[j] / self.gamma)
+                    grad[idx] + self.alpha * np.sign(w[j]) - w[j] / self.gamma)
             else:
                 # distance of grad to 0
                 subdiff_dist[idx] = np.abs(grad[idx])
@@ -229,6 +219,70 @@ def alpha_max(self, gradient0):
         return np.max(np.abs(gradient0))
 
 
+spec_SCAD = [
+    ('alpha', float64),
+    ('gamma', float64)
+]
+
+
+@jitclass(spec_SCAD)
+class SCAD(BasePenalty):
+    """Smoothly Clipped Absolute Deviation.
+
+    Notes
+    -----
+    With x >= 0
+    pen(x) =
+    alpha * x                         if x <= alpha
+    (2 * gamma * alpha * x - x^2 - alpha^2) \
+        / (2 * (gamma - 1))           if alpha < x < alpha * gamma
+    alpha^2 * (gamma + 1) / 2         if x >= gamma * alpha
+    value = sum_{j=1}^{n_features} pen(abs(w_j))
+    """
+
+    def __init__(self, alpha, gamma):
+        self.alpha = alpha
+        self.gamma = gamma
+
+    def value(self, w):
+        """Compute the value of the SCAD penalty at w."""
+        return value_SCAD(w, self.alpha, self.gamma)
+
+    def prox_1d(self, value, stepsize, j):
+        """Compute the proximal operator of SCAD penalty."""
+        return prox_SCAD(value, stepsize, self.alpha, self.gamma)
+
+    def subdiff_distance(self, w, grad, ws):
+        """Compute distance of negative gradient to the subdifferential at w."""
+        subdiff_dist = np.zeros_like(grad)
+        for idx, j in enumerate(ws):
+            if w[j] == 0:
+                # distance of -grad_j to alpha * [-1, 1]
+                subdiff_dist[idx] = max(0, np.abs(grad[idx]) - self.alpha)
+            elif np.abs(w[j]) <= self.alpha:
+                # distance of -grad_j to alpha * sgn(w[j])
+                subdiff_dist[idx] = np.abs(grad[idx] + self.alpha * np.sign(w[j]))
+            elif np.abs(w[j]) <= self.alpha * self.gamma:
+                # distance of -grad_j to (alpha * gamma * sign(w[j]) - w[j])
+                #                        / (gamma - 1)
+                subdiff_dist[idx] = np.abs(
+                    grad[idx] +
+                    (np.sign(w[j]) * self.alpha * self.gamma - w[j]) / (self.gamma - 1)
+                )
+            else:
+                # distance of -grad_j to 0
+                subdiff_dist[idx] = np.abs(grad[idx])
+        return subdiff_dist
+
+    def is_penalized(self, n_features):
+        """Return a binary mask with the penalized features."""
+        return np.ones(n_features, bool_)
+
+    def generalized_support(self, w):
+        """Return a mask with non-zero coefficients."""
+        return w != 0
+
+
 spec_IndicatorBox = [
     ('alpha', float64)
 ]
diff --git a/skglm/tests/test_penalties.py b/skglm/tests/test_penalties.py
@@ -0,0 +1,63 @@
+import pytest
+import numpy as np
+
+from numpy.linalg import norm
+from numpy.testing import assert_array_less
+
+from skglm.datafits import Quadratic, QuadraticMultiTask
+from skglm.penalties import (
+    L1, L1_plus_L2, WeightedL1, MCPenalty, SCAD, IndicatorBox, L0_5, L2_3,
+    L2_1, L2_05, BlockMCPenalty, BlockSCAD)
+from skglm import GeneralizedLinearEstimator
+from skglm.utils import make_correlated_data
+
+
+n_samples = 20
+n_features = 10
+n_tasks = 10
+X, Y, _ = make_correlated_data(
+    n_samples=n_samples, n_features=n_features, n_tasks=n_tasks, density=0.1,
+    random_state=0)
+y = Y[:, 0]
+
+n_samples, n_features = X.shape
+alpha_max = norm(X.T @ y, ord=np.inf) / n_samples
+alpha = alpha_max / 1000
+
+penalties = [
+    L1(alpha=alpha),
+    L1_plus_L2(alpha=alpha, l1_ratio=0.5),
+    WeightedL1(alpha=1, weights=np.arange(n_features)),
+    MCPenalty(alpha=alpha, gamma=4),
+    SCAD(alpha=alpha, gamma=4),
+    IndicatorBox(alpha=alpha),
+    L0_5(alpha),
+    L2_3(alpha)]
+
+block_penalties = [
+    L2_1(alpha=alpha), L2_05(alpha=alpha),
+    BlockMCPenalty(alpha=alpha, gamma=4),
+    BlockSCAD(alpha=alpha, gamma=4)
+    ]
+
+
+@pytest.mark.parametrize('penalty', penalties)
+def test_subdiff_diff(penalty):
+    est = GeneralizedLinearEstimator(
+        datafit=Quadratic(),
+        penalty=penalty,
+        tol=1e-14,
+    ).fit(X, y)
+    # assert the stopping criterion is satisfied
+    assert_array_less(est.stop_crit_, est.tol)
+
+
+@pytest.mark.parametrize('block_penalty', block_penalties)
+def test_subdiff_diff_block(block_penalty):
+    est = GeneralizedLinearEstimator(
+        datafit=QuadraticMultiTask(),
+        penalty=block_penalty,
+        tol=1e-14,
+    ).fit(X, Y)
+    # assert the stopping criterion is satisfied
+    assert_array_less(est.stop_crit_, est.tol)
diff --git a/skglm/utils.py b/skglm/utils.py