
Commit 5309fd9

FEAT - Add log-sum penalty (scikit-learn-contrib#127)
Co-authored-by: Badr-MOUFAD <[email protected]>
1 parent 57c6091 commit 5309fd9

6 files changed: +160 −8 lines changed

doc/api.rst
Lines changed: 2 additions & 1 deletion

@@ -40,6 +40,7 @@ Penalties
 L1_plus_L2
 L2
 L2_3
+LogSumPenalty
 MCPenalty
 PositiveConstraint
 WeightedL1

@@ -96,7 +97,7 @@ Experimental
 :toctree: generated/

 IterativeReweightedL1
-PDCD_WS
+PDCD_WS
 Pinball
 SqrtQuadratic
 SqrtLasso

doc/changes/0.4.rst
Lines changed: 2 additions & 1 deletion

@@ -3,4 +3,5 @@
 Version 0.4 (in progress)
 ---------------------------
 - Add support for weights and positive coefficients to :ref:`MCPRegression Estimator <skglm.MCPRegression>` (PR: :gh:`184`)
-- Move solver specific computations from ``Datafit.initialize()`` to separate ``Datafit`` methods to ease ``Solver`` - ``Datafit`` compatibility check (PR: :gh:`192`)
+- Move solver specific computations from ``Datafit.initialize()`` to separate ``Datafit`` methods to ease ``Solver`` - ``Datafit`` compatibility check (PR: :gh:`192`)
+- Add :ref:`LogSumPenalty <skglm.penalties.LogSumPenalty>` (PR: :gh:`#127`)

skglm/penalties/__init__.py
Lines changed: 2 additions & 2 deletions

@@ -1,7 +1,7 @@
 from .base import BasePenalty
 from .separable import (
     L1_plus_L2, L0_5, L1, L2, L2_3, MCPenalty, WeightedMCPenalty, SCAD,
-    WeightedL1, IndicatorBox, PositiveConstraint
+    WeightedL1, IndicatorBox, PositiveConstraint, LogSumPenalty
 )
 from .block_separable import (
     L2_05, L2_1, BlockMCPenalty, BlockSCAD, WeightedGroupL2

@@ -14,5 +14,5 @@
     BasePenalty,
     L1_plus_L2, L0_5, L1, L2, L2_3, MCPenalty, WeightedMCPenalty, SCAD, WeightedL1,
     IndicatorBox, PositiveConstraint, L2_05, L2_1, BlockMCPenalty, BlockSCAD,
-    WeightedGroupL2, SLOPE
+    WeightedGroupL2, SLOPE, LogSumPenalty
 ]

skglm/penalties/separable.py
Lines changed: 62 additions & 2 deletions

@@ -4,8 +4,8 @@

 from skglm.penalties.base import BasePenalty
 from skglm.utils.prox_funcs import (
-    ST, box_proj, prox_05, prox_2_3, prox_SCAD, value_SCAD, prox_MCP,
-    value_MCP, value_weighted_MCP)
+    ST, box_proj, prox_05, prox_2_3, prox_SCAD, value_SCAD, prox_MCP, value_MCP,
+    value_weighted_MCP, prox_log_sum)


 class L1(BasePenalty):

@@ -607,6 +607,66 @@ def generalized_support(self, w):
         return w != 0


+class LogSumPenalty(BasePenalty):
+    """Log sum penalty.
+
+    The penalty value reads
+
+    .. math::
+
+        "value"(w) = sum_(j=1)^(n_"features") log(1 + abs(w_j) / epsilon)
+    """
+
+    def __init__(self, alpha, eps):
+        self.alpha = alpha
+        self.eps = eps
+
+    def get_spec(self):
+        spec = (
+            ('alpha', float64),
+            ('eps', float64),
+        )
+        return spec
+
+    def params_to_dict(self):
+        return dict(alpha=self.alpha, eps=self.eps)
+
+    def value(self, w):
+        """Compute the value of the log-sum penalty at ``w``."""
+        return self.alpha * np.sum(np.log(1 + np.abs(w) / self.eps))
+
+    def derivative(self, w):
+        """Compute the element-wise derivative."""
+        return np.sign(w) / (np.abs(w) + self.eps)
+
+    def prox_1d(self, value, stepsize, j):
+        """Compute the proximal operator of the log-sum penalty."""
+        return prox_log_sum(value, self.alpha * stepsize, self.eps)
+
+    def subdiff_distance(self, w, grad, ws):
+        """Compute distance of negative gradient to the subdifferential at w."""
+        subdiff_dist = np.zeros_like(grad)
+        alpha = self.alpha
+        eps = self.eps
+
+        for idx, j in enumerate(ws):
+            if w[j] == 0:
+                # distance of -grad_j to [-alpha/eps, alpha/eps]
+                subdiff_dist[idx] = max(0, np.abs(grad[idx]) - alpha / eps)
+            else:
+                # distance of -grad_j to {alpha * sign(w[j]) / (eps + |w[j]|)}
+                subdiff_dist[idx] = np.abs(
+                    grad[idx] + np.sign(w[j]) * alpha / (eps + np.abs(w[j])))
+        return subdiff_dist
+
+    def is_penalized(self, n_features):
+        """Return a binary mask with the penalized features."""
+        return np.ones(n_features, bool_)
+
+    def generalized_support(self, w):
+        return w != 0
+
+
 class PositiveConstraint(BasePenalty):
     """Positivity constraint penalty."""
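Below is a minimal usage sketch for the new penalty; it is not part of the commit. It only combines objects that the test module further down already imports (GeneralizedLinearEstimator, Quadratic, AndersonCD, make_correlated_data), and alpha=0.1 / eps=1e-2 are arbitrary illustration values, not recommended settings.

    from skglm import GeneralizedLinearEstimator
    from skglm.datafits import Quadratic
    from skglm.penalties import LogSumPenalty
    from skglm.solvers import AndersonCD
    from skglm.utils.data import make_correlated_data

    # toy data, same shape as in the tests (20 samples, 10 features)
    X, y, _ = make_correlated_data(n_samples=20, n_features=10, random_state=0)

    # quadratic datafit + log-sum penalty, solved with Anderson-accelerated coordinate descent
    estimator = GeneralizedLinearEstimator(
        datafit=Quadratic(),
        penalty=LogSumPenalty(alpha=0.1, eps=1e-2),  # illustrative hyperparameters
        solver=AndersonCD(),
    )
    estimator.fit(X, y)
    print(estimator.coef_)  # the non-convex log-sum penalty typically yields many exact zeros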

skglm/tests/test_penalties.py
Lines changed: 24 additions & 2 deletions

@@ -9,11 +9,13 @@
 from skglm.datafits import Quadratic, QuadraticMultiTask
 from skglm.penalties import (
     L1, L1_plus_L2, WeightedL1, MCPenalty, SCAD, IndicatorBox, L0_5, L2_3, SLOPE,
-    PositiveConstraint, L2_1, L2_05, BlockMCPenalty, BlockSCAD)
+    LogSumPenalty, PositiveConstraint, L2_1, L2_05, BlockMCPenalty, BlockSCAD)
 from skglm import GeneralizedLinearEstimator, Lasso
 from skglm.solvers import AndersonCD, MultiTaskBCD, FISTA
 from skglm.utils.data import make_correlated_data

+from skglm.utils.prox_funcs import prox_log_sum, _log_sum_prox_val
+

 n_samples = 20
 n_features = 10

@@ -37,7 +39,9 @@
     SCAD(alpha=alpha, gamma=4),
     IndicatorBox(alpha=alpha),
     L0_5(alpha),
-    L2_3(alpha)]
+    L2_3(alpha),
+    LogSumPenalty(alpha=alpha, eps=1e-2)
+]

 block_penalties = [
     L2_1(alpha=alpha), L2_05(alpha=alpha),

@@ -118,5 +122,23 @@ def test_nnls(fit_intercept):
     np.testing.assert_allclose(clf.intercept_, reg_nnls.intercept_)


+def test_logsum_prox():
+    alpha = 1.
+
+    grid_z = np.linspace(-2, 2, num=10)
+    grid_test = np.linspace(-5, 5, num=100)
+    grid_eps = np.linspace(0, 5, num=10 + 1)[1:]
+
+    for z, eps in zip(grid_z, grid_eps):
+        prox = prox_log_sum(z, alpha, eps)
+        obj_at_prox = _log_sum_prox_val(prox, z, alpha, eps)
+
+        is_lowest = all(
+            obj_at_prox <= _log_sum_prox_val(x, z, alpha, eps) for x in grid_test
+        )
+
+        np.testing.assert_equal(is_lowest, True)
+
+
 if __name__ == "__main__":
     pass

skglm/utils/prox_funcs.py
Lines changed: 68 additions & 0 deletions

@@ -204,3 +204,71 @@ def prox_SLOPE(z, alphas):
         x[i] = d

     return x
+
+
+@njit
+def prox_log_sum(x, alpha, eps):
+    """Proximal operator of log-sum penalty.
+
+    Parameters
+    ----------
+    x : float
+        Coefficient.
+
+    alpha : float
+        Regularization hyperparameter.
+
+    eps : float
+        Curvature hyperparameter.
+
+    References
+    ----------
+    .. [1] Ashley Prater-Bennette, Lixin Shen, Erin E. Tripp
+        The Proximity Operator of the Log-Sum Penalty (2021)
+    """
+    if np.sqrt(alpha) <= eps:
+        if abs(x) <= alpha / eps:
+            return 0.
+        else:
+            return np.sign(x) * _r2(abs(x), alpha, eps)
+    else:
+        a = 2 * np.sqrt(alpha) - eps
+        b = alpha / eps
+        # _r is continuous and _r(a) * _r(b) < 0, so the root can be found by bisection
+        x_star = _find_root_by_bisection(a, b, alpha, eps)
+        if abs(x) <= x_star:
+            return 0.
+        else:
+            return np.sign(x) * _r2(abs(x), alpha, eps)
+
+
+@njit
+def _r2(x, alpha, eps):
+    # compute r2 as in (eq. 7), ref [1] in `prox_log_sum`
+    return (x - eps) / 2. + np.sqrt(((x + eps) ** 2) / 4 - alpha)
+
+
+@njit
+def _log_sum_prox_val(x, z, alpha, eps):
+    # prox objective of the log-sum penalty `log(1 + abs(x) / eps)`
+    return ((x - z) ** 2) / (2 * alpha) + np.log1p(np.abs(x) / eps)
+
+
+@njit
+def _r(x, alpha, eps):
+    # compute r as defined in (eq. 9), ref [1] in `prox_log_sum`
+    r_z = _log_sum_prox_val(_r2(x, alpha, eps), x, alpha, eps)
+    r_0 = _log_sum_prox_val(0, x, alpha, eps)
+    return r_z - r_0
+
+
+@njit
+def _find_root_by_bisection(a, b, alpha, eps, tol=1e-8):
+    # find the root of `_r` in the interval [a, b] by bisection
+    while b - a > tol:
+        c = (a + b) / 2.
+        if _r(a, alpha, eps) * _r(c, alpha, eps) < 0:
+            b = c
+        else:
+            a = c
+    return c
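As a side note, not part of the diff: reading _log_sum_prox_val and _r2, the scalar problem these helpers solve, for an input z, is

    \mathrm{prox}(z) \in \operatorname*{arg\,min}_{x} \; \frac{(x - z)^2}{2 \alpha} + \log\Big(1 + \frac{|x|}{\varepsilon}\Big),
    \qquad
    r_2(|z|) = \frac{|z| - \varepsilon}{2} + \sqrt{\frac{(|z| + \varepsilon)^2}{4} - \alpha}.

prox_log_sum returns either 0 or sign(z) * r_2(|z|): below a threshold on |z| the zero candidate wins, above it the r_2 candidate does. The threshold is alpha / eps in closed form when sqrt(alpha) <= eps, and otherwise the bisection root of _r, i.e. the point where the two candidates attain equal objective values.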
