ENH Add IterativeReweightedL1 (#87)

PABannier · mathurinm · web-flow · commit 762293650fcb · 2022-10-25T08:38:12.000+02:00
Co-authored-by: mathurinm <mathurin.massias@gmail.com>
diff --git a/examples/plot_reweighted_l1.py b/examples/plot_reweighted_l1.py
@@ -0,0 +1,95 @@
+"""
+=================================================================
+Timing comparison between direct prox computation and reweighting
+=================================================================
+Compare time and objective value of L0_5-regularized problem with
+direct proximal computation and iterative reweighting.
+"""
+# Author: Pierre-Antoine Bannier <pierreantoine.bannier@gmail.com>
+
+import time
+import numpy as np
+import pandas as pd
+from numpy.linalg import norm
+import matplotlib.pyplot as plt
+
+from skglm.penalties.separable import L0_5
+from skglm.utils import make_correlated_data
+from skglm.estimators import GeneralizedLinearEstimator
+from skglm.experimental import IterativeReweightedL1
+from skglm.solvers import AndersonCD
+
+
+# synthetic regression problem with more features than samples
+n_samples, n_features = 200, 500
+X, y, w_true = make_correlated_data(
+    n_samples=n_samples, n_features=n_features, random_state=24)
+
+# reference regularization scale; the strengths below are fractions of it
+alpha_max = norm(X.T @ y, ord=np.inf) / n_samples
+alphas = [alpha_max / 10, alpha_max / 100, alpha_max / 1000]
+# tolerance passed to both solvers so they are compared at the same precision
+tol = 1e-10
+
+
+def _obj(w):
+    """Return the L0.5-regularized least-squares objective at ``w``.
+
+    Reads the module-level ``X``, ``y``, ``n_samples`` and ``alpha``; ``alpha``
+    is the loop variable of the benchmark loop, so it must be set before the
+    call.
+    """
+    residual = y - X @ w
+    datafit_value = np.sum(residual ** 2) / (2 * n_samples)
+    penalty_value = alpha * np.sum(np.sqrt(np.abs(w)))
+    return datafit_value + penalty_value
+
+
+def fit_l05(alpha):
+    """Fit the L0.5 problem at strength ``alpha`` with both approaches.
+
+    Returns a dict with keys ``"iterative"`` and ``"direct"``, each mapping to
+    a ``(fitted estimator, elapsed seconds)`` pair. The elapsed time covers
+    the construction of the estimator and its fit.
+    """
+    # approach 1: iterative reweighting
+    tic = time.time()
+    reweighted_solver = AndersonCD(tol=tol, fit_intercept=False)
+    reweighted = IterativeReweightedL1(
+        penalty=L0_5(alpha), solver=reweighted_solver)
+    reweighted.fit(X, y)
+    reweighted_time = time.time() - tic
+
+    # approach 2: direct proximal computation; the working set is built with
+    # the "fixpoint" strategy because `subdiff` is uninformative for L0_5
+    tic = time.time()
+    prox_solver = AndersonCD(
+        tol=tol, fit_intercept=False, ws_strategy="fixpoint")
+    prox_based = GeneralizedLinearEstimator(
+        penalty=L0_5(alpha), solver=prox_solver)
+    prox_based.fit(X, y)
+    prox_time = time.time() - tic
+
+    return {
+        "iterative": (reweighted, reweighted_time),
+        "direct": (prox_based, prox_time),
+    }
+
+
+# warm-up call: triggers Numba compilation so it is not counted in the timings
+fit_l05(alpha_max/10)
+
+# row 0: iterative reweighting, row 1: direct prox; one column per alpha
+time_results = np.zeros((2, len(alphas)))
+obj_results = np.zeros((2, len(alphas)))
+
+# actual run
+# NOTE: the loop variable `alpha` is the module-level global read by `_obj`
+for i, alpha in enumerate(alphas):
+    results = fit_l05(alpha=alpha)
+    iterative_l05, iterative_time = results["iterative"]
+    direct_l05, direct_time = results["direct"]
+
+    # objective reached by each approach at the current alpha
+    iterative_obj = _obj(iterative_l05.coef_)
+    direct_obj = _obj(direct_l05.coef_)
+
+    obj_results[:, i] = np.array([iterative_obj, direct_obj])
+    time_results[:, i] = np.array([iterative_time, direct_time])
+
+# transpose so that rows are alphas and columns are the two approaches
+time_df = pd.DataFrame(time_results.T, columns=["Iterative", "Direct"])
+obj_df = pd.DataFrame(obj_results.T, columns=["Iterative", "Direct"])
+
+# labels are the ratios alpha / alpha_max used to build `alphas`
+time_df.index = [1e-1, 1e-2, 1e-3]
+obj_df.index = [1e-1, 1e-2, 1e-3]
+
+# left panel: fit times; right panel: objective values at the solutions
+fig, axarr = plt.subplots(1, 2, figsize=(8, 3.5), constrained_layout=True)
+ax = axarr[0]
+time_df.plot.bar(rot=0, ax=ax)
+ax.set_xlabel(r"$\lambda/\lambda_{max}$")
+ax.set_ylabel("time (in s)")
+ax.set_title("Time to fit")
+
+ax = axarr[1]
+obj_df.plot.bar(rot=0, ax=ax)
+ax.set_xlabel(r"$\lambda/\lambda_{max}$")
+ax.set_ylabel("obj. value")
+ax.set_title("Objective at solution")
+plt.show(block=False)
diff --git a/skglm/experimental/__init__.py b/skglm/experimental/__init__.py
@@ -1,5 +1,7 @@
+from .reweighted import IterativeReweightedL1
 from .sqrt_lasso import SqrtLasso
 
 __all__ = [
-    SqrtLasso,
+    "SqrtLasso",
+    "IterativeReweightedL1",
 ]
diff --git a/skglm/experimental/reweighted.py b/skglm/experimental/reweighted.py
@@ -0,0 +1,90 @@
+import numpy as np
+from skglm.datafits import Quadratic
+from skglm.estimators import GeneralizedLinearEstimator
+from skglm.penalties import WeightedL1, L0_5
+from skglm.utils import compiled_clone
+
+
+class IterativeReweightedL1(GeneralizedLinearEstimator):
+    r"""Reweighted L1-norm estimator.
+
+    This estimator solves a non-convex problem by iteratively solving
+    convex surrogates involving weighted L1 norms.
+
+    Parameters
+    ----------
+    datafit : instance of BaseDatafit, optional
+        Datafit. Defaults to a ``Quadratic`` datafit.
+        ``datafit`` is replaced by a JIT-compiled instance when calling fit.
+
+    penalty : penalty instance, optional
+        Non-convex penalty. Defaults to ``L0_5(1.)``. It must expose an
+        ``alpha`` attribute and a ``derivative`` method; the latter gives the
+        weights of the next weighted L1 surrogate.
+        ``penalty`` is replaced by a JIT-compiled instance when calling fit.
+
+    solver : instance of BaseSolver, optional
+        Solver used on every surrogate. If None, an ``AndersonCD`` solver
+        with ``fit_intercept=False`` is created when calling fit.
+
+    n_reweights : int, optional
+        Number of reweightings performed (convex surrogates solved).
+        Must be at least 1.
+
+    Attributes
+    ----------
+    coef_ : array, shape (n_features,)
+        Parameter vector (w in the cost function formula).
+
+    loss_history_ : list
+        Objective history after every reweighting.
+
+    References
+    ----------
+    .. [1] Candès et al. (2007), Enhancing sparsity by reweighted l1 minimization
+           https://web.stanford.edu/~boyd/papers/pdf/rwl1.pdf
+    """
+
+    def __init__(self, datafit=Quadratic(), penalty=L0_5(1.), solver=None,
+                 n_reweights=5):
+        super().__init__(datafit=datafit, penalty=penalty, solver=solver)
+        self.n_reweights = n_reweights
+
+    def fit(self, X, y):
+        """Fit the model according to the given training data.
+
+        Parameters
+        ----------
+        X : array-like, shape (n_samples, n_features)
+            Training data, where n_samples is the number of samples and
+            n_features is the number of features.
+
+        y : array-like, shape (n_samples,)
+            Target vector relative to X.
+
+        Returns
+        -------
+        self :
+            Fitted estimator.
+
+        Raises
+        ------
+        ValueError
+            If the penalty has no ``derivative`` method, or if
+            ``n_reweights`` is smaller than 1.
+        """
+        if not hasattr(self.penalty, "derivative"):
+            raise ValueError(
+                "Missing `derivative` method. Reweighting is not implemented for " +
+                f"penalty {self.penalty.__class__.__name__}")
+
+        # without at least one surrogate solve there is no coefficient to store
+        if self.n_reweights < 1:
+            raise ValueError(
+                f"`n_reweights` must be at least 1, got {self.n_reweights}.")
+
+        if self.solver is None:
+            # local import: keeps the module-level imports of this file as is
+            from skglm.solvers import AndersonCD
+            # the objective below is evaluated as X @ coef_, i.e. without an
+            # intercept term, hence fit_intercept=False
+            self.solver = AndersonCD(fit_intercept=False)
+
+        n_features = X.shape[1]
+        # the first surrogate uses unit weights; they are updated in the loop
+        _penalty = compiled_clone(WeightedL1(self.penalty.alpha, np.ones(n_features)))
+        self.datafit = compiled_clone(self.datafit)
+        self.penalty = compiled_clone(self.penalty)
+
+        self.loss_history_ = []
+
+        for iter_reweight in range(self.n_reweights):
+            coef_ = self.solver.solve(X, y, self.datafit, _penalty)[0]
+            # weights of the next surrogate come from the non-convex penalty
+            _penalty.weights = self.penalty.derivative(coef_)
+
+            # track the original (non-convex) objective, not the surrogate one
+            loss = (self.datafit.value(y, coef_, X @ coef_)
+                    + self.penalty.value(coef_))
+            self.loss_history_.append(loss)
+
+            if self.solver.verbose:
+                print(f"Reweight {iter_reweight + 1}/{self.n_reweights}, "
+                      f"objective {loss}")
+
+        self.coef_ = coef_
+
+        return self
diff --git a/skglm/experimental/tests/test_reweighted.py b/skglm/experimental/tests/test_reweighted.py
@@ -0,0 +1,27 @@
+import numpy as np
+from numpy.linalg import norm
+
+from skglm.penalties.separable import L0_5
+from skglm.utils import make_correlated_data
+from skglm.experimental import IterativeReweightedL1
+from skglm.solvers import AndersonCD
+
+
+# small synthetic problem shared by the tests of this module
+n_samples, n_features = 20, 50
+X, y, w_true = make_correlated_data(
+    n_samples=n_samples, n_features=n_features, random_state=24)
+
+# penalty strength is set to a hundredth of the reference scale alpha_max
+alpha_max = norm(X.T @ y, ord=np.inf) / n_samples
+alpha = alpha_max / 100
+# tolerance passed to the solver
+tol = 1e-10
+
+
+def test_decreasing_loss():
+    """Check that reweighting does not increase the L0.5 objective."""
+    solver = AndersonCD(tol=tol, fit_intercept=False)
+    estimator = IterativeReweightedL1(penalty=L0_5(alpha), solver=solver)
+    history = estimator.fit(X, y).loss_history_
+
+    # the last objective is strictly below the first one
+    np.testing.assert_array_less(history[-1], history[0])
+    # no step between consecutive reweightings goes up by 1e-5 or more
+    np.testing.assert_array_less(np.diff(history), 1e-5)
diff --git a/skglm/penalties/separable.py b/skglm/penalties/separable.py
@@ -380,6 +380,10 @@ def value(self, w):
         """Compute the value of L0_5 at w."""
         return self.alpha * np.sum(np.abs(w) ** 0.5)
 
+    def derivative(self, w):
+        """Return ``1 / (2 * sqrt(|w|))`` element-wise, without the ``alpha`` factor.
+
+        A ``1e-12`` offset in the denominator keeps the result finite at zero.
+        """
+        sqrt_abs_w = np.sqrt(np.abs(w))
+        return 1. / (2. * sqrt_abs_w + 1e-12)
+
     def prox_1d(self, value, stepsize, j):
         """Compute the proximal operator of L0_5."""
         return prox_05(value, self.alpha * stepsize)
@@ -429,6 +433,10 @@ def value(self, w):
         """Compute the value of the L2_3 norm at w."""
         return self.alpha * np.sum(np.abs(w) ** (2/3))
 
+    def derivative(self, w):
+        """Return ``2 / (3 * |w| ** (1/3))`` element-wise, without the ``alpha`` factor.
+
+        A ``1e-12`` offset in the denominator keeps the result finite at zero.
+        """
+        cube_root_abs_w = np.abs(w) ** (1/3)
+        return 2 / (3 * cube_root_abs_w + 1e-12)
+
     def prox_1d(self, value, stepsize, j):
         """Compute the proximal operator of the L2_3 norm."""
         return prox_2_3(value, self.alpha * stepsize)

Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,7 @@`
	`1`	`+from .reweighted import IterativeReweightedL1`
`1`	`2`	`from .sqrt_lasso import SqrtLasso`
`2`	`3`
`3`	`4`	`__all__ = [`
`4`	`5`	`SqrtLasso,`
	`6`	`+ IterativeReweightedL1,`
`5`	`7`	`]`