diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 633976fd9..94ab9a946 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -25,5 +25,6 @@ jobs:
         pip install numpydoc
         pip install .
         pip install statsmodels cvxopt
+        pip install git+https://github.com/jolars/pyslope.git
     - name: Test with pytest
       run: pytest -v skglm/
diff --git a/doc/api.rst b/doc/api.rst
index d05ed8a57..49bbf198b 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -43,6 +43,7 @@ Penalties
    WeightedGroupL2
    SCAD
    BlockSCAD
+   SLOPE
 
 
 Datafits
diff --git a/skglm/penalties/__init__.py b/skglm/penalties/__init__.py
index 2a788efb8..88039db46 100644
--- a/skglm/penalties/__init__.py
+++ b/skglm/penalties/__init__.py
@@ -6,9 +6,11 @@
     L2_05, L2_1, BlockMCPenalty, BlockSCAD, WeightedGroupL2
 )
 
+from .non_separable import SLOPE
+
 __all__ = [
     BasePenalty,
     L1_plus_L2, L0_5, L1, L2_3, MCPenalty, SCAD, WeightedL1, IndicatorBox,
-    L2_05, L2_1, BlockMCPenalty, BlockSCAD, WeightedGroupL2
+    L2_05, L2_1, BlockMCPenalty, BlockSCAD, WeightedGroupL2, SLOPE
 ]
diff --git a/skglm/penalties/non_separable.py b/skglm/penalties/non_separable.py
new file mode 100644
index 000000000..4ae458250
--- /dev/null
+++ b/skglm/penalties/non_separable.py
@@ -0,0 +1,60 @@
+import numpy as np
+from numba import float64
+
+from skglm.penalties.base import BasePenalty
+from skglm.utils import prox_SLOPE
+
+
+class SLOPE(BasePenalty):
+    """Sorted L-One Penalized Estimation (SLOPE) penalty.
+
+    Attributes
+    ----------
+    alphas : array, shape (n_features,)
+        Contains the regularization levels for every feature.
+        When ``alphas`` contains a single unique value, ``SLOPE``
+        is equivalent to the ``L1`` penalty.
+
+    References
+    ----------
+    .. [1] M. Bogdan, E. van den Berg, C. Sabatti, W. Su, E. Candes
+       "SLOPE - Adaptive Variable Selection via Convex Optimization",
+       The Annals of Applied Statistics 9 (3): 1103-40
+       https://doi.org/10.1214/15-AOAS842
+    """
+
+    def __init__(self, alphas):
+        self.alphas = alphas
+
+    def get_spec(self):
+        spec = (
+            ('alphas', float64[:]),
+        )
+        return spec
+
+    def params_to_dict(self):
+        return dict(alphas=self.alphas)
+
+    def value(self, w):
+        """Compute the value of SLOPE at w."""
+        return np.sum(np.sort(np.abs(w)) * self.alphas[::-1])
+
+    def prox_vec(self, x, stepsize):
+        def _prox(_x, _alphas):
+            sign_x = np.sign(_x)
+            _x = np.abs(_x)
+            # apply the prox in decreasing order, then scatter back and restore signs
+            sorted_indices = np.argsort(_x)[::-1]
+            prox = np.zeros_like(_x)
+            prox[sorted_indices] = prox_SLOPE(_x[sorted_indices], _alphas)
+            return prox * sign_x
+        return _prox(x, self.alphas * stepsize)
+
+    def prox_1d(self, value, stepsize, j):
+        raise ValueError(
+            "No coordinate-wise proximal operator for SLOPE. Use `prox_vec` instead."
+        )
+
+    def subdiff_distance(self, w, grad, ws):
+        raise ValueError(
+            "No subdifferential distance for SLOPE. "
+            "Use `opt_strategy='fixpoint'`"
+        )
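Note for reviewers: below is a minimal sketch of how the new penalty is meant to compose with the existing estimator API, assuming this patch is installed. The decreasing geometric `alphas` sequence is an arbitrary illustration, not a recommended default.

```python
import numpy as np

from skglm import GeneralizedLinearEstimator
from skglm.penalties import SLOPE
from skglm.solvers import FISTA
from skglm.utils import make_correlated_data

X, y, _ = make_correlated_data(n_samples=100, n_features=40, random_state=0)

# SLOPE applies larger regularization levels to larger coefficients,
# so `alphas` is a non-increasing sequence with one level per feature
alpha_max = np.linalg.norm(X.T @ y, ord=np.inf) / len(y)
alphas = alpha_max / 100 * np.geomspace(2., 1., num=X.shape[1])

est = GeneralizedLinearEstimator(
    penalty=SLOPE(alphas),
    solver=FISTA(max_iter=1000, tol=1e-8, opt_strategy="fixpoint"),
).fit(X, y)
print(est.coef_)
```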
diff --git a/skglm/solvers/fista.py b/skglm/solvers/fista.py
index bbdc451b7..a36c30a96 100644
--- a/skglm/solvers/fista.py
+++ b/skglm/solvers/fista.py
@@ -27,10 +27,13 @@ class FISTA(BaseSolver):
         https://epubs.siam.org/doi/10.1137/080716542
     """
 
-    def __init__(self, max_iter=100, tol=1e-4, verbose=0):
+    def __init__(self, max_iter=100, tol=1e-4, opt_strategy="subdiff", verbose=0):
         self.max_iter = max_iter
         self.tol = tol
         self.verbose = verbose
+        self.opt_strategy = opt_strategy
+        self.fit_intercept = False  # needed by GeneralizedLinearEstimator
+        self.warm_start = False
 
     def solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None):
         p_objs_out = []
@@ -60,11 +63,22 @@ def solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None):
             step = 1 / lipschitz
 
             z -= step * grad
-            w = _prox_vec(w, z, penalty, step)
+            if hasattr(penalty, "prox_vec"):
+                w = penalty.prox_vec(z, step)
+            else:
+                w = _prox_vec(w, z, penalty, step)
             Xw = X @ w
             z = w + (t_old - 1.) / t_new * (w - w_old)
 
-            opt = penalty.subdiff_distance(w, grad, all_features)
+            if self.opt_strategy == "subdiff":
+                opt = penalty.subdiff_distance(w, grad, all_features)
+            elif self.opt_strategy == "fixpoint":
+                opt = np.abs(w - penalty.prox_vec(w - grad / lipschitz, 1 / lipschitz))
+            else:
+                raise ValueError(
+                    "Unknown optimality strategy. Expected "
+                    f"`subdiff` or `fixpoint`. Got {self.opt_strategy}")
+
             stop_crit = np.max(opt)
 
             p_obj = datafit.value(y, w, Xw) + penalty.value(w)
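The `fixpoint` strategy stops when `w` is numerically a fixed point of the proximal gradient map, which is the natural optimality check for a non-separable penalty like SLOPE where `subdiff_distance` is unavailable. A self-contained sketch of the criterion follows; `fixpoint_violation` is a hypothetical helper, and L1 soft-thresholding stands in for a generic `prox_vec`.

```python
import numpy as np

def fixpoint_violation(w, grad, lipschitz, prox_vec):
    # at an optimum, w == prox_vec(w - grad / L, 1 / L) holds exactly,
    # so the size of this residual measures suboptimality
    return np.abs(w - prox_vec(w - grad / lipschitz, 1 / lipschitz))

def soft_thresh(x, threshold):
    # prox of alpha * ||.||_1 with stepsize t is soft-thresholding at alpha * t
    return np.sign(x) * np.maximum(np.abs(x) - threshold, 0.)

rng = np.random.default_rng(0)
X = rng.standard_normal((50, 10))
y = X @ rng.standard_normal(10)
w = np.zeros(10)
grad = X.T @ (X @ w - y) / len(y)           # gradient of (1/2n) ||y - Xw||^2
L = np.linalg.norm(X, ord=2) ** 2 / len(y)  # Lipschitz constant of the gradient

viol = fixpoint_violation(w, grad, L, lambda x, step: soft_thresh(x, 0.1 * step))
print(np.max(viol))  # the solver stops once this drops below `tol`
```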
diff --git a/skglm/tests/test_penalties.py b/skglm/tests/test_penalties.py
index 783f7978d..f153d90a7 100644
--- a/skglm/tests/test_penalties.py
+++ b/skglm/tests/test_penalties.py
@@ -6,10 +6,10 @@
 
 from skglm.datafits import Quadratic, QuadraticMultiTask
 from skglm.penalties import (
-    L1, L1_plus_L2, WeightedL1, MCPenalty, SCAD, IndicatorBox, L0_5, L2_3,
+    L1, L1_plus_L2, WeightedL1, MCPenalty, SCAD, IndicatorBox, L0_5, L2_3, SLOPE,
     L2_1, L2_05, BlockMCPenalty, BlockSCAD)
-from skglm import GeneralizedLinearEstimator
-from skglm.solvers import AndersonCD, MultiTaskBCD
+from skglm import GeneralizedLinearEstimator, Lasso
+from skglm.solvers import AndersonCD, MultiTaskBCD, FISTA
 from skglm.utils import make_correlated_data
 
 
@@ -25,6 +25,8 @@
 alpha_max = norm(X.T @ y, ord=np.inf) / n_samples
 alpha = alpha_max / 1000
 
+tol = 1e-10
+
 penalties = [
     L1(alpha=alpha),
     L1_plus_L2(alpha=alpha, l1_ratio=0.5),
@@ -44,7 +46,6 @@
 
 
 @pytest.mark.parametrize('penalty', penalties)
 def test_subdiff_diff(penalty):
-    tol = 1e-10  # tol=1e-14 is too low when coefs are of order 1. square roots are computed in
     # some penalties and precision is lost
     est = GeneralizedLinearEstimator(
@@ -58,7 +59,6 @@
 
 @pytest.mark.parametrize('block_penalty', block_penalties)
 def test_subdiff_diff_block(block_penalty):
-    tol = 1e-10  # see test_subdiff_dist
     est = GeneralizedLinearEstimator(
         datafit=QuadraticMultiTask(),
         penalty=block_penalty,
@@ -68,5 +68,38 @@
     assert_array_less(est.stop_crit_, est.solver.tol)
 
 
+def test_slope_lasso():
+    # check that when alphas = [alpha, ..., alpha], SLOPE and L1 solutions match
+    alphas = np.full(n_features, alpha)
+    est = GeneralizedLinearEstimator(
+        penalty=SLOPE(alphas),
+        solver=FISTA(max_iter=1000, tol=tol, opt_strategy="fixpoint"),
+    ).fit(X, y)
+    lasso = Lasso(alpha, fit_intercept=False, tol=tol).fit(X, y)
+    np.testing.assert_allclose(est.coef_, lasso.coef_, rtol=1e-5)
+
+
+def test_slope():
+    # compare solutions with `pyslope`: https://github.com/jolars/pyslope
+    try:
+        from slope.solvers import pgd_slope  # noqa
+        from slope.utils import lambda_sequence  # noqa
+    except ImportError:
+        pytest.xfail(
+            "This test requires pyslope to run.\n"
+            "https://github.com/jolars/pyslope")
+
+    q = 0.1
+    alphas = lambda_sequence(
+        X, y, fit_intercept=False, reg=alpha / alpha_max, q=q)
+    ours = GeneralizedLinearEstimator(
+        penalty=SLOPE(alphas),
+        solver=FISTA(max_iter=1000, tol=tol, opt_strategy="fixpoint"),
+    ).fit(X, y)
+    pyslope_out = pgd_slope(
+        X, y, alphas, fit_intercept=False, max_it=1000, gap_tol=tol)
+    np.testing.assert_allclose(ours.coef_, pyslope_out["beta"], rtol=1e-5)
+
+
 if __name__ == "__main__":
     pass
diff --git a/skglm/utils.py b/skglm/utils.py
index 2932c1822..baf06c1b7 100644
--- a/skglm/utils.py
+++ b/skglm/utils.py
@@ -559,5 +559,52 @@ def _XT_dot_vec(X_data, X_indptr, X_indices, vec):
     return result
 
 
+@njit
+def prox_SLOPE(z, alphas):
+    """Fast computation of the proximal operator of SLOPE.
+
+    Extracted from:
+    https://github.com/agisga/grpSLOPE/blob/master/src/proxSortedL1.c
+
+    Parameters
+    ----------
+    z : array, shape (n_features,)
+        Non-negative coefficient vector sorted in non-increasing order.
+
+    alphas : array, shape (n_features,)
+        Regularization levels sorted in non-increasing order.
+    """
+    n_features = z.shape[0]
+    x = np.empty(n_features)
+
+    k = 0
+    # idx_i[k] and idx_j[k] delimit the k-th block of coefficients
+    # that share a common value after averaging
+    idx_i = np.empty((n_features,), dtype=np.int64)
+    idx_j = np.empty((n_features,), dtype=np.int64)
+    s = np.empty((n_features,), dtype=np.float64)
+    w = np.empty((n_features,), dtype=np.float64)
+
+    for i in range(n_features):
+        idx_i[k] = i
+        idx_j[k] = i
+        s[k] = z[i] - alphas[i]
+        w[k] = s[k]
+
+        # merge adjacent blocks as long as their averages are non-monotone
+        while k > 0 and w[k - 1] <= w[k]:
+            k -= 1
+            idx_j[k] = i
+            s[k] += s[k + 1]
+            w[k] = s[k] / (i - idx_i[k] + 1)
+
+        k += 1
+
+    # each block takes the non-negative part of its average value
+    for j in range(k):
+        d = w[j]
+        d = 0 if d < 0 else d
+        for i in range(idx_i[j], idx_j[j] + 1):
+            x[i] = d
+
+    return x
+
+
 if __name__ == '__main__':
     pass
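As a quick sanity check on `prox_SLOPE` (a sketch, assuming the patched skglm is importable): with constant `alphas`, SLOPE reduces to the L1 penalty per the docstring above, so the prox of a sorted non-negative vector must equal plain soft-thresholding.

```python
import numpy as np
from skglm.utils import prox_SLOPE

rng = np.random.default_rng(0)
# prox_SLOPE expects non-negative inputs sorted in non-increasing order
z = np.sort(rng.uniform(0., 5., size=10))[::-1].copy()
alphas = np.full(10, 0.7)  # a single unique level: the L1 / Lasso case

out = prox_SLOPE(z, alphas)
expected = np.maximum(z - 0.7, 0.)  # soft-thresholding at 0.7
np.testing.assert_allclose(out, expected)
```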