
Commit 0e5c938

ENH - add L-BFGS solver wrapping scipy for smooth penalties (#165)
1 parent 395af5e commit 0e5c938

File tree

7 files changed: +172 additions, -3 deletions

doc/api.rst

Lines changed: 2 additions & 0 deletions

@@ -37,6 +37,7 @@ Penalties
    L0_5
    L1
    L1_plus_L2
+   L2
    L2_3
    MCPenalty
    WeightedL1
@@ -78,6 +79,7 @@ Solvers
    GramCD
    GroupBCD
    GroupProxNewton
+   LBFGS
    MultiTaskBCD
    ProxNewton

skglm/datafits/single_task.py

Lines changed: 3 additions & 0 deletions

@@ -182,6 +182,9 @@ def value(self, y, w, Xw):
     def gradient_scalar(self, X, y, w, Xw, j):
         return (- X[:, j] @ (y * sigmoid(- y * Xw))) / len(y)
 
+    def gradient(self, X, y, Xw):
+        return X.T @ self.raw_grad(y, Xw)
+
     def full_grad_sparse(
             self, X_data, X_indptr, X_indices, y, Xw):
         n_features = X_indptr.shape[0] - 1
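For context: the new `gradient` method composes the datafit's per-sample gradient with the design matrix via the chain rule, d/dw f(Xw) = X.T f'(Xw). A minimal NumPy sketch of the same computation for the logistic datafit, assuming `raw_grad` returns `-y * sigmoid(-y * Xw) / len(y)` (consistent with `gradient_scalar` above):

import numpy as np

def sigmoid(t):
    # plain logistic function; skglm ships its own compiled helper
    return 1 / (1 + np.exp(-t))

def logistic_full_gradient(X, y, Xw):
    # per-sample derivative of the mean log-loss w.r.t. Xw
    raw_grad = -y * sigmoid(-y * Xw) / len(y)
    # map back to coefficient space; column j of X recovers
    # gradient_scalar(X, y, w, Xw, j)
    return X.T @ raw_grad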

skglm/penalties/__init__.py

Lines changed: 2 additions & 2 deletions

@@ -1,6 +1,6 @@
 from .base import BasePenalty
 from .separable import (
-    L1_plus_L2, L0_5, L1, L2_3, MCPenalty, SCAD, WeightedL1, IndicatorBox,
+    L1_plus_L2, L0_5, L1, L2, L2_3, MCPenalty, SCAD, WeightedL1, IndicatorBox,
     PositiveConstraint
 )
 from .block_separable import (
@@ -12,6 +12,6 @@
 
 __all__ = [
     BasePenalty,
-    L1_plus_L2, L0_5, L1, L2_3, MCPenalty, SCAD, WeightedL1, IndicatorBox,
+    L1_plus_L2, L0_5, L1, L2, L2_3, MCPenalty, SCAD, WeightedL1, IndicatorBox,
     PositiveConstraint, L2_05, L2_1, BlockMCPenalty, BlockSCAD, WeightedGroupL2, SLOPE
 ]

skglm/penalties/separable.py

Lines changed: 31 additions & 0 deletions

@@ -557,3 +557,34 @@ def is_penalized(self, n_features):
     def generalized_support(self, w):
         """Return a mask with non-zero coefficients."""
         return w != 0
+
+
+class L2(BasePenalty):
+    r""":math:`\ell_2` penalty.
+
+    The penalty reads
+
+    .. math::
+
+        \alpha / 2 ||w||_2^2
+    """
+
+    def __init__(self, alpha):
+        self.alpha = alpha
+
+    def get_spec(self):
+        spec = (
+            ('alpha', float64),
+        )
+        return spec
+
+    def params_to_dict(self):
+        return dict(alpha=self.alpha)
+
+    def value(self, w):
+        """Compute the value of the L2 penalty."""
+        return self.alpha * (w ** 2).sum() / 2
+
+    def gradient(self, w):
+        """Compute the gradient of the L2 penalty."""
+        return self.alpha * w
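A quick sanity check of the new penalty (a sketch, not part of the commit): the gradient should match a centered finite difference of `value`.

import numpy as np
from skglm.penalties import L2

penalty = L2(alpha=0.1)
w = np.array([1.0, -2.0, 0.5])

print(penalty.value(w))     # 0.1 * 5.25 / 2 = 0.2625
print(penalty.gradient(w))  # 0.1 * w = [0.1, -0.2, 0.05]

# finite-difference check of gradient against value
eps = 1e-6
fd_grad = np.array([
    (penalty.value(w + eps * e) - penalty.value(w - eps * e)) / (2 * eps)
    for e in np.eye(w.size)
])
np.testing.assert_allclose(fd_grad, penalty.gradient(w), rtol=1e-5)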

skglm/solvers/__init__.py

Lines changed: 2 additions & 1 deletion

@@ -6,7 +6,8 @@
 from .multitask_bcd import MultiTaskBCD
 from .prox_newton import ProxNewton
 from .group_prox_newton import GroupProxNewton
+from .lbfgs import LBFGS
 
 
 __all__ = [AndersonCD, BaseSolver, FISTA, GramCD, GroupBCD, MultiTaskBCD, ProxNewton,
-           GroupProxNewton]
+           GroupProxNewton, LBFGS]

skglm/solvers/lbfgs.py

Lines changed: 92 additions & 0 deletions

@@ -0,0 +1,92 @@
+import warnings
+from sklearn.exceptions import ConvergenceWarning
+
+import numpy as np
+import scipy.optimize
+from numpy.linalg import norm
+
+from skglm.solvers import BaseSolver
+
+
+class LBFGS(BaseSolver):
+    """A wrapper for scipy L-BFGS solver.
+
+    Refer to `scipy L-BFGS-B <https://docs.scipy.org/doc/scipy/reference/optimize.
+    minimize-lbfgsb.html#optimize-minimize-lbfgsb>`_ documentation for details.
+
+    Parameters
+    ----------
+    max_iter : int, default 50
+        Maximum number of iterations.
+
+    tol : float, default 1e-4
+        Tolerance for convergence.
+
+    verbose : bool, default False
+        Amount of verbosity. 0/False is silent.
+    """
+
+    def __init__(self, max_iter=50, tol=1e-4, verbose=False):
+        self.max_iter = max_iter
+        self.tol = tol
+        self.verbose = verbose
+
+    def solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None):
+
+        def objective_function(w):
+            Xw = X @ w
+            datafit_value = datafit.value(y, w, Xw)
+            penalty_value = penalty.value(w)
+
+            return datafit_value + penalty_value
+
+        def jacobian_function(w):
+            Xw = X @ w
+            datafit_grad = datafit.gradient(X, y, Xw)
+            penalty_grad = penalty.gradient(w)
+
+            return datafit_grad + penalty_grad
+
+        def callback_post_iter(w_k):
+            # save p_obj
+            p_obj = objective_function(w_k)
+            p_objs_out.append(p_obj)
+
+            if self.verbose:
+                grad = jacobian_function(w_k)
+                stop_crit = norm(grad)
+
+                it = len(p_objs_out)
+                print(
+                    f"Iteration {it}: {p_obj:.10f}, "
+                    f"stopping crit: {stop_crit:.2e}"
+                )
+
+        n_features = X.shape[1]
+        w = np.zeros(n_features) if w_init is None else w_init
+        p_objs_out = []
+
+        result = scipy.optimize.minimize(
+            fun=objective_function,
+            jac=jacobian_function,
+            x0=w,
+            method="L-BFGS-B",
+            options=dict(
+                maxiter=self.max_iter,
+                gtol=self.tol
+            ),
+            callback=callback_post_iter,
+        )
+
+        if not result.success:
+            warnings.warn(
+                f"`LBFGS` did not converge for tol={self.tol:.3e} "
+                f"and max_iter={self.max_iter}.\n"
+                "Consider increasing `max_iter` and/or `tol`.",
+                category=ConvergenceWarning
+            )
+
+        w = result.x
+        stop_crit = norm(result.jac)
+
+        return w, np.asarray(p_objs_out), stop_crit
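End-to-end usage sketch (the synthetic data is illustrative): the wrapper only calls `datafit.gradient` and `penalty.gradient`, so both terms must be smooth, as in the L2-regularized logistic regression below.

import numpy as np
from skglm.datafits import Logistic
from skglm.penalties import L2
from skglm.solvers import LBFGS
from skglm.utils.jit_compilation import compiled_clone

rng = np.random.default_rng(0)
X = rng.standard_normal((100, 5))
y = np.sign(rng.standard_normal(100))

datafit = compiled_clone(Logistic())
penalty = compiled_clone(L2(alpha=1.0))

# returns the solution, the per-iteration objectives, and the final
# gradient norm used as the stopping criterion
w, p_objs, stop_crit = LBFGS(max_iter=50, tol=1e-6).solve(X, y, datafit, penalty)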

skglm/tests/test_lbfgs_solver.py

Lines changed: 40 additions & 0 deletions

@@ -0,0 +1,40 @@
+import numpy as np
+
+from skglm.solvers import LBFGS
+from skglm.penalties import L2
+from skglm.datafits import Logistic
+
+from sklearn.linear_model import LogisticRegression
+
+from skglm.utils.data import make_correlated_data
+from skglm.utils.jit_compilation import compiled_clone
+
+
+def test_lbfgs_L2_logreg():
+    reg = 1.
+    n_samples, n_features = 50, 10
+
+    X, y, _ = make_correlated_data(
+        n_samples, n_features, random_state=0)
+    y = np.sign(y)
+
+    # fit L-BFGS
+    datafit = compiled_clone(Logistic())
+    penalty = compiled_clone(L2(reg))
+    w, *_ = LBFGS().solve(X, y, datafit, penalty)
+
+    # fit scikit-learn
+    estimator = LogisticRegression(
+        penalty='l2',
+        C=1 / (n_samples * reg),
+        fit_intercept=False
+    )
+    estimator.fit(X, y)
+
+    np.testing.assert_allclose(
+        w, estimator.coef_.flatten(), atol=1e-4
+    )
+
+
+if __name__ == "__main__":
+    pass
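Why C = 1 / (n_samples * reg): skglm minimizes the mean log-loss plus (reg / 2) * ||w||_2^2, while scikit-learn's LogisticRegression minimizes C * sum_i log(1 + exp(-y_i * x_i^T w)) + ||w||_2^2 / 2. Dividing skglm's objective by reg gives (1 / (n_samples * reg)) * sum of log-losses + ||w||_2^2 / 2, so the two problems share the same minimizer exactly when C = 1 / (n_samples * reg).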
