Skip to content

Commit 4bee85f

Browse files
ENH - Add Pinball datafit (#134)
Co-authored-by: mathurinm <[email protected]>
1 parent cb43489 commit 4bee85f

File tree

2 files changed

+113
-0
lines changed

2 files changed

+113
-0
lines changed
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
import numpy as np
2+
from numba import float64
3+
from skglm.datafits import BaseDatafit
4+
from skglm.utils.prox_funcs import ST_vec
5+
6+
7+
class Pinball(BaseDatafit):
    r"""Pinball datafit.

    The datafit reads::

        sum_i quantile_level * max(y_i - Xw_i, 0) +
        (1 - quantile_level) * max(Xw_i - y_i, 0)

    with ``quantile_level`` in [0, 1].

    Using ``max(x, 0) = (x + |x|) / 2``, each term can equivalently be written
    ``|y_i - Xw_i| / 2 + (quantile_level - 1/2) * (y_i - Xw_i)``; ``prox`` and
    ``subdiff_distance`` rely on this form.

    Parameters
    ----------
    quantile_level : float
        Quantile level must be in [0, 1]. When ``quantile_level=0.5``,
        the datafit becomes a Least Absolute Deviation (LAD) datafit.
    """

    def __init__(self, quantile_level):
        self.quantile_level = quantile_level

    def value(self, y, w, Xw):
        """Return the pinball loss summed over all samples.

        Parameters
        ----------
        y : array
            Target values.

        w : array
            Coefficient vector (not used, the loss only depends on ``Xw``).

        Xw : array
            Model fit, same shape as ``y``.

        Returns
        -------
        value : float
            Sum over samples of the pinball loss of ``y - Xw``.
        """
        # implementation taken from
        # github.com/benchopt/benchmark_quantile_regression/blob/main/objective.py
        quantile_level = self.quantile_level

        residual = y - Xw
        # boolean mask selecting the samples with a non-negative residual
        sign = residual >= 0

        loss = (quantile_level * sign * residual -
                (1 - quantile_level) * (1 - sign) * residual)
        return np.sum(loss)

    def prox(self, w, step, y):
        """Prox of ``step * pinball``.

        With ``v = y - u`` the prox problem becomes a soft-thresholding of
        ``y - w`` shifted by ``(quantile_level - 1/2) * step`` with threshold
        ``step / 2``.
        """
        shift_cst = (self.quantile_level - 1/2) * step
        return y - ST_vec(y - w - shift_cst, step / 2)

    def prox_conjugate(self, z, step, y):
        """Prox of ``step * pinball^*``."""
        # using Moreau decomposition
        inv_step = 1 / step
        return z - step * self.prox(inv_step * z, inv_step, y)

    def subdiff_distance(self, Xw, z, y):
        """Distance of ``z`` to subdiff of pinball at ``Xw``.

        Parameters
        ----------
        Xw : array
            Point at which the subdifferential is evaluated.

        z : array
            Candidate subgradient, same shape as ``Xw``.

        y : array
            Target values.

        Returns
        -------
        max_distance : float
            Largest coordinate-wise distance between ``z_i`` and the
            subdifferential of the i-th pinball term at ``Xw_i``.
        """
        # computation note: as a function of u = Xw_i the i-th term is
        #   quantile_level * (y_i - u)        if y_i > u  -> slope -quantile_level
        #   (1 - quantile_level) * (u - y_i)  if y_i < u  -> slope 1 - quantile_level
        # and at the kink y_i == u the subdifferential is the whole interval
        # [-quantile_level, 1 - quantile_level].
        y_minus_Xw = y - Xw
        lower = -self.quantile_level
        upper = 1. - self.quantile_level

        max_distance = 0.
        for i in range(len(y)):

            if y_minus_Xw[i] == 0.:
                # distance to the interval [lower, upper]
                distance_i = max(0., lower - z[i], z[i] - upper)
            elif y_minus_Xw[i] > 0.:
                distance_i = abs(z[i] - lower)
            else:
                distance_i = abs(z[i] - upper)

            max_distance = max(max_distance, distance_i)

        return max_distance

    def get_spec(self):
        """Return the (attribute name, numba type) pairs describing the datafit."""
        spec = (
            ('quantile_level', float64),
        )
        return spec

    def params_to_dict(self):
        """Return the datafit parameters as a dict."""
        return dict(quantile_level=self.quantile_level)
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import pytest
2+
import numpy as np
3+
from numpy.linalg import norm
4+
5+
from skglm.penalties import L1
6+
from skglm.experimental.pdcd_ws import PDCD_WS
7+
from skglm.experimental.quantile_regression import Pinball
8+
9+
from skglm.utils.data import make_correlated_data
10+
from sklearn.linear_model import QuantileRegressor
11+
12+
13+
@pytest.mark.parametrize('quantile_level', [0.3, 0.5, 0.7])
def test_PDCD_WS(quantile_level):
    """Compare PDCD_WS on an L1-penalized pinball problem with scikit-learn."""
    n_samples, n_features = 50, 10
    X, y, _ = make_correlated_data(n_samples, n_features, random_state=123)

    # optimality condition for w = 0.
    # for all g in subdiff pinball(y), g must be in subdiff ||.||_1(0)
    # hint: use max(x, 0) = (x + |x|) / 2 to get subdiff pinball
    dual_at_zero = np.sign(y) / 2 + (quantile_level - 0.5)
    alpha_max = norm(X.T @ dual_at_zero, ord=np.inf)
    alpha = alpha_max / 5

    # the same vector also serves as starting point for the dual variable
    solver = PDCD_WS(dual_init=dual_at_zero)
    w = solver.solve(X, y, Pinball(quantile_level), L1(alpha))[0]

    # alpha is divided by n_samples on the scikit-learn side
    # (presumably because its loss is averaged over samples -- see sklearn docs)
    reference = QuantileRegressor(
        quantile=quantile_level,
        alpha=alpha / n_samples,
        fit_intercept=False,
    )
    reference.fit(X, y)

    np.testing.assert_allclose(w, reference.coef_, atol=1e-5)
35+
36+
37+
if __name__ == "__main__":
    # Nothing to do when the module is executed directly.
    pass

0 commit comments

Comments
 (0)