
Commit f5e8154

Authored by EnLAI111, PABannier and mathurinm
ENH add Huber datafit (#14)
* add Huber datafits
* add L1 penalty without the 1st term
* add jitclass
* rename file
* fix pbm
* fix pbm
* correct grad sparse
* correct grad sparse
* change parameter of grad sparse
* get_sys_info
* delete get_sys_info
* fix issue
* fix issue
* fix error
* fix error
* fix error
* move huber to single_task and init
* explicit loop computations
* linter happy
* add test script for Huber
* add Huber to doc
* test huber in test_datafits
* import assert_array_less

Co-authored-by: Pierre-Antoine Bannier <[email protected]>
Co-authored-by: mathurinm <[email protected]>
1 parent 1f13430 commit f5e8154

File tree

4 files changed: +135 -0 lines changed

doc/api.rst

Lines changed: 1 addition & 0 deletions
@@ -51,6 +51,7 @@ Datafits
 .. autosummary::
    :toctree: generated/

+   Huber
    Logistic
    Quadratic
    QuadraticGroup

skglm/datafits/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -2,6 +2,7 @@

 from .single_task import (  # noqa F401
     Quadratic, Quadratic_32, QuadraticSVC, QuadraticSVC_32, Logistic, Logistic_32,
+    Huber, Huber_32,
 )

 from .multi_task import QuadraticMultiTask  # noqa F401
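
With the new names exported, the Huber datafit plugs into the existing estimator API. The sketch below is not part of the commit; it only reuses names that appear in the test added further down (GeneralizedLinearEstimator, WeightedL1, make_correlated_data), with all-zero penalty weights so the fit is effectively an unpenalized robust regression:

import numpy as np

from skglm import GeneralizedLinearEstimator
from skglm.datafits import Huber
from skglm.penalties import WeightedL1
from skglm.utils import make_correlated_data

X, y, _ = make_correlated_data(n_samples=50, n_features=20, random_state=0)

# Huber datafit with all-zero L1 weights: plain robust regression
model = GeneralizedLinearEstimator(
    datafit=Huber(1.35),
    penalty=WeightedL1(1., np.zeros(X.shape[1])),
).fit(X, y)
print(model.coef_)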

skglm/datafits/single_task.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,3 +232,105 @@ def full_grad_sparse(
232232

233233

234234
QuadraticSVC, QuadraticSVC_32 = jit_factory(_QuadraticSVC, spec_quadratic_svc)
235+
236+
237+
spec_huber = [
238+
('delta', float64),
239+
('lipschitz', float64[:])
240+
]
241+
242+
243+
class _Huber(BaseDatafit):
244+
"""Huber datafit.
245+
246+
The datafit reads::
247+
248+
(1 / n_samples) * sum_{i=1}^{n_samples} f(y_i - Xw_i)
249+
250+
where f is the Huber function:
251+
252+
f(x) =
253+
1 / 2 * x^2 if x <= delta
254+
delta * |x| - 1/2 * delta^2 if x > delta
255+
256+
Attributes
257+
----------
258+
lipschitz : array, shape (n_features,)
259+
The coordinatewise gradient Lipschitz constants.
260+
261+
Note
262+
----
263+
The class _Huber is subsequently decorated with a @jitclass decorator with
264+
the `jit_factory` function to be compiled. This allows for faster computations
265+
using Numba JIT compiler.
266+
"""
267+
268+
def __init__(self, delta):
269+
self.delta = delta
270+
271+
def initialize(self, X, y):
272+
n_features = X.shape[1]
273+
self.lipschitz = np.zeros(n_features, dtype=X.dtype)
274+
for j in range(n_features):
275+
self.lipschitz[j] = (X[:, j] ** 2).sum() / len(y)
276+
277+
def initialize_sparse(
278+
self, X_data, X_indptr, X_indices, y):
279+
n_features = len(X_indptr) - 1
280+
self.lipschitz = np.zeros(n_features, dtype=X_data.dtype)
281+
for j in range(n_features):
282+
nrm2 = 0.
283+
for idx in range(X_indptr[j], X_indptr[j + 1]):
284+
nrm2 += X_data[idx] ** 2
285+
self.lipschitz[j] = nrm2 / len(y)
286+
287+
def value(self, y, w, Xw):
288+
n_samples = len(y)
289+
res = 0.
290+
for i in range(n_samples):
291+
tmp = abs(y[i] - Xw[i])
292+
if tmp < self.delta:
293+
res += 0.5 * tmp ** 2
294+
else:
295+
res += self.delta * tmp - 0.5 * self.delta ** 2
296+
return res / n_samples
297+
298+
def gradient_scalar(self, X, y, w, Xw, j):
299+
n_samples = len(y)
300+
grad_j = 0.
301+
for i in range(n_samples):
302+
tmp = y[i] - Xw[i]
303+
if abs(tmp) < self.delta:
304+
grad_j += - X[i, j] * tmp
305+
else:
306+
grad_j += - X[i, j] * np.sign(tmp) * self.delta
307+
return grad_j / n_samples
308+
309+
def gradient_scalar_sparse(self, X_data, X_indptr, X_indices, y, Xw, j):
310+
grad_j = 0.
311+
for i in range(X_indptr[j], X_indptr[j + 1]):
312+
tmp = y[X_indices[i]] - Xw[X_indices[i]]
313+
if np.abs(tmp) < self.delta:
314+
grad_j += - X_data[i] * tmp
315+
else:
316+
grad_j += - X_data[i] * np.sign(tmp) * self.delta
317+
return grad_j / len(Xw)
318+
319+
def full_grad_sparse(
320+
self, X_data, X_indptr, X_indices, y, Xw):
321+
n_features = X_indptr.shape[0] - 1
322+
n_samples = y.shape[0]
323+
grad = np.zeros(n_features, dtype=Xw.dtype)
324+
for j in range(n_features):
325+
grad_j = 0.
326+
for i in range(X_indptr[j], X_indptr[j + 1]):
327+
tmp = y[X_indices[i]] - Xw[X_indices[i]]
328+
if np.abs(tmp) < self.delta:
329+
grad_j += - X_data[i] * tmp
330+
else:
331+
grad_j += - X_data[i] * np.sign(tmp) * self.delta
332+
grad[j] = grad_j / n_samples
333+
return grad
334+
335+
336+
Huber, Huber_32 = jit_factory(_Huber, spec_huber)
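
For reference, the piecewise loss and its coordinatewise gradient can be sanity-checked against plain NumPy. The sketch below is not part of the commit; it assumes the compiled Huber class can be called directly on small float64 arrays, and uses a central finite difference in place of the analytical gradient:

import numpy as np
from skglm.datafits import Huber

rng = np.random.RandomState(0)
X = rng.randn(30, 4)
y = rng.randn(30)
w = rng.randn(4)
Xw = X @ w

delta = 1.35
datafit = Huber(delta)
datafit.initialize(X, y)  # precomputes the coordinatewise Lipschitz constants

# value() against a direct NumPy evaluation of the docstring formula
residual = y - Xw
loss = np.where(np.abs(residual) <= delta, 0.5 * residual ** 2,
                delta * np.abs(residual) - 0.5 * delta ** 2)
np.testing.assert_allclose(datafit.value(y, w, Xw), loss.mean())

# gradient_scalar() against a central finite difference on coordinate j
j, eps = 2, 1e-6
w_up, w_down = w.copy(), w.copy()
w_up[j] += eps
w_down[j] -= eps
fd_grad = (datafit.value(y, w_up, X @ w_up)
           - datafit.value(y, w_down, X @ w_down)) / (2 * eps)
np.testing.assert_allclose(datafit.gradient_scalar(X, y, w, Xw, j), fd_grad, rtol=1e-5)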

skglm/tests/test_datafits.py

Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
+import numpy as np
+
+from sklearn.linear_model import HuberRegressor
+from numpy.testing import assert_allclose, assert_array_less
+
+from skglm.datafits import Huber
+from skglm.penalties import WeightedL1
+from skglm import GeneralizedLinearEstimator
+from skglm.utils import make_correlated_data
+
+
+def test_huber_datafit():
+    # test only the datafit: no other implementation exists with a sparse penalty
+    X, y, _ = make_correlated_data(n_samples=20, n_features=10, random_state=0)
+    # disable sklearn's squared L2 regularization (alpha=0)
+    their = HuberRegressor(
+        fit_intercept=False, alpha=0, tol=1e-12, epsilon=1.35
+    ).fit(X, y)
+
+    # sklearn also optimizes over a scale parameter, so we must match delta:
+    delta = their.epsilon * their.scale_
+
+    # TODO we should have an unpenalized solver
+    ours = GeneralizedLinearEstimator(
+        datafit=Huber(delta),
+        penalty=WeightedL1(1, np.zeros(X.shape[1])),
+        tol=1e-14,
+    ).fit(X, y)
+
+    assert_allclose(ours.coef_, their.coef_, rtol=1e-3)
+    assert_array_less(ours.stop_crit_, ours.tol)
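
The test above exercises only the dense code path. A complementary consistency check between the dense and sparse gradients is sketched below; it is not part of the commit and assumes a CSC matrix, whose data / indptr / indices arrays match the X_data / X_indptr / X_indices arguments of the sparse methods:

import numpy as np
from scipy import sparse

from skglm.datafits import Huber
from skglm.utils import make_correlated_data

X, y, _ = make_correlated_data(n_samples=20, n_features=10, random_state=0)
X_csc = sparse.csc_matrix(X)
w = np.random.RandomState(0).randn(X.shape[1])
Xw = X @ w

datafit = Huber(1.35)
datafit.initialize(X, y)

# gradient assembled coordinate by coordinate on the dense matrix
dense_grad = np.array(
    [datafit.gradient_scalar(X, y, w, Xw, j) for j in range(X.shape[1])])
# same gradient computed in one pass from the CSC arrays
sparse_grad = datafit.full_grad_sparse(
    X_csc.data, X_csc.indptr, X_csc.indices, y, Xw)

np.testing.assert_allclose(dense_grad, sparse_grad)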
