
Commit f5e8154

Authored by EnLAI111, PABannier and mathurinm
ENH add Huber datafit (#14)
* add Huber datafits
* add L1 penalty without the 1st term
* add jitclass
* rename file
* fix pbm
* fix pbm
* correct grad sparse
* correct grad sparse
* change parameter of grad sparse
* get_sys_info
* delete get_sys_info
* fix issue
* fix issue
* fix error
* fix error
* fix error
* move huber to single_task and init
* explicit loop computations
* linter happy
* add test script for Huber
* add Huber to doc
* test huber in test_datafits
* import assert_array_less

Co-authored-by: Pierre-Antoine Bannier <[email protected]>
Co-authored-by: mathurinm <[email protected]>
1 parent 1f13430 commit f5e8154

File tree

4 files changed: +135 -0 lines changed

doc/api.rst

Lines changed: 1 addition & 0 deletions
@@ -51,6 +51,7 @@ Datafits
 .. autosummary::
    :toctree: generated/

+   Huber
    Logistic
    Quadratic
    QuadraticGroup

skglm/datafits/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -2,6 +2,7 @@

 from .single_task import (  # noqa F401
     Quadratic, Quadratic_32, QuadraticSVC, QuadraticSVC_32, Logistic, Logistic_32,
+    Huber, Huber_32,
 )

 from .multi_task import QuadraticMultiTask  # noqa F401
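
With the new names exported, the Huber datafit plugs into the existing estimator API. The sketch below is not part of the commit; it only reuses names that appear in the test added further down (GeneralizedLinearEstimator, WeightedL1, make_correlated_data), with all-zero penalty weights so the fit is effectively an unpenalized robust regression:

import numpy as np

from skglm import GeneralizedLinearEstimator
from skglm.datafits import Huber
from skglm.penalties import WeightedL1
from skglm.utils import make_correlated_data

X, y, _ = make_correlated_data(n_samples=50, n_features=20, random_state=0)

# Huber datafit with all-zero L1 weights: plain robust regression
model = GeneralizedLinearEstimator(
    datafit=Huber(1.35),
    penalty=WeightedL1(1., np.zeros(X.shape[1])),
).fit(X, y)
print(model.coef_)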

skglm/datafits/single_task.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,3 +232,105 @@ def full_grad_sparse(
232232

233233

234234
QuadraticSVC, QuadraticSVC_32 = jit_factory(_QuadraticSVC, spec_quadratic_svc)
235+
236+
237+
spec_huber = [
238+
('delta', float64),
239+
('lipschitz', float64[:])
240+
]
241+
242+
243+
class _Huber(BaseDatafit):
244+
"""Huber datafit.
245+
246+
The datafit reads::
247+
248+
(1 / n_samples) * sum_{i=1}^{n_samples} f(y_i - Xw_i)
249+
250+
where f is the Huber function:
251+
252+
f(x) =
253+
1 / 2 * x^2 if x <= delta
254+
delta * |x| - 1/2 * delta^2 if x > delta
255+
256+
Attributes
257+
----------
258+
lipschitz : array, shape (n_features,)
259+
The coordinatewise gradient Lipschitz constants.
260+
261+
Note
262+
----
263+
The class _Huber is subsequently decorated with a @jitclass decorator with
264+
the `jit_factory` function to be compiled. This allows for faster computations
265+
using Numba JIT compiler.
266+
"""
267+
268+
def __init__(self, delta):
269+
self.delta = delta
270+
271+
def initialize(self, X, y):
272+
n_features = X.shape[1]
273+
self.lipschitz = np.zeros(n_features, dtype=X.dtype)
274+
for j in range(n_features):
275+
self.lipschitz[j] = (X[:, j] ** 2).sum() / len(y)
276+
277+
def initialize_sparse(
278+
self, X_data, X_indptr, X_indices, y):
279+
n_features = len(X_indptr) - 1
280+
self.lipschitz = np.zeros(n_features, dtype=X_data.dtype)
281+
for j in range(n_features):
282+
nrm2 = 0.
283+
for idx in range(X_indptr[j], X_indptr[j + 1]):
284+
nrm2 += X_data[idx] ** 2
285+
self.lipschitz[j] = nrm2 / len(y)
286+
287+
def value(self, y, w, Xw):
288+
n_samples = len(y)
289+
res = 0.
290+
for i in range(n_samples):
291+
tmp = abs(y[i] - Xw[i])
292+
if tmp < self.delta:
293+
res += 0.5 * tmp ** 2
294+
else:
295+
res += self.delta * tmp - 0.5 * self.delta ** 2
296+
return res / n_samples
297+
298+
def gradient_scalar(self, X, y, w, Xw, j):
299+
n_samples = len(y)
300+
grad_j = 0.
301+
for i in range(n_samples):
302+
tmp = y[i] - Xw[i]
303+
if abs(tmp) < self.delta:
304+
grad_j += - X[i, j] * tmp
305+
else:
306+
grad_j += - X[i, j] * np.sign(tmp) * self.delta
307+
return grad_j / n_samples
308+
309+
def gradient_scalar_sparse(self, X_data, X_indptr, X_indices, y, Xw, j):
310+
grad_j = 0.
311+
for i in range(X_indptr[j], X_indptr[j + 1]):
312+
tmp = y[X_indices[i]] - Xw[X_indices[i]]
313+
if np.abs(tmp) < self.delta:
314+
grad_j += - X_data[i] * tmp
315+
else:
316+
grad_j += - X_data[i] * np.sign(tmp) * self.delta
317+
return grad_j / len(Xw)
318+
319+
def full_grad_sparse(
320+
self, X_data, X_indptr, X_indices, y, Xw):
321+
n_features = X_indptr.shape[0] - 1
322+
n_samples = y.shape[0]
323+
grad = np.zeros(n_features, dtype=Xw.dtype)
324+
for j in range(n_features):
325+
grad_j = 0.
326+
for i in range(X_indptr[j], X_indptr[j + 1]):
327+
tmp = y[X_indices[i]] - Xw[X_indices[i]]
328+
if np.abs(tmp) < self.delta:
329+
grad_j += - X_data[i] * tmp
330+
else:
331+
grad_j += - X_data[i] * np.sign(tmp) * self.delta
332+
grad[j] = grad_j / n_samples
333+
return grad
334+
335+
336+
Huber, Huber_32 = jit_factory(_Huber, spec_huber)
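
For reference, the piecewise loss and its coordinatewise gradient can be sanity-checked against plain NumPy. The sketch below is not part of the commit; it assumes the compiled Huber class can be called directly on small float64 arrays, and uses a central finite difference in place of the analytical gradient:

import numpy as np
from skglm.datafits import Huber

rng = np.random.RandomState(0)
X = rng.randn(30, 4)
y = rng.randn(30)
w = rng.randn(4)
Xw = X @ w

delta = 1.35
datafit = Huber(delta)
datafit.initialize(X, y)  # precomputes the coordinatewise Lipschitz constants

# value() against a direct NumPy evaluation of the docstring formula
residual = y - Xw
loss = np.where(np.abs(residual) <= delta, 0.5 * residual ** 2,
                delta * np.abs(residual) - 0.5 * delta ** 2)
np.testing.assert_allclose(datafit.value(y, w, Xw), loss.mean())

# gradient_scalar() against a central finite difference on coordinate j
j, eps = 2, 1e-6
w_up, w_down = w.copy(), w.copy()
w_up[j] += eps
w_down[j] -= eps
fd_grad = (datafit.value(y, w_up, X @ w_up)
           - datafit.value(y, w_down, X @ w_down)) / (2 * eps)
np.testing.assert_allclose(datafit.gradient_scalar(X, y, w, Xw, j), fd_grad, rtol=1e-5)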

skglm/tests/test_datafits.py

Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
+import numpy as np
+
+from sklearn.linear_model import HuberRegressor
+from numpy.testing import assert_allclose, assert_array_less
+
+from skglm.datafits import Huber
+from skglm.penalties import WeightedL1
+from skglm import GeneralizedLinearEstimator
+from skglm.utils import make_correlated_data
+
+
+def test_huber_datafit():
+    # test only the datafit: no other implementation exists with a sparse penalty
+    X, y, _ = make_correlated_data(n_samples=20, n_features=10, random_state=0)
+    # disable sklearn's squared L2 regularization (alpha=0)
+    their = HuberRegressor(
+        fit_intercept=False, alpha=0, tol=1e-12, epsilon=1.35
+    ).fit(X, y)
+
+    # sklearn also optimizes over a scale parameter, so we must match delta:
+    delta = their.epsilon * their.scale_
+
+    # TODO we should have an unpenalized solver
+    ours = GeneralizedLinearEstimator(
+        datafit=Huber(delta),
+        penalty=WeightedL1(1, np.zeros(X.shape[1])),
+        tol=1e-14,
+    ).fit(X, y)
+
+    assert_allclose(ours.coef_, their.coef_, rtol=1e-3)
+    assert_array_less(ours.stop_crit_, ours.tol)
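
The test above exercises only the dense code path. A complementary consistency check between the dense and sparse gradients is sketched below; it is not part of the commit and assumes a CSC matrix, whose data / indptr / indices arrays match the X_data / X_indptr / X_indices arguments of the sparse methods:

import numpy as np
from scipy import sparse

from skglm.datafits import Huber
from skglm.utils import make_correlated_data

X, y, _ = make_correlated_data(n_samples=20, n_features=10, random_state=0)
X_csc = sparse.csc_matrix(X)
w = np.random.RandomState(0).randn(X.shape[1])
Xw = X @ w

datafit = Huber(1.35)
datafit.initialize(X, y)

# gradient assembled coordinate by coordinate on the dense matrix
dense_grad = np.array(
    [datafit.gradient_scalar(X, y, w, Xw, j) for j in range(X.shape[1])])
# same gradient computed in one pass from the CSC arrays
sparse_grad = datafit.full_grad_sparse(
    X_csc.data, X_csc.indptr, X_csc.indices, y, Xw)

np.testing.assert_allclose(dense_grad, sparse_grad)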
