
Commit cfe065e

update tests, documentation, add example
1 parent d10c4f9 commit cfe065e

File tree

4 files changed: +384 -156 lines changed


examples/plot_smooth_quantile.py

Lines changed: 156 additions & 0 deletions
@@ -0,0 +1,156 @@
"""
===========================================
Fast Quantile Regression with Smoothing
===========================================

This example demonstrates how SmoothQuantileRegressor achieves faster convergence
than scikit-learn's QuantileRegressor while maintaining accuracy, particularly
for large datasets.
"""

# %%
# Data Generation
# ---------------
# First, we generate synthetic data with a known quantile structure.

import time
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import QuantileRegressor
from skglm.experimental.smooth_quantile_regressor import SmoothQuantileRegressor
from skglm.solvers import FISTA

# Set random seed for reproducibility
np.random.seed(42)

# Generate dataset - sized to match the test file so the example runs quickly
n_samples, n_features = 10000, 10
X, y = make_regression(n_samples=n_samples, n_features=n_features,
                       noise=0.1, random_state=42)
X = StandardScaler().fit_transform(X)
y = y - np.mean(y)  # center y, as in the test file

# %%
# Model Comparison
# ----------------
# We compare scikit-learn's QuantileRegressor with our SmoothQuantileRegressor
# on the median (tau = 0.5).

tau = 0.5  # median; SmoothQuantileRegressor is most advantageous for non-median quantiles
alpha = 0.1


def pinball_loss(y_true, y_pred, tau=0.5):
    """Compute the pinball (quantile) loss."""
    residuals = y_true - y_pred
    return np.mean(np.where(residuals >= 0,
                            tau * residuals,
                            (1 - tau) * -residuals))


# scikit-learn's QuantileRegressor
start_time = time.time()
qr = QuantileRegressor(quantile=tau, alpha=alpha, fit_intercept=True,
                       solver="highs").fit(X, y)
qr_time = time.time() - start_time
y_pred_qr = qr.predict(X)
qr_loss = pinball_loss(y, y_pred_qr, tau=tau)

# SmoothQuantileRegressor
start_time = time.time()
solver = FISTA(max_iter=2000, tol=1e-8)
solver.fit_intercept = True
sqr = SmoothQuantileRegressor(
    smoothing_sequence=[1.0, 0.5, 0.2, 0.1, 0.05],  # base sequence, extended internally
    quantile=tau, alpha=alpha, verbose=True,  # verbose prints each smoothing stage
    smooth_solver=solver
).fit(X, y)
sqr_time = time.time() - start_time
y_pred_sqr = sqr.predict(X)
sqr_loss = pinball_loss(y, y_pred_sqr, tau=tau)

# %%
# Performance Analysis
# --------------------
# Let's analyze the runtime and solution quality of both methods.

speedup = qr_time / sqr_time
rel_gap = (sqr_loss - qr_loss) / qr_loss

print("\nPerformance Summary:")
print("scikit-learn QuantileRegressor:")
print(f"  Time: {qr_time:.2f}s")
print(f"  Loss: {qr_loss:.6f}")
print("SmoothQuantileRegressor:")
print(f"  Time: {sqr_time:.2f}s")
print(f"  Loss: {sqr_loss:.6f}")
print(f"  Speedup: {speedup:.1f}x")
print(f"  Relative gap: {rel_gap:.1%}")

# %%
# Visual Comparison
# -----------------
# We create visualizations to compare the predictions and residuals
# of both methods.

# Sort data for better visualization
sort_idx = np.argsort(y)
y_sorted = y[sort_idx]
qr_pred = y_pred_qr[sort_idx]
sqr_pred = y_pred_sqr[sort_idx]

# Create figure with two subplots
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

# Plot predictions
ax1.scatter(y_sorted, qr_pred, alpha=0.5, label='scikit-learn', s=10)
ax1.scatter(y_sorted, sqr_pred, alpha=0.5, label='SmoothQuantile', s=10)
ax1.plot([y_sorted.min(), y_sorted.max()],
         [y_sorted.min(), y_sorted.max()], 'k--', alpha=0.3)
ax1.set_xlabel('True values')
ax1.set_ylabel('Predicted values')
ax1.set_title(f'Predictions (τ={tau})')
ax1.legend()

# Plot residuals
qr_residuals = y_sorted - qr_pred
sqr_residuals = y_sorted - sqr_pred
ax2.hist(qr_residuals, bins=50, alpha=0.5, label='scikit-learn')
ax2.hist(sqr_residuals, bins=50, alpha=0.5, label='SmoothQuantile')
ax2.axvline(x=0, color='k', linestyle='--', alpha=0.3)
ax2.set_xlabel('Residuals')
ax2.set_ylabel('Count')
ax2.set_title('Residual Distribution')
ax2.legend()

plt.tight_layout()

# %%
# Progressive Smoothing Analysis
# ------------------------------
# Let's examine how the smoothing parameter affects the solution quality.

stages = sqr.stage_results_
deltas = [s['delta'] for s in stages]
errors = [s['quantile_error'] for s in stages]
losses = [s['obj_value'] for s in stages]

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

# Plot quantile error progression
ax1.semilogx(deltas, errors, 'o-')
ax1.set_xlabel('Smoothing parameter (δ)')
ax1.set_ylabel('Quantile error')
ax1.set_title('Quantile Error vs Smoothing')
ax1.grid(True, alpha=0.3)

# Plot objective value progression
ax2.semilogx(deltas, losses, 'o-')
ax2.set_xlabel('Smoothing parameter (δ)')
ax2.set_ylabel('Objective value')
ax2.set_title('Objective Value vs Smoothing')
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()
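
A useful follow-up check the example could include (not part of the commit; the snippet below is a hedged sketch reusing the variables defined above): at the target quantile tau, roughly a fraction tau of the observations should fall at or below the fitted values.

# Calibration check: empirical coverage should be close to tau.
coverage_qr = np.mean(y <= y_pred_qr)
coverage_sqr = np.mean(y <= y_pred_sqr)
print(f"Empirical coverage - scikit-learn: {coverage_qr:.3f}, "
      f"SmoothQuantile: {coverage_sqr:.3f} (target: {tau})")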

skglm/experimental/quantile_huber.py

Lines changed: 57 additions & 26 deletions
@@ -6,44 +6,75 @@


 class QuantileHuber(BaseDatafit):
-    r"""Huber-smoothed pinball loss for quantile regression.
+    r"""Smoothed approximation of the pinball loss for quantile regression.

-    This class implements a smoothed approximation of the pinball (quantile)
-    loss by applying Huber-style smoothing at the non-differentiable point.
-    The formulation improves numerical stability and convergence for
-    gradient-based solvers, particularly on large data sets.
+    This class implements a smoothed version of the pinball loss used in
+    quantile regression. The original pinball loss is:
+
+    .. math::
+        \rho_\tau(r) = \begin{cases}
+            \tau r & \text{if } r \geq 0 \\
+            (\tau - 1) r & \text{if } r < 0
+        \end{cases}
+
+    The smoothed version (Huberized pinball loss) is:
+
+    .. math::
+        \rho_\tau^\delta(r) = \begin{cases}
+            \tau \left(r - \frac{\delta}{2}\right) & \text{if } r \geq \delta \\
+            \tau \frac{r^2}{2\delta} & \text{if } 0 \leq r < \delta \\
+            (1 - \tau) \frac{r^2}{2\delta} & \text{if } -\delta < r < 0 \\
+            (1 - \tau) \left(-r - \frac{\delta}{2}\right) & \text{if } r \leq -\delta
+        \end{cases}
+
+    where :math:`\delta` is the smoothing parameter. As :math:`\delta \to 0`,
+    the smoothed loss converges to the original pinball loss.

     Parameters
     ----------
-    delta : float, positive
-        Width of the quadratic region around the origin. Larger values create
-        stronger smoothing. As ``delta`` -> 0, the loss approaches the
-        standard pinball loss.
+    delta : float, default=1.0
+        Smoothing parameter. Smaller values make the approximation closer to
+        the original pinball loss but may lead to numerical instability.

-    quantile : float in (0, 1)
-        Target quantile level (e.g. ``0.5`` corresponds to the median).
+    quantile : float, default=0.5
+        Quantile level between 0 and 1. When 0.5, the loss is symmetric
+        (a rescaled Huber loss). For other values, the loss is asymmetric.
+
+    Attributes
+    ----------
+    delta : float
+        Current smoothing parameter.
+
+    quantile : float
+        Current quantile level.

     Notes
     -----
-    The loss function is defined as
+    The smoothed loss is continuously differentiable everywhere, making it
+    suitable for gradient-based optimization methods. The gradient is:

     .. math::
+        \nabla \rho_\tau^\delta(r) = \begin{cases}
+            \tau & \text{if } r \geq \delta \\
+            \tau \frac{r}{\delta} & \text{if } 0 \leq r < \delta \\
+            (1 - \tau) \frac{r}{\delta} & \text{if } -\delta < r < 0 \\
+            \tau - 1 & \text{if } r \leq -\delta
+        \end{cases}

-        L(r) =
-        \\begin{cases}
-            \\tau \\dfrac{r^{2}}{2\\delta}, & 0 < r \\le \\delta \\\\
-            (1-\\tau) \\dfrac{r^{2}}{2\\delta}, & -\\delta \\le r < 0 \\\\
-            \\tau \\left(r - \\dfrac{\\delta}{2}\\right), & r > \\delta \\\\
-            (1-\\tau) \\left(-r - \\dfrac{\\delta}{2}\\right), & r < -\\delta
-        \\end{cases}
+    The Hessian is piecewise constant:

-    where :math:`r = y - Xw` is the residual, :math:`\\tau` is the target
-    quantile, and :math:`\\delta` controls the smoothing width.
-
-    References
-    ----------
-    He, X., Pan, X., Tan, K. M., & Zhou, W. X. (2021).
-    *Smoothed Quantile Regression with Large-Scale Inference*.
+    .. math::
+        \nabla^2 \rho_\tau^\delta(r) = \begin{cases}
+            0 & \text{if } |r| \geq \delta \\
+            \tau / \delta & \text{if } 0 < r < \delta \\
+            (1 - \tau) / \delta & \text{if } -\delta < r < 0
+        \end{cases}
+
+    Examples
+    --------
+    >>> from skglm.experimental.quantile_huber import QuantileHuber
+    >>> import numpy as np
+    >>> loss = QuantileHuber(delta=0.1, quantile=0.8)
+    >>> r = np.array([-1.0, 0.0, 1.0])
+    >>> print(loss.value(r))     # loss values
+    >>> print(loss.gradient(r))  # gradients
     """

     def __init__(self, delta, quantile):
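
To sanity-check the formulas above without the skglm machinery, here is a minimal self-contained NumPy sketch of the tau-weighted Huberized pinball loss and its gradient. The function names are illustrative only and are not part of the QuantileHuber API.

import numpy as np

def huberized_pinball(r, tau=0.5, delta=1.0):
    # tau-weighted Huber smoothing: quadratic for |r| < delta, linear outside
    w = np.where(r >= 0, tau, 1 - tau)
    return np.where(np.abs(r) < delta,
                    w * r ** 2 / (2 * delta),
                    w * (np.abs(r) - delta / 2))

def huberized_pinball_grad(r, tau=0.5, delta=1.0):
    # the gradient is continuous: w * r / delta inside, +tau or -(1 - tau) outside
    w = np.where(r >= 0, tau, 1 - tau)
    return np.where(np.abs(r) < delta, w * r / delta, np.sign(r) * w)

r = np.linspace(-2.0, 2.0, 9)
print(huberized_pinball(r, tau=0.8, delta=0.5))
print(huberized_pinball_grad(r, tau=0.8, delta=0.5))
# As delta -> 0, the loss approaches the pinball loss
# tau * max(r, 0) + (1 - tau) * max(-r, 0).

The two branches agree at |r| = delta (both equal w * delta / 2, with matching slopes), which is exactly the continuity property the docstring relies on.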

skglm/experimental/smooth_quantile_regressor.py

Lines changed: 52 additions & 18 deletions
@@ -8,14 +8,33 @@


 class SmoothQuantileRegressor:
-    """Progressive smoothing (homotopy) meta-solver.
+    r"""Progressive smoothing solver for quantile regression.

-    This solver addresses convergence issues in non-smooth datafits like
-    Pinball(quantile regression) on large datasets
-    (as discussed in GitHub issue #276).
+    This solver addresses convergence issues in non-smooth quantile regression
+    on large datasets by using a progressive smoothing approach. The
+    optimization objective is:

-    It works by progressively solving a sequence of smoothed problems with
-    decreasing smoothing parameter.
+    .. math::
+        \min_{w \in \mathbb{R}^p} \frac{1}{n} \sum_{i=1}^n
+        \rho_\tau(y_i - x_i^\top w) + \alpha \|w\|_1
+
+    where :math:`\rho_\tau` is the pinball loss:
+
+    .. math::
+        \rho_\tau(r) = \begin{cases}
+            \tau r & \text{if } r \geq 0 \\
+            (\tau - 1) r & \text{if } r < 0
+        \end{cases}
+
+    The solver progressively approximates the non-smooth pinball loss using
+    smoothed versions with decreasing smoothing parameter :math:`\delta`
+    (the Huberized pinball loss implemented in ``QuantileHuber``):
+
+    .. math::
+        \rho_\tau^\delta(r) = \begin{cases}
+            \tau \left(r - \frac{\delta}{2}\right) & \text{if } r \geq \delta \\
+            \tau \frac{r^2}{2\delta} & \text{if } 0 \leq r < \delta \\
+            (1 - \tau) \frac{r^2}{2\delta} & \text{if } -\delta < r < 0 \\
+            (1 - \tau) \left(-r - \frac{\delta}{2}\right) & \text{if } r \leq -\delta
+        \end{cases}

     Parameters
     ----------
@@ -32,42 +51,57 @@ class SmoothQuantileRegressor:

     smooth_solver : instance of BaseSolver, default=None
         Solver to use for smooth approximation stages.
-        If None, uses LBFGS(max_iter=500, tol=1e-6).
-
+        If None, uses FISTA(max_iter=2000, tol=1e-8).

     verbose : bool, default=False
         If True, prints progress information during fitting.

     Attributes
     ----------
     coef_ : ndarray of shape (n_features,)
-        Parameter vector (w in the cost function formula).
+        Parameter vector (:math:`w` in the cost function formula).

     intercept_ : float
         Intercept term in decision function.

     stage_results_ : list
-        Information about each smoothing stage including smoothing parameter,
-        number of iterations, and coefficients.
-
+        Information about each smoothing stage, with keys:
+
+        - ``delta``: smoothing parameter
+        - ``obj_value``: objective value
+        - ``coef_norm``: L2 norm of the coefficients
+        - ``quantile_error``: absolute difference between target and
+          achieved quantile
+        - ``actual_quantile``: achieved quantile level
+        - ``coef``: coefficient vector
+        - ``intercept``: intercept value
+        - ``n_iter``: number of iterations (if available)
+
+    Notes
+    -----
+    This implementation uses a progressive smoothing approach to solve
+    quantile regression problems. It starts with a strongly smoothed
+    approximation and gradually reduces the smoothing parameter to approach
+    the original non-smooth problem. This is particularly effective for
+    large datasets, where direct optimization of the non-smooth objective
+    can be challenging.
+
+    The solver automatically selects the best solution across all smoothing
+    stages based on the quantile error, ensuring a good approximation of
+    the target quantile level.

     References
     ----------
     Chen, C. (2007). A Finite Smoothing Algorithm for Quantile Regression.
     Journal of Computational and Graphical Statistics, 16(1), 136–164.
     http://www.jstor.org/stable/27594233

-
     Examples
     --------
-    >>> from skglm.experimental.progressive_smoothing import
-        ProgressiveSmoothingSolver
+    >>> from skglm.experimental.smooth_quantile_regressor import SmoothQuantileRegressor
     >>> import numpy as np
     >>> X = np.random.randn(1000, 10)
     >>> y = np.random.randn(1000)
-    >>> solver = ProgressiveSmoothingSolver(quantile=0.8)
-    >>> solver.fit(X, y)
-    >>> print(solver.coef_)
+    >>> reg = SmoothQuantileRegressor(quantile=0.8, alpha=0.1)
+    >>> reg.fit(X, y)
+    >>> print(reg.coef_)
     """

     def __init__(
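
To make the progressive smoothing (homotopy) idea concrete, here is a minimal, self-contained sketch of the stage-wise loop, assuming the tau-weighted smoothing documented in quantile_huber.py. It deliberately omits the L1 penalty (alpha = 0) that the committed solver handles with FISTA, and the names are illustrative, not skglm API.

import numpy as np
from scipy.optimize import minimize

def smooth_pinball(r, tau, delta):
    # tau-weighted Huberized pinball loss (see docstring formulas above)
    w = np.where(r >= 0, tau, 1 - tau)
    return np.where(np.abs(r) < delta,
                    w * r ** 2 / (2 * delta),
                    w * (np.abs(r) - delta / 2))

def smooth_pinball_grad(r, tau, delta):
    w = np.where(r >= 0, tau, 1 - tau)
    return np.where(np.abs(r) < delta, w * r / delta, np.sign(r) * w)

def fit_progressive(X, y, tau, deltas=(1.0, 0.5, 0.2, 0.1, 0.05)):
    # Solve a sequence of smoothed problems with decreasing delta,
    # warm-starting each stage from the previous solution.
    n, p = X.shape
    w = np.zeros(p)
    for delta in deltas:
        def obj(w):
            return np.mean(smooth_pinball(y - X @ w, tau, delta))
        def grad(w):
            return -X.T @ smooth_pinball_grad(y - X @ w, tau, delta) / n
        w = minimize(obj, w, jac=grad, method="L-BFGS-B").x
    return w

rng = np.random.default_rng(0)
X = rng.standard_normal((500, 5))
y = X @ rng.standard_normal(5) + rng.standard_normal(500)
X_int = np.column_stack([np.ones(len(X)), X])  # intercept column
w_hat = fit_progressive(X_int, y, tau=0.8)
print("empirical coverage:", np.mean(y <= X_int @ w_hat))  # should be near 0.8

Warm-starting is what makes the homotopy cheap: each stage starts close to its optimum, so the later, less-smooth (and harder) stages need only a few iterations.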
