
Commit 50c30bd

improve understanding of huber approximation
1 parent cfe065e commit 50c30bd

6 files changed: +505 −44 lines


doc/api.rst

Lines changed: 2 additions & 0 deletions
@@ -107,3 +107,5 @@ Experimental
 Pinball
 SqrtQuadratic
 SqrtLasso
+QuantileHuber
+SmoothQuantileRegressor

doc/changes/0.5.rst

Lines changed: 1 addition & 0 deletions
@@ -3,3 +3,4 @@
 Version 0.5 (in progress)
 -------------------------
 - Add support for fitting an intercept in :ref:`SqrtLasso <skglm.experimental.sqrt_lasso.SqrtLasso>` (PR: :gh:`298`)
+- Add :ref:`SmoothQuantileRegressor <skglm.experimental.smooth_quantile_regressor.SmoothQuantileRegressor>` for fast quantile regression with progressive smoothing (PR: :gh:`276`)

examples/plot_quantile_huber.py

Lines changed: 301 additions & 0 deletions
@@ -0,0 +1,301 @@
"""
=======================================
Quantile Huber Loss Visualization
=======================================

This example demonstrates the smoothed approximation of the pinball loss
(Quantile Huber) used in quantile regression. It illustrates how the
smoothing parameter affects the loss function shape and optimization
performance for different quantile levels.
"""

# Authors: Florian Kozikowski

# %%
# Understanding the Quantile Huber Loss
# --------------------------------------
#
# The standard pinball loss used in quantile regression is not differentiable
# at zero, which causes issues for gradient-based optimization methods.
# The Quantile Huber loss provides a smoothed approximation by replacing the
# non-differentiable point with a quadratic region.

import time

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.preprocessing import StandardScaler

from skglm import GeneralizedLinearEstimator
from skglm.penalties import L1
from skglm.solvers import FISTA
from skglm.experimental.quantile_huber import QuantileHuber
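# %%
# To make the smoothing concrete, here is a small reference implementation of
# one common quantile Huber parametrization: quadratic for |r| <= delta and
# linear with slope tau (resp. tau - 1) outside. This is an illustrative
# sketch only; the exact scaling used internally by ``QuantileHuber`` may
# differ slightly.


def quantile_huber_value(r, tau, delta):
    """Illustrative (assumed) formula for the smoothed pinball loss."""
    if r >= delta:
        return tau * (r - delta / 2)
    elif r >= 0:
        return tau * r ** 2 / (2 * delta)
    elif r > -delta:
        return (1 - tau) * r ** 2 / (2 * delta)
    else:
        return (1 - tau) * (-r - delta / 2)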
# %%
# First, let's visualize the Quantile Huber loss for median regression (τ=0.5)
# with different smoothing parameters.

fig1, ax1 = plt.subplots(figsize=(10, 5))

# Plot several smoothing levels
deltas = [1.0, 0.5, 0.2, 0.1, 0.01]
tau = 0.5
r = np.linspace(-3, 3, 1000)

# Compute the non-smooth pinball loss for comparison
pinball_curve = np.where(r >= 0, tau * r, (tau - 1) * r)
ax1.plot(r, pinball_curve, 'k--', label='Pinball (non-smooth)')

# Plot the smoothed loss for several deltas
for delta in deltas:
    loss_fn = QuantileHuber(delta=delta, quantile=tau)
    loss = np.array([loss_fn._loss_and_grad_scalar(ri)[0] for ri in r])
    ax1.plot(r, loss, '-', label=f'Quantile Huber (delta={delta})')

ax1.axvline(x=0, color='gray', linestyle='-', alpha=0.4)
ax1.axhline(y=0, color='gray', linestyle='-', alpha=0.4)
ax1.set_xlabel('Residual (r)')
ax1.set_ylabel('Loss')
ax1.set_title('Quantile Huber Loss (τ=0.5) with Different Smoothing Parameters')
ax1.legend()
ax1.grid(True, alpha=0.3)

# %%
# As we can see, smaller values of delta make the approximation closer to the
# original non-smooth pinball loss. Now, let's compare different quantile
# levels with the same smoothing parameter.

fig2, ax2 = plt.subplots(figsize=(10, 5))

# Plot for different quantile levels
taus = [0.1, 0.25, 0.5, 0.75, 0.9]
delta = 0.2

for tau in taus:
    loss_fn = QuantileHuber(delta=delta, quantile=tau)
    loss = np.array([loss_fn._loss_and_grad_scalar(ri)[0] for ri in r])
    ax2.plot(r, loss, '-', label=f'τ={tau}')

ax2.axvline(x=0, color='gray', linestyle='-', alpha=0.4)
ax2.axhline(y=0, color='gray', linestyle='-', alpha=0.4)
ax2.set_xlabel('Residual (r)')
ax2.set_ylabel('Loss')
ax2.set_title(f'Quantile Huber Loss for Different Quantile Levels (delta={delta})')
ax2.legend()
ax2.grid(True, alpha=0.3)
# %%
# The gradient of the smoothed loss
# ---------------------------------
#
# The primary advantage of the Quantile Huber loss is its continuous gradient.
# Let's visualize both the loss and its gradient for specific quantile levels.
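# %%
# For reference, the gradient of the illustrative parametrization sketched
# earlier is piecewise linear and continuous: tau * r / delta on [0, delta],
# (1 - tau) * r / delta on [-delta, 0], and constant (tau or tau - 1) outside
# the quadratic zone. Again, this is an assumption for illustration, not
# necessarily the exact formula used inside ``QuantileHuber``.


def quantile_huber_grad(r, tau, delta):
    """Illustrative (assumed) gradient of the smoothed pinball loss."""
    if r >= delta:
        return tau
    elif r >= 0:
        return tau * r / delta
    elif r > -delta:
        return (1 - tau) * r / delta
    else:
        return tau - 1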
def plot_quantile_huber(tau=0.5, delta=0.5, residual_range=(-2, 2), num_points=1000):
    """
    Plot the quantile Huber loss function and its gradient.

    This utility function generates plots of the quantile Huber loss and its
    gradient for visualization and documentation purposes. It is not part of
    the core implementation but is useful for understanding the behavior of
    the loss.

    Parameters
    ----------
    tau : float, default=0.5
        Quantile level between 0 and 1.

    delta : float, default=0.5
        Smoothing parameter controlling the width of the quadratic region.

    residual_range : tuple (min, max), default=(-2, 2)
        Range of residual values to plot.

    num_points : int, default=1000
        Number of points to plot.

    Returns
    -------
    fig : matplotlib figure
        Figure containing the plots.

    Example
    -------
    >>> fig = plot_quantile_huber(tau=0.8, delta=0.3)
    >>> fig.savefig('quantile_huber_tau_0.8.png')
    """
    loss_fn = QuantileHuber(delta=delta, quantile=tau)
    r = np.linspace(residual_range[0], residual_range[1], num_points)

    # Compute loss and gradient for each residual value
    loss = np.zeros_like(r)
    grad = np.zeros_like(r)
    for i, ri in enumerate(r):
        loss[i], grad[i] = loss_fn._loss_and_grad_scalar(ri)

    # For comparison, compute the non-smooth pinball loss
    pinball_loss = np.where(r >= 0, tau * r, (tau - 1) * r)

    # Create figure with two subplots
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

    # Plot loss functions
    ax1.plot(r, loss, 'b-', label=f'Quantile Huber (δ={delta})')
    ax1.plot(r, pinball_loss, 'r--', label='Pinball (non-smooth)')

    # Add vertical lines at the transition points and axes
    ax1.axvline(x=delta, color='gray', linestyle=':', alpha=0.7)
    ax1.axvline(x=-delta, color='gray', linestyle=':', alpha=0.7)
    ax1.axvline(x=0, color='gray', linestyle='-', alpha=0.3)
    ax1.axhline(y=0, color='gray', linestyle='-', alpha=0.3)

    # Shade the quadratic zone
    ax1.axvspan(-delta, delta, alpha=0.1, color='blue')

    ax1.set_xlabel('Residual (r)')
    ax1.set_ylabel('Loss')
    ax1.set_title(f'Quantile Huber Loss (τ={tau})')
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # Plot gradients
    ax2.plot(r, grad, 'b-', label=f'Quantile Huber Gradient (δ={delta})')

    # Add the non-smooth (sub)gradient for comparison
    pinball_grad = np.where(r > 0, tau, tau - 1)
    ax2.plot(r, pinball_grad, 'r--', label='Pinball Gradient (non-smooth)')

    # Add vertical lines at the transition points and axes
    ax2.axvline(x=delta, color='gray', linestyle=':', alpha=0.7)
    ax2.axvline(x=-delta, color='gray', linestyle=':', alpha=0.7)
    ax2.axvline(x=0, color='gray', linestyle='-', alpha=0.3)
    ax2.axhline(y=0, color='gray', linestyle='-', alpha=0.3)

    # Shade the quadratic zone
    ax2.axvspan(-delta, delta, alpha=0.1, color='blue')

    ax2.set_xlabel('Residual (r)')
    ax2.set_ylabel('Gradient')
    ax2.set_title(f'Gradient of Quantile Huber Loss (τ={tau})')
    ax2.legend()
    ax2.grid(True, alpha=0.3)

    plt.tight_layout()
    return fig

# For the 75th percentile with delta=0.3
fig3 = plot_quantile_huber(tau=0.75, delta=0.3)
plt.suptitle('Quantile Huber Loss and Gradient for τ=0.75, delta=0.3', fontsize=14)
plt.tight_layout(rect=[0, 0, 1, 0.95])  # leave room for the suptitle

# %%
# For the 50th percentile (median) with delta=0.3
fig4 = plot_quantile_huber(tau=0.5, delta=0.3)
plt.suptitle('Quantile Huber Loss and Gradient for τ=0.5, delta=0.3', fontsize=14)
plt.tight_layout(rect=[0, 0, 1, 0.95])  # leave room for the suptitle

# %%
# For the 25th percentile with delta=0.3
fig5 = plot_quantile_huber(tau=0.25, delta=0.3)
plt.suptitle('Quantile Huber Loss and Gradient for τ=0.25, delta=0.3', fontsize=14)
plt.tight_layout(rect=[0, 0, 1, 0.95])  # leave room for the suptitle
# %%
# Performance impact of smoothing parameter
# ------------------------------------------
#
# The smoothing parameter delta strongly affects optimization performance.
# Let's examine how it impacts convergence speed and solution quality.

# Generate synthetic data
X, y = make_regression(n_samples=500, n_features=10, noise=0.1, random_state=42)
X = StandardScaler().fit_transform(X)

# Set up the experiment
delta_values = [1.0, 0.5, 0.2, 0.1, 0.05, 0.02]
tau = 0.75
alpha = 0.1

# Collect results
times = []
objectives = []


# Define the pinball loss used to evaluate solutions
def pinball_loss(y_true, y_pred, tau=0.5):
    """Compute the pinball loss for quantile regression."""
    residuals = y_true - y_pred
    return np.mean(np.where(residuals >= 0,
                            tau * residuals,
                            (tau - 1) * residuals))


# Fit one estimator per delta and record runtime and final pinball loss
for delta in delta_values:
    datafit = QuantileHuber(delta=delta, quantile=tau)
    solver = FISTA(max_iter=1000, tol=1e-6)

    start_time = time.time()
    est = GeneralizedLinearEstimator(
        datafit=datafit, penalty=L1(alpha=alpha), solver=solver
    ).fit(X, y)

    elapsed = time.time() - start_time
    times.append(elapsed)

    # Evaluate with the pinball loss of the solution (not the smoothed loss)
    pinball = pinball_loss(y, X @ est.coef_, tau=tau)
    objectives.append(pinball)
# %%
# The results show the trade-off between optimization speed and solution quality.
# Let's plot the results to visualize this relationship.

fig6, ax6 = plt.subplots(figsize=(10, 5))
ax6.plot(delta_values, times, 'o-')
ax6.set_xscale('log')
ax6.set_xlabel('Smoothing parameter (delta)')
ax6.set_ylabel('Time (seconds)')
ax6.set_title('Computation Time vs Smoothing Parameter')
ax6.grid(True, alpha=0.3)

fig7, ax7 = plt.subplots(figsize=(10, 5))
ax7.plot(delta_values, objectives, 'o-')
ax7.set_xscale('log')
ax7.set_xlabel('Smoothing parameter (delta)')
ax7.set_ylabel('Pinball Loss')
ax7.set_title(f'Final Pinball Loss (τ={tau}) vs Smoothing Parameter')
ax7.grid(True, alpha=0.3)
# %%
# This example illustrates the key trade-off when choosing the smoothing parameter:
#
# - Larger values of delta make the problem easier to optimize (faster convergence,
#   fewer iterations), but may yield less accurate results for the original
#   quantile regression objective.
# - Smaller values of delta give more accurate results, but may require more
#   iterations to converge and take longer to compute.
#
# In practice, a progressive smoothing approach (as used in SmoothQuantileRegressor)
# can be beneficial: start with a large delta and gradually reduce it to
# approach the original non-smooth problem, as sketched below.
#
# The optimal choice of delta depends on your specific application and the balance
# between computational efficiency and solution accuracy you require.
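
# %%
# A minimal sketch of the progressive-smoothing idea (illustration only, not
# the actual SmoothQuantileRegressor implementation): solve a sequence of
# smoothed problems with decreasing delta, so the early problems are easy to
# optimize and the last one is close to the original pinball objective.
# A full continuation scheme would additionally warm start each fit from the
# previous solution.

for delta in [1.0, 0.3, 0.1, 0.03]:
    est = GeneralizedLinearEstimator(
        datafit=QuantileHuber(delta=delta, quantile=tau),
        penalty=L1(alpha=alpha),
        solver=FISTA(max_iter=1000, tol=1e-6),
    ).fit(X, y)
    print(f"delta={delta:<4}  pinball loss="
          f"{pinball_loss(y, X @ est.coef_, tau=tau):.4f}")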

examples/plot_smooth_quantile.py

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@
 np.random.seed(42)

 # Generate dataset - using a more reasonable size for quick testing
-n_samples, n_features = 10000, 10  # Match test file size
+n_samples, n_features = 1000, 10  # Match test file size
 X, y = make_regression(n_samples=n_samples, n_features=n_features,
                        noise=0.1, random_state=42)
 X = StandardScaler().fit_transform(X)

skglm/experimental/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -3,6 +3,7 @@
 from .pdcd_ws import PDCD_WS
 from .quantile_regression import Pinball
 from .quantile_huber import QuantileHuber
+from .smooth_quantile_regressor import SmoothQuantileRegressor

 __all__ = [
     IterativeReweightedL1,
@@ -11,4 +12,5 @@
     SqrtQuadratic,
     SqrtLasso,
     QuantileHuber,
+    SmoothQuantileRegressor,
 ]
