
Commit b9dec9b

pytest works, but bypasses PDCD and could be stricter
1 parent 9913b31 commit b9dec9b

File tree

6 files changed: +1144 -0 lines changed


skglm/experimental/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -2,11 +2,13 @@
 from .sqrt_lasso import SqrtLasso, SqrtQuadratic
 from .pdcd_ws import PDCD_WS
 from .quantile_regression import Pinball
+from .quantile_huber import QuantileHuber

 __all__ = [
     IterativeReweightedL1,
     PDCD_WS,
     Pinball,
     SqrtQuadratic,
     SqrtLasso,
+    QuantileHuber,
 ]
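
With this change, QuantileHuber is re-exported from the skglm.experimental namespace. A minimal sketch of the resulting import (assuming the quantile_huber module added elsewhere in this commit; the keyword arguments mirror how progressive_smoothing.py below instantiates the class):

    from skglm.experimental import QuantileHuber

    datafit = QuantileHuber(delta=0.1, quantile=0.8)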
skglm/experimental/progressive_smoothing.py

Lines changed: 360 additions & 0 deletions
@@ -0,0 +1,360 @@
"""
Progressive smoothing solver for non-smooth datafits (e.g., Pinball loss).

This is a meta-solver that gradually approximates a non-smooth loss with a
smooth version (e.g., Huber-smoothed Pinball), solves the problem at each
smoothing level with a smooth solver, and eventually switches to a
non-smooth solver (PDCD_WS) once the smoothing parameter is sufficiently small.

References
----------
.. [1] Nesterov, Y.
       "Smooth minimization of non-smooth functions," Mathematical Programming, 2005.
       https://link.springer.com/article/10.1007/s10107-004-0552-5

.. [2] Beck, A. and Teboulle, M.
       "Smoothing and first order methods: A unified framework,"
       SIAM Journal on Optimization, 2012.
       https://epubs.siam.org/doi/10.1137/100818327
"""

import copy
import warnings
import numpy as np
from scipy import sparse

from skglm import GeneralizedLinearEstimator
from skglm.penalties import L1
from skglm.datafits import Huber
from skglm.experimental.pdcd_ws import PDCD_WS
from skglm.experimental.quantile_regression import Pinball
from skglm.solvers import LBFGS, FISTA

# ---- ensure Huber and L1 expose a gradient method for gradient-based solvers ----


def _huber_gradient(self, X, y, Xw):
    n_samples = len(y)
    r = y - Xw
    delta = self.delta
    # derivative of the scaled Huber loss: r / delta where |r| <= delta,
    # sign(r) outside
    dr = np.where(np.abs(r) <= delta, r / delta, np.sign(r))
    return -X.T.dot(dr) / n_samples


def _l1_gradient(self, w):
    # simple subgradient of the L1 norm ||w||_1
    return self.alpha * np.sign(w)


if not hasattr(Huber, 'gradient'):
    Huber.gradient = _huber_gradient

if not hasattr(L1, 'gradient'):
    L1.gradient = _l1_gradient
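
# The patched gradient above corresponds to the scaled Huber function
# h_delta(r) = r**2 / (2 * delta) if |r| <= delta, else |r| - delta / 2,
# which tends to |r| as delta -> 0. A quick numeric check of the derivative
# with delta = 1.0 (illustrative only, not used by the solver):
#
#     r = np.array([-2.0, -0.5, 0.5, 2.0])
#     np.where(np.abs(r) <= 1.0, r / 1.0, np.sign(r))
#     # -> array([-1. , -0.5,  0.5,  1. ])
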
class ProgressiveSmoothingSolver:
    """Progressive smoothing (homotopy) meta-solver.

    This solver addresses convergence issues of the PDCD_WS solver with
    non-smooth datafits such as Pinball (quantile regression) on large
    datasets (as discussed in GitHub issue #276).

    It works by solving a sequence of smoothed problems with a decreasing
    smoothing parameter, then solving the original non-smooth problem with
    PDCD_WS initialized at the smoothed solution. For strongly asymmetric
    quantiles, the final PDCD_WS stage is skipped and the smoothed solution
    with the smallest empirical quantile error is returned instead.

    Parameters
    ----------
    smoothing_sequence : list or None, default=None
        Sequence of decreasing smoothing parameters (Huber delta values).
        If None, defaults to [1.0, 0.5, 0.2, 0.1, 0.05].

    quantile : float, default=0.5
        Desired quantile level, between 0 and 1. When 0.5, the symmetric
        Huber datafit is used; otherwise QuantileHuber provides asymmetric
        smoothing.

    alpha : float, default=0.1
        L1 regularization strength.

    smooth_solver : instance of BaseSolver, default=None
        Solver used for the smooth approximation stages.
        If None, uses FISTA(max_iter=2000, tol=1e-8).

    nonsmooth_solver : instance of BaseSolver, default=None
        Solver used for the final non-smooth problem.
        If None, uses PDCD_WS with appropriate settings.

    verbose : bool, default=False
        If True, prints progress information during fitting.

    Attributes
    ----------
    coef_ : ndarray of shape (n_features,)
        Parameter vector (w in the cost function formula).

    intercept_ : float
        Intercept term in the decision function.

    stage_results_ : list
        Information about each smoothing stage: the smoothing parameter,
        objective value, coefficient norm, and iteration count when the
        solver exposes it.

    Examples
    --------
    >>> from skglm.experimental.progressive_smoothing import ProgressiveSmoothingSolver
    >>> import numpy as np
    >>> X = np.random.randn(1000, 10)
    >>> y = np.random.randn(1000)
    >>> solver = ProgressiveSmoothingSolver(quantile=0.8)
    >>> solver.fit(X, y)
    >>> print(solver.coef_)
    """

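    # Each stage_results_ entry is a dict of the form (illustrative values):
    #     {'delta': 0.5, 'obj_value': 0.31, 'coef_norm': 1.8, 'n_iter': 142}
    # where 'n_iter' is present only when the stage solver exposes it.
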
    def __init__(
        self,
        smoothing_sequence=None,
        quantile=0.5,
        alpha=0.1,
        smooth_solver=None,
        nonsmooth_solver=None,
        verbose=False,
    ):
        # Build and *extend* the smoothing sequence
        base_seq = smoothing_sequence or [1.0, 0.5, 0.2, 0.1, 0.05]

        # For asymmetric quantiles, extend the sequence more aggressively
        if abs(quantile - 0.5) > 0.1:  # not close to the median
            min_delta = 1e-4  # go much further down for asymmetric quantiles
        else:
            min_delta = 1e-3  # original minimum for the symmetric case

        # if the user-provided sequence stops above min_delta, append finer deltas
        if base_seq[-1] > min_delta:
            extra = np.geomspace(base_seq[-1], min_delta, num=5, endpoint=False)[1:]
            base_seq = base_seq + list(extra)
        self.smoothing_sequence = base_seq
        self.quantile = float(quantile)

        if not 0 < self.quantile < 1:
            raise ValueError("quantile must be between 0 and 1")

        self.alpha = float(alpha)

        # Default smooth solver: FISTA (proximal gradient for Huber + L1).
        # NOTE: LBFGS is not used here: it cannot handle the non-smooth L1
        # penalty, and skglm's LBFGS does not properly handle an intercept
        # column, so a user-supplied LBFGS is overridden.
        if smooth_solver is None:
            self.smooth_solver = FISTA(max_iter=2000, tol=1e-8)
        elif isinstance(smooth_solver, LBFGS):
            warnings.warn(
                "Overriding provided LBFGS: using FISTA "
                "for L1-regularized smoothing stages."
            )
            self.smooth_solver = FISTA(
                max_iter=max(2000, smooth_solver.max_iter),
                tol=min(1e-8, smooth_solver.tol),
            )
        else:
            self.smooth_solver = smooth_solver
        self.smooth_solver.fit_intercept = False
        # ensure a warm_start flag on every solver
        if not hasattr(self.smooth_solver, 'warm_start'):
            self.smooth_solver.warm_start = False

        self.nonsmooth_solver = nonsmooth_solver or PDCD_WS(
            max_iter=10000,
            max_epochs=5000,
            # p0=500,
            tol=1e-8,
            warm_start=True,
            verbose=verbose,
        )
        self.nonsmooth_solver.fit_intercept = True
        if not hasattr(self.nonsmooth_solver, 'warm_start'):
            self.nonsmooth_solver.warm_start = False
        self.verbose = verbose

        # QuantileHuber is required for asymmetric quantiles
        if self.quantile != 0.5:
            try:
                from skglm.experimental.quantile_huber import QuantileHuber
                self._quantile_huber_cls = QuantileHuber
            except ImportError:
                raise ImportError(
                    "QuantileHuber class is required for quantile != 0.5. "
                    "Please ensure skglm.experimental.quantile_huber is available."
                )
        else:
            self._quantile_huber_cls = None
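
    # Illustration of the sequence extension above (assuming the default
    # sequence and the symmetric case):
    #     np.geomspace(0.05, 1e-3, num=5, endpoint=False)[1:]
    # yields approximately [0.0229, 0.0105, 0.0048, 0.0022], a geometric
    # decay from the last user-supplied delta toward min_delta.
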
    # Optimal-delta strategy: for asymmetric quantiles, return the smoothing
    # stage whose solution best matches the target quantile level.
    def fit(self, X, y):
        """Fit the model according to the given training data."""
        # Initialize coefficients
        n_features = X.shape[1]
        coef = np.zeros(n_features)
        intercept = 0.0
        is_sparse = sparse.issparse(X)

        # Track progress of each stage
        stage_results = []

        # For asymmetric quantiles, track the best solution found so far
        best_quantile_error = float('inf')
        best_coef = None
        best_intercept = None
        best_delta = None

        # Extended smoothing sequence
        extended_sequence = self.smoothing_sequence[:]
        # Add finer steps around 0.1, where quantile accuracy was observed
        # to be good, to better capture the optimal delta
        if abs(self.quantile - 0.5) > 0.05:
            fine_deltas = [0.15, 0.12, 0.1, 0.08, 0.06, 0.04, 0.02, 0.01]
            extended_sequence = sorted(
                set(extended_sequence + fine_deltas), reverse=True)

        # Progressive smoothing stages
        for stage, delta in enumerate(extended_sequence):
            if self.verbose:
                print(f"[ProgressiveSmoothing] Stage {stage + 1}/{len(extended_sequence)}: "
                      f"delta = {delta:.3g}")

            # Create the appropriate datafit for the quantile level
            if self.quantile == 0.5:
                datafit = Huber(delta=delta)
            else:
                datafit = self._quantile_huber_cls(delta=delta, quantile=self.quantile)

            # Work on a fresh copy of the smooth solver
            solver = copy.deepcopy(self.smooth_solver)

            # Ensure the solver carries a fit_intercept attribute
            if not hasattr(solver, 'fit_intercept'):
                solver.fit_intercept = True

            # Create estimator
            est = GeneralizedLinearEstimator(
                datafit=datafit,
                penalty=L1(alpha=self.alpha),
                solver=solver,
            )

            # Warm start from the previous stage when available
            if stage > 0:
                est.coef_ = coef
                est.intercept_ = intercept

            # Fit model
            est.fit(X, y)

            # Store results
            coef, intercept = est.coef_, est.intercept_

            # Track the empirical quantile error for asymmetric quantiles
            if abs(self.quantile - 0.5) > 0.05:
                y_pred = X @ coef if not is_sparse else X.dot(coef)
                residuals = y - y_pred
                actual_quantile = np.sum(residuals > 0) / len(residuals)
                quantile_error = abs(actual_quantile - self.quantile)

                if self.verbose:
                    print(f"  Actual quantile: {actual_quantile:.3f}, "
                          f"Error: {quantile_error:.3f}")

                # Update the best solution if this stage is better
                if quantile_error < best_quantile_error:
                    best_quantile_error = quantile_error
                    best_coef = coef.copy()
                    best_intercept = intercept
                    best_delta = delta

                    if self.verbose:
                        print(f"  New best quantile at delta={delta:.3g}, "
                              f"error={quantile_error:.3f}")

            # Record stage information
            obj_value = datafit.value(
                y, coef, X @ coef if not is_sparse else X.dot(coef))
            obj_value += est.penalty.value(coef)

            stage_info = {
                'delta': delta,
                'obj_value': obj_value,
                'coef_norm': np.linalg.norm(coef),
            }

            # Add solver-specific info if available
            if hasattr(solver, "n_iter_"):
                stage_info['n_iter'] = solver.n_iter_

            stage_results.append(stage_info)

        # Handle the final stage based on the quantile type
        if abs(self.quantile - 0.5) > 0.05:  # asymmetric quantile
            # Use the best smoothed solution (skip PDCD_WS entirely)
            self.coef_ = best_coef
            self.intercept_ = best_intercept

            if self.verbose:
                print(f"[Final] Using smoothed solution with delta={best_delta:.3g}")
                print(f"  Best quantile error: {best_quantile_error:.3f}")
        else:
            # For (near-)symmetric quantiles, run PDCD_WS on the true Pinball loss
            if self.verbose:
                print("[ProgressiveSmoothing] Final stage: "
                      "non-smooth solver on true Pinball loss")

            final_solver = copy.deepcopy(self.nonsmooth_solver)
            # Ensure it carries a fit_intercept attribute
            if not hasattr(final_solver, 'fit_intercept'):
                final_solver.fit_intercept = True

            final_est = GeneralizedLinearEstimator(
                datafit=Pinball(self.quantile),
                penalty=L1(alpha=self.alpha),
                solver=final_solver,
            )

            # Initialize with the smoothed solution if possible
            final_est.coef_ = coef
            final_est.intercept_ = intercept

            # Fit with the non-smooth solver
            final_est.fit(X, y)

            # Store final results
            self.coef_ = final_est.coef_
            self.intercept_ = final_est.intercept_

        self.stage_results_ = stage_results

        return self
    def predict(self, X):
        """Predict using the linear model.

        Parameters
        ----------
        X : array-like or sparse matrix, shape (n_samples, n_features)
            Samples.

        Returns
        -------
        C : array, shape (n_samples,)
            Returns predicted values.
        """
        if not hasattr(self, "coef_"):
            raise ValueError("Model not fitted. Call fit before predict.")

        if sparse.issparse(X):
            return X.dot(self.coef_) + self.intercept_
        return X @ self.coef_ + self.intercept_
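
For context, the QuantileHuber datafit consumed above is added elsewhere in this commit and does not appear in this diff. A minimal sketch of a standard asymmetric Huber smoothing of the pinball loss (the exact parameterization of skglm.experimental.quantile_huber may differ):

    import numpy as np

    def quantile_huber_value(r, quantile, delta):
        """Pinball loss with its kink at 0 replaced by a quadratic of width delta.

        Slope `quantile` for large positive residuals, `quantile - 1` for
        large negative ones, quadratic in between; tends to the pinball
        loss as delta -> 0.
        """
        tau = quantile
        out = np.empty_like(r, dtype=float)
        upper = r >= tau * delta
        lower = r <= (tau - 1) * delta
        mid = ~(upper | lower)
        out[upper] = tau * r[upper] - tau ** 2 * delta / 2
        out[lower] = (tau - 1) * r[lower] - (tau - 1) ** 2 * delta / 2
        out[mid] = r[mid] ** 2 / (2 * delta)
        return out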

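And a minimal end-to-end check of the meta-solver on synthetic data (assuming the modules added by this commit are importable):

    import numpy as np
    from skglm.experimental.progressive_smoothing import ProgressiveSmoothingSolver

    rng = np.random.default_rng(0)
    X = rng.standard_normal((1000, 10))
    y = X[:, 0] + 0.5 * rng.standard_normal(1000)

    solver = ProgressiveSmoothingSolver(quantile=0.8, alpha=0.05, verbose=True)
    solver.fit(X, y)

    residuals = y - solver.predict(X)
    # fraction of residuals above zero; fit() tracks the same quantity per stage
    print("empirical quantile:", np.mean(residuals > 0))
    print("stages run:", len(solver.stage_results_))
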
0 commit comments
