Skip to content

Commit 8cf2b4f

Browse files
authored
Merge pull request #18 from Leona-LYT/main
add CQR_Ridge_path_sol with examples
2 parents b80d7fa + 6284ea5 commit 8cf2b4f

File tree

2 files changed

+302
-15
lines changed

2 files changed

+302
-15
lines changed

doc/source/examples/Path_solution.ipynb

Lines changed: 140 additions & 1 deletion
Large diffs are not rendered by default.

rehline/_path_sol.py

Lines changed: 162 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,10 @@
11
import time
22

3-
import matplotlib.pyplot as plt
43
import numpy as np
54

65
from ._base import _make_loss_rehline_param
76
from ._class import plqERM_Ridge
7+
from ._class import CQR_Ridge
88
from ._loss import ReHLoss
99

1010

@@ -215,22 +215,170 @@ def plqERM_Ridge_path_sol(
215215
print(f"{'Avg Time/Iter':<12}{avg_time_per_iter:.6f} sec")
216216
print("=" * 90)
217217

218-
# ben: remove the plot part, when d is large, the figure will be too large to show
219-
# if verbose >= 2:
220-
# # it's better to load the matplotlib.pyplot before the function
221-
# import matplotlib.pyplot as plt
222-
# plt.figure(figsize=(10, 6))
223-
# for i in range(n_features):
224-
# plt.plot(Cs, coefs[i, :], label=f'Feature {i+1}')
225-
# plt.xscale('log')
226-
# plt.xlabel('C')
227-
# plt.ylabel('Coefficient Value')
228-
# plt.title('Regularization Path')
229-
# plt.legend()
230-
# plt.show()
231218

232219
if return_time:
233220
return Cs, times, n_iters, obj_values, L2_norms, coefs
234221
else:
235222
return Cs, n_iters, obj_values, L2_norms, coefs
236223

224+
225+
226+
def CQR_Ridge_path_sol(
    X,
    y,
    *,
    quantiles,
    eps=1e-5,
    n_Cs=50,
    Cs=None,
    max_iter=5000,
    tol=1e-4,
    verbose=0,
    shrink=1,
    warm_start=False,
    return_time=True,
):
    """
    Compute the regularization path for Composite Quantile Regression (CQR) with ridge penalty.

    This function fits a series of CQR models using different values of the regularization
    parameter `C`. A single working estimator is reused along the path: its `C` attribute is
    updated in-place before every refit, and an independent snapshot of the fitted estimator
    is stored for each `C`.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
        Feature matrix.

    y : ndarray of shape (n_samples,)
        Response vector.

    quantiles : list of float
        Quantile levels (e.g. [0.1, 0.5, 0.9]).

    eps : float, default=1e-5
        Lower bound of the generated `C` grid (used only if `Cs` is None). The grid is
        log-spaced between `eps` and 10.

    n_Cs : int, default=50
        Number of `C` values to generate (used only if `Cs` is None).

    Cs : array-like or None, default=None
        Explicit values of regularization strength. If None, use `eps` and `n_Cs` to generate them.

    max_iter : int, default=5000
        Maximum number of solver iterations.

    tol : float, default=1e-4
        Solver convergence tolerance.

    verbose : int, default=0
        Verbosity level. It is forwarded to the estimator; in addition, when `verbose >= 1`
        and `return_time=True`, one progress line is printed per fitted `C`.

    shrink : float, default=1
        Shrinkage parameter passed to solver.

    warm_start : bool, default=False
        Use previous dual solution to initialize the next fit.

    return_time : bool, default=True
        Whether to return a list of fit durations.

    Returns
    -------
    Cs : ndarray
        Regularization strengths, in the order they were fitted.

    models : list
        Fitted model objects, one independent snapshot per value in `Cs`.

    coefs : ndarray of shape (n_Cs, n_quantiles, n_features)
        Coefficient matrices per quantile and `C`. For a given `C`, the fitted `coef_` is
        repeated once per quantile.

    intercepts : ndarray of shape (n_Cs, n_quantiles)
        Intercepts per quantile and `C`.

    fit_times : list of float, optional
        Elapsed time in seconds of each `fit` call (only returned if `return_time=True`).

    Raises
    ------
    ValueError
        If the regularization path is empty (empty `Cs`, or `n_Cs=0` with `Cs=None`).

    Example
    -------
    >>> from sklearn.datasets import make_friedman1
    >>> from sklearn.preprocessing import StandardScaler
    >>> import numpy as np
    >>> from rehline import CQR_Ridge_path_sol

    >>> # Generate the data
    >>> X, y = make_friedman1(n_samples=500, n_features=6, noise=1.0, random_state=42)
    >>> X = StandardScaler().fit_transform(X)
    >>> y = y / y.std()

    >>> # Set quantiles and Cs
    >>> quantiles = [0.1, 0.5, 0.9]
    >>> Cs = np.logspace(-5, 0, 30)

    >>> # Fit CQR path
    >>> Cs, models, coefs, intercepts, fit_times = CQR_Ridge_path_sol(
    ...     X, y,
    ...     quantiles=quantiles,
    ...     Cs=Cs,
    ...     max_iter=100000,
    ...     tol=1e-4,
    ...     verbose=1,
    ...     warm_start=True,
    ...     return_time=True
    ... )

    """
    # Function-scope import: keeps this block self-contained without touching
    # the module-level import section.
    import copy

    if Cs is None:
        # Log-spaced grid from `eps` up to 10.
        log_Cs = np.linspace(np.log10(eps), np.log10(10), n_Cs)
        Cs = np.power(10.0, log_Cs)
    else:
        Cs = np.array(Cs)

    if Cs.size == 0:
        # Fail early with a clear message instead of an IndexError on Cs[0] below.
        raise ValueError(
            "The regularization path is empty: provide a non-empty `Cs` or set `n_Cs` >= 1."
        )

    models = []
    fit_times = []
    coefs = []
    intercepts = []

    # Loop-invariant: number of quantile levels used to tile the coefficients.
    n_qt = len(quantiles)

    # One working estimator is refit along the whole path so that, with
    # `warm_start=True`, each fit can start from the previous solution.
    clf = CQR_Ridge(
        quantiles=quantiles,
        C=Cs[0],
        max_iter=max_iter,
        tol=tol,
        shrink=shrink,
        verbose=verbose,
        warm_start=warm_start,
    )

    for C in Cs:
        clf.C = C

        # Time only the solver call, using a monotonic clock.
        if return_time:
            start = time.perf_counter()

        clf.fit(X, y)

        if return_time:
            elapsed = time.perf_counter() - start

        # Repeat the fitted coefficient vector once per quantile level.
        coef_matrix = np.tile(clf.coef_, (n_qt, 1))
        # Copy so the stored intercepts cannot alias estimator state that a
        # later refit might overwrite.
        intercept_vector = np.copy(clf.intercept_)

        # Store a snapshot: appending `clf` itself would leave every entry of
        # `models` pointing at the same object, i.e. the last fit only.
        models.append(copy.deepcopy(clf))
        coefs.append(coef_matrix)
        intercepts.append(intercept_vector)

        if return_time:
            fit_times.append(elapsed)
            # `elapsed` only exists when timing is enabled, so the progress
            # line is printed inside this branch.
            if verbose >= 1:
                print(f"[OK] C={C:.3e}, time={elapsed:.3f}s")

    coefs = np.array(coefs)  # (n_Cs, n_quantiles, n_features)
    intercepts = np.array(intercepts)  # (n_Cs, n_quantiles)

    if return_time:
        return Cs, models, coefs, intercepts, fit_times
    return Cs, models, coefs, intercepts

0 commit comments

Comments
 (0)