Skip to content

Commit 6b2953a

Browse files
authored
Merge pull request #8 from arokem/more-comments
Adds more documentation to the example.
2 parents 782aa64 + bd7d01b commit 6b2953a

File tree

2 files changed

+49
-9
lines changed

2 files changed

+49
-9
lines changed

examples/plot_diabetes.py

Lines changed: 48 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,55 +1,96 @@
11
"""
22
3-
===============================
4-
Example using scikit learn data
5-
===============================
3+
========================================
4+
Example using scikit learn diabetes data
5+
========================================
66
7-
We're going to compare the performance of the fracrdige algorithm with the
8-
performance of a standard approach to selection of regularization parameters:
9-
log-spaced selection of alphas between very minimal regularization and
7+
This example demonstrates some of the properties of the FRR approach and
8+
compares it to the use of standard ridge regression (RR) on the diabetes
9+
dataset that is included in scikit-learn.
10+
11+
In standard ridge regression, it is common to select alpha by testing a
12+
range of log-spaced values between very minimal regularization and
1013
very strong regularization.
1114
1215
"""
1316

17+
##########################################################################
18+
# Imports:
19+
#
20+
1421
import numpy as np
1522
import matplotlib.pyplot as plt
23+
##########################################################################
24+
# This is the fracridge sklearn-style estimator:
25+
#
1626
from fracridge import FracRidge
1727

1828
from sklearn import datasets
1929
from sklearn.linear_model import Ridge
2030
from sklearn.model_selection import cross_val_predict
2131
from sklearn.metrics import r2_score
2232

33+
##########################################################################
34+
# Get example data from scikit learn:
35+
#
36+
2337
X, y = datasets.load_diabetes(return_X_y=True)
2438

39+
##########################################################################
40+
# Values of alpha for the standard approach are set to be 20 values
41+
# that are log-spaced from a very small value to a very large value:
42+
#
2543
n_alphas = 20
26-
rr_alphas = alphas = np.logspace(-10, 10, n_alphas)
44+
rr_alphas = np.logspace(-10, 10, n_alphas)
2745
rr_coefs = []
2846
rr_coefs = np.zeros((X.shape[-1], n_alphas))
2947
rr_pred = np.zeros((y.shape[-1], n_alphas))
48+
49+
##########################################################################
50+
# We calculate the fit and cross-validated prediction for each value of
51+
# alpha:
52+
3053
for aa in range(len(rr_alphas)):
3154
RR = Ridge(alpha=rr_alphas[aa], fit_intercept=True)
3255
RR.fit(X, y)
3356
rr_coefs[:, aa] = RR.coef_
3457
rr_pred[:, aa] = cross_val_predict(RR, X, y)
3558

59+
##########################################################################
60+
# In contrast, FRR takes as inputs fractions, rather than arbitrarily-chosen
61+
# values of alpha. The alphas that are generated are selected to produce
62+
# solutions whose fractional L2-norm relative to the L2-norm of the
63+
# unregularized solution are these values. Here too, cross-validated
64+
# predictions are generated:
65+
3666
fracs = np.linspace(0, 1, n_alphas)
3767
FR = FracRidge(fracs=fracs, fit_intercept=True)
3868
FR.fit(X, y)
3969
fr_pred = cross_val_predict(FR, X, y)
4070

71+
##########################################################################
72+
# We plot the results. First, the FRR coefficients as a function of requested
73+
# fractions:
74+
4175
fig, ax = plt.subplots(1, 2)
4276
ax[0].plot(fracs, FR.coef_.T)
4377
ylims = ax[0].get_ylim()
4478
ax[0].vlines(fracs, ylims[0], ylims[1], linewidth=0.5, color='gray')
4579
ax[0].set_ylim(*ylims)
4680

81+
##########################################################################
82+
# Next, the RR coefficients as a function of the requested log-spaced alpha:
4783
ax[1].plot(np.log(rr_alphas[::-1]), rr_coefs.T)
4884
ylims = ax[1].get_ylim()
4985
ax[1].vlines(np.log(rr_alphas[::-1]), ylims[0], ylims[1], linewidth=0.5,
5086
color='gray')
5187
ax[1].set_ylim(*ylims)
5288

89+
##########################################################################
90+
# In a second plot, we compare the cross-validated predictions with the
91+
# original data. This is appropriate as each prediction was generated in a
92+
# sample that did not include that observation.
93+
5394
test_y = np.tile(y, (fr_pred.shape[-1], 1)).T
5495

5596
rr_r2 = r2_score(test_y, rr_pred, multioutput="raw_values")

fracridge/fracridge.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -207,8 +207,7 @@ class FracRidge(BaseEstimator, MultiOutputMixin):
207207
208208
Fit estimator:
209209
>>> fr.fit(X, y)
210-
FracRidge(copy_X=True, fit_intercept=False, fracs=0.3, jit=True,
211-
normalize=False, tol=1e-06)
210+
FracRidge(fracs=0.3)
212211
213212
Check results:
214213
>>> coef_ = fr.coef_

0 commit comments

Comments
 (0)