|
1 | 1 | """ |
2 | 2 |
|
3 | | -=============================== |
4 | | -Example using scikit learn data |
5 | | -=============================== |
| 3 | +======================================== |
| 4 | +Example using scikit learn diabetes data |
| 5 | +======================================== |
6 | 6 |
|
7 | | -We're going to compare the performance of the fracrdige algorithm with the |
8 | | -performance of a standard approach to selection of regularization parameters: |
9 | | -log-spaced selection of alphas between very minimal regularization and |
| 7 | +This example demonstrates some of the properties of the FRR approach and |
| 8 | +compares it to the use of standard ridge regression (RR) on the diabetes |
| 9 | +dataset that is included in scikit-learn. |
| 10 | +
|
| 11 | +In standard ridge regression, it is common to select alpha by testing a |
| 12 | +range of log-spaced values between very minimal regularization and |
10 | 13 | very strong regularization. |
11 | 14 |
|
12 | 15 | """ |
13 | 16 |
|
| 17 | +########################################################################## |
| 18 | +# Imports: |
| 19 | +# |
| 20 | + |
14 | 21 | import numpy as np |
15 | 22 | import matplotlib.pyplot as plt |
| 23 | +########################################################################## |
| 24 | +# This is the fracridge sklearn-style estimator: |
| 25 | +# |
16 | 26 | from fracridge import FracRidge |
17 | 27 |
|
18 | 28 | from sklearn import datasets |
19 | 29 | from sklearn.linear_model import Ridge |
20 | 30 | from sklearn.model_selection import cross_val_predict |
21 | 31 | from sklearn.metrics import r2_score |
22 | 32 |
|
| 33 | +########################################################################## |
| 34 | +# Get example data from scikit learn: |
| 35 | +# |
| 36 | + |
23 | 37 | X, y = datasets.load_diabetes(return_X_y=True) |
24 | 38 |
|
| 39 | +########################################################################## |
| 40 | +# Values of alpha for the standard approach are set to be 20 values |
| 41 | +# that are log-spaced from a very small value to a very large value: |
| 42 | +# |
25 | 43 | n_alphas = 20 |
26 | | -rr_alphas = alphas = np.logspace(-10, 10, n_alphas) |
| 44 | +rr_alphas = np.logspace(-10, 10, n_alphas) |
27 | 45 | rr_coefs = [] |
28 | 46 | rr_coefs = np.zeros((X.shape[-1], n_alphas)) |
29 | 47 | rr_pred = np.zeros((y.shape[-1], n_alphas)) |
| 48 | + |
| 49 | +########################################################################## |
| 50 | +# We calculate the fit and cross-validated prediction for each value of |
| 51 | +# alpha: |
| 52 | + |
30 | 53 | for aa in range(len(rr_alphas)): |
31 | 54 | RR = Ridge(alpha=rr_alphas[aa], fit_intercept=True) |
32 | 55 | RR.fit(X, y) |
33 | 56 | rr_coefs[:, aa] = RR.coef_ |
34 | 57 | rr_pred[:, aa] = cross_val_predict(RR, X, y) |
35 | 58 |
|
| 59 | +########################################################################## |
| 60 | +# In contrast, FRR takes as inputs fractions, rather than arbitrarily-chosen |
| 61 | +# values of alpha. The alphas that are generated are selected to produce |
| 62 | +# solutions whose fractional L2-norm relative to the L2-norm of the |
| 63 | +# unregularized solution are these values. Here too, cross-validated |
| 64 | +# predictions are generated: |
| 65 | + |
36 | 66 | fracs = np.linspace(0, 1, n_alphas) |
37 | 67 | FR = FracRidge(fracs=fracs, fit_intercept=True) |
38 | 68 | FR.fit(X, y) |
39 | 69 | fr_pred = cross_val_predict(FR, X, y) |
40 | 70 |
|
| 71 | +########################################################################## |
| 72 | +# We plot the results. First, the FRR coefficients as a function of requested |
| 73 | +# fractions: |
| 74 | + |
41 | 75 | fig, ax = plt.subplots(1, 2) |
42 | 76 | ax[0].plot(fracs, FR.coef_.T) |
43 | 77 | ylims = ax[0].get_ylim() |
44 | 78 | ax[0].vlines(fracs, ylims[0], ylims[1], linewidth=0.5, color='gray') |
45 | 79 | ax[0].set_ylim(*ylims) |
46 | 80 |
|
| 81 | +########################################################################## |
| 82 | +# Next, the RR coefficients as a function of the requested log-spaced alpha: |
47 | 83 | ax[1].plot(np.log(rr_alphas[::-1]), rr_coefs.T) |
48 | 84 | ylims = ax[1].get_ylim() |
49 | 85 | ax[1].vlines(np.log(rr_alphas[::-1]), ylims[0], ylims[1], linewidth=0.5, |
50 | 86 | color='gray') |
51 | 87 | ax[1].set_ylim(*ylims) |
52 | 88 |
|
| 89 | +########################################################################## |
| 90 | +# In a second plot, we compare the cross-validated predictions with the |
| 91 | +# original data. This is appropriate as each prediction was generated in a |
| 92 | +# sample that did not include that observation. |
| 93 | + |
53 | 94 | test_y = np.tile(y, (fr_pred.shape[-1], 1)).T |
54 | 95 |
|
55 | 96 | rr_r2 = r2_score(test_y, rr_pred, multioutput="raw_values") |
|
0 commit comments