@@ -18,18 +18,44 @@ class RidgeRegression2FoldCV(BaseEstimator, MultiOutputMixin, RegressorMixin):
\|y - Xw\|^2_2 + \alpha \|w\|^2_2,

while the alpha value is determined with a 2-fold cross-validation from a list of
- alpha values. It is more efficient than doing a 2-fold cross-validation using
- :obj:`sklearn.linear_model.RidgeCV`.
- The advantage over :obj:`sklearn.linear_model.RidgeCV` using leave-one-out
- cross-validation (LOOCV) [loocv]_ needs to be analyzed more in detail. Internal
- benchmarks suggest that it is more efficient than the LOOCV in
- :obj:`sklearn.linear_model.RidgeCV` for feature sizes < 600 and in general more
- accurate, see issue #40. However, it is constraint to a svd solver for the matrix
- inversion.
+ alpha values. It is more efficient than a naive 2-fold cross-validation
+ implementation. The algorithmic trick is to reuse the matrices obtained by
+ SVD for each regularization parameter ``alpha``. The 2-fold CV can be
+ broken down to
+
+ .. math::
+
+     \begin{align}
+         &\mathbf{X}_1 = \mathbf{U}_1\mathbf{S}_1\mathbf{V}_1^T,
+             \qquad\qquad\qquad\quad
+             \textrm{feature matrix }\mathbf{X}\textrm{ for fold 1} \\
+         &\mathbf{W}_1(\lambda) = \mathbf{V}_1
+             \tilde{\mathbf{S}}_1(\lambda)^{-1} \mathbf{U}_1^T y_1,
+             \qquad
+             \textrm{weight matrix fitted on fold 1} \\
+         &\tilde{y}_2 = \mathbf{X}_2 \mathbf{W}_1(\lambda),
+             \qquad\qquad\qquad\quad
+             \textrm{prediction of } y \textrm{ for fold 2}
+     \end{align}
+
+ where the matrices
+
+ .. math::
+
+     \begin{align}
+         &\mathbf{A}_1 = \mathbf{X}_2 \mathbf{V}_1, \quad
+         \mathbf{B}_1 = \mathbf{U}_1^T y_1
+     \end{align}
+
+ are stored, so that for each regularization parameter the fold-2 prediction
+ :math:`\tilde{y}_2 = \mathbf{A}_1 \tilde{\mathbf{S}}_1(\lambda)^{-1} \mathbf{B}_1`
+ can be computed without recomputing the SVD.
+
It offers additional functionalities in comparison to
- :obj:`sklearn.linear_model.Ridge`: The regularaization parameters can be chosen
- relative to the largest eigenvalue of the feature matrix
- as well as regularization method. Details are explained in the `Parameters` section.
+ :obj:`sklearn.linear_model.RidgeCV`: the regularization parameters can be
+ chosen relative to the largest eigenvalue of the feature matrix using
+ ``alpha_type``, and the type of regularization can be selected using
+ ``regularization_method``. Details are explained in the `Parameters` section.
+
+ It does not offer a ``fit_intercept`` parameter as sklearn linear models do;
+ it can only fit without an intercept.

Parameters
----------
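The SVD trick described in the docstring above maps directly to a few lines of NumPy. The following is a minimal sketch under stated assumptions: the half/half split, the variable names, and the negative-RMSE scoring are illustrative only, not the class's actual internals, and only the Tikhonov case, where the diagonal of :math:`\tilde{\mathbf{S}}_1(\lambda)^{-1}` is :math:`s / (s^2 + \lambda)`, is shown.

    import numpy as np

    # Illustrative data and a simple half/half split (the class itself uses KFold).
    rng = np.random.default_rng(0)
    X = rng.standard_normal((100, 10))
    y = X @ rng.standard_normal(10) + 0.1 * rng.standard_normal(100)
    X1, X2, y1, y2 = X[:50], X[50:], y[:50], y[50:]

    # One SVD of the fold-1 feature matrix, reused for every alpha.
    U1, S1, V1t = np.linalg.svd(X1, full_matrices=False)

    # Stored matrices A_1 = X_2 V_1 and B_1 = U_1^T y_1 from the docstring.
    # (The class additionally truncates small singular values, similar in
    # spirit to the rcond cutoff of numpy.linalg.lstsq.)
    A1 = X2 @ V1t.T
    B1 = U1.T @ y1

    alphas = [1e-3, 1e-2, 1e-1, 1.0]
    scores = []
    for alpha in alphas:
        # Tikhonov case: diagonal of \tilde{S}_1(lambda)^{-1} is s / (s^2 + alpha).
        s_inv = S1 / (S1**2 + alpha)
        # Fold-2 prediction y2_pred = A_1 diag(s_inv) B_1; no SVD is recomputed.
        y2_pred = A1 @ (s_inv * B1)
        scores.append(-np.sqrt(np.mean((y2 - y2_pred) ** 2)))  # negative RMSE

    best_alpha = alphas[int(np.argmax(scores))]

A full 2-fold CV would repeat this with the folds swapped and combine the two scores before selecting ``best_alpha``.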
@@ -49,19 +75,19 @@ class RidgeRegression2FoldCV(BaseEstimator, MultiOutputMixin, RegressorMixin):
parameter in e.g. :obj:`numpy.linalg.lstsq`. Be aware that for every case
we always apply a small default cutoff dependend on the numerical
accuracy of the data type of ``X`` in the fitting function.
+ shuffle : bool, default=True
+     Whether or not to shuffle the data before splitting.
random_state : int or RandomState instance, default=None
    Controls the shuffling applied to the data before applying the split.
    Pass an int for reproducible output across multiple function calls.
    See
    `random_state glossary from sklearn (external link) <https://scikit-learn.org/stable/glossary.html#term-random-state>`_
- shuffle : bool, default=True
-     Whether or not to shuffle the data before splitting. If shuffle=False
-     then stratify must be None.
+     If ``shuffle`` is False, this parameter is ignored.
scoring : str, callable, default=None
    A string (see model evaluation documentation) or
    a scorer callable object / function with signature
    ``scorer(estimator, X, y)``.
-     If None, the negative mean squared error is used.
+     If None, the negative root mean squared error is used.
n_jobs : int, default=None
    The number of CPUs to use to do the computation.
    :obj:`None` means 1 unless in a :obj:`joblib.parallel_backend` context.
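As a concrete illustration of the documented ``scorer(estimator, X, y)`` signature, here is a hypothetical custom scorer; the metric itself is arbitrary and chosen only to show the convention that a larger return value means a better fit.

    import numpy as np

    def neg_max_error(estimator, X, y):
        # Matches the documented signature scorer(estimator, X, y);
        # negated so that greater is better, as scorers require.
        return -np.max(np.abs(y - estimator.predict(X)))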
@@ -71,27 +97,17 @@ class RidgeRegression2FoldCV(BaseEstimator, MultiOutputMixin, RegressorMixin):

Attributes
----------
- cv_values_ : ndarray of shape (n_samples, n_alphas) or \
-     shape (n_samples, n_targets, n_alphas), optional
-     Cross-validation values for each alpha (only available if \
-     ``store_cv_values=True`` and ``cv=None``). After ``fit()`` has been \
-     called, this attribute will contain the mean squared errors \
-     (by default) or the values of the ``{loss,score}_func`` function \
-     (if provided in the constructor).
+ cv_values_ : ndarray of shape (n_alphas,)
+     2-fold cross-validation values for each alpha. After :meth:`fit` has
+     been called, this attribute will contain the values of the score
+     function for each alpha.
coef_ : ndarray of shape (n_features) or (n_targets, n_features)
    Weight vector(s).
- intercept_ : float or ndarray of shape (n_targets,)
-     Independent term in decision function. Set to 0.0 if
-     ``fit_intercept = False``.
alpha_ : float
    Estimated regularization parameter.
best_score_ : float
    Score of base estimator with best alpha.
90
110
91
- References
92
- ----------
93
- .. [loocv] Rifkin "Regularized Least Squares."
94
- https://www.mit.edu/~9.520/spring07/Classes/rlsslides.pdf
    """  # NoQa: E501

    def __init__(
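A brief usage sketch of the class as documented. Stated assumptions: the import path and the ``alphas`` keyword are inferred from the docstring rather than verified against the package, so adjust them to the actual module layout.

    import numpy as np
    from skmatter.linear_model import RidgeRegression2FoldCV  # assumed import path

    rng = np.random.default_rng(0)
    X = rng.standard_normal((60, 8))
    y = X @ rng.standard_normal(8)  # no intercept; the class cannot fit one

    # `alphas` is the assumed name for the list of candidate regularization values.
    model = RidgeRegression2FoldCV(alphas=np.geomspace(1e-4, 1e2, 13), random_state=0)
    model.fit(X, y)
    print(model.alpha_, model.best_score_)  # attributes documented above
    print(model.cv_values_.shape)           # (n_alphas,)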
@@ -154,6 +170,7 @@ def fit(self, X, y):
            )
        else:
            scorer = check_scoring(self, scoring=self.scoring, allow_none=False)
+
        fold1_idx, fold2_idx = next(
            KFold(
                n_splits=2, shuffle=self.shuffle, random_state=self.random_state
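The hunk is truncated above, but the call presumably continues with ``.split(X)``. For reference, the ``next(KFold(...).split(X))`` pattern yields the first pair of complementary index arrays, which is all a 2-fold split needs:

    import numpy as np
    from sklearn.model_selection import KFold

    X = np.arange(20).reshape(10, 2)
    fold1_idx, fold2_idx = next(
        KFold(n_splits=2, shuffle=True, random_state=0).split(X)
    )
    # Two disjoint index arrays that together cover all 10 samples.
    print(fold1_idx, fold2_idx)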