|
562 | 562 | if ( ~all (X(:, 1) == 1) ) |
563 | 563 | X = cat (2, ones (m, 1), X); |
564 | 564 | n = n + 1; |
| 565 | + if (~ isempty (categor)) |
| 566 | + categor = categor + 1; % Shift indices to match new design matrix |
| 567 | + end |
565 | 568 | end |
566 | 569 | p = n - 1; |
567 | 570 | % Check that X contains floating point numbers |
|
744 | 747 | % Get the prediction error and stability selection at the optimal lambda |
745 | 748 | % Use a minimum of 1999 bootstrap resamples for stability selection |
746 | 749 | B = max (nboot, 1999); |
747 | | - [pred_err, stability] = booterr632 (YS, XC, lambda, P_vec, B, categor, seed); |
| 750 | + [pred_err, stability, oob_err] = booterr632 (YS, XC, lambda, P_vec, B, ... |
| 751 | + categor, seed); |
748 | 752 |
|
749 | 753 | % Correct stability selection probabilities for the design effect |
750 | 754 | stdnormcdf = @(x) 0.5 * (1 + erf (x / sqrt (2))); |
|
771 | 775 |
|
772 | 776 | % Regression coefficient and the effective degrees of freedom for ridge |
773 | 777 | % regression penalized using the optimized (and corrected) lambda |
774 | | - A = X' * X + diag (lambda * P_vec); % Regularized normal equation matrix |
| 778 | + % Calculate regularized system matrix: A = X' * X + diag (lambda * P_vec); |
| 779 | + A = X' * X; % System matrix |
| 780 | + A(1:n+1:end) = A(1:n+1:end) + (lambda * P_vec'); % Regularized system matrix |
775 | 781 | [U, flag] = chol (A); % Upper Cholesky factor of symmetric A |
776 | 782 | tol = sqrt (m / eps (class (X))); % Set tolerance |
777 | 783 | if (~ flag); flag = (max (diag (U)) / min (diag (U)) > 1e+06); end; |
|
1100 | 1106 | end |
1101 | 1107 | fprintf('\n'); |
1102 | 1108 |
|
| 1109 | + % Plot bootstrap learning curve |
| 1110 | + plot (oob_err(1:nboot), '-r', 'linewidth', 1); box off; grid on; |
| 1111 | + title ('Bootstrap learning curve'); |
| 1112 | + xlabel ({'','Bootstrap resample'}); |
| 1113 | + ylabel ({'Running out-of-bag error',''}); |
| 1114 | + |
1103 | 1115 | end |
1104 | 1116 |
|
1105 | 1117 | end |
|
1109 | 1121 |
|
1110 | 1122 | %% FUNCTION FOR .632 BOOTSTRAP ESTIMATOR OF PREDICTION ERROR |
1111 | 1123 |
|
1112 | | -function [PRED_ERR, STABILITY] = booterr632 (Y, X, lambda, P_vec, nboot, ... |
1113 | | - categor, seed) |
| 1124 | +function [PRED_ERR, STABILITY, OOB_ERR] = booterr632 (Y, X, lambda, P_vec, ... |
| 1125 | + nboot, categor, seed) |
1114 | 1126 |
|
1115 | 1127 | % This function computes Efron & Tibshirani's .632 bootstrap prediction error |
1116 | | - % for a multivariate linear ridge/Tikhonov model. Loss is the per-observation |
1117 | | - % squared Euclidean error: |
| 1128 | + % for a multivariate linear ridge/Tikhonov model. The .632 bootstrap estimator |
| 1129 | + % is a weighted average of the overly-optimistic apparent error (in-bag error) |
| 1130 | + % and the overly-pessimistic out-of-bag error, where the weights arise from |
| 1131 | + % the expected probability that a data point is included in a bootstrap sample |
| 1132 | + % (~0.632) or excluded (~0.368). |
| 1133 | + % |
| 1134 | + % Loss is the per-observation squared Euclidean error: |
1118 | 1135 | % Q(y_i, yhat_i) = ||y_i - yhat_i||_2^2 |
| 1136 | + % |
1119 | 1137 | % Efron and Tibshirani (1993) An Introduction to the Bootstrap. New York, NY: |
1120 | 1138 | % Chapman & Hall. pg 247-252 |
1121 | 1139 |
|
|
1185 | 1203 | SSE_OOB = 0; |
1186 | 1204 | N_OOB = 0; |
1187 | 1205 | NSAMP = 0; |
| 1206 | + OOB_ERR = nan (nboot, 1); |
1188 | 1207 | if (nargout > 1) |
1189 | 1208 | tau = sqrt (eps_X); |
1190 | 1209 | Sign_obs = sign (Beta_obs); |
|
1276 | 1295 | % Calculate and accumulate number of OOB observations |
1277 | 1296 | N_OOB = N_OOB + sum (o) ; |
1278 | 1297 |
|
| 1298 | + % Calculate running out-of-bag error (smooth and monotonic) |
| 1299 | + OOB_ERR(b:nboot) = SSE_OOB / N_OOB; |
| 1300 | + |
1279 | 1301 | % Count actual bootstrap samples used |
1280 | 1302 | NSAMP = NSAMP + 1; |
1281 | 1303 |
|
|
0 commit comments