Skip to content

Commit 13e576d

Browse files
committed
bootridge: minor bug fix to categor in an edge case; added bootstrap learning curve
1 parent ff94c70 commit 13e576d

File tree

1 file changed

+28
-6
lines changed

1 file changed

+28
-6
lines changed

inst/bootridge.m

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -562,6 +562,9 @@
562562
if ( ~all (X(:, 1) == 1) )
563563
X = cat (2, ones (m, 1), X);
564564
n = n + 1;
565+
if (~ isempty (categor))
566+
categor = categor + 1; % Shift indices to match new design matrix
567+
end
565568
end
566569
p = n - 1;
567570
% Check that X contains floating point numbers
@@ -744,7 +747,8 @@
744747
% Get the prediction error and stability selection at the optimal lambda
745748
% Use a minimum of 1999 bootstrap resamples for stability selection
746749
B = max (nboot, 1999);
747-
[pred_err, stability] = booterr632 (YS, XC, lambda, P_vec, B, categor, seed);
750+
[pred_err, stability, oob_err] = booterr632 (YS, XC, lambda, P_vec, B, ...
751+
categor, seed);
748752

749753
% Correct stability selection probabilities for the design effect
750754
stdnormcdf = @(x) 0.5 * (1 + erf (x / sqrt (2)));
@@ -771,7 +775,9 @@
771775

772776
% Regression coefficient and the effective degrees of freedom for ridge
773777
% regression penalized using the optimized (and corrected) lambda
774-
A = X' * X + diag (lambda * P_vec); % Regularized normal equation matrix
778+
% Calculate regularized system matrix: A = X' * X + diag (lambda * P_vec);
779+
A = X' * X; % System matrix
780+
A(1:n+1:end) = A(1:n+1:end) + (lambda * P_vec'); % Regularized system matrix
775781
[U, flag] = chol (A); % Upper Cholesky factor of symmetric A
776782
tol = sqrt (m / eps (class (X))); % Set tolerance
777783
if (~ flag); flag = (max (diag (U)) / min (diag (U)) > 1e+06); end;
@@ -1100,6 +1106,12 @@
11001106
end
11011107
fprintf('\n');
11021108

1109+
% Plot bootstrap learning curve
1110+
plot (oob_err(1:nboot), '-r', 'linewidth', 1); box off; grid on;
1111+
title ('Bootstrap learning curve');
1112+
xlabel ({'','Bootstrap resample'});
1113+
ylabel ({'Running out-of-bag error',''});
1114+
11031115
end
11041116

11051117
end
@@ -1109,13 +1121,19 @@
11091121

11101122
%% FUNCTION FOR .632 BOOTSTRAP ESTIMATOR OF PREDICTION ERROR
11111123

1112-
function [PRED_ERR, STABILITY] = booterr632 (Y, X, lambda, P_vec, nboot, ...
1113-
categor, seed)
1124+
function [PRED_ERR, STABILITY, OOB_ERR] = booterr632 (Y, X, lambda, P_vec, ...
1125+
nboot, categor, seed)
11141126

11151127
% This function computes Efron & Tibshirani’s .632 bootstrap prediction error
1116-
% for a multivariate linear ridge/Tikhonov model. Loss is the per-observation
1117-
% squared Euclidean error:
1128+
% for a multivariate linear ridge/Tikhonov model. The .632 bootstrap estimator
1129+
% is a weighted average of the overly-optimistic apparent error (in-bag error)
1130+
% and the overly-pessimistic out-of-bag error, where the weights arise from
1131+
% the expected probability that a data point is included in a bootstrap sample
1132+
% (~0.632) or excluded (~0.368).
1133+
%
1134+
% Loss is the per-observation squared Euclidean error:
11181135
% Q(y_i, yhat_i) = ||y_i - yhat_i||_2^2
1136+
%
11191137
% Efron and Tibshirani (1993) An Introduction to the Bootstrap. New York, NY:
11201138
% Chapman & Hall. pg 247-252
11211139

@@ -1185,6 +1203,7 @@
11851203
SSE_OOB = 0;
11861204
N_OOB = 0;
11871205
NSAMP = 0;
1206+
OOB_ERR = nan (nboot, 1);
11881207
if (nargout > 1)
11891208
tau = sqrt (eps_X);
11901209
Sign_obs = sign (Beta_obs);
@@ -1276,6 +1295,9 @@
12761295
% Calculate and accumulate number of OOB observations
12771296
N_OOB = N_OOB + sum (o) ;
12781297

1298+
% Calculate running out-of-bag error (smooth and monotonic)
1299+
OOB_ERR(b:nboot) = SSE_OOB / N_OOB;
1300+
12791301
% Count actual bootstrap samples used
12801302
NSAMP = NSAMP + 1;
12811303

0 commit comments

Comments
 (0)