|
707 | 707 | ' nonzero variance.')); |
708 | 708 | end |
709 | 709 | parsubfun = struct ('booterr632', @booterr632, 'lambda_eval', @lambda_eval); |
710 | | - obj_func = @(lambda) parsubfun.booterr632 (YS, XC, lambda, P_vec, nboot, seed); |
| 710 | + obj_func = @(lambda) parsubfun.booterr632 (YS, XC, lambda, P_vec, nboot, ... |
| 711 | + categor, seed); |
711 | 712 |
|
712 | 713 | % Search for the optimal lambda by .632 bootstrap prediction error |
713 | 714 | try |
|
732 | 733 | % Get the prediction error and stability selection at the optimal lambda |
733 | 734 | % Use a minimum of 1999 bootstrap resamples for stability selection |
734 | 735 | B = max (nboot, 1999); |
735 | | - [pred_err, stability] = booterr632 (YS, XC, lambda, P_vec, B, seed); |
| 736 | + [pred_err, stability] = booterr632 (YS, XC, lambda, P_vec, B, categor, seed); |
736 | 737 |
|
737 | 738 | % Correct stability selection probabilities for the design effect |
738 | 739 | stdnormcdf = @(x) 0.5 * (1 + erf (x / sqrt (2))); |
|
1095 | 1096 |
|
1096 | 1097 | %% FUNCTION FOR .632 BOOTSTRAP ESTIMATOR OF PREDICTION ERROR |
1097 | 1098 |
|
1098 | | -function [PRED_ERR, STABILITY] = booterr632 (Y, X, lambda, P_vec, nboot, seed) |
| 1099 | +function [PRED_ERR, STABILITY] = booterr632 (Y, X, lambda, P_vec, nboot, ... |
| 1100 | + categor, seed) |
1099 | 1101 |
|
1100 | 1102 | % This function computes Efron & Tibshirani’s .632 bootstrap prediction error |
1101 | 1103 | % for a multivariate linear ridge/Tikhonov model. Loss is the per-observation |
|
1169 | 1171 | % --- BOOTSTRAP LOOP --- |
1170 | 1172 | SSE_OOB = 0; |
1171 | 1173 | N_OOB = 0; |
| 1174 | + NSAMP = 0; |
1172 | 1175 | if (nargout > 1) |
1173 | 1176 | tau = sqrt (eps_X); |
1174 | 1177 | Sign_obs = sign (Beta_obs); |
|
1183 | 1186 | o = true (m, 1); |
1184 | 1187 | o(i) = false; |
1185 | 1188 |
|
1186 | | - % Check for missing predictors in training set and remove out-of-bag |
1187 | | - % samples that have those predictors |
1188 | | - missing = ~ any (X(i, :), 1); |
1189 | | - if any (missing) |
1190 | | - o(any (X(:,missing), 2)) = false; |
| 1189 | + % If there are any categorical predictor terms, check for missing predictors |
| 1190 | + % in training set and remove out-of-bag samples that have those predictors. |
| 1191 | + if (~ isempty (categor)) |
| 1192 | + missing = ~ any (X(i, :), 1); |
| 1193 | + if (any (missing)) |
| 1194 | + o(any (X(:,missing), 2)) = false; |
| 1195 | + end |
1191 | 1196 | end |
1192 | 1197 |
|
1193 | 1198 | % Skip to next bootstrap sample if there are no out-of-bag observations |
|
1257 | 1262 | % Calculate and accumulate number of OOB observations |
1258 | 1263 | N_OOB = N_OOB + sum (o) ; |
1259 | 1264 |
|
| 1265 | + % Count actual bootstrap samples used |
| 1266 | + NSAMP = NSAMP + 1; |
| 1267 | + |
1260 | 1268 | end |
1261 | 1269 |
|
1262 | 1270 | % Calculate pooled OOB error estimate |
|
1271 | 1279 | % Calculate stability selection |
1272 | 1280 | if (nargout > 1) |
1273 | 1281 | % Convert counts to proportions, with Jeffreys smoothing (add-0.5 pseudo-counts). |
1274 | | - STABILITY = (STABILITY + 0.5) / (nboot + 1.0); |
| 1282 | + STABILITY = (STABILITY + 0.5) / (NSAMP + 1.0); |
1275 | 1283 | STABILITY(1, :) = NaN; % Set stability selection to NaN for the intercepts |
1276 | 1284 | end |
1277 | 1285 |
|
|
0 commit comments