|
15 | 15 | % [1,2]) distribution(s) is/are summarised with the following statistics |
16 | 16 | % printed to the standard output: |
17 | 17 | % • original: the mean of the data vector y |
18 | | -% • bias: bootstrap estimate(s) of the bias |
| 18 | +% • bias: bootstrap bias estimate(s) |
19 | 19 | % • median: the median of the posterior distribution(s) |
20 | 20 | % • CI_lower: lower bound(s) of the 95% credible interval |
21 | 21 | % • CI_upper: upper bound(s) of the 95% credible interval |
| 22 | +% • p-val: two-tailed p-value(s) for the parameter(s) being equal to 0 |
22 | 23 | % By default, the credible intervals are shortest probability intervals, |
23 | 24 | % which represent a more computationally stable version of the highest |
24 | | -% posterior density interval [3]. |
| 25 | +% posterior density interval [3]. The p-value(s) is/are computed from |
| 26 | +% the Student-t (null) distribution(s) constructed from the posterior |
| 27 | +% statistics and heteroscedasticity-consistent standard errors [4,5]. |
25 | 28 | % |
26 | 29 | % 'bootbayes (y, X)' also specifies the design matrix (X) for least squares |
27 | 30 | % regression of y on X. X should be a column vector or matrix the same |
|
64 | 67 | % 'bootbayes' results are reproducible. |
65 | 68 | % |
66 | 69 | % 'bootbayes (..., NBOOT, PROB, PRIOR, SEED, L)' multiplies the regression |
67 | | -% coefficients by the hypothesis matrix L. If L is not provided or is empty, |
68 | | -% it will assume the default value of 1. |
| 70 | +% coefficients by the hypothesis matrix L. If L is not provided or is empty, |
| 71 | +% it will assume the default value of 1. This functionality is usually used |
| 72 | +% to convert regression to estimated marginal means. NaN is returned for |
| 73 | +% p-values if a hypothesis matrix is provided. |
69 | 74 | % |
70 | 75 | % 'STATS = bootbayes (STATS, ...) returns a structure with the following |
71 | | -% fields (defined above): original, bias, median, CI_lower, CI_upper. |
| 76 | +% fields (defined above): original, bias, median, CI_lower, CI_upper, tstat |
| 77 | +% and pval. |
72 | 78 | % |
73 | 79 | % '[STATS, BOOTSTAT] = bootbayes (STATS, ...) also returns the a vector (or |
74 | 80 | % matrix) of bootstrap statistics (BOOTSTAT) calculated over the bootstrap |
|
80 | 86 | % Bootstrap Mean. Ann. Statist. 17(2):705-710 |
81 | 87 | % [3] Liu, Gelman & Zheng (2015). Simulation-efficient shortest probability |
82 | 88 | % intervals. Statistics and Computing, 25(4), 809–819. |
| 89 | +% [4] Hall and Wilson (1991) Two Guidelines for Bootstrap Hypothesis Testing. |
| 90 | +% Biometrics, 47(2), 757-762 |
| 91 | +% [5] Long and Ervin (2000) Using Heteroscedasticity Consistent Standard |
| 92 | +% Errors in the Linear Regression Model. Am. Stat, 54(3), 217-224 |
83 | 93 | % |
84 | | -% bootbayes (version 2023.05.14) |
| 94 | +% bootbayes (version 2023.05.18) |
85 | 95 | % Author: Andrew Charles Penn |
86 | 96 | % https://www.researchgate.net/profile/Andrew_Penn/ |
87 | 97 | % |
|
230 | 240 | % Create weighted least squares anonymous function |
231 | 241 | bootfun = @(w) lmfit (X, y, diag (w), L); |
232 | 242 |
|
233 | | - % Calculate estimate(s) |
234 | | - original = bootfun (ones (n, 1)); |
| 243 | + % Calculate estimate(s) and heteroscedasticity robust standard error(s) (HC1) |
| 244 | + S = bootfun (ones (n, 1)); |
| 245 | + original = S.b; |
| 246 | + std_err = S.se; |
| 247 | + t = original ./ std_err; |
235 | 248 |
|
236 | 249 | % Create weights by randomly sampling from a symmetric Dirichlet distribution. |
237 | 250 | % This can be achieved by normalizing a set of randomly generated values from |
|
243 | 256 | end |
244 | 257 | W = bsxfun (@rdivide, r, sum (r)); |
245 | 258 |
|
246 | | - % Compute bootstat |
247 | | - bootstat = cell2mat (cellfun (bootfun, num2cell (W, 1), 'UniformOutput', false)); |
| 259 | + % Compute bootstap statistics |
| 260 | + bootout = cell2mat (cellfun (bootfun, num2cell (W, 1), 'UniformOutput', false)); |
| 261 | + bootstat = [bootout.b]; |
| 262 | + bootse = [bootout.se]; |
| 263 | + |
| 264 | + % Compute frequentist p-values following the guidelines described by |
| 265 | + % Hall and Wilson (1991) Biometrics, 47(2), 757-762 |
| 266 | + if (all (isnan (t))) |
| 267 | + pval = t; |
| 268 | + else |
| 269 | + T = bsxfun (@minus, bootstat, original) ./ bootse; % Null distribution |
| 270 | + pval = sum (bsxfun(@gt, abs (T), abs (t)), 2) / nboot; |
| 271 | + end |
248 | 272 |
|
249 | 273 | % Bootstrap bias estimation |
250 | 274 | bias = mean (bootstat, 2) - original; |
|
277 | 301 | stats.median = median (bootstat, 2); |
278 | 302 | stats.CI_lower = ci(:, 1); |
279 | 303 | stats.CI_upper = ci(:, 2); |
| 304 | + stats.tstat = t; |
| 305 | + stats.pval = pval; |
280 | 306 |
|
281 | 307 | % Print output if no output arguments are requested |
282 | 308 | if (nargout == 0) |
|
289 | 315 |
|
290 | 316 | %% FUNCTION TO FIT THE LINEAR MODEL |
291 | 317 |
|
292 | | -function b = lmfit (X, y, W, L) |
| 318 | +function S = lmfit (X, y, W, L) |
293 | 319 |
|
294 | 320 | % Get model coefficients by solving the linear equation by matrix arithmetic |
295 | 321 | % If optional arument W is provided, it should be a diagonal matrix of |
296 | 322 | % weights or a positive definite covariance matrix |
| 323 | + n = numel (y); |
297 | 324 | if (nargin < 3) |
298 | 325 | % If no weights are provided, create an identity matrix |
299 | | - n = numel (y); |
300 | 326 | W = eye (n); |
301 | 327 | end |
302 | 328 | if (nargin < 4) |
|
305 | 331 | end |
306 | 332 |
|
307 | 333 | % Solve linear equation to minimize weighted least squares |
308 | | - b = L * pinv (X' * W * X) * (X' * W * y); |
| 334 | + XW = X' * W; |
| 335 | + invG = pinv (XW * X); % calculate pseudoinverse of the Gram matrix |
| 336 | + b = L * (invG * (XW * y)); |
| 337 | + |
| 338 | + % Calculate heteroscedasticity-consistent standard errors (HC1) for the |
| 339 | + % regression coefficients. The standard errors calculated here reproduce |
| 340 | + % the HC1 standard errors calculated in R using vcovHC from the sandwich |
| 341 | + % package. |
| 342 | + % Ref: Long and Ervin (2000) Am. Stat, 54(3), 217-224 |
| 343 | + k = numel (b); |
| 344 | + if ((numel (L) == 1) && all (L == 1)) |
| 345 | + yf = X * invG * (XW * y); |
| 346 | + r = y - yf; |
| 347 | + rw = W * r; |
| 348 | + se = sqrt (diag ((n / (n - k) * invG * X' * diag ((rw).^2) * X * invG))); |
| 349 | + else |
| 350 | + se = nan (k, 1); |
| 351 | + end |
| 352 | + |
| 353 | + % Prepare output |
| 354 | + S.b = b; |
| 355 | + S.se = se; |
309 | 356 |
|
310 | 357 | end |
311 | 358 |
|
@@ -336,11 +383,23 @@ function print_output (stats, nboot, prob, prior, p, L) |
336 | 383 | fprintf (' Credible interval: %.3g%%\n', mass); |
337 | 384 | end |
338 | 385 | end |
| 386 | + fprintf (' Null value (H0) used for hypothesis testing (p-values): 0 \n') |
339 | 387 | fprintf ('\nPosterior Statistics: \n'); |
340 | | - fprintf (' original bias median CI_lower CI_upper\n'); |
| 388 | + fprintf (' original bias median CI_lower CI_upper p-val\n'); |
341 | 389 | for j = 1:p |
342 | | - fprintf (' %#-+12.6g %#-+12.6g %#-+12.6g %#-+12.6g %#-+12.6g \n',... |
| 390 | + if (stats.pval(j) <= 0.001) |
| 391 | + fprintf (' %#-+12.6g %#-+12.6g %#-+12.6g %#-+12.6g %#-+12.6g <.001 \n',... |
| 392 | + [stats.original(j), stats.bias(j), stats.median(j), stats.CI_lower(j), stats.CI_upper(j)]); |
| 393 | + elseif (stats.pval(j) < 0.9995) |
| 394 | + fprintf (' %#-+12.6g %#-+12.6g %#-+12.6g %#-+12.6g %#-+12.6g .%03u \n',... |
| 395 | + [stats.original(j), stats.bias(j), stats.median(j), stats.CI_lower(j), stats.CI_upper(j), round(stats.pval(j) * 1e+03)]); |
| 396 | + elseif (isnan (stats.pval(j))) |
| 397 | + fprintf (' %#-+12.6g %#-+12.6g %#-+12.6g %#-+12.6g %#-+12.6g NaN \n',... |
343 | 398 | [stats.original(j), stats.bias(j), stats.median(j), stats.CI_lower(j), stats.CI_upper(j)]); |
| 399 | + else |
| 400 | + fprintf (' %#-+12.6g %#-+12.6g %#-+12.6g %#-+12.6g %#-+12.6g 1.000 \n',... |
| 401 | + [stats.original(j), stats.bias(j), stats.median(j), stats.CI_lower(j), stats.CI_upper(j)]); |
| 402 | + end |
344 | 403 | end |
345 | 404 | fprintf ('\n'); |
346 | 405 |
|
|
0 commit comments