1+ # Importing some of the base packages which are used in the functions below.
2+ # ' @import stats
3+ # ' @import grDevices
4+ # ' @import graphics
5+ # ' @import utils
6+ NULL
7+
18# ' Produce random draws from a uniform Dirichlet distribution
29# '
310# ' \code{rudirichlet} produces \code{n} draws from a \code{d}-dimensional
@@ -62,19 +69,28 @@ is.wholenumber <- function(x, tol = .Machine$double.eps^0.5) {
6269# ' be much slower than using \code{use.weights = TRUE} but will work with a
6370# ' larger range of statistics (the \code{\link{median}}, for example)
6471# '
72+ # ' For more information regarding this implementation of the Bayesian bootstrap
73+ # ' see the blog post
74+ # ' \href{http://www.sumsar.net/blog/2015/07/easy-bayesian-bootstrap-in-r/}{Easy
75+ # ' Bayesian Bootstrap in R}. For more information about the model behind the
76+ # ' Bayesian bootstrap see the blog post
77+ # ' \href{http://www.sumsar.net/blog/2015/04/the-non-parametric-bootstrap-as-a-bayesian-model/}{The
78+ # ' Non-parametric Bootstrap as a Bayesian Model} and, of course,
79+ # ' \href{http://projecteuclid.org/euclid.aos/1176345338}{the original Bayesian
80+ # ' bootstrap paper by Rubin (1981)}.
81+ # '
6582# ' @note \itemize{
6683# ' \item While \code{R} and \code{R2} are set to \code{4000} by
6784# ' default, that should not be taken to indicate that a sample of size 4000 is
6885# ' sufficient nor recommended.
6986# '
7087# ' \item When using \code{use.weights = FALSE} it is important to use a summary
7188# ' statistic that does not depend on the sample size. That is, doubling the size
72- # ' of a dataset by cloning data should result in the same statistic as the
73- # ' original dataset. An example of a statistic that depends on the sample size
74- # ' is the sample standard deviation (that is, \code{\link{sd}}), and when using
75- # ' \code{bayesboot} it would make more sense to use the population standard
76- # ' deviation (see examples below).
77- # ' }
89+ # ' of a dataset by cloning data should result in the same statistic as when
90+ # ' using the original dataset. An example of a statistic that depends on the
91+ # ' sample size is the sample standard deviation (that is, \code{\link{sd}}), and
92+ # ' when using \code{bayesboot} it would make more sense to use the population
93+ # ' standard deviation (as in the example below). }
7894# '
7995# ' @param data Either a vector or a list, or a matrix or a data.frame with one
8096# ' datapoint per row. The format of \code{data} should be compatible with the
@@ -158,7 +174,7 @@ is.wholenumber <- function(x, tol = .Machine$double.eps^0.5) {
158174# ' coef( lm(efp ~ dye, data = d, weights = w) )
159175# ' }
160176# '
161- # ' b5 <- bayesboot(blood.flow, lm.coefs, R = 2000 , use.weights = TRUE)
177+ # ' b5 <- bayesboot(blood.flow, lm.coefs, R = 1000 , use.weights = TRUE)
162178# '
163179# ' # Plotting the marginal posteriors
164180# ' plot(b5)
@@ -204,6 +220,10 @@ bayesboot <- function(data, statistic, R = 4000, R2 = 4000, use.weights = FALSE,
204220 stop(e )
205221 }
206222 )
223+ # TODO: Should I add some more checks to stat.result? Like, that it contains no NA, values?
224+ # or should I maybe do these tests to the final posterior samples and issue a varning if
225+ # there are NAs, NULLs and similar?
226+
207227 } else { # use.weights == FALSE
208228 if (length(R2 ) != 1 || is.na(R2 ) || ! is.wholenumber(R2 ) || R2 < 1 ) {
209229 stop(" If use.weights == FALSE then R2 should be a single whole number >= 1." )
@@ -258,10 +278,20 @@ bayesboot <- function(data, statistic, R = 4000, R2 = 4000, use.weights = FALSE,
258278 class(boot.sample ) <- c(" bayesboot" , class(boot.sample ))
259279 attr(boot.sample , " statistic.label" ) <- statistic.label
260280 attr(boot.sample , " call" ) <- call
281+ # Warn if boot.sample contains "non-values".
282+ col.should.warn <- plyr :: laply(boot.sample , function (boot.col ) {
283+ any(is.na(boot.col ) | is.nan(boot.col ) | is.null(boot.col ))
284+ })
285+ if (any(col.should.warn )) {
286+ warning(paste(
287+ " The sample from bayesboot contains either NAs, NaNs or NULLs." ,
288+ " Make sure that your statistic function only return actual values." ))
289+ }
261290 boot.sample
262291}
263292
264293
294+
265295# ' Summarize the result of \code{bayesboot}
266296# '
267297# ' Summarizes the result of a call to \code{bayesboot} by calculating means, SDs,
@@ -275,6 +305,9 @@ bayesboot <- function(data, statistic, R = 4000, R2 = 4000, use.weights = FALSE,
275305# ' statistic, (2) \bold{measure} the name of the summarizing measure, and (3)
276306# ' \bold{value} the value of the summarizing measure.
277307# '
308+ # ' @seealso \code{\link[HDInterval]{hdi}} in the HDInterval package for directly calculating
309+ # ' highest density intervals from \code{bayesboot} objects.
310+ # '
278311# ' @export
279312summary.bayesboot <- function (object , cred.mass = 0.95 , ... ) {
280313 bootsum <- plyr :: ldply(seq_len(ncol(object )), function (i ) {
@@ -286,7 +319,7 @@ summary.bayesboot <- function(object, cred.mass = 0.95, ...) {
286319 }
287320 data.frame (statistic = names(object )[i ],
288321 measure = c(" mean" , " sd" , " hdi.low" , " hdi.high" ," q2.5%" , " q25%" , " median" ," q75%" , " q97.5%" ),
289- value = c(mean(s ), sd(s ), hdi(s , cred.mass ), quantile(s , c(0.025 , 0.25 , 0.5 , 0.75 , 0.975 ))))
322+ value = c(mean(s ), sd(s ), HDInterval :: hdi(s , cred.mass ), quantile(s , c(0.025 , 0.25 , 0.5 , 0.75 , 0.975 ))))
290323 })
291324 attr(bootsum , " statistic.label" ) <- attr(object , " statistic.label" )
292325 attr(bootsum , " call" ) <- attr(object , " call" )
@@ -296,6 +329,22 @@ summary.bayesboot <- function(object, cred.mass = 0.95, ...) {
296329 bootsum
297330}
298331
332+ # ' Print the first number of draws from the Bayesian bootstrap
333+ # '
334+ # ' @param x The bayesboot object to print.
335+ # ' @param n The number of draws to print.
336+ # ' @param ... Not used.
337+ # ' @export
338+ print.bayesboot <- function (x , n = 10 , ... ) {
339+ cat(paste0(" The first " , n ," draws (out of " , nrow(x ) ," ) from the Bayesian bootstrap:\n " ))
340+ cat(" \n " )
341+ print(as.data.frame(head(x , n )))
342+ cat(" .. ...\n " )
343+ cat(" \n " )
344+ cat(" Use summary() to produce a summary of the posterior distribution.\n " )
345+ invisible (x )
346+ }
347+
299348# ' @method print summary.bayesboot
300349# ' @export
301350print.summary.bayesboot <- function (x , ... ) {
@@ -318,6 +367,7 @@ print.summary.bayesboot <- function(x, ...) {
318367 cat(" \n " )
319368 }
320369 cat(" Call:\n " , format(attr(x , " call" )))
370+ invisible (x )
321371}
322372
323373# ' Coerce to a \code{bayesboot} object
@@ -359,30 +409,31 @@ as.bayesboot <- function(object) {
359409plot.bayesboot <- function (x , cred.mass = 0.95 , plots.per.page = 3 , cex = 1.2 , cex.lab = 1.3 , ... ) {
360410 old.devAskNewPage <- devAskNewPage()
361411 old.par <- par(mfrow = c(min(plots.per.page , ncol(x )) , 1 ) , mar = c(4.1 , 4.1 , 0.5 , 4.1 ), mgp = c(2.5 , 1 , 0 ))
412+ on.exit({ # revert graphical parameters
413+ par(old.par )
414+ devAskNewPage(old.devAskNewPage )
415+ })
362416 n.plots <- 0
363417 for (i in seq_len(ncol(x ))) {
364418 if (! is.numeric(x [, i ])) {
365419 warning(paste(" The statistic" , names(x )[i ] , " was skipped as" ,
366420 " plot.bayesboot can't handle non-numeric statistics." ))
367421 next
368422 }
369- # Byta ut detta mot plotPost?
370- # hist(x[, i], breaks = "FD", xlab = names(x)[i])
371423 n.plots <- n.plots + 1
372424 if (n.plots > plots.per.page ) {
373425 devAskNewPage(TRUE )
374426 }
375- if (ncol(x ) == 1 && names(x )[i ] == " V1" ) {
427+ if (ncol(x ) == 1 && names(x )[i ] == " V1" && attr( x , " statistic.label " ) != " " ) {
376428 # There is only one statistic and it has an uninformative default name
377- # so use the begining of the function call instead as a statistic.
429+ # so use the begining of the function call instead as a statistic, unless
430+ # it is empty.
378431 statistic_name <- attr(x , " statistic.label" )
379432 } else { # use the column name
380433 statistic_name <- names(x )[i ]
381434 }
382435 plotPost(x [, i ], credMass = cred.mass , xlab = statistic_name , cex = cex , cex.lab = cex.lab , ... )
383436 }
384- par(old.par )
385- devAskNewPage(old.devAskNewPage )
386437 invisible (NULL )
387438}
388439
0 commit comments