Skip to content

Commit 4d4e0e2

Browse files
committed
Merge pull request #1 from rasmusab/develop
Update master to 0.2.0
2 parents 4220f4f + b3255e3 commit 4d4e0e2

26 files changed

+239
-447
lines changed

.Rbuildignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,6 @@
33
^README\.Rmd$
44
^README-.*\.png$
55
^testingground$
6+
^blogposts$
7+
Rplots.pdf
8+
cran-comments.md

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@
22
.Rhistory
33
.RData
44
testingground
5+
blogposts

DESCRIPTION

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,21 @@
11
Package: bayesboot
22
Type: Package
33
Title: An Implementation of Rubin's (1981) Bayesian Bootstrap
4-
Version: 0.1
5-
Date: 2016-02-09
4+
Version: 0.2.0
5+
Date: 2016-02-18
66
Authors@R: person("Rasmus", "Bååth", email = "rasmus.baath@gmail.com", role = c("aut", "cre"))
77
Description: Functions for performing the Bayesian bootstrap as introduced by
8-
Rubin (1981) and for summarizing the result. The implementation can both handle
9-
summary statistics that works on a weighted version of the data or that works
10-
on a resampled data set.
8+
Rubin (1981) <doi:10.1214/aos/1176345338> and for summarizing the result.
9+
The implementation can handle both summary statistics that works on a
10+
weighted version of the data and summary statistics that works on a
11+
resampled data set.
1112
License: MIT + file LICENSE
1213
LazyData: TRUE
14+
URL: https://github.com/rasmusab/bayesboot
15+
BugReports: https://github.com/rasmusab/bayesboot/issues
1316
RoxygenNote: 5.0.1
14-
Imports: plyr (>= 1.8.3)
17+
Imports: plyr (>= 1.8.3),
18+
HDInterval(>= 0.1.1)
1519
Depends: R (>= 3.2.0)
1620
Suggests:
1721
testthat,

NAMESPACE

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,14 @@
11
# Generated by roxygen2: do not edit by hand
22

3-
S3method(hdi,"function")
4-
S3method(hdi,data.frame)
5-
S3method(hdi,default)
6-
S3method(hdi,density)
7-
S3method(hdi,matrix)
8-
S3method(hdi,mcmc.list)
93
S3method(plot,bayesboot)
4+
S3method(print,bayesboot)
105
S3method(print,summary.bayesboot)
116
S3method(summary,bayesboot)
127
export(as.bayesboot)
138
export(bayesboot)
14-
export(hdi)
159
export(plotPost)
1610
export(rudirichlet)
11+
import(grDevices)
12+
import(graphics)
13+
import(stats)
14+
import(utils)

NEWS

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,13 @@
1-
bayesboot 0.1
1+
bayesboot 0.2.0
2+
=======================
3+
4+
* Added print.bayesboot method.
5+
* bayesboot now warns when the posterior sample contains NAs, NaNs and NULLs.
6+
* plot.bayesboot now correctly resets graph parameters.
7+
* Removed bayesboot::hdi and instead depend on HDInterval::hdi .
8+
9+
10+
bayesboot 0.1.0
211
=======================
312

413
* Initial release.

R/bayesboot.R

Lines changed: 65 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
# Importing some of the base packages which are used in the functions below.
2+
#' @import stats
3+
#' @import grDevices
4+
#' @import graphics
5+
#' @import utils
6+
NULL
7+
18
#' Produce random draws from a uniform Dirichlet distribution
29
#'
310
#' \code{rudirichlet} produces \code{n} draws from a \code{d}-dimensional
@@ -62,19 +69,28 @@ is.wholenumber <- function(x, tol = .Machine$double.eps^0.5) {
6269
#' be much slower than using \code{use.weights = TRUE} but will work with a
6370
#' larger range of statistics (the \code{\link{median}}, for example)
6471
#'
72+
#' For more information regarding this implementation of the Bayesian bootstrap
73+
#' see the blog post
74+
#' \href{http://www.sumsar.net/blog/2015/07/easy-bayesian-bootstrap-in-r/}{Easy
75+
#' Bayesian Bootstrap in R}. For more information about the model behind the
76+
#' Bayesian bootstrap see the blog post
77+
#' \href{http://www.sumsar.net/blog/2015/04/the-non-parametric-bootstrap-as-a-bayesian-model/}{The
78+
#' Non-parametric Bootstrap as a Bayesian Model} and, of course,
79+
#' \href{http://projecteuclid.org/euclid.aos/1176345338}{the original Bayesian
80+
#' bootstrap paper by Rubin (1981)}.
81+
#'
6582
#' @note \itemize{
6683
#' \item While \code{R} and \code{R2} are set to \code{4000} by
6784
#' default, that should not be taken to indicate that a sample of size 4000 is
6885
#' sufficient nor recommended.
6986
#'
7087
#' \item When using \code{use.weights = FALSE} it is important to use a summary
7188
#' statistic that does not depend on the sample size. That is, doubling the size
72-
#' of a dataset by cloning data should result in the same statistic as the
73-
#' original dataset. An example of a statistic that depends on the sample size
74-
#' is the sample standard deviation (that is, \code{\link{sd}}), and when using
75-
#' \code{bayesboot} it would make more sense to use the population standard
76-
#' deviation (see examples below).
77-
#' }
89+
#' of a dataset by cloning data should result in the same statistic as when
90+
#' using the original dataset. An example of a statistic that depends on the
91+
#' sample size is the sample standard deviation (that is, \code{\link{sd}}), and
92+
#' when using \code{bayesboot} it would make more sense to use the population
93+
#' standard deviation (as in the example below). }
7894
#'
7995
#' @param data Either a vector or a list, or a matrix or a data.frame with one
8096
#' datapoint per row. The format of \code{data} should be compatible with the
@@ -158,7 +174,7 @@ is.wholenumber <- function(x, tol = .Machine$double.eps^0.5) {
158174
#' coef( lm(efp ~ dye, data = d, weights = w) )
159175
#' }
160176
#'
161-
#' b5 <- bayesboot(blood.flow, lm.coefs, R = 2000, use.weights = TRUE)
177+
#' b5 <- bayesboot(blood.flow, lm.coefs, R = 1000, use.weights = TRUE)
162178
#'
163179
#' # Plotting the marginal posteriors
164180
#' plot(b5)
@@ -204,6 +220,10 @@ bayesboot <- function(data, statistic, R = 4000, R2 = 4000, use.weights = FALSE,
204220
stop(e)
205221
}
206222
)
223+
# TODO: Should I add some more checks to stat.result? Like, that it contains no NA, values?
224+
# or should I maybe do these tests to the final posterior samples and issue a varning if
225+
# there are NAs, NULLs and similar?
226+
207227
} else { # use.weights == FALSE
208228
if(length(R2) != 1 || is.na(R2) || !is.wholenumber(R2) || R2 < 1) {
209229
stop("If use.weights == FALSE then R2 should be a single whole number >= 1.")
@@ -258,10 +278,20 @@ bayesboot <- function(data, statistic, R = 4000, R2 = 4000, use.weights = FALSE,
258278
class(boot.sample) <- c("bayesboot", class(boot.sample))
259279
attr(boot.sample, "statistic.label") <- statistic.label
260280
attr(boot.sample, "call") <- call
281+
# Warn if boot.sample contains "non-values".
282+
col.should.warn <- plyr::laply(boot.sample, function(boot.col) {
283+
any(is.na(boot.col) |is.nan(boot.col) | is.null(boot.col))
284+
})
285+
if(any(col.should.warn)) {
286+
warning(paste(
287+
"The sample from bayesboot contains either NAs, NaNs or NULLs.",
288+
"Make sure that your statistic function only return actual values."))
289+
}
261290
boot.sample
262291
}
263292

264293

294+
265295
#' Summarize the result of \code{bayesboot}
266296
#'
267297
#' Summarizes the result of a call to \code{bayesboot} by calculating means, SDs,
@@ -275,6 +305,9 @@ bayesboot <- function(data, statistic, R = 4000, R2 = 4000, use.weights = FALSE,
275305
#' statistic, (2) \bold{measure} the name of the summarizing measure, and (3)
276306
#' \bold{value} the value of the summarizing measure.
277307
#'
308+
#' @seealso \code{\link[HDInterval]{hdi}} in the HDInterval package for directly calculating
309+
#' highest density intervals from \code{bayesboot} objects.
310+
#'
278311
#' @export
279312
summary.bayesboot <- function(object, cred.mass = 0.95, ...) {
280313
bootsum <- plyr::ldply(seq_len(ncol(object)), function(i) {
@@ -286,7 +319,7 @@ summary.bayesboot <- function(object, cred.mass = 0.95, ...) {
286319
}
287320
data.frame(statistic = names(object)[i],
288321
measure = c("mean", "sd", "hdi.low", "hdi.high","q2.5%", "q25%", "median" ,"q75%", "q97.5%"),
289-
value = c(mean(s), sd(s), hdi(s, cred.mass), quantile(s, c(0.025, 0.25, 0.5, 0.75, 0.975))))
322+
value = c(mean(s), sd(s), HDInterval::hdi(s, cred.mass), quantile(s, c(0.025, 0.25, 0.5, 0.75, 0.975))))
290323
})
291324
attr(bootsum, "statistic.label") <- attr(object, "statistic.label")
292325
attr(bootsum, "call") <- attr(object, "call")
@@ -296,6 +329,22 @@ summary.bayesboot <- function(object, cred.mass = 0.95, ...) {
296329
bootsum
297330
}
298331

332+
#' Print the first number of draws from the Bayesian bootstrap
333+
#'
334+
#' @param x The bayesboot object to print.
335+
#' @param n The number of draws to print.
336+
#' @param ... Not used.
337+
#' @export
338+
print.bayesboot <- function(x, n = 10, ...) {
339+
cat(paste0("The first ", n," draws (out of ", nrow(x) ,") from the Bayesian bootstrap:\n"))
340+
cat("\n")
341+
print(as.data.frame(head(x, n)))
342+
cat(".. ...\n")
343+
cat("\n")
344+
cat("Use summary() to produce a summary of the posterior distribution.\n")
345+
invisible(x)
346+
}
347+
299348
#' @method print summary.bayesboot
300349
#' @export
301350
print.summary.bayesboot <- function(x, ...) {
@@ -318,6 +367,7 @@ print.summary.bayesboot <- function(x, ...) {
318367
cat("\n")
319368
}
320369
cat("Call:\n", format(attr(x, "call")))
370+
invisible(x)
321371
}
322372

323373
#' Coerce to a \code{bayesboot} object
@@ -359,30 +409,31 @@ as.bayesboot <- function(object) {
359409
plot.bayesboot <- function(x, cred.mass = 0.95, plots.per.page = 3, cex = 1.2, cex.lab=1.3, ...) {
360410
old.devAskNewPage <- devAskNewPage()
361411
old.par <- par(mfrow = c(min(plots.per.page, ncol(x)) , 1) , mar = c(4.1, 4.1, 0.5, 4.1), mgp = c(2.5, 1, 0))
412+
on.exit({ # revert graphical parameters
413+
par(old.par)
414+
devAskNewPage(old.devAskNewPage)
415+
})
362416
n.plots <- 0
363417
for(i in seq_len(ncol(x))) {
364418
if(!is.numeric(x[, i])) {
365419
warning(paste("The statistic", names(x)[i] , "was skipped as",
366420
"plot.bayesboot can't handle non-numeric statistics."))
367421
next
368422
}
369-
# Byta ut detta mot plotPost?
370-
#hist(x[, i], breaks = "FD", xlab = names(x)[i])
371423
n.plots <- n.plots + 1
372424
if(n.plots > plots.per.page) {
373425
devAskNewPage(TRUE)
374426
}
375-
if(ncol(x) == 1 && names(x)[i] == "V1") {
427+
if(ncol(x) == 1 && names(x)[i] == "V1" && attr(x, "statistic.label") != "") {
376428
# There is only one statistic and it has an uninformative default name
377-
# so use the begining of the function call instead as a statistic.
429+
# so use the begining of the function call instead as a statistic, unless
430+
# it is empty.
378431
statistic_name <- attr(x, "statistic.label")
379432
} else { # use the column name
380433
statistic_name <- names(x)[i]
381434
}
382435
plotPost(x[, i], credMass = cred.mass, xlab = statistic_name, cex = cex, cex.lab = cex.lab, ...)
383436
}
384-
par(old.par)
385-
devAskNewPage(old.devAskNewPage)
386437
invisible(NULL)
387438
}
388439

0 commit comments

Comments
 (0)