diff --git a/NEWS.md b/NEWS.md index f63bb826..72f943d8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,7 @@ * Add `x` argument to `ppc_error_binned` by @behramulukir (#359) * Add `x` argument to `ppc_error_scatter_avg()` by @behramulukir (#367) * Add `discrete` style to `ppc_rootogram` by @behramulukir (#362) +* Add `discrete` argument to `ppc_stat` and `ppd_stat` by @behramulukir (#369) # bayesplot 1.13.0 diff --git a/R/ppc-discrete.R b/R/ppc-discrete.R index 1f6122c3..dc4730f5 100644 --- a/R/ppc-discrete.R +++ b/R/ppc-discrete.R @@ -77,6 +77,21 @@ #' } #' } #' +#' @section Related functions: +#' In addition to the functions on this page that are restricted to discrete +#' outcomes, some general PPC/PPD functions also support discrete data +#' when requested: +#' - [ppc_stat()] and [ppc_stat_grouped()] can visualize discrete test statistics +#' with predictive checks when `discrete = TRUE`. +#' - [ppd_stat()] and [ppd_stat_grouped()] can visualize discrete test statistics +#' from predictive draws when `discrete = TRUE`. +#' - [ppc_ecdf_overlay] can visualize empirical CDFs for discrete statistics +#' with `discrete = TRUE`. +#' - [ppc_pit_ecdf()] and [ppc_pit_ecdf_grouped()] can also handle discrete +#' variables to plot PIT-ECDF of the empirical PIT values. +#' These functions are not limited to discrete outcomes, but offer discrete-friendly +#' displays for integer-valued statistics. +#' #' @examples #' set.seed(9222017) #' diff --git a/R/ppc-test-statistics.R b/R/ppc-test-statistics.R index a1b8960c..ecaabfd4 100644 --- a/R/ppc-test-statistics.R +++ b/R/ppc-test-statistics.R @@ -27,7 +27,8 @@ #' display the function name(s). If specified as a function (or functions) #' then generic naming is used in the legend. #' @param ... Currently unused. -#' +#' @param discrete For `ppc_stat()` and `ppc_stat_grouped()`, if `TRUE` then a +#' bar chart is used instead of a histogram. #' @template details-binomial #' @template return-ggplot-or-data #' @@ -38,7 +39,7 @@ #' @section Plot Descriptions: #' \describe{ #' \item{`ppc_stat()`, `ppc_stat_freqpoly()`}{ -#' A histogram or frequency polygon of the distribution of a statistic +#' A histogram/bar plot or frequency polygon of the distribution of a statistic #' computed by applying `stat` to each dataset (row) in `yrep`. The value of #' the statistic in the observed data, `stat(y)`, is overlaid as a vertical #' line. More details and example usage of `ppc_stat()` can be found in Gabry @@ -62,6 +63,12 @@ #' ppc_stat(y, yrep, stat = "median") #' ppc_stat(y, yrep, stat = "sd") + legend_none() #' +#' # discrete data example +#' set.seed(0) +#' y_discrete <- rbinom(20, 1, 0.2) +#' yrep_discrete <- matrix(rbinom(2000, 1, prob = 0.4), 1000, 20, byrow = TRUE) +#' ppc_stat(y_discrete, yrep_discrete, stat = "mean", discrete = TRUE) +#' #' # use your own function for the 'stat' argument #' color_scheme_set("brightblue") #' q25 <- function(y) quantile(y, 0.25) @@ -77,6 +84,10 @@ #' ppc_stat_grouped(y, yrep, group, stat = "median") #' ppc_stat_grouped(y, yrep, group, stat = "mad") + yaxis_text() #' +#' # discrete data example with groups +#' group_discrete <- rep(c("First Half","Second Half"), each = 10) +#' ppc_stat_grouped(y_discrete, yrep_discrete, group_discrete, stat = "mean", discrete = TRUE) +#' #' # force y-axes to have same scales, allow x axis to vary #' ppc_stat_grouped(y, yrep, group, facet_args = list(scales = "free_x")) + yaxis_text() #' @@ -106,6 +117,7 @@ ppc_stat <- yrep, stat = "mean", ..., + discrete = FALSE, binwidth = NULL, bins = NULL, breaks = NULL, @@ -124,11 +136,21 @@ ppc_stat <- group = dots$group, stat = match.fun(stat) ) - ggplot( + + graph <- ggplot( data = dplyr::filter(data, .data$variable != "y"), mapping = set_hist_aes(freq) - ) + - geom_histogram( + ) + + graph <- if (discrete) { + graph + geom_bar( + aes(fill = "yrep"), + color = get_color("lh"), + linewidth = 0.25, + na.rm = TRUE, + ) + } else { + graph + geom_histogram( aes(fill = "yrep"), color = get_color("lh"), linewidth = 0.25, @@ -136,8 +158,10 @@ ppc_stat <- binwidth = binwidth, bins = bins, breaks = breaks - ) + - geom_vline( + ) + } + + graph + geom_vline( data = dplyr::filter(data, .data$variable == "y"), mapping = aes(xintercept = .data$value, color = "y"), linewidth = 1.5 @@ -169,6 +193,7 @@ ppc_stat_grouped <- group, stat = "mean", ..., + discrete = FALSE, facet_args = list(), binwidth = NULL, bins = NULL, diff --git a/R/ppd-test-statistics.R b/R/ppd-test-statistics.R index fc4acd38..d7146405 100644 --- a/R/ppd-test-statistics.R +++ b/R/ppd-test-statistics.R @@ -35,6 +35,7 @@ ppd_stat <- function(ypred, stat = "mean", ..., + discrete = FALSE, binwidth = NULL, bins = NULL, breaks = NULL, @@ -51,18 +52,28 @@ ppd_stat <- group = dots$group, stat = match.fun(stat) ) - ggplot(data, mapping = set_hist_aes( + graph <- ggplot(data, mapping = set_hist_aes( freq, color = "ypred", fill = "ypred" - )) + + )) + graph <- graph + if (discrete) { + geom_bar( + color = get_color("lh"), + linewidth = 0.25, + na.rm = TRUE, + position = "identity", + ) + } + else { geom_histogram( linewidth = 0.25, na.rm = TRUE, binwidth = binwidth, bins = bins, breaks = breaks - ) + + ) } + graph + scale_color_ppd(guide = "none") + scale_fill_ppd(labels = Typred_label(), guide = guide_legend( title = stat_legend_title(stat, deparse(substitute(stat))) @@ -83,6 +94,7 @@ ppd_stat_grouped <- group, stat = "mean", ..., + discrete = FALSE, facet_args = list(), binwidth = NULL, bins = NULL, diff --git a/man/PPC-discrete.Rd b/man/PPC-discrete.Rd index 7c1ec86e..459b49e2 100644 --- a/man/PPC-discrete.Rd +++ b/man/PPC-discrete.Rd @@ -158,6 +158,25 @@ among the different styles. } } +\section{Related functions}{ + +In addition to the functions on this page that are restricted to discrete +outcomes, some general PPC/PPD functions also support discrete data +when requested: +\itemize{ +\item \code{\link[=ppc_stat]{ppc_stat()}} and \code{\link[=ppc_stat_grouped]{ppc_stat_grouped()}} can visualize discrete test statistics +with predictive checks when \code{discrete = TRUE}. +\item \code{\link[=ppd_stat]{ppd_stat()}} and \code{\link[=ppd_stat_grouped]{ppd_stat_grouped()}} can visualize discrete test statistics +from predictive draws when \code{discrete = TRUE}. +\item \link{ppc_ecdf_overlay} can visualize empirical CDFs for discrete statistics +with \code{discrete = TRUE}. +\item \code{\link[=ppc_pit_ecdf]{ppc_pit_ecdf()}} and \code{\link[=ppc_pit_ecdf_grouped]{ppc_pit_ecdf_grouped()}} can also handle discrete +variables to plot PIT-ECDF of the empirical PIT values. +These functions are not limited to discrete outcomes, but offer discrete-friendly +displays for integer-valued statistics. +} +} + \examples{ set.seed(9222017) diff --git a/man/PPC-test-statistics.Rd b/man/PPC-test-statistics.Rd index 83883979..364ede99 100644 --- a/man/PPC-test-statistics.Rd +++ b/man/PPC-test-statistics.Rd @@ -16,6 +16,7 @@ ppc_stat( yrep, stat = "mean", ..., + discrete = FALSE, binwidth = NULL, bins = NULL, breaks = NULL, @@ -28,6 +29,7 @@ ppc_stat_grouped( group, stat = "mean", ..., + discrete = FALSE, facet_args = list(), binwidth = NULL, bins = NULL, @@ -82,6 +84,9 @@ then generic naming is used in the legend.} \item{...}{Currently unused.} +\item{discrete}{For \code{ppc_stat()} and \code{ppc_stat_grouped()}, if \code{TRUE} then a +bar chart is used instead of a histogram.} + \item{binwidth}{Passed to \code{\link[ggplot2:geom_histogram]{ggplot2::geom_histogram()}} to override the default binwidth.} @@ -138,7 +143,7 @@ the input contains the "success" \emph{proportions} (not discrete \describe{ \item{\code{ppc_stat()}, \code{ppc_stat_freqpoly()}}{ -A histogram or frequency polygon of the distribution of a statistic +A histogram/bar plot or frequency polygon of the distribution of a statistic computed by applying \code{stat} to each dataset (row) in \code{yrep}. The value of the statistic in the observed data, \code{stat(y)}, is overlaid as a vertical line. More details and example usage of \code{ppc_stat()} can be found in Gabry @@ -163,6 +168,12 @@ yrep <- example_yrep_draws() ppc_stat(y, yrep, stat = "median") ppc_stat(y, yrep, stat = "sd") + legend_none() +# discrete data example +set.seed(0) +y_discrete <- rbinom(20, 1, 0.2) +yrep_discrete <- matrix(rbinom(2000, 1, prob = 0.4), 1000, 20, byrow = TRUE) +ppc_stat(y_discrete, yrep_discrete, stat = "mean", discrete = TRUE) + # use your own function for the 'stat' argument color_scheme_set("brightblue") q25 <- function(y) quantile(y, 0.25) @@ -178,6 +189,10 @@ group <- example_group_data() ppc_stat_grouped(y, yrep, group, stat = "median") ppc_stat_grouped(y, yrep, group, stat = "mad") + yaxis_text() +# discrete data example with groups +group_discrete <- rep(c("First Half","Second Half"), each = 10) +ppc_stat_grouped(y_discrete, yrep_discrete, group_discrete, stat = "mean", discrete = TRUE) + # force y-axes to have same scales, allow x axis to vary ppc_stat_grouped(y, yrep, group, facet_args = list(scales = "free_x")) + yaxis_text() diff --git a/man/PPD-test-statistics.Rd b/man/PPD-test-statistics.Rd index 20e9546e..168f2bb3 100644 --- a/man/PPD-test-statistics.Rd +++ b/man/PPD-test-statistics.Rd @@ -15,6 +15,7 @@ ppd_stat( ypred, stat = "mean", ..., + discrete = FALSE, binwidth = NULL, bins = NULL, breaks = NULL, @@ -26,6 +27,7 @@ ppd_stat_grouped( group, stat = "mean", ..., + discrete = FALSE, facet_args = list(), binwidth = NULL, bins = NULL, @@ -73,6 +75,9 @@ then generic naming is used in the legend.} \item{...}{Currently unused.} +\item{discrete}{For \code{ppc_stat()} and \code{ppc_stat_grouped()}, if \code{TRUE} then a +bar chart is used instead of a histogram.} + \item{binwidth}{Passed to \code{\link[ggplot2:geom_histogram]{ggplot2::geom_histogram()}} to override the default binwidth.} diff --git a/tests/testthat/_snaps/ppc-test-statistics/ppc-stat-discrete-stat.svg b/tests/testthat/_snaps/ppc-test-statistics/ppc-stat-discrete-stat.svg new file mode 100644 index 00000000..db94c61f --- /dev/null +++ b/tests/testthat/_snaps/ppc-test-statistics/ppc-stat-discrete-stat.svg @@ -0,0 +1,73 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.0 +0.5 +1.0 +1.5 +2.0 + + + + + + +0.2 +0.3 +0.4 +0.5 +0.6 +T += +prop0 + +T +( +y +r +e +p +) + +T +( +y +) +ppc_stat (discrete, stat) + + diff --git a/tests/testthat/_snaps/ppc-test-statistics/ppc-stat-grouped-discrete-stat.svg b/tests/testthat/_snaps/ppc-test-statistics/ppc-stat-grouped-discrete-stat.svg new file mode 100644 index 00000000..6a2ab548 --- /dev/null +++ b/tests/testthat/_snaps/ppc-test-statistics/ppc-stat-grouped-discrete-stat.svg @@ -0,0 +1,116 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +1 + + + + + + + + + +2 + + + + + + +0.2 +0.4 +0.6 + + + + + + +0.1 +0.2 +0.3 +0.4 +0.5 + + +T += +prop0 + +T +( +y +r +e +p +) + +T +( +y +) +ppc_stat_grouped (discrete, stat) + + diff --git a/tests/testthat/_snaps/ppc-test-statistics/ppd-stat-discrete-stat.svg b/tests/testthat/_snaps/ppc-test-statistics/ppd-stat-discrete-stat.svg new file mode 100644 index 00000000..b6ec720e --- /dev/null +++ b/tests/testthat/_snaps/ppc-test-statistics/ppd-stat-discrete-stat.svg @@ -0,0 +1,68 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.0 +0.5 +1.0 +1.5 +2.0 + + + + + + +0.2 +0.3 +0.4 +0.5 +0.6 +T += +prop0 + +T +( +y +p +r +e +d +) +ppd_stat (discrete, stat) + + diff --git a/tests/testthat/_snaps/ppc-test-statistics/ppd-stat-grouped-discrete-stat.svg b/tests/testthat/_snaps/ppc-test-statistics/ppd-stat-grouped-discrete-stat.svg new file mode 100644 index 00000000..b842c1bf --- /dev/null +++ b/tests/testthat/_snaps/ppc-test-statistics/ppd-stat-grouped-discrete-stat.svg @@ -0,0 +1,110 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +1 + + + + + + + + + +2 + + + + + + +0.2 +0.4 +0.6 + + + + + + +0.1 +0.2 +0.3 +0.4 +0.5 + + +T += +prop0 + +T +( +y +p +r +e +d +) +ppd_stat_grouped (discrete, stat) + + diff --git a/tests/testthat/test-ppc-test-statistics.R b/tests/testthat/test-ppc-test-statistics.R index a0e8e094..73acbe79 100644 --- a/tests/testthat/test-ppc-test-statistics.R +++ b/tests/testthat/test-ppc-test-statistics.R @@ -64,11 +64,14 @@ test_that("ppc_stat returns ggplot object", { expect_gg(ppc_stat(y, yrep, stat = function(x) median(x), binwidth = 0.05)) expect_gg(ppc_stat(y2, yrep2, binwidth = 0.05)) expect_gg(ppc_stat(y2, yrep2, stat = "prop0", binwidth = 0.05)) + expect_gg(ppc_stat(y2, yrep2, discrete = TRUE)) + expect_gg(ppc_stat(y2, yrep2, stat = "prop0", discrete = TRUE)) # ppd versions expect_gg(ppd_stat(yrep, stat = "q25", binwidth = 0.05)) expect_gg(ppd_stat(yrep, stat = q25, binwidth = 0.05)) expect_gg(ppd_stat(yrep2, stat = "prop0", binwidth = 0.05)) + expect_gg(ppd_stat(yrep2, stat = "prop0", discrete = TRUE)) }) test_that("ppc_stat_2d returns ggplot object", { @@ -93,6 +96,11 @@ test_that("ppc_stat_grouped returns ggplot object", { expect_gg(ppc_stat_grouped(y, yrep, group, binwidth = 0.05)) expect_gg(ppc_stat_grouped(y, yrep, as.numeric(group), stat = function(z) var(z), binwidth = 0.05)) expect_gg(ppc_stat_grouped(y, yrep, as.integer(group), stat = "sd", binwidth = 0.05)) + expect_gg(ppc_stat_grouped(y2, yrep2, group2, discrete = TRUE)) + + # ppd version + expect_gg(ppd_stat_grouped(yrep, group, binwidth = 0.05)) + expect_gg(ppd_stat_grouped(yrep2, group2, discrete = TRUE)) }) test_that("ppc_stat_freqpoly_grouped returns ggplot object", { @@ -146,6 +154,16 @@ test_that("ppc_stat renders correctly", { title = "ppc_stat (stat, binwidth, freq)", fig = p_custom) + p_discrete <- ppc_stat( + y = vdiff_y2, + yrep = vdiff_yrep2, + stat = "prop0", + discrete = TRUE + ) + yaxis_text() + vdiffr::expect_doppelganger( + title = "ppc_stat (discrete, stat)", + fig = p_discrete) + # ppd versions p_base <- ppd_stat(vdiff_yrep, binwidth = 0.05) + yaxis_text() vdiffr::expect_doppelganger("ppd_stat (default)", p_base) @@ -159,6 +177,15 @@ test_that("ppc_stat renders correctly", { vdiffr::expect_doppelganger( title = "ppd_stat (stat, binwidth, freq)", fig = p_custom) + + p_discrete <- ppd_stat( + ypred = vdiff_yrep2, + stat = "prop0", + discrete = TRUE + ) + yaxis_text() + vdiffr::expect_doppelganger( + title = "ppd_stat (discrete, stat)", + fig = p_discrete) }) test_that("ppc_stat_2d renders correctly", { @@ -215,6 +242,17 @@ test_that("ppc_stat_grouped renders correctly", { title = "ppc_stat_grouped (stat, facet_args, binwidth)", fig = p_custom) + p_discrete <- ppc_stat_grouped( + y = vdiff_y2, + yrep = vdiff_yrep2, + group = vdiff_group2, + stat = "prop0", + discrete = TRUE + ) + vdiffr::expect_doppelganger( + title = "ppc_stat_grouped (discrete, stat)", + fig = p_discrete) + # ppd versions p_base <- ppd_stat_grouped(vdiff_yrep, vdiff_group, binwidth = 0.05) vdiffr::expect_doppelganger("ppd_stat_grouped (default)", p_base) @@ -229,6 +267,16 @@ test_that("ppc_stat_grouped renders correctly", { vdiffr::expect_doppelganger( title = "ppd_stat_grouped (stat, facet_args, binwidth)", fig = p_custom) + + p_discrete <- ppd_stat_grouped( + ypred = vdiff_yrep2, + group = vdiff_group2, + stat = "prop0", + discrete = TRUE + ) + vdiffr::expect_doppelganger( + title = "ppd_stat_grouped (discrete, stat)", + fig = p_discrete) }) test_that("ppc_stat_freqpoly_grouped renders correctly", {