diff --git a/NEWS.md b/NEWS.md
index f63bb826..72f943d8 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -6,6 +6,7 @@
* Add `x` argument to `ppc_error_binned` by @behramulukir (#359)
* Add `x` argument to `ppc_error_scatter_avg()` by @behramulukir (#367)
* Add `discrete` style to `ppc_rootogram` by @behramulukir (#362)
+* Add `discrete` argument to `ppc_stat` and `ppd_stat` by @behramulukir (#369)
# bayesplot 1.13.0
diff --git a/R/ppc-discrete.R b/R/ppc-discrete.R
index 1f6122c3..dc4730f5 100644
--- a/R/ppc-discrete.R
+++ b/R/ppc-discrete.R
@@ -77,6 +77,21 @@
#' }
#' }
#'
+#' @section Related functions:
+#' In addition to the functions on this page that are restricted to discrete
+#' outcomes, some general PPC/PPD functions also support discrete data
+#' when requested:
+#' - [ppc_stat()] and [ppc_stat_grouped()] can visualize discrete test statistics
+#' with predictive checks when `discrete = TRUE`.
+#' - [ppd_stat()] and [ppd_stat_grouped()] can visualize discrete test statistics
+#' from predictive draws when `discrete = TRUE`.
+#' - [ppc_ecdf_overlay] can visualize empirical CDFs for discrete statistics
+#' with `discrete = TRUE`.
+#' - [ppc_pit_ecdf()] and [ppc_pit_ecdf_grouped()] can also handle discrete
+#' variables to plot PIT-ECDF of the empirical PIT values.
+#' These functions are not limited to discrete outcomes, but offer discrete-friendly
+#' displays for integer-valued statistics.
+#'
#' @examples
#' set.seed(9222017)
#'
diff --git a/R/ppc-test-statistics.R b/R/ppc-test-statistics.R
index a1b8960c..ecaabfd4 100644
--- a/R/ppc-test-statistics.R
+++ b/R/ppc-test-statistics.R
@@ -27,7 +27,8 @@
#' display the function name(s). If specified as a function (or functions)
#' then generic naming is used in the legend.
#' @param ... Currently unused.
-#'
+#' @param discrete For `ppc_stat()` and `ppc_stat_grouped()`, if `TRUE` then a
+#' bar chart is used instead of a histogram.
#' @template details-binomial
#' @template return-ggplot-or-data
#'
@@ -38,7 +39,7 @@
#' @section Plot Descriptions:
#' \describe{
#' \item{`ppc_stat()`, `ppc_stat_freqpoly()`}{
-#' A histogram or frequency polygon of the distribution of a statistic
+#' A histogram/bar plot or frequency polygon of the distribution of a statistic
#' computed by applying `stat` to each dataset (row) in `yrep`. The value of
#' the statistic in the observed data, `stat(y)`, is overlaid as a vertical
#' line. More details and example usage of `ppc_stat()` can be found in Gabry
@@ -62,6 +63,12 @@
#' ppc_stat(y, yrep, stat = "median")
#' ppc_stat(y, yrep, stat = "sd") + legend_none()
#'
+#' # discrete data example
+#' set.seed(0)
+#' y_discrete <- rbinom(20, 1, 0.2)
+#' yrep_discrete <- matrix(rbinom(2000, 1, prob = 0.4), 1000, 20, byrow = TRUE)
+#' ppc_stat(y_discrete, yrep_discrete, stat = "mean", discrete = TRUE)
+#'
#' # use your own function for the 'stat' argument
#' color_scheme_set("brightblue")
#' q25 <- function(y) quantile(y, 0.25)
@@ -77,6 +84,10 @@
#' ppc_stat_grouped(y, yrep, group, stat = "median")
#' ppc_stat_grouped(y, yrep, group, stat = "mad") + yaxis_text()
#'
+#' # discrete data example with groups
+#' group_discrete <- rep(c("First Half","Second Half"), each = 10)
+#' ppc_stat_grouped(y_discrete, yrep_discrete, group_discrete, stat = "mean", discrete = TRUE)
+#'
#' # force y-axes to have same scales, allow x axis to vary
#' ppc_stat_grouped(y, yrep, group, facet_args = list(scales = "free_x")) + yaxis_text()
#'
@@ -106,6 +117,7 @@ ppc_stat <-
yrep,
stat = "mean",
...,
+ discrete = FALSE,
binwidth = NULL,
bins = NULL,
breaks = NULL,
@@ -124,11 +136,21 @@ ppc_stat <-
group = dots$group,
stat = match.fun(stat)
)
- ggplot(
+
+ graph <- ggplot(
data = dplyr::filter(data, .data$variable != "y"),
mapping = set_hist_aes(freq)
- ) +
- geom_histogram(
+ )
+
+ graph <- if (discrete) {
+ graph + geom_bar(
+ aes(fill = "yrep"),
+ color = get_color("lh"),
+ linewidth = 0.25,
+ na.rm = TRUE,
+ )
+ } else {
+ graph + geom_histogram(
aes(fill = "yrep"),
color = get_color("lh"),
linewidth = 0.25,
@@ -136,8 +158,10 @@ ppc_stat <-
binwidth = binwidth,
bins = bins,
breaks = breaks
- ) +
- geom_vline(
+ )
+ }
+
+ graph + geom_vline(
data = dplyr::filter(data, .data$variable == "y"),
mapping = aes(xintercept = .data$value, color = "y"),
linewidth = 1.5
@@ -169,6 +193,7 @@ ppc_stat_grouped <-
group,
stat = "mean",
...,
+ discrete = FALSE,
facet_args = list(),
binwidth = NULL,
bins = NULL,
diff --git a/R/ppd-test-statistics.R b/R/ppd-test-statistics.R
index fc4acd38..d7146405 100644
--- a/R/ppd-test-statistics.R
+++ b/R/ppd-test-statistics.R
@@ -35,6 +35,7 @@ ppd_stat <-
function(ypred,
stat = "mean",
...,
+ discrete = FALSE,
binwidth = NULL,
bins = NULL,
breaks = NULL,
@@ -51,18 +52,28 @@ ppd_stat <-
group = dots$group,
stat = match.fun(stat)
)
- ggplot(data, mapping = set_hist_aes(
+ graph <- ggplot(data, mapping = set_hist_aes(
freq,
color = "ypred",
fill = "ypred"
- )) +
+ ))
+ graph <- graph + if (discrete) {
+ geom_bar(
+ color = get_color("lh"),
+ linewidth = 0.25,
+ na.rm = TRUE,
+ position = "identity",
+ )
+ }
+ else {
geom_histogram(
linewidth = 0.25,
na.rm = TRUE,
binwidth = binwidth,
bins = bins,
breaks = breaks
- ) +
+ ) }
+ graph +
scale_color_ppd(guide = "none") +
scale_fill_ppd(labels = Typred_label(), guide = guide_legend(
title = stat_legend_title(stat, deparse(substitute(stat)))
@@ -83,6 +94,7 @@ ppd_stat_grouped <-
group,
stat = "mean",
...,
+ discrete = FALSE,
facet_args = list(),
binwidth = NULL,
bins = NULL,
diff --git a/man/PPC-discrete.Rd b/man/PPC-discrete.Rd
index 7c1ec86e..459b49e2 100644
--- a/man/PPC-discrete.Rd
+++ b/man/PPC-discrete.Rd
@@ -158,6 +158,25 @@ among the different styles.
}
}
+\section{Related functions}{
+
+In addition to the functions on this page that are restricted to discrete
+outcomes, some general PPC/PPD functions also support discrete data
+when requested:
+\itemize{
+\item \code{\link[=ppc_stat]{ppc_stat()}} and \code{\link[=ppc_stat_grouped]{ppc_stat_grouped()}} can visualize discrete test statistics
+with predictive checks when \code{discrete = TRUE}.
+\item \code{\link[=ppd_stat]{ppd_stat()}} and \code{\link[=ppd_stat_grouped]{ppd_stat_grouped()}} can visualize discrete test statistics
+from predictive draws when \code{discrete = TRUE}.
+\item \link{ppc_ecdf_overlay} can visualize empirical CDFs for discrete statistics
+with \code{discrete = TRUE}.
+\item \code{\link[=ppc_pit_ecdf]{ppc_pit_ecdf()}} and \code{\link[=ppc_pit_ecdf_grouped]{ppc_pit_ecdf_grouped()}} can also handle discrete
+variables to plot PIT-ECDF of the empirical PIT values.
+These functions are not limited to discrete outcomes, but offer discrete-friendly
+displays for integer-valued statistics.
+}
+}
+
\examples{
set.seed(9222017)
diff --git a/man/PPC-test-statistics.Rd b/man/PPC-test-statistics.Rd
index 83883979..364ede99 100644
--- a/man/PPC-test-statistics.Rd
+++ b/man/PPC-test-statistics.Rd
@@ -16,6 +16,7 @@ ppc_stat(
yrep,
stat = "mean",
...,
+ discrete = FALSE,
binwidth = NULL,
bins = NULL,
breaks = NULL,
@@ -28,6 +29,7 @@ ppc_stat_grouped(
group,
stat = "mean",
...,
+ discrete = FALSE,
facet_args = list(),
binwidth = NULL,
bins = NULL,
@@ -82,6 +84,9 @@ then generic naming is used in the legend.}
\item{...}{Currently unused.}
+\item{discrete}{For \code{ppc_stat()} and \code{ppc_stat_grouped()}, if \code{TRUE} then a
+bar chart is used instead of a histogram.}
+
\item{binwidth}{Passed to \code{\link[ggplot2:geom_histogram]{ggplot2::geom_histogram()}} to override
the default binwidth.}
@@ -138,7 +143,7 @@ the input contains the "success" \emph{proportions} (not discrete
\describe{
\item{\code{ppc_stat()}, \code{ppc_stat_freqpoly()}}{
-A histogram or frequency polygon of the distribution of a statistic
+A histogram/bar plot or frequency polygon of the distribution of a statistic
computed by applying \code{stat} to each dataset (row) in \code{yrep}. The value of
the statistic in the observed data, \code{stat(y)}, is overlaid as a vertical
line. More details and example usage of \code{ppc_stat()} can be found in Gabry
@@ -163,6 +168,12 @@ yrep <- example_yrep_draws()
ppc_stat(y, yrep, stat = "median")
ppc_stat(y, yrep, stat = "sd") + legend_none()
+# discrete data example
+set.seed(0)
+y_discrete <- rbinom(20, 1, 0.2)
+yrep_discrete <- matrix(rbinom(2000, 1, prob = 0.4), 1000, 20, byrow = TRUE)
+ppc_stat(y_discrete, yrep_discrete, stat = "mean", discrete = TRUE)
+
# use your own function for the 'stat' argument
color_scheme_set("brightblue")
q25 <- function(y) quantile(y, 0.25)
@@ -178,6 +189,10 @@ group <- example_group_data()
ppc_stat_grouped(y, yrep, group, stat = "median")
ppc_stat_grouped(y, yrep, group, stat = "mad") + yaxis_text()
+# discrete data example with groups
+group_discrete <- rep(c("First Half","Second Half"), each = 10)
+ppc_stat_grouped(y_discrete, yrep_discrete, group_discrete, stat = "mean", discrete = TRUE)
+
# force y-axes to have same scales, allow x axis to vary
ppc_stat_grouped(y, yrep, group, facet_args = list(scales = "free_x")) + yaxis_text()
diff --git a/man/PPD-test-statistics.Rd b/man/PPD-test-statistics.Rd
index 20e9546e..168f2bb3 100644
--- a/man/PPD-test-statistics.Rd
+++ b/man/PPD-test-statistics.Rd
@@ -15,6 +15,7 @@ ppd_stat(
ypred,
stat = "mean",
...,
+ discrete = FALSE,
binwidth = NULL,
bins = NULL,
breaks = NULL,
@@ -26,6 +27,7 @@ ppd_stat_grouped(
group,
stat = "mean",
...,
+ discrete = FALSE,
facet_args = list(),
binwidth = NULL,
bins = NULL,
@@ -73,6 +75,9 @@ then generic naming is used in the legend.}
\item{...}{Currently unused.}
+\item{discrete}{For \code{ppc_stat()} and \code{ppc_stat_grouped()}, if \code{TRUE} then a
+bar chart is used instead of a histogram.}
+
\item{binwidth}{Passed to \code{\link[ggplot2:geom_histogram]{ggplot2::geom_histogram()}} to override
the default binwidth.}
diff --git a/tests/testthat/_snaps/ppc-test-statistics/ppc-stat-discrete-stat.svg b/tests/testthat/_snaps/ppc-test-statistics/ppc-stat-discrete-stat.svg
new file mode 100644
index 00000000..db94c61f
--- /dev/null
+++ b/tests/testthat/_snaps/ppc-test-statistics/ppc-stat-discrete-stat.svg
@@ -0,0 +1,73 @@
+
+
diff --git a/tests/testthat/_snaps/ppc-test-statistics/ppc-stat-grouped-discrete-stat.svg b/tests/testthat/_snaps/ppc-test-statistics/ppc-stat-grouped-discrete-stat.svg
new file mode 100644
index 00000000..6a2ab548
--- /dev/null
+++ b/tests/testthat/_snaps/ppc-test-statistics/ppc-stat-grouped-discrete-stat.svg
@@ -0,0 +1,116 @@
+
+
diff --git a/tests/testthat/_snaps/ppc-test-statistics/ppd-stat-discrete-stat.svg b/tests/testthat/_snaps/ppc-test-statistics/ppd-stat-discrete-stat.svg
new file mode 100644
index 00000000..b6ec720e
--- /dev/null
+++ b/tests/testthat/_snaps/ppc-test-statistics/ppd-stat-discrete-stat.svg
@@ -0,0 +1,68 @@
+
+
diff --git a/tests/testthat/_snaps/ppc-test-statistics/ppd-stat-grouped-discrete-stat.svg b/tests/testthat/_snaps/ppc-test-statistics/ppd-stat-grouped-discrete-stat.svg
new file mode 100644
index 00000000..b842c1bf
--- /dev/null
+++ b/tests/testthat/_snaps/ppc-test-statistics/ppd-stat-grouped-discrete-stat.svg
@@ -0,0 +1,110 @@
+
+
diff --git a/tests/testthat/test-ppc-test-statistics.R b/tests/testthat/test-ppc-test-statistics.R
index a0e8e094..73acbe79 100644
--- a/tests/testthat/test-ppc-test-statistics.R
+++ b/tests/testthat/test-ppc-test-statistics.R
@@ -64,11 +64,14 @@ test_that("ppc_stat returns ggplot object", {
expect_gg(ppc_stat(y, yrep, stat = function(x) median(x), binwidth = 0.05))
expect_gg(ppc_stat(y2, yrep2, binwidth = 0.05))
expect_gg(ppc_stat(y2, yrep2, stat = "prop0", binwidth = 0.05))
+ expect_gg(ppc_stat(y2, yrep2, discrete = TRUE))
+ expect_gg(ppc_stat(y2, yrep2, stat = "prop0", discrete = TRUE))
# ppd versions
expect_gg(ppd_stat(yrep, stat = "q25", binwidth = 0.05))
expect_gg(ppd_stat(yrep, stat = q25, binwidth = 0.05))
expect_gg(ppd_stat(yrep2, stat = "prop0", binwidth = 0.05))
+ expect_gg(ppd_stat(yrep2, stat = "prop0", discrete = TRUE))
})
test_that("ppc_stat_2d returns ggplot object", {
@@ -93,6 +96,11 @@ test_that("ppc_stat_grouped returns ggplot object", {
expect_gg(ppc_stat_grouped(y, yrep, group, binwidth = 0.05))
expect_gg(ppc_stat_grouped(y, yrep, as.numeric(group), stat = function(z) var(z), binwidth = 0.05))
expect_gg(ppc_stat_grouped(y, yrep, as.integer(group), stat = "sd", binwidth = 0.05))
+ expect_gg(ppc_stat_grouped(y2, yrep2, group2, discrete = TRUE))
+
+ # ppd version
+ expect_gg(ppd_stat_grouped(yrep, group, binwidth = 0.05))
+ expect_gg(ppd_stat_grouped(yrep2, group2, discrete = TRUE))
})
test_that("ppc_stat_freqpoly_grouped returns ggplot object", {
@@ -146,6 +154,16 @@ test_that("ppc_stat renders correctly", {
title = "ppc_stat (stat, binwidth, freq)",
fig = p_custom)
+ p_discrete <- ppc_stat(
+ y = vdiff_y2,
+ yrep = vdiff_yrep2,
+ stat = "prop0",
+ discrete = TRUE
+ ) + yaxis_text()
+ vdiffr::expect_doppelganger(
+ title = "ppc_stat (discrete, stat)",
+ fig = p_discrete)
+
# ppd versions
p_base <- ppd_stat(vdiff_yrep, binwidth = 0.05) + yaxis_text()
vdiffr::expect_doppelganger("ppd_stat (default)", p_base)
@@ -159,6 +177,15 @@ test_that("ppc_stat renders correctly", {
vdiffr::expect_doppelganger(
title = "ppd_stat (stat, binwidth, freq)",
fig = p_custom)
+
+ p_discrete <- ppd_stat(
+ ypred = vdiff_yrep2,
+ stat = "prop0",
+ discrete = TRUE
+ ) + yaxis_text()
+ vdiffr::expect_doppelganger(
+ title = "ppd_stat (discrete, stat)",
+ fig = p_discrete)
})
test_that("ppc_stat_2d renders correctly", {
@@ -215,6 +242,17 @@ test_that("ppc_stat_grouped renders correctly", {
title = "ppc_stat_grouped (stat, facet_args, binwidth)",
fig = p_custom)
+ p_discrete <- ppc_stat_grouped(
+ y = vdiff_y2,
+ yrep = vdiff_yrep2,
+ group = vdiff_group2,
+ stat = "prop0",
+ discrete = TRUE
+ )
+ vdiffr::expect_doppelganger(
+ title = "ppc_stat_grouped (discrete, stat)",
+ fig = p_discrete)
+
# ppd versions
p_base <- ppd_stat_grouped(vdiff_yrep, vdiff_group, binwidth = 0.05)
vdiffr::expect_doppelganger("ppd_stat_grouped (default)", p_base)
@@ -229,6 +267,16 @@ test_that("ppc_stat_grouped renders correctly", {
vdiffr::expect_doppelganger(
title = "ppd_stat_grouped (stat, facet_args, binwidth)",
fig = p_custom)
+
+ p_discrete <- ppd_stat_grouped(
+ ypred = vdiff_yrep2,
+ group = vdiff_group2,
+ stat = "prop0",
+ discrete = TRUE
+ )
+ vdiffr::expect_doppelganger(
+ title = "ppd_stat_grouped (discrete, stat)",
+ fig = p_discrete)
})
test_that("ppc_stat_freqpoly_grouped renders correctly", {