Skip to content

Commit fc0559c

Browse files
authored
Merge pull request #369 from stan-dev/ppc_stat
adding `discrete` argument to `stat` functions
2 parents e7d10d9 + 68ceb49 commit fc0559c

12 files changed

+518
-11
lines changed

NEWS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
* Add `x` argument to `ppc_error_binned` by @behramulukir (#359)
77
* Add `x` argument to `ppc_error_scatter_avg()` by @behramulukir (#367)
88
* Add `discrete` style to `ppc_rootogram` by @behramulukir (#362)
9+
* Add `discrete` argument to `ppc_stat` and `ppd_stat` by @behramulukir (#369)
910

1011
# bayesplot 1.13.0
1112

R/ppc-discrete.R

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,21 @@
7777
#' }
7878
#' }
7979
#'
80+
#' @section Related functions:
81+
#' In addition to the functions on this page that are restricted to discrete
82+
#' outcomes, some general PPC/PPD functions also support discrete data
83+
#' when requested:
84+
#' - [ppc_stat()] and [ppc_stat_grouped()] can visualize discrete test statistics
85+
#' with predictive checks when `discrete = TRUE`.
86+
#' - [ppd_stat()] and [ppd_stat_grouped()] can visualize discrete test statistics
87+
#' from predictive draws when `discrete = TRUE`.
88+
#' - [ppc_ecdf_overlay()] can visualize empirical CDFs for discrete statistics
89+
#' with `discrete = TRUE`.
90+
#' - [ppc_pit_ecdf()] and [ppc_pit_ecdf_grouped()] can also handle discrete
91+
#' variables when plotting the PIT-ECDF of the empirical PIT values.
92+
#'
#' These functions are not limited to discrete outcomes, but offer discrete-friendly
93+
#' displays for integer-valued statistics.
94+
#'
8095
#' @examples
8196
#' set.seed(9222017)
8297
#'

R/ppc-test-statistics.R

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@
2727
#' display the function name(s). If specified as a function (or functions)
2828
#' then generic naming is used in the legend.
2929
#' @param ... Currently unused.
30-
#'
30+
#' @param discrete For `ppc_stat()` and `ppc_stat_grouped()`, if `TRUE` then a
31+
#' bar chart is used instead of a histogram.
3132
#' @template details-binomial
3233
#' @template return-ggplot-or-data
3334
#'
@@ -38,7 +39,7 @@
3839
#' @section Plot Descriptions:
3940
#' \describe{
4041
#' \item{`ppc_stat()`, `ppc_stat_freqpoly()`}{
41-
#' A histogram or frequency polygon of the distribution of a statistic
42+
#' A histogram/bar plot or frequency polygon of the distribution of a statistic
4243
#' computed by applying `stat` to each dataset (row) in `yrep`. The value of
4344
#' the statistic in the observed data, `stat(y)`, is overlaid as a vertical
4445
#' line. More details and example usage of `ppc_stat()` can be found in Gabry
@@ -62,6 +63,12 @@
6263
#' ppc_stat(y, yrep, stat = "median")
6364
#' ppc_stat(y, yrep, stat = "sd") + legend_none()
6465
#'
66+
#' # discrete data example
67+
#' set.seed(0)
68+
#' y_discrete <- rbinom(20, 1, 0.2)
69+
#' yrep_discrete <- matrix(rbinom(2000, 1, prob = 0.4), 1000, 20, byrow = TRUE)
70+
#' ppc_stat(y_discrete, yrep_discrete, stat = "mean", discrete = TRUE)
71+
#'
6572
#' # use your own function for the 'stat' argument
6673
#' color_scheme_set("brightblue")
6774
#' q25 <- function(y) quantile(y, 0.25)
@@ -77,6 +84,10 @@
7784
#' ppc_stat_grouped(y, yrep, group, stat = "median")
7885
#' ppc_stat_grouped(y, yrep, group, stat = "mad") + yaxis_text()
7986
#'
87+
#' # discrete data example with groups
88+
#' group_discrete <- rep(c("First Half","Second Half"), each = 10)
89+
#' ppc_stat_grouped(y_discrete, yrep_discrete, group_discrete, stat = "mean", discrete = TRUE)
90+
#'
8091
#' # force y-axes to have same scales, allow x axis to vary
8192
#' ppc_stat_grouped(y, yrep, group, facet_args = list(scales = "free_x")) + yaxis_text()
8293
#'
@@ -106,6 +117,7 @@ ppc_stat <-
106117
yrep,
107118
stat = "mean",
108119
...,
120+
discrete = FALSE,
109121
binwidth = NULL,
110122
bins = NULL,
111123
breaks = NULL,
@@ -124,20 +136,32 @@ ppc_stat <-
124136
group = dots$group,
125137
stat = match.fun(stat)
126138
)
127-
ggplot(
139+
140+
graph <- ggplot(
128141
data = dplyr::filter(data, .data$variable != "y"),
129142
mapping = set_hist_aes(freq)
130-
) +
131-
geom_histogram(
143+
)
144+
145+
graph <- if (discrete) {
146+
graph + geom_bar(
147+
aes(fill = "yrep"),
148+
color = get_color("lh"),
149+
linewidth = 0.25,
150+
na.rm = TRUE,
151+
)
152+
} else {
153+
graph + geom_histogram(
132154
aes(fill = "yrep"),
133155
color = get_color("lh"),
134156
linewidth = 0.25,
135157
na.rm = TRUE,
136158
binwidth = binwidth,
137159
bins = bins,
138160
breaks = breaks
139-
) +
140-
geom_vline(
161+
)
162+
}
163+
164+
graph + geom_vline(
141165
data = dplyr::filter(data, .data$variable == "y"),
142166
mapping = aes(xintercept = .data$value, color = "y"),
143167
linewidth = 1.5
@@ -169,6 +193,7 @@ ppc_stat_grouped <-
169193
group,
170194
stat = "mean",
171195
...,
196+
discrete = FALSE,
172197
facet_args = list(),
173198
binwidth = NULL,
174199
bins = NULL,

R/ppd-test-statistics.R

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ ppd_stat <-
3535
function(ypred,
3636
stat = "mean",
3737
...,
38+
discrete = FALSE,
3839
binwidth = NULL,
3940
bins = NULL,
4041
breaks = NULL,
@@ -51,18 +52,28 @@ ppd_stat <-
5152
group = dots$group,
5253
stat = match.fun(stat)
5354
)
54-
ggplot(data, mapping = set_hist_aes(
55+
graph <- ggplot(data, mapping = set_hist_aes(
5556
freq,
5657
color = "ypred",
5758
fill = "ypred"
58-
)) +
59+
))
60+
graph <- graph + if (discrete) {
61+
geom_bar(
62+
color = get_color("lh"),
63+
linewidth = 0.25,
64+
na.rm = TRUE,
65+
position = "identity",
66+
)
67+
}
68+
else {
5969
geom_histogram(
6070
linewidth = 0.25,
6171
na.rm = TRUE,
6272
binwidth = binwidth,
6373
bins = bins,
6474
breaks = breaks
65-
) +
75+
) }
76+
graph +
6677
scale_color_ppd(guide = "none") +
6778
scale_fill_ppd(labels = Typred_label(), guide = guide_legend(
6879
title = stat_legend_title(stat, deparse(substitute(stat)))
@@ -83,6 +94,7 @@ ppd_stat_grouped <-
8394
group,
8495
stat = "mean",
8596
...,
97+
discrete = FALSE,
8698
facet_args = list(),
8799
binwidth = NULL,
88100
bins = NULL,

man/PPC-discrete.Rd

Lines changed: 19 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/PPC-test-statistics.Rd

Lines changed: 16 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/PPD-test-statistics.Rd

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 73 additions & 0 deletions
Loading

0 commit comments

Comments
 (0)