2121# ' @param size,fatten,linewidth For bar plots, `size`, `fatten`, and `linewidth`
2222# ' are passed to [ggplot2::geom_pointrange()] to control the appearance of the
2323# ' `yrep` points and intervals. For rootograms `size` is passed to
24- # ' [ggplot2::geom_line()].
24+ # ' [ggplot2::geom_line()] and [ggplot2::geom_pointrange()] .
2525# ' @param freq For bar plots only, if `TRUE` (the default) the y-axis will
2626# ' display counts. Setting `freq=FALSE` will put proportions on the y-axis.
27+ # ' @param bound_distinct For `ppc_rootogram(style = "discrete)`,
28+ # ' if `TRUE` then the observed counts will be plotted with different shapes
29+ # ' depending on whether they are within the bounds of the `y` quantiles.
2730# '
2831# ' @template return-ggplot-or-data
2932# '
4447# ' }
4548# ' \item{`ppc_rootogram()`}{
4649# ' Rootograms allow for diagnosing problems in count data models such as
47- # ' overdispersion or excess zeros. They consist of a histogram of `y` with the
48- # ' expected counts based on `yrep` overlaid as a line along with uncertainty
49- # ' intervals. The y-axis represents the square roots of the counts to
50- # ' approximately adjust for scale differences and thus ease comparison between
51- # ' observed and expected counts. Using the `style` argument, the histogram
52- # ' style can be adjusted to focus on different aspects of the data:
50+ # ' overdispersion or excess zeros. In `standing`, `hanging`, and `suspended`
51+ # ' styles, they consist of a histogram of `y` with the expected counts based on
52+ # ' `yrep` overlaid as a line along with uncertainty intervals.
53+ # '
54+ # ' Meanwhile, in `discrete` style, median counts based on `yrep` are laid
55+ # ' as a point range with uncertainty intervals along with dots
56+ # ' representing the `y`.
57+ # '
58+ # ' The y-axis represents the square roots of the counts to approximately
59+ # ' adjust for scale differences and thus ease comparison between observed
60+ # ' and expected counts. Using the `style` argument, the rootogram can be
61+ # ' adjusted to focus on different aspects of the data:
5362# ' * _Standing_: basic histogram of observed counts with curve
5463# ' showing expected counts.
55- # ' * _Hanging_: observed counts counts hanging from the curve
64+ # ' * _Hanging_: observed counts hanging from the curve
5665# ' representing expected counts.
5766# ' * _Suspended_: histogram of the differences between expected and
5867# ' observed counts.
68+ # ' * _Discrete_: a dot-and-whisker plot of the median counts and
69+ # ' dots representing observed counts.
70+ # '
71+ # ' As it emphasizes the discrete nature of the count data,
72+ # ' using `discrete` style is suggested.
5973# '
6074# ' **All of the rootograms are plotted on the square root scale**. See Kleiber
6175# ' and Zeileis (2016) for advice on interpreting rootograms and selecting
@@ -198,22 +212,22 @@ ppc_bars_grouped <-
198212 fatten = 2.5 ,
199213 linewidth = 1 ,
200214 freq = TRUE ) {
201- check_ignored_arguments(... )
202- call <- match.call(expand.dots = FALSE )
203- g <- eval(ungroup_call(" ppc_bars" , call ), parent.frame())
204- if (fixed_y(facet_args )) {
205- g <- g + expand_limits(y = 1.05 * max(g $ data [[" h" ]], na.rm = TRUE ))
215+ check_ignored_arguments(... )
216+ call <- match.call(expand.dots = FALSE )
217+ g <- eval(ungroup_call(" ppc_bars" , call ), parent.frame())
218+ if (fixed_y(facet_args )) {
219+ g <- g + expand_limits(y = 1.05 * max(g $ data [[" h" ]], na.rm = TRUE ))
220+ }
221+ g +
222+ bars_group_facets(facet_args ) +
223+ force_axes_in_facets()
206224 }
207- g +
208- bars_group_facets(facet_args ) +
209- force_axes_in_facets()
210- }
211225
212226
213227# ' @rdname PPC-discrete
214228# ' @export
215229# ' @param style For `ppc_rootogram`, a string specifying the rootogram
216- # ' style. The options are `"standing"`, `"hanging"`, and
230+ # ' style. The options are `"discrete"`, `" standing"`, `"hanging"`, and
217231# ' `"suspended"`. See the **Plot Descriptions** section, below, for
218232# ' details on the different styles.
219233# '
@@ -234,61 +248,34 @@ ppc_bars_grouped <-
234248# '
235249# ' ppc_rootogram(y, yrep, style = "hanging", prob = 0.8)
236250# ' ppc_rootogram(y, yrep, style = "suspended")
251+ # ' ppc_rootogram(y, yrep, style = "discrete")
237252# '
238253ppc_rootogram <- function (y ,
239254 yrep ,
240- style = c(" standing" , " hanging" , " suspended" ),
255+ style = c(" standing" , " hanging" , " suspended" , " discrete " ),
241256 ... ,
242257 prob = 0.9 ,
243- size = 1 ) {
258+ size = 1 ,
259+ bound_distinct = TRUE ) {
244260 check_ignored_arguments(... )
245261 style <- match.arg(style )
246- y <- validate_y(y )
247- yrep <- validate_predictions(yrep , length(y ))
248- if (! all_counts(y )) {
249- abort(" ppc_rootogram expects counts as inputs to 'y'." )
250- }
251- if (! all_counts(yrep )) {
252- abort(" ppc_rootogram expects counts as inputs to 'yrep'." )
253- }
254262
255- alpha <- (1 - prob ) / 2
256- probs <- c(alpha , 1 - alpha )
257- ymax <- max(y , yrep )
258- xpos <- 0L : ymax
259-
260- # prepare a table for yrep
261- tyrep <- as.list(rep(NA , nrow(yrep )))
262- for (i in seq_along(tyrep )) {
263- tyrep [[i ]] <- table(yrep [i ,])
264- matches <- match(xpos , rownames(tyrep [[i ]]))
265- tyrep [[i ]] <- as.numeric(tyrep [[i ]][matches ])
266- }
267- tyrep <- do.call(rbind , tyrep )
268- tyrep [is.na(tyrep )] <- 0
269- tyexp <- sqrt(colMeans(tyrep ))
270- tyquantile <- sqrt(t(apply(tyrep , 2 , quantile , probs = probs )))
271- colnames(tyquantile ) <- c(" tylower" , " tyupper" )
272-
273- # prepare a table for y
274- ty <- table(y )
275- ty <- sqrt(as.numeric(ty [match(xpos , rownames(ty ))]))
276- if (style == " suspended" ) {
277- ty <- tyexp - ty
278- }
279- ty [is.na(ty )] <- 0
280- ypos <- ty / 2
281- if (style == " hanging" ) {
282- ypos <- tyexp - ypos
283- }
263+ data <- .ppc_rootogram_data(
264+ y = y ,
265+ yrep = yrep ,
266+ style = style ,
267+ prob = prob ,
268+ bound_distinct = bound_distinct
269+ )
284270
285- data <- data.frame (xpos , ypos , ty , tyexp , tyquantile )
286- graph <- ggplot(data ) +
287- aes(
288- ymin = .data $ tylower ,
289- ymax = .data $ tyupper ,
290- height = .data $ ty
291- ) +
271+ # Building geoms for y and y_rep
272+ geom_y <- if (style == " discrete" ) {
273+ geom_point(
274+ aes(y = .data $ obs , shape = .data $ obs_shape ),
275+ size = size * 1.5 ,
276+ color = get_color(" d" ),
277+ fill = get_color(" d" ))
278+ } else {
292279 geom_tile(
293280 aes(
294281 x = .data $ xpos ,
@@ -298,34 +285,69 @@ ppc_rootogram <- function(y,
298285 color = get_color(" lh" ),
299286 linewidth = 0.25 ,
300287 width = 1
301- ) +
302- bayesplot_theme_get()
303-
304- if (style != " standing" ) {
305- graph <- graph + hline_0(size = 0.4 )
288+ )
306289 }
307290
308- graph <- graph +
291+ geom_yrep <- if (style == " discrete" ) {
292+ geom_pointrange(
293+ aes(y = .data $ pred_median , ymin = .data $ lower , ymax = .data $ upper , color = " y_rep" ),
294+ fill = get_color(" lh" ),
295+ linewidth = size ,
296+ size = size ,
297+ fatten = 2 ,
298+ alpha = 1
299+ )
300+ } else {
309301 geom_smooth(
310- aes(
311- x = .data $ xpos ,
312- y = .data $ tyexp ,
313- color = " Expected"
314- ),
302+ aes(x = .data $ xpos , y = .data $ tyexp , color = " Expected" ),
315303 fill = get_color(" d" ),
316304 linewidth = size ,
317305 stat = " identity"
318- ) +
319- scale_fill_manual(" " , values = get_color(" l" )) +
320- scale_color_manual(" " , values = get_color(" dh" )) +
321- labs(x = expression(italic(y )),
322- y = expression(sqrt(Count )))
323-
324- if (style == " standing" ) {
325- graph <- graph + dont_expand_y_axis()
306+ )
326307 }
327308
328- graph + reduce_legend_spacing(0.25 )
309+ # Creating the graph
310+ graph <- ggplot(data )
311+
312+ if (style == " discrete" ) {
313+ graph <- graph +
314+ geom_yrep +
315+ geom_y +
316+ aes(x = .data $ xpos ) +
317+ scale_y_sqrt() +
318+ scale_fill_manual(" " , values = get_color(" d" ), guide = " none" ) +
319+ scale_color_manual(" " , values = get_color(" lh" ), labels = yrep_label()) +
320+ labs(x = expression(italic(y )), y = " Count" ) +
321+ bayesplot_theme_get() +
322+ reduce_legend_spacing(0.25 ) +
323+ scale_shape_manual(values = c(" In" = 22 , " Out" = 23 , " y" = 22 ), guide = " legend" , labels = c(" y" = expression(italic(y ))))
324+ if (bound_distinct ) {
325+ graph <- graph + guides(shape = guide_legend(expression(italic(y )~ within ~ bounds )))
326+ } else {
327+ graph <- graph + guides(shape = guide_legend(" " ))
328+ }
329+ } else {
330+ graph <- graph +
331+ geom_y +
332+ geom_yrep +
333+ aes(
334+ ymin = .data $ tylower ,
335+ ymax = .data $ tyupper ,
336+ height = .data $ ty
337+ ) +
338+ scale_fill_manual(" " , values = get_color(" l" )) +
339+ scale_color_manual(" " , values = get_color(" dh" )) +
340+ labs(x = expression(italic(y )), y = expression(sqrt(Count ))) +
341+ bayesplot_theme_get() +
342+ reduce_legend_spacing(0.25 )
343+ if (style == " standing" ) {
344+ graph <- graph + dont_expand_y_axis()
345+ } else {
346+ graph <- graph + hline_0(size = 0.4 )
347+ }
348+ }
349+
350+ return (graph )
329351}
330352
331353
@@ -395,7 +417,7 @@ ppc_bars_data <-
395417 data <-
396418 reshape2 :: melt(tmp_data , id.vars = " group" ) %> %
397419 count(.data $ group , .data $ value , .data $ variable ) %> %
398- tidyr :: complete(.data $ group , .data $ value , .data $ variable , fill = list (n = 0 )) %> %
420+ tidyr :: complete(.data $ group , .data $ value , .data $ variable , fill = list (n = 0 )) %> %
399421 group_by(.data $ variable , .data $ group ) %> %
400422 mutate(proportion = .data $ n / sum(.data $ n )) %> %
401423 ungroup() %> %
@@ -440,3 +462,90 @@ bars_group_facets <- function(facet_args, scales_default = "fixed") {
440462fixed_y <- function (facet_args ) {
441463 ! isTRUE(facet_args [[" scales" ]] %in% c(" free" , " free_y" ))
442464}
465+
466+ # ' Internal function for `ppc_rootogram()`
467+ # ' @param y,yrep User's `y` and `yrep` arguments.
468+ # ' @param style,prob,bound_distinct User's `style`, `prob`, and
469+ # ' (if applicable) `bound_distinct` arguments.
470+ # ' @noRd
471+ .ppc_rootogram_data <- function (y ,
472+ yrep ,
473+ style = c(" standing" , " hanging" , " suspended" , " discrete" ),
474+ prob = 0.9 ,
475+ bound_distinct ) {
476+
477+ y <- validate_y(y )
478+ yrep <- validate_predictions(yrep , length(y ))
479+ if (! all_counts(y )) {
480+ abort(" ppc_rootogram expects counts as inputs to 'y'." )
481+ }
482+ if (! all_counts(yrep )) {
483+ abort(" ppc_rootogram expects counts as inputs to 'yrep'." )
484+ }
485+
486+ alpha <- (1 - prob ) / 2
487+ probs <- c(alpha , 1 - alpha )
488+ ymax <- max(y , yrep )
489+ xpos <- 0L : ymax
490+
491+ # prepare a table for yrep
492+ tyrep <- as.list(rep(NA , nrow(yrep )))
493+ for (i in seq_along(tyrep )) {
494+ tyrep [[i ]] <- table(yrep [i ,])
495+ matches <- match(xpos , rownames(tyrep [[i ]]))
496+ tyrep [[i ]] <- as.numeric(tyrep [[i ]][matches ])
497+ }
498+ tyrep <- do.call(rbind , tyrep )
499+ tyrep [is.na(tyrep )] <- 0
500+
501+ # discrete style
502+ if (style == " discrete" ){
503+ pred_median <- apply(tyrep , 2 , median )
504+ pred_quantile <- t(apply(tyrep , 2 , quantile , probs = probs ))
505+ colnames(pred_quantile ) <- c(" lower" , " upper" )
506+
507+ # prepare a table for y
508+ ty <- table(y )
509+ y_count <- as.numeric(ty [match(xpos , rownames(ty ))])
510+ y_count [is.na(y_count )] <- 0
511+
512+ if (bound_distinct ) {
513+ # If the observed count is within the bounds of the predicted quantiles,
514+ # use a different shape for the point
515+ obs_shape <- obs_shape <- ifelse(y_count > = pred_quantile [, " lower" ] & y_count < = pred_quantile [, " upper" ], " In" , " Out" )
516+ } else {
517+ obs_shape <- rep(" y" , length(y_count )) # all points are the same shape for observed
518+ }
519+
520+ data <- data.frame (
521+ xpos = xpos ,
522+ obs = y_count ,
523+ pred_median = pred_median ,
524+ lower = pred_quantile [, " lower" ],
525+ upper = pred_quantile [, " upper" ],
526+ obs_shape = obs_shape
527+ )
528+ }
529+ # standing, hanging, suspended styles
530+ else {
531+ tyexp <- sqrt(colMeans(tyrep ))
532+ tyquantile <- sqrt(t(apply(tyrep , 2 , quantile , probs = probs )))
533+ colnames(tyquantile ) <- c(" tylower" , " tyupper" )
534+
535+ # prepare a table for y
536+ ty <- table(y )
537+ ty <- sqrt(as.numeric(ty [match(xpos , rownames(ty ))]))
538+ if (style == " suspended" ) {
539+ ty <- tyexp - ty
540+ }
541+ ty [is.na(ty )] <- 0
542+ ypos <- ty / 2
543+ if (style == " hanging" ) {
544+ ypos <- tyexp - ypos
545+ }
546+
547+ data <- data.frame (xpos , ypos , ty , tyexp , tyquantile )
548+ }
549+
550+ return (data )
551+ }
0 commit comments