stan-dev
diff --git a/‎DESCRIPTION
Lines changed: 4 additions & 1 deletion b/‎DESCRIPTION
Lines changed: 4 additions & 1 deletion
diff --git a/‎NAMESPACE
Lines changed: 1 addition & 0 deletions b/‎NAMESPACE
Lines changed: 1 addition & 0 deletions
diff --git a/‎NEWS.md
Lines changed: 9 additions & 1 deletion b/‎NEWS.md
Lines changed: 9 additions & 1 deletion
diff --git a/‎R/bayesplot-package.R
Lines changed: 1 addition & 1 deletion b/‎R/bayesplot-package.R
Lines changed: 1 addition & 1 deletion
diff --git a/‎R/helpers-ppc.R
Lines changed: 2 additions & 5 deletions b/‎R/helpers-ppc.R
Lines changed: 2 additions & 5 deletions
diff --git a/‎R/ppc-censoring.R
Lines changed: 136 additions & 0 deletions b/‎R/ppc-censoring.R
Lines changed: 136 additions & 0 deletions
diff --git a/‎R/ppc-loo.R
Lines changed: 11 additions & 9 deletions b/‎R/ppc-loo.R
Lines changed: 11 additions & 9 deletions
diff --git a/‎R/ppc-overview.R
Lines changed: 3 additions & 0 deletions b/‎R/ppc-overview.R
Lines changed: 3 additions & 0 deletions
diff --git a/‎man-roxygen/reference-km.R
Lines changed: 4 additions & 0 deletions b/‎man-roxygen/reference-km.R
Lines changed: 4 additions & 0 deletions
diff --git a/‎man/PPC-censoring.Rd
Lines changed: 92 additions & 0 deletions b/‎man/PPC-censoring.Rd
Lines changed: 92 additions & 0 deletions
@@ -7,7 +7,8 @@ Authors@R: c(person("Jonah", "Gabry", role = c("aut", "cre"), email = "jsg2201@c
              person("Tristan", "Mahr", role = "aut"),
              person("Paul-Christian", "Bürkner", role = "ctb"),
              person("Martin", "Modrák", role = "ctb"),
-             person("Malcolm", "Barrett", role = "ctb"))
+             person("Malcolm", "Barrett", role = "ctb"), 
+             person("Frank", "Weber", role = "ctb"))
 Maintainer: Jonah Gabry <[email protected]>
 Description: Plotting functions for posterior analysis, MCMC diagnostics,
     prior and posterior predictive checks, and other visualizations 
@@ -36,6 +37,7 @@ Imports:
     tidyselect,
     utils
 Suggests: 
+    ggfortify,
     gridExtra (>= 2.2.1),
     hexbin,
     knitr (>= 1.16),
@@ -47,6 +49,7 @@ Suggests:
     rstantools (>= 1.5.0),
     scales,
     shinystan (>= 2.3.0),
+    survival,
     testthat (>= 2.0.0),
     vdiffr
 RoxygenNote: 7.1.1
 
@@ -117,6 +117,7 @@ export(ppc_hist)
 export(ppc_intervals)
 export(ppc_intervals_data)
 export(ppc_intervals_grouped)
+export(ppc_km_overlay)
 export(ppc_loo_intervals)
 export(ppc_loo_pit)
 export(ppc_loo_pit_overlay)
 
@@ -8,14 +8,22 @@
 * Items for next release go here
 -->
 
+* On the y axis, `ppc_loo_pit_qq(..., compare = "normal")` now plots standard
+  normal quantiles calculated from the PIT values (instead of the standardized
+  PIT values). (#240, #243, @fweber144)
+
+* New plotting function `ppc_km_overlay()` for outcome variables that are   
+  right-censored. Empirical CCDF estimates of `yrep` are compared with the 
+  Kaplan-Meier estimate of `y`. (#233, #234, @fweber144)
+
 * CmdStanMCMC objects (from CmdStanR) can now be used with extractor 
   functions `nuts_params()`, `log_posterior()`, `rhat()`, and 
   `neff_ratio()`. (#227)
 
 * Size of points and interval lines can set in 
   `mcmc_intervals(..., outer_size, inner_size, point_size)`. (#215, #228, #229) 
 
-* Size of ridgelines can be set in `mcmc_areas_ridges(..., size)`. (#224)
+* Size of ridgelines can be set in `mcmc_areas(..., size)` and `mcmc_areas_ridges(..., size)`. (#224)
 
 * `mcmc_areas()` tries to use less blank vertical blank space. (#218, #230) 
 
 
@@ -98,6 +98,6 @@ NULL
 # release reminders (for devtools)
 release_questions <- function() { # nocov start
   c(
-    "Have you reduced the size of the vignettes for CRAN?",
+    "Have you reduced the size of the vignettes for CRAN?"
   )
 } # nocov end
@@ -70,7 +70,8 @@ validate_yrep <- function(yrep, y) {
 #' Checks that grouping variable has same length as `y` and is either a vector or
 #' factor variable.
 #'
-#' @param group,y The user's `group` object and the `y` object returned by `validate_y()`.
+#' @param group,y The user's `group` object and the `y` object returned by
+#'   `validate_y()`.
 #' @return Either throws an error or returns `group` (coerced to a factor).
 #' @noRd
 validate_group <- function(group, y) {
@@ -88,10 +89,6 @@ validate_group <- function(group, y) {
     abort("length(group) must be equal to length(y).")
   }
 
-  if (length(unique(group)) == 1) {
-    abort("'group' must have more than one unique value.")
-  }
-
   unname(group)
 }
 
 
@@ -0,0 +1,136 @@
+#' PPC censoring
+#'
+#' @description Compare the empirical distribution of censored data `y` to the
+#'   distributions of simulated/replicated data `yrep` from the posterior
+#'   predictive distribution. See the **Plot Descriptions** section, below, for
+#'   details.
+#'
+#'   Although some of the other plots can be used with censored data,
+#'   `ppc_km_overlay()` is currently the only plotting function designed
+#'   *specifically* for censored data. We encourage you to suggest or contribute
+#'   additional plots at [github.com/stan-dev/bayesplot](https://github.com/stan-dev/bayesplot).
+#'
+#'
+#'
+#' @name PPC-censoring
+#' @family PPCs
+#'
+#' @template args-y-yrep
+#' @param size,alpha Passed to the appropriate geom to control the appearance of
+#'   the `yrep` distributions.
+#' @param ... Currently unused.
+#'
+#' @template return-ggplot
+#'
+#' @section Plot Descriptions:
+#' \describe{
+#'   \item{`ppc_km_overlay()`}{
+#'    Empirical CCDF estimates of each dataset (row) in `yrep` are overlaid,
+#'    with the Kaplan-Meier estimate (Kaplan and Meier, 1958) for `y` itself
+#'    on top (and in a darker shade). This is a PPC suitable for
+#'    right-censored `y`. Note that the replicated data from `yrep` is assumed
+#'    to be uncensored.
+#'   }
+#' }
+#'
+#' @templateVar bdaRef (Ch. 6)
+#' @template reference-bda
+#' @template reference-km
+#'
+#' @examples
+#' color_scheme_set("brightblue")
+#' y <- example_y_data()
+#' # For illustrative purposes, (right-)censor values y > 110:
+#' status_y <- as.numeric(y <= 110)
+#' y <- pmin(y, 110)
+#' # In reality, the replicated data (yrep) would be obtained from a
+#' # model which takes the censoring of y properly into account. Here,
+#' # for illustrative purposes, we simply use example_yrep_draws():
+#' yrep <- example_yrep_draws()
+#' dim(yrep)
+#' \donttest{
+#' ppc_km_overlay(y, yrep[1:25, ], status_y = status_y)
+#' }
+NULL
+
+#' @export
+#' @rdname PPC-censoring
+#' @param status_y The status indicator for the observations from `y`. This must
+#'   be a numeric vector of the same length as `y` with values in \{0, 1\} (0 =
+#'   right censored, 1 = event).
+ppc_km_overlay <-
+  function(y,
+           yrep,
+           ...,
+           status_y,
+           size = 0.25,
+           alpha = 0.7) {
+    check_ignored_arguments(...)
+
+    # Both suggested packages must be available before going any further.
+    if (!requireNamespace("survival", quietly = TRUE)) {
+      abort("Package 'survival' required.")
+    }
+    if (!requireNamespace("ggfortify", quietly = TRUE)) {
+      abort("Package 'ggfortify' required.")
+    }
+
+    # 'status_y' has to be numeric and may only contain 0s and 1s.
+    stopifnot(is.numeric(status_y), all(status_y %in% c(0, 1)))
+
+    # Basic PPC dataset; the status indicator is carried in 'group'.
+    km_input <- ppc_data(y, yrep, group = status_y)
+
+    # Observed rows ("y") get their status converted back to a number, while
+    # replicated rows ("yrep") are all marked as events (1). With every
+    # observation an event, the Kaplan-Meier estimator reduces to 1 - ECDF.
+    km_input <- dplyr::mutate(
+      km_input,
+      group = ifelse(.data$is_y, as.numeric(as.character(.data$group)), 1)
+    )
+
+    # Fit one curve per 'rep_label' and fortify the resulting 'survfit' object.
+    km_fit <- survival::survfit(
+      survival::Surv(value, group) ~ rep_label,
+      data = km_input
+    )
+    km_df <- fortify(km_fit)
+
+    # Label each curve "y" or "yrep"; the label drives color, size, and alpha.
+    dist_label <- sub("^italic\\(y\\)", "y", km_df$strata)
+    dist_label <- sub("\\[rep\\] \\(.*$", "rep", dist_label)
+    km_df$is_y_color <- as.factor(dist_label)
+    from_yrep <- km_df$is_y_color == "yrep"
+    km_df$is_y_size <- ifelse(from_yrep, size, 1)
+    km_df$is_y_alpha <- ifelse(from_yrep, alpha, 1)
+
+    # Reverse the 'strata' levels so that the observed data is plotted last.
+    km_df$strata <- factor(km_df$strata, levels = rev(levels(km_df$strata)))
+
+    km_aes <- aes_(
+      x = ~ time,
+      y = ~ surv,
+      color = ~ is_y_color,
+      group = ~ strata,
+      size = ~ is_y_size,
+      alpha = ~ is_y_alpha
+    )
+
+    ggplot(data = km_df, mapping = km_aes) +
+      geom_step() +
+      hline_at(
+        c(0, 0.5, 1),
+        size = c(0.2, 0.1, 0.2),
+        linetype = 2,
+        color = get_color("dh")
+      ) +
+      scale_size_identity() +
+      scale_alpha_identity() +
+      scale_color_ppc_dist() +
+      scale_y_continuous(breaks = c(0, 0.5, 1)) +
+      xlab(y_label()) +
+      yaxis_title(FALSE) +
+      xaxis_title(FALSE) +
+      yaxis_ticks(FALSE) +
+      bayesplot_theme_get()
+  }
@@ -42,11 +42,12 @@
 #'  PITs to the standard uniform distribution. Comparing to the uniform is not
 #'  good for extreme probabilities close to 0 and 1, so it can sometimes be
 #'  useful to set the `compare` argument to `"normal"`, which will
-#'  produce a Q-Q plot comparing standardized PIT values to the standard normal
-#'  distribution that can help see the (mis)calibration better for the extreme
-#'  values. However, in most cases we have found that the overlaid density plot
-#'  (`ppc_loo_pit_overlay()`) function will provided a clearer picture of
-#'  calibration problems that the Q-Q plot.
+#'  produce a Q-Q plot comparing standard normal quantiles calculated from the
+#'  PIT values to the theoretical standard normal quantiles. This can help see
+#'  the (mis)calibration better for the extreme values. However, in most cases
+#'  we have found that the overlaid density plot (`ppc_loo_pit_overlay()`)
+#'  will provide a clearer picture of calibration problems than the Q-Q
+#'  plot.
 #' }
 #' \item{`ppc_loo_intervals()`, `ppc_loo_ribbon()`}{
 #'  Similar to [ppc_intervals()] and [ppc_ribbon()] but the intervals are for
@@ -113,8 +114,9 @@ NULL
 #' @param compare For `ppc_loo_pit_qq()`, a string that can be either
 #'   `"uniform"` or `"normal"`. If `"uniform"` (the default) the Q-Q plot
 #'   compares computed PIT values to the standard uniform distribution. If
-#'   `compare="normal"`, the Q-Q plot compares standardized PIT values to the
-#'   standard normal distribution.
+#'   `compare="normal"`, the Q-Q plot compares standard normal quantiles
+#'   calculated from the PIT values to the theoretical standard normal
+#'   quantiles.
 #' @param trim Passed to [ggplot2::stat_density()].
 #' @template args-density-controls
 ppc_loo_pit_overlay <- function(y,
@@ -220,10 +222,10 @@ ppc_loo_pit_qq <- function(y,
     x_lab <- "Uniform"
     y_lab <- "LOO-PIT"
   } else {
-    pit <- as.vector(scale(pit))
+    pit <- as.vector(stats::qnorm(pit))
     theoretical <- stats::qnorm
     x_lab <- "Normal"
-    y_lab <- "LOO-PIT (standardized)"
+    y_lab <- "LOO-PIT (standard normal quantiles)"
   }
 
   ggplot(data.frame(p = pit)) +
 
@@ -87,6 +87,9 @@
 #'   multinomial outcomes.
 #' * [LOO predictive checks][PPC-loo]: PPC functions for predictive checks
 #'   based on (approximate) leave-one-out (LOO) cross-validation.
+#' * [Censored data][PPC-censoring]: PPC functions comparing the empirical
+#'   distribution of censored data `y` to the distributions of individual
+#'   simulated datasets (rows) in `yrep`.
 #'
 #' @section Providing an interface for predictive checking from another package:
 #'
 
@@ -0,0 +1,4 @@
+#' @references Kaplan, E. L. and Meier, P. (1958). Nonparametric estimation
+#'   from incomplete observations.
+#'   *Journal of the American Statistical Association*. 53(282), 457--481.
+#'   doi:10.1080/01621459.1958.10501452.
Original file line number	Diff line number	Diff line change
`@@ -98,6 +98,6 @@ NULL`
`98`	`98`	`# release reminders (for devtools)`
`99`	`99`	`release_questions <- function() { # nocov start`
`100`	`100`	`c(`
`101`		`- "Have you reduced the size of the vignettes for CRAN?",`
	`101`	`+ "Have you reduced the size of the vignettes for CRAN?"`
`102`	`102`	`)`
`103`	`103`	`} # nocov end`