stan-dev
diff --git a/‎DESCRIPTION
Lines changed: 5 additions & 1 deletion b/‎DESCRIPTION
Lines changed: 5 additions & 1 deletion
diff --git a/‎NAMESPACE
Lines changed: 2 additions & 0 deletions b/‎NAMESPACE
Lines changed: 2 additions & 0 deletions
diff --git a/‎NEWS.md
Lines changed: 15 additions & 0 deletions b/‎NEWS.md
Lines changed: 15 additions & 0 deletions
diff --git a/‎R/bayesplot-package.R
Lines changed: 1 addition & 1 deletion b/‎R/bayesplot-package.R
Lines changed: 1 addition & 1 deletion
diff --git a/‎R/helpers-ppc.R
Lines changed: 2 additions & 5 deletions b/‎R/helpers-ppc.R
Lines changed: 2 additions & 5 deletions
diff --git a/‎R/ppc-censoring.R
Lines changed: 136 additions & 0 deletions b/‎R/ppc-censoring.R
Lines changed: 136 additions & 0 deletions
@@ -7,7 +7,9 @@ Authors@R: c(person("Jonah", "Gabry", role = c("aut", "cre"), email = "jsg2201@c
              person("Tristan", "Mahr", role = "aut"),
              person("Paul-Christian", "Bürkner", role = "ctb"),
              person("Martin", "Modrák", role = "ctb"),
-             person("Malcolm", "Barrett", role = "ctb"))
+             person("Malcolm", "Barrett", role = "ctb"), 
+             person("Frank", "Weber", role = "ctb"),
+             person("Eduardo", "Coronado Sroka", role = "ctb"))
 Maintainer: Jonah Gabry <[email protected]>
 Description: Plotting functions for posterior analysis, MCMC diagnostics,
     prior and posterior predictive checks, and other visualizations 
@@ -36,6 +38,7 @@ Imports:
     tidyselect,
     utils
 Suggests: 
+    ggfortify,
     gridExtra (>= 2.2.1),
     hexbin,
     knitr (>= 1.16),
@@ -47,6 +50,7 @@ Suggests:
     rstantools (>= 1.5.0),
     scales,
     shinystan (>= 2.3.0),
+    survival,
     testthat (>= 2.0.0),
     vdiffr
 RoxygenNote: 7.1.1
 
@@ -117,8 +117,10 @@ export(ppc_hist)
 export(ppc_intervals)
 export(ppc_intervals_data)
 export(ppc_intervals_grouped)
+export(ppc_km_overlay)
 export(ppc_loo_intervals)
 export(ppc_loo_pit)
+export(ppc_loo_pit_data)
 export(ppc_loo_pit_overlay)
 export(ppc_loo_pit_qq)
 export(ppc_loo_ribbon)
 
@@ -8,15 +8,30 @@
 * Items for next release go here
 -->
 
+* On the y axis, `ppc_loo_pit_qq(..., compare = "normal")` now plots standard
+  normal quantiles calculated from the PIT values (instead of the standardized
+  PIT values). (#240, #243, @fweber144)
+
+* New plotting function `ppc_km_overlay()` for outcome variables that are   
+  right-censored. Empirical CCDF estimates of `yrep` are compared with the 
+  Kaplan-Meier estimate of `y`. (#233, #234, @fweber144)
+
 * CmdStanMCMC objects (from CmdStanR) can now be used with extractor 
   functions `nuts_params()`, `log_posterior()`, `rhat()`, and 
   `neff_ratio()`. (#227)
 
 * Added missing `facet_args` argument to `mcmc_rank_overlay()`. (#221, @hhau)
+
 * Size of points and interval lines can set in 
   `mcmc_intervals(..., outer_size, inner_size, point_size)`. (#215, #228, #229) 
+  
 * `mcmc_areas()` tries to use less blank vertical blank space. (#218, #230) 
 
+* `ppc_loo_pit_overlay()` now uses a boundary correction for an improved kernel
+  density estimation. The new argument `boundary_correction` defaults to TRUE but
+  can be set to FALSE to recover the old version of the plot. (#171, #235,
+  @ecoronado92)
+
 
 # bayesplot 1.7.2
 
 
@@ -98,6 +98,6 @@ NULL
 # release reminders (for devtools)
 release_questions <- function() { # nocov start
   c(
-    "Have you reduced the size of the vignettes for CRAN?",
+    "Have you reduced the size of the vignettes for CRAN?" # last item: no trailing comma
   )
 } # nocov end
@@ -70,7 +70,8 @@ validate_yrep <- function(yrep, y) {
 #' Checks that grouping variable has same length as `y` and is either a vector or
 #' factor variable.
 #'
-#' @param group,y The user's `group` object and the `y` object returned by `validate_y()`.
+#' @param group,y The user's `group` object and the `y` object returned by
+#'   `validate_y()`.
 #' @return Either throws an error or returns `group` (coerced to a factor).
 #' @noRd
 validate_group <- function(group, y) {
@@ -88,10 +89,6 @@ validate_group <- function(group, y) {
     abort("length(group) must be equal to length(y).")
   }
 
-  if (length(unique(group)) == 1) {
-    abort("'group' must have more than one unique value.")
-  }
-
   unname(group)
 }
 
 
@@ -0,0 +1,136 @@
+#' PPC censoring
+#'
+#' @description Compare the empirical distribution of censored data `y` to the
+#'   distributions of simulated/replicated data `yrep` from the posterior
+#'   predictive distribution. See the **Plot Descriptions** section, below, for
+#'   details.
+#'
+#'   Although some of the other plots can be used with censored data,
+#'   `ppc_km_overlay()` is currently the only plotting function designed
+#'   *specifically* for censored data. We encourage you to suggest or contribute
+#'   additional plots at [github.com/stan-dev/bayesplot](https://github.com/stan-dev/bayesplot).
+#'
+#'
+#'
+#' @name PPC-censoring
+#' @family PPCs
+#'
+#' @template args-y-yrep
+#' @param size,alpha Passed to the appropriate geom to control the appearance of
+#'   the `yrep` distributions.
+#' @param ... Currently unused.
+#'
+#' @template return-ggplot
+#'
+#' @section Plot Descriptions:
+#' \describe{
+#'   \item{`ppc_km_overlay()`}{
+#'    Empirical CCDF estimates of each dataset (row) in `yrep` are overlaid,
+#'    with the Kaplan-Meier estimate (Kaplan and Meier, 1958) for `y` itself
+#'    on top (and in a darker shade). This is a PPC suitable for
+#'    right-censored `y`. Note that the replicated data from `yrep` is assumed
+#'    to be uncensored.
+#'   }
+#' }
+#'
+#' @templateVar bdaRef (Ch. 6)
+#' @template reference-bda
+#' @template reference-km
+#'
+#' @examples
+#' color_scheme_set("brightblue")
+#' y <- example_y_data()
+#' # For illustrative purposes, (right-)censor values y > 110:
+#' status_y <- as.numeric(y <= 110)
+#' y <- pmin(y, 110)
+#' # In reality, the replicated data (yrep) would be obtained from a
+#' # model which takes the censoring of y properly into account. Here,
+#' # for illustrative purposes, we simply use example_yrep_draws():
+#' yrep <- example_yrep_draws()
+#' dim(yrep)
+#' \donttest{
+#' ppc_km_overlay(y, yrep[1:25, ], status_y = status_y)
+#' }
+NULL
+
+#' @export
+#' @rdname PPC-censoring
+#' @param status_y The status indicator for the observations from `y`. This must
+#'   be a numeric vector of the same length as `y` with values in \{0, 1\} (0 =
+#'   right censored, 1 = event).
+ppc_km_overlay <-
+  function(y,
+           yrep,
+           ...,
+           status_y,
+           size = 0.25,
+           alpha = 0.7) {
+    check_ignored_arguments(...)
+    # 'survival' and 'ggfortify' are only in Suggests: fail early if absent.
+    if(!requireNamespace("survival", quietly = TRUE)){
+      abort("Package 'survival' required.")
+    }
+    if(!requireNamespace("ggfortify", quietly = TRUE)){
+      abort("Package 'ggfortify' required.")
+    }
+
+    # 'status_y' must be numeric and contain only 0 (censored) or 1 (event):
+    stopifnot(is.numeric(status_y))
+    stopifnot(all(status_y %in% c(0, 1)))
+
+    # Build the PPC data, carrying 'status_y' along in the 'group' column:
+    data <- ppc_data(y, yrep, group = status_y)
+
+    # Modify the status indicator:
+    #   * For the observed data ("y"), convert the status indicator back to
+    #     a numeric (presumably a factor after ppc_data(); TODO confirm).
+    #   * For the replicated data ("yrep"), set the status indicator
+    #     to 1 ("event"). This way, the Kaplan-Meier estimator reduces
+    #     to "1 - ECDF" with ECDF denoting the ordinary empirical cumulative
+    #     distribution function.
+    data <- data %>%
+      dplyr::mutate(group = ifelse(.data$is_y,
+                                   as.numeric(as.character(.data$group)),
+                                   1))
+
+    # Create 'survfit' object (one curve per 'rep_label') and 'fortify' it:
+    sf <- survival::survfit(
+      survival::Surv(value, group) ~ rep_label,
+      data = data
+    )
+    fsf <- fortify(sf)
+
+    # Collapse strata labels to "y"/"yrep" to drive color, size, and alpha:
+    fsf$is_y_color <- as.factor(sub("\\[rep\\] \\(.*$", "rep", sub("^italic\\(y\\)", "y", fsf$strata)))
+    fsf$is_y_size <- ifelse(fsf$is_y_color == "yrep", size, 1)
+    fsf$is_y_alpha <- ifelse(fsf$is_y_color == "yrep", alpha, 1)
+
+    # Ensure that the observed data gets plotted last by reordering the
+    # levels of the factor "strata":
+    fsf$strata <- factor(fsf$strata, levels = rev(levels(fsf$strata)))
+
+    # Plot step curves; identity scales use the size/alpha columns as-is:
+    ggplot(data = fsf,
+           mapping = aes_(x = ~ time,
+                          y = ~ surv,
+                          color = ~ is_y_color,
+                          group = ~ strata,
+                          size = ~ is_y_size,
+                          alpha = ~ is_y_alpha)) +
+      geom_step() +
+      hline_at(
+        c(0, 0.5, 1),
+        size = c(0.2, 0.1, 0.2),
+        linetype = 2,
+        color = get_color("dh")
+      ) +
+      scale_size_identity() +
+      scale_alpha_identity() +
+      scale_color_ppc_dist() +
+      scale_y_continuous(breaks = c(0, 0.5, 1)) +
+      xlab(y_label()) +
+      yaxis_title(FALSE) +
+      xaxis_title(FALSE) +
+      yaxis_ticks(FALSE) +
+      bayesplot_theme_get()
+  }
Original file line number	Diff line number	Diff line change
`@@ -98,6 +98,6 @@ NULL`
`98`	`98`	`# release reminders (for devtools)`
`99`	`99`	`release_questions <- function() { # nocov start`
`100`	`100`	`c(`
`101`		`- "Have you reduced the size of the vignettes for CRAN?",`
	`101`	`+ "Have you reduced the size of the vignettes for CRAN?"`
`102`	`102`	`)`
`103`	`103`	`} # nocov end`