|
| 1 | +#' PPC censoring |
| 2 | +#' |
| 3 | +#' @description Compare the empirical distribution of censored data `y` to the |
| 4 | +#' distributions of simulated/replicated data `yrep` from the posterior |
| 5 | +#' predictive distribution. See the **Plot Descriptions** section, below, for |
| 6 | +#' details. |
| 7 | +#' |
| 8 | +#' Although some of the other plots can be used with censored data, |
| 9 | +#' `ppc_km_overlay()` is currently the only plotting function designed |
| 10 | +#' *specifically* for censored data. We encourage you to suggest or contribute |
| 11 | +#' additional plots at [https://github.com/stan-dev/bayesplot](github.com/stan-dev/bayesplot). |
| 12 | +#' |
| 13 | +#' |
| 14 | +#' |
| 15 | +#' @name PPC-censoring |
| 16 | +#' @family PPCs |
| 17 | +#' |
| 18 | +#' @template args-y-yrep |
| 19 | +#' @param size,alpha Passed to the appropriate geom to control the appearance of |
| 20 | +#' the `yrep` distributions. |
| 21 | +#' @param ... Currently unused. |
| 22 | +#' |
| 23 | +#' @template return-ggplot |
| 24 | +#' |
| 25 | +#' @section Plot Descriptions: |
| 26 | +#' \describe{ |
| 27 | +#' \item{`ppc_km_overlay()`}{ |
| 28 | +#' Empirical CCDF estimates of each dataset (row) in `yrep` are overlaid, |
| 29 | +#' with the Kaplan-Meier estimate (Kaplan and Meier, 1958) for `y` itself |
| 30 | +#' on top (and in a darker shade). This is a PPC suitable for |
| 31 | +#' right-censored `y`. Note that the replicated data from `yrep` is assumed |
| 32 | +#' to be uncensored. |
| 33 | +#' } |
| 34 | +#' } |
| 35 | +#' |
| 36 | +#' @templateVar bdaRef (Ch. 6) |
| 37 | +#' @template reference-bda |
| 38 | +#' @template reference-km |
| 39 | +#' |
| 40 | +#' @examples |
| 41 | +#' color_scheme_set("brightblue") |
| 42 | +#' y <- example_y_data() |
| 43 | +#' # For illustrative purposes, (right-)censor values y > 110: |
| 44 | +#' status_y <- as.numeric(y <= 110) |
| 45 | +#' y <- pmin(y, 110) |
| 46 | +#' # In reality, the replicated data (yrep) would be obtained from a |
| 47 | +#' # model which takes the censoring of y properly into account. Here, |
| 48 | +#' # for illustrative purposes, we simply use example_yrep_draws(): |
| 49 | +#' yrep <- example_yrep_draws() |
| 50 | +#' dim(yrep) |
| 51 | +#' \donttest{ |
| 52 | +#' ppc_km_overlay(y, yrep[1:25, ], status_y = status_y) |
| 53 | +#' } |
| 54 | +NULL |
| 55 | + |
| 56 | +#' @export |
| 57 | +#' @rdname PPC-censoring |
| 58 | +#' @param status_y The status indicator for the observations from `y`. This must |
| 59 | +#' be a numeric vector of the same length as `y` with values in \{0, 1\} (0 = |
| 60 | +#' right censored, 1 = event). |
| 61 | +ppc_km_overlay <- |
| 62 | + function(y, |
| 63 | + yrep, |
| 64 | + ..., |
| 65 | + status_y, |
| 66 | + size = 0.25, |
| 67 | + alpha = 0.7) { |
| 68 | + check_ignored_arguments(...) |
| 69 | + |
| 70 | + if(!requireNamespace("survival", quietly = TRUE)){ |
| 71 | + abort("Package 'survival' required.") |
| 72 | + } |
| 73 | + if(!requireNamespace("ggfortify", quietly = TRUE)){ |
| 74 | + abort("Package 'ggfortify' required.") |
| 75 | + } |
| 76 | + |
| 77 | + # Checks for 'status_y': |
| 78 | + stopifnot(is.numeric(status_y)) |
| 79 | + stopifnot(all(status_y %in% c(0, 1))) |
| 80 | + |
| 81 | + # Create basic PPC dataset: |
| 82 | + data <- ppc_data(y, yrep, group = status_y) |
| 83 | + |
| 84 | + # Modify the status indicator: |
| 85 | + # * For the observed data ("y"), convert the status indicator back to |
| 86 | + # a numeric. |
| 87 | + # * For the replicated data ("yrep"), set the status indicator |
| 88 | + # to 1 ("event"). This way, the Kaplan-Meier estimator reduces |
| 89 | + # to "1 - ECDF" with ECDF denoting the ordinary empirical cumulative |
| 90 | + # distribution function. |
| 91 | + data <- data %>% |
| 92 | + dplyr::mutate(group = ifelse(.data$is_y, |
| 93 | + as.numeric(as.character(.data$group)), |
| 94 | + 1)) |
| 95 | + |
| 96 | + # Create 'survfit' object and 'fortify' it |
| 97 | + sf <- survival::survfit( |
| 98 | + survival::Surv(value, group) ~ rep_label, |
| 99 | + data = data |
| 100 | + ) |
| 101 | + fsf <- fortify(sf) |
| 102 | + |
| 103 | + # Add variables specifying color, size, and alpha: |
| 104 | + fsf$is_y_color <- as.factor(sub("\\[rep\\] \\(.*$", "rep", sub("^italic\\(y\\)", "y", fsf$strata))) |
| 105 | + fsf$is_y_size <- ifelse(fsf$is_y_color == "yrep", size, 1) |
| 106 | + fsf$is_y_alpha <- ifelse(fsf$is_y_color == "yrep", alpha, 1) |
| 107 | + |
| 108 | + # Ensure that the observed data gets plotted last by reordering the |
| 109 | + # levels of the factor "strata": |
| 110 | + fsf$strata <- factor(fsf$strata, levels = rev(levels(fsf$strata))) |
| 111 | + |
| 112 | + # Plot: |
| 113 | + ggplot(data = fsf, |
| 114 | + mapping = aes_(x = ~ time, |
| 115 | + y = ~ surv, |
| 116 | + color = ~ is_y_color, |
| 117 | + group = ~ strata, |
| 118 | + size = ~ is_y_size, |
| 119 | + alpha = ~ is_y_alpha)) + |
| 120 | + geom_step() + |
| 121 | + hline_at( |
| 122 | + c(0, 0.5, 1), |
| 123 | + size = c(0.2, 0.1, 0.2), |
| 124 | + linetype = 2, |
| 125 | + color = get_color("dh") |
| 126 | + ) + |
| 127 | + scale_size_identity() + |
| 128 | + scale_alpha_identity() + |
| 129 | + scale_color_ppc_dist() + |
| 130 | + scale_y_continuous(breaks = c(0, 0.5, 1)) + |
| 131 | + xlab(y_label()) + |
| 132 | + yaxis_title(FALSE) + |
| 133 | + xaxis_title(FALSE) + |
| 134 | + yaxis_ticks(FALSE) + |
| 135 | + bayesplot_theme_get() |
| 136 | + } |
0 commit comments