Genentech · yoshidk6 · Dec 18, 2025 · Nov 26, 2025 · Nov 26, 2025 · Nov 26, 2025
diff --git a/R/plot_ermod.R b/R/plot_ermod.R
@@ -99,7 +99,7 @@ plot_er.ersim_med_qi <- function(
       list(
         add_boxplot = FALSE, boxplot_height = 0.15,
         show_boxplot_y_title = TRUE, var_group = NULL,
-        n_bins = 4, qi_width = 0.95
+        n_bins = 4, bin_breaks = NULL, qi_width = 0.95
       ),
       options_orig_data
     )
@@ -306,7 +306,6 @@ plot_er.ersim_med_qi <- function(
   return(caption)
 }
 
-
 .plot_er_binary <- function(
     gg, x, origdata, show_orig_data, options_orig_data) {
   var_resp <- extract_var_resp(x)
@@ -328,17 +327,22 @@ plot_er.ersim_med_qi <- function(
 
     var_group <- options_orig_data$var_group
     n_bins <- options_orig_data$n_bins
+    bin_breaks <- options_orig_data$bin_breaks
     qi_width <- options_orig_data$qi_width
 
     # Convert the response variable to a numeric index
     origdata$.resp_num <- as.numeric(factor(origdata[[var_resp]])) - 1
 
     # binned probability ------------------------------------------------------
-    breaks <-
-      stats::quantile(origdata[[var_exposure]], probs = seq(0, 1, 1 / n_bins))
+    if (is.null(bin_breaks)) {
+      bin_breaks <- stats::quantile(
+        origdata[[var_exposure]],
+        probs = seq(0, 1, 1 / n_bins)
+      )
+    }
 
     # Show error when the breaks are not unique
-    if (length(unique(breaks)) != length(breaks)) {
+    if (length(unique(bin_breaks)) != length(bin_breaks)) {
       stop(
         "The breaks for the binned probability are not unique, ",
         "possibly due to too few unique values in the exposure variable.\n",
@@ -348,20 +352,34 @@ plot_er.ersim_med_qi <- function(
 
     gg <- gg +
       ggplot2::geom_vline(
-        xintercept = breaks, linetype = "dashed",
+        xintercept = bin_breaks,
+        linetype = "dashed",
         alpha = 0.3
       ) +
       xgxr::xgx_stat_ci(
         data = origdata,
-        ggplot2::aes(x = .data[[var_exposure]], y = .data[[".resp_num"]]),
-        bins = n_bins, conf_level = qi_width, distribution = "binomial",
-        geom = c("point"), shape = 0, size = 4
+        ggplot2::aes(
+          x = .data[[var_exposure]],
+          y = .data[[".resp_num"]]
+        ),
+        breaks = bin_breaks,
+        conf_level = qi_width,
+        distribution = "binomial",
+        geom = c("point"),
+        shape = 0,
+        size = 4
       ) +
       xgxr::xgx_stat_ci(
         data = origdata,
-        ggplot2::aes(x = .data[[var_exposure]], y = .data[[".resp_num"]]),
-        bins = n_bins, conf_level = qi_width, distribution = "binomial",
-        geom = c("errorbar"), linewidth = 0.5
+        ggplot2::aes(
+          x = .data[[var_exposure]],
+          y = .data[[".resp_num"]]
+        ),
+        breaks = bin_breaks,
+        conf_level = qi_width,
+        distribution = "binomial",
+        geom = c("errorbar"),
+        linewidth = 0.5
       )
 
 
@@ -550,6 +568,8 @@ plot_er.ermod <- function(
 #' and colored by this column. Default is `NULL`.
 #' @param n_bins Number of bins to use for observed probability
 #' summary. Only relevant for binary models. Default is `4`.
+#' @param bin_breaks Manually specify bin breaks for binary models. If specified
+#' this overrides `n_bins`.
 #' @param qi_width_obs Confidence level for the observed probability
 #' summary. Default is `0.95`.
 #' @param show_coef_exp Logical, whether to show the credible interval
@@ -610,11 +630,17 @@ plot_er.ermod <- function(
 #' }
 #'
 plot_er_gof <- function(
-    x, add_boxplot = !is.null(var_group), boxplot_height = 0.15,
+    x, add_boxplot = !is.null(var_group),
+    boxplot_height = 0.15,
     show_boxplot_y_title = FALSE,
-    var_group = NULL, n_bins = 4, qi_width_obs = 0.95,
+    var_group = NULL,
+    n_bins = 4,
+    bin_breaks = NULL,
+    qi_width_obs = 0.95,
     show_coef_exp = FALSE,
-    coef_pos_x = NULL, coef_pos_y = NULL, coef_size = 4,
+    coef_pos_x = NULL,
+    coef_pos_y = NULL,
+    coef_size = 4,
     qi_width_coef = 0.95,
     qi_width_sim = 0.95,
     show_caption = TRUE) {
@@ -624,17 +650,23 @@ plot_er_gof <- function(
     show_coef_exp = show_coef_exp,
     show_caption = show_caption,
     options_orig_data = list(
-      add_boxplot = add_boxplot, boxplot_height = boxplot_height,
+      add_boxplot = add_boxplot,
+      boxplot_height = boxplot_height,
       show_boxplot_y_title = show_boxplot_y_title,
       var_group = var_group,
-      n_bins = n_bins, qi_width = qi_width_obs
+      n_bins = n_bins,
+      bin_breaks = bin_breaks,
+      qi_width = qi_width_obs
     ),
     options_coef_exp = list(
-      qi_width = qi_width_coef, pos_x = coef_pos_x, pos_y = coef_pos_y,
+      qi_width = qi_width_coef,
+      pos_x = coef_pos_x,
+      pos_y = coef_pos_y,
       size = coef_size
     ),
     options_caption = list(
-      orig_data_summary = TRUE, coef_exp = show_coef_exp
+      orig_data_summary = TRUE,
+      coef_exp = show_coef_exp
     ),
     qi_width_sim = qi_width_sim
   )

diff --git a/R/yyy.R b/R/yyy.R
@@ -143,3 +143,36 @@ if (getRversion() >= "2.15.1") {
 #' @examples
 #' d_sim_emax
 "d_sim_emax"
+
+
+#' Sample simulated data for Emax exposure-response models with covariates and placebo
+#'
+#' @name d_sim_placebo
+#' @format A data frame with columns:
+#' \describe{
+#' \item{id}{Subject identifier}
+#' \item{dose}{Nominal dose, units not specified}
+#' \item{exp_1}{Exposure value on metric 1, units and metric not specified}
+#' \item{exp_2}{Exposure value on metric 2, units and metric not specified}
+#' \item{rsp_1}{Continuous response value (units not specified)}
+#' \item{rsp_2}{Binary response value (group labels not specified)}
+#' \item{cnt_a}{Continuous valued covariate}
+#' \item{cnt_b}{Continuous valued covariate}
+#' \item{cnt_c}{Continuous valued covariate}
+#' \item{bin_d}{Binary valued covariate}
+#' \item{bin_e}{Binary valued covariate}
+#' }
+#' @details
+#'
+#' This simulated dataset is entirely synthetic. It is a generic data set that can be used
+#' to illustrate Emax modeling with placebo data. It contains variables corresponding to
+#' dose and exposure, and includes both a continuous response variable and a binary response
+#' variable. Three continuous valued covariates are included, along with two binary covariates.
+#' Data from two exposure metrics are included.
+#'
+#' You can find the data generating code in the package source code,
+#' under `data-raw/d_sim_placebo.R`.
+#'
+#' @examples
+#' d_sim_placebo
+"d_sim_placebo"
diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -73,6 +73,7 @@ reference:
   - d_sim_binom_cov
   - d_sim_lin
   - d_sim_emax
+  - d_sim_placebo
 - title: S3 methods
   contents:
   - ermod_method

diff --git a/data-raw/d_sim_placebo.R b/data-raw/d_sim_placebo.R
@@ -0,0 +1,139 @@
+set.seed(123L)
+
+d_sim_placebo <- local({
+
+  emax_fn <- function(exposure, emax, ec50, e0, gamma = 1) {
+    e0 + emax * (exposure^gamma) / (ec50^gamma + exposure^gamma)
+  }
+
+  # simulation model for the exposures includes a dosing model, but is very
+  # simple. exposure scales linearly with dose, and has a slightly-truncated
+  # lognormal distribution conditional on dose
+  generate_exposure <- function(dose, n, meanlog = 4, sdlog = 0.5) {
+    dose * stats::qlnorm(
+      p = stats::runif(n, min = .01, max = .99),
+      meanlog = meanlog,
+      sdlog = sdlog
+    )
+  }
+
+  # continuous covariates all have the same range (0 to 10) and follow a
+  # scaled beta distribution within that range
+  continuous_covariate <- function(n) {
+    stats::rbeta(n, 2, 2) * 10
+  }
+
+  # binary covariates are Bernoulli variates
+  binary_covariate <- function(n, p) {
+    as.numeric(stats::runif(n) <= p)
+  }
+
+
+  # simulation functions ----------------------------------------------------
+
+  simulate_data <- function(seed = 123) {
+    set.seed(seed)
+
+    par <- list(
+      # parameters for continuous response
+      emax_1  = 10,
+      ec50_1  = 4000,
+      e0_1    = 5,
+      gamma_1 = 1,
+      sigma_1 = .5,
+      coef_a1 = .5,
+      coef_b1 = 0,
+      coef_c1 = 0,
+      coef_d1 = 0,
+
+      # parameters for binary response
+      emax_2  = 5,
+      ec50_2  = 8000,
+      e0_2    = -4.5,
+      gamma_2 = 1,
+      coef_a2 = .5,
+      coef_b2 = 0,
+      coef_c2 = 0,
+      coef_d2 = 1
+    )
+
+    # conditional on a dose group, generate data; include multiple exposure metrics
+    # but treat the first one as source of ground truth. exposures are strongly
+    # correlated but on the same scale
+    make_dose_data <- function(dose, n, par) {
+      tibble::tibble(
+        dose = dose,
+        exposure_1 = generate_exposure(dose, n = n),
+        exposure_2 = 0.7 * exposure_1 + 0.3 * generate_exposure(dose, n = n),
+
+        # add continuous and binary covariates
+        cnt_a = continuous_covariate(n = n),
+        cnt_b = continuous_covariate(n = n),
+        cnt_c = continuous_covariate(n = n),
+        bin_d = binary_covariate(n = n, p = .5),
+        bin_e = binary_covariate(n = n, p = .7),
+
+        # response 1 is continuous
+        response_1 = emax_fn(
+          exposure_1,
+          emax = par$emax_1,
+          ec50 = par$ec50_1,
+          e0 = par$e0_1,
+          gamma = par$gamma_1
+        ) +
+          par$coef_a1 * cnt_a +
+          par$coef_b1 * cnt_b +
+          par$coef_c1 * cnt_c +
+          par$coef_d1 * bin_d +
+          stats::rnorm(n, 0, par$sigma_1),
+
+        # response 2 is binary; start with the predictor
+        bin_pred = emax_fn(
+          exposure_1,
+          emax = par$emax_2,
+          ec50 = par$ec50_2,
+          e0 = par$e0_2,
+          gamma = par$gamma_2
+        ) +
+          par$coef_a2 * cnt_a +
+          par$coef_b2 * cnt_b +
+          par$coef_c2 * cnt_c +
+          par$coef_d2 * bin_d,
+
+        # convert
+        bin_prob = 1 / (1 + exp(-bin_pred)),
+        response_2 = as.numeric(stats::runif(n) < bin_prob)
+      ) |>
+        dplyr::select(-bin_pred, -bin_prob) # remove intermediate variables
+    }
+
+    # create data set assuming three dosing groups and placebo
+    dat <- dplyr::bind_rows(
+      make_dose_data(dose = 0, n = 100, par = par),
+      make_dose_data(dose = 100, n = 100, par = par),
+      make_dose_data(dose = 200, n = 100, par = par),
+      make_dose_data(dose = 300, n = 100, par = par)
+    )
+
+    return(dat)
+  }
+
+  # generate data -----------------------------------------------------------
+
+  d_sim_placebo <- simulate_data()
+  d_sim_placebo <- d_sim_placebo |>
+    dplyr::mutate(id = dplyr::row_number()) |>
+    dplyr::relocate(response_1, response_2, .after = exposure_2) |>
+    dplyr::relocate(id, 1) |>
+    dplyr::rename(
+      exp_1 = exposure_1,
+      exp_2 = exposure_2,
+      rsp_1 = response_1,
+      rsp_2 = response_2
+    )
+
+  d_sim_placebo
+})
+
+readr::write_csv(d_sim_placebo, "data-raw/d_sim_placebo.csv")
+usethis::use_data(d_sim_placebo, overwrite = TRUE)