Skip to content

Commit b4d400b

Browse files
authored
Merge pull request #16 from djnavarro/plot-allow-manual-breaks
allow manual breaks in plot_er()
2 parents c56d3bd + 0cbb2bb commit b4d400b

File tree

9 files changed

+673
-20
lines changed

9 files changed

+673
-20
lines changed

R/plot_ermod.R

Lines changed: 51 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ plot_er.ersim_med_qi <- function(
9999
list(
100100
add_boxplot = FALSE, boxplot_height = 0.15,
101101
show_boxplot_y_title = TRUE, var_group = NULL,
102-
n_bins = 4, qi_width = 0.95
102+
n_bins = 4, bin_breaks = NULL, qi_width = 0.95
103103
),
104104
options_orig_data
105105
)
@@ -306,7 +306,6 @@ plot_er.ersim_med_qi <- function(
306306
return(caption)
307307
}
308308

309-
310309
.plot_er_binary <- function(
311310
gg, x, origdata, show_orig_data, options_orig_data) {
312311
var_resp <- extract_var_resp(x)
@@ -328,17 +327,22 @@ plot_er.ersim_med_qi <- function(
328327

329328
var_group <- options_orig_data$var_group
330329
n_bins <- options_orig_data$n_bins
330+
bin_breaks <- options_orig_data$bin_breaks
331331
qi_width <- options_orig_data$qi_width
332332

333333
# Convert the response variable to a numeric index
334334
origdata$.resp_num <- as.numeric(factor(origdata[[var_resp]])) - 1
335335

336336
# binned probability ------------------------------------------------------
337-
breaks <-
338-
stats::quantile(origdata[[var_exposure]], probs = seq(0, 1, 1 / n_bins))
337+
if (is.null(bin_breaks)) {
338+
bin_breaks <- stats::quantile(
339+
origdata[[var_exposure]],
340+
probs = seq(0, 1, 1 / n_bins)
341+
)
342+
}
339343

340344
# Show error when the breaks are not unique
341-
if (length(unique(breaks)) != length(breaks)) {
345+
if (length(unique(bin_breaks)) != length(bin_breaks)) {
342346
stop(
343347
"The breaks for the binned probability are not unique, ",
344348
"possibly due to too few unique values in the exposure variable.\n",
@@ -348,20 +352,34 @@ plot_er.ersim_med_qi <- function(
348352

349353
gg <- gg +
350354
ggplot2::geom_vline(
351-
xintercept = breaks, linetype = "dashed",
355+
xintercept = bin_breaks,
356+
linetype = "dashed",
352357
alpha = 0.3
353358
) +
354359
xgxr::xgx_stat_ci(
355360
data = origdata,
356-
ggplot2::aes(x = .data[[var_exposure]], y = .data[[".resp_num"]]),
357-
bins = n_bins, conf_level = qi_width, distribution = "binomial",
358-
geom = c("point"), shape = 0, size = 4
361+
ggplot2::aes(
362+
x = .data[[var_exposure]],
363+
y = .data[[".resp_num"]]
364+
),
365+
breaks = bin_breaks,
366+
conf_level = qi_width,
367+
distribution = "binomial",
368+
geom = c("point"),
369+
shape = 0,
370+
size = 4
359371
) +
360372
xgxr::xgx_stat_ci(
361373
data = origdata,
362-
ggplot2::aes(x = .data[[var_exposure]], y = .data[[".resp_num"]]),
363-
bins = n_bins, conf_level = qi_width, distribution = "binomial",
364-
geom = c("errorbar"), linewidth = 0.5
374+
ggplot2::aes(
375+
x = .data[[var_exposure]],
376+
y = .data[[".resp_num"]]
377+
),
378+
breaks = bin_breaks,
379+
conf_level = qi_width,
380+
distribution = "binomial",
381+
geom = c("errorbar"),
382+
linewidth = 0.5
365383
)
366384

367385

@@ -550,6 +568,8 @@ plot_er.ermod <- function(
550568
#' and colored by this column. Default is `NULL`.
551569
#' @param n_bins Number of bins to use for observed probability
552570
#' summary. Only relevant for binary models. Default is `4`.
571+
#' @param bin_breaks Manually specify bin breaks for binary models. If specified
572+
#' this overrides `n_bins`.
553573
#' @param qi_width_obs Confidence level for the observed probability
554574
#' summary. Default is `0.95`.
555575
#' @param show_coef_exp Logical, whether to show the credible interval
@@ -610,11 +630,17 @@ plot_er.ermod <- function(
610630
#' }
611631
#'
612632
plot_er_gof <- function(
613-
x, add_boxplot = !is.null(var_group), boxplot_height = 0.15,
633+
x, add_boxplot = !is.null(var_group),
634+
boxplot_height = 0.15,
614635
show_boxplot_y_title = FALSE,
615-
var_group = NULL, n_bins = 4, qi_width_obs = 0.95,
636+
var_group = NULL,
637+
n_bins = 4,
638+
bin_breaks = NULL,
639+
qi_width_obs = 0.95,
616640
show_coef_exp = FALSE,
617-
coef_pos_x = NULL, coef_pos_y = NULL, coef_size = 4,
641+
coef_pos_x = NULL,
642+
coef_pos_y = NULL,
643+
coef_size = 4,
618644
qi_width_coef = 0.95,
619645
qi_width_sim = 0.95,
620646
show_caption = TRUE) {
@@ -624,17 +650,23 @@ plot_er_gof <- function(
624650
show_coef_exp = show_coef_exp,
625651
show_caption = show_caption,
626652
options_orig_data = list(
627-
add_boxplot = add_boxplot, boxplot_height = boxplot_height,
653+
add_boxplot = add_boxplot,
654+
boxplot_height = boxplot_height,
628655
show_boxplot_y_title = show_boxplot_y_title,
629656
var_group = var_group,
630-
n_bins = n_bins, qi_width = qi_width_obs
657+
n_bins = n_bins,
658+
bin_breaks = bin_breaks,
659+
qi_width = qi_width_obs
631660
),
632661
options_coef_exp = list(
633-
qi_width = qi_width_coef, pos_x = coef_pos_x, pos_y = coef_pos_y,
662+
qi_width = qi_width_coef,
663+
pos_x = coef_pos_x,
664+
pos_y = coef_pos_y,
634665
size = coef_size
635666
),
636667
options_caption = list(
637-
orig_data_summary = TRUE, coef_exp = show_coef_exp
668+
orig_data_summary = TRUE,
669+
coef_exp = show_coef_exp
638670
),
639671
qi_width_sim = qi_width_sim
640672
)

R/yyy.R

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,3 +143,36 @@ if (getRversion() >= "2.15.1") {
143143
#' @examples
144144
#' d_sim_emax
145145
"d_sim_emax"
146+
147+
148+
#' Sample simulated data for Emax exposure-response models with covariates and placebo
149+
#'
150+
#' @name d_sim_placebo
151+
#' @format A data frame with columns:
152+
#' \describe{
153+
#' \item{id}{Subject identifier}
154+
#' \item{dose}{Nominal dose, units not specified}
155+
#' \item{exp_1}{Exposure value on metric 1, units and metric not specified}
156+
#' \item{exp_2}{Exposure value on metric 2, units and metric not specified}
157+
#' \item{rsp_1}{Continuous response value (units not specified)}
158+
#' \item{rsp_2}{Binary response value (group labels not specified)}
159+
#' \item{cnt_a}{Continuous valued covariate}
160+
#' \item{cnt_b}{Continuous valued covariate}
161+
#' \item{cnt_c}{Continuous valued covariate}
162+
#' \item{bin_d}{Binary valued covariate}
163+
#' \item{bin_e}{Binary valued covariate}
164+
#' }
165+
#' @details
166+
#'
167+
#' This simulated dataset is entirely synthetic. It is a generic data set that can be used
168+
#' to illustrate Emax modeling with placebo data. It contains variables corresponding to
169+
#' dose and exposure, and includes both a continuous response variable and a binary response
170+
#' variable. Three continuous valued covariates are included, along with two binary covariates.
171+
#' Data from two exposure metrics are included.
172+
#'
173+
#' You can find the data generating code in the package source code,
174+
#' under `data-raw/d_sim_placebo.R`.
175+
#'
176+
#' @examples
177+
#' d_sim_placebo
178+
"d_sim_placebo"

_pkgdown.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ reference:
7373
- d_sim_binom_cov
7474
- d_sim_lin
7575
- d_sim_emax
76+
- d_sim_placebo
7677
- title: S3 methods
7778
contents:
7879
- ermod_method

data-raw/d_sim_placebo.R

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
set.seed(123L)
2+
3+
d_sim_placebo <- local({
4+
5+
emax_fn <- function(exposure, emax, ec50, e0, gamma = 1) {
6+
e0 + emax * (exposure^gamma) / (ec50^gamma + exposure^gamma)
7+
}
8+
9+
# simulation model for the exposures includes a dosing model, but is very
10+
# simple. exposure scales linearly with dose, and has a slightly-truncated
11+
# lognormal distribution conditional on dose
12+
generate_exposure <- function(dose, n, meanlog = 4, sdlog = 0.5) {
13+
dose * stats::qlnorm(
14+
p = stats::runif(n, min = .01, max = .99),
15+
meanlog = meanlog,
16+
sdlog = sdlog
17+
)
18+
}
19+
20+
# continuous covariates all have the same range (0 to 10) and follow a
21+
# scaled beta distribution within that range
22+
continuous_covariate <- function(n) {
23+
stats::rbeta(n, 2, 2) * 10
24+
}
25+
26+
# binary covariates are Bernoulli variates
27+
binary_covariate <- function(n, p) {
28+
as.numeric(stats::runif(n) <= p)
29+
}
30+
31+
32+
# simulation functions ----------------------------------------------------
33+
34+
simulate_data <- function(seed = 123) {
35+
set.seed(seed)
36+
37+
par <- list(
38+
# parameters for continuous response
39+
emax_1 = 10,
40+
ec50_1 = 4000,
41+
e0_1 = 5,
42+
gamma_1 = 1,
43+
sigma_1 = .5,
44+
coef_a1 = .5,
45+
coef_b1 = 0,
46+
coef_c1 = 0,
47+
coef_d1 = 0,
48+
49+
# parameters for binary response
50+
emax_2 = 5,
51+
ec50_2 = 8000,
52+
e0_2 = -4.5,
53+
gamma_2 = 1,
54+
coef_a2 = .5,
55+
coef_b2 = 0,
56+
coef_c2 = 0,
57+
coef_d2 = 1
58+
)
59+
60+
# conditional on a dose group, generate data; include multiple exposure metrics
61+
# but treat the first one as source of ground truth. exposures are strongly
62+
# correlated but on the same scale
63+
make_dose_data <- function(dose, n, par) {
64+
tibble::tibble(
65+
dose = dose,
66+
exposure_1 = generate_exposure(dose, n = n),
67+
exposure_2 = 0.7 * exposure_1 + 0.3 * generate_exposure(dose, n = n),
68+
69+
# add continuous and binary covariates
70+
cnt_a = continuous_covariate(n = n),
71+
cnt_b = continuous_covariate(n = n),
72+
cnt_c = continuous_covariate(n = n),
73+
bin_d = binary_covariate(n = n, p = .5),
74+
bin_e = binary_covariate(n = n, p = .7),
75+
76+
# response 1 is continuous
77+
response_1 = emax_fn(
78+
exposure_1,
79+
emax = par$emax_1,
80+
ec50 = par$ec50_1,
81+
e0 = par$e0_1,
82+
gamma = par$gamma_1
83+
) +
84+
par$coef_a1 * cnt_a +
85+
par$coef_b1 * cnt_b +
86+
par$coef_c1 * cnt_c +
87+
par$coef_d1 * bin_d +
88+
stats::rnorm(n, 0, par$sigma_1),
89+
90+
# response 2 is binary; start with the predictor
91+
bin_pred = emax_fn(
92+
exposure_1,
93+
emax = par$emax_2,
94+
ec50 = par$ec50_2,
95+
e0 = par$e0_2,
96+
gamma = par$gamma_2
97+
) +
98+
par$coef_a2 * cnt_a +
99+
par$coef_b2 * cnt_b +
100+
par$coef_c2 * cnt_c +
101+
par$coef_d2 * bin_d,
102+
103+
# convert
104+
bin_prob = 1 / (1 + exp(-bin_pred)),
105+
response_2 = as.numeric(stats::runif(n) < bin_prob)
106+
) |>
107+
dplyr::select(-bin_pred, -bin_prob) # remove intermediate variables
108+
}
109+
110+
# create data set assuming three dosing groups and placebo
111+
dat <- dplyr::bind_rows(
112+
make_dose_data(dose = 0, n = 100, par = par),
113+
make_dose_data(dose = 100, n = 100, par = par),
114+
make_dose_data(dose = 200, n = 100, par = par),
115+
make_dose_data(dose = 300, n = 100, par = par)
116+
)
117+
118+
return(dat)
119+
}
120+
121+
# generate data -----------------------------------------------------------
122+
123+
d_sim_placebo <- simulate_data()
124+
d_sim_placebo <- d_sim_placebo |>
125+
dplyr::mutate(id = dplyr::row_number()) |>
126+
dplyr::relocate(response_1, response_2, .after = exposure_2) |>
127+
dplyr::relocate(id, 1) |>
128+
dplyr::rename(
129+
exp_1 = exposure_1,
130+
exp_2 = exposure_2,
131+
rsp_1 = response_1,
132+
rsp_2 = response_2
133+
)
134+
135+
d_sim_placebo
136+
})
137+
138+
readr::write_csv(d_sim_placebo, "data-raw/d_sim_placebo.csv")
139+
usethis::use_data(d_sim_placebo, overwrite = TRUE)

0 commit comments

Comments
 (0)