Skip to content

Commit dce7da6

Browse files
committed
extended for more general link functions.
* need to test because it certainly will not work atm! * write some test scenarios, vignettes
1 parent 61fe909 commit dce7da6

File tree

12 files changed

+494
-286
lines changed

12 files changed

+494
-286
lines changed

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ S3method(print,outstandR)
1010
S3method(summary,outstandR)
1111
export(ALD_stats)
1212
export(IPD_stats)
13+
export(calculate_ate)
1314
export(marginal_treatment_effect)
1415
export(marginal_variance)
1516
export(new_strategy)

R/ALD_stats.R

Lines changed: 48 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@
88
#' @seealso [marginal_treatment_effect()], [marginal_variance()]
99
#' @export
1010
#'
11-
ALD_stats <- function(ald, treatments = list("B", "C")) {
12-
list(mean = marginal_treatment_effect(ald, treatments),
13-
var = marginal_variance(ald, treatments))
11+
ALD_stats <- function(strategy, ald, treatments = list("B", "C")) {
12+
list(mean = marginal_treatment_effect(ald, treatments, link = strategy$family$link),
13+
var = marginal_variance(ald, treatments, link = strategy$family$link))
1414
}
1515

1616

@@ -24,8 +24,8 @@ ALD_stats <- function(ald, treatments = list("B", "C")) {
2424
#' @return Sum of variances
2525
#' @export
2626
#'
27-
marginal_variance <- function(ald, treatments = list("B", "C")) {
28-
trial_vars <- purrr::map_dbl(treatments, ~trial_variance(ald, .x))
27+
marginal_variance <- function(ald, treatments = list("B", "C"), link) {
28+
trial_vars <- purrr::map_dbl(treatments, ~trial_variance(ald, .x, link))
2929
sum(trial_vars)
3030
}
3131

@@ -44,13 +44,13 @@ marginal_variance <- function(ald, treatments = list("B", "C")) {
4444
#' @return Trial effect difference
4545
#' @export
4646
#'
47-
marginal_treatment_effect <- function(ald, treatments = list("B", "C")) {
48-
trial_effect <- purrr::map_dbl(treatments, ~trial_treatment_effect(ald, .x))
47+
marginal_treatment_effect <- function(ald, treatments = list("B", "C"), link) {
48+
trial_effect <- purrr::map_dbl(treatments, ~trial_treatment_effect(ald, .x, link))
4949
trial_effect[2] - trial_effect[1]
5050
}
5151

5252

53-
#' Trial variance with aggregate data
53+
#' Trial variance of the log-odds (logit) estimate with aggregate data
5454
#'
5555
#' Calculate
5656
#' \deqn{1/(\sum y_k) + 1/(N_k - \sum y_k)}.
@@ -61,9 +61,12 @@ marginal_treatment_effect <- function(ald, treatments = list("B", "C")) {
6161
#' @return Value
6262
#' @export
6363
#'
64-
trial_variance <- function(ald, tid) {
65-
var_string <- glue::glue("1/ald$y.{tid}.sum + 1/(ald$N.{tid} - ald$y.{tid}.sum)")
66-
eval(parse(text = var_string))
64+
trial_variance <- function(ald, tid, link = "logit") {
65+
66+
y <- ald[[paste0("y.", tid, ".sum")]]
67+
N <- ald[[paste0("N.", tid)]]
68+
69+
link_transform_var(y, N, link)
6770
}
6871

6972

@@ -74,11 +77,42 @@ trial_variance <- function(ald, tid) {
7477
#'
7578
#' @param ald Aggregate-level data
7679
#' @param tid Treatment label
80+
#' @param link Link function; default "logit"
7781
#'
7882
#' @return Value
7983
#' @export
8084
#'
81-
trial_treatment_effect <- function(ald, tid) {
82-
var_string <- glue::glue("log(ald$y.{tid}.sum*(ald$N.{tid} - ald$y.{tid}.sum))")
83-
eval(parse(text = var_string))
85+
trial_treatment_effect <- function(ald, tid, link = "logit") {
86+
##TODO: should this be instead i.e. log odds? it was * before
87+
# var_string <- glue::glue("log(ald$y.{tid}.sum / (ald$N.{tid} - ald$y.{tid}.sum))")
88+
89+
# estimated probability
90+
p_hat <- ald[[paste0("y.", tid, ".sum")]] / ald[[paste0("N.", tid)]]
91+
92+
link_transform(p_hat, link)
93+
}
94+
95+
96+
#' mean
97+
#'
98+
link_transform <- function(p, link) {
99+
if (link == "logit") {
100+
# log-OR
101+
return(qlogis(p)) # log(p / (1 - p))
102+
} else if (link == "log") {
103+
# log-Relative Risk (log-RR)
104+
return(log(p))
105+
}
106+
}
107+
108+
#' variance
109+
#'
110+
link_transform_var <- function(y, N, link) {
111+
if (link == "logit") {
112+
# log-OR
113+
return(1/y + 1/(N - y))
114+
} else if (link == "log") {
115+
# log-RR
116+
return(1/y)
117+
}
84118
}

R/IPD_stats.R

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ IPD_stats.stc <- function(strategy,
7272

7373
fit <- glm(strategy$formula,
7474
data = ipd,
75-
family = binomial)
75+
family = strategy$family)
7676

7777
treat_nm <- get_treatment_name(strategy$formula)
7878

@@ -95,6 +95,7 @@ IPD_stats.gcomp_ml <- function(strategy,
9595
statistic = gcomp_ml.boot,
9696
R = strategy$R,
9797
formula = strategy$formula,
98+
family = strategy$family,
9899
ald = ald)
99100

100101
list(mean = mean(AC_maic_boot$t),
@@ -113,10 +114,14 @@ IPD_stats.gcomp_stan <- function(strategy,
113114
ipd, ald) {
114115

115116
ppv <- gcomp_stan(formula = strategy$formula,
117+
family = strategy$family,
116118
ipd = ipd, ald = ald)
119+
120+
# posterior means for each treatment group
121+
mean_A <- rowMeans(ppv$y.star.A)
122+
mean_C <- rowMeans(ppv$y.star.C)
117123

118-
hat.delta.AC <-
119-
qlogis(rowMeans(ppv$y.star.A)) - qlogis(rowMeans(ppv$y.star.C))
124+
hat.delta.AC <- calculate_ate(mean_A, mean_B, family = strategy$family)
120125

121126
list(mean = mean(hat.delta.AC),
122127
var = var(hat.delta.AC))
@@ -135,6 +140,7 @@ IPD_stats.mim <- function(strategy,
135140
ipd, ald) {
136141

137142
mis_res <- mim(formula = strategy$formula,
143+
family = strategy$family,
138144
ipd, ald)
139145

140146
M <- mis_res$M

R/calculate_ate.R

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
2+
#' Calculate ATE
3+
#'
4+
#' @param ppv model prediction samples
5+
#' @param family family object of the model
6+
#'
7+
#' @returns ATE
8+
#' @export
9+
#'
10+
calculate_ate <- function(mean_A, mean_B, family) {
11+
12+
link <- family$link
13+
14+
if (link == "logit") {
15+
ate <- qlogis(mean_A) - qlogis(mean_C)
16+
} else if (link == "identity") {
17+
ate <- mean_A - mean_C
18+
} else if (link == "probit") {
19+
ate <- qnorm(mean_A) - qnorm(mean_C)
20+
} else if (link == "cloglog") {
21+
ate <- log(-log(1 - mean_A)) - log(-log(1 - mean_C))
22+
} else if (link == "log") { # Poisson log link
23+
ate <- log(mean_A) - log(mean_C)
24+
} else {
25+
stop("Unsupported link function. Choose from 'logit', 'identity', 'probit', 'cloglog', or 'log'.")
26+
}
27+
28+
ate
29+
}
30+

R/gcomp_ml.R

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,20 +12,22 @@
1212
#' @keywords internal
1313
#'
1414
gcomp_ml.boot <- function(data, indices,
15-
R, formula = NULL, ald) {
15+
R, formula = NULL, family, ald) {
1616
dat <- data[indices, ]
17-
gcomp_ml_log_odds_ratio(formula, dat, ald)
17+
gcomp_ml_ate(formula, family, dat, ald)
1818
}
1919

2020

21-
#' G-computation Maximum Likelihood Log-Odds Ratio
21+
#' G-computation Maximum Likelihood ATE
2222
#'
23+
#' @section Log-Odds Ratio:
2324
#' Marginal _A_ vs _C_ log-odds ratio (mean difference in expected log-odds)
2425
#' estimated by transforming from probability to linear predictor scale.
2526
#'
2627
#' \eqn{\log(\hat{\mu}_A/(1 - \hat{\mu}_A)) - \log(\hat{\mu}_C/(1 - \hat{\mu}_C))}
2728
#'
2829
#' @param formula Linear regression `formula` object
30+
#' @param family Family object
2931
#' @template args-ipd
3032
#' @template args-ald
3133
#'
@@ -35,7 +37,9 @@ gcomp_ml.boot <- function(data, indices,
3537
#' @importFrom stats predict glm
3638
#' @keywords internal
3739
#'
38-
gcomp_ml_log_odds_ratio <- function(formula, ipd, ald) {
40+
gcomp_ml_ate <- function(formula,
41+
family,
42+
ipd, ald) {
3943

4044
if (!inherits(formula, "formula"))
4145
stop("formula argument must be of formula class.")
@@ -45,7 +49,7 @@ gcomp_ml_log_odds_ratio <- function(formula, ipd, ald) {
4549
# outcome logistic regression fitted to IPD using maximum likelihood
4650
fit <- glm(formula,
4751
data = ipd,
48-
family = binomial)
52+
family = family)
4953

5054
# counterfactual datasets
5155
data.trtA <- data.trtC <- x_star
@@ -63,8 +67,7 @@ gcomp_ml_log_odds_ratio <- function(formula, ipd, ald) {
6367
hat.mu.A <- mean(hat.mu.A.i) # (marginal) mean probability prediction under A
6468
hat.mu.C <- mean(hat.mu.C.i) # (marginal) mean probability prediction under C
6569

66-
log(hat.mu.A/(1-hat.mu.A)) - log(hat.mu.C/(1-hat.mu.C))
67-
# qlogis(hat.mu.A) - qlogis(hat.mu.C)#'
70+
calculate_ate(hat.mu.A, hat.mu.C, family = strategy$family)
6871
}
6972

7073

R/gcomp_stan.R

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#' from the Bayesian G-computation method using Hamiltonian Monte Carlo.
66
#'
77
#' @param formula Linear regression `formula` object
8+
#' @param family A `family` object
89
#' @template args-ipd
910
#' @template args-ald
1011
#'
@@ -14,6 +15,7 @@
1415
#' @keywords internal
1516
#'
1617
gcomp_stan <- function(formula = NULL,
18+
family = gaussian(link = "identity"),
1719
ipd, ald) {
1820

1921
if (!inherits(formula, "formula"))
@@ -25,7 +27,7 @@ gcomp_stan <- function(formula = NULL,
2527
outcome.model <-
2628
rstanarm::stan_glm(formula,
2729
data = ipd,
28-
family = binomial,
30+
family = family,
2931
algorithm = "sampling",
3032
iter = 4000, warmup = 2000, chains = 2)
3133

@@ -40,7 +42,7 @@ gcomp_stan <- function(formula = NULL,
4042

4143
# draw binary responses from posterior predictive distribution
4244
list(
43-
y.star.A = rstanarm::posterior_predict(outcome.model, newdata=data.trtA),
44-
y.star.C = rstanarm::posterior_predict(outcome.model, newdata=data.trtC))
45+
y.star.A = rstanarm::posterior_predict(outcome.model, newdata = data.trtA),
46+
y.star.C = rstanarm::posterior_predict(outcome.model, newdata = data.trtC))
4547
}
4648

R/mim.R

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#' @keywords internal
1414
#'
1515
mim <- function(formula,
16+
family,
1617
ipd, ald,
1718
M = 1000,
1819
n.chains = 2,
@@ -28,9 +29,9 @@ mim <- function(formula,
2829

2930
# first-stage logistic regression model fitted to index RCT using MCMC (Stan)
3031
outcome.model <- stan_glm(
31-
formula,
32+
formula = formula,
3233
data = ipd,
33-
family = binomial,
34+
family = family,
3435
algorithm = "sampling",
3536
iter = iters,
3637
warmup = warmup,
@@ -52,7 +53,7 @@ mim <- function(formula,
5253

5354
# fit second-stage regression to each synthesis using maximum-likelihood estimation
5455
reg2.fits <- lapply(1:M, function(m)
55-
glm(y_star[m, ] ~ trt, data = aug.target, family = binomial))
56+
glm(y_star[m, ] ~ trt, data = aug.target, family = family))
5657

5758
# treatment effect point estimates in each synthesis
5859
hats.delta <- unlist(lapply(reg2.fits,

R/outstandR.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ outstandR <- function(AC.IPD, BC.ALD, strategy, CI = 0.95, ...) {
5454
ald <- prep_ald(strategy$formula, BC.ALD)
5555

5656
AC_stats <- IPD_stats(strategy, ipd = ipd, ald = ald, ...)
57-
BC_stats <- ALD_stats(ald = ald)
57+
BC_stats <- ALD_stats(strategy, ald = ald)
5858

5959
stats <- contrast_stats(AC_stats, BC_stats, CI)
6060

man/calculate_ate.Rd

Lines changed: 16 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/gcomp_stan.Rd

Lines changed: 3 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)