extended for more general link functions.

n8thangreen · n8thangreen · commit dce7da6ee200 · 2025-02-04T16:40:58.000Z
* needs testing: this almost certainly does not work at the moment!

* write some test scenarios and vignettes
diff --git a/NAMESPACE b/NAMESPACE
@@ -10,6 +10,7 @@ S3method(print,outstandR)
 S3method(summary,outstandR)
 export(ALD_stats)
 export(IPD_stats)
+export(calculate_ate)
 export(marginal_treatment_effect)
 export(marginal_variance)
 export(new_strategy)
diff --git a/R/ALD_stats.R b/R/ALD_stats.R
@@ -8,9 +8,9 @@
 #' @seealso [marginal_treatment_effect()], [marginal_variance()]
 #' @export
 #'
-ALD_stats <- function(ald, treatments = list("B", "C")) {
-  list(mean = marginal_treatment_effect(ald, treatments),
-       var = marginal_variance(ald, treatments))
+ALD_stats <- function(strategy, ald, treatments = list("B", "C")) {
+  fns <- list(mean = marginal_treatment_effect, var = marginal_variance)
+  lapply(fns, function(f) f(ald, treatments, link = strategy$family$link))
 }
 
 
@@ -24,8 +24,8 @@ ALD_stats <- function(ald, treatments = list("B", "C")) {
 #' @return Sum of variances
 #' @export
 #' 
-marginal_variance <- function(ald, treatments = list("B", "C")) {
-  trial_vars <- purrr::map_dbl(treatments, ~trial_variance(ald, .x))
+marginal_variance <- function(ald, treatments = list("B", "C"), link = "logit") {
+  trial_vars <- purrr::map_dbl(treatments, ~trial_variance(ald, .x, link))
   sum(trial_vars)
 }
 
@@ -44,13 +44,13 @@ marginal_variance <- function(ald, treatments = list("B", "C")) {
 #' @return Trial effect difference
 #' @export
 #' 
-marginal_treatment_effect <- function(ald, treatments = list("B", "C")) {
-  trial_effect <- purrr::map_dbl(treatments, ~trial_treatment_effect(ald, .x))
+marginal_treatment_effect <- function(ald, treatments = list("B", "C"), link = "logit") {
+  trial_effect <- purrr::map_dbl(treatments, ~trial_treatment_effect(ald, .x, link))
   trial_effect[2] - trial_effect[1]
 }
 
 
-#' Trial variance with aggregate data
+#' Trial variance of the log-odds (logit) estimate with aggregate data
 #'
 #' Calculate
 #' \deqn{1/(\sum y_k) + 1/(N_k - \sum y_k)}.
@@ -61,9 +61,12 @@ marginal_treatment_effect <- function(ald, treatments = list("B", "C")) {
 #' @return Value
 #' @export
 #'
-trial_variance <- function(ald, tid) {
-  var_string <- glue::glue("1/ald$y.{tid}.sum + 1/(ald$N.{tid} - ald$y.{tid}.sum)")
-  eval(parse(text = var_string))
+trial_variance <- function(ald, tid, link = "logit") {
+  # aggregate columns are keyed by treatment label: y.<tid>.sum and N.<tid>
+  y_total <- ald[[paste0("y.", tid, ".sum")]]
+  n_total <- ald[[paste0("N.", tid)]]
+
+  link_transform_var(y_total, n_total, link)
 }
 
 
@@ -74,11 +77,42 @@ trial_variance <- function(ald, tid) {
 #' 
 #' @param ald Aggregate-level data
 #' @param tid Treatment label
+#' @param link Link function; default "logit"
 #'
 #' @return Value
 #' @export
 #'
-trial_treatment_effect <- function(ald, tid) {
-  var_string <- glue::glue("log(ald$y.{tid}.sum*(ald$N.{tid} - ald$y.{tid}.sum))")
-  eval(parse(text = var_string))
+trial_treatment_effect <- function(ald, tid, link = "logit") {
+  # NOTE: the previous implementation took log(y * (N - y)); with the logit
+  # link the value returned here is the log-odds, log(y / (N - y)).
+  y_total <- ald[[paste0("y.", tid, ".sum")]]
+  n_total <- ald[[paste0("N.", tid)]]
+
+  # observed proportion, mapped onto the requested link scale
+  link_transform(y_total / n_total, link)
+}
+
+
+#' Map a probability onto the scale of a link function
+#'
+#' @param p Estimated probability
+#' @param link Link function name: "logit" (log-odds) or "log" (log-risk)
+#' @return `p` on the link scale; an error for any other `link`
+link_transform <- function(p, link) {
+  switch(link,
+         logit = qlogis(p),  # log(p / (1 - p))
+         log = log(p),
+         stop("Unsupported link function: '", link, "'. Choose 'logit' or 'log'."))
+}
+
+#' Variance of the link-scale estimate from aggregate data
+#'
+#' @param y Outcome total
+#' @param N Sample size
+#' @param link Link function name: "logit" (log-OR) or "log" (log-RR)
+link_transform_var <- function(y, N, link) {
+  switch(link,
+         logit = 1/y + 1/(N - y),
+         log = 1/y,
+         stop("Unsupported link function: '", link, "'. Choose 'logit' or 'log'."))
 }
diff --git a/R/IPD_stats.R b/R/IPD_stats.R
@@ -72,7 +72,7 @@ IPD_stats.stc <- function(strategy,
   
   fit <- glm(strategy$formula,
              data = ipd,
-             family = binomial)
+             family = strategy$family)
   
   treat_nm <- get_treatment_name(strategy$formula)
   
@@ -95,6 +95,7 @@ IPD_stats.gcomp_ml <- function(strategy,
                              statistic = gcomp_ml.boot,
                              R = strategy$R,
                              formula = strategy$formula,
+                             family = strategy$family,
                              ald = ald)
   
   list(mean = mean(AC_maic_boot$t),
@@ -113,10 +114,14 @@ IPD_stats.gcomp_stan <- function(strategy,
                                  ipd, ald) {
   
   ppv <- gcomp_stan(formula = strategy$formula,
+                    family = strategy$family,
                     ipd = ipd, ald = ald)
+
+  # posterior means for each treatment group
+  mean_A <- rowMeans(ppv$y.star.A)
+  mean_C <- rowMeans(ppv$y.star.C)
   
-  hat.delta.AC <-
-    qlogis(rowMeans(ppv$y.star.A)) - qlogis(rowMeans(ppv$y.star.C))
+  hat.delta.AC <- calculate_ate(mean_A, mean_C, family = strategy$family)
   
   list(mean = mean(hat.delta.AC),
        var = var(hat.delta.AC))
@@ -135,6 +140,7 @@ IPD_stats.mim <- function(strategy,
                           ipd, ald) {
   
   mis_res <- mim(formula = strategy$formula,
+                 family = strategy$family,
                  ipd, ald)
   
   M <- mis_res$M
diff --git a/R/calculate_ate.R b/R/calculate_ate.R
@@ -0,0 +1,30 @@
+
+#' Calculate ATE
+#'
+#' @param mean_A,mean_B Mean outcomes of the two arms; the effect is A minus B
+#' @param family family object of the model; `family$link` sets the scale
+#'
+#' @returns ATE
+#' @export
+#'
+calculate_ate <- function(mean_A, mean_B, family) {
+  # the contrast is taken on the linear-predictor scale of the model's link
+  link <- family$link
+  
+  if (link == "logit") {
+    ate <- qlogis(mean_A) - qlogis(mean_B)
+  } else if (link == "identity") {
+    ate <- mean_A - mean_B
+  } else if (link == "probit") {
+    ate <- qnorm(mean_A) - qnorm(mean_B)
+  } else if (link == "cloglog") {
+    ate <- log(-log(1 - mean_A)) - log(-log(1 - mean_B))
+  } else if (link == "log") {  # Poisson log link
+    ate <- log(mean_A) - log(mean_B)
+  } else {
+    stop("Unsupported link function. Choose from 'logit', 'identity', 'probit', 'cloglog', or 'log'.")
+  }
+  
+  ate
+}
+
diff --git a/R/gcomp_ml.R b/R/gcomp_ml.R
@@ -12,20 +12,22 @@
 #' @keywords internal
 #' 
 gcomp_ml.boot <- function(data, indices,
-                          R, formula = NULL, ald) {
+                          R, formula = NULL, family, ald) {
   dat <- data[indices, ]
-  gcomp_ml_log_odds_ratio(formula, dat, ald) 
+  gcomp_ml_ate(formula = formula, family = family, ipd = dat, ald = ald)
 }
 
 
-#' G-computation Maximum Likelihood Log-Odds Ratio
+#' G-computation Maximum Likelihood ATE
 #' 
+#' @section Log-Odds Ratio: 
 #' Marginal _A_ vs _C_ log-odds ratio (mean difference in expected log-odds)
 #' estimated by transforming from probability to linear predictor scale.
 #'
 #' \eqn{\log(\hat{\mu}_A/(1 - \hat{\mu}_A)) - \log(\hat{\mu}_C/(1 - \hat{\mu}_C))}
 #'
 #' @param formula Linear regression `formula` object
+#' @param family Family object
 #' @template args-ipd
 #' @template args-ald
 #'
@@ -35,7 +37,9 @@ gcomp_ml.boot <- function(data, indices,
 #' @importFrom stats predict glm
 #' @keywords internal
 #'
-gcomp_ml_log_odds_ratio <- function(formula, ipd, ald) {
+gcomp_ml_ate <- function(formula,
+                         family,
+                         ipd, ald) {
   
   if (!inherits(formula, "formula"))
     stop("formula argument must be of formula class.")
@@ -45,7 +49,7 @@ gcomp_ml_log_odds_ratio <- function(formula, ipd, ald) {
   # outcome logistic regression fitted to IPD using maximum likelihood
   fit <- glm(formula,
              data = ipd,
-             family = binomial)
+             family = family)
   
   # counterfactual datasets
   data.trtA <- data.trtC <- x_star
@@ -63,8 +67,7 @@ gcomp_ml_log_odds_ratio <- function(formula, ipd, ald) {
   hat.mu.A <- mean(hat.mu.A.i) # (marginal) mean probability prediction under A
   hat.mu.C <- mean(hat.mu.C.i) # (marginal) mean probability prediction under C
   
-  log(hat.mu.A/(1-hat.mu.A)) - log(hat.mu.C/(1-hat.mu.C))
-  # qlogis(hat.mu.A) - qlogis(hat.mu.C)#'
+  calculate_ate(hat.mu.A, hat.mu.C, family = family)
 }
 
 
diff --git a/R/gcomp_stan.R b/R/gcomp_stan.R
@@ -5,6 +5,7 @@
 #' from the Bayesian G-computation method using Hamiltonian Monte Carlo.
 #' 
 #' @param formula Linear regression `formula` object
+#' @param family A `family` object
 #' @template args-ipd
 #' @template args-ald
 #'
@@ -14,6 +15,7 @@
 #' @keywords internal
 #'
 gcomp_stan <- function(formula = NULL,
+                       family = gaussian(link = "identity"),
                        ipd, ald) {
   
   if (!inherits(formula, "formula"))
@@ -25,7 +27,7 @@ gcomp_stan <- function(formula = NULL,
   outcome.model <-
     rstanarm::stan_glm(formula,
                        data = ipd,
-                       family = binomial,
+                       family = family,
                        algorithm = "sampling",
                        iter = 4000, warmup = 2000, chains = 2)
   
@@ -40,7 +42,7 @@ gcomp_stan <- function(formula = NULL,
   
   # draw binary responses from posterior predictive distribution
   list(
-    y.star.A = rstanarm::posterior_predict(outcome.model, newdata=data.trtA),
-    y.star.C = rstanarm::posterior_predict(outcome.model, newdata=data.trtC))
+    y.star.A = rstanarm::posterior_predict(outcome.model, newdata = data.trtA),
+    y.star.C = rstanarm::posterior_predict(outcome.model, newdata = data.trtC))
 }
 
diff --git a/R/mim.R b/R/mim.R
@@ -13,6 +13,7 @@
 #' @keywords internal
 #' 
 mim <- function(formula,
+                family,
                 ipd, ald,
                 M = 1000,
                 n.chains = 2,
@@ -28,9 +29,9 @@ mim <- function(formula,
   
   # first-stage logistic regression model fitted to index RCT using MCMC (Stan)
   outcome.model <- stan_glm(
-    formula,
+    formula = formula,
     data = ipd,
-    family = binomial,
+    family = family,
     algorithm = "sampling",
     iter = iters,
     warmup = warmup,
@@ -52,7 +53,7 @@ mim <- function(formula,
   
   # fit second-stage regression to each synthesis using maximum-likelihood estimation
   reg2.fits <- lapply(1:M, function(m)
-    glm(y_star[m, ] ~ trt, data = aug.target, family = binomial))
+    glm(y_star[m, ] ~ trt, data = aug.target, family = family))
   
   # treatment effect point estimates in each synthesis
   hats.delta <- unlist(lapply(reg2.fits,
diff --git a/R/outstandR.R b/R/outstandR.R
@@ -54,7 +54,7 @@ outstandR <- function(AC.IPD, BC.ALD, strategy, CI = 0.95, ...) {
   ald <- prep_ald(strategy$formula, BC.ALD)
 
   AC_stats <- IPD_stats(strategy, ipd = ipd, ald = ald, ...) 
-  BC_stats <- ALD_stats(ald = ald) 
+  BC_stats <- ALD_stats(strategy, ald = ald) 
   
   stats <- contrast_stats(AC_stats, BC_stats, CI)
   
diff --git a/man/calculate_ate.Rd b/man/calculate_ate.Rd
diff --git a/man/gcomp_stan.Rd b/man/gcomp_stan.Rd
diff --git a/vignettes/Basic_example.Rmd b/vignettes/Basic_example.Rmd
diff --git a/vignettes/Binary_data_example.Rmd b/vignettes/Binary_data_example.Rmd