ehsanx
diff --git a/‎1RHC.Rmd‎
Lines changed: 3 additions & 0 deletions b/‎1RHC.Rmd‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎2gcomp2.Rmd‎
Lines changed: 3 additions & 0 deletions b/‎2gcomp2.Rmd‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎3ipw2.Rmd‎
Lines changed: 4 additions & 0 deletions b/‎3ipw2.Rmd‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎4tmle.Rmd‎
Lines changed: 22 additions & 22 deletions b/‎4tmle.Rmd‎
Lines changed: 22 additions & 22 deletions
diff --git a/‎5software.Rmd‎
Lines changed: 41 additions & 10 deletions b/‎5software.Rmd‎
Lines changed: 41 additions & 10 deletions
diff --git a/‎6final.Rmd‎
Lines changed: 3 additions & 2 deletions b/‎6final.Rmd‎
Lines changed: 3 additions & 2 deletions
@@ -147,6 +147,9 @@ fit1 <- lm(out.formula, data = ObsData)
 adj.fit <- publish(fit1, digits=1)$regressionTable[2,]
 ```
 
+```{r, cache=TRUE, echo = TRUE}
+saveRDS(fit1, file = "data/adjreg.RDS")  # persist the adjusted regression fit so it can be reloaded with readRDS() for the results summary
+```
 
 ### Regression diagnostics
 
 
@@ -487,6 +487,9 @@ fit.sl4 <- recombineSL(fit.sl, Y = Y, method = "method.CC_nloglik")
 fit.sl4$coef
 ```
 
+- `method.CC_LS` is [suggested](https://si.biostat.washington.edu/sites/default/files/modules/lab1_0.pdf) as a good method for continuous outcomes.
+- `method.CC_nloglik` is [suggested](https://si.biostat.washington.edu/sites/default/files/modules/lab1_0.pdf) as a good method for binary outcomes.
+
 ```{r, cache=TRUE, echo = TRUE}
 saveRDS(TE1, file = "data/gcompxg.RDS")
 saveRDS(TE2, file = "data/gcompls.RDS")
 
@@ -109,6 +109,10 @@ W.out <- weightit(ps.formula,
 summary(W.out$weights)
 ```
 
+```{r, cache=TRUE, echo = TRUE}
+saveRDS(W.out, file = "data/ipwslps.RDS")  # persist the whole weightit() result so later chapters can reuse it via readRDS()
+```
+
 Alternatively, you can use the previously estimated PS
 
 ```{r ipw2psx2c2clone, cache=TRUE, echo = TRUE}
 
@@ -108,12 +108,12 @@ Y.fit.sl <- SuperLearner(Y=ObsData$Y.bounded,
                        SL.library=c("SL.glm", 
                                     "SL.glmnet", 
                                     "SL.xgboost"),
-                       method="method.CC_nloglik",
+                       method="method.CC_nloglik", 
                        family="gaussian")
 ```
 
 ```{r SL_out01x, cache=TRUE}
-ObsData$init.Pred <- predict(Y.fit.sl, newdata = ObsData.noY,
+ObsData$init.Pred <- predict(Y.fit.sl, newdata = ObsData.noY, 
                            type = "response")$pred
 
 summary(ObsData$init.Pred)
@@ -132,7 +132,7 @@ summary(ObsData$init.Pred)
 
 ```{r SL_out01, cache=TRUE}
 ObsData.noY$A <- 1
-ObsData$Pred.Y1 <- predict(Y.fit.sl, newdata = ObsData.noY,
+ObsData$Pred.Y1 <- predict(Y.fit.sl, newdata = ObsData.noY, 
                            type = "response")$pred
 summary(ObsData$Pred.Y1)
 ```
@@ -141,19 +141,19 @@ summary(ObsData$Pred.Y1)
 
 ```{r SL_out02, cache=TRUE}
 ObsData.noY$A <- 0
-ObsData$Pred.Y0 <- predict(Y.fit.sl, newdata = ObsData.noY,
+ObsData$Pred.Y0 <- predict(Y.fit.sl, newdata = ObsData.noY, 
                            type = "response")$pred
 summary(ObsData$Pred.Y0)
 ```
 
 ### Get initial treatment effect estimate
 
 ```{r SL_out03, cache=cachex, echo = TRUE}
-ObsData$Pred.TE <- ObsData$Pred.Y1 - ObsData$Pred.Y0  
+ObsData$Pred.TE <- ObsData$Pred.Y1 - ObsData$Pred.Y0   
 ```
 
 ```{r SL_out04, cache=cachex, echo = TRUE}
-summary(ObsData$Pred.TE)
+summary(ObsData$Pred.TE) 
 ```
 
 ```{r SL_out, cache=TRUE, message=FALSE, warning=FALSE, include = FALSE}
@@ -202,13 +202,13 @@ PS.fit.SL <- SuperLearner(Y=ObsData$A,
                                     "SL.glmnet", 
                                     "SL.xgboost"),
                        method="method.CC_nloglik",
-                       family="binomial")
+                       family="binomial")  
 ```
 
 
 ```{r SL_out01ps2, cache=TRUE}
 all.pred <- predict(PS.fit.SL, type = "response")
-ObsData$PS.SL <- all.pred$pred
+ObsData$PS.SL <- all.pred$pred 
 ```
 
 - These propensity score predictions (`PS.SL`) are represented as $g(A_i=1|L_i)$.
@@ -222,7 +222,7 @@ plot(density(ObsData$PS.SL[ObsData$A==0]),
 lines(density(ObsData$PS.SL[ObsData$A==1]), 
       col = "blue", lty = 2)
 legend("topright", c("No RHC","RHC"), 
-       col = c("red", "blue"), lty=1:2)
+       col = c("red", "blue"), lty=1:2) 
 ```
 
 
@@ -256,7 +256,7 @@ ObsData$H.AL <- ObsData$H.A1L - ObsData$H.A0L
 summary(ObsData$H.AL)
 tapply(ObsData$H.AL, ObsData$A, summary)
 t(apply(cbind(-ObsData$H.A0L,ObsData$H.A1L), 
-      2, summary))
+      2, summary)) 
 ```
 
 Aggregated or individual clever covariate components show slight difference in their summaries.
@@ -282,7 +282,7 @@ eps_mod <- glm(Y.bounded ~ -1 + H.A1L + H.A0L +
                data = ObsData)
 epsilon <- coef(eps_mod)  
 epsilon["H.A1L"]
-epsilon["H.A0L"]
+epsilon["H.A0L"] 
 ```
 
 Note that, if `init.Pred` includes negative values, `NaNs` would be produced after applying `qlogis()`.
@@ -297,7 +297,7 @@ eps_mod1 <- glm(Y.bounded ~ -1 + H.AL +
                family = "binomial",
                data = ObsData)
 epsilon1 <- coef(eps_mod1) 
-epsilon1
+epsilon1 
 ```
 
 Alternative could be to use `H.AL` as weights (not shown here).
@@ -314,7 +314,7 @@ ObsData$Pred.Y1.update <- plogis(qlogis(ObsData$Pred.Y1) +
 ObsData$Pred.Y0.update <- plogis(qlogis(ObsData$Pred.Y0) + 
                                    epsilon["H.A0L"]*ObsData$H.A0L)
 summary(ObsData$Pred.Y1.update)
-summary(ObsData$Pred.Y0.update) 
+summary(ObsData$Pred.Y0.update)  
 ```
 
 ### Only 1 $\hat\epsilon$
@@ -327,7 +327,7 @@ ObsData$Pred.Y1.update1 <- plogis(qlogis(ObsData$Pred.Y1) +
 ObsData$Pred.Y0.update1 <- plogis(qlogis(ObsData$Pred.Y0) + 
                                    epsilon1*ObsData$H.AL)
 summary(ObsData$Pred.Y1.update1)
-summary(ObsData$Pred.Y0.update1)  
+summary(ObsData$Pred.Y0.update1)   
 ```
 
 Note that, if `Pred.Y1` and `Pred.Y0` include negative values, `NaNs` would be produced after applying `qlogis()`.
@@ -359,7 +359,7 @@ ATE.TMLE.bounded.vector <- ObsData$Pred.Y1.update -
 summary(ATE.TMLE.bounded.vector) 
 ATE.TMLE.bounded <- mean(ATE.TMLE.bounded.vector, 
                          na.rm = TRUE) 
-ATE.TMLE.bounded
+ATE.TMLE.bounded 
 ```
 
 ### Only 1 $\hat\epsilon$
@@ -372,7 +372,7 @@ ATE.TMLE.bounded.vector1 <- ObsData$Pred.Y1.update1 -
 summary(ATE.TMLE.bounded.vector1) 
 ATE.TMLE.bounded1 <- mean(ATE.TMLE.bounded.vector1, 
                          na.rm = TRUE) 
-ATE.TMLE.bounded1
+ATE.TMLE.bounded1 
 ```
 
 ## Step 8: Rescale effect estimate
@@ -383,7 +383,7 @@ We make sure to transform back to our original scale.
 
 ```{r meantmle, cache=TRUE}
 ATE.TMLE <- (max.Y-min.Y)*ATE.TMLE.bounded   
-ATE.TMLE
+ATE.TMLE 
 ```
 
 ### Only 1 $\hat\epsilon$
@@ -392,7 +392,7 @@ Alternatively, using `H.AL`:
 
 ```{r meantmle2, cache=TRUE}
 ATE.TMLE1 <- (max.Y-min.Y)*ATE.TMLE.bounded1
-ATE.TMLE1
+ATE.TMLE1 
 ```
 
 ## Step 9: Confidence interval estimation
@@ -442,21 +442,21 @@ ci.estimate <- function(data = ObsData, H.AL.components = 1){
     ATE.TMLE.CI <- c(ATE.TMLE1 - 1.96*sqrt(varHat.IC), 
                    ATE.TMLE1 + 1.96*sqrt(varHat.IC))
   }
-  return(ATE.TMLE.CI)
+  return(ATE.TMLE.CI) 
 }
 ```
 
 ### $\hat\epsilon$ = $\hat\epsilon_0$ and $\hat\epsilon_1$ 
 
 ```{r tmleinf2b, cache=TRUE}
-CI2 <- ci.estimate(data = ObsData, H.AL.components = 2)
+CI2 <- ci.estimate(data = ObsData, H.AL.components = 2) 
 CI2
 ```
 
 ### Only 1 $\hat\epsilon$
 
 ```{r tmleinf2c, cache=TRUE}
-CI1 <- ci.estimate(data = ObsData, H.AL.components = 1)
+CI1 <- ci.estimate(data = ObsData, H.AL.components = 1) 
 CI1
 ```
 
@@ -481,6 +481,6 @@ CI1
 ```
 
 ```{r, cache=TRUE, echo = TRUE}
-saveRDS(ATE.TMLE1, file = "data/tmlepointh.RDS")
+saveRDS(ATE.TMLE, file = "data/tmlepointh.RDS") 
 saveRDS(CI2, file = "data/tmlecih.RDS")
 ```
@@ -43,12 +43,13 @@ dim(ObsData)
 - Note also that the outcome $Y$ is required to be within the range of $[0,1]$ for this method as well, 
   - so we need to pass in the transformed data, then transform back the estimate.
 
-```{r tmlepkg, cache=cachex, results='hide', message=FALSE, warning=FALSE}
+```{r tmlepkg, cache=cachex, message=FALSE, warning=FALSE}
 set.seed(1444) 
-
 # transform the outcome to fall within the range [0,1]
 min.Y <- min(ObsData$Y)
+min.Y
 max.Y <- max(ObsData$Y)
+max.Y
 ObsData$Y_transf <- (ObsData$Y-min.Y)/(max.Y-min.Y)
 
 # run tmle from the tmle package 
@@ -79,9 +80,9 @@ summary(tmle.fit)
 
 ```{r tmlepkgtr, cache=cachex, message=FALSE, warning=FALSE}
 tmle_est_tr <- tmle.fit$estimates$ATE$psi
+tmle_est_tr
 # transform back the ATE estimate
 tmle_est <- (max.Y-min.Y)*tmle_est_tr
-
 tmle_est
 ```
 
@@ -114,7 +115,28 @@ Most helpful resources:
 
 * [CRAN docs](https://cran.r-project.org/web/packages/tmle/tmle.pdf)
 * [tmle package paper](https://www.jstatsoft.org/article/view/v051i13)
-* Vignettes in R
+
+## tmle (reduced computation)
+
+We can pass the propensity score predictions previously obtained from SL (using the `WeightIt` package) to `tmle` to reduce computing time.
+
+```{r tmlepkg33b, cache=cachex, message=FALSE, warning=FALSE}
+ps.obj <- readRDS(file = "data/ipwslps.RDS")  # weightit() result saved earlier with saveRDS(W.out, ...)
+ps.SL <- ps.obj$ps  # g1W must be P(A=1|W); `$weights` holds the IP weights, not the propensity scores
+tmle.fit2 <- tmle::tmle(Y = ObsData$Y_transf, 
+                   A = ObsData$A, 
+                   W = ObsData.noYA, 
+                   family = "gaussian",
+                   V = 3,
+                   Q.SL.library = SL.library, 
+                   g1W = ps.SL)  # user-supplied treatment probabilities, so g is not re-estimated
+tmle.fit2
+```
+
+```{r tmlepkgtrb, cache=cachex, message=FALSE, warning=FALSE}
+# rescale the ATE from the [0,1]-transformed outcome (Y_transf) back to the original scale of Y
+(max.Y-min.Y)*tmle.fit2$estimates$ATE$psi
+```
 
 ## sl3 (optional)
 
@@ -325,6 +347,9 @@ cat("ATE from aipw package: ", aipw_est, aipw_ci, sep = "")
 
 Gathering previously saved results:
 ```{r summarytable0, cache=cachex, echo=FALSE, results='hold', warning=FALSE, message=FALSE}
+fit.reg <- readRDS(file = "data/adjreg.RDS")
+TEr <- fit.reg$coefficients[2]
+CIr <- as.numeric(confint(fit.reg, 'A'))
 fit.matched <- readRDS(file = "data/match.RDS")
 TEm <- fit.matched$coefficients[2]
 CIm <- as.numeric(confint(fit.matched, 'A'))
@@ -346,18 +371,24 @@ tmlesl <- readRDS(file = "data/tmle.RDS")
 tmlecisl <- readRDS(file = "data/tmleci.RDS")
 slp <- readRDS(file = "data/sl3.RDS")
 ci.b <- rep(NA,2)
-point <- as.numeric(c(TEm, TEg, TE1g, TE2g, TE3g, TEi, TEsli, tmleh, tmlesl, slp))
-CIs <- cbind(CIm, CIgc, ci.b, ci.b, ci.b, CIi, CIsli, tmlecih, tmlecisl, ci.b) 
+point <- as.numeric(c(TEr, TEm, TEg, TE1g, TE2g, 
+                      TE3g, TEi, TEsli, tmleh, 
+                      tmlesl, slp))
+CIs <- cbind(CIr, CIm, CIgc, ci.b, ci.b, ci.b, 
+             CIi, CIsli, tmlecih, tmlecisl, ci.b)  
 ```
 
 
 ```{r summarytable, cache=cachex, echo=FALSE}
-method.list <- c("PS.match", "G-comp","G-comp-xgboost",
-             "G-comp-lasso", "G-comp-SL","IPW", 
-             "IPW-SL", "TMLE.step", "tmle (package)", "sl3") 
+method.list <- c("Adj. Reg","PS match", 
+                 "G-comp (logistic)","G-comp (xgboost)",
+                 "G-comp (lasso)", "G-comp (SL)",
+                 "IPW (logistic)", "IPW (SL)", 
+                 "TMLE (9 steps)", "TMLE (package)", 
+                 "sl3 (package)") 
 results <- data.frame(method.list) 
 results$Estimate <- round(point,2)
-results$`2.5 %` <- CIs[1,]
+results$`2.5 %` <- CIs[1,] 
 results$`97.5 %` <- CIs[2,]
 kable(results,digits = 2)
 ```
 
@@ -154,10 +154,11 @@ However, causal inference requires satisfying identifiability assumptions for us
 
 ### Workshops
 
-Highly recommend joining SER if interested in Epi methods development. The following workshops are very useful.
+We highly recommend joining SER if you are interested in Epi methods development. The following workshops and summer course are very useful.
 
 - [SER Workshop](https://epiresearch.org/) Introduction to Parametric and Semi-parametric Estimators for Causal Inference by Laura B. Balzer & Jennifer Ahern, 2020
 - [SER Workshop](https://epiresearch.org/) Machine Learning and Artificial Intelligence for Causal Inference and Prediction: A Primer by Naimi A, 2021
+- [SISCER](https://si.biostat.washington.edu/suminst/archives/SISCER2021/CR2106) Modern Statistical Learning for Observational Data by Marco Carone, David Benkeser, 2021
 
 ### Recorded webinars
 
@@ -186,4 +187,4 @@ The following webinars and workshops are freely accessible, and great for unders
 
 - [Kat’s Stats](https://www.khstats.com/) by Katherine Hoffman
 - [towardsdatascience](https://towardsdatascience.com/targeted-maximum-likelihood-tmle-for-causal-inference-1be88542a749) by Yao Yang
-- [The Research Group of Mark van der Laan](https://vanderlaan-lab.org/post/) by Mark van der Laan
+- [The Research Group of Mark van der Laan](https://vanderlaan-lab.org/post/) by Mark van der Laan