Skip to content

Commit 85e2588

Browse files
committed
gitbook, epub, pdf, citation added
1 parent 9c93a46 commit 85e2588

38 files changed

+5096
-3193
lines changed

1RHC.Rmd

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,9 @@ fit1 <- lm(out.formula, data = ObsData)
147147
adj.fit <- publish(fit1, digits=1)$regressionTable[2,]
148148
```
149149

150+
```{r, cache=TRUE, echo = TRUE}
151+
saveRDS(fit1, file = "data/adjreg.RDS")
152+
```
150153

151154
### Regression diagnostics
152155

2gcomp2.Rmd

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -487,6 +487,9 @@ fit.sl4 <- recombineSL(fit.sl, Y = Y, method = "method.CC_nloglik")
487487
fit.sl4$coef
488488
```
489489

490+
- `method.CC_LS` is [suggested](https://si.biostat.washington.edu/sites/default/files/modules/lab1_0.pdf) as a good method for continuous outcome
491+
- `method.CC_nloglik` is [suggested](https://si.biostat.washington.edu/sites/default/files/modules/lab1_0.pdf) as a good method for binary outcome
492+
490493
```{r, cache=TRUE, echo = TRUE}
491494
saveRDS(TE1, file = "data/gcompxg.RDS")
492495
saveRDS(TE2, file = "data/gcompls.RDS")

3ipw2.Rmd

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,10 @@ W.out <- weightit(ps.formula,
109109
summary(W.out$weights)
110110
```
111111

112+
```{r, cache=TRUE, echo = TRUE}
113+
saveRDS(W.out, file = "data/ipwslps.RDS")
114+
```
115+
112116
Alternatively, you can use the previously estimated PS
113117

114118
```{r ipw2psx2c2clone, cache=TRUE, echo = TRUE}

4tmle.Rmd

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -108,12 +108,12 @@ Y.fit.sl <- SuperLearner(Y=ObsData$Y.bounded,
108108
SL.library=c("SL.glm",
109109
"SL.glmnet",
110110
"SL.xgboost"),
111-
method="method.CC_nloglik",
111+
method="method.CC_nloglik",
112112
family="gaussian")
113113
```
114114

115115
```{r SL_out01x, cache=TRUE}
116-
ObsData$init.Pred <- predict(Y.fit.sl, newdata = ObsData.noY,
116+
ObsData$init.Pred <- predict(Y.fit.sl, newdata = ObsData.noY,
117117
type = "response")$pred
118118
119119
summary(ObsData$init.Pred)
@@ -132,7 +132,7 @@ summary(ObsData$init.Pred)
132132

133133
```{r SL_out01, cache=TRUE}
134134
ObsData.noY$A <- 1
135-
ObsData$Pred.Y1 <- predict(Y.fit.sl, newdata = ObsData.noY,
135+
ObsData$Pred.Y1 <- predict(Y.fit.sl, newdata = ObsData.noY,
136136
type = "response")$pred
137137
summary(ObsData$Pred.Y1)
138138
```
@@ -141,19 +141,19 @@ summary(ObsData$Pred.Y1)
141141

142142
```{r SL_out02, cache=TRUE}
143143
ObsData.noY$A <- 0
144-
ObsData$Pred.Y0 <- predict(Y.fit.sl, newdata = ObsData.noY,
144+
ObsData$Pred.Y0 <- predict(Y.fit.sl, newdata = ObsData.noY,
145145
type = "response")$pred
146146
summary(ObsData$Pred.Y0)
147147
```
148148

149149
### Get initial treatment effect estimate
150150

151151
```{r SL_out03, cache=cachex, echo = TRUE}
152-
ObsData$Pred.TE <- ObsData$Pred.Y1 - ObsData$Pred.Y0
152+
ObsData$Pred.TE <- ObsData$Pred.Y1 - ObsData$Pred.Y0
153153
```
154154

155155
```{r SL_out04, cache=cachex, echo = TRUE}
156-
summary(ObsData$Pred.TE)
156+
summary(ObsData$Pred.TE)
157157
```
158158

159159
```{r SL_out, cache=TRUE, message=FALSE, warning=FALSE, include = FALSE}
@@ -202,13 +202,13 @@ PS.fit.SL <- SuperLearner(Y=ObsData$A,
202202
"SL.glmnet",
203203
"SL.xgboost"),
204204
method="method.CC_nloglik",
205-
family="binomial")
205+
family="binomial")
206206
```
207207

208208

209209
```{r SL_out01ps2, cache=TRUE}
210210
all.pred <- predict(PS.fit.SL, type = "response")
211-
ObsData$PS.SL <- all.pred$pred
211+
ObsData$PS.SL <- all.pred$pred
212212
```
213213

214214
- These propensity score predictions (`PS.SL`) are represented as $g(A_i=1|L_i)$.
@@ -222,7 +222,7 @@ plot(density(ObsData$PS.SL[ObsData$A==0]),
222222
lines(density(ObsData$PS.SL[ObsData$A==1]),
223223
col = "blue", lty = 2)
224224
legend("topright", c("No RHC","RHC"),
225-
col = c("red", "blue"), lty=1:2)
225+
col = c("red", "blue"), lty=1:2)
226226
```
227227

228228

@@ -256,7 +256,7 @@ ObsData$H.AL <- ObsData$H.A1L - ObsData$H.A0L
256256
summary(ObsData$H.AL)
257257
tapply(ObsData$H.AL, ObsData$A, summary)
258258
t(apply(cbind(-ObsData$H.A0L,ObsData$H.A1L),
259-
2, summary))
259+
2, summary))
260260
```
261261

262262
Aggregated or individual clever covariate components show slight difference in their summaries.
@@ -282,7 +282,7 @@ eps_mod <- glm(Y.bounded ~ -1 + H.A1L + H.A0L +
282282
data = ObsData)
283283
epsilon <- coef(eps_mod)
284284
epsilon["H.A1L"]
285-
epsilon["H.A0L"]
285+
epsilon["H.A0L"]
286286
```
287287

288288
Note that, if `init.Pred` includes negative values, `NaNs` would be produced after applying `qlogis()`.
@@ -297,7 +297,7 @@ eps_mod1 <- glm(Y.bounded ~ -1 + H.AL +
297297
family = "binomial",
298298
data = ObsData)
299299
epsilon1 <- coef(eps_mod1)
300-
epsilon1
300+
epsilon1
301301
```
302302

303303
Alternative could be to use `H.AL` as weights (not shown here).
@@ -314,7 +314,7 @@ ObsData$Pred.Y1.update <- plogis(qlogis(ObsData$Pred.Y1) +
314314
ObsData$Pred.Y0.update <- plogis(qlogis(ObsData$Pred.Y0) +
315315
epsilon["H.A0L"]*ObsData$H.A0L)
316316
summary(ObsData$Pred.Y1.update)
317-
summary(ObsData$Pred.Y0.update)
317+
summary(ObsData$Pred.Y0.update)
318318
```
319319

320320
### Only 1 $\hat\epsilon$
@@ -327,7 +327,7 @@ ObsData$Pred.Y1.update1 <- plogis(qlogis(ObsData$Pred.Y1) +
327327
ObsData$Pred.Y0.update1 <- plogis(qlogis(ObsData$Pred.Y0) +
328328
epsilon1*ObsData$H.AL)
329329
summary(ObsData$Pred.Y1.update1)
330-
summary(ObsData$Pred.Y0.update1)
330+
summary(ObsData$Pred.Y0.update1)
331331
```
332332

333333
Note that, if `Pred.Y1` and `Pred.Y0` include negative values, `NaNs` would be produced after applying `qlogis()`.
@@ -359,7 +359,7 @@ ATE.TMLE.bounded.vector <- ObsData$Pred.Y1.update -
359359
summary(ATE.TMLE.bounded.vector)
360360
ATE.TMLE.bounded <- mean(ATE.TMLE.bounded.vector,
361361
na.rm = TRUE)
362-
ATE.TMLE.bounded
362+
ATE.TMLE.bounded
363363
```
364364

365365
### Only 1 $\hat\epsilon$
@@ -372,7 +372,7 @@ ATE.TMLE.bounded.vector1 <- ObsData$Pred.Y1.update1 -
372372
summary(ATE.TMLE.bounded.vector1)
373373
ATE.TMLE.bounded1 <- mean(ATE.TMLE.bounded.vector1,
374374
na.rm = TRUE)
375-
ATE.TMLE.bounded1
375+
ATE.TMLE.bounded1
376376
```
377377

378378
## Step 8: Rescale effect estimate
@@ -383,7 +383,7 @@ We make sure to transform back to our original scale.
383383

384384
```{r meantmle, cache=TRUE}
385385
ATE.TMLE <- (max.Y-min.Y)*ATE.TMLE.bounded
386-
ATE.TMLE
386+
ATE.TMLE
387387
```
388388

389389
### Only 1 $\hat\epsilon$
@@ -392,7 +392,7 @@ Alternatively, using `H.AL`:
392392

393393
```{r meantmle2, cache=TRUE}
394394
ATE.TMLE1 <- (max.Y-min.Y)*ATE.TMLE.bounded1
395-
ATE.TMLE1
395+
ATE.TMLE1
396396
```
397397

398398
## Step 9: Confidence interval estimation
@@ -442,21 +442,21 @@ ci.estimate <- function(data = ObsData, H.AL.components = 1){
442442
ATE.TMLE.CI <- c(ATE.TMLE1 - 1.96*sqrt(varHat.IC),
443443
ATE.TMLE1 + 1.96*sqrt(varHat.IC))
444444
}
445-
return(ATE.TMLE.CI)
445+
return(ATE.TMLE.CI)
446446
}
447447
```
448448

449449
### $\hat\epsilon$ = $\hat\epsilon_0$ and $\hat\epsilon_1$
450450

451451
```{r tmleinf2b, cache=TRUE}
452-
CI2 <- ci.estimate(data = ObsData, H.AL.components = 2)
452+
CI2 <- ci.estimate(data = ObsData, H.AL.components = 2)
453453
CI2
454454
```
455455

456456
### Only 1 $\hat\epsilon$
457457

458458
```{r tmleinf2c, cache=TRUE}
459-
CI1 <- ci.estimate(data = ObsData, H.AL.components = 1)
459+
CI1 <- ci.estimate(data = ObsData, H.AL.components = 1)
460460
CI1
461461
```
462462

@@ -481,6 +481,6 @@ CI1
481481
```
482482

483483
```{r, cache=TRUE, echo = TRUE}
484-
saveRDS(ATE.TMLE1, file = "data/tmlepointh.RDS")
484+
saveRDS(ATE.TMLE, file = "data/tmlepointh.RDS")
485485
saveRDS(CI2, file = "data/tmlecih.RDS")
486486
```

5software.Rmd

Lines changed: 41 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -43,12 +43,13 @@ dim(ObsData)
4343
- Note also that the outcome $Y$ is required to be within the range of $[0,1]$ for this method as well,
4444
- so we need to pass in the transformed data, then transform back the estimate.
4545

46-
```{r tmlepkg, cache=cachex, results='hide', message=FALSE, warning=FALSE}
46+
```{r tmlepkg, cache=cachex, message=FALSE, warning=FALSE}
4747
set.seed(1444)
48-
4948
# transform the outcome to fall within the range [0,1]
5049
min.Y <- min(ObsData$Y)
50+
min.Y
5151
max.Y <- max(ObsData$Y)
52+
max.Y
5253
ObsData$Y_transf <- (ObsData$Y-min.Y)/(max.Y-min.Y)
5354
5455
# run tmle from the tmle package
@@ -79,9 +80,9 @@ summary(tmle.fit)
7980

8081
```{r tmlepkgtr, cache=cachex, message=FALSE, warning=FALSE}
8182
tmle_est_tr <- tmle.fit$estimates$ATE$psi
83+
tmle_est_tr
8284
# transform back the ATE estimate
8385
tmle_est <- (max.Y-min.Y)*tmle_est_tr
84-
8586
tmle_est
8687
```
8788

@@ -114,7 +115,28 @@ Most helpful resources:
114115

115116
* [CRAN docs](https://cran.r-project.org/web/packages/tmle/tmle.pdf)
116117
* [tmle package paper](https://www.jstatsoft.org/article/view/v051i13)
117-
* Vignettes in R
118+
119+
## tmle (reduced computation)
120+
121+
We can use the previously calculated propensity score predictions from SL (calculated using `WeightIt` package) in the `tmle` to reduce some computing time.
122+
123+
```{r tmlepkg33b, cache=cachex, message=FALSE, warning=FALSE}
124+
ps.obj <- readRDS(file = "data/ipwslps.RDS")
125+
ps.SL <- ps.obj$weights
126+
tmle.fit2 <- tmle::tmle(Y = ObsData$Y_transf,
127+
A = ObsData$A,
128+
W = ObsData.noYA,
129+
family = "gaussian",
130+
V = 3,
131+
Q.SL.library = SL.library,
132+
g1W = ps.SL)
133+
tmle.fit2
134+
```
135+
136+
```{r tmlepkgtrb, cache=cachex, message=FALSE, warning=FALSE}
137+
# transform back ATE estimate
138+
(max.Y-min.Y)*tmle.fit2$estimates$ATE$psi
139+
```
118140

119141
## sl3 (optional)
120142

@@ -325,6 +347,9 @@ cat("ATE from aipw package: ", aipw_est, aipw_ci, sep = "")
325347

326348
Gathering previously saved results:
327349
```{r summarytable0, cache=cachex, echo=FALSE, results='hold', warning=FALSE, message=FALSE}
350+
fit.reg <- readRDS(file = "data/adjreg.RDS")
351+
TEr <- fit.reg$coefficients[2]
352+
CIr <- as.numeric(confint(fit.reg, 'A'))
328353
fit.matched <- readRDS(file = "data/match.RDS")
329354
TEm <- fit.matched$coefficients[2]
330355
CIm <- as.numeric(confint(fit.matched, 'A'))
@@ -346,18 +371,24 @@ tmlesl <- readRDS(file = "data/tmle.RDS")
346371
tmlecisl <- readRDS(file = "data/tmleci.RDS")
347372
slp <- readRDS(file = "data/sl3.RDS")
348373
ci.b <- rep(NA,2)
349-
point <- as.numeric(c(TEm, TEg, TE1g, TE2g, TE3g, TEi, TEsli, tmleh, tmlesl, slp))
350-
CIs <- cbind(CIm, CIgc, ci.b, ci.b, ci.b, CIi, CIsli, tmlecih, tmlecisl, ci.b)
374+
point <- as.numeric(c(TEr, TEm, TEg, TE1g, TE2g,
375+
TE3g, TEi, TEsli, tmleh,
376+
tmlesl, slp))
377+
CIs <- cbind(CIr, CIm, CIgc, ci.b, ci.b, ci.b,
378+
CIi, CIsli, tmlecih, tmlecisl, ci.b)
351379
```
352380

353381

354382
```{r summarytable, cache=cachex, echo=FALSE}
355-
method.list <- c("PS.match", "G-comp","G-comp-xgboost",
356-
"G-comp-lasso", "G-comp-SL","IPW",
357-
"IPW-SL", "TMLE.step", "tmle (package)", "sl3")
383+
method.list <- c("Adj. Reg","PS match",
384+
"G-comp (logistic)","G-comp (xgboost)",
385+
"G-comp (lasso)", "G-comp (SL)",
386+
"IPW (logistic)", "IPW (SL)",
387+
"TMLE (9 steps)", "TMLE (package)",
388+
"sl3 (package)")
358389
results <- data.frame(method.list)
359390
results$Estimate <- round(point,2)
360-
results$`2.5 %` <- CIs[1,]
391+
results$`2.5 %` <- CIs[1,]
361392
results$`97.5 %` <- CIs[2,]
362393
kable(results,digits = 2)
363394
```

6final.Rmd

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,10 +154,11 @@ However, causal inference requires satisfying identifiability assumptions for us
154154

155155
### Workshops
156156

157-
Highly recommend joining SER if interested in Epi methods development. The following workshops are very useful.
157+
Highly recommend joining SER if interested in Epi methods development. The following workshops and summer course are very useful.
158158

159159
- [SER Workshop](https://epiresearch.org/) Introduction to Parametric and Semi-parametric Estimators for Causal Inference by Laura B. Balzer & Jennifer Ahern, 2020
160160
- [SER Workshop](https://epiresearch.org/) Machine Learning and Artificial Intelligence for Causal Inference and Prediction: A Primer by Naimi A, 2021
161+
- [SISCER](https://si.biostat.washington.edu/suminst/archives/SISCER2021/CR2106) Modern Statistical Learning for Observational Data by Marco Carone, David Benkeser, 2021
161162

162163
### Recorded webinars
163164

@@ -186,4 +187,4 @@ The following webinars and workshops are freely accessible, and great for unders
186187

187188
- [Kat’s Stats](https://www.khstats.com/) by Katherine Hoffman
188189
- [towardsdatascience](https://towardsdatascience.com/targeted-maximum-likelihood-tmle-for-causal-inference-1be88542a749) by Yao Yang
189-
- [The Research Group of Mark van der Laan](https://vanderlaan-lab.org/post/) by Mark van der Laan
190+
- [The Research Group of Mark van der Laan](https://vanderlaan-lab.org/post/) by Mark van der Laan

0 commit comments

Comments
 (0)