Skip to content

Commit 76d7893

Browse files
Merge pull request #17 from malcolmbarrett/smd
Use smd and gtsummary
2 parents 7d675ce + e11b2dc commit 76d7893

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+932
-520
lines changed

exercises/01-whole-game-exercises.Rmd

Lines changed: 26 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@ library(broom)
99
library(rsample)
1010
library(ggdag)
1111
library(causaldata)
12-
library(survey)
13-
library(tableone)
1412
```
1513

1614
## Causal Modeling: Whole Game
@@ -194,45 +192,39 @@ The main goal here is to *break* the non-causal associations between quitting sm
194192

195193
Standardized mean differences (SMD) are a simple measurement of differences that work across variable types. In general, the closer to 0 we are, the better job we have done eliminating the non-causal relationships we drew in our DAG. Note that low SMDs for everything we adjust for does *not* mean that there is not something else that might confound our study. Unmeasured confounders or misspecified DAGs can still distort our effects, even if our SMDs look great!
196194

197-
We'll use the {survey} and {tableone} package to calculate the SMDs, then visualize them.
195+
We'll use the {smd} package to calculate the SMDs, then visualize them.
198196

199197
```{r}
200-
svy_des <- svydesign(
201-
ids = ~ 1,
202-
data = nhefs_complete_uc,
203-
weights = ~ wts)
204-
205-
smd_table_unweighted <- CreateTableOne(
206-
vars = c("sex", "race", "age", "education", "smokeintensity", "smokeyrs",
207-
"exercise", "active", "wt71"),
208-
strata = "qsmk",
209-
data = nhefs_complete_uc,
210-
test = FALSE)
211-
212-
smd_table <- svyCreateTableOne(
213-
vars = c("sex", "race", "age", "education", "smokeintensity", "smokeyrs",
214-
"exercise", "active", "wt71"),
215-
strata = "qsmk",
216-
data = svy_des,
217-
test = FALSE)
218-
219-
220-
plot_df <- data.frame(
221-
var = rownames(ExtractSmd(smd_table)),
222-
Unadjusted = as.numeric(ExtractSmd(smd_table_unweighted)),
223-
Weighted = as.numeric(ExtractSmd(smd_table))) %>%
224-
pivot_longer(-var, names_to = "Method", values_to = "SMD")
198+
vars <- c("sex", "race", "age", "education", "smokeintensity", "smokeyrs",
199+
"exercise", "active", "wt71")
200+
201+
plot_df <- nhefs_complete_uc %>%
202+
summarise(
203+
across(
204+
all_of(vars),
205+
list(
206+
unweighted = ~smd(.x, qsmk)$estimate,
207+
weighted = ~smd(.x, qsmk, wts)$estimate
208+
)
209+
)
210+
) %>%
211+
pivot_longer(
212+
everything(),
213+
values_to = "SMD",
214+
names_to = c("variable", "Method"),
215+
names_sep = "_"
216+
)
225217
226218
ggplot(
227219
data = plot_df,
228-
mapping = aes(x = var, y = SMD, group = Method, color = Method)
220+
aes(x = abs(SMD), y = variable, group = Method, color = Method)
229221
) +
230-
geom_line() +
222+
geom_line(orientation = "y") +
231223
geom_point() +
232-
geom_hline(yintercept = 0.1, color = "black", size = 0.1) +
233-
coord_flip() +
234-
theme_minimal()
235-
224+
geom_vline(xintercept = 0.1, color = "black", size = 0.1) +
225+
theme_minimal() +
226+
scale_color_manual(values = c("grey85", "#00BFC4")) +
227+
xlim(0, .3)
236228
```
237229

238230
These look pretty good! Some variables are better than others, but weighting appears to have done a much better job eliminating these differences than an unadjusted analysis.

exercises/05-pscores-diagnostics-exercises.Rmd

Lines changed: 29 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,8 @@ output: html_document
66

77
```{r}
88
library(tidyverse)
9-
library(survey)
10-
library(tableone)
119
library(broom)
10+
library(smd)
1211
library(causaldata)
1312
```
1413

@@ -32,55 +31,46 @@ df <- propensity_model %>%
3231

3332
_After updating the code chunks below, change `eval = TRUE` before knitting._
3433

35-
Create the survey design object to incorporate the weights.
34+
Calculate the standardized mean differences with and without weights
3635

3736
```{r, eval = FALSE}
38-
svy_des <- ____(
39-
ids = ~ 1,
40-
data = ___,
41-
weights = ___
42-
)
43-
```
44-
45-
Create the **unweighted** standardized mean differences data frame
46-
47-
```{r, eval = FALSE}
48-
smd_table_unweighted <- ____(
49-
vars = _____,
50-
strata = _____,
51-
data = ____,
52-
test = FALSE)
37+
smds <- df %>%
38+
summarise(
39+
across(
40+
# variables to calculate SMD for
41+
________,
42+
list(
43+
unweighted = ~_____(.x, qsmk)$estimate,
44+
weighted = ~_____(.x, qsmk, w = ___)$estimate
45+
)
46+
)
47+
)
5348
```
5449

55-
Create the **weighted** standardized mean differences data frame
50+
Pivot `smds` so that it is in tidy format with the columns "SMD", "variable", and "Method".
5651

5752
```{r, eval = FALSE}
58-
smd_table <- ____(
59-
vars = _____,
60-
strata = _____,
61-
data = ____,
62-
test = FALSE)
63-
```
64-
65-
Create a data frame that merges `smd_table_unweighted` and `smd_table` and pivots it to prepare for plotting
66-
67-
```{r, eval = FALSE}
68-
plot_df <- data.frame(
69-
var = rownames(____),
70-
Unadjusted = _____,
71-
Weighted = _____) %>%
72-
pivot_longer(-var, names_to = "Method", values_to = "SMD")
53+
plot_df <- smds %>%
54+
pivot_longer(
55+
_______,
56+
values_to = "___",
57+
names_to = c("___", "___"),
58+
59+
# don't change this one. We really mean _!
60+
names_sep = "_"
61+
)
7362
```
7463

7564
Create the Love Plot using ggplot
7665

7766
```{r, eval = FALSE}
78-
ggplot(data = _____,
79-
mapping = aes(x = ____, y = ____, group = ____, color = ____)) +
80-
geom_line() +
67+
ggplot(
68+
data = ____,
69+
aes(x = abs(____), y = ____, group = ____, color = ____)
70+
) +
71+
geom_line(orientation = "y") +
8172
geom_point() +
82-
geom_hline(yintercept = 0.1, color = "black", size = 0.1) +
83-
coord_flip()
73+
geom_vline(xintercept = 0.1, color = "black", size = 0.1)
8474
```
8575

8676

exercises/06-outcome-model-exercises.Rmd

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@ output: html_document
66

77
```{r}
88
library(tidyverse)
9-
library(survey)
10-
library(tableone)
119
library(broom)
1210
library(causaldata)
1311
library(rsample)

slides/pdf/00-intro.pdf

-843 Bytes
Binary file not shown.
-1.09 MB
Binary file not shown.

slides/pdf/02-dags.pdf

-1.27 MB
Binary file not shown.

slides/pdf/03-pscores.pdf

-231 KB
Binary file not shown.

slides/pdf/04-using-pscores.pdf

-167 KB
Binary file not shown.
-361 KB
Binary file not shown.

slides/pdf/06-outcome-model.pdf

-1.15 KB
Binary file not shown.

0 commit comments

Comments
 (0)