1
1
---
2
- title : " Propensity scores for continuous exposures "
2
+ title : " Continuous exposures with propensity scores "
3
3
author : " Malcolm Barrett"
4
4
format : " kakashi-revealjs"
5
5
date : " 2021-09-01 (updated: `r Sys.Date()`)"
@@ -17,10 +17,14 @@ library(tidyverse)
17
17
library(broom)
18
18
library(causaldata)
19
19
library(touringplans)
20
+ library(propensity)
20
21
21
22
set.seed(1000)
22
23
```
23
24
25
+ ## {background-color="#23373B" .huge .center}
26
+ ### * Warning!* Propensity score weights are sensitive to positivity violations for continuous exposures.
27
+
24
28
## {background-color="#23373B" .huge .center}
25
29
### ** The story so far**
26
30
@@ -34,11 +38,11 @@ set.seed(1000)
34
38
35
39
## Continuous exposures {background-color="#23373B"}
36
40
37
- 1 . Use a model like ` lm(x ~ z) ` for the propensity score model
38
- 2 . Scale weights to probability-like scale using ` dnorm(true_value, fitted_value, estimated_sd) `
41
+ 1 . Use a model like ` lm(x ~ z) ` for the propensity score model.
42
+ 2 . Use ` wt_ate() ` with ` .fitted ` and ` .sigma ` ; transforms using ` dnorm() ` to get on probability-like scale.
39
43
3 . Apply the weights to the outcome model as normal!
40
44
41
- ## Alternative: quantile binning {background-color="#23373B"}
45
+ ## Alternative: quantile binning {background-color="#23373B" .small}
42
46
43
47
1 . Bin the continuous exposure into quantiles and use categorical regression like a multinomial model to calculate probabilities.
44
48
2 . Calculate the weights where the propensity score is the probability you fall into the quantile you actually fell into. Same as the binary ATE!
@@ -54,17 +58,18 @@ model <- lm(
54
58
)
55
59
```
56
60
57
- ## 2. Calculate the weights with ` dnorm ()`
61
+ ## 2. Calculate the weights with ` wt_ate ()`
58
62
59
63
``` {r}
60
64
#| eval: false
61
- #| code-line-numbers: "|3-7 "
65
+ #| code-line-numbers: "|3-8 "
62
66
model |>
63
67
augment(data = df) |>
64
- mutate(denominator = dnorm (
68
+ mutate(wts = wt_ate (
65
69
exposure,
66
- mean = .fitted,
67
- sd = mean(.sigma, na.rm = TRUE)
70
+ .fitted,
71
+ # .sigma is from augment()
72
+ .sigma = .sigma
68
73
))
69
74
```
70
75
@@ -79,7 +84,7 @@ nhefs_light_smokers <- nhefs_complete |>
79
84
80
85
``` {r}
81
86
#| code-line-numbers: "|1-2|3-6"
82
- nhefs_denominator_model <- lm(
87
+ nhefs_model <- lm(
83
88
smkintensity82_71 ~ sex + race + age + I(age^2) +
84
89
education + smokeintensity + I(smokeintensity^2) +
85
90
smokeyrs + I(smokeyrs^2) + exercise + active +
@@ -88,25 +93,24 @@ nhefs_denominator_model <- lm(
88
93
)
89
94
```
90
95
91
- ## 2. Calculate the weights with ` dnorm ()`
96
+ ## 2. Calculate the weights with ` wt_ate ()`
92
97
93
98
``` {r}
94
- #| code-line-numbers: "|3-6 "
95
- nhefs_denominators <- nhefs_denominator_model |>
99
+ #| code-line-numbers: "|3-7 "
100
+ nhefs_wts <- nhefs_model |>
96
101
augment(data = nhefs_light_smokers) |>
97
- mutate(denominator = dnorm (
102
+ mutate(wts = wt_ate (
98
103
smkintensity82_71,
99
104
.fitted,
100
- mean(.sigma, na.rm = TRUE)
101
- )) |>
102
- select(id, denominator)
105
+ .sigma = .sigma
106
+ ))
103
107
```
104
108
105
109
106
- ## 2. Calculate the weights with ` dnorm ()`
110
+ ## 2. Calculate the weights with ` wt_ate ()`
107
111
108
112
``` {r}
109
- nhefs_denominators
113
+ nhefs_wts
110
114
```
111
115
112
116
## Do * posted* wait times at 8 am affect * actual* wait times at 9 am?
@@ -190,7 +194,7 @@ dagify(
190
194
191
195
### Fit a model using ` lm() ` with ` avg_spostmin ` as the outcome and the confounders identified in the DAG.
192
196
### Use ` augment() ` to add model predictions to the data frame
193
- ### In ` dnorm ()` , use ` .fitted ` as the mean and the mean of ` .sigma ` as the SD to calculate the propensity score for the denominator.
197
+ ### In ` wt_ate ()` , calculate the weights using ` avg_spostmin ` , ` .fitted ` , and ` .sigma `
194
198
195
199
` r countdown::countdown(minutes = 5) `
196
200
@@ -212,7 +216,7 @@ wait_times <- eight |>
212
216
```
213
217
214
218
``` {r}
215
- denominator_model <- lm(
219
+ post_time_model <- lm(
216
220
avg_spostmin ~
217
221
close + extra_magic_morning +
218
222
weather_wdwhigh + wdw_ticket_season,
@@ -223,22 +227,18 @@ denominator_model <- lm(
223
227
## * Your Turn 1*
224
228
225
229
``` {r}
226
- denominators <- denominator_model |>
230
+ wait_times_wts <- post_time_model |>
227
231
augment(data = wait_times) |>
228
- mutate(
229
- denominator = dnorm(
230
- avg_spostmin, .fitted, mean(.sigma, na.rm = TRUE)
231
- )
232
- ) |>
233
- select(date, denominator)
232
+ mutate(wts = wt_ate(
233
+ avg_spostmin, .fitted, .sigma = .sigma
234
+ ))
234
235
```
235
236
236
237
## * Stabilizing extreme weights*
237
238
238
239
``` {r}
239
240
#| echo: false
240
- nhefs_denominators |>
241
- mutate(wts = 1 / denominator) |>
241
+ nhefs_wts |>
242
242
ggplot(aes(wts)) +
243
243
geom_density(col = "#E69F00", fill = "#E69F0095") +
244
244
scale_x_log10() +
@@ -248,49 +248,29 @@ nhefs_denominators |>
248
248
249
249
## Stabilizing extreme weights {background-color="#23373B"}
250
250
251
- 1 . Fit an intercept-only model (e.g. ` lm(x ~ 1) ` )
252
- 2 . Calculate weights from this model
253
- 3 . Divide these weights by the propensity score weights
254
-
255
- ## 1. Fit an intercept-only model
256
-
257
- ``` {r}
258
- #| code-line-numbers: "|2"
259
- nhefs_numerator_model <- lm(
260
- smkintensity82_71 ~ 1,
261
- data = nhefs_light_smokers
262
- )
263
- ```
251
+ 1 . Fit an intercept-only model (e.g. ` lm(x ~ 1) ` ) or use mean and SD of ` x `
252
+ 2 . Calculate weights from this model.
253
+ 3 . Divide these weights by the propensity score weights. ` wt_ate(..., stabilize = TRUE) ` does this all!
264
254
265
- ## 2. Calculate weights from this model
255
+ ## Calculate stabilized weights
266
256
267
257
``` {r}
268
- #| code-line-numbers: "|1 "
269
- nhefs_numerators <- nhefs_numerator_model |>
258
+ #| code-line-numbers: "|7 "
259
+ nhefs_swts <- nhefs_model |>
270
260
augment(data = nhefs_light_smokers) |>
271
- mutate(numerator = dnorm (
261
+ mutate(swts = wt_ate (
272
262
smkintensity82_71,
273
- mean = .fitted,
274
- sd = mean(.sigma, na.rm = TRUE))
275
- ) |>
276
- select(id, numerator)
277
- ```
278
-
279
- ## 3. Divide these weights by the propensity score weights
280
-
281
- ``` {r}
282
- #| code-line-numbers: "|4"
283
- nhefs_light_smokers <- nhefs_light_smokers |>
284
- left_join(nhefs_numerators, by = "id") |>
285
- left_join(nhefs_denominators, by = "id") |>
286
- mutate(swts = numerator / denominator)
263
+ .fitted,
264
+ .sigma = .sigma,
265
+ stabilize = TRUE
266
+ ))
287
267
```
288
268
289
269
## Stabilizing extreme weights
290
270
291
271
``` {r}
292
272
#| echo: false
293
- ggplot(nhefs_light_smokers , aes(swts)) +
273
+ ggplot(nhefs_swts , aes(swts)) +
294
274
geom_density(col = "#E69F00", fill = "#E69F0095") +
295
275
scale_x_log10() +
296
276
theme_minimal(base_size = 20) +
@@ -299,42 +279,23 @@ ggplot(nhefs_light_smokers, aes(swts)) +
299
279
300
280
## * Your Turn 2*
301
281
302
- ### Fit an intercept-only model of posted weight times to use as the numerator model
303
- ### Calculate the numerator weights using ` dnorm() ` as above.
304
- ### Finally, calculate the stabilized weights, ` swts ` , using the ` numerator ` and ` denominator ` weights
282
+ ### Re-fit the above using stabilized weights
305
283
306
- ` r countdown::countdown(minutes = 5 ) `
284
+ ` r countdown::countdown(minutes = 3 ) `
307
285
308
286
## * Your Turn 2*
309
287
310
288
``` {r}
311
- numerator_model <- lm(
312
- avg_spostmin ~ 1,
313
- data = wait_times
314
- )
315
- ```
316
-
317
- ---
318
-
319
- ## Your Turn 2
320
-
321
- ``` {r}
322
- numerators <- numerator_model |>
289
+ wait_times_swts <- post_time_model |>
323
290
augment(data = wait_times) |>
324
- mutate(
325
- numerator = dnorm(
326
- avg_spostmin, .fitted, mean(.sigma, na.rm = TRUE)
327
- )
328
- ) |>
329
- select(date, numerator)
330
-
331
- wait_times_wts <- wait_times |>
332
- left_join(numerators, by = "date") |>
333
- left_join(denominators, by = "date") |>
334
- mutate(swts = numerator / denominator)
291
+ mutate(swts = wt_ate(
292
+ avg_spostmin,
293
+ .fitted,
294
+ .sigma = .sigma,
295
+ stabilize = TRUE
296
+ ))
335
297
```
336
298
337
-
338
299
## Fitting the outcome model {background-color="#23373B"}
339
300
340
301
1 . Use the stabilized weights in the outcome model. Nothing new here!
@@ -346,7 +307,7 @@ wait_times_wts <- wait_times |>
346
307
lm(
347
308
wt82_71 ~ smkintensity82_71,
348
309
weights = swts,
349
- data = nhefs_light_smokers
310
+ data = nhefs_swts
350
311
) |>
351
312
tidy() |>
352
313
filter(term == "smkintensity82_71") |>
@@ -365,10 +326,20 @@ lm(
365
326
lm(
366
327
avg_sactmin ~ avg_spostmin,
367
328
weights = swts,
368
- data = wait_times_wts
329
+ data = wait_times_swts
369
330
) |>
370
331
tidy() |>
371
332
filter(term == "avg_spostmin") |>
372
333
mutate(estimate = estimate * 10)
373
334
```
374
335
336
+
337
+ ## Diagnosing issues {background-color="#23373B"}
338
+
339
+ 1 . Extreme weights even after stabilization
340
+ 2 . Bootstrap: non-normal distribution
341
+ 3 . Bootstrap: estimate different from original model
342
+
343
+ ## More info {background-color="#23373B"}
344
+
345
+ ### https://github.com/LucyMcGowan/writing-positivity-continous-ps
0 commit comments