@@ -49,13 +49,15 @@ def __init__(
49
49
outcome : str ,
50
50
df : pd .DataFrame = None ,
51
51
effect_modifiers : dict [str :Any ] = None ,
52
+ alpha : float = 0.05 ,
52
53
):
53
54
self .treatment = treatment
54
55
self .treatment_value = treatment_value
55
56
self .control_value = control_value
56
57
self .adjustment_set = adjustment_set
57
58
self .outcome = outcome
58
59
self .df = df
60
+ self .alpha = alpha
59
61
if effect_modifiers is None :
60
62
self .effect_modifiers = {}
61
63
elif isinstance (effect_modifiers , dict ):
@@ -179,7 +181,7 @@ def estimate(self, data: pd.DataFrame, adjustment_config: dict = None) -> Regres
179
181
# x = x[model.params.index]
180
182
return model .predict (x )
181
183
182
- def estimate_control_treatment (self , bootstrap_size = 100 , adjustment_config = None ) -> tuple [pd .Series , pd .Series ]:
184
+ def estimate_control_treatment (self , bootstrap_size , adjustment_config ) -> tuple [pd .Series , pd .Series ]:
183
185
"""Estimate the outcomes under control and treatment.
184
186
185
187
:return: The estimated control and treatment values and their confidence
@@ -215,14 +217,18 @@ def estimate_control_treatment(self, bootstrap_size=100, adjustment_config=None)
215
217
216
218
return (y .iloc [1 ], np .array (control )), (y .iloc [0 ], np .array (treatment ))
217
219
218
- def estimate_ate (self , bootstrap_size = 100 , adjustment_config = None ) -> float :
220
+ def estimate_ate (self , estimator_params : dict = None ) -> float :
219
221
"""Estimate the average treatment effect (ATE) of the treatment on the outcome. That is, the change in outcome caused
220
222
by changing the treatment variable from the control value to the treatment value. Here, we actually
221
223
calculate the expected outcomes under control and treatment and take one away from the other. This
222
224
allows for custom terms to be put in such as squares, inverses, products, etc.
223
225
224
226
:return: The estimated average treatment effect and its 100 * (1 - alpha)% confidence interval (95% when alpha is 0.05)
225
227
"""
228
+ if estimator_params is None :
229
+ estimator_params = {}
230
+ bootstrap_size = estimator_params .get ("bootstrap_size" , 100 )
231
+ adjustment_config = estimator_params .get ("adjustment_config" , None )
226
232
(control_outcome , control_bootstraps ), (
227
233
treatment_outcome ,
228
234
treatment_bootstraps ,
@@ -233,7 +239,7 @@ def estimate_ate(self, bootstrap_size=100, adjustment_config=None) -> float:
233
239
return estimate , (None , None )
234
240
235
241
bootstraps = sorted (list (treatment_bootstraps - control_bootstraps ))
236
- bound = int ((bootstrap_size * 0.05 ) / 2 )
242
+ bound = int ((bootstrap_size * self . alpha ) / 2 )
237
243
ci_low = bootstraps [bound ]
238
244
ci_high = bootstraps [bootstrap_size - bound ]
239
245
@@ -245,14 +251,18 @@ def estimate_ate(self, bootstrap_size=100, adjustment_config=None) -> float:
245
251
246
252
return estimate , (ci_low , ci_high )
247
253
248
- def estimate_risk_ratio (self , bootstrap_size = 100 , adjustment_config = None ) -> float :
254
+ def estimate_risk_ratio (self , estimator_params : dict = None ) -> float :
249
255
"""Estimate the risk ratio of the treatment on the outcome. That is, the relative change in outcome caused
250
256
by changing the treatment variable from the control value to the treatment value. Here, we actually
251
257
calculate the expected outcomes under control and treatment and divide one by the other. This
252
258
allows for custom terms to be put in such as squares, inverses, products, etc.
253
259
254
260
:return: The estimated risk ratio and its 100 * (1 - alpha)% confidence interval (95% when alpha is 0.05).
255
261
"""
262
+ if estimator_params is None :
263
+ estimator_params = {}
264
+ bootstrap_size = estimator_params .get ("bootstrap_size" , 100 )
265
+ adjustment_config = estimator_params .get ("adjustment_config" , None )
256
266
(control_outcome , control_bootstraps ), (
257
267
treatment_outcome ,
258
268
treatment_bootstraps ,
@@ -263,7 +273,7 @@ def estimate_risk_ratio(self, bootstrap_size=100, adjustment_config=None) -> flo
263
273
return estimate , (None , None )
264
274
265
275
bootstraps = sorted (list (treatment_bootstraps / control_bootstraps ))
266
- bound = ceil ((bootstrap_size * 0.05 ) / 2 )
276
+ bound = ceil ((bootstrap_size * self . alpha ) / 2 )
267
277
ci_low = bootstraps [bound ]
268
278
ci_high = bootstraps [bootstrap_size - bound ]
269
279
@@ -301,8 +311,11 @@ def __init__(
301
311
df : pd .DataFrame = None ,
302
312
effect_modifiers : dict [Variable :Any ] = None ,
303
313
formula : str = None ,
314
+ alpha : float = 0.05 ,
304
315
):
305
- super ().__init__ (treatment , treatment_value , control_value , adjustment_set , outcome , df , effect_modifiers )
316
+ super ().__init__ (
317
+ treatment , treatment_value , control_value , adjustment_set , outcome , df , effect_modifiers , alpha = alpha
318
+ )
306
319
307
320
self .model = None
308
321
if effect_modifiers is None :
@@ -336,7 +349,6 @@ def estimate_unit_ate(self) -> float:
336
349
"""
337
350
model = self ._run_linear_regression ()
338
351
newline = "\n "
339
- print (model .conf_int ())
340
352
treatment = [self .treatment ]
341
353
if str (self .df .dtypes [self .treatment ]) == "object" :
342
354
design_info = dmatrix (self .formula .split ("~" )[1 ], self .df ).design_info
@@ -372,7 +384,7 @@ def estimate_ate(self) -> tuple[float, list[float, float], float]:
372
384
# Perform a t-test to compare the predicted outcome of the control and treated individual (ATE)
373
385
t_test_results = model .t_test (individuals .loc ["treated" ] - individuals .loc ["control" ])
374
386
ate = t_test_results .effect [0 ]
375
- confidence_intervals = list (t_test_results .conf_int ().flatten ())
387
+ confidence_intervals = list (t_test_results .conf_int (alpha = self . alpha ).flatten ())
376
388
return ate , confidence_intervals
377
389
378
390
def estimate_control_treatment (self , adjustment_config : dict = None ) -> tuple [pd .Series , pd .Series ]:
@@ -434,25 +446,11 @@ def _run_linear_regression(self) -> RegressionResultsWrapper:
434
446
435
447
:return: The model after fitting to data.
436
448
"""
437
- # 1. Reduce dataframe to contain only the necessary columns
438
- reduced_df = self .df .copy ()
439
- necessary_cols = [self .treatment ] + list (self .adjustment_set ) + [self .outcome ]
440
- missing_rows = reduced_df [necessary_cols ].isnull ().any (axis = 1 )
441
- reduced_df = reduced_df [~ missing_rows ]
442
- reduced_df = reduced_df .sort_values ([self .treatment ])
443
- logger .debug (reduced_df [necessary_cols ])
444
-
445
- # 2. Add intercept
446
- reduced_df ["Intercept" ] = 1 # self.intercept
447
-
448
- # 3. Estimate the unit difference in outcome caused by unit difference in treatment
449
- cols = [self .treatment ]
450
- cols += [x for x in self .adjustment_set if x not in cols ]
451
449
model = smf .ols (formula = self .formula , data = self .df ).fit ()
452
450
return model
453
451
454
452
def _get_confidence_intervals (self , model , treatment ):
455
- confidence_intervals = model .conf_int (alpha = 0.05 , cols = None )
453
+ confidence_intervals = model .conf_int (alpha = self . alpha , cols = None )
456
454
ci_low , ci_high = (
457
455
confidence_intervals [0 ].loc [treatment ],
458
456
confidence_intervals [1 ].loc [treatment ],
@@ -519,7 +517,7 @@ def estimate_unit_ate(self, bootstrap_size=100):
519
517
bootstraps = sorted (
520
518
[self .estimate_coefficient (self .df .sample (len (self .df ), replace = True )) for _ in range (bootstrap_size )]
521
519
)
522
- bound = ceil ((bootstrap_size * 0.05 ) / 2 )
520
+ bound = ceil ((bootstrap_size * self . alpha ) / 2 )
523
521
ci_low = bootstraps [bound ]
524
522
ci_high = bootstraps [bootstrap_size - bound ]
525
523
@@ -610,7 +608,7 @@ def estimate_cates(self) -> pd.DataFrame:
610
608
# Obtain CATES and confidence intervals
611
609
conditional_ates = model .effect (effect_modifier_df , T0 = self .control_value , T1 = self .treatment_value ).flatten ()
612
610
[ci_low , ci_high ] = model .effect_interval (
613
- effect_modifier_df , T0 = self .control_value , T1 = self .treatment_value , alpha = 0.05
611
+ effect_modifier_df , T0 = self .control_value , T1 = self .treatment_value , alpha = self . alpha
614
612
)
615
613
616
614
# Merge results into a dataframe (CATE, confidence intervals, and effect modifier values)
0 commit comments