@@ -49,13 +49,15 @@ def __init__(
49
49
outcome : str ,
50
50
df : pd .DataFrame = None ,
51
51
effect_modifiers : dict [str :Any ] = None ,
52
+ alpha : float = 0.05 ,
52
53
):
53
54
self .treatment = treatment
54
55
self .treatment_value = treatment_value
55
56
self .control_value = control_value
56
57
self .adjustment_set = adjustment_set
57
58
self .outcome = outcome
58
59
self .df = df
60
+ self .alpha = alpha
59
61
if effect_modifiers is None :
60
62
self .effect_modifiers = {}
61
63
elif isinstance (effect_modifiers , dict ):
@@ -237,7 +239,7 @@ def estimate_ate(self, estimator_params: dict = None) -> float:
237
239
return estimate , (None , None )
238
240
239
241
bootstraps = sorted (list (treatment_bootstraps - control_bootstraps ))
240
- bound = int ((bootstrap_size * 0.05 ) / 2 )
242
+ bound = int ((bootstrap_size * self . alpha ) / 2 )
241
243
ci_low = bootstraps [bound ]
242
244
ci_high = bootstraps [bootstrap_size - bound ]
243
245
@@ -271,7 +273,7 @@ def estimate_risk_ratio(self, estimator_params: dict = None) -> float:
271
273
return estimate , (None , None )
272
274
273
275
bootstraps = sorted (list (treatment_bootstraps / control_bootstraps ))
274
- bound = ceil ((bootstrap_size * 0.05 ) / 2 )
276
+ bound = ceil ((bootstrap_size * self . alpha ) / 2 )
275
277
ci_low = bootstraps [bound ]
276
278
ci_high = bootstraps [bootstrap_size - bound ]
277
279
@@ -309,8 +311,11 @@ def __init__(
309
311
df : pd .DataFrame = None ,
310
312
effect_modifiers : dict [Variable :Any ] = None ,
311
313
formula : str = None ,
314
+ alpha : float = 0.05 ,
312
315
):
313
- super ().__init__ (treatment , treatment_value , control_value , adjustment_set , outcome , df , effect_modifiers )
316
+ super ().__init__ (
317
+ treatment , treatment_value , control_value , adjustment_set , outcome , df , effect_modifiers , alpha = alpha
318
+ )
314
319
315
320
self .model = None
316
321
if effect_modifiers is None :
@@ -344,7 +349,6 @@ def estimate_unit_ate(self) -> float:
344
349
"""
345
350
model = self ._run_linear_regression ()
346
351
newline = "\n "
347
- print (model .conf_int ())
348
352
treatment = [self .treatment ]
349
353
if str (self .df .dtypes [self .treatment ]) == "object" :
350
354
design_info = dmatrix (self .formula .split ("~" )[1 ], self .df ).design_info
@@ -380,7 +384,7 @@ def estimate_ate(self) -> tuple[float, list[float, float], float]:
380
384
# Perform a t-test to compare the predicted outcome of the control and treated individual (ATE)
381
385
t_test_results = model .t_test (individuals .loc ["treated" ] - individuals .loc ["control" ])
382
386
ate = t_test_results .effect [0 ]
383
- confidence_intervals = list (t_test_results .conf_int ().flatten ())
387
+ confidence_intervals = list (t_test_results .conf_int (alpha = self . alpha ).flatten ())
384
388
return ate , confidence_intervals
385
389
386
390
def estimate_control_treatment (self , adjustment_config : dict = None ) -> tuple [pd .Series , pd .Series ]:
@@ -442,25 +446,11 @@ def _run_linear_regression(self) -> RegressionResultsWrapper:
442
446
443
447
:return: The model after fitting to data.
444
448
"""
445
- # 1. Reduce dataframe to contain only the necessary columns
446
- reduced_df = self .df .copy ()
447
- necessary_cols = [self .treatment ] + list (self .adjustment_set ) + [self .outcome ]
448
- missing_rows = reduced_df [necessary_cols ].isnull ().any (axis = 1 )
449
- reduced_df = reduced_df [~ missing_rows ]
450
- reduced_df = reduced_df .sort_values ([self .treatment ])
451
- logger .debug (reduced_df [necessary_cols ])
452
-
453
- # 2. Add intercept
454
- reduced_df ["Intercept" ] = 1 # self.intercept
455
-
456
- # 3. Estimate the unit difference in outcome caused by unit difference in treatment
457
- cols = [self .treatment ]
458
- cols += [x for x in self .adjustment_set if x not in cols ]
459
449
model = smf .ols (formula = self .formula , data = self .df ).fit ()
460
450
return model
461
451
462
452
def _get_confidence_intervals (self , model , treatment ):
463
- confidence_intervals = model .conf_int (alpha = 0.05 , cols = None )
453
+ confidence_intervals = model .conf_int (alpha = self . alpha , cols = None )
464
454
ci_low , ci_high = (
465
455
confidence_intervals [0 ].loc [treatment ],
466
456
confidence_intervals [1 ].loc [treatment ],
@@ -527,7 +517,7 @@ def estimate_unit_ate(self, bootstrap_size=100):
527
517
bootstraps = sorted (
528
518
[self .estimate_coefficient (self .df .sample (len (self .df ), replace = True )) for _ in range (bootstrap_size )]
529
519
)
530
- bound = ceil ((bootstrap_size * 0.05 ) / 2 )
520
+ bound = ceil ((bootstrap_size * self . alpha ) / 2 )
531
521
ci_low = bootstraps [bound ]
532
522
ci_high = bootstraps [bootstrap_size - bound ]
533
523
@@ -618,7 +608,7 @@ def estimate_cates(self) -> pd.DataFrame:
618
608
# Obtain CATES and confidence intervals
619
609
conditional_ates = model .effect (effect_modifier_df , T0 = self .control_value , T1 = self .treatment_value ).flatten ()
620
610
[ci_low , ci_high ] = model .effect_interval (
621
- effect_modifier_df , T0 = self .control_value , T1 = self .treatment_value , alpha = 0.05
611
+ effect_modifier_df , T0 = self .control_value , T1 = self .treatment_value , alpha = self . alpha
622
612
)
623
613
624
614
# Merge results into a dataframe (CATE, confidence intervals, and effect modifier values)
0 commit comments