@@ -49,13 +49,15 @@ def __init__(
49
49
outcome : str ,
50
50
df : pd .DataFrame = None ,
51
51
effect_modifiers : dict [str :Any ] = None ,
52
+ alpha : float = 0.05
52
53
):
53
54
self .treatment = treatment
54
55
self .treatment_value = treatment_value
55
56
self .control_value = control_value
56
57
self .adjustment_set = adjustment_set
57
58
self .outcome = outcome
58
59
self .df = df
60
+ self .alpha = alpha
59
61
if effect_modifiers is None :
60
62
self .effect_modifiers = {}
61
63
elif isinstance (effect_modifiers , dict ):
@@ -233,7 +235,7 @@ def estimate_ate(self, bootstrap_size=100, adjustment_config=None) -> float:
233
235
return estimate , (None , None )
234
236
235
237
bootstraps = sorted (list (treatment_bootstraps - control_bootstraps ))
236
- bound = int ((bootstrap_size * 0.05 ) / 2 )
238
+ bound = int ((bootstrap_size * self . alpha ) / 2 )
237
239
ci_low = bootstraps [bound ]
238
240
ci_high = bootstraps [bootstrap_size - bound ]
239
241
@@ -263,7 +265,7 @@ def estimate_risk_ratio(self, bootstrap_size=100, adjustment_config=None) -> flo
263
265
return estimate , (None , None )
264
266
265
267
bootstraps = sorted (list (treatment_bootstraps / control_bootstraps ))
266
- bound = ceil ((bootstrap_size * 0.05 ) / 2 )
268
+ bound = ceil ((bootstrap_size * self . alpha ) / 2 )
267
269
ci_low = bootstraps [bound ]
268
270
ci_high = bootstraps [bootstrap_size - bound ]
269
271
@@ -301,8 +303,9 @@ def __init__(
301
303
df : pd .DataFrame = None ,
302
304
effect_modifiers : dict [Variable :Any ] = None ,
303
305
formula : str = None ,
306
+ alpha : float = 0.05
304
307
):
305
- super ().__init__ (treatment , treatment_value , control_value , adjustment_set , outcome , df , effect_modifiers )
308
+ super ().__init__ (treatment , treatment_value , control_value , adjustment_set , outcome , df , effect_modifiers , alpha = alpha )
306
309
307
310
self .model = None
308
311
if effect_modifiers is None :
@@ -336,7 +339,6 @@ def estimate_unit_ate(self) -> float:
336
339
"""
337
340
model = self ._run_linear_regression ()
338
341
newline = "\n "
339
- print (model .conf_int ())
340
342
treatment = [self .treatment ]
341
343
if str (self .df .dtypes [self .treatment ]) == "object" :
342
344
design_info = dmatrix (self .formula .split ("~" )[1 ], self .df ).design_info
@@ -372,7 +374,7 @@ def estimate_ate(self) -> tuple[float, list[float, float], float]:
372
374
# Perform a t-test to compare the predicted outcome of the control and treated individual (ATE)
373
375
t_test_results = model .t_test (individuals .loc ["treated" ] - individuals .loc ["control" ])
374
376
ate = t_test_results .effect [0 ]
375
- confidence_intervals = list (t_test_results .conf_int ().flatten ())
377
+ confidence_intervals = list (t_test_results .conf_int (alpha = self . alpha ).flatten ())
376
378
return ate , confidence_intervals
377
379
378
380
def estimate_control_treatment (self , adjustment_config : dict = None ) -> tuple [pd .Series , pd .Series ]:
@@ -434,25 +436,11 @@ def _run_linear_regression(self) -> RegressionResultsWrapper:
434
436
435
437
:return: The model after fitting to data.
436
438
"""
437
- # 1. Reduce dataframe to contain only the necessary columns
438
- reduced_df = self .df .copy ()
439
- necessary_cols = [self .treatment ] + list (self .adjustment_set ) + [self .outcome ]
440
- missing_rows = reduced_df [necessary_cols ].isnull ().any (axis = 1 )
441
- reduced_df = reduced_df [~ missing_rows ]
442
- reduced_df = reduced_df .sort_values ([self .treatment ])
443
- logger .debug (reduced_df [necessary_cols ])
444
-
445
- # 2. Add intercept
446
- reduced_df ["Intercept" ] = 1 # self.intercept
447
-
448
- # 3. Estimate the unit difference in outcome caused by unit difference in treatment
449
- cols = [self .treatment ]
450
- cols += [x for x in self .adjustment_set if x not in cols ]
451
439
model = smf .ols (formula = self .formula , data = self .df ).fit ()
452
440
return model
453
441
454
442
def _get_confidence_intervals (self , model , treatment ):
455
- confidence_intervals = model .conf_int (alpha = 0.05 , cols = None )
443
+ confidence_intervals = model .conf_int (alpha = self . alpha , cols = None )
456
444
ci_low , ci_high = (
457
445
confidence_intervals [0 ].loc [treatment ],
458
446
confidence_intervals [1 ].loc [treatment ],
@@ -519,7 +507,7 @@ def estimate_unit_ate(self, bootstrap_size=100):
519
507
bootstraps = sorted (
520
508
[self .estimate_coefficient (self .df .sample (len (self .df ), replace = True )) for _ in range (bootstrap_size )]
521
509
)
522
- bound = ceil ((bootstrap_size * 0.05 ) / 2 )
510
+ bound = ceil ((bootstrap_size * self . alpha ) / 2 )
523
511
ci_low = bootstraps [bound ]
524
512
ci_high = bootstraps [bootstrap_size - bound ]
525
513
@@ -610,7 +598,7 @@ def estimate_cates(self) -> pd.DataFrame:
610
598
# Obtain CATES and confidence intervals
611
599
conditional_ates = model .effect (effect_modifier_df , T0 = self .control_value , T1 = self .treatment_value ).flatten ()
612
600
[ci_low , ci_high ] = model .effect_interval (
613
- effect_modifier_df , T0 = self .control_value , T1 = self .treatment_value , alpha = 0.05
601
+ effect_modifier_df , T0 = self .control_value , T1 = self .treatment_value , alpha = self . alpha
614
602
)
615
603
616
604
# Merge results into a dataframe (CATE, confidence intervals, and effect modifier values)
0 commit comments