@@ -36,11 +36,11 @@ class Estimator(ABC):
36
36
37
37
def __init__ (
38
38
self ,
39
- treatment : tuple ,
39
+ treatment : str ,
40
40
treatment_value : float ,
41
41
control_value : float ,
42
42
adjustment_set : set ,
43
- outcome : tuple ,
43
+ outcome : str ,
44
44
df : pd .DataFrame = None ,
45
45
effect_modifiers : dict [Variable :Any ] = None ,
46
46
):
@@ -93,11 +93,11 @@ class LogisticRegressionEstimator(Estimator):
93
93
94
94
def __init__ (
95
95
self ,
96
- treatment : tuple ,
96
+ treatment : str ,
97
97
treatment_value : float ,
98
98
control_value : float ,
99
99
adjustment_set : set ,
100
- outcome : tuple ,
100
+ outcome : str ,
101
101
df : pd .DataFrame = None ,
102
102
effect_modifiers : dict [Variable :Any ] = None ,
103
103
intercept : int = 1 ,
@@ -133,20 +133,20 @@ def _run_logistic_regression(self, data) -> RegressionResultsWrapper:
133
133
"""
134
134
# 1. Reduce dataframe to contain only the necessary columns
135
135
reduced_df = data .copy ()
136
- necessary_cols = list ( self .treatment ) + list (self .adjustment_set ) + list ( self .outcome )
136
+ necessary_cols = [ self .treatment ] + list (self .adjustment_set ) + [ self .outcome ]
137
137
missing_rows = reduced_df [necessary_cols ].isnull ().any (axis = 1 )
138
138
reduced_df = reduced_df [~ missing_rows ]
139
- reduced_df = reduced_df .sort_values (list ( self .treatment ) )
139
+ reduced_df = reduced_df .sort_values ([ self .treatment ] )
140
140
logger .debug (reduced_df [necessary_cols ])
141
141
142
142
# 2. Add intercept
143
143
reduced_df ["Intercept" ] = self .intercept
144
144
145
145
# 3. Estimate the unit difference in outcome caused by unit difference in treatment
146
- cols = list ( self .treatment )
146
+ cols = [ self .treatment ]
147
147
cols += [x for x in self .adjustment_set if x not in cols ]
148
148
treatment_and_adjustments_cols = reduced_df [cols + ["Intercept" ]]
149
- outcome_col = reduced_df [list ( self .outcome ) ]
149
+ outcome_col = reduced_df [[ self .outcome ] ]
150
150
for col in treatment_and_adjustments_cols :
151
151
if str (treatment_and_adjustments_cols .dtypes [col ]) == "object" :
152
152
treatment_and_adjustments_cols = pd .get_dummies (
@@ -165,7 +165,7 @@ def estimate(self, data: pd.DataFrame) -> RegressionResultsWrapper:
165
165
self .model = model
166
166
167
167
x = pd .DataFrame ()
168
- x [self .treatment [ 0 ] ] = [self .treatment_value , self .control_value ]
168
+ x [self .treatment ] = [self .treatment_value , self .control_value ]
169
169
x ["Intercept" ] = self .intercept
170
170
for k , v in self .effect_modifiers .items ():
171
171
x [k ] = v
@@ -238,7 +238,7 @@ def estimate_ate(self, bootstrap_size=100) -> float:
238
238
ci_high = bootstraps [bootstrap_size - bound ]
239
239
240
240
logger .info (
241
- f"Changing { self .treatment [ 0 ] } from { self .control_value } to { self .treatment_value } gives an estimated "
241
+ f"Changing { self .treatment } from { self .control_value } to { self .treatment_value } gives an estimated "
242
242
f"ATE of { ci_low } < { estimate } < { ci_high } "
243
243
)
244
244
assert ci_low < estimate < ci_high , f"Expecting { ci_low } < { estimate } < { ci_high } "
@@ -268,7 +268,7 @@ def estimate_risk_ratio(self, bootstrap_size=100) -> float:
268
268
ci_high = bootstraps [bootstrap_size - bound ]
269
269
270
270
logger .info (
271
- f"Changing { self .treatment [ 0 ] } from { self .control_value } to { self .treatment_value } gives an estimated "
271
+ f"Changing { self .treatment } from { self .control_value } to { self .treatment_value } gives an estimated "
272
272
f"risk ratio of { ci_low } < { estimate } < { ci_high } "
273
273
)
274
274
assert ci_low < estimate < ci_high , f"Expecting { ci_low } < { estimate } < { ci_high } "
@@ -282,7 +282,7 @@ def estimate_unit_odds_ratio(self) -> float:
282
282
:return: The odds ratio. Confidence intervals are not yet supported.
283
283
"""
284
284
model = self ._run_logistic_regression (self .df )
285
- return np .exp (model .params [self .treatment [ 0 ] ])
285
+ return np .exp (model .params [self .treatment ])
286
286
287
287
288
288
class LinearRegressionEstimator (Estimator ):
@@ -292,11 +292,11 @@ class LinearRegressionEstimator(Estimator):
292
292
293
293
def __init__ (
294
294
self ,
295
- treatment : tuple ,
295
+ treatment : str ,
296
296
treatment_value : float ,
297
297
control_value : float ,
298
298
adjustment_set : set ,
299
- outcome : tuple ,
299
+ outcome : str ,
300
300
df : pd .DataFrame = None ,
301
301
effect_modifiers : dict [Variable :Any ] = None ,
302
302
product_terms : list [tuple [Variable , Variable ]] = None ,
@@ -383,7 +383,7 @@ def estimate_unit_ate(self) -> float:
383
383
:return: The unit average treatment effect and the 95% Wald confidence intervals.
384
384
"""
385
385
model = self ._run_linear_regression ()
386
- unit_effect = model .params [list ( self .treatment ) ].values [0 ] # Unit effect is the coefficient of the treatment
386
+ unit_effect = model .params [[ self .treatment ] ].values [0 ] # Unit effect is the coefficient of the treatment
387
387
[ci_low , ci_high ] = self ._get_confidence_intervals (model )
388
388
389
389
return unit_effect * self .treatment_value - unit_effect * self .control_value , [ci_low , ci_high ]
@@ -407,8 +407,8 @@ def estimate_ate(self) -> tuple[float, list[float, float], float]:
407
407
408
408
# It is ABSOLUTELY CRITICAL that these go last, otherwise we can't index
409
409
# the effect with "ate = t_test_results.effect[0]"
410
- individuals .loc ["control" , list ( self .treatment ) ] = self .control_value
411
- individuals .loc ["treated" , list ( self .treatment ) ] = self .treatment_value
410
+ individuals .loc ["control" , [ self .treatment ] ] = self .control_value
411
+ individuals .loc ["treated" , [ self .treatment ] ] = self .treatment_value
412
412
413
413
# Perform a t-test to compare the predicted outcome of the control and treated individual (ATE)
414
414
t_test_results = model .t_test (individuals .loc ["treated" ] - individuals .loc ["control" ])
@@ -429,7 +429,7 @@ def estimate_control_treatment(self, adjustment_config: dict = None) -> tuple[pd
429
429
self .model = model
430
430
431
431
x = pd .DataFrame ()
432
- x [self .treatment [ 0 ] ] = [self .treatment_value , self .control_value ]
432
+ x [self .treatment ] = [self .treatment_value , self .control_value ]
433
433
x ["Intercept" ] = self .intercept
434
434
for k , v in adjustment_config .items ():
435
435
x [k ] = v
@@ -485,7 +485,7 @@ def estimate_cates(self) -> tuple[float, list[float, float]]:
485
485
self .effect_modifiers
486
486
), f"Must have at least one effect modifier to compute CATE - { self .effect_modifiers } ."
487
487
x = pd .DataFrame ()
488
- x [self .treatment [ 0 ] ] = [self .treatment_value , self .control_value ]
488
+ x [self .treatment ] = [self .treatment_value , self .control_value ]
489
489
x ["Intercept" ] = self .intercept
490
490
for k , v in self .effect_modifiers .items ():
491
491
self .adjustment_set .add (k )
@@ -511,20 +511,20 @@ def _run_linear_regression(self) -> RegressionResultsWrapper:
511
511
"""
512
512
# 1. Reduce dataframe to contain only the necessary columns
513
513
reduced_df = self .df .copy ()
514
- necessary_cols = list ( self .treatment ) + list (self .adjustment_set ) + list ( self .outcome )
514
+ necessary_cols = [ self .treatment ] + list (self .adjustment_set ) + [ self .outcome ]
515
515
missing_rows = reduced_df [necessary_cols ].isnull ().any (axis = 1 )
516
516
reduced_df = reduced_df [~ missing_rows ]
517
- reduced_df = reduced_df .sort_values (list ( self .treatment ) )
517
+ reduced_df = reduced_df .sort_values ([ self .treatment ] )
518
518
logger .debug (reduced_df [necessary_cols ])
519
519
520
520
# 2. Add intercept
521
521
reduced_df ["Intercept" ] = self .intercept
522
522
523
523
# 3. Estimate the unit difference in outcome caused by unit difference in treatment
524
- cols = list ( self .treatment )
524
+ cols = [ self .treatment ]
525
525
cols += [x for x in self .adjustment_set if x not in cols ]
526
526
treatment_and_adjustments_cols = reduced_df [cols + ["Intercept" ]]
527
- outcome_col = reduced_df [list ( self .outcome ) ]
527
+ outcome_col = reduced_df [[ self .outcome ] ]
528
528
for col in treatment_and_adjustments_cols :
529
529
if str (treatment_and_adjustments_cols .dtypes [col ]) == "object" :
530
530
treatment_and_adjustments_cols = pd .get_dummies (
@@ -537,8 +537,8 @@ def _run_linear_regression(self) -> RegressionResultsWrapper:
537
537
def _get_confidence_intervals (self , model ):
538
538
confidence_intervals = model .conf_int (alpha = 0.05 , cols = None )
539
539
ci_low , ci_high = (
540
- confidence_intervals [0 ][list ( self .treatment ) ],
541
- confidence_intervals [1 ][list ( self .treatment ) ],
540
+ confidence_intervals [0 ][[ self .treatment ] ],
541
+ confidence_intervals [1 ][[ self .treatment ] ],
542
542
)
543
543
return [ci_low .values [0 ], ci_high .values [0 ]]
544
544
@@ -551,20 +551,22 @@ class InstrumentalVariableEstimator(Estimator):
551
551
552
552
def __init__ (
553
553
self ,
554
- treatment : tuple ,
554
+ treatment : str ,
555
555
treatment_value : float ,
556
556
control_value : float ,
557
557
adjustment_set : set ,
558
- outcome : tuple ,
558
+ outcome : str ,
559
559
instrument : str ,
560
560
df : pd .DataFrame = None ,
561
561
intercept : int = 1 ,
562
+ effect_modifiers : dict = None # Not used (yet?). Needed for compatibility
562
563
):
563
564
super ().__init__ (treatment , treatment_value , control_value , adjustment_set , outcome , df , None )
564
565
self .intercept = intercept
565
566
self .model = None
566
567
self .instrument = instrument
567
568
569
+
568
570
def add_modelling_assumptions (self ):
569
571
"""
570
572
Add modelling assumptions to the estimator. This is a list of strings which list the modelling assumptions that
@@ -582,7 +584,6 @@ def estimate_coefficient(self):
582
584
"""
583
585
Estimate the linear regression coefficient of the treatment on the outcome.
584
586
"""
585
-
586
587
# Estimate the total effect of instrument I on outcome Y = abI + c1
587
588
ab = sm .OLS (self .df [self .outcome ], self .df [[self .instrument ]]).fit ().params [self .instrument ]
588
589
@@ -617,7 +618,7 @@ def estimate_ate(self) -> float:
617
618
"""
618
619
# Remove any NA containing rows
619
620
reduced_df = self .df .copy ()
620
- necessary_cols = list ( self .treatment ) + list (self .adjustment_set ) + list ( self .outcome )
621
+ necessary_cols = [ self .treatment ] + list (self .adjustment_set ) + [ self .outcome ]
621
622
missing_rows = reduced_df [necessary_cols ].isnull ().any (axis = 1 )
622
623
reduced_df = reduced_df [~ missing_rows ]
623
624
@@ -628,8 +629,8 @@ def estimate_ate(self) -> float:
628
629
else :
629
630
effect_modifier_df = reduced_df [list (self .adjustment_set )]
630
631
confounders_df = reduced_df [list (self .adjustment_set )]
631
- treatment_df = np .ravel (reduced_df [list ( self .treatment ) ])
632
- outcome_df = np .ravel (reduced_df [list ( self .outcome ) ])
632
+ treatment_df = np .ravel (reduced_df [[ self .treatment ] ])
633
+ outcome_df = np .ravel (reduced_df [[ self .outcome ] ])
633
634
634
635
# Fit the model to the data using a gradient boosting regressor for both the treatment and outcome model
635
636
model = CausalForestDML (
@@ -657,7 +658,7 @@ def estimate_cates(self) -> pd.DataFrame:
657
658
658
659
# Remove any NA containing rows
659
660
reduced_df = self .df .copy ()
660
- necessary_cols = list ( self .treatment ) + list (self .adjustment_set ) + list ( self .outcome )
661
+ necessary_cols = [ self .treatment ] + list (self .adjustment_set ) + [ self .outcome ]
661
662
missing_rows = reduced_df [necessary_cols ].isnull ().any (axis = 1 )
662
663
reduced_df = reduced_df [~ missing_rows ]
663
664
@@ -671,8 +672,8 @@ def estimate_cates(self) -> pd.DataFrame:
671
672
confounders_df = reduced_df [list (self .adjustment_set )]
672
673
else :
673
674
confounders_df = None
674
- treatment_df = reduced_df [list ( self .treatment ) ]
675
- outcome_df = reduced_df [list ( self .outcome ) ]
675
+ treatment_df = reduced_df [[ self .treatment ] ]
676
+ outcome_df = reduced_df [[ self .outcome ] ]
676
677
677
678
# Fit a model to the data
678
679
model = CausalForestDML (model_y = GradientBoostingRegressor (), model_t = GradientBoostingRegressor ())
0 commit comments