11
11
import statsmodels .formula .api as smf
12
12
from econml .dml import CausalForestDML
13
13
from patsy import dmatrix # pylint: disable = no-name-in-module
14
-
14
+ from patsy import ModelDesc
15
15
from sklearn .ensemble import GradientBoostingRegressor
16
16
from statsmodels .regression .linear_model import RegressionResultsWrapper
17
17
from statsmodels .tools .sm_exceptions import PerfectSeparationError
@@ -343,30 +343,28 @@ def add_modelling_assumptions(self):
343
343
"do not need to be linear."
344
344
)
345
345
346
- def estimate_coefficient (self ) -> float :
346
+ def estimate_coefficient (self ) -> tuple [ pd . Series , list [ pd . Series , pd . Series ]] :
347
347
"""Estimate the unit average treatment effect of the treatment on the outcome. That is, the change in outcome
348
348
caused by a unit change in treatment.
349
349
350
350
:return: The unit average treatment effect and the 95% Wald confidence intervals.
351
351
"""
352
352
model = self ._run_linear_regression ()
353
353
newline = "\n "
354
- treatment = [ self .treatment ]
355
- if str ( self .df .dtypes [self . treatment ]) == " object" :
354
+ patsy_md = ModelDesc . from_formula ( self .treatment )
355
+ if any (( self .df .dtypes [factor . name ()] == ' object' for factor in patsy_md . rhs_termlist [ 1 ]. factors )) :
356
356
design_info = dmatrix (self .formula .split ("~" )[1 ], self .df ).design_info
357
357
treatment = design_info .column_names [design_info .term_name_slices [self .treatment ]]
358
+ else :
359
+ treatment = [self .treatment ]
358
360
assert set (treatment ).issubset (
359
361
model .params .index .tolist ()
360
362
), f"{ treatment } not in\n { ' ' + str (model .params .index ).replace (newline , newline + ' ' )} "
361
363
unit_effect = model .params [treatment ] # Unit effect is the coefficient of the treatment
362
364
[ci_low , ci_high ] = self ._get_confidence_intervals (model , treatment )
363
- if str (self .df .dtypes [self .treatment ]) != "object" :
364
- unit_effect = unit_effect [0 ]
365
- ci_low = ci_low [0 ]
366
- ci_high = ci_high [0 ]
367
365
return unit_effect , [ci_low , ci_high ]
368
366
369
- def estimate_ate (self ) -> tuple [float , list [float , float ], float ]:
367
+ def estimate_ate (self ) -> tuple [pd . Series , list [pd . Series , pd . Series ] ]:
370
368
"""Estimate the average treatment effect of the treatment on the outcome. That is, the change in outcome caused
371
369
by changing the treatment variable from the control value to the treatment value.
372
370
@@ -384,8 +382,9 @@ def estimate_ate(self) -> tuple[float, list[float, float], float]:
384
382
385
383
# Perform a t-test to compare the predicted outcome of the control and treated individual (ATE)
386
384
t_test_results = model .t_test (individuals .loc ["treated" ] - individuals .loc ["control" ])
387
- ate = t_test_results .effect [0 ]
385
+ ate = pd . Series ( t_test_results .effect [0 ])
388
386
confidence_intervals = list (t_test_results .conf_int (alpha = self .alpha ).flatten ())
387
+ confidence_intervals = [pd .Series (interval ) for interval in confidence_intervals ]
389
388
return ate , confidence_intervals
390
389
391
390
def estimate_control_treatment (self , adjustment_config : dict = None ) -> tuple [pd .Series , pd .Series ]:
@@ -414,7 +413,7 @@ def estimate_control_treatment(self, adjustment_config: dict = None) -> tuple[pd
414
413
415
414
return y .iloc [1 ], y .iloc [0 ]
416
415
417
- def estimate_risk_ratio (self , adjustment_config : dict = None ) -> tuple [float , list [float , float ]]:
416
+ def estimate_risk_ratio (self , adjustment_config : dict = None ) -> tuple [pd . Series , list [pd . Series , pd . Series ]]:
418
417
"""Estimate the risk_ratio effect of the treatment on the outcome. That is, the change in outcome caused
419
418
by changing the treatment variable from the control value to the treatment value.
420
419
@@ -423,12 +422,11 @@ def estimate_risk_ratio(self, adjustment_config: dict = None) -> tuple[float, li
423
422
if adjustment_config is None :
424
423
adjustment_config = {}
425
424
control_outcome , treatment_outcome = self .estimate_control_treatment (adjustment_config = adjustment_config )
426
- ci_low = treatment_outcome ["mean_ci_lower" ] / control_outcome ["mean_ci_upper" ]
427
- ci_high = treatment_outcome ["mean_ci_upper" ] / control_outcome ["mean_ci_lower" ]
428
-
429
- return (treatment_outcome ["mean" ] / control_outcome ["mean" ]), [ci_low , ci_high ]
425
+ ci_low = pd .Series (treatment_outcome ["mean_ci_lower" ] / control_outcome ["mean_ci_upper" ])
426
+ ci_high = pd .Series (treatment_outcome ["mean_ci_upper" ] / control_outcome ["mean_ci_lower" ])
427
+ return pd .Series (treatment_outcome ["mean" ] / control_outcome ["mean" ]), [ci_low , ci_high ]
430
428
431
- def estimate_ate_calculated (self , adjustment_config : dict = None ) -> tuple [float , list [float , float ]]:
429
+ def estimate_ate_calculated (self , adjustment_config : dict = None ) -> tuple [pd . Series , list [pd . Series , pd . Series ]]:
432
430
"""Estimate the ate effect of the treatment on the outcome. That is, the change in outcome caused
433
431
by changing the treatment variable from the control value to the treatment value. Here, we actually
434
432
calculate the expected outcomes under control and treatment and divide one by the other. This
@@ -439,10 +437,9 @@ def estimate_ate_calculated(self, adjustment_config: dict = None) -> tuple[float
439
437
if adjustment_config is None :
440
438
adjustment_config = {}
441
439
control_outcome , treatment_outcome = self .estimate_control_treatment (adjustment_config = adjustment_config )
442
- ci_low = treatment_outcome ["mean_ci_lower" ] - control_outcome ["mean_ci_upper" ]
443
- ci_high = treatment_outcome ["mean_ci_upper" ] - control_outcome ["mean_ci_lower" ]
444
-
445
- return (treatment_outcome ["mean" ] - control_outcome ["mean" ]), [ci_low , ci_high ]
440
+ ci_low = pd .Series (treatment_outcome ["mean_ci_lower" ] - control_outcome ["mean_ci_upper" ])
441
+ ci_high = pd .Series (treatment_outcome ["mean_ci_upper" ] - control_outcome ["mean_ci_lower" ])
442
+ return pd .Series (treatment_outcome ["mean" ] - control_outcome ["mean" ]), [ci_low , ci_high ]
446
443
447
444
def _run_linear_regression (self ) -> RegressionResultsWrapper :
448
445
"""Run linear regression of the treatment and adjustment set against the outcome and return the model.
@@ -456,8 +453,8 @@ def _run_linear_regression(self) -> RegressionResultsWrapper:
456
453
def _get_confidence_intervals (self , model , treatment ):
457
454
confidence_intervals = model .conf_int (alpha = self .alpha , cols = None )
458
455
ci_low , ci_high = (
459
- confidence_intervals [0 ].loc [treatment ],
460
- confidence_intervals [1 ].loc [treatment ],
456
+ pd . Series ( confidence_intervals [0 ].loc [treatment ]) ,
457
+ pd . Series ( confidence_intervals [1 ].loc [treatment ]) ,
461
458
)
462
459
return [ci_low , ci_high ]
463
460
@@ -495,7 +492,7 @@ def __init__(
495
492
terms = [treatment ] + sorted (list (adjustment_set )) + sorted (list (effect_modifiers ))
496
493
self .formula = f"{ outcome } ~ cr({ '+' .join (terms )} , df={ basis } )"
497
494
498
- def estimate_ate_calculated (self , adjustment_config : dict = None ) -> float :
495
+ def estimate_ate_calculated (self , adjustment_config : dict = None ) -> pd . Series :
499
496
model = self ._run_linear_regression ()
500
497
501
498
x = {"Intercept" : 1 , self .treatment : self .treatment_value }
@@ -511,7 +508,7 @@ def estimate_ate_calculated(self, adjustment_config: dict = None) -> float:
511
508
x [self .treatment ] = self .control_value
512
509
control = model .predict (x ).iloc [0 ]
513
510
514
- return treatment - control
511
+ return pd . Series ( treatment - control )
515
512
516
513
517
514
class InstrumentalVariableEstimator (Estimator ):
@@ -567,7 +564,7 @@ def add_modelling_assumptions(self):
567
564
"""
568
565
)
569
566
570
- def estimate_iv_coefficient (self , df ):
567
+ def estimate_iv_coefficient (self , df ) -> float :
571
568
"""
572
569
Estimate the linear regression coefficient of the treatment on the
573
570
outcome.
@@ -581,7 +578,7 @@ def estimate_iv_coefficient(self, df):
581
578
# Estimate the coefficient of I on X by cancelling
582
579
return ab / a
583
580
584
- def estimate_coefficient (self , bootstrap_size = 100 ):
581
+ def estimate_coefficient (self , bootstrap_size = 100 ) -> tuple [ pd . Series , list [ pd . Series , pd . Series ]] :
585
582
"""
586
583
Estimate the unit ate (i.e. coefficient) of the treatment on the
587
584
outcome.
@@ -590,10 +587,10 @@ def estimate_coefficient(self, bootstrap_size=100):
590
587
[self .estimate_iv_coefficient (self .df .sample (len (self .df ), replace = True )) for _ in range (bootstrap_size )]
591
588
)
592
589
bound = ceil ((bootstrap_size * self .alpha ) / 2 )
593
- ci_low = bootstraps [bound ]
594
- ci_high = bootstraps [bootstrap_size - bound ]
590
+ ci_low = pd . Series ( bootstraps [bound ])
591
+ ci_high = pd . Series ( bootstraps [bootstrap_size - bound ])
595
592
596
- return self .estimate_iv_coefficient (self .df ), ( ci_low , ci_high )
593
+ return pd . Series ( self .estimate_iv_coefficient (self .df )), [ ci_low , ci_high ]
597
594
598
595
599
596
class CausalForestEstimator (Estimator ):
@@ -610,7 +607,7 @@ def add_modelling_assumptions(self):
610
607
"""
611
608
self .modelling_assumptions .append ("Non-parametric estimator: no restrictions imposed on the data." )
612
609
613
- def estimate_ate (self ) -> float :
610
+ def estimate_ate (self ) -> tuple [ pd . Series , list [ pd . Series , pd . Series ]] :
614
611
"""Estimate the average treatment effect.
615
612
616
613
:return ate, confidence_intervals: The average treatment effect and 95% confidence intervals.
@@ -638,9 +635,9 @@ def estimate_ate(self) -> float:
638
635
model .fit (outcome_df , treatment_df , X = effect_modifier_df , W = confounders_df )
639
636
640
637
# Obtain the ATE and 95% confidence intervals
641
- ate = model .ate (effect_modifier_df , T0 = self .control_value , T1 = self .treatment_value )
638
+ ate = pd . Series ( model .ate (effect_modifier_df , T0 = self .control_value , T1 = self .treatment_value ) )
642
639
ate_interval = model .ate_interval (effect_modifier_df , T0 = self .control_value , T1 = self .treatment_value )
643
- ci_low , ci_high = ate_interval [0 ], ate_interval [1 ]
640
+ ci_low , ci_high = pd . Series ( ate_interval [0 ]), pd . Series ( ate_interval [1 ])
644
641
return ate , [ci_low , ci_high ]
645
642
646
643
def estimate_cates (self ) -> pd .DataFrame :
0 commit comments