5
5
6
6
import pandas as pd
7
7
import statsmodels .formula .api as smf
8
- from patsy import dmatrix # pylint: disable = no-name-in-module
9
- from patsy import ModelDesc
10
- from statsmodels .regression .linear_model import RegressionResultsWrapper
8
+ from patsy import dmatrix , ModelDesc # pylint: disable = no-name-in-module
11
9
12
10
from causal_testing .specification .variable import Variable
13
11
from causal_testing .estimation .gp import GP
14
- from causal_testing .estimation .estimator import Estimator
12
+ from causal_testing .estimation .regression_estimator import RegressionEstimator
15
13
16
14
logger = logging .getLogger (__name__ )
17
15
18
16
19
- class LinearRegressionEstimator (Estimator ):
17
+ class LinearRegressionEstimator (RegressionEstimator ):
20
18
"""A Linear Regression Estimator is a parametric estimator which restricts the variables in the data to a linear
21
19
combination of parameters and functions of the variables (note these functions need not be linear).
22
20
"""
23
21
22
+ regressor = smf .ols
23
+
24
24
def __init__ (
25
25
# pylint: disable=too-many-arguments
26
26
self ,
@@ -35,6 +35,7 @@ def __init__(
35
35
alpha : float = 0.05 ,
36
36
query : str = "" ,
37
37
):
38
+ # pylint: disable=too-many-arguments
38
39
super ().__init__ (
39
40
treatment ,
40
41
treatment_value ,
@@ -43,20 +44,10 @@ def __init__(
43
44
outcome ,
44
45
df ,
45
46
effect_modifiers ,
46
- alpha = alpha ,
47
- query = query ,
47
+ formula ,
48
+ alpha ,
49
+ query ,
48
50
)
49
-
50
- self .model = None
51
- if effect_modifiers is None :
52
- effect_modifiers = []
53
-
54
- if formula is not None :
55
- self .formula = formula
56
- else :
57
- terms = [treatment ] + sorted (list (adjustment_set )) + sorted (list (effect_modifiers ))
58
- self .formula = f"{ outcome } ~ { '+' .join (terms )} "
59
-
60
51
for term in self .effect_modifiers :
61
52
self .adjustment_set .add (term )
62
53
@@ -118,7 +109,7 @@ def estimate_coefficient(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
118
109
119
110
:return: The unit average treatment effect and the 95% Wald confidence intervals.
120
111
"""
121
- model = self ._run_linear_regression ()
112
+ model = self ._run_regression ()
122
113
newline = "\n "
123
114
patsy_md = ModelDesc .from_formula (self .treatment )
124
115
@@ -147,7 +138,7 @@ def estimate_ate(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
147
138
148
139
:return: The average treatment effect and the 95% Wald confidence intervals.
149
140
"""
150
- model = self ._run_linear_regression ()
141
+ model = self ._run_regression ()
151
142
152
143
# Create an empty individual for the control and treated
153
144
individuals = pd .DataFrame (1 , index = ["control" , "treated" ], columns = model .params .index )
@@ -167,37 +158,6 @@ def estimate_ate(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
167
158
confidence_intervals = [pd .Series (interval ) for interval in confidence_intervals ]
168
159
return ate , confidence_intervals
169
160
170
- def estimate_control_treatment (self , adjustment_config : dict = None ) -> tuple [pd .Series , pd .Series ]:
171
- """Estimate the outcomes under control and treatment.
172
-
173
- :return: The estimated outcome under control and treatment in the form
174
- (control_outcome, treatment_outcome).
175
- """
176
- if adjustment_config is None :
177
- adjustment_config = {}
178
- model = self ._run_linear_regression ()
179
-
180
- x = pd .DataFrame (columns = self .df .columns )
181
- x [self .treatment ] = [self .treatment_value , self .control_value ]
182
- x ["Intercept" ] = 1 # self.intercept
183
-
184
- print (x [self .treatment ])
185
- for k , v in adjustment_config .items ():
186
- x [k ] = v
187
- for k , v in self .effect_modifiers .items ():
188
- x [k ] = v
189
- x = dmatrix (self .formula .split ("~" )[1 ], x , return_type = "dataframe" )
190
- for col in x :
191
- if str (x .dtypes [col ]) == "object" :
192
- x = pd .get_dummies (x , columns = [col ], drop_first = True )
193
- x = x [model .params .index ]
194
-
195
- x [self .treatment ] = [self .treatment_value , self .control_value ]
196
-
197
- y = model .get_prediction (x ).summary_frame ()
198
-
199
- return y .iloc [1 ], y .iloc [0 ]
200
-
201
161
def estimate_risk_ratio (self , adjustment_config : dict = None ) -> tuple [pd .Series , list [pd .Series , pd .Series ]]:
202
162
"""Estimate the risk_ratio effect of the treatment on the outcome. That is, the change in outcome caused
203
163
by changing the treatment variable from the control value to the treatment value.
@@ -206,7 +166,8 @@ def estimate_risk_ratio(self, adjustment_config: dict = None) -> tuple[pd.Series
206
166
"""
207
167
if adjustment_config is None :
208
168
adjustment_config = {}
209
- control_outcome , treatment_outcome = self .estimate_control_treatment (adjustment_config = adjustment_config )
169
+ prediction = self ._predict (adjustment_config = adjustment_config )
170
+ control_outcome , treatment_outcome = prediction .iloc [1 ], prediction .iloc [0 ]
210
171
ci_low = pd .Series (treatment_outcome ["mean_ci_lower" ] / control_outcome ["mean_ci_upper" ])
211
172
ci_high = pd .Series (treatment_outcome ["mean_ci_upper" ] / control_outcome ["mean_ci_lower" ])
212
173
return pd .Series (treatment_outcome ["mean" ] / control_outcome ["mean" ]), [ci_low , ci_high ]
@@ -221,20 +182,12 @@ def estimate_ate_calculated(self, adjustment_config: dict = None) -> tuple[pd.Se
221
182
"""
222
183
if adjustment_config is None :
223
184
adjustment_config = {}
224
- control_outcome , treatment_outcome = self .estimate_control_treatment (adjustment_config = adjustment_config )
185
+ prediction = self ._predict (adjustment_config = adjustment_config )
186
+ control_outcome , treatment_outcome = prediction .iloc [1 ], prediction .iloc [0 ]
225
187
ci_low = pd .Series (treatment_outcome ["mean_ci_lower" ] - control_outcome ["mean_ci_upper" ])
226
188
ci_high = pd .Series (treatment_outcome ["mean_ci_upper" ] - control_outcome ["mean_ci_lower" ])
227
189
return pd .Series (treatment_outcome ["mean" ] - control_outcome ["mean" ]), [ci_low , ci_high ]
228
190
229
- def _run_linear_regression (self ) -> RegressionResultsWrapper :
230
- """Run linear regression of the treatment and adjustment set against the outcome and return the model.
231
-
232
- :return: The model after fitting to data.
233
- """
234
- model = smf .ols (formula = self .formula , data = self .df ).fit ()
235
- self .model = model
236
- return model
237
-
238
191
def _get_confidence_intervals (self , model , treatment ):
239
192
confidence_intervals = model .conf_int (alpha = self .alpha , cols = None )
240
193
ci_low , ci_high = (
0 commit comments