@@ -58,8 +58,6 @@ def __init__(
58
58
self .df = df
59
59
if effect_modifiers is None :
60
60
self .effect_modifiers = {}
61
- elif isinstance (effect_modifiers , (list , set )):
62
- self .effect_modifiers = {k for k in effect_modifiers }
63
61
elif isinstance (effect_modifiers , dict ):
64
62
self .effect_modifiers = {k : v for k , v in effect_modifiers .items ()}
65
63
else :
@@ -119,9 +117,6 @@ def __init__(
119
117
terms = [treatment ] + sorted (list (adjustment_set )) + sorted (list (self .effect_modifiers ))
120
118
self .formula = f"{ outcome } ~ { '+' .join (((terms )))} "
121
119
122
- for term in self .effect_modifiers :
123
- self .adjustment_set .add (term )
124
-
125
120
def add_modelling_assumptions (self ):
126
121
"""
127
122
Add modelling assumptions to the estimator. This is a list of strings which list the modelling assumptions that
@@ -170,6 +165,10 @@ def estimate(self, data: pd.DataFrame, adjustment_config=None) -> RegressionResu
170
165
"""
171
166
if adjustment_config is None :
172
167
adjustment_config = {}
168
+ if set (self .adjustment_set ) != set (adjustment_config ):
169
+ raise ValueError (
170
+ f"Invalid adjustment configuration { adjustment_config } . Must specify values for { self .adjustment_set } "
171
+ )
173
172
174
173
model = self ._run_logistic_regression (data )
175
174
self .model = model
@@ -188,18 +187,19 @@ def estimate(self, data: pd.DataFrame, adjustment_config=None) -> RegressionResu
188
187
# x = x[model.params.index]
189
188
return model .predict (x )
190
189
191
- def estimate_control_treatment (self , bootstrap_size = 100 ) -> tuple [pd .Series , pd .Series ]:
190
+ def estimate_control_treatment (self , bootstrap_size = 100 , adjustment_config = None ) -> tuple [pd .Series , pd .Series ]:
192
191
"""Estimate the outcomes under control and treatment.
193
192
194
193
:return: The estimated control and treatment values, each paired with its bootstrap
195
194
estimates, in the form ((control, control_bootstraps), (treatment, treatment_bootstraps)).
196
195
"""
197
196
198
- y = self .estimate (self .df )
197
+ y = self .estimate (self .df , adjustment_config = adjustment_config )
199
198
200
199
try :
201
200
bootstrap_samples = [
202
- self .estimate (self .df .sample (len (self .df ), replace = True )) for _ in range (bootstrap_size )
201
+ self .estimate (self .df .sample (len (self .df ), replace = True ), adjustment_config = adjustment_config )
202
+ for _ in range (bootstrap_size )
203
203
]
204
204
control , treatment = zip (* [(x .iloc [1 ], x .iloc [0 ]) for x in bootstrap_samples ])
205
205
except PerfectSeparationError :
@@ -223,7 +223,7 @@ def estimate_control_treatment(self, bootstrap_size=100) -> tuple[pd.Series, pd.
223
223
224
224
return (y .iloc [1 ], np .array (control )), (y .iloc [0 ], np .array (treatment ))
225
225
226
- def estimate_ate (self , bootstrap_size = 100 ) -> float :
226
+ def estimate_ate (self , bootstrap_size = 100 , adjustment_config = None ) -> float :
227
227
"""Estimate the average treatment effect (ATE) of the treatment on the outcome. That is, the change in outcome caused
228
228
by changing the treatment variable from the control value to the treatment value. Here, we actually
229
229
calculate the expected outcomes under control and treatment and take one away from the other. This
@@ -234,7 +234,7 @@ def estimate_ate(self, bootstrap_size=100) -> float:
234
234
(control_outcome , control_bootstraps ), (
235
235
treatment_outcome ,
236
236
treatment_bootstraps ,
237
- ) = self .estimate_control_treatment (bootstrap_size = bootstrap_size )
237
+ ) = self .estimate_control_treatment (bootstrap_size = bootstrap_size , adjustment_config = adjustment_config )
238
238
estimate = treatment_outcome - control_outcome
239
239
240
240
if control_bootstraps is None or treatment_bootstraps is None :
@@ -253,7 +253,7 @@ def estimate_ate(self, bootstrap_size=100) -> float:
253
253
254
254
return estimate , (ci_low , ci_high )
255
255
256
- def estimate_risk_ratio (self , bootstrap_size = 100 ) -> float :
256
+ def estimate_risk_ratio (self , bootstrap_size = 100 , adjustment_config = None ) -> float :
257
257
"""Estimate the risk ratio of the treatment on the outcome. That is, the relative change in outcome caused
258
258
by changing the treatment variable from the control value to the treatment value. Here, we actually
259
259
calculate the expected outcomes under control and treatment and divide one by the other. This
@@ -264,7 +264,7 @@ def estimate_risk_ratio(self, bootstrap_size=100) -> float:
264
264
(control_outcome , control_bootstraps ), (
265
265
treatment_outcome ,
266
266
treatment_bootstraps ,
267
- ) = self .estimate_control_treatment (bootstrap_size = bootstrap_size )
267
+ ) = self .estimate_control_treatment (bootstrap_size = bootstrap_size , adjustment_config = adjustment_config )
268
268
estimate = treatment_outcome / control_outcome
269
269
270
270
if control_bootstraps is None or treatment_bootstraps is None :
0 commit comments