@@ -149,7 +149,8 @@ def _run_logistic_regression(self) -> RegressionResultsWrapper:
149
149
def estimate_control_treatment (self ) -> tuple [pd .Series , pd .Series ]:
150
150
"""Estimate the outcomes under control and treatment.
151
151
152
- :return: The average treatment effect and the 95% Wald confidence intervals.
152
+ :return: The estimated control and treatment values and their confidence
153
+ intervals in the form ((ci_low, control, ci_high), (ci_low, treatment, ci_high)).
153
154
"""
154
155
model = self ._run_logistic_regression ()
155
156
self .model = model
@@ -168,31 +169,53 @@ def estimate_control_treatment(self) -> tuple[pd.Series, pd.Series]:
168
169
x = x [model .params .index ]
169
170
170
171
y = model .predict (x )
171
- return y .iloc [1 ], y .iloc [0 ]
172
+
173
+ # Delta method confidence intervals from
174
+ # https://stackoverflow.com/questions/47414842/confidence-interval-of-probability-prediction-from-logistic-regression-statsmode
175
+ cov = model .cov_params ()
176
+ gradient = (y * (1 - y ) * x .T ).T # matrix of gradients for each observation
177
+ std_errors = np .array ([np .sqrt (np .dot (np .dot (g , cov ), g )) for g in gradient .to_numpy ()])
178
+ c = 1.96 # multiplier for confidence interval
179
+ upper = np .maximum (0 , np .minimum (1 , y + std_errors * c ))
180
+ lower = np .maximum (0 , np .minimum (1 , y - std_errors * c ))
181
+
182
+ return (lower .iloc [1 ], y .iloc [1 ], upper .iloc [1 ]), (lower .iloc [0 ], y .iloc [0 ], upper .iloc [0 ])
172
183
173
184
def estimate_ate(self) -> tuple[float, tuple[float, float]]:
    """Estimate the average treatment effect (ATE) of the treatment on the outcome.

    That is, the change in outcome caused by changing the treatment variable from the
    control value to the treatment value. Here, we actually calculate the expected
    outcomes under control and treatment and take one away from the other. This allows
    for custom terms to be put in such as squares, inverses, products, etc.

    The interval is formed from the per-arm intervals returned by
    ``estimate_control_treatment`` by taking the widest combination of their bounds:
    the lower bound is ``treatment_low - control_high`` and the upper bound is
    ``treatment_high - control_low``.

    :return: The estimated average treatment effect and its confidence interval in the
             form ``(estimate, (ci_low, ci_high))``.
    :raises ValueError: If the estimate does not lie within the computed interval
                        (for example when the model produced NaN values).
    """
    (
        (cci_low, control_outcome, cci_high),
        (tci_low, treatment_outcome, tci_high),
    ) = self.estimate_control_treatment()

    # Widest combination of the two per-arm intervals.
    ci_low = tci_low - cci_high
    ci_high = tci_high - cci_low
    estimate = treatment_outcome - control_outcome

    logger.info(
        f"Changing {self.treatment} from {self.control_values} to {self.treatment_values} gives an estimated ATE of {ci_low} < {estimate} < {ci_high}"
    )

    # Explicit check rather than `assert` so it is not stripped under `python -O`.
    # The comparison is non-strict: when both arms have a zero-width interval (zero
    # standard error, or bounds clipped onto the prediction) the bounds legitimately
    # coincide with the estimate.
    if not ci_low <= estimate <= ci_high:
        raise ValueError(f"Expecting {ci_low} <= {estimate} <= {ci_high}")

    return estimate, (ci_low, ci_high)
184
204
185
205
def estimate_risk_ratio(self) -> tuple[float, tuple[float, float]]:
    """Estimate the risk ratio of the treatment on the outcome.

    That is, the ratio between the expected outcome under the treatment value and the
    expected outcome under the control value. Here, we actually calculate the expected
    outcomes under control and treatment and divide one by the other. This allows for
    custom terms to be put in such as squares, inverses, products, etc.

    The interval is formed from the per-arm intervals returned by
    ``estimate_control_treatment`` by taking the widest combination of their bounds:
    the lower bound is ``treatment_low / control_high`` and the upper bound is
    ``treatment_high / control_low``.

    A zero denominator does not raise: the division follows floating point rules, so
    a positive numerator over zero gives ``inf`` and zero over zero gives ``nan``.

    :return: The estimated risk ratio and its confidence interval in the form
             ``(risk_ratio, (ci_low, ci_high))``.
    """
    (
        (cci_low, control_outcome, cci_high),
        (tci_low, treatment_outcome, tci_high),
    ) = self.estimate_control_treatment()

    # np.divide gives the same inf/nan result for Python floats and NumPy scalars
    # alike; a plain `/` would raise ZeroDivisionError for Python floats when a
    # control bound is exactly 0. The errstate block silences the RuntimeWarning
    # because these outcomes are documented above.
    with np.errstate(divide="ignore", invalid="ignore"):
        risk_ratio = np.divide(treatment_outcome, control_outcome)
        ci_low = np.divide(tci_low, cci_high)
        ci_high = np.divide(tci_high, cci_low)

    return risk_ratio, (ci_low, ci_high)
196
219
197
220
def estimate_unit_odds_ratio (self ) -> float :
198
221
"""Estimate the odds ratio of increasing the treatment by one. In logistic regression, this corresponds to the
@@ -214,7 +237,7 @@ def __init__(
214
237
treatment : tuple ,
215
238
treatment_values : float ,
216
239
control_values : float ,
217
- adjustment_set : set ,
240
+ adjustment_set : list [ float ] ,
218
241
outcome : tuple ,
219
242
df : pd .DataFrame = None ,
220
243
effect_modifiers : dict [Variable :Any ] = None ,
@@ -332,7 +355,8 @@ def estimate_ate(self) -> tuple[float, list[float, float], float]:
332
355
def estimate_control_treatment (self ) -> tuple [pd .Series , pd .Series ]:
333
356
"""Estimate the outcomes under control and treatment.
334
357
335
- :return: The average treatment effect and the 95% Wald confidence intervals.
358
+ :return: The estimated outcome under control and treatment in the form
359
+ (control_outcome, treatment_outcome).
336
360
"""
337
361
model = self ._run_linear_regression ()
338
362
self .model = model
0 commit comments