Skip to content

Commit 508244b

Browse files
committed
Fixed pytest errors on logit. Closes #25
1 parent 2f6d774 commit 508244b

File tree

4 files changed: 109 additions and 54 deletions.

causal_testing/testing/estimators.py

Lines changed: 15 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
from statsmodels.tools.sm_exceptions import PerfectSeparationError
1717

1818
from causal_testing.specification.variable import Variable
19+
from math import ceil
1920

2021
logger = logging.getLogger(__name__)
2122

@@ -106,7 +107,7 @@ def __init__(
106107
outcome: str,
107108
df: pd.DataFrame = None,
108109
effect_modifiers: dict[Variable:Any] = None,
109-
formula: str = None
110+
formula: str = None,
110111
):
111112
super().__init__(treatment, treatment_value, control_value, adjustment_set, outcome, df, effect_modifiers)
112113

@@ -151,7 +152,7 @@ def _run_logistic_regression(self, data) -> RegressionResultsWrapper:
151152
logger.debug(reduced_df[necessary_cols])
152153

153154
# 2. Add intercept
154-
reduced_df["Intercept"] = 1#self.intercept
155+
reduced_df["Intercept"] = 1 # self.intercept
155156

156157
# 3. Estimate the unit difference in outcome caused by unit difference in treatment
157158
cols = [self.treatment]
@@ -164,22 +165,23 @@ def _run_logistic_regression(self, data) -> RegressionResultsWrapper:
164165
treatment_and_adjustments_cols, columns=[col], drop_first=True
165166
)
166167
# regression = sm.Logit(outcome_col, treatment_and_adjustments_cols) # This one works
167-
model = smf.logit(formula=self.formula, data=self.df).fit(disp=0)
168+
model = smf.logit(formula=self.formula, data=data).fit(disp=0)
168169
return model
169170

170171
def estimate(self, data: pd.DataFrame, adjustment_config=None) -> RegressionResultsWrapper:
171172
"""add terms to the dataframe and estimate the outcome from the data
172173
:param data: A pandas dataframe containing execution data from the system-under-test.
173174
174175
"""
176+
print(data)
175177
if adjustment_config is None:
176178
adjustment_config = {}
177179

178180
model = self._run_logistic_regression(data)
179181
self.model = model
180182

181183
x = pd.DataFrame(columns=self.df.columns)
182-
x["Intercept"] = 1#self.intercept
184+
x["Intercept"] = 1 # self.intercept
183185
x[self.treatment] = [self.treatment_value, self.control_value]
184186
for k, v in adjustment_config.items():
185187
x[k] = v
@@ -235,7 +237,7 @@ def estimate_ate(self, bootstrap_size=100) -> float:
235237
(control_outcome, control_bootstraps), (
236238
treatment_outcome,
237239
treatment_bootstraps,
238-
) = self.estimate_control_treatment()
240+
) = self.estimate_control_treatment(bootstrap_size=bootstrap_size)
239241
estimate = treatment_outcome - control_outcome
240242

241243
if control_bootstraps is None or treatment_bootstraps is None:
@@ -265,14 +267,16 @@ def estimate_risk_ratio(self, bootstrap_size=100) -> float:
265267
(control_outcome, control_bootstraps), (
266268
treatment_outcome,
267269
treatment_bootstraps,
268-
) = self.estimate_control_treatment()
270+
) = self.estimate_control_treatment(bootstrap_size=bootstrap_size)
269271
estimate = treatment_outcome / control_outcome
270272

271273
if control_bootstraps is None or treatment_bootstraps is None:
272274
return estimate, (None, None)
273275

274276
bootstraps = sorted(list(treatment_bootstraps / control_bootstraps))
275-
bound = int((bootstrap_size * 0.05) / 2)
277+
bound = ceil((bootstrap_size * 0.05) / 2)
278+
print("bootstraps", bootstraps)
279+
print("bound", bound)
276280
ci_low = bootstraps[bound]
277281
ci_high = bootstraps[bootstrap_size - bound]
278282

@@ -309,7 +313,7 @@ def __init__(
309313
outcome: str,
310314
df: pd.DataFrame = None,
311315
effect_modifiers: dict[Variable:Any] = None,
312-
formula: str = None
316+
formula: str = None,
313317
):
314318
super().__init__(treatment, treatment_value, control_value, adjustment_set, outcome, df, effect_modifiers)
315319

@@ -392,7 +396,7 @@ def estimate_control_treatment(self, adjustment_config: dict = None) -> tuple[pd
392396

393397
x = pd.DataFrame(columns=self.df.columns)
394398
x[self.treatment] = [self.treatment_value, self.control_value]
395-
x["Intercept"] = 1#self.intercept
399+
x["Intercept"] = 1 # self.intercept
396400
for k, v in adjustment_config.items():
397401
x[k] = v
398402
for k, v in self.effect_modifiers.items():
@@ -443,7 +447,7 @@ def estimate_cates(self) -> tuple[float, list[float, float]]:
443447
), f"Must have at least one effect modifier to compute CATE - {self.effect_modifiers}."
444448
x = pd.DataFrame()
445449
x[self.treatment] = [self.treatment_value, self.control_value]
446-
x["Intercept"] = 1#self.intercept
450+
x["Intercept"] = 1 # self.intercept
447451
for k, v in self.effect_modifiers.items():
448452
self.adjustment_set.add(k)
449453
x[k] = v
@@ -475,7 +479,7 @@ def _run_linear_regression(self) -> RegressionResultsWrapper:
475479
logger.debug(reduced_df[necessary_cols])
476480

477481
# 2. Add intercept
478-
reduced_df["Intercept"] = 1#self.intercept
482+
reduced_df["Intercept"] = 1 # self.intercept
479483

480484
# 3. Estimate the unit difference in outcome caused by unit difference in treatment
481485
cols = [self.treatment]

tests/testing_tests/test_causal_test_engine.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -218,7 +218,7 @@ def test_execute_test_observational_linear_regression_estimator_squared_term(sel
218218
self.minimal_adjustment_set,
219219
"C",
220220
self.causal_test_engine.scenario_execution_data_df,
221-
formula=f"C ~ A + {'+'.join(self.minimal_adjustment_set)} + (D ** 2)"
221+
formula=f"C ~ A + {'+'.join(self.minimal_adjustment_set)} + (D ** 2)",
222222
)
223223
causal_test_result = self.causal_test_engine.execute_test(estimation_model, self.causal_test_case)
224224
self.assertAlmostEqual(round(causal_test_result.test_value.value, 1), 4, delta=1)

tests/testing_tests/test_causal_test_outcome.py

Lines changed: 49 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -27,13 +27,17 @@ def test_None_ci(self):
2727

2828
self.assertIsNone(ctr.ci_low())
2929
self.assertIsNone(ctr.ci_high())
30-
self.assertEqual(ctr.to_dict(),
31-
{"treatment": "A",
32-
"control_value": 0,
33-
"treatment_value": 1,
34-
"outcome": "A",
35-
"adjustment_set": set(),
36-
"test_value": test_value})
30+
self.assertEqual(
31+
ctr.to_dict(),
32+
{
33+
"treatment": "A",
34+
"control_value": 0,
35+
"treatment_value": 1,
36+
"outcome": "A",
37+
"adjustment_set": set(),
38+
"test_value": test_value,
39+
},
40+
)
3741

3842
def test_empty_adjustment_set(self):
3943
test_value = TestValue(type="ate", value=0)
@@ -46,13 +50,18 @@ def test_empty_adjustment_set(self):
4650

4751
self.assertIsNone(ctr.ci_low())
4852
self.assertIsNone(ctr.ci_high())
49-
self.assertEqual(str(ctr), ("Causal Test Result\n==============\n"
50-
"Treatment: A\n"
51-
"Control value: 0\n"
52-
"Treatment value: 1\n"
53-
"Outcome: A\n"
54-
"Adjustment set: set()\n"
55-
"ate: 0\n" ))
53+
self.assertEqual(
54+
str(ctr),
55+
(
56+
"Causal Test Result\n==============\n"
57+
"Treatment: A\n"
58+
"Control value: 0\n"
59+
"Treatment value: 1\n"
60+
"Outcome: A\n"
61+
"Adjustment set: set()\n"
62+
"ate: 0\n"
63+
),
64+
)
5665

5766
def test_exactValue_pass(self):
5867
test_value = TestValue(type="ate", value=5.05)
@@ -97,20 +106,29 @@ def test_someEffect_fail(self):
97106
)
98107
ev = SomeEffect()
99108
self.assertFalse(ev.apply(ctr))
100-
self.assertEqual(str(ctr), ("Causal Test Result\n==============\n"
101-
"Treatment: A\n"
102-
"Control value: 0\n"
103-
"Treatment value: 1\n"
104-
"Outcome: A\n"
105-
"Adjustment set: set()\n"
106-
"ate: 0\n"
107-
"Confidence intervals: [-0.1, 0.2]\n" ))
108-
self.assertEqual(ctr.to_dict(),
109-
{"treatment": "A",
110-
"control_value": 0,
111-
"treatment_value": 1,
112-
"outcome": "A",
113-
"adjustment_set": set(),
114-
"test_value": test_value,
115-
"ci_low": -0.1,
116-
"ci_high": 0.2})
109+
self.assertEqual(
110+
str(ctr),
111+
(
112+
"Causal Test Result\n==============\n"
113+
"Treatment: A\n"
114+
"Control value: 0\n"
115+
"Treatment value: 1\n"
116+
"Outcome: A\n"
117+
"Adjustment set: set()\n"
118+
"ate: 0\n"
119+
"Confidence intervals: [-0.1, 0.2]\n"
120+
),
121+
)
122+
self.assertEqual(
123+
ctr.to_dict(),
124+
{
125+
"treatment": "A",
126+
"control_value": 0,
127+
"treatment_value": 1,
128+
"outcome": "A",
129+
"adjustment_set": set(),
130+
"test_value": test_value,
131+
"ci_low": -0.1,
132+
"ci_high": 0.2,
133+
},
134+
)

tests/testing_tests/test_estimators.py

Lines changed: 44 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -167,12 +167,17 @@ def test_program_11_2(self):
167167
def test_program_11_3(self):
168168
"""Test whether our linear regression implementation produces the same results as program 11.3 (p. 144)."""
169169
df = self.chapter_11_df.copy()
170-
linear_regression_estimator = LinearRegressionEstimator("treatments", 100, 90, set(), "outcomes", df, formula="outcomes ~ treatments + np.power(treatments, 2)")
170+
linear_regression_estimator = LinearRegressionEstimator(
171+
"treatments", 100, 90, set(), "outcomes", df, formula="outcomes ~ treatments + np.power(treatments, 2)"
172+
)
171173
model = linear_regression_estimator._run_linear_regression()
172174
ate, _ = linear_regression_estimator.estimate_unit_ate()
173175
self.assertEqual(
174176
round(
175-
model.params["Intercept"] + 90 * model.params["treatments"] + 90 * 90 * model.params["np.power(treatments, 2)"], 1
177+
model.params["Intercept"]
178+
+ 90 * model.params["treatments"]
179+
+ 90 * 90 * model.params["np.power(treatments, 2)"],
180+
1,
176181
),
177182
197.1,
178183
)
@@ -198,14 +203,21 @@ def test_program_15_1A(self):
198203
"smokeintensity",
199204
"smokeyrs",
200205
}
201-
linear_regression_estimator = LinearRegressionEstimator("qsmk", 1, 0, covariates, "wt82_71", df,
202-
formula=f"""wt82_71 ~ qsmk +
206+
linear_regression_estimator = LinearRegressionEstimator(
207+
"qsmk",
208+
1,
209+
0,
210+
covariates,
211+
"wt82_71",
212+
df,
213+
formula=f"""wt82_71 ~ qsmk +
203214
{'+'.join(sorted(list(covariates)))} +
204215
np.power(age, 2) +
205216
np.power(wt71, 2) +
206217
np.power(smokeintensity, 2) +
207218
np.power(smokeyrs, 2) +
208-
(qsmk * smokeintensity)""")
219+
(qsmk * smokeintensity)""",
220+
)
209221
# terms_to_square = ["age", "wt71", "smokeintensity", "smokeyrs"]
210222
# terms_to_product = [("qsmk", "smokeintensity")]
211223
# for term_to_square in terms_to_square:
@@ -236,8 +248,15 @@ def test_program_15_no_interaction(self):
236248
"smokeintensity",
237249
"smokeyrs",
238250
}
239-
linear_regression_estimator = LinearRegressionEstimator("qsmk", 1, 0, covariates, "wt82_71", df,
240-
formula="wt82_71 ~ qsmk + age + np.power(age, 2) + wt71 + np.power(wt71, 2) + smokeintensity + np.power(smokeintensity, 2) + smokeyrs + np.power(smokeyrs, 2)")
251+
linear_regression_estimator = LinearRegressionEstimator(
252+
"qsmk",
253+
1,
254+
0,
255+
covariates,
256+
"wt82_71",
257+
df,
258+
formula="wt82_71 ~ qsmk + age + np.power(age, 2) + wt71 + np.power(wt71, 2) + smokeintensity + np.power(smokeintensity, 2) + smokeyrs + np.power(smokeyrs, 2)",
259+
)
241260
# terms_to_square = ["age", "wt71", "smokeintensity", "smokeyrs"]
242261
# for term_to_square in terms_to_square:
243262
ate, [ci_low, ci_high] = linear_regression_estimator.estimate_unit_ate()
@@ -264,8 +283,15 @@ def test_program_15_no_interaction_ate(self):
264283
"smokeintensity",
265284
"smokeyrs",
266285
}
267-
linear_regression_estimator = LinearRegressionEstimator("qsmk", 1, 0, covariates, "wt82_71", df,
268-
formula="wt82_71 ~ qsmk + age + np.power(age, 2) + wt71 + np.power(wt71, 2) + smokeintensity + np.power(smokeintensity, 2) + smokeyrs + np.power(smokeyrs, 2)")
286+
linear_regression_estimator = LinearRegressionEstimator(
287+
"qsmk",
288+
1,
289+
0,
290+
covariates,
291+
"wt82_71",
292+
df,
293+
formula="wt82_71 ~ qsmk + age + np.power(age, 2) + wt71 + np.power(wt71, 2) + smokeintensity + np.power(smokeintensity, 2) + smokeyrs + np.power(smokeyrs, 2)",
294+
)
269295
# terms_to_square = ["age", "wt71", "smokeintensity", "smokeyrs"]
270296
# for term_to_square in terms_to_square:
271297
ate, [ci_low, ci_high] = linear_regression_estimator.estimate_ate()
@@ -292,8 +318,15 @@ def test_program_15_no_interaction_ate_calculated(self):
292318
"smokeintensity",
293319
"smokeyrs",
294320
}
295-
linear_regression_estimator = LinearRegressionEstimator("qsmk", 1, 0, covariates, "wt82_71", df,
296-
formula="wt82_71 ~ qsmk + age + np.power(age, 2) + wt71 + np.power(wt71, 2) + smokeintensity + np.power(smokeintensity, 2) + smokeyrs + np.power(smokeyrs, 2)")
321+
linear_regression_estimator = LinearRegressionEstimator(
322+
"qsmk",
323+
1,
324+
0,
325+
covariates,
326+
"wt82_71",
327+
df,
328+
formula="wt82_71 ~ qsmk + age + np.power(age, 2) + wt71 + np.power(wt71, 2) + smokeintensity + np.power(smokeintensity, 2) + smokeyrs + np.power(smokeyrs, 2)",
329+
)
297330
# terms_to_square = ["age", "wt71", "smokeintensity", "smokeyrs"]
298331
# for term_to_square in terms_to_square:
299332
ate, [ci_low, ci_high] = linear_regression_estimator.estimate_ate_calculated(

0 commit comments

Comments
 (0)