Skip to content

Commit c0fbbdd

Browse files
committed
Removed ate and risk ratio from logistic regression estimator, since they're not statistically meaningful
1 parent 968fc89 commit c0fbbdd

File tree

2 files changed

+0
-172
lines changed

2 files changed

+0
-172
lines changed

causal_testing/estimation/logistic_regression_estimator.py

Lines changed: 0 additions & 123 deletions
Original file line numberDiff line numberDiff line change
@@ -34,129 +34,6 @@ def add_modelling_assumptions(self):
3434
self.modelling_assumptions.append("The outcome must be binary.")
3535
self.modelling_assumptions.append("Independently and identically distributed errors.")
3636

37-
def _predict(self, data=None, adjustment_config: dict = None) -> pd.DataFrame:
38-
"""Estimate the outcomes under control and treatment.
39-
40-
:param data: The data to use, defaults to `self.df`. Controllable for boostrap sampling.
41-
:param: adjustment_config: The values of the adjustment variables to use.
42-
43-
:return: The estimated outcome under control and treatment, with confidence intervals in the form of a
44-
dataframe with columns "predicted", "se", "ci_lower", and "ci_upper".
45-
"""
46-
if adjustment_config is None:
47-
adjustment_config = {}
48-
if set(self.adjustment_set) != set(adjustment_config):
49-
raise ValueError(
50-
f"Invalid adjustment configuration {adjustment_config}. Must specify values for {self.adjustment_set}"
51-
)
52-
53-
return super()._predict(data, adjustment_config)
54-
55-
def estimate_control_treatment(
56-
self, adjustment_config: dict = None, bootstrap_size: int = 100
57-
) -> tuple[pd.Series, pd.Series]:
58-
"""Estimate the outcomes under control and treatment.
59-
60-
:return: The estimated control and treatment values and their confidence
61-
intervals in the form ((ci_low, control, ci_high), (ci_low, treatment, ci_high)).
62-
"""
63-
if adjustment_config is None:
64-
adjustment_config = {}
65-
y = self._predict(self.df, adjustment_config=adjustment_config)["predicted"]
66-
67-
try:
68-
bootstrap_samples = [
69-
self._predict(self.df.sample(len(self.df), replace=True), adjustment_config=adjustment_config)[
70-
"predicted"
71-
]
72-
for _ in range(bootstrap_size)
73-
]
74-
control, treatment = zip(*[(x.iloc[1], x.iloc[0]) for x in bootstrap_samples])
75-
except PerfectSeparationError:
76-
logger.warning(
77-
"Perfect separation detected, results not available. Cannot calculate confidence intervals for such "
78-
"a small dataset."
79-
)
80-
return (y.iloc[1], None), (y.iloc[0], None)
81-
except np.linalg.LinAlgError:
82-
logger.warning("Singular matrix detected. Confidence intervals not available. Try with a larger data set")
83-
return (y.iloc[1], None), (y.iloc[0], None)
84-
85-
# Delta method confidence intervals from
86-
# https://stackoverflow.com/questions/47414842/confidence-interval-of-probability-prediction-from-logistic-regression-statsmode
87-
# cov = model.cov_params()
88-
# gradient = (y * (1 - y) * x.T).T # matrix of gradients for each observation
89-
# std_errors = np.array([np.sqrt(np.dot(np.dot(g, cov), g)) for g in gradient.to_numpy()])
90-
# c = 1.96 # multiplier for confidence interval
91-
# upper = np.maximum(0, np.minimum(1, y + std_errors * c))
92-
# lower = np.maximum(0, np.minimum(1, y - std_errors * c))
93-
94-
return (y.iloc[1], np.array(control)), (y.iloc[0], np.array(treatment))
95-
96-
def estimate_ate(self, adjustment_config: dict = None, bootstrap_size: int = 100) -> float:
97-
"""Estimate the ate effect of the treatment on the outcome. That is, the change in outcome caused
98-
by changing the treatment variable from the control value to the treatment value. Here, we actually
99-
calculate the expected outcomes under control and treatment and take one away from the other. This
100-
allows for custom terms to be put in such as squares, inverses, products, etc.
101-
102-
:return: The estimated average treatment effect and 95% confidence intervals
103-
"""
104-
if adjustment_config is None:
105-
adjustment_config = {}
106-
(control_outcome, control_bootstraps), (
107-
treatment_outcome,
108-
treatment_bootstraps,
109-
) = self.estimate_control_treatment(bootstrap_size=bootstrap_size, adjustment_config=adjustment_config)
110-
estimate = treatment_outcome - control_outcome
111-
112-
if control_bootstraps is None or treatment_bootstraps is None:
113-
return estimate, (None, None)
114-
115-
bootstraps = sorted(list(treatment_bootstraps - control_bootstraps))
116-
bound = int((bootstrap_size * self.alpha) / 2)
117-
ci_low = bootstraps[bound]
118-
ci_high = bootstraps[bootstrap_size - bound]
119-
120-
logger.info(
121-
f"Changing {self.treatment} from {self.control_value} to {self.treatment_value} gives an estimated "
122-
f"ATE of {ci_low} < {estimate} < {ci_high}"
123-
)
124-
assert ci_low < estimate < ci_high, f"Expecting {ci_low} < {estimate} < {ci_high}"
125-
126-
return estimate, (ci_low, ci_high)
127-
128-
def estimate_risk_ratio(self, adjustment_config: dict = None, bootstrap_size: int = 100) -> float:
129-
"""Estimate the ate effect of the treatment on the outcome. That is, the change in outcome caused
130-
by changing the treatment variable from the control value to the treatment value. Here, we actually
131-
calculate the expected outcomes under control and treatment and divide one by the other. This
132-
allows for custom terms to be put in such as squares, inverses, products, etc.
133-
134-
:return: The estimated risk ratio and 95% confidence intervals.
135-
"""
136-
if adjustment_config is None:
137-
adjustment_config = {}
138-
(control_outcome, control_bootstraps), (
139-
treatment_outcome,
140-
treatment_bootstraps,
141-
) = self.estimate_control_treatment(bootstrap_size=bootstrap_size, adjustment_config=adjustment_config)
142-
estimate = treatment_outcome / control_outcome
143-
144-
if control_bootstraps is None or treatment_bootstraps is None:
145-
return estimate, (None, None)
146-
147-
bootstraps = sorted(list(treatment_bootstraps / control_bootstraps))
148-
bound = ceil((bootstrap_size * self.alpha) / 2)
149-
ci_low = bootstraps[bound]
150-
ci_high = bootstraps[bootstrap_size - bound]
151-
152-
logger.info(
153-
f"Changing {self.treatment} from {self.control_value} to {self.treatment_value} gives an estimated "
154-
f"risk ratio of {ci_low} < {estimate} < {ci_high}"
155-
)
156-
assert ci_low < estimate < ci_high, f"Expecting {ci_low} < {estimate} < {ci_high}"
157-
158-
return estimate, (ci_low, ci_high)
159-
16037
def estimate_unit_odds_ratio(self) -> float:
16138
"""Estimate the odds ratio of increasing the treatment by one. In logistic regression, this corresponds to the
16239
coefficient of the treatment of interest.

tests/estimation_tests/test_logistic_regression_estimator.py

Lines changed: 0 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -17,57 +17,8 @@ class TestLogisticRegressionEstimator(unittest.TestCase):
1717
def setUpClass(cls) -> None:
1818
cls.scarf_df = pd.read_csv("tests/resources/data/scarf_data.csv")
1919

20-
def test_ate(self):
21-
df = self.scarf_df.copy()
22-
logistic_regression_estimator = LogisticRegressionEstimator("length_in", 65, 55, set(), "completed", df)
23-
ate, _ = logistic_regression_estimator.estimate_ate()
24-
self.assertEqual(round(ate, 4), -0.1987)
25-
26-
def test_risk_ratio(self):
27-
df = self.scarf_df.copy()
28-
logistic_regression_estimator = LogisticRegressionEstimator("length_in", 65, 55, set(), "completed", df)
29-
rr, _ = logistic_regression_estimator.estimate_risk_ratio()
30-
self.assertEqual(round(rr, 4), 0.7664)
31-
3220
def test_odds_ratio(self):
3321
df = self.scarf_df.copy()
3422
logistic_regression_estimator = LogisticRegressionEstimator("length_in", 65, 55, set(), "completed", df)
3523
odds = logistic_regression_estimator.estimate_unit_odds_ratio()
3624
self.assertEqual(round(odds, 4), 0.8948)
37-
38-
def test_ate_adjustment(self):
39-
df = self.scarf_df.copy()
40-
logistic_regression_estimator = LogisticRegressionEstimator(
41-
"length_in", 65, 55, {"large_gauge"}, "completed", df
42-
)
43-
ate, _ = logistic_regression_estimator.estimate_ate(adjustment_config={"large_gauge": 0})
44-
self.assertEqual(round(ate, 4), -0.3388)
45-
46-
def test_ate_invalid_adjustment(self):
47-
df = self.scarf_df.copy()
48-
logistic_regression_estimator = LogisticRegressionEstimator("length_in", 65, 55, {}, "completed", df)
49-
with self.assertRaises(ValueError):
50-
ate, _ = logistic_regression_estimator.estimate_ate(adjustment_config={"large_gauge": 0})
51-
52-
def test_ate_effect_modifiers(self):
53-
df = self.scarf_df.copy()
54-
logistic_regression_estimator = LogisticRegressionEstimator(
55-
"length_in", 65, 55, set(), "completed", df, effect_modifiers={"large_gauge": 0}
56-
)
57-
ate, _ = logistic_regression_estimator.estimate_ate()
58-
self.assertEqual(round(ate, 4), -0.3388)
59-
60-
def test_ate_effect_modifiers_formula(self):
61-
df = self.scarf_df.copy()
62-
logistic_regression_estimator = LogisticRegressionEstimator(
63-
"length_in",
64-
65,
65-
55,
66-
set(),
67-
"completed",
68-
df,
69-
effect_modifiers={"large_gauge": 0},
70-
formula="completed ~ length_in + large_gauge",
71-
)
72-
ate, _ = logistic_regression_estimator.estimate_ate()
73-
self.assertEqual(round(ate, 4), -0.3388)

0 commit comments

Comments
 (0)