Skip to content

Commit 7079204

Browse files
committed
Converted logistic_regression_estimator too, but two tests fail
1 parent e3c36a4 commit 7079204

File tree

1 file changed

+26
-22
lines changed

1 file changed

+26
-22
lines changed

causal_testing/testing/estimators.py

Lines changed: 26 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -103,19 +103,24 @@ def __init__(
103103
outcome: str,
104104
df: pd.DataFrame = None,
105105
effect_modifiers: dict[Variable:Any] = None,
106-
intercept: int = 1,
106+
formula: str = None
107107
):
108108
super().__init__(treatment, treatment_value, control_value, adjustment_set, outcome, df, effect_modifiers)
109109

110+
self.model = None
111+
if effect_modifiers is None:
112+
effect_modifiers = []
113+
114+
if formula is not None:
115+
# TODO: validate it
116+
self.formula = formula
117+
else:
118+
terms = [treatment] + sorted(list(adjustment_set)) + sorted(list(effect_modifiers))
119+
self.formula = f"{outcome} ~ {'+'.join(((terms)))}"
120+
110121
for term in self.effect_modifiers:
111122
self.adjustment_set.add(term)
112123

113-
self.product_terms = []
114-
self.square_terms = []
115-
self.inverse_terms = []
116-
self.intercept = intercept
117-
self.model = None
118-
119124
def add_modelling_assumptions(self):
120125
"""
121126
Add modelling assumptions to the estimator. This is a list of strings which list the modelling assumptions that
@@ -143,7 +148,7 @@ def _run_logistic_regression(self, data) -> RegressionResultsWrapper:
143148
logger.debug(reduced_df[necessary_cols])
144149

145150
# 2. Add intercept
146-
reduced_df["Intercept"] = self.intercept
151+
reduced_df["Intercept"] = 1#self.intercept
147152

148153
# 3. Estimate the unit difference in outcome caused by unit difference in treatment
149154
cols = [self.treatment]
@@ -155,35 +160,34 @@ def _run_logistic_regression(self, data) -> RegressionResultsWrapper:
155160
treatment_and_adjustments_cols = pd.get_dummies(
156161
treatment_and_adjustments_cols, columns=[col], drop_first=True
157162
)
158-
regression = sm.Logit(outcome_col, treatment_and_adjustments_cols)
159-
model = regression.fit()
163+
# regression = sm.Logit(outcome_col, treatment_and_adjustments_cols) # This one works
164+
regression = smf.logit(formula=self.formula, data=self.df) # This one doesn't work
165+
model = regression.fit(disp=0)
160166
return model
161167

162-
def estimate(self, data: pd.DataFrame) -> RegressionResultsWrapper:
168+
def estimate(self, data: pd.DataFrame, adjustment_config=None) -> RegressionResultsWrapper:
163169
"""add terms to the dataframe and estimate the outcome from the data
164170
:param data: A pandas dataframe containing execution data from the system-under-test.
165171
166172
"""
173+
if adjustment_config is None:
174+
adjustment_config = {}
175+
167176
model = self._run_logistic_regression(data)
168177
self.model = model
169178

170-
x = pd.DataFrame()
179+
x = pd.DataFrame(columns=self.df.columns)
180+
x["Intercept"] = 1#self.intercept
171181
x[self.treatment] = [self.treatment_value, self.control_value]
172-
x["Intercept"] = self.intercept
182+
for k, v in adjustment_config.items():
183+
x[k] = v
173184
for k, v in self.effect_modifiers.items():
174185
x[k] = v
175-
for t in self.square_terms:
176-
x[t + "^2"] = x[t] ** 2
177-
for t in self.inverse_terms:
178-
x["1/" + t] = 1 / x[t]
179-
for a, b in self.product_terms:
180-
x[f"{a}*{b}"] = x[a] * x[b]
181-
186+
x = dmatrix(self.formula.split("~")[1], x, return_type="dataframe")
182187
for col in x:
183188
if str(x.dtypes[col]) == "object":
184189
x = pd.get_dummies(x, columns=[col], drop_first=True)
185-
x = x[model.params.index]
186-
190+
# x = x[model.params.index]
187191
return model.predict(x)
188192

189193
def estimate_control_treatment(self, bootstrap_size=100) -> tuple[pd.Series, pd.Series]:

0 commit comments

Comments
 (0)