
Commit 48b2c68

IV estimation
1 parent 8e8fd8b

6 files changed, +74 −69 lines

causal_testing/json_front/json_class.py

Lines changed: 6 additions & 6 deletions
@@ -208,12 +208,12 @@ def _setup_test(self, causal_test_case: CausalTestCase, estimator: Estimator) ->
         treatment_var = causal_test_case.treatment_variable
         minimal_adjustment_set = minimal_adjustment_set - {treatment_var}
         estimation_model = estimator(
-            (treatment_var.name,),
-            causal_test_case.treatment_value,
-            causal_test_case.control_value,
-            minimal_adjustment_set,
-            (causal_test_case.outcome_variable.name,),
-            causal_test_engine.scenario_execution_data_df,
+            treatment=treatment_var.name,
+            treatment_value=causal_test_case.treatment_value,
+            control_value=causal_test_case.control_value,
+            adjustment_set=minimal_adjustment_set,
+            outcome=causal_test_case.outcome_variable.name,
+            df=causal_test_engine.scenario_execution_data_df,
             effect_modifiers=causal_test_case.effect_modifier_configuration,
         )

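For context, a minimal sketch of what an estimator call looks like under the new keyword-argument, string-based interface. The class and parameter names come from this commit; the data frame and column names ("dose", "age", "recovery_time") are purely illustrative, not from the repository:

import pandas as pd
from causal_testing.testing.estimators import LinearRegressionEstimator

# Hypothetical observational data: "dose" is the treatment, "recovery_time" the outcome.
df = pd.DataFrame({
    "dose": [0, 0, 1, 1],
    "age": [30, 45, 32, 50],
    "recovery_time": [10.1, 12.3, 7.8, 9.0],
})

# Treatment and outcome are now plain strings rather than one-element tuples,
# and the arguments are passed by keyword.
estimator = LinearRegressionEstimator(
    treatment="dose",
    treatment_value=1.0,
    control_value=0.0,
    adjustment_set={"age"},
    outcome="recovery_time",
    df=df,
)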
causal_testing/testing/causal_test_engine.py

Lines changed: 2 additions & 2 deletions
@@ -89,11 +89,11 @@ def execute_test_suite(self, test_suite: CausalTestSuite) -> list[CausalTestResu
             treatment_value = test.treatment_value
             control_value = test.control_value
             estimator = estimator_class(
-                (treatment_variable.name,),
+                treatment_variable.name,
                 treatment_value,
                 control_value,
                 minimal_adjustment_set,
-                (test.outcome_variable.name,),
+                test.outcome_variable.name,
             )
             if estimator.df is None:
                 estimator.df = self.scenario_execution_data_df

causal_testing/testing/causal_test_result.py

Lines changed: 4 additions & 0 deletions
@@ -83,12 +83,16 @@ def ci_low(self):
         """Return the lower bracket of the confidence intervals."""
         if not self.confidence_intervals:
             return None
+        if any([x is None for x in self.confidence_intervals]):
+            return None
         return min(self.confidence_intervals)

     def ci_high(self):
         """Return the higher bracket of the confidence intervals."""
         if not self.confidence_intervals:
             return None
+        if any([x is None for x in self.confidence_intervals]):
+            return None
         return max(self.confidence_intervals)

     def summary(self):

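The extra guard matters because min() and max() raise a TypeError when the list mixes None with numbers. A standalone sketch of the guarded behaviour (values are hypothetical):

def ci_low(confidence_intervals):
    # Mirrors CausalTestResult.ci_low after this commit.
    if not confidence_intervals:
        return None
    if any([x is None for x in confidence_intervals]):
        return None
    return min(confidence_intervals)

print(ci_low([]))           # None: no intervals recorded
print(ci_low([None, 0.9]))  # None: previously min() would raise a TypeError
print(ci_low([0.2, 0.9]))   # 0.2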
causal_testing/testing/estimators.py

Lines changed: 35 additions & 34 deletions
@@ -36,11 +36,11 @@ class Estimator(ABC):

     def __init__(
         self,
-        treatment: tuple,
+        treatment: str,
         treatment_value: float,
         control_value: float,
         adjustment_set: set,
-        outcome: tuple,
+        outcome: str,
         df: pd.DataFrame = None,
         effect_modifiers: dict[Variable:Any] = None,
     ):
@@ -93,11 +93,11 @@ class LogisticRegressionEstimator(Estimator):

     def __init__(
         self,
-        treatment: tuple,
+        treatment: str,
         treatment_value: float,
         control_value: float,
         adjustment_set: set,
-        outcome: tuple,
+        outcome: str,
         df: pd.DataFrame = None,
         effect_modifiers: dict[Variable:Any] = None,
         intercept: int = 1,
@@ -133,20 +133,20 @@ def _run_logistic_regression(self, data) -> RegressionResultsWrapper:
         """
         # 1. Reduce dataframe to contain only the necessary columns
         reduced_df = data.copy()
-        necessary_cols = list(self.treatment) + list(self.adjustment_set) + list(self.outcome)
+        necessary_cols = [self.treatment] + list(self.adjustment_set) + [self.outcome]
         missing_rows = reduced_df[necessary_cols].isnull().any(axis=1)
         reduced_df = reduced_df[~missing_rows]
-        reduced_df = reduced_df.sort_values(list(self.treatment))
+        reduced_df = reduced_df.sort_values([self.treatment])
         logger.debug(reduced_df[necessary_cols])

         # 2. Add intercept
         reduced_df["Intercept"] = self.intercept

         # 3. Estimate the unit difference in outcome caused by unit difference in treatment
-        cols = list(self.treatment)
+        cols = [self.treatment]
         cols += [x for x in self.adjustment_set if x not in cols]
         treatment_and_adjustments_cols = reduced_df[cols + ["Intercept"]]
-        outcome_col = reduced_df[list(self.outcome)]
+        outcome_col = reduced_df[[self.outcome]]
         for col in treatment_and_adjustments_cols:
             if str(treatment_and_adjustments_cols.dtypes[col]) == "object":
                 treatment_and_adjustments_cols = pd.get_dummies(
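Throughout these hunks, the string field is wrapped back into a one-element list ([self.treatment], [self.outcome]) wherever a DataFrame rather than a Series is needed, so the selected shape is unchanged. A small illustration with hypothetical column names:

import pandas as pd

reduced_df = pd.DataFrame({"dose": [0, 1], "recovery_time": [10.0, 8.5]})  # hypothetical data
outcome_frame = reduced_df[["recovery_time"]]   # DataFrame of shape (2, 1), as list(self.outcome) produced before
outcome_series = reduced_df["recovery_time"]    # Series of shape (2,), what a bare string key would select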
@@ -165,7 +165,7 @@ def estimate(self, data: pd.DataFrame) -> RegressionResultsWrapper:
         self.model = model

         x = pd.DataFrame()
-        x[self.treatment[0]] = [self.treatment_value, self.control_value]
+        x[self.treatment] = [self.treatment_value, self.control_value]
         x["Intercept"] = self.intercept
         for k, v in self.effect_modifiers.items():
             x[k] = v
@@ -238,7 +238,7 @@ def estimate_ate(self, bootstrap_size=100) -> float:
         ci_high = bootstraps[bootstrap_size - bound]

         logger.info(
-            f"Changing {self.treatment[0]} from {self.control_value} to {self.treatment_value} gives an estimated "
+            f"Changing {self.treatment} from {self.control_value} to {self.treatment_value} gives an estimated "
             f"ATE of {ci_low} < {estimate} < {ci_high}"
         )
         assert ci_low < estimate < ci_high, f"Expecting {ci_low} < {estimate} < {ci_high}"
@@ -268,7 +268,7 @@ def estimate_risk_ratio(self, bootstrap_size=100) -> float:
         ci_high = bootstraps[bootstrap_size - bound]

         logger.info(
-            f"Changing {self.treatment[0]} from {self.control_value} to {self.treatment_value} gives an estimated "
+            f"Changing {self.treatment} from {self.control_value} to {self.treatment_value} gives an estimated "
             f"risk ratio of {ci_low} < {estimate} < {ci_high}"
         )
         assert ci_low < estimate < ci_high, f"Expecting {ci_low} < {estimate} < {ci_high}"
@@ -282,7 +282,7 @@ def estimate_unit_odds_ratio(self) -> float:
         :return: The odds ratio. Confidence intervals are not yet supported.
         """
         model = self._run_logistic_regression(self.df)
-        return np.exp(model.params[self.treatment[0]])
+        return np.exp(model.params[self.treatment])


 class LinearRegressionEstimator(Estimator):
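Since model.params is indexed by column name, a plain string key now works directly where self.treatment[0] was needed before. A sketch of the unit odds ratio calculation, using illustrative data and names rather than anything from the repository:

import numpy as np
import pandas as pd
import statsmodels.api as sm

rng = np.random.default_rng(1)
df = pd.DataFrame({"dose": rng.normal(size=500), "Intercept": 1})
p = 1 / (1 + np.exp(-(0.7 * df["dose"] - 0.2)))  # true log-odds slope of 0.7
df["cured"] = rng.binomial(1, p)

model = sm.Logit(df["cured"], df[["dose", "Intercept"]]).fit(disp=0)
unit_odds_ratio = np.exp(model.params["dose"])  # odds multiply by roughly e^0.7 per unit of dose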
@@ -292,11 +292,11 @@ class LinearRegressionEstimator(Estimator):

     def __init__(
         self,
-        treatment: tuple,
+        treatment: str,
         treatment_value: float,
         control_value: float,
         adjustment_set: set,
-        outcome: tuple,
+        outcome: str,
         df: pd.DataFrame = None,
         effect_modifiers: dict[Variable:Any] = None,
         product_terms: list[tuple[Variable, Variable]] = None,
@@ -383,7 +383,7 @@ def estimate_unit_ate(self) -> float:
         :return: The unit average treatment effect and the 95% Wald confidence intervals.
         """
         model = self._run_linear_regression()
-        unit_effect = model.params[list(self.treatment)].values[0]  # Unit effect is the coefficient of the treatment
+        unit_effect = model.params[[self.treatment]].values[0]  # Unit effect is the coefficient of the treatment
         [ci_low, ci_high] = self._get_confidence_intervals(model)

         return unit_effect * self.treatment_value - unit_effect * self.control_value, [ci_low, ci_high]
@@ -407,8 +407,8 @@ def estimate_ate(self) -> tuple[float, list[float, float], float]:

         # It is ABSOLUTELY CRITICAL that these go last, otherwise we can't index
         # the effect with "ate = t_test_results.effect[0]"
-        individuals.loc["control", list(self.treatment)] = self.control_value
-        individuals.loc["treated", list(self.treatment)] = self.treatment_value
+        individuals.loc["control", [self.treatment]] = self.control_value
+        individuals.loc["treated", [self.treatment]] = self.treatment_value

         # Perform a t-test to compare the predicted outcome of the control and treated individual (ATE)
         t_test_results = model.t_test(individuals.loc["treated"] - individuals.loc["control"])
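The surrounding method (not fully shown in this hunk) builds a "control" and a "treated" row with identical covariates and asks statsmodels for a t-test on their difference, whose effect is the ATE. A self-contained sketch of that pattern with made-up data and column names:

import numpy as np
import pandas as pd
import statsmodels.api as sm

rng = np.random.default_rng(0)
df = pd.DataFrame({"dose": rng.integers(0, 2, 200), "age": rng.normal(40, 10, 200)})
df["recovery_time"] = 5 - 2 * df["dose"] + 0.1 * df["age"] + rng.normal(0, 1, 200)
df["Intercept"] = 1

model = sm.OLS(df["recovery_time"], df[["dose", "age", "Intercept"]]).fit()

individuals = pd.DataFrame(
    {"dose": [0, 1], "age": [40, 40], "Intercept": [1, 1]},
    index=["control", "treated"],
)
# The contrast "treated - control" isolates the treatment coefficient (about -2 here).
t_test_results = model.t_test(individuals.loc["treated"] - individuals.loc["control"])
ate = t_test_results.effect[0]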
@@ -429,7 +429,7 @@ def estimate_control_treatment(self, adjustment_config: dict = None) -> tuple[pd
         self.model = model

         x = pd.DataFrame()
-        x[self.treatment[0]] = [self.treatment_value, self.control_value]
+        x[self.treatment] = [self.treatment_value, self.control_value]
         x["Intercept"] = self.intercept
         for k, v in adjustment_config.items():
             x[k] = v
@@ -485,7 +485,7 @@ def estimate_cates(self) -> tuple[float, list[float, float]]:
             self.effect_modifiers
         ), f"Must have at least one effect modifier to compute CATE - {self.effect_modifiers}."
         x = pd.DataFrame()
-        x[self.treatment[0]] = [self.treatment_value, self.control_value]
+        x[self.treatment] = [self.treatment_value, self.control_value]
         x["Intercept"] = self.intercept
         for k, v in self.effect_modifiers.items():
             self.adjustment_set.add(k)
@@ -511,20 +511,20 @@ def _run_linear_regression(self) -> RegressionResultsWrapper:
         """
         # 1. Reduce dataframe to contain only the necessary columns
         reduced_df = self.df.copy()
-        necessary_cols = list(self.treatment) + list(self.adjustment_set) + list(self.outcome)
+        necessary_cols = [self.treatment] + list(self.adjustment_set) + [self.outcome]
         missing_rows = reduced_df[necessary_cols].isnull().any(axis=1)
         reduced_df = reduced_df[~missing_rows]
-        reduced_df = reduced_df.sort_values(list(self.treatment))
+        reduced_df = reduced_df.sort_values([self.treatment])
         logger.debug(reduced_df[necessary_cols])

         # 2. Add intercept
         reduced_df["Intercept"] = self.intercept

         # 3. Estimate the unit difference in outcome caused by unit difference in treatment
-        cols = list(self.treatment)
+        cols = [self.treatment]
         cols += [x for x in self.adjustment_set if x not in cols]
         treatment_and_adjustments_cols = reduced_df[cols + ["Intercept"]]
-        outcome_col = reduced_df[list(self.outcome)]
+        outcome_col = reduced_df[[self.outcome]]
         for col in treatment_and_adjustments_cols:
             if str(treatment_and_adjustments_cols.dtypes[col]) == "object":
                 treatment_and_adjustments_cols = pd.get_dummies(
@@ -537,8 +537,8 @@ def _run_linear_regression(self) -> RegressionResultsWrapper:
     def _get_confidence_intervals(self, model):
         confidence_intervals = model.conf_int(alpha=0.05, cols=None)
         ci_low, ci_high = (
-            confidence_intervals[0][list(self.treatment)],
-            confidence_intervals[1][list(self.treatment)],
+            confidence_intervals[0][[self.treatment]],
+            confidence_intervals[1][[self.treatment]],
         )
         return [ci_low.values[0], ci_high.values[0]]

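For reference, conf_int(alpha=0.05) on a fitted statsmodels model returns a two-column frame (lower, upper) indexed by parameter name, which is why the treatment row can now be selected with [[self.treatment]]. A small sketch with hypothetical data and column names:

import pandas as pd
import statsmodels.api as sm

df = pd.DataFrame({
    "dose": [0, 1, 0, 1, 0, 1],
    "Intercept": 1,
    "recovery_time": [10.2, 8.1, 9.9, 8.4, 10.5, 7.9],
})
model = sm.OLS(df["recovery_time"], df[["dose", "Intercept"]]).fit()

confidence_intervals = model.conf_int(alpha=0.05, cols=None)
ci_low = confidence_intervals[0][["dose"]]    # lower bound for the treatment term
ci_high = confidence_intervals[1][["dose"]]
print(ci_low.values[0], ci_high.values[0])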
@@ -551,20 +551,22 @@ class InstrumentalVariableEstimator(Estimator):

     def __init__(
         self,
-        treatment: tuple,
+        treatment: str,
         treatment_value: float,
         control_value: float,
         adjustment_set: set,
-        outcome: tuple,
+        outcome: str,
         instrument: str,
         df: pd.DataFrame = None,
         intercept: int = 1,
+        effect_modifiers: dict=None  # Not used (yet?). Needed for compatibility
     ):
         super().__init__(treatment, treatment_value, control_value, adjustment_set, outcome, df, None)
         self.intercept = intercept
         self.model = None
         self.instrument = instrument

+
     def add_modelling_assumptions(self):
         """
         Add modelling assumptions to the estimator. This is a list of strings which list the modelling assumptions that
@@ -582,7 +584,6 @@ def estimate_coefficient(self):
         """
         Estimate the linear regression coefficient of the treatment on the outcome.
         """
-
         # Estimate the total effect of instrument I on outcome Y = abI + c1
         ab = sm.OLS(self.df[self.outcome], self.df[[self.instrument]]).fit().params[self.instrument]

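Only the first of the two regressions appears in this hunk, so the following is a sketch of the standard Wald/ratio instrumental-variable estimator the method appears to implement, with simulated data and illustrative names (not from the repository):

import numpy as np
import pandas as pd
import statsmodels.api as sm

rng = np.random.default_rng(2)
n = 1000
instrument = rng.normal(size=n)                                     # I
confounder = rng.normal(size=n)                                     # unobserved confounder
treatment = 0.8 * instrument + confounder + rng.normal(size=n)      # X = a*I + ...
outcome = 1.5 * treatment + 2.0 * confounder + rng.normal(size=n)   # Y = b*X + ...
df = pd.DataFrame({"I": instrument, "X": treatment, "Y": outcome})

# Total effect of the instrument on the outcome: Y ~ I estimates a*b.
ab = sm.OLS(df["Y"], df[["I"]]).fit().params["I"]
# First-stage effect of the instrument on the treatment: X ~ I estimates a.
a = sm.OLS(df["X"], df[["I"]]).fit().params["I"]
# Their ratio recovers b, the causal coefficient of treatment on outcome (about 1.5 here).
coefficient = ab / a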
@@ -617,7 +618,7 @@ def estimate_ate(self) -> float:
         """
         # Remove any NA containing rows
         reduced_df = self.df.copy()
-        necessary_cols = list(self.treatment) + list(self.adjustment_set) + list(self.outcome)
+        necessary_cols = [self.treatment] + list(self.adjustment_set) + [self.outcome]
         missing_rows = reduced_df[necessary_cols].isnull().any(axis=1)
         reduced_df = reduced_df[~missing_rows]

@@ -628,8 +629,8 @@ def estimate_ate(self) -> float:
         else:
             effect_modifier_df = reduced_df[list(self.adjustment_set)]
             confounders_df = reduced_df[list(self.adjustment_set)]
-        treatment_df = np.ravel(reduced_df[list(self.treatment)])
-        outcome_df = np.ravel(reduced_df[list(self.outcome)])
+        treatment_df = np.ravel(reduced_df[[self.treatment]])
+        outcome_df = np.ravel(reduced_df[[self.outcome]])

         # Fit the model to the data using a gradient boosting regressor for both the treatment and outcome model
         model = CausalForestDML(
@@ -657,7 +658,7 @@ def estimate_cates(self) -> pd.DataFrame:

         # Remove any NA containing rows
         reduced_df = self.df.copy()
-        necessary_cols = list(self.treatment) + list(self.adjustment_set) + list(self.outcome)
+        necessary_cols = [self.treatment] + list(self.adjustment_set) + [self.outcome]
         missing_rows = reduced_df[necessary_cols].isnull().any(axis=1)
         reduced_df = reduced_df[~missing_rows]

@@ -671,8 +672,8 @@ def estimate_cates(self) -> pd.DataFrame:
             confounders_df = reduced_df[list(self.adjustment_set)]
         else:
             confounders_df = None
-        treatment_df = reduced_df[list(self.treatment)]
-        outcome_df = reduced_df[list(self.outcome)]
+        treatment_df = reduced_df[[self.treatment]]
+        outcome_df = reduced_df[[self.outcome]]

         # Fit a model to the data
         model = CausalForestDML(model_y=GradientBoostingRegressor(), model_t=GradientBoostingRegressor())

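The estimate_ate and estimate_cates hunks above wrap econml's CausalForestDML. As a rough sketch of that workflow with synthetic data and invented variable names (the repository's own data handling is more involved):

import numpy as np
from econml.dml import CausalForestDML
from sklearn.ensemble import GradientBoostingRegressor

rng = np.random.default_rng(3)
n = 500
confounders = rng.normal(size=(n, 2))          # plays the role of the adjustment set (W)
effect_modifiers = rng.uniform(size=(n, 1))    # drives effect heterogeneity (X)
treatment = confounders[:, 0] + rng.normal(size=n)
outcome = (1 + effect_modifiers[:, 0]) * treatment + confounders[:, 1] + rng.normal(size=n)

model = CausalForestDML(model_y=GradientBoostingRegressor(), model_t=GradientBoostingRegressor())
model.fit(outcome, treatment, X=effect_modifiers, W=confounders)

cates = model.effect(effect_modifiers)  # one conditional effect per row
ate = cates.mean()                      # averaged over the supplied effect modifiers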