Merge pull request #207 from CITCOM-project/alpha

jmafoster1 · web-flow · commit 7ad58270b31f · 2023-07-11T13:10:32.000+01:00
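Alpha: make the confidence-interval significance level (alpha) configurable per test and per estimator, defaulting to 0.05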
diff --git a/causal_testing/json_front/json_class.py b/causal_testing/json_front/json_class.py
@@ -313,6 +313,7 @@ def _setup_test(
             "outcome": causal_test_case.outcome_variable.name,
             "df": causal_test_engine.scenario_execution_data_df,
             "effect_modifiers": causal_test_case.effect_modifier_configuration,
+            "alpha": test["alpha"] if "alpha" in test else 0.05,
         }
         if "formula" in test:
             estimator_kwargs["formula"] = test["formula"]
diff --git a/causal_testing/specification/metamorphic_relation.py b/causal_testing/specification/metamorphic_relation.py
@@ -181,6 +181,7 @@ def to_json_stub(self, skip=True) -> dict:
             "mutations": [self.treatment_var],
             "expected_effect": {self.output_var: "NoEffect"},
             "formula": f"{self.output_var} ~ {' + '.join([self.treatment_var] + self.adjustment_vars)}",
+            "alpha": 0.05,
             "skip": skip,
         }
 
diff --git a/causal_testing/testing/causal_test_outcome.py b/causal_testing/testing/causal_test_outcome.py
@@ -51,6 +51,13 @@ def apply(self, res: CausalTestResult) -> bool:
             ci_low = res.ci_low() if isinstance(res.ci_low(), Iterable) else [res.ci_low()]
             ci_high = res.ci_high() if isinstance(res.ci_high(), Iterable) else [res.ci_high()]
             value = res.test_value.value if isinstance(res.ci_high(), Iterable) else [res.test_value.value]
+
+            if not all(ci_low < 0 < ci_high for ci_low, ci_high in zip(ci_low, ci_high)):
+                print(
+                    "FAILING ON",
+                    [(ci_low, ci_high) for ci_low, ci_high in zip(ci_low, ci_high) if not ci_low < 0 < ci_high],
+                )
+
             return all(ci_low < 0 < ci_high for ci_low, ci_high in zip(ci_low, ci_high)) or all(
                 abs(v) < self.atol for v in value
             )
diff --git a/causal_testing/testing/causal_test_result.py b/causal_testing/testing/causal_test_result.py
@@ -56,6 +56,7 @@ def push(s, inc="  "):
             f"Treatment value: {self.estimator.treatment_value}\n"
             f"Outcome: {self.estimator.outcome}\n"
             f"Adjustment set: {self.adjustment_set}\n"
+            f"Formula: {self.estimator.formula}\n"
             f"{self.test_value.type}: {result_str}\n"
         )
         confidence_str = ""
@@ -64,6 +65,7 @@ def push(s, inc="  "):
             if "\n" in ci_str:
                 ci_str = " " + push(pd.DataFrame(self.confidence_intervals).transpose().to_string(header=False))
             confidence_str += f"Confidence intervals:{ci_str}\n"
+            confidence_str += f"Alpha:{self.estimator.alpha}\n"
         return base_str + confidence_str
 
     def to_dict(self):
diff --git a/causal_testing/testing/estimators.py b/causal_testing/testing/estimators.py
@@ -49,13 +49,15 @@ def __init__(
         outcome: str,
         df: pd.DataFrame = None,
         effect_modifiers: dict[str:Any] = None,
+        alpha: float = 0.05,
     ):
         self.treatment = treatment
         self.treatment_value = treatment_value
         self.control_value = control_value
         self.adjustment_set = adjustment_set
         self.outcome = outcome
         self.df = df
+        self.alpha = alpha
         if effect_modifiers is None:
             self.effect_modifiers = {}
         elif isinstance(effect_modifiers, dict):
@@ -237,7 +239,7 @@ def estimate_ate(self, estimator_params: dict = None) -> float:
             return estimate, (None, None)
 
         bootstraps = sorted(list(treatment_bootstraps - control_bootstraps))
-        bound = int((bootstrap_size * 0.05) / 2)
+        bound = int((bootstrap_size * self.alpha) / 2)
         ci_low = bootstraps[bound]
         ci_high = bootstraps[bootstrap_size - bound]
 
@@ -271,7 +273,7 @@ def estimate_risk_ratio(self, estimator_params: dict = None) -> float:
             return estimate, (None, None)
 
         bootstraps = sorted(list(treatment_bootstraps / control_bootstraps))
-        bound = ceil((bootstrap_size * 0.05) / 2)
+        bound = ceil((bootstrap_size * self.alpha) / 2)
         ci_low = bootstraps[bound]
         ci_high = bootstraps[bootstrap_size - bound]
 
@@ -309,8 +311,11 @@ def __init__(
         df: pd.DataFrame = None,
         effect_modifiers: dict[Variable:Any] = None,
         formula: str = None,
+        alpha: float = 0.05,
     ):
-        super().__init__(treatment, treatment_value, control_value, adjustment_set, outcome, df, effect_modifiers)
+        super().__init__(
+            treatment, treatment_value, control_value, adjustment_set, outcome, df, effect_modifiers, alpha=alpha
+        )
 
         self.model = None
         if effect_modifiers is None:
@@ -344,7 +349,6 @@ def estimate_unit_ate(self) -> float:
         """
         model = self._run_linear_regression()
         newline = "\n"
-        print(model.conf_int())
         treatment = [self.treatment]
         if str(self.df.dtypes[self.treatment]) == "object":
             design_info = dmatrix(self.formula.split("~")[1], self.df).design_info
@@ -380,7 +384,7 @@ def estimate_ate(self) -> tuple[float, list[float, float], float]:
         # Perform a t-test to compare the predicted outcome of the control and treated individual (ATE)
         t_test_results = model.t_test(individuals.loc["treated"] - individuals.loc["control"])
         ate = t_test_results.effect[0]
-        confidence_intervals = list(t_test_results.conf_int().flatten())
+        confidence_intervals = list(t_test_results.conf_int(alpha=self.alpha).flatten())
         return ate, confidence_intervals
 
     def estimate_control_treatment(self, adjustment_config: dict = None) -> tuple[pd.Series, pd.Series]:
@@ -442,25 +446,11 @@ def _run_linear_regression(self) -> RegressionResultsWrapper:
 
         :return: The model after fitting to data.
         """
-        # 1. Reduce dataframe to contain only the necessary columns
-        reduced_df = self.df.copy()
-        necessary_cols = [self.treatment] + list(self.adjustment_set) + [self.outcome]
-        missing_rows = reduced_df[necessary_cols].isnull().any(axis=1)
-        reduced_df = reduced_df[~missing_rows]
-        reduced_df = reduced_df.sort_values([self.treatment])
-        logger.debug(reduced_df[necessary_cols])
-
-        # 2. Add intercept
-        reduced_df["Intercept"] = 1  # self.intercept
-
-        # 3. Estimate the unit difference in outcome caused by unit difference in treatment
-        cols = [self.treatment]
-        cols += [x for x in self.adjustment_set if x not in cols]
         model = smf.ols(formula=self.formula, data=self.df).fit()
         return model
 
     def _get_confidence_intervals(self, model, treatment):
-        confidence_intervals = model.conf_int(alpha=0.05, cols=None)
+        confidence_intervals = model.conf_int(alpha=self.alpha, cols=None)
         ci_low, ci_high = (
             confidence_intervals[0].loc[treatment],
             confidence_intervals[1].loc[treatment],
@@ -527,7 +517,7 @@ def estimate_unit_ate(self, bootstrap_size=100):
         bootstraps = sorted(
             [self.estimate_coefficient(self.df.sample(len(self.df), replace=True)) for _ in range(bootstrap_size)]
         )
-        bound = ceil((bootstrap_size * 0.05) / 2)
+        bound = ceil((bootstrap_size * self.alpha) / 2)
         ci_low = bootstraps[bound]
         ci_high = bootstraps[bootstrap_size - bound]
 
@@ -618,7 +608,7 @@ def estimate_cates(self) -> pd.DataFrame:
         # Obtain CATES and confidence intervals
         conditional_ates = model.effect(effect_modifier_df, T0=self.control_value, T1=self.treatment_value).flatten()
         [ci_low, ci_high] = model.effect_interval(
-            effect_modifier_df, T0=self.control_value, T1=self.treatment_value, alpha=0.05
+            effect_modifier_df, T0=self.control_value, T1=self.treatment_value, alpha=self.alpha
         )
 
         # Merge results into a dataframe (CATE, confidence intervals, and effect modifier values)
diff --git a/tests/specification_tests/test_metamorphic_relations.py b/tests/specification_tests/test_metamorphic_relations.py
@@ -120,6 +120,7 @@ def test_should_not_cause_json_stub(self):
                 "mutations": ["X1"],
                 "name": "X1 _||_ Z",
                 "formula": "Z ~ X1",
+                "alpha": 0.05,
                 "skip": True,
             },
         )
diff --git a/tests/testing_tests/test_causal_test_outcome.py b/tests/testing_tests/test_causal_test_outcome.py
@@ -60,6 +60,7 @@ def test_empty_adjustment_set(self):
                 "Treatment value: 1\n"
                 "Outcome: A\n"
                 "Adjustment set: set()\n"
+                "Formula: A ~ A\n"
                 "ate: 0\n"
             ),
         )

Original file line number	Diff line number	Diff line change
`@@ -313,6 +313,7 @@ def _setup_test(`
`313`	`313`	`"outcome": causal_test_case.outcome_variable.name,`
`314`	`314`	`"df": causal_test_engine.scenario_execution_data_df,`
`315`	`315`	`"effect_modifiers": causal_test_case.effect_modifier_configuration,`
	`316`	`+ "alpha": test["alpha"] if "alpha" in test else 0.05,`
`316`	`317`	`}`
`317`	`318`	`if "formula" in test:`
`318`	`319`	`estimator_kwargs["formula"] = test["formula"]`
Original file line number	Diff line number	Diff line change
`@@ -181,6 +181,7 @@ def to_json_stub(self, skip=True) -> dict:`
`181`	`181`	`"mutations": [self.treatment_var],`
`182`	`182`	`"expected_effect": {self.output_var: "NoEffect"},`
`183`	`183`	`"formula": f"{self.output_var} ~ {' + '.join([self.treatment_var] + self.adjustment_vars)}",`
	`184`	`+ "alpha": 0.05,`
`184`	`185`	`"skip": skip,`
`185`	`186`	`}`
`186`	`187`
Original file line number	Diff line number	Diff line change
`@@ -120,6 +120,7 @@ def test_should_not_cause_json_stub(self):`
`120`	`120`	`"mutations": ["X1"],`
`121`	`121`	`"name": "X1 _\|\|_ Z",`
`122`	`122`	`"formula": "Z ~ X1",`
	`123`	`+ "alpha": 0.05,`
`123`	`124`	`"skip": True,`
`124`	`125`	`},`
`125`	`126`	`)`
Original file line number	Diff line number	Diff line change
`@@ -60,6 +60,7 @@ def test_empty_adjustment_set(self):`
`60`	`60`	`"Treatment value: 1\n"`
`61`	`61`	`"Outcome: A\n"`
`62`	`62`	`"Adjustment set: set()\n"`
	`63`	`+ "Formula: A ~ A\n"`
`63`	`64`	`"ate: 0\n"`
`64`	`65`	`),`
`65`	`66`	`)`