Extra logging and a configurable alpha (significance level) for confidence intervals

jmafoster1 · jmafoster1 · commit e2fcc6ff3b28 · 2023-07-11T08:57:38.000+01:00
diff --git a/causal_testing/json_front/json_class.py b/causal_testing/json_front/json_class.py
@@ -286,6 +286,7 @@ def _setup_test(
             "outcome": causal_test_case.outcome_variable.name,
             "df": causal_test_engine.scenario_execution_data_df,
             "effect_modifiers": causal_test_case.effect_modifier_configuration,
+            "alpha": test['alpha'] if 'alpha' in test else 0.05
         }
         if "formula" in test:
             estimator_kwargs["formula"] = test["formula"]
diff --git a/causal_testing/specification/metamorphic_relation.py b/causal_testing/specification/metamorphic_relation.py
@@ -181,6 +181,7 @@ def to_json_stub(self, skip=True) -> dict:
             "mutations": [self.treatment_var],
             "expected_effect": {self.output_var: "NoEffect"},
             "formula": f"{self.output_var} ~ {' + '.join([self.treatment_var] + self.adjustment_vars)}",
+            "alpha": 0.01,
             "skip": skip,
         }
 
diff --git a/causal_testing/testing/causal_test_outcome.py b/causal_testing/testing/causal_test_outcome.py
@@ -51,6 +51,10 @@ def apply(self, res: CausalTestResult) -> bool:
             ci_low = res.ci_low() if isinstance(res.ci_low(), Iterable) else [res.ci_low()]
             ci_high = res.ci_high() if isinstance(res.ci_high(), Iterable) else [res.ci_high()]
             value = res.test_value.value if isinstance(res.ci_high(), Iterable) else [res.test_value.value]
+
+            if not all(ci_low < 0 < ci_high for ci_low, ci_high in zip(ci_low, ci_high)):
+                print("FAILING ON", [(ci_low, ci_high) for ci_low, ci_high in zip(ci_low, ci_high) if not ci_low < 0 < ci_high])
+
             return all(ci_low < 0 < ci_high for ci_low, ci_high in zip(ci_low, ci_high)) or all(
                 abs(v) < self.atol for v in value
             )
diff --git a/causal_testing/testing/causal_test_result.py b/causal_testing/testing/causal_test_result.py
@@ -56,14 +56,16 @@ def push(s, inc="  "):
             f"Treatment value: {self.estimator.treatment_value}\n"
             f"Outcome: {self.estimator.outcome}\n"
             f"Adjustment set: {self.adjustment_set}\n"
-            f"{self.test_value.type}: {result_str}\n"
+            f"Formula: {self.estimator.formula}\n"
+            f"{self.test_value.type.capitalize()}: {result_str}\n"
         )
         confidence_str = ""
         if self.confidence_intervals:
             ci_str = " " + str(self.confidence_intervals)
             if "\n" in ci_str:
                 ci_str = " " + push(pd.DataFrame(self.confidence_intervals).transpose().to_string(header=False))
             confidence_str += f"Confidence intervals:{ci_str}\n"
+            confidence_str += f"Alpha:{self.estimator.alpha}\n"
         return base_str + confidence_str
 
     def to_dict(self):
diff --git a/causal_testing/testing/estimators.py b/causal_testing/testing/estimators.py
@@ -49,13 +49,15 @@ def __init__(
         outcome: str,
         df: pd.DataFrame = None,
         effect_modifiers: dict[str:Any] = None,
+        alpha: float = 0.05
     ):
         self.treatment = treatment
         self.treatment_value = treatment_value
         self.control_value = control_value
         self.adjustment_set = adjustment_set
         self.outcome = outcome
         self.df = df
+        self.alpha = alpha
         if effect_modifiers is None:
             self.effect_modifiers = {}
         elif isinstance(effect_modifiers, dict):
@@ -233,7 +235,7 @@ def estimate_ate(self, bootstrap_size=100, adjustment_config=None) -> float:
             return estimate, (None, None)
 
         bootstraps = sorted(list(treatment_bootstraps - control_bootstraps))
-        bound = int((bootstrap_size * 0.05) / 2)
+        bound = int((bootstrap_size * self.alpha) / 2)
         ci_low = bootstraps[bound]
         ci_high = bootstraps[bootstrap_size - bound]
 
@@ -263,7 +265,7 @@ def estimate_risk_ratio(self, bootstrap_size=100, adjustment_config=None) -> flo
             return estimate, (None, None)
 
         bootstraps = sorted(list(treatment_bootstraps / control_bootstraps))
-        bound = ceil((bootstrap_size * 0.05) / 2)
+        bound = ceil((bootstrap_size * self.alpha) / 2)
         ci_low = bootstraps[bound]
         ci_high = bootstraps[bootstrap_size - bound]
 
@@ -301,8 +303,9 @@ def __init__(
         df: pd.DataFrame = None,
         effect_modifiers: dict[Variable:Any] = None,
         formula: str = None,
+        alpha: float = 0.05
     ):
-        super().__init__(treatment, treatment_value, control_value, adjustment_set, outcome, df, effect_modifiers)
+        super().__init__(treatment, treatment_value, control_value, adjustment_set, outcome, df, effect_modifiers, alpha=alpha)
 
         self.model = None
         if effect_modifiers is None:
@@ -336,7 +339,6 @@ def estimate_unit_ate(self) -> float:
         """
         model = self._run_linear_regression()
         newline = "\n"
-        print(model.conf_int())
         treatment = [self.treatment]
         if str(self.df.dtypes[self.treatment]) == "object":
             design_info = dmatrix(self.formula.split("~")[1], self.df).design_info
@@ -372,7 +374,7 @@ def estimate_ate(self) -> tuple[float, list[float, float], float]:
         # Perform a t-test to compare the predicted outcome of the control and treated individual (ATE)
         t_test_results = model.t_test(individuals.loc["treated"] - individuals.loc["control"])
         ate = t_test_results.effect[0]
-        confidence_intervals = list(t_test_results.conf_int().flatten())
+        confidence_intervals = list(t_test_results.conf_int(alpha=self.alpha).flatten())
         return ate, confidence_intervals
 
     def estimate_control_treatment(self, adjustment_config: dict = None) -> tuple[pd.Series, pd.Series]:
@@ -434,25 +436,11 @@ def _run_linear_regression(self) -> RegressionResultsWrapper:
 
         :return: The model after fitting to data.
         """
-        # 1. Reduce dataframe to contain only the necessary columns
-        reduced_df = self.df.copy()
-        necessary_cols = [self.treatment] + list(self.adjustment_set) + [self.outcome]
-        missing_rows = reduced_df[necessary_cols].isnull().any(axis=1)
-        reduced_df = reduced_df[~missing_rows]
-        reduced_df = reduced_df.sort_values([self.treatment])
-        logger.debug(reduced_df[necessary_cols])
-
-        # 2. Add intercept
-        reduced_df["Intercept"] = 1  # self.intercept
-
-        # 3. Estimate the unit difference in outcome caused by unit difference in treatment
-        cols = [self.treatment]
-        cols += [x for x in self.adjustment_set if x not in cols]
         model = smf.ols(formula=self.formula, data=self.df).fit()
         return model
 
     def _get_confidence_intervals(self, model, treatment):
-        confidence_intervals = model.conf_int(alpha=0.05, cols=None)
+        confidence_intervals = model.conf_int(alpha=self.alpha, cols=None)
         ci_low, ci_high = (
             confidence_intervals[0].loc[treatment],
             confidence_intervals[1].loc[treatment],
@@ -519,7 +507,7 @@ def estimate_unit_ate(self, bootstrap_size=100):
         bootstraps = sorted(
             [self.estimate_coefficient(self.df.sample(len(self.df), replace=True)) for _ in range(bootstrap_size)]
         )
-        bound = ceil((bootstrap_size * 0.05) / 2)
+        bound = ceil((bootstrap_size * self.alpha) / 2)
         ci_low = bootstraps[bound]
         ci_high = bootstraps[bootstrap_size - bound]
 
@@ -610,7 +598,7 @@ def estimate_cates(self) -> pd.DataFrame:
         # Obtain CATES and confidence intervals
         conditional_ates = model.effect(effect_modifier_df, T0=self.control_value, T1=self.treatment_value).flatten()
         [ci_low, ci_high] = model.effect_interval(
-            effect_modifier_df, T0=self.control_value, T1=self.treatment_value, alpha=0.05
+            effect_modifier_df, T0=self.control_value, T1=self.treatment_value, alpha=self.alpha
         )
 
         # Merge results into a dataframe (CATE, confidence intervals, and effect modifier values)

Original file line number	Diff line number	Diff line change
`@@ -286,6 +286,7 @@ def _setup_test(`
`286`	`286`	`"outcome": causal_test_case.outcome_variable.name,`
`287`	`287`	`"df": causal_test_engine.scenario_execution_data_df,`
`288`	`288`	`"effect_modifiers": causal_test_case.effect_modifier_configuration,`
	`289`	`+ "alpha": test['alpha'] if 'alpha' in test else 0.05`
`289`	`290`	`}`
`290`	`291`	`if "formula" in test:`
`291`	`292`	`estimator_kwargs["formula"] = test["formula"]`
Original file line number	Diff line number	Diff line change
`@@ -181,6 +181,7 @@ def to_json_stub(self, skip=True) -> dict:`
`181`	`181`	`"mutations": [self.treatment_var],`
`182`	`182`	`"expected_effect": {self.output_var: "NoEffect"},`
`183`	`183`	`"formula": f"{self.output_var} ~ {' + '.join([self.treatment_var] + self.adjustment_vars)}",`
	`184`	`+ "alpha": 0.01,`
`184`	`185`	`"skip": skip,`
`185`	`186`	`}`
`186`	`187`