Preliminary coverage

jmafoster1 · jmafoster1 · commit 7d6ec17693bc · 2023-07-14T16:21:45.000+01:00
diff --git a/causal_testing/json_front/json_class.py b/causal_testing/json_front/json_class.py
@@ -25,6 +25,7 @@
 from causal_testing.testing.causal_test_engine import CausalTestEngine
 from causal_testing.testing.estimators import Estimator
 from causal_testing.testing.base_test_case import BaseTestCase
+from causal_testing.testing.causal_test_adequacy import DataAdequacy
 
 logger = logging.getLogger(__name__)
 
@@ -264,9 +265,28 @@ def _execute_test_case(
             causal_test_case, test, test["conditions"] if "conditions" in test else None
         )
         causal_test_result = causal_test_engine.execute_test(estimation_model, causal_test_case)
-
         test_passes = causal_test_case.expected_causal_effect.apply(causal_test_result)
 
+        if "coverage" in test and test["coverage"]:
+            adequacy = DataAdequacy(causal_test_case, causal_test_engine, estimation_model)
+            results = adequacy.measure_adequacy_bootstrap(100)
+            outcomes = [causal_test_case.expected_causal_effect.apply(c) for c in results]
+            coverage = pd.DataFrame(c.to_dict() for c in results)[["effect_estimate", "ci_low", "ci_high"]]
+            coverage["pass"] = outcomes
+            std = coverage.std(numeric_only=True)
+            self._append_to_file(f"COVERAGE: {coverage['pass'].sum()}", logging.INFO)
+            # std["pass"] = coverage["pass"].sum()
+            # print(coverage)
+            # print(std)
+
+            # k_folds = adequacy.measure_adequacy_k_folds()
+
+            # import matplotlib.pyplot as plt
+            #
+            # plt.hist(coverage["ci_low"], alpha=0.8)
+            # plt.hist(coverage["ci_high"], alpha=0.8)
+            # plt.show()
+
         if causal_test_result.ci_low() is not None and causal_test_result.ci_high() is not None:
             result_string = (
                 f"{causal_test_result.ci_low()} < {causal_test_result.test_value.value} <  "
diff --git a/causal_testing/testing/causal_test_adequacy.py b/causal_testing/testing/causal_test_adequacy.py
@@ -5,22 +5,13 @@
 from causal_testing.specification.causal_specification import CausalSpecification
 from causal_testing.testing.estimators import Estimator
 from causal_testing.testing.causal_test_case import CausalTestCase
+from causal_testing.testing.causal_test_engine import CausalTestEngine
 from itertools import combinations
-
-
-class CausalTestAdequacy:
-    def __init__(
-        self,
-        causal_specification: CausalSpecification,
-        test_suite: CausalTestSuite,
-    ):
-        self.causal_dag, self.scenario = (
-            causal_specification.causal_dag,
-            causal_specification.scenario,
-        )
-        self.test_suite = test_suite
-        self.estimator = estimator
-        self.dag_adequacy = DAGAdequacy(causal_specification, test_suite)
+from copy import deepcopy
+from sklearn.model_selection import KFold
+from sklearn.metrics import mean_squared_error as mse
+import numpy as np
+from sklearn.model_selection import cross_val_score
 
 
 class DAGAdequacy:
@@ -31,9 +22,74 @@ def __init__(
     ):
         self.causal_dag = causal_specification.causal_dag
         self.test_suite = test_suite
+
+    def measure_adequacy(self):
+        """Compute which pairs of DAG nodes are covered by the test suite.
+
+        Sets `tested_pairs`, `pairs_to_test`, `untested_edges` and `dag_adequacy` (the ratio of the number of
+        tested pairs to the number of pairs to test) on this instance.
+
+        :raises ZeroDivisionError: If the DAG has fewer than two nodes, as there are then no pairs to test.
+        """
+        # NOTE(review): tested pairs are (treatment, outcome) tuples taken from the test cases, whereas the pairs to
+        # test are 2-combinations of the graph's nodes; confirm that the two are directly comparable.
         self.tested_pairs = {
-            (t.base_test_case.treatment_variable, t.base_test_case.outcome_variable) for t in self.causal_test_suite
+            (t.base_test_case.treatment_variable, t.base_test_case.outcome_variable) for t in self.test_suite
         }
-        self.pairs_to_test = set(combinations(dag.graph.nodes, 2))
-        self.untested_edges = pairs_to_test.difference(tested_pairs)
-        self.dag_adequacy = len(tested_pairs) / len(pairs_to_test)
+        self.pairs_to_test = set(combinations(self.causal_dag.graph.nodes, 2))
+        self.untested_edges = self.pairs_to_test.difference(self.tested_pairs)
+        self.dag_adequacy = len(self.tested_pairs) / len(self.pairs_to_test)
+
+
+class DataAdequacy:
+    """Data adequacy measures for a single causal test case, computed by re-sampling the estimator's data."""
+
+    def __init__(self, test_case: CausalTestCase, test_engine: CausalTestEngine, estimator: Estimator):
+        """
+        :param test_case: The causal test case to execute on the re-sampled data.
+        :param test_engine: The engine used to execute the test case.
+        :param estimator: The estimator whose data frame (`estimator.df`) is re-sampled.
+        """
+        self.test_case = test_case
+        self.test_engine = test_engine
+        self.estimator = estimator
+
+    def measure_adequacy_bootstrap(self, bootstrap_size: int = 100):
+        """Execute the test case on bootstrap re-samples of the estimator's data.
+
+        Each re-sample is drawn with replacement, has as many rows as the original data, and is seeded with its
+        iteration index so that repeated calls draw the same re-samples.
+
+        :param bootstrap_size: The number of bootstrap re-samples to draw.
+        :return: A list containing the result of executing the test case on each re-sample.
+        """
+        results = []
+        for i in range(bootstrap_size):
+            # Re-sample a copy so that the data held by self.estimator is left untouched.
+            estimator = deepcopy(self.estimator)
+            estimator.df = estimator.df.sample(len(estimator.df), replace=True, random_state=i)
+            results.append(self.test_engine.execute_test(estimator, self.test_case))
+        return results
+
+    def measure_adequacy_k_folds(self, k: int = 10, random_state=0):
+        """Compute the mean root-mean-squared prediction error of the estimator's model over k shuffled folds.
+
+        :param k: The number of folds.
+        :param random_state: Seed used to shuffle the data before it is split into folds.
+        :return: The mean over the folds of the RMSE between the predicted and the observed outcome.
+        """
+        outcome = self.test_case.base_test_case.outcome_variable.name
+        results = []
+        kf = KFold(n_splits=k, shuffle=True, random_state=random_state)
+        for train_inx, test_inx in kf.split(self.estimator.df):
+            estimator = deepcopy(self.estimator)
+            test = estimator.df.iloc[test_inx]
+            estimator.df = estimator.df.iloc[train_inx]
+            # NOTE(review): the model is not re-fitted on the training fold here, so the predictions come from
+            # whatever model the copied estimator already holds - confirm whether a re-fit is intended.
+            test_result = estimator.model.predict(test)
+            results.append(np.sqrt(mse(test[outcome], test_result)))
+        k_score = np.mean(results)
+        print("K-score", k_score)
+        # Return the score as well as printing it so that callers can make use of it.
+        return k_score
diff --git a/causal_testing/testing/causal_test_outcome.py b/causal_testing/testing/causal_test_outcome.py
@@ -41,8 +41,13 @@ def apply(self, res: CausalTestResult) -> bool:
 class NoEffect(CausalTestOutcome):
     """An extension of TestOutcome representing that the expected causal effect should be zero."""
 
-    def __init__(self, atol: float = 1e-10):
+    def __init__(self, atol: float = 1e-10, ctol: float = 0.05):
+        """
+        :param atol: Arithmetic tolerance. The test will pass if the absolute value of the causal effect is less than atol.
+        :param ctol: Categorical tolerance. The test will pass if the proportion of categories that fail is less than ctol.
+        """
         self.atol = atol
+        self.ctol = ctol
 
     def apply(self, res: CausalTestResult) -> bool:
         if res.test_value.type == "ate":
@@ -52,14 +57,19 @@ def apply(self, res: CausalTestResult) -> bool:
             ci_high = res.ci_high() if isinstance(res.ci_high(), Iterable) else [res.ci_high()]
             value = res.test_value.value if isinstance(res.ci_high(), Iterable) else [res.test_value.value]
 
-            if not all(ci_low < 0 < ci_high for ci_low, ci_high in zip(ci_low, ci_high)):
-                print(
-                    "FAILING ON",
-                    [(ci_low, ci_high) for ci_low, ci_high in zip(ci_low, ci_high) if not ci_low < 0 < ci_high],
-                )
+            # if not all(ci_low < 0 < ci_high for ci_low, ci_high in zip(ci_low, ci_high)):
+            #     print(
+            #         "FAILING ON",
+            #         [(ci_low, ci_high) for ci_low, ci_high in zip(ci_low, ci_high) if not ci_low < 0 < ci_high],
+            #     )
 
-            return all(ci_low < 0 < ci_high for ci_low, ci_high in zip(ci_low, ci_high)) or all(
-                abs(v) < self.atol for v in value
+            return (
+                sum(
+                    not ((ci_low < 0 < ci_high) or abs(v) < self.atol)
+                    for ci_low, ci_high, v in zip(ci_low, ci_high, value)
+                )
+                / len(value)
+                < self.ctol
             )
         if res.test_value.type == "risk_ratio":
             return (res.ci_low() < 1 < res.ci_high()) or np.isclose(res.test_value.value, 1.0, atol=self.atol)
diff --git a/causal_testing/testing/causal_test_result.py b/causal_testing/testing/causal_test_result.py
@@ -72,18 +72,17 @@ def to_dict(self):
         """Return result contents as a dictionary
         :return: Dictionary containing contents of causal_test_result
         """
-        base_dict = {
-            "treatment": self.estimator.treatment[0],
+        return {
+            "treatment": self.estimator.treatment,
             "control_value": self.estimator.control_value,
             "treatment_value": self.estimator.treatment_value,
-            "outcome": self.estimator.outcome[0],
+            "outcome": self.estimator.outcome,
             "adjustment_set": self.adjustment_set,
-            "test_value": self.test_value,
+            "effect_measure": self.test_value.type,
+            "effect_estimate": self.test_value.value,
+            "ci_low": self.ci_low(),
+            "ci_high": self.ci_high(),
         }
-        if self.confidence_intervals and all(self.confidence_intervals):
-            base_dict["ci_low"] = min(self.confidence_intervals)
-            base_dict["ci_high"] = max(self.confidence_intervals)
-        return base_dict
 
     def ci_low(self):
         """Return the lower bracket of the confidence intervals."""
diff --git a/causal_testing/testing/estimators.py b/causal_testing/testing/estimators.py
@@ -150,6 +150,7 @@ def _run_logistic_regression(self, data) -> RegressionResultsWrapper:
                     treatment_and_adjustments_cols, columns=[col], drop_first=True
                 )
         model = smf.logit(formula=self.formula, data=data).fit(disp=0)
+        self.model = model
         return model
 
     def estimate(self, data: pd.DataFrame, adjustment_config: dict = None) -> RegressionResultsWrapper:
@@ -165,7 +166,6 @@ def estimate(self, data: pd.DataFrame, adjustment_config: dict = None) -> Regres
             )
 
         model = self._run_logistic_regression(data)
-        self.model = model
 
         x = pd.DataFrame(columns=self.df.columns)
         x["Intercept"] = 1  # self.intercept
@@ -371,7 +371,6 @@ def estimate_ate(self) -> tuple[float, list[float, float], float]:
         :return: The average treatment effect and the 95% Wald confidence intervals.
         """
         model = self._run_linear_regression()
-        self.model = model
 
         # Create an empty individual for the control and treated
         individuals = pd.DataFrame(1, index=["control", "treated"], columns=model.params.index)
@@ -397,7 +396,6 @@ def estimate_control_treatment(self, adjustment_config: dict = None) -> tuple[pd
             adjustment_config = {}
 
         model = self._run_linear_regression()
-        self.model = model
 
         x = pd.DataFrame(columns=self.df.columns)
         x[self.treatment] = [self.treatment_value, self.control_value]
@@ -447,6 +445,7 @@ def _run_linear_regression(self) -> RegressionResultsWrapper:
         :return: The model after fitting to data.
         """
         model = smf.ols(formula=self.formula, data=self.df).fit()
+        self.model = model
         return model
 
     def _get_confidence_intervals(self, model, treatment):