Merge remote-tracking branch 'origin/adjustment_set_formula_check' into adjustment_set_formula_check

christopher-wild · christopher-wild · commit e4ec9d665129 · 2023-08-15T16:02:35.000+01:00
diff --git a/causal_testing/testing/causal_test_adequacy.py b/causal_testing/testing/causal_test_adequacy.py
@@ -26,16 +26,26 @@ def __init__(
         self.test_suite = test_suite
         self.tested_pairs = None
         self.pairs_to_test = None
-        self.untested_edges = None
+        self.untested_pairs = None
         self.dag_adequacy = None
 
     def measure_adequacy(self):
         """
-        Calculate the adequacy measurement, and populate the `dat_adequacy` field.
+        Calculate the adequacy measurement, and populate the `dag_adequacy` field.
         """
-        self.tested_pairs = {(t.treatment_variable, t.outcome_variable) for t in self.test_suite}
-        self.pairs_to_test = set(combinations(self.causal_dag.graph.nodes, 2))
-        self.untested_edges = self.pairs_to_test.difference(self.tested_pairs)
+        self.pairs_to_test = set(combinations(self.causal_dag.graph.nodes(), 2))
+        self.tested_pairs = set()
+
+        for n1, n2 in self.pairs_to_test:
+            if (n1, n2) in self.causal_dag.graph.edges():
+                if any((t.treatment_variable, t.outcome_variable) == (n1, n2) for t in self.test_suite):
+                    self.tested_pairs.add((n1, n2))
+            else:
+                # Independence is symmetric, so a test in either direction covers this pair
+                if any((t.treatment_variable, t.outcome_variable) in {(n1, n2), (n2, n1)} for t in self.test_suite):
+                    self.tested_pairs.add((n1, n2))
+
+        self.untested_pairs = self.pairs_to_test.difference(self.tested_pairs)
         self.dag_adequacy = len(self.tested_pairs) / len(self.pairs_to_test)
 
     def to_dict(self):
@@ -45,7 +55,7 @@ def to_dict(self):
             "test_suite": self.test_suite,
             "tested_pairs": self.tested_pairs,
             "pairs_to_test": self.pairs_to_test,
-            "untested_edges": self.untested_edges,
+            "untested_pairs": self.untested_pairs,
             "dag_adequacy": self.dag_adequacy,
         }
 
diff --git a/causal_testing/testing/causal_test_case.py b/causal_testing/testing/causal_test_case.py
@@ -57,22 +57,6 @@ def __init__(
         else:
             self.effect_modifier_configuration = {}
 
-    def get_treatment_variable(self):
-        """Return the treatment variable name (as string) for this causal test case"""
-        return self.treatment_variable.name
-
-    def get_outcome_variable(self):
-        """Return the outcome variable name (as string) for this causal test case."""
-        return self.outcome_variable.name
-
-    def get_control_value(self):
-        """Return a the control value of the treatment variable in this causal test case."""
-        return self.control_value
-
-    def get_treatment_value(self):
-        """Return the treatment value of the treatment variable in this causal test case."""
-        return self.treatment_value
-
     def execute_test(self, estimator: type(Estimator), data_collector: DataCollector) -> CausalTestResult:
         """Execute a causal test case and return the causal test result.
 
diff --git a/causal_testing/testing/causal_test_result.py b/causal_testing/testing/causal_test_result.py
@@ -85,9 +85,11 @@ def to_dict(self, json=False):
             "outcome": self.estimator.outcome,
             "adjustment_set": list(self.adjustment_set) if json else self.adjustment_set,
             "effect_measure": self.test_value.type,
-            "effect_estimate": self.test_value.value,
-            "ci_low": self.ci_low(),
-            "ci_high": self.ci_high(),
+            "effect_estimate": self.test_value.value.to_dict()
+            if json and hasattr(self.test_value.value, "to_dict")
+            else self.test_value.value,
+            "ci_low": self.ci_low().to_dict() if json and hasattr(self.ci_low(), "to_dict") else self.ci_low(),
+            "ci_high": self.ci_high().to_dict() if json and hasattr(self.ci_high(), "to_dict") else self.ci_high(),
         }
         if self.adequacy:
             base_dict["adequacy"] = self.adequacy.to_dict()
diff --git a/causal_testing/testing/estimators.py b/causal_testing/testing/estimators.py
@@ -161,13 +161,16 @@ def estimate(self, data: pd.DataFrame, adjustment_config: dict = None) -> Regres
         # x = x[model.params.index]
         return model.predict(x)
 
-    def estimate_control_treatment(self, bootstrap_size, adjustment_config) -> tuple[pd.Series, pd.Series]:
+    def estimate_control_treatment(
+        self, adjustment_config: dict = None, bootstrap_size: int = 100
+    ) -> tuple[pd.Series, pd.Series]:
         """Estimate the outcomes under control and treatment.
 
         :return: The estimated control and treatment values and their confidence
         intervals in the form ((ci_low, control, ci_high), (ci_low, treatment, ci_high)).
         """
-
+        if adjustment_config is None:
+            adjustment_config = {}
         y = self.estimate(self.df, adjustment_config=adjustment_config)
 
         try:
@@ -197,18 +200,16 @@ def estimate_control_treatment(self, bootstrap_size, adjustment_config) -> tuple
 
         return (y.iloc[1], np.array(control)), (y.iloc[0], np.array(treatment))
 
-    def estimate_ate(self, estimator_params: dict = None) -> float:
+    def estimate_ate(self, adjustment_config: dict = None, bootstrap_size: int = 100) -> float:
         """Estimate the ate effect of the treatment on the outcome. That is, the change in outcome caused
         by changing the treatment variable from the control value to the treatment value. Here, we actually
         calculate the expected outcomes under control and treatment and take one away from the other. This
         allows for custom terms to be put in such as squares, inverses, products, etc.
 
         :return: The estimated average treatment effect and 95% confidence intervals
         """
-        if estimator_params is None:
-            estimator_params = {}
-        bootstrap_size = estimator_params.get("bootstrap_size", 100)
-        adjustment_config = estimator_params.get("adjustment_config", None)
+        if adjustment_config is None:
+            adjustment_config = {}
         (control_outcome, control_bootstraps), (
             treatment_outcome,
             treatment_bootstraps,
@@ -231,18 +232,16 @@ def estimate_ate(self, estimator_params: dict = None) -> float:
 
         return estimate, (ci_low, ci_high)
 
-    def estimate_risk_ratio(self, estimator_params: dict = None) -> float:
+    def estimate_risk_ratio(self, adjustment_config: dict = None, bootstrap_size: int = 100) -> float:
         """Estimate the ate effect of the treatment on the outcome. That is, the change in outcome caused
         by changing the treatment variable from the control value to the treatment value. Here, we actually
         calculate the expected outcomes under control and treatment and divide one by the other. This
         allows for custom terms to be put in such as squares, inverses, products, etc.
 
         :return: The estimated risk ratio and 95% confidence intervals.
         """
-        if estimator_params is None:
-            estimator_params = {}
-        bootstrap_size = estimator_params.get("bootstrap_size", 100)
-        adjustment_config = estimator_params.get("adjustment_config", None)
+        if adjustment_config is None:
+            adjustment_config = {}
         (control_outcome, control_bootstraps), (
             treatment_outcome,
             treatment_bootstraps,
@@ -374,7 +373,6 @@ def estimate_control_treatment(self, adjustment_config: dict = None) -> tuple[pd
         """
         if adjustment_config is None:
             adjustment_config = {}
-
         model = self._run_linear_regression()
 
         x = pd.DataFrame(columns=self.df.columns)
@@ -393,13 +391,15 @@ def estimate_control_treatment(self, adjustment_config: dict = None) -> tuple[pd
 
         return y.iloc[1], y.iloc[0]
 
-    def estimate_risk_ratio(self) -> tuple[float, list[float, float]]:
+    def estimate_risk_ratio(self, adjustment_config: dict = None) -> tuple[float, list[float, float]]:
         """Estimate the risk_ratio effect of the treatment on the outcome. That is, the change in outcome caused
         by changing the treatment variable from the control value to the treatment value.
 
         :return: The average treatment effect and the 95% Wald confidence intervals.
         """
-        control_outcome, treatment_outcome = self.estimate_control_treatment()
+        if adjustment_config is None:
+            adjustment_config = {}
+        control_outcome, treatment_outcome = self.estimate_control_treatment(adjustment_config=adjustment_config)
         ci_low = treatment_outcome["mean_ci_lower"] / control_outcome["mean_ci_upper"]
         ci_high = treatment_outcome["mean_ci_upper"] / control_outcome["mean_ci_lower"]
 
@@ -413,6 +413,8 @@ def estimate_ate_calculated(self, adjustment_config: dict = None) -> tuple[float
 
         :return: The average treatment effect and the 95% Wald confidence intervals.
         """
+        if adjustment_config is None:
+            adjustment_config = {}
         control_outcome, treatment_outcome = self.estimate_control_treatment(adjustment_config=adjustment_config)
         ci_low = treatment_outcome["mean_ci_lower"] - control_outcome["mean_ci_upper"]
         ci_high = treatment_outcome["mean_ci_upper"] - control_outcome["mean_ci_lower"]
diff --git a/examples/poisson-line-process/example_poisson_process.py b/examples/poisson-line-process/example_poisson_process.py
@@ -90,8 +90,8 @@ def causal_test_intensity_num_shapes(
     # 8. Set up an estimator
     data = pd.read_csv(observational_data_path)
 
-    treatment = causal_test_case.get_treatment_variable()
-    outcome = causal_test_case.get_outcome_variable()
+    treatment = causal_test_case.treatment_variable.name
+    outcome = causal_test_case.outcome_variable.name
 
     estimator = None
     if empirical:
diff --git a/pyproject.toml b/pyproject.toml
@@ -20,7 +20,7 @@ dependencies = [
     "fitter~=1.4",
     "lhsmdu~=1.1",
     "networkx~=2.6",
-    "numpy~=1.22.0",
+    "numpy~=1.23",
     "pandas~=1.3",
     "scikit_learn~=1.1",
     "scipy~=1.7",
diff --git a/tests/testing_tests/test_causal_test_adequacy.py b/tests/testing_tests/test_causal_test_adequacy.py
@@ -107,10 +107,10 @@ def test_data_adequacy_cateogorical(self):
             {"kurtosis": {"test_input_no_dist[T.b]": 0.0}, "bootstrap_size": 100, "passing": 100},
         )
 
-    def test_dag_adequacy(self):
+    def test_dag_adequacy_dependent(self):
         base_test_case = BaseTestCase(
             treatment_variable="test_input",
-            outcome_variable="test_output",
+            outcome_variable="B",
             effect=None,
         )
         causal_test_case = CausalTestCase(
@@ -128,29 +128,127 @@ def test_dag_adequacy(self):
             {
                 "causal_dag": self.json_class.causal_specification.causal_dag,
                 "test_suite": test_suite,
-                "tested_pairs": {("test_input", "test_output")},
+                "tested_pairs": {("test_input", "B")},
                 "pairs_to_test": {
+                    ("B", "C"),
                     ("test_input_no_dist", "test_input"),
+                    ("C", "test_output"),
                     ("test_input", "B"),
-                    ("test_input_no_dist", "C"),
                     ("test_input_no_dist", "B"),
+                    ("test_input", "test_output"),
                     ("test_input", "C"),
-                    ("B", "C"),
                     ("test_input_no_dist", "test_output"),
+                    ("B", "test_output"),
+                    ("test_input_no_dist", "C"),
+                },
+                "untested_pairs": {
+                    ("B", "C"),
+                    ("test_input_no_dist", "test_input"),
+                    ("C", "test_output"),
+                    ("test_input_no_dist", "B"),
                     ("test_input", "test_output"),
+                    ("test_input", "C"),
+                    ("test_input_no_dist", "test_output"),
+                    ("B", "test_output"),
+                    ("test_input_no_dist", "C"),
+                },
+                "dag_adequacy": 0.1,
+            },
+        )
+
+    def test_dag_adequacy_independent(self):
+        base_test_case = BaseTestCase(
+            treatment_variable="test_input",
+            outcome_variable="C",
+            effect=None,
+        )
+        causal_test_case = CausalTestCase(
+            base_test_case=base_test_case,
+            expected_causal_effect=None,
+            estimate_type=None,
+        )
+        test_suite = CausalTestSuite()
+        test_suite.add_test_object(base_test_case, causal_test_case, None, None)
+        dag_adequacy = DAGAdequacy(self.json_class.causal_specification.causal_dag, test_suite)
+        dag_adequacy.measure_adequacy()
+        print(dag_adequacy.to_dict())
+        self.assertEqual(
+            dag_adequacy.to_dict(),
+            {
+                "causal_dag": self.json_class.causal_specification.causal_dag,
+                "test_suite": test_suite,
+                "tested_pairs": {("test_input", "C")},
+                "pairs_to_test": {
+                    ("B", "C"),
+                    ("test_input_no_dist", "test_input"),
                     ("C", "test_output"),
+                    ("test_input", "B"),
+                    ("test_input_no_dist", "B"),
+                    ("test_input", "test_output"),
+                    ("test_input", "C"),
+                    ("test_input_no_dist", "test_output"),
                     ("B", "test_output"),
+                    ("test_input_no_dist", "C"),
                 },
-                "untested_edges": {
+                "untested_pairs": {
+                    ("B", "C"),
                     ("test_input_no_dist", "test_input"),
+                    ("C", "test_output"),
+                    ("test_input_no_dist", "B"),
+                    ("test_input", "test_output"),
                     ("test_input", "B"),
+                    ("test_input_no_dist", "test_output"),
+                    ("B", "test_output"),
                     ("test_input_no_dist", "C"),
+                },
+                "dag_adequacy": 0.1,
+            },
+        )
+
+    def test_dag_adequacy_independent_other_way(self):
+        base_test_case = BaseTestCase(
+            treatment_variable="C",
+            outcome_variable="test_input",
+            effect=None,
+        )
+        causal_test_case = CausalTestCase(
+            base_test_case=base_test_case,
+            expected_causal_effect=None,
+            estimate_type=None,
+        )
+        test_suite = CausalTestSuite()
+        test_suite.add_test_object(base_test_case, causal_test_case, None, None)
+        dag_adequacy = DAGAdequacy(self.json_class.causal_specification.causal_dag, test_suite)
+        dag_adequacy.measure_adequacy()
+        print(dag_adequacy.to_dict())
+        self.assertEqual(
+            dag_adequacy.to_dict(),
+            {
+                "causal_dag": self.json_class.causal_specification.causal_dag,
+                "test_suite": test_suite,
+                "tested_pairs": {("test_input", "C")},
+                "pairs_to_test": {
+                    ("B", "C"),
+                    ("test_input_no_dist", "test_input"),
+                    ("C", "test_output"),
+                    ("test_input", "B"),
                     ("test_input_no_dist", "B"),
+                    ("test_input", "test_output"),
                     ("test_input", "C"),
-                    ("B", "C"),
                     ("test_input_no_dist", "test_output"),
+                    ("B", "test_output"),
+                    ("test_input_no_dist", "C"),
+                },
+                "untested_pairs": {
+                    ("B", "C"),
+                    ("test_input_no_dist", "test_input"),
                     ("C", "test_output"),
+                    ("test_input_no_dist", "B"),
+                    ("test_input", "test_output"),
+                    ("test_input", "B"),
+                    ("test_input_no_dist", "test_output"),
                     ("B", "test_output"),
+                    ("test_input_no_dist", "C"),
                 },
                 "dag_adequacy": 0.1,
             },
diff --git a/tests/testing_tests/test_causal_test_case.py b/tests/testing_tests/test_causal_test_case.py
@@ -37,18 +37,6 @@ def setUp(self) -> None:
             treatment_value=1,
         )
 
-    def test_get_treatment_variable(self):
-        self.assertEqual(self.causal_test_case.get_treatment_variable(), "A")
-
-    def test_get_outcome_variable(self):
-        self.assertEqual(self.causal_test_case.get_outcome_variable(), "C")
-
-    def test_get_treatment_value(self):
-        self.assertEqual(self.causal_test_case.get_treatment_value(), 1)
-
-    def test_get_control_value(self):
-        self.assertEqual(self.causal_test_case.get_control_value(), 0)
-
     def test_str(self):
         self.assertEqual(
             str(self.causal_test_case),
diff --git a/tests/testing_tests/test_estimators.py b/tests/testing_tests/test_estimators.py
@@ -124,15 +124,15 @@ def test_ate_adjustment(self):
         logistic_regression_estimator = LogisticRegressionEstimator(
             "length_in", 65, 55, {"large_gauge"}, "completed", df
         )
-        ate, _ = logistic_regression_estimator.estimate_ate(estimator_params={"adjustment_config": {"large_gauge": 0}})
+        ate, _ = logistic_regression_estimator.estimate_ate(adjustment_config = {"large_gauge": 0})
         self.assertEqual(round(ate, 4), -0.3388)
 
     def test_ate_invalid_adjustment(self):
         df = self.scarf_df.copy()
         logistic_regression_estimator = LogisticRegressionEstimator("length_in", 65, 55, {}, "completed", df)
         with self.assertRaises(ValueError):
             ate, _ = logistic_regression_estimator.estimate_ate(
-                estimator_params={"adjustment_config": {"large_gauge": 0}}
+                adjustment_config = {"large_gauge": 0}
             )
 
     def test_ate_effect_modifiers(self):
@@ -392,8 +392,9 @@ def test_program_15_no_interaction_ate_calculated(self):
         )
         # terms_to_square = ["age", "wt71", "smokeintensity", "smokeyrs"]
         # for term_to_square in terms_to_square:
+
         ate, [ci_low, ci_high] = linear_regression_estimator.estimate_ate_calculated(
-            {k: self.nhefs_df.mean()[k] for k in covariates}
+            adjustment_config = {k: self.nhefs_df.mean()[k] for k in covariates}
         )
         self.assertEqual(round(ate, 1), 3.5)
         self.assertEqual([round(ci_low, 1), round(ci_high, 1)], [1.9, 5])