Add coefficient causal effect metric (and apply black formatting)

jmafoster1 · jmafoster1 · commit 132a0fee9f97 · 2023-04-13T15:05:43.000+01:00
diff --git a/causal_testing/json_front/json_class.py b/causal_testing/json_front/json_class.py
@@ -22,6 +22,7 @@
 from causal_testing.testing.causal_test_case import CausalTestCase
 from causal_testing.testing.causal_test_engine import CausalTestEngine
 from causal_testing.testing.estimators import Estimator
+from causal_testing.testing.base_test_case import BaseTestCase
 
 logger = logging.getLogger(__name__)
 
@@ -110,20 +111,43 @@ def generate_tests(self, effects: dict, mutates: dict, estimators: dict, f_flag:
         for test in self.test_plan["tests"]:
             if "skip" in test and test["skip"]:
                 continue
-            abstract_test = self._create_abstract_test_case(test, mutates, effects)
 
-            concrete_tests, dummy = abstract_test.generate_concrete_tests(5, 0.05)
-            logger.info("Executing test: %s", test["name"])
-            logger.info(abstract_test)
-            logger.info([abstract_test.treatment_variable.name, abstract_test.treatment_variable.distribution])
-            logger.info("Number of concrete tests for test case: %s", str(len(concrete_tests)))
+            if test["estimate_type"] == "coefficient":
+                base_test_case = BaseTestCase(
+                    treatment_variable=next(self.modelling_scenario.variables[v] for v in test["mutations"]),
+                    outcome_variable=next(self.modelling_scenario.variables[v] for v in test["expectedEffect"]),
+                    effect=test["effect"],
+                )
+                assert len(test["expectedEffect"]) == 1, "Can only have one expected effect."
+                concrete_tests = [
+                    CausalTestCase(
+                        base_test_case=base_test_case,
+                        expected_causal_effect=next(
+                            effects[effect] for variable, effect in test["expectedEffect"].items()
+                        ),
+                        estimate_type="coefficient",
+                        effect_modifier_configuration={
+                            self.modelling_scenario.variables[v] for v in test.get("effect_modifiers", [])
+                        },
+                    )
+                ]
+            else:
+                abstract_test = self._create_abstract_test_case(test, mutates, effects)
+
+                concrete_tests, dummy = abstract_test.generate_concrete_tests(5, 0.05)
+                logger.info("Executing test: %s", test["name"])
+                logger.info(abstract_test)
+                logger.info([abstract_test.treatment_variable.name, abstract_test.treatment_variable.distribution])
+                logger.info("Number of concrete tests for test case: %s", str(len(concrete_tests)))
             failures = self._execute_tests(concrete_tests, estimators, test, f_flag)
             logger.info("%s/%s failed for %s\n", failures, len(concrete_tests), test["name"])
 
     def _execute_tests(self, concrete_tests, estimators, test, f_flag):
         failures = 0
         for concrete_test in concrete_tests:
-            failed = self._execute_test_case(concrete_test, estimators[test["estimator"]], f_flag)
+            failed = self._execute_test_case(
+                concrete_test, estimators[test["estimator"]], f_flag, test.get("conditions", [])
+            )
             if failed:
                 failures += 1
         return failures
@@ -152,7 +176,9 @@ def _populate_metas(self):
                 var.distribution = getattr(scipy.stats, dist)(**params)
                 logger.info(var.name + f" {dist}({params})")
 
-    def _execute_test_case(self, causal_test_case: CausalTestCase, estimator: Estimator, f_flag: bool) -> bool:
+    def _execute_test_case(
+        self, causal_test_case: CausalTestCase, estimator: Estimator, f_flag: bool, conditions: list[str]
+    ) -> bool:
         """Executes a singular test case, prints the results and returns the test case result
         :param causal_test_case: The concrete test case to be executed
         :param f_flag: Failure flag that if True the script will stop executing when a test fails.
@@ -162,7 +188,8 @@ def _execute_test_case(self, causal_test_case: CausalTestCase, estimator: Estima
         """
         failed = False
 
-        causal_test_engine, estimation_model = self._setup_test(causal_test_case, estimator)
+        logger.info(causal_test_case)
+        causal_test_engine, estimation_model = self._setup_test(causal_test_case, estimator, conditions)
         causal_test_result = causal_test_engine.execute_test(
             estimation_model, causal_test_case, estimate_type=causal_test_case.estimate_type
         )
@@ -187,15 +214,23 @@ def _execute_test_case(self, causal_test_case: CausalTestCase, estimator: Estima
             logger.warning("   FAILED- expected %s, got %s", causal_test_case.expected_causal_effect, result_string)
         return failed
 
-    def _setup_test(self, causal_test_case: CausalTestCase, estimator: Estimator) -> tuple[CausalTestEngine, Estimator]:
+    def _setup_test(
+        self, causal_test_case: CausalTestCase, estimator: Estimator, conditions: list[str]
+    ) -> tuple[CausalTestEngine, Estimator]:
         """Create the necessary inputs for a single test case
         :param causal_test_case: The concrete test case to be executed
         :returns:
                 - causal_test_engine - Test Engine instance for the test being run
                 - estimation_model - Estimator instance for the test being run
         """
 
-        data_collector = ObservationalDataCollector(self.modelling_scenario, self.data)
+        # DataFrame.query() raises ValueError on an empty expression, so only filter when conditions
+        # were supplied. Each condition is parenthesised so that joining with "&" cannot change the
+        # precedence of operators inside an individual condition.
+        data = self.data
+        if conditions:
+            data = data.query(" & ".join(f"({condition})" for condition in conditions))
+        data_collector = ObservationalDataCollector(self.modelling_scenario, data)
         causal_test_engine = CausalTestEngine(self.causal_specification, data_collector, index_col=0)
 
         minimal_adjustment_set = self.causal_specification.causal_dag.identification(causal_test_case.base_test_case)
diff --git a/causal_testing/testing/causal_test_case.py b/causal_testing/testing/causal_test_case.py
@@ -27,7 +27,7 @@ def __init__(
         self,
         base_test_case: BaseTestCase,
         expected_causal_effect: CausalTestOutcome,
-        control_value: Any,
+        control_value: Any = None,
         treatment_value: Any = None,
         estimate_type: str = "ate",
         effect_modifier_configuration: dict[Variable:Any] = None,
diff --git a/causal_testing/testing/causal_test_engine.py b/causal_testing/testing/causal_test_engine.py
@@ -174,6 +174,15 @@ def _return_causal_test_results(self, estimate_type, estimator, causal_test_case
                 effect_modifier_configuration=causal_test_case.effect_modifier_configuration,
                 confidence_intervals=confidence_intervals,
             )
+        elif estimate_type == "coefficient":
+            logger.debug("calculating coefficient")
+            coefficient, confidence_intervals = estimator.estimate_unit_ate()
+            causal_test_result = CausalTestResult(
+                estimator=estimator,
+                test_value=TestValue("coefficient", coefficient),
+                effect_modifier_configuration=causal_test_case.effect_modifier_configuration,
+                confidence_intervals=confidence_intervals,
+            )
         elif estimate_type == "ate":
             logger.debug("calculating ate")
             ate, confidence_intervals = estimator.estimate_ate()
diff --git a/causal_testing/testing/causal_test_outcome.py b/causal_testing/testing/causal_test_outcome.py
@@ -26,7 +26,7 @@ class SomeEffect(CausalTestOutcome):
     """An extension of TestOutcome representing that the expected causal effect should not be zero."""
 
     def apply(self, res: CausalTestResult) -> bool:
-        if res.test_value.type == "ate":
+        if res.test_value.type == "ate" or res.test_value.type == "coefficient":
             return (0 < res.ci_low() < res.ci_high()) or (res.ci_low() < res.ci_high() < 0)
         if res.test_value.type == "risk_ratio":
             return (1 < res.ci_low() < res.ci_high()) or (res.ci_low() < res.ci_high() < 1)
diff --git a/causal_testing/testing/estimators.py b/causal_testing/testing/estimators.py
@@ -343,10 +343,11 @@ def estimate_unit_ate(self) -> float:
         :return: The unit average treatment effect and the 95% Wald confidence intervals.
         """
         model = self._run_linear_regression()
+        assert self.treatment in model.params, f"{self.treatment} not in {model.params}"
         unit_effect = model.params[[self.treatment]].values[0]  # Unit effect is the coefficient of the treatment
         [ci_low, ci_high] = self._get_confidence_intervals(model)
 
-        return unit_effect * self.treatment_value - unit_effect * self.control_value, [ci_low, ci_high]
+        return unit_effect, [ci_low, ci_high]
 
     def estimate_ate(self) -> tuple[float, list[float, float], float]:
         """Estimate the average treatment effect of the treatment on the outcome. That is, the change in outcome caused
diff --git a/examples/covasim_/doubling_beta/example_beta.py b/examples/covasim_/doubling_beta/example_beta.py
@@ -48,54 +48,75 @@ def doubling_beta_CATE_on_csv(
     :return results_dict: A nested dictionary containing results (ate and confidence intervals)
                           for association, causation, and counterfactual (if completed).
     """
-    results_dict = {'association': {},
-                    'causation': {}}
+    results_dict = {"association": {}, "causation": {}}
 
     # Read in the observational data, perform identification, and setup the causal_test_engine
     past_execution_df = pd.read_csv(observational_data_path)
     _, causal_test_engine, causal_test_case = engine_setup(observational_data_path)
 
-    linear_regression_estimator = LinearRegressionEstimator('beta', 0.032, 0.016,
-                                                            {'avg_age', 'contacts'},  # We use custom adjustment set
-                                                            'cum_infections',
-                                                            df=past_execution_df,
-                                                            formula="cum_infections ~ beta + np.power(beta, 2) + avg_age + contacts")
+    linear_regression_estimator = LinearRegressionEstimator(
+        "beta",
+        0.032,
+        0.016,
+        {"avg_age", "contacts"},  # We use custom adjustment set
+        "cum_infections",
+        df=past_execution_df,
+        formula="cum_infections ~ beta + np.power(beta, 2) + avg_age + contacts",
+    )
 
     # Add squared terms for beta, since it has a quadratic relationship with cumulative infections
-    causal_test_result = causal_test_engine.execute_test(linear_regression_estimator, causal_test_case, 'ate')
+    causal_test_result = causal_test_engine.execute_test(linear_regression_estimator, causal_test_case, "ate")
 
     # Repeat for association estimate (no adjustment)
-    no_adjustment_linear_regression_estimator = LinearRegressionEstimator('beta', 0.032, 0.016,
-                                                                          set(),
-                                                                          'cum_infections',
-                                                                          df=past_execution_df,
-                                                                          formula="cum_infections ~ beta + np.power(beta, 2)")
-    association_test_result = causal_test_engine.execute_test(no_adjustment_linear_regression_estimator, causal_test_case, 'ate')
+    no_adjustment_linear_regression_estimator = LinearRegressionEstimator(
+        "beta",
+        0.032,
+        0.016,
+        set(),
+        "cum_infections",
+        df=past_execution_df,
+        formula="cum_infections ~ beta + np.power(beta, 2)",
+    )
+    association_test_result = causal_test_engine.execute_test(
+        no_adjustment_linear_regression_estimator, causal_test_case, "ate"
+    )
 
     # Store results for plotting
-    results_dict['association'] = {'ate': association_test_result.test_value.value,
-                                   'cis': association_test_result.confidence_intervals,
-                                   'df': past_execution_df}
-    results_dict['causation'] = {'ate': causal_test_result.test_value.value,
-                                 'cis': causal_test_result.confidence_intervals,
-                                 'df': past_execution_df}
+    results_dict["association"] = {
+        "ate": association_test_result.test_value.value,
+        "cis": association_test_result.confidence_intervals,
+        "df": past_execution_df,
+    }
+    results_dict["causation"] = {
+        "ate": causal_test_result.test_value.value,
+        "cis": causal_test_result.confidence_intervals,
+        "df": past_execution_df,
+    }
 
     if verbose:
         print(f"Association:\n{association_test_result}")
         print(f"Causation:\n{causal_test_result}")
 
     # Repeat causal inference after deleting all rows with treatment value to obtain counterfactual inferences
     if simulate_counterfactuals:
-        counterfactual_past_execution_df = past_execution_df[past_execution_df['beta'] != 0.032]
-        counterfactual_linear_regression_estimator = LinearRegressionEstimator('beta', 0.032, 0.016,
-                                                                               {'avg_age', 'contacts'},
-                                                                               'cum_infections',
-                                                                               df=counterfactual_past_execution_df,
-                                                                               formula="cum_infections ~ beta + np.power(beta, 2) + avg_age + contacts")
-        counterfactual_causal_test_result = causal_test_engine.execute_test(linear_regression_estimator, causal_test_case, 'ate')
-        results_dict['counterfactual'] = {'ate': counterfactual_causal_test_result.test_value.value,
-                                          'cis': counterfactual_causal_test_result.confidence_intervals,
-                                          'df': counterfactual_past_execution_df}
+        counterfactual_past_execution_df = past_execution_df[past_execution_df["beta"] != 0.032]
+        counterfactual_linear_regression_estimator = LinearRegressionEstimator(
+            "beta",
+            0.032,
+            0.016,
+            {"avg_age", "contacts"},
+            "cum_infections",
+            df=counterfactual_past_execution_df,
+            formula="cum_infections ~ beta + np.power(beta, 2) + avg_age + contacts",
+        )
+        counterfactual_causal_test_result = causal_test_engine.execute_test(
+            counterfactual_linear_regression_estimator, causal_test_case, "ate"
+        )
+        results_dict["counterfactual"] = {
+            "ate": counterfactual_causal_test_result.test_value.value,
+            "cis": counterfactual_causal_test_result.confidence_intervals,
+            "df": counterfactual_past_execution_df,
+        }
         if verbose:
             print(f"Counterfactual:\n{counterfactual_causal_test_result}")
 
diff --git a/examples/poisson-line-process/example_poisson_process.py b/examples/poisson-line-process/example_poisson_process.py
@@ -86,9 +86,7 @@ def causal_test_intensity_num_shapes(
     data_collector = ObservationalDataCollector(scenario, pd.read_csv(observational_data_path))
 
     # 7. Create an instance of the causal test engine
-    causal_test_engine = CausalTestEngine(
-        causal_specification, data_collector
-    )
+    causal_test_engine = CausalTestEngine(causal_specification, data_collector)
 
     # 8. Obtain the minimal adjustment set for the causal test case from the causal DAG
     minimal_adjustment_set = causal_dag.identification(causal_test_case.base_test_case)
@@ -121,13 +119,11 @@ def causal_test_intensity_num_shapes(
             outcome=outcome,
             df=data,
             effect_modifiers=causal_test_case.effect_modifier_configuration,
-            formula=f"{outcome} ~ {treatment} + {'+'.join(square_terms + inverse_terms + list([e for e in causal_test_case.effect_modifier_configuration]))} -1"
+            formula=f"{outcome} ~ {treatment} + {'+'.join(square_terms + inverse_terms + list([e for e in causal_test_case.effect_modifier_configuration]))} -1",
         )
 
     # 10. Execute the test
-    causal_test_result = causal_test_engine.execute_test(
-        estimator, causal_test_case, causal_test_case.estimate_type
-    )
+    causal_test_result = causal_test_engine.execute_test(estimator, causal_test_case, causal_test_case.estimate_type)
 
     return causal_test_result
 
diff --git a/tests/specification_tests/test_metamorphic_relations.py b/tests/specification_tests/test_metamorphic_relations.py
@@ -106,9 +106,9 @@ def test_should_cause_metamorphic_relations_correct_spec_one_input(self):
         self.data_collector = SingleInputProgramUnderTestEDC(
             self.scenario, self.default_control_input_config, self.default_treatment_input_config
         )
-        causal_dag.graph.remove_nodes_from(['X2', 'X3'])
-        adj_set = list(causal_dag.direct_effect_adjustment_sets(['X1'], ['Z'])[0])
-        should_cause_MR = ShouldCause('X1', 'Z', adj_set, causal_dag)
+        causal_dag.graph.remove_nodes_from(["X2", "X3"])
+        adj_set = list(causal_dag.direct_effect_adjustment_sets(["X1"], ["Z"])[0])
+        should_cause_MR = ShouldCause("X1", "Z", adj_set, causal_dag)
         should_cause_MR.generate_follow_up(10, -10.0, 10.0, 1)
         test_results = should_cause_MR.execute_tests(self.data_collector)
         should_cause_MR.test_oracle(test_results)
diff --git a/tests/specification_tests/test_variable.py b/tests/specification_tests/test_variable.py
@@ -133,7 +133,6 @@ class Var(Variable):
         var = Var("v", int)
         self.assertEqual(var.typestring(), "Var")
 
-
     def test_copy(self):
         ip = Input("ip", float, norm)
         self.assertTrue(ip.copy() is not ip)
@@ -173,7 +172,7 @@ def test_neg(self):
         self.assertEqual(str(-self.i1), "-i1")
 
     def test_pow(self):
-        self.assertEqual(str(self.i1 ** 5), "i1**5")
+        self.assertEqual(str(self.i1**5), "i1**5")
 
     def test_le(self):
         self.assertEqual(str(self.i1 <= 5), "i1 <= 5")