Merge branch 'main' of github.com:CITCOM-project/CausalTestingFramework into json-cate

jmafoster1 · jmafoster1 · commit 651a3588814d · 2023-04-24T10:48:07.000+01:00
diff --git a/causal_testing/json_front/json_class.py b/causal_testing/json_front/json_class.py
@@ -5,6 +5,7 @@
 import json
 import logging
 
+from collections.abc import Iterable, Mapping
 from dataclasses import dataclass
 from pathlib import Path
 from statistics import StatisticsError
@@ -154,10 +155,11 @@ def generate_tests(self, effects: dict, mutates: dict, estimators: dict, f_flag:
 
     def _execute_tests(self, concrete_tests, estimators, test, f_flag):
         failures = 0
+        test["estimator"] = estimators[test["estimator"]]
+        if "formula" in test:
+            self._append_to_file(f"Estimator formula used for test: {test['formula']}")
         for concrete_test in concrete_tests:
-            failed = self._execute_test_case(
-                concrete_test, estimators[test["estimator"]], f_flag, test.get("conditions", [])
-            )
+            failed = self._execute_test_case(concrete_test, test, f_flag)
             if failed:
                 failures += 1
         return failures
@@ -178,19 +180,26 @@ def _populate_metas(self):
         for meta in self.scenario.variables_of_type(Meta):
             meta.populate(self.data)
 
-    def _execute_test_case(
-        self, causal_test_case: CausalTestCase, estimator: Estimator, f_flag: bool, conditions: list[str]
-    ) -> bool:
+    def _execute_test_case(self, causal_test_case: CausalTestCase, test: Iterable[Mapping], f_flag: bool) -> bool:
         """Executes a singular test case, prints the results and returns the test case result
         :param causal_test_case: The concrete test case to be executed
+        :param test: Single JSON test definition stored in a mapping (dict)
         :param f_flag: Failure flag that if True the script will stop executing when a test fails.
         :return: A boolean that if True indicates the causal test case passed and if false indicates the test case
          failed.
         :rtype: bool
         """
         failed = False
 
-        causal_test_engine, estimation_model = self._setup_test(causal_test_case, estimator, conditions)
+        for var in self.scenario.variables_of_type(Meta).union(self.scenario.variables_of_type(Output)):
+            if not var.distribution:
+                fitter = Fitter(self.data[var.name], distributions=get_common_distributions())
+                fitter.fit()
+                (dist, params) = list(fitter.get_best(method="sumsquare_error").items())[0]
+                var.distribution = getattr(scipy.stats, dist)(**params)
+                self._append_to_file(var.name + f" {dist}({params})", logging.INFO)
+
+        causal_test_engine, estimation_model = self._setup_test(causal_test_case, test)
         causal_test_result = causal_test_engine.execute_test(
             estimation_model, causal_test_case, estimate_type=causal_test_case.estimate_type
         )
@@ -216,11 +225,10 @@ def _execute_test_case(
             logger.warning("   FAILED- expected %s, got %s", causal_test_case.expected_causal_effect, result_string)
         return failed
 
-    def _setup_test(
-        self, causal_test_case: CausalTestCase, estimator: Estimator, conditions: list[str]
-    ) -> tuple[CausalTestEngine, Estimator]:
+    def _setup_test(self, causal_test_case: CausalTestCase, test: Mapping, conditions: list[str]=None) -> tuple[CausalTestEngine, Estimator]:
         """Create the necessary inputs for a single test case
         :param causal_test_case: The concrete test case to be executed
+        :param test: Single JSON test definition stored in a mapping (dict)
         :returns:
                 - causal_test_engine - Test Engine instance for the test being run
                 - estimation_model - Estimator instance for the test being run
@@ -234,27 +242,21 @@ def _setup_test(
         minimal_adjustment_set = self.causal_specification.causal_dag.identification(causal_test_case.base_test_case)
         treatment_var = causal_test_case.treatment_variable
         minimal_adjustment_set = minimal_adjustment_set - {treatment_var}
-        estimation_model = estimator(
-            treatment=treatment_var.name,
-            treatment_value=causal_test_case.treatment_value,
-            control_value=causal_test_case.control_value,
-            adjustment_set=minimal_adjustment_set,
-            outcome=causal_test_case.outcome_variable.name,
-            df=causal_test_engine.scenario_execution_data_df,
-            effect_modifiers=causal_test_case.effect_modifier_configuration,
-        )
-
-        self.add_modelling_assumptions(estimation_model)
-
+        estimator_kwargs = {
+            "treatment": treatment_var.name,
+            "treatment_value": causal_test_case.treatment_value,
+            "control_value": causal_test_case.control_value,
+            "adjustment_set": minimal_adjustment_set,
+            "outcome": causal_test_case.outcome_variable.name,
+            "df": causal_test_engine.scenario_execution_data_df,
+            "effect_modifiers": causal_test_case.effect_modifier_configuration,
+        }
+        if "formula" in test:
+            estimator_kwargs["formula"] = test["formula"]
+
+        estimation_model = test["estimator"](**estimator_kwargs)
         return causal_test_engine, estimation_model
 
-    def add_modelling_assumptions(self, estimation_model: Estimator):  # pylint: disable=unused-argument
-        """Optional abstract method where user functionality can be written to determine what assumptions are required
-        for specific test cases
-        :param estimation_model: estimator model instance for the current running test.
-        """
-        return
-
     def _append_to_file(self, line: str, log_level: int = None):
         """Appends given line(s) to the current output file. If log_level is specified it also logs that message to the
         logging level.
@@ -263,9 +265,7 @@ def _append_to_file(self, line: str, log_level: int = None):
         is possible to use the inbuilt logging level variables such as logging.INFO and logging.WARNING
         """
         with open(self.output_path, "a", encoding="utf-8") as f:
-            f.write(
-                line + "\n",
-            )
+            f.write(line)
         if log_level:
             logger.log(level=log_level, msg=line)
 
diff --git a/causal_testing/testing/estimators.py b/causal_testing/testing/estimators.py
@@ -312,7 +312,7 @@ def __init__(
             self.formula = formula
         else:
             terms = [treatment] + sorted(list(adjustment_set)) + sorted(list(effect_modifiers))
-            self.formula = f"{outcome} ~ {'+'.join(((terms)))}"
+            self.formula = f"{outcome} ~ {'+'.join(terms)}"
 
         for term in self.effect_modifiers:
             self.adjustment_set.add(term)
diff --git a/examples/poisson/example_run_causal_tests.py b/examples/poisson/example_run_causal_tests.py
@@ -149,15 +149,6 @@ def populate_num_shapes_unit(data):
 }
 
 
-class MyJsonUtility(JsonUtility):
-    """Extension of JsonUtility class to add modelling assumptions to the estimator instance"""
-
-    def add_modelling_assumptions(self, estimation_model: Estimator):
-        # Add squared intensity term as a modelling assumption if intensity is the treatment of the test
-        if "intensity" in estimation_model.treatment[0]:
-            estimation_model.intercept = 0
-
-
 def test_run_causal_tests():
     ROOT = os.path.realpath(os.path.dirname(__file__))
 
@@ -166,7 +157,7 @@ def test_run_causal_tests():
     dag_path = f"{ROOT}/dag.dot"
     data_path = f"{ROOT}/data.csv"
 
-    json_utility = MyJsonUtility(log_path)  # Create an instance of the extended JsonUtility class
+    json_utility = JsonUtility(log_path)  # Create an instance of the JsonUtility class
     json_utility.set_paths(
         json_path, dag_path, [data_path]
     )  # Set the path to the data.csv, dag.dot and causal_tests.json file
@@ -178,8 +169,8 @@ def test_run_causal_tests():
 
 
 if __name__ == "__main__":
-    args = MyJsonUtility.get_args()
-    json_utility = MyJsonUtility(args.log_path)  # Create an instance of the extended JsonUtility class
+    args = JsonUtility.get_args()
+    json_utility = JsonUtility(args.log_path)  # Create an instance of the JsonUtility class
     json_utility.set_paths(
         args.json_path, args.dag_path, args.data_path
     )  # Set the path to the data.csv, dag.dot and causal_tests.json file
diff --git a/tests/json_front_tests/test_json_class.py b/tests/json_front_tests/test_json_class.py
@@ -6,7 +6,7 @@
 import json
 
 from causal_testing.testing.estimators import LinearRegressionEstimator
-from causal_testing.testing.causal_test_outcome import NoEffect
+from causal_testing.testing.causal_test_outcome import NoEffect, Positive
 from tests.test_helpers import create_temp_dir_if_non_existent, remove_temp_dir_if_existent
 from causal_testing.json_front.json_class import JsonUtility, CausalVariables
 from causal_testing.specification.variable import Input, Output, Meta
@@ -186,9 +186,36 @@ def test_generate_tests_from_json_no_dist(self):
             temp_out = reader.readlines()
         self.assertIn("failed", temp_out[-1])
 
+    def test_formula_in_json_test(self):
+        example_test = {
+            "tests": [
+                {
+                    "name": "test1",
+                    "mutations": {"test_input": "Increase"},
+                    "estimator": "LinearRegressionEstimator",
+                    "estimate_type": "ate",
+                    "effect_modifiers": [],
+                    "expectedEffect": {"test_output": "Positive"},
+                    "skip": False,
+                    "formula": "test_output ~ test_input"
+                }
+            ]
+        }
+        self.json_class.test_plan = example_test
+        effects = {"Positive": Positive()}
+        mutates = {
+            "Increase": lambda x: self.json_class.scenario.treatment_variables[x].z3
+                                  > self.json_class.scenario.variables[x].z3
+        }
+        estimators = {"LinearRegressionEstimator": LinearRegressionEstimator}
+
+        self.json_class.generate_tests(effects, mutates, estimators, False)
+        with open("temp_out.txt", 'r') as reader:
+            temp_out = reader.readlines()
+        self.assertIn("test_output ~ test_input", ''.join(temp_out))
+
     def tearDown(self) -> None:
-        pass
-        # remove_temp_dir_if_existent()
+        remove_temp_dir_if_existent()
 
 
 def populate_example(*args, **kwargs):