
Commit d8c21dc

Merge pull request #89 from CITCOM-project/test-coverage-json
Json Frontend Tests
2 parents 030b655 + f2a12fd commit d8c21dc

9 files changed: +203 additions, -63 deletions

causal_testing/generation/abstract_causal_test_case.py

Lines changed: 6 additions & 1 deletion
@@ -144,7 +144,12 @@ def _generate_concrete_tests(
         return concrete_tests, pd.DataFrame(runs, columns=run_columns + ["bin"])
 
     def generate_concrete_tests(
-        self, sample_size: int, target_ks_score: float = None, rct: bool = False, seed: int = 0, hard_max: int = 1000
+        self,
+        sample_size: int,
+        target_ks_score: float = None,
+        rct: bool = False,
+        seed: int = 0,
+        hard_max: int = 1000,
     ) -> tuple[list[CausalTestCase], pd.DataFrame]:
         """Generates a list of `num` concrete test cases.
 
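For reference, a minimal sketch of how the reformatted signature might be called. The `abstract_test` instance is assumed to exist, and the 5 / 0.05 values simply mirror the call made in json_class.py further down this diff.

# Illustrative only: `abstract_test` is an assumed, already-constructed AbstractCausalTestCase.
concrete_tests, runs = abstract_test.generate_concrete_tests(
    sample_size=5,
    target_ks_score=0.05,
    rct=False,
    seed=0,
    hard_max=1000,
)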

causal_testing/json_front/json_class.py

Lines changed: 57 additions & 40 deletions
@@ -67,17 +67,16 @@ def set_path(self, json_path: str, dag_path: str, data_path: str):
         self.dag_path = Path(dag_path)
         self.data_path = Path(data_path)
 
-    def set_variables(self, inputs: dict, outputs: dict, metas: dict, distributions: dict, populates: dict):
+    def set_variables(self, inputs: dict, outputs: dict, metas: dict):
+
         """Populate the Causal Variables
         :param inputs:
         :param outputs:
         :param metas:
-        :param distributions:
-        :param populates:
         """
-        self.inputs = [Input(i["name"], i["type"], distributions[i["distribution"]]) for i in inputs]
+        self.inputs = [Input(i["name"], i["type"], i["distribution"]) for i in inputs]
         self.outputs = [Output(i["name"], i["type"]) for i in outputs]
-        self.metas = [Meta(i["name"], i["type"], populates[i["populate"]]) for i in metas] if metas else []
+        self.metas = [Meta(i["name"], i["type"], i["populate"]) for i in metas] if metas else []
 
     def setup(self):
         """Function to populate all the necessary parts of the json_class needed to execute tests"""
@@ -89,54 +88,58 @@ def setup(self):
         self._json_parse()
         self._populate_metas()
 
-    def execute_tests(self, effects: dict, mutates: dict, estimators: dict, f_flag: bool):
+    def _create_abstract_test_case(self, test, mutates, effects):
+        abstract_test = AbstractCausalTestCase(
+            scenario=self.modelling_scenario,
+            intervention_constraints=[mutates[v](k) for k, v in test["mutations"].items()],
+            treatment_variables={self.modelling_scenario.variables[v] for v in test["mutations"]},
+            expected_causal_effect={
+                self.modelling_scenario.variables[variable]: effects[effect]
+                for variable, effect in test["expectedEffect"].items()
+            },
+            effect_modifiers={self.modelling_scenario.variables[v] for v in test["effect_modifiers"]}
+            if "effect_modifiers" in test
+            else {},
+            estimate_type=test["estimate_type"],
+        )
+        return abstract_test
+
+    def generate_tests(self, effects: dict, mutates: dict, estimators: dict, f_flag: bool):
         """Runs and evaluates each test case specified in the JSON input
 
         :param effects: Dictionary mapping effect class instances to string representations.
         :param mutates: Dictionary mapping mutation functions to string representations.
         :param estimators: Dictionary mapping estimator classes to string representations.
         :param f_flag: Failure flag that if True the script will stop executing when a test fails.
         """
-        executed_tests = 0
         failures = 0
         for test in self.test_plan["tests"]:
             if "skip" in test and test["skip"]:
                 continue
-
-            abstract_test = AbstractCausalTestCase(
-                scenario=self.modelling_scenario,
-                intervention_constraints=[mutates[v](k) for k, v in test["mutations"].items()],
-                treatment_variables={self.modelling_scenario.variables[v] for v in test["mutations"]},
-                expected_causal_effect={
-                    self.modelling_scenario.variables[variable]: effects[effect]
-                    for variable, effect in test["expectedEffect"].items()
-                },
-                effect_modifiers={self.modelling_scenario.variables[v] for v in test["effect_modifiers"]}
-                if "effect_modifiers" in test
-                else {},
-                estimate_type=test["estimate_type"],
-            )
+            abstract_test = self._create_abstract_test_case(test, mutates, effects)
 
             concrete_tests, dummy = abstract_test.generate_concrete_tests(5, 0.05)
             logger.info("Executing test: %s", test["name"])
             logger.info(abstract_test)
             logger.info([(v.name, v.distribution) for v in abstract_test.treatment_variables])
             logger.info("Number of concrete tests for test case: %s", str(len(concrete_tests)))
-            for concrete_test in concrete_tests:
-                executed_tests += 1
-                failed = self._execute_test_case(concrete_test, estimators[test["estimator"]], f_flag)
-                if failed:
-                    failures += 1
+            failures = self._execute_tests(concrete_tests, estimators, test, f_flag)
+
+            logger.info(f"{failures}/{len(concrete_tests)} failed")
 
-        logger.info("{%d}/{%d} failed", failures, executed_tests)
+    def _execute_tests(self, concrete_tests, estimators, test, f_flag):
+        failures = 0
+        for concrete_test in concrete_tests:
+            failed = self._execute_test_case(concrete_test, estimators[test["estimator"]], f_flag)
+            if failed:
+                failures += 1
+        return failures
 
     def _json_parse(self):
-        """Parse a JSON input file into inputs, outputs, metas and a test plan
-        :param distributions: dictionary of user defined scipy distributions
-        :param populates: dictionary of user defined populate functions
-        """
-        with open(self.json_path, encoding="UTF-8") as file:
-            self.test_plan = json.load(file)
+
+        """Parse a JSON input file into inputs, outputs, metas and a test plan"""
+        with open(self.json_path) as f:
+            self.test_plan = json.load(f)
 
         self.data = pd.read_csv(self.data_path)
 
@@ -187,7 +190,9 @@ def _execute_test_case(self, causal_test_case: CausalTestCase, estimator: Estima
         if not test_passes:
             failed = True
             logger.warning(
-                " FAILED- expected %s, got %s", causal_test_case.expected_causal_effect, causal_test_result.ate
+                " FAILED- expected %s, got %s",
+                causal_test_case.expected_causal_effect,
+                causal_test_result.ate,
             )
         return failed
 
@@ -235,25 +240,37 @@ def setup_logger(log_path: str):
         setup_log.addHandler(file_handler)
 
     @staticmethod
-    def get_args() -> argparse.Namespace:
+    def get_args(test_args=None) -> argparse.Namespace:
        """Command-line arguments
 
        :return: parsed command line arguments
        """
        parser = argparse.ArgumentParser(
            description="A script for parsing json config files for the Causal Testing Framework"
        )
-        parser.add_argument("-f", help="if included, the script will stop if a test fails", action="store_true")
+        parser.add_argument(
+            "-f",
+            help="if included, the script will stop if a test fails",
+            action="store_true",
+        )
         parser.add_argument(
             "--log_path",
             help="Specify a directory to change the location of the log file",
             default="./json_frontend.log",
         )
-        parser.add_argument("--data_path", help="Specify path to file containing runtime data", required=True)
         parser.add_argument(
-            "--dag_path", help="Specify path to file containing the DAG, normally a .dot file", required=True
+            "--data_path",
+            help="Specify path to file containing runtime data",
+            required=True,
+        )
+        parser.add_argument(
+            "--dag_path",
+            help="Specify path to file containing the DAG, normally a .dot file",
+            required=True,
        )
        parser.add_argument(
-            "--json_path", help="Specify path to file containing JSON tests, normally a .json file", required=True
+            "--json_path",
+            help="Specify path to file containing JSON tests, normally a .json file",
+            required=True,
        )
-        return parser.parse_args()
+        return parser.parse_args(test_args)
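To make the API change above concrete, here is a minimal sketch of the revised JsonUtility workflow. It is condensed from the new unit test added later in this commit; the file names and variable values are illustrative, not canonical.

import scipy.stats

from causal_testing.json_front.json_class import JsonUtility
from causal_testing.testing.causal_test_outcome import NoEffect
from causal_testing.testing.estimators import LinearRegressionEstimator

json_utility = JsonUtility("logs.log")  # constructor argument is the log file path, as in the new tests
json_utility.set_path("tests.json", "dag.dot", "data.csv")  # json_path, dag_path, data_path

# Distributions (and populate callables for metas) are now supplied inline per variable,
# rather than through separate `distributions`/`populates` lookup dictionaries.
inputs = [{"name": "test_input", "type": float, "distribution": scipy.stats.uniform(0, 10)}]
outputs = [{"name": "test_output", "type": float}]
json_utility.set_variables(inputs, outputs, None)

json_utility.setup()  # parses the JSON test plan and runtime data
json_utility.generate_tests(
    effects={"NoEffect": NoEffect()},
    mutates={},  # mapping from mutation name used in the JSON to a constraint-building callable
    estimators={"LinearRegressionEstimator": LinearRegressionEstimator},
    f_flag=False,  # stop on the first failing test when True
)

The old execute_tests entry point is replaced by generate_tests, which delegates per-test execution to the new private _execute_tests helper.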

causal_testing/testing/causal_test_engine.py

Lines changed: 4 additions & 1 deletion
@@ -36,7 +36,10 @@ def __init__(
     ):
         self.causal_test_case = causal_test_case
         self.treatment_variables = list(self.causal_test_case.control_input_configuration)
-        self.casual_dag, self.scenario = causal_specification.causal_dag, causal_specification.scenario
+        self.casual_dag, self.scenario = (
+            causal_specification.causal_dag,
+            causal_specification.scenario,
+        )
         self.data_collector = data_collector
         self.scenario_execution_data_df = pd.DataFrame()

causal_testing/testing/estimators.py

Lines changed: 5 additions & 1 deletion
@@ -306,6 +306,7 @@ def estimate_unit_ate(self) -> float:
         model = self._run_linear_regression()
         unit_effect = model.params[list(self.treatment)].values[0]  # Unit effect is the coefficient of the treatment
         [ci_low, ci_high] = self._get_confidence_intervals(model)
+
         return unit_effect * self.treatment_values - unit_effect * self.control_values, [ci_low, ci_high]
 
     def estimate_ate(self) -> tuple[float, list[float, float], float]:

@@ -437,7 +438,10 @@ def _run_linear_regression(self) -> RegressionResultsWrapper:
 
     def _get_confidence_intervals(self, model):
         confidence_intervals = model.conf_int(alpha=0.05, cols=None)
-        ci_low, ci_high = confidence_intervals[0][list(self.treatment)], confidence_intervals[1][list(self.treatment)]
+        ci_low, ci_high = (
+            confidence_intervals[0][list(self.treatment)],
+            confidence_intervals[1][list(self.treatment)],
+        )
         return [ci_low.values[0], ci_high.values[0]]

examples/poisson/run_causal_tests.py

Lines changed: 11 additions & 20 deletions
@@ -92,10 +92,11 @@ def populate_num_shapes_unit(data):
     area = data['width'] * data['height']
     data['num_shapes_unit'] = data['num_shapes_abs'] / area
 
+
 inputs = [
-    {"name": "width", "type": float, "distribution": "uniform"},
-    {"name": "height", "type": float, "distribution": "uniform"},
-    {"name": "intensity", "type": float, "distribution": "uniform"}
+    {"name": "width", "type": float, "distribution": scipy.stats.uniform(0, 10)},
+    {"name": "height", "type": float, "distribution": scipy.stats.uniform(0, 10)},
+    {"name": "intensity", "type": float, "distribution": scipy.stats.uniform(0, 10)}
 ]
 
 outputs = [

@@ -104,23 +105,13 @@ def populate_num_shapes_unit(data):
 ]
 
 metas = [
-    {"name": "num_lines_unit", "type": float, "populate": "populate_num_lines_unit"},
-    {"name": "num_shapes_unit", "type": float, "populate": "populate_num_shapes_unit"},
-    {"name": "width_plus_height", "type": float, "populate": "populate_width_height"}
+    {"name": "num_lines_unit", "type": float, "populate": populate_num_lines_unit},
+    {"name": "num_shapes_unit", "type": float, "populate": populate_num_shapes_unit},
+    {"name": "width_plus_height", "type": float, "populate": populate_width_height}
 ]
 
 constraints = ["width > 0", "height > 0", "intensity > 0"]
 
-populates = {
-    "populate_width_height": populate_width_height,
-    "populate_num_lines_unit": populate_num_lines_unit,
-    "populate_num_shapes_unit": populate_num_shapes_unit
-}
-
-distributions = {
-    "uniform": scipy.stats.uniform(0, 10)
-}
-
 effects = {
     "PoissonWidthHeight": PoissonWidthHeight(),
     "Positive": Positive(),

@@ -136,9 +127,9 @@ def populate_num_shapes_unit(data):
 }
 
 # Create input structure required to create a modelling scenario
-modelling_inputs = [Input(i['name'], i['type'], distributions[i['distribution']]) for i in inputs] + \
+modelling_inputs = [Input(i['name'], i['type'], i['distribution']) for i in inputs] + \
                    [Output(i['name'], i['type']) for i in outputs] + \
-                   [Meta(i['name'], i['type'], populates[i['populate']]) for i in metas] if metas else []
+                   [Meta(i['name'], i['type'], [i['populate']]) for i in metas] if metas else list()
 
 # Create modelling scenario to access z3 variable mirrors
 modelling_scenario = Scenario(modelling_inputs, None)

@@ -173,7 +164,7 @@ def add_modelling_assumptions(self, estimation_model: Estimator):
     args.data_path)  # Set the path to the data.csv, dag.dot and causal_tests.json file
 
 # Load the Causal Variables into the JsonUtility class ready to be used in the tests
-json_utility.set_variables(inputs, outputs, metas, distributions, populates)
+json_utility.set_variables(inputs, outputs, metas)
 json_utility.setup()  # Sets up all the necessary parts of the json_class needed to execute tests
 
-json_utility.execute_tests(effects, mutates, estimators, args.f)
+json_utility.generate_tests(effects, mutates, estimators, args.f)
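With these changes the example is driven entirely through the get_args parser shown earlier, so a run of the script might look like the following. The paths are illustrative, echoing the data.csv / dag.dot / causal_tests.json names mentioned in the comment above; adding -f stops the run at the first failing test.

python run_causal_tests.py --data_path data.csv --dag_path dag.dot --json_path causal_tests.json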
New file: unit tests for the JSON frontend (file path not preserved in this view)

Lines changed: 116 additions & 0 deletions
@@ -0,0 +1,116 @@
+import unittest
+from pathlib import Path
+import scipy
+import csv
+import json
+
+from causal_testing.testing.estimators import LinearRegressionEstimator
+from causal_testing.testing.causal_test_outcome import NoEffect
+from tests.test_helpers import create_temp_dir_if_non_existent, remove_temp_dir_if_existent
+from causal_testing.json_front.json_class import JsonUtility
+from causal_testing.specification.variable import Input, Output, Meta
+from causal_testing.specification.scenario import Scenario
+from causal_testing.specification.causal_specification import CausalSpecification
+from causal_testing.generation.abstract_causal_test_case import AbstractCausalTestCase
+
+
+class TestJsonClass(unittest.TestCase):
+    """Test the JSON frontend for the Causal Testing Framework (CTF)
+
+    The JSON frontend is an alternative interface for the CTF where tests are specified in JSON format and ingested
+    with the frontend. Tests involve testing that this correctly interfaces with the framework with some dummy data
+    """
+
+    def setUp(self) -> None:
+        json_file_name = "tests.json"
+        dag_file_name = "dag.dot"
+        data_file_name = "data.csv"
+        test_data_dir_path = Path("tests/resources/data")
+        self.json_path = test_data_dir_path / json_file_name
+        self.dag_path = test_data_dir_path / dag_file_name
+        self.data_path = test_data_dir_path / data_file_name
+        self.json_class = JsonUtility("logs.log")
+        self.example_distribution = scipy.stats.uniform(1, 10)
+        self.input_dict_list = [{"name": "test_input", "type": float, "distribution": self.example_distribution}]
+        self.output_dict_list = [{"name": "test_output", "type": float}]
+        self.meta_dict_list = [{"name": "test_meta", "type": float, "populate": populate_example}]
+        self.json_class.set_variables(self.input_dict_list, self.output_dict_list, None)
+        self.json_class.set_path(self.json_path, self.dag_path, self.data_path)
+
+    def test_setting_paths(self):
+        self.assertEqual(self.json_class.json_path, Path(self.json_path))
+        self.assertEqual(self.json_class.dag_path, Path(self.dag_path))
+        self.assertEqual(self.json_class.data_path, Path(self.data_path))
+
+    def test_set_inputs(self):
+        ctf_input = [Input("test_input", float, self.example_distribution)]
+        self.assertEqual(ctf_input[0].name, self.json_class.inputs[0].name)
+        self.assertEqual(ctf_input[0].datatype, self.json_class.inputs[0].datatype)
+        self.assertEqual(ctf_input[0].distribution, self.json_class.inputs[0].distribution)
+
+    def test_set_outputs(self):
+        ctf_output = [Output("test_output", float)]
+        self.assertEqual(ctf_output[0].name, self.json_class.outputs[0].name)
+        self.assertEqual(ctf_output[0].datatype, self.json_class.outputs[0].datatype)
+
+    def test_set_metas(self):
+        self.json_class.set_variables(self.input_dict_list, self.output_dict_list, self.meta_dict_list)
+        ctf_meta = [Meta("test_meta", float, populate_example)]
+        self.assertEqual(ctf_meta[0].name, self.json_class.metas[0].name)
+        self.assertEqual(ctf_meta[0].datatype, self.json_class.metas[0].datatype)
+
+    def test_argparse(self):
+        args = self.json_class.get_args(["--data_path=data.csv", "--dag_path=dag.dot", "--json_path=tests.json"])
+        self.assertEqual(args.data_path, "data.csv")
+        self.assertEqual(args.dag_path, "dag.dot")
+        self.assertEqual(args.json_path, "tests.json")
+
+    def test_setup_modelling_scenario(self):
+        self.json_class.setup()
+        print(type(self.json_class.modelling_scenario))
+        print(self.json_class.modelling_scenario)
+        self.assertIsInstance(self.json_class.modelling_scenario, Scenario)
+
+    def test_setup_causal_specification(self):
+        self.json_class.setup()
+        self.assertIsInstance(self.json_class.causal_specification, CausalSpecification)
+
+    def test_generate_tests_from_json(self):
+        example_test = {
+            "tests": [
+                {
+                    "name": "test1",
+                    "mutations": {"test_input": "Increase"},
+                    "estimator": "LinearRegressionEstimator",
+                    "estimate_type": "ate",
+                    "effect_modifiers": [],
+                    "expectedEffect": {"test_output": "NoEffect"},
+                    "skip": False,
+                }
+            ]
+        }
+        self.json_class.setup()
+        self.json_class.test_plan = example_test
+        effects = {"NoEffect": NoEffect()}
+        mutates = {
+            "Increase": lambda x: self.json_class.modelling_scenario.treatment_variables[x].z3 >
+            self.json_class.modelling_scenario.variables[x].z3
+        }
+        estimators = {
+            "LinearRegressionEstimator": LinearRegressionEstimator
+        }
+
+        with self.assertLogs() as captured:
+            self.json_class.generate_tests(effects, mutates, estimators, False)
+
+        # Test that the final log message prints that failed tests are printed, which is expected behaviour for this scenario
+        self.assertIn("failed", captured.records[-1].getMessage())
+
+    def tearDown(self) -> None:
+        pass
+        #remove_temp_dir_if_existent()
+
+
+def populate_example(*args, **kwargs):
+    pass

tests/resources/data/dag.dot

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+digraph G { test_input -> B; B -> C; test_output -> test_input; test_output -> C}

tests/resources/data/data.csv

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
+index,test_input,test_output
+0,1,2

tests/resources/data/tests.json

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+{"tests": [{"name": "test1", "mutations": {}, "estimator": null, "estimate_type": null, "effect_modifiers": [], "expectedEffect": {}, "skip": false}]}
