7
7
8
8
from dataclasses import dataclass
9
9
from pathlib import Path
10
+ from statistics import StatisticsError
10
11
11
12
import pandas as pd
12
13
import scipy
@@ -42,14 +43,15 @@ class JsonUtility:
42
43
:attr {CausalSpecification} causal_specification:
43
44
"""
44
45
45
- def __init__ (self , log_path ):
46
- self .paths = None
46
+ def __init__ (self , output_path : str , output_overwrite : bool = False ):
47
+ self .input_paths = None
47
48
self .variables = None
48
49
self .data = []
49
50
self .test_plan = None
50
51
self .scenario = None
51
52
self .causal_specification = None
52
- self .setup_logger (log_path )
53
+ self .output_path = Path (output_path )
54
+ self .check_file_exists (self .output_path , output_overwrite )
53
55
54
56
def set_paths(self, json_path: str, dag_path: str, data_paths: str):
    """Record the locations of all input files used by later processing.

    :param json_path: string path representation to the JSON test-plan file
    :param dag_path: string path representation to the .dot file containing the Causal DAG
    :param data_paths: string path representation to the data files
    """
    # Bundle the three locations into a single paths object so the rest of
    # the class reads them from one place.
    self.input_paths = JsonClassPaths(
        json_path=json_path,
        dag_path=dag_path,
        data_paths=data_paths,
    )
62
64
63
65
def setup (self , scenario : Scenario ):
64
66
"""Function to populate all the necessary parts of the json_class needed to execute tests"""
65
67
self .scenario = scenario
66
68
self .scenario .setup_treatment_variables ()
67
69
self .causal_specification = CausalSpecification (
68
- scenario = self .scenario , causal_dag = CausalDAG (self .paths .dag_path )
70
+ scenario = self .scenario , causal_dag = CausalDAG (self .input_paths .dag_path )
69
71
)
70
72
self ._json_parse ()
71
73
self ._populate_metas ()
@@ -103,12 +105,16 @@ def generate_tests(self, effects: dict, mutates: dict, estimators: dict, f_flag:
103
105
abstract_test = self ._create_abstract_test_case (test , mutates , effects )
104
106
105
107
concrete_tests , dummy = abstract_test .generate_concrete_tests (5 , 0.05 )
106
- logger .info ("Executing test: %s" , test ["name" ])
107
- logger .info (abstract_test )
108
- logger .info ([abstract_test .treatment_variable .name , abstract_test .treatment_variable .distribution ])
109
- logger .info ("Number of concrete tests for test case: %s" , str (len (concrete_tests )))
110
108
failures = self ._execute_tests (concrete_tests , estimators , test , f_flag )
111
- logger .info ("%s/%s failed for %s\n " , failures , len (concrete_tests ), test ["name" ])
109
+ msg = (
110
+ f"Executing test: { test ['name' ]} \n "
111
+ + "abstract_test \n "
112
+ + f"{ abstract_test } \n "
113
+ + f"{ abstract_test .treatment_variable .name } ,{ abstract_test .treatment_variable .distribution } \n "
114
+ + f"Number of concrete tests for test case: { str (len (concrete_tests ))} \n "
115
+ + f"{ failures } /{ len (concrete_tests )} failed for { test ['name' ]} "
116
+ )
117
+ self ._append_to_file (msg , logging .INFO )
112
118
113
119
def _execute_tests (self , concrete_tests , estimators , test , f_flag ):
114
120
failures = 0
@@ -120,9 +126,9 @@ def _execute_tests(self, concrete_tests, estimators, test, f_flag):
120
126
121
127
def _json_parse (self ):
122
128
"""Parse a JSON input file into inputs, outputs, metas and a test plan"""
123
- with open (self .paths .json_path , encoding = "utf-8" ) as f :
129
+ with open (self .input_paths .json_path , encoding = "utf-8" ) as f :
124
130
self .test_plan = json .load (f )
125
- for data_file in self .paths .data_paths :
131
+ for data_file in self .input_paths .data_paths :
126
132
df = pd .read_csv (data_file , header = 0 )
127
133
self .data .append (df )
128
134
self .data = pd .concat (self .data )
@@ -139,7 +145,7 @@ def _populate_metas(self):
139
145
fitter .fit ()
140
146
(dist , params ) = list (fitter .get_best (method = "sumsquare_error" ).items ())[0 ]
141
147
var .distribution = getattr (scipy .stats , dist )(** params )
142
- logger . info (var .name + f" { dist } ({ params } )" )
148
+ self . _append_to_file (var .name + f" { dist } ({ params } )" , logging . INFO )
143
149
144
150
def _execute_test_case (self , causal_test_case : CausalTestCase , estimator : Estimator , f_flag : bool ) -> bool :
145
151
"""Executes a singular test case, prints the results and returns the test case result
@@ -166,12 +172,13 @@ def _execute_test_case(self, causal_test_case: CausalTestCase, estimator: Estima
166
172
)
167
173
else :
168
174
result_string = f"{ causal_test_result .test_value .value } no confidence intervals"
169
- if f_flag :
170
- assert test_passes , (
171
- f"{ causal_test_case } \n FAILED - expected { causal_test_case .expected_causal_effect } , "
172
- f"got { result_string } "
173
- )
175
+
174
176
if not test_passes :
177
+ if f_flag :
178
+ raise StatisticsError (
179
+ f"{ causal_test_case } \n FAILED - expected { causal_test_case .expected_causal_effect } , "
180
+ f"got { result_string } "
181
+ )
175
182
failed = True
176
183
logger .warning (" FAILED- expected %s, got %s" , causal_test_case .expected_causal_effect , result_string )
177
184
return failed
@@ -211,15 +218,32 @@ def add_modelling_assumptions(self, estimation_model: Estimator): # pylint: dis
211
218
"""
212
219
return
213
220
221
+ def _append_to_file (self , line : str , log_level : int = None ):
222
+ """Appends given line(s) to the current output file. If log_level is specified it also logs that message to the
223
+ logging level.
224
+ :param line: The line or lines of text to be appended to the file
225
+ :param log_level: An integer representing the logging level as specified by pythons inbuilt logging module. It
226
+ is possible to use the inbuilt logging level variables such as logging.INFO and logging.WARNING
227
+ """
228
+ with open (self .output_path , "a" , encoding = "utf-8" ) as f :
229
+ f .write (
230
+ line + "\n " ,
231
+ )
232
+ if log_level :
233
+ logger .log (level = log_level , msg = line )
234
+
214
235
@staticmethod
215
- def setup_logger (log_path : str ):
216
- """Setups up logging instance for the module and adds a FileHandler stream so all stdout prints are also
217
- sent to the logfile
218
- :param log_path: Path specifying location and name of the logging file to be used
236
+ def check_file_exists (output_path : Path , overwrite : bool ):
237
+ """Method that checks if the given path to an output file already exists. If overwrite is true the check is
238
+ passed.
239
+ :param output_path: File path for the output file of the JSON Frontend
240
+ :param overwrite: bool that if true, the current file can be overwritten
219
241
"""
220
- setup_log = logging .getLogger (__name__ )
221
- file_handler = logging .FileHandler (Path (log_path ))
222
- setup_log .addHandler (file_handler )
242
+ if output_path .is_file ():
243
+ if overwrite :
244
+ output_path .unlink ()
245
+ else :
246
+ raise FileExistsError (f"Chosen file output ({ output_path } ) already exists" )
223
247
224
248
@staticmethod
225
249
def get_args (test_args = None ) -> argparse .Namespace :
@@ -235,6 +259,12 @@ def get_args(test_args=None) -> argparse.Namespace:
235
259
help = "if included, the script will stop if a test fails" ,
236
260
action = "store_true" ,
237
261
)
262
+ parser .add_argument (
263
+ "-w" ,
264
+ help = "Specify to overwrite any existing output files. This can lead to the loss of existing outputs if not "
265
+ "careful" ,
266
+ action = "store_true" ,
267
+ )
238
268
parser .add_argument (
239
269
"--log_path" ,
240
270
help = "Specify a directory to change the location of the log file" ,
0 commit comments