Merge branch 'json_read_multiple_files' of github.com:CITCOM-project/CausalTestingFramework into json_read_multiple_files

jmafoster1 · jmafoster1 · commit 555e8309826f · 2023-03-13T14:11:57.000Z
diff --git a/causal_testing/data_collection/data_collector.py b/causal_testing/data_collection/data_collector.py
@@ -122,14 +122,15 @@ def run_system_with_input_configuration(self, input_configuration: dict) -> pd.D
 
 
 class ObservationalDataCollector(DataCollector):
-    """A data collector that extracts data that is relevant to the specified scenario from a csv of execution data."""
+    """A data collector that extracts data that is relevant to the specified scenario from a dataframe of execution
+    data."""
 
     def __init__(self, scenario: Scenario, data: pd.DataFrame):
         super().__init__(scenario)
         self.data = data
 
     def collect_data(self, **kwargs) -> pd.DataFrame:
-        """Read a csv containing execution data for the system-under-test into a pandas dataframe and filter to remove
+        """Read a pandas dataframe and filter to remove
         any data which is invalid for the scenario-under-test.
 
         Data is invalid if it does not meet the constraints outlined in the scenario-under-test (Scenario).
diff --git a/causal_testing/json_front/json_class.py b/causal_testing/json_front/json_class.py
@@ -4,7 +4,6 @@
 import argparse
 import json
 import logging
-import tempfile
 
 from abc import ABC
 from dataclasses import dataclass
@@ -47,7 +46,7 @@ class JsonUtility(ABC):
     def __init__(self, log_path):
         self.paths = None
         self.variables = None
-        self.data = list()
+        self.data = []
         self.test_plan = None
         self.modelling_scenario = None
         self.causal_specification = None
@@ -137,6 +136,7 @@ def _json_parse(self):
             df = pd.read_csv(data_file, header=0)
             self.data.append(df)
         self.data = pd.concat(self.data)
+
     def _populate_metas(self):
         """
         Populate data with meta-variable values and add distributions to Causal Testing Framework Variables
@@ -255,7 +255,7 @@ def get_args(test_args=None) -> argparse.Namespace:
             "--data_path",
             help="Specify path to file containing runtime data",
             required=True,
-            nargs='+',
+            nargs="+",
         )
         parser.add_argument(
             "--dag_path",
@@ -286,7 +286,7 @@ class JsonClassPaths:
     def __init__(self, json_path: str, dag_path: str, data_paths: str):
         self.json_path = Path(json_path)
         self.dag_path = Path(dag_path)
-        self.data_paths = [Path(path) for path in [data_paths]]
+        self.data_paths = [Path(path) for path in data_paths]
 
 
 @dataclass()
diff --git a/tests/data_collection_tests/test_observational_data_collector.py b/tests/data_collection_tests/test_observational_data_collector.py
@@ -38,18 +38,18 @@ class Color(Enum):
 
     def test_not_all_variables_in_data(self):
         scenario = Scenario({self.X1, self.X2, self.X3, self.X4})
-        observational_data_collector = ObservationalDataCollector(scenario, self.observational_df_path)
+        observational_data_collector = ObservationalDataCollector(scenario, self.observational_df)
         self.assertRaises(IndexError, observational_data_collector.collect_data)
 
     def test_all_variables_in_data(self):
         scenario = Scenario({self.X1, self.X2, self.X3, self.Y1, self.Y2})
-        observational_data_collector = ObservationalDataCollector(scenario, self.observational_df_path)
+        observational_data_collector = ObservationalDataCollector(scenario, self.observational_df)
         df = observational_data_collector.collect_data(index_col=0)
         assert df.equals(self.observational_df), f"\n{df}\nwas not equal to\n{self.observational_df}"
 
     def test_data_constraints(self):
         scenario = Scenario({self.X1, self.X2, self.X3, self.Y1, self.Y2}, {self.X1.z3 > 2})
-        observational_data_collector = ObservationalDataCollector(scenario, self.observational_df_path)
+        observational_data_collector = ObservationalDataCollector(scenario, self.observational_df)
         df = observational_data_collector.collect_data(index_col=0)
         expected = self.observational_df.loc[[2, 3]]
         assert df.equals(expected), f"\n{df}\nwas not equal to\n{expected}"
@@ -60,7 +60,7 @@ def populate_m(data):
 
         meta = Meta("M", int, populate_m)
         scenario = Scenario({self.X1, meta})
-        observational_data_collector = ObservationalDataCollector(scenario, self.observational_df_path)
+        observational_data_collector = ObservationalDataCollector(scenario, self.observational_df)
         data = observational_data_collector.collect_data()
         assert all((m == 2 * x1 for x1, m in zip(data["X1"], data["M"])))
 
diff --git a/tests/json_front_tests/test_json_class.py b/tests/json_front_tests/test_json_class.py
@@ -28,7 +28,7 @@ def setUp(self) -> None:
         test_data_dir_path = Path("tests/resources/data")
         self.json_path = str(test_data_dir_path / json_file_name)
         self.dag_path = str(test_data_dir_path / dag_file_name)
-        self.data_path = str(test_data_dir_path / data_file_name)
+        self.data_path = [str(test_data_dir_path / data_file_name)]
         self.json_class = JsonUtility("logs.log")
         self.example_distribution = scipy.stats.uniform(1, 10)
         self.input_dict_list = [{"name": "test_input", "type": float, "distribution": self.example_distribution}]
@@ -40,7 +40,7 @@ def setUp(self) -> None:
     def test_setting_paths(self):
         self.assertEqual(self.json_class.paths.json_path, Path(self.json_path))
         self.assertEqual(self.json_class.paths.dag_path, Path(self.dag_path))
-        self.assertEqual(self.json_class.paths.data_path, Path(self.data_path))
+        self.assertEqual(self.json_class.paths.data_paths, [Path(self.data_path[0])]) # Needs to be list of Paths
 
     def test_set_inputs(self):
         ctf_input = [Input("test_input", float, self.example_distribution)]
@@ -61,7 +61,7 @@ def test_set_metas(self):
 
     def test_argparse(self):
         args = self.json_class.get_args(["--data_path=data.csv", "--dag_path=dag.dot", "--json_path=tests.json"])
-        self.assertEqual(args.data_path, "data.csv")
+        self.assertEqual(args.data_path, ["data.csv"])
         self.assertEqual(args.dag_path, "dag.dot")
         self.assertEqual(args.json_path, "tests.json")
 
diff --git a/tests/testing_tests/test_causal_test_engine.py b/tests/testing_tests/test_causal_test_engine.py
@@ -59,7 +59,7 @@ def setUp(self) -> None:
 
         # 5. Create observational data collector
         # Obsolete?
-        self.data_collector = ObservationalDataCollector(self.scenario, self.observational_data_csv_path)
+        self.data_collector = ObservationalDataCollector(self.scenario, df)
 
         # 5. Create causal test engine
         self.causal_test_engine = CausalTestEngine(self.causal_specification, self.data_collector)
diff --git a/tests/testing_tests/test_causal_test_suite.py b/tests/testing_tests/test_causal_test_suite.py
@@ -40,9 +40,7 @@ def setUp(self) -> None:
         df = pd.DataFrame({"D": list(np.random.normal(60, 10, 1000))})  # D = exogenous
         df["A"] = [1 if d > 50 else 0 for d in df["D"]]
         df["C"] = df["D"] + (4 * (df["A"] + 2))  # C = (4*(A+2)) + D
-        self.observational_data_csv_path = os.path.join(temp_dir_path, "observational_data.csv")
-        df.to_csv(self.observational_data_csv_path, index=False)
-
+        self.df = df
         self.causal_dag = CausalDAG(dag_dot_path)
 
         # 3. Specify data structures required for test suite
@@ -126,6 +124,6 @@ def create_causal_test_engine(self):
         """
         causal_specification = CausalSpecification(self.scenario, self.causal_dag)
 
-        data_collector = ObservationalDataCollector(self.scenario, self.observational_data_csv_path)
+        data_collector = ObservationalDataCollector(self.scenario, self.df)
         causal_test_engine = CausalTestEngine(causal_specification, data_collector)
         return causal_test_engine