From 5495caa4a377ac667cb9ded996c7c777db1e566b Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Mon, 17 Feb 2025 08:49:13 +0000
Subject: [PATCH 01/44] Removed datacollector from surrogate assisted

---
 .../surrogate/causal_surrogate_assisted.py    | 24 +++++++++----------
 .../test_causal_surrogate_assisted.py         | 13 ++++------
 2 files changed, 16 insertions(+), 21 deletions(-)

diff --git a/causal_testing/surrogate/causal_surrogate_assisted.py b/causal_testing/surrogate/causal_surrogate_assisted.py
index 4fba5371..56642770 100644
--- a/causal_testing/surrogate/causal_surrogate_assisted.py
+++ b/causal_testing/surrogate/causal_surrogate_assisted.py
@@ -4,7 +4,6 @@
 from dataclasses import dataclass
 from typing import Callable
 import pandas as pd
-from causal_testing.data_collection.data_collector import ObservationalDataCollector
 from causal_testing.specification.causal_specification import CausalSpecification
 from causal_testing.testing.base_test_case import BaseTestCase
 from causal_testing.estimation.cubic_spline_estimator import CubicSplineRegressionEstimator
@@ -73,21 +72,20 @@ def __init__(
 
     def execute(
         self,
-        data_collector: ObservationalDataCollector,
+        df: pd.DataFrame,
         max_executions: int = 200,
         custom_data_aggregator: Callable[[dict, dict], dict] = None,
     ):
         """For this specific test case, a search algorithm is used to find the most contradictory point in the input
         space which is, therefore, most likely to indicate incorrect behaviour. This cadidate test case is run against
         the simulator, checked for faults and the result returned with collected data
-        :param data_collector: An ObservationalDataCollector which gathers data relevant to the specified scenario
+        :param df: An dataframe which contains data relevant to the specified scenario
         :param max_executions: Maximum number of simulator executions before exiting the search
         :param custom_data_aggregator:
         :return: tuple containing SimulationResult or str, execution number and collected data"""
-        data_collector.collect_data()
 
         for i in range(max_executions):
-            surrogate_models = self.generate_surrogates(self.specification, data_collector)
+            surrogate_models = self.generate_surrogates(self.specification, df)
             candidate_test_case, _, surrogate = self.search_algorithm.search(surrogate_models, self.specification)
 
             self.simulator.startup()
@@ -96,10 +94,10 @@ def execute(
             self.simulator.shutdown()
 
             if custom_data_aggregator is not None:
-                if data_collector.data is not None:
-                    data_collector.data = custom_data_aggregator(data_collector.data, test_result.data)
+                if df is not None:
+                    df = custom_data_aggregator(df, test_result.data)
             else:
-                data_collector.data = pd.concat([data_collector.data, test_result_df], ignore_index=True)
+                df = pd.concat([df, test_result_df], ignore_index=True)
             if test_result.fault:
                 print(
                     f"Fault found between {surrogate.treatment} causing {surrogate.outcome}. Contradiction with "
@@ -108,17 +106,17 @@ def execute(
                 test_result.relationship = (
                     f"{surrogate.treatment} -> {surrogate.outcome} expected {surrogate.expected_relationship}"
                 )
-                return test_result, i + 1, data_collector.data
+                return test_result, i + 1, df
 
         print("No fault found")
-        return "No fault found", i + 1, data_collector.data
+        return "No fault found", i + 1, df
 
     def generate_surrogates(
-        self, specification: CausalSpecification, data_collector: ObservationalDataCollector
+        self, specification: CausalSpecification, df: pd.DataFrame
     ) -> list[CubicSplineRegressionEstimator]:
         """Generate a surrogate model for each edge of the dag that specifies it is included in the DAG metadata.
         :param specification: The Causal Specification (combination of Scenario and Causal Dag)
-        :param data_collector: An ObservationalDataCollector which gathers data relevant to the specified scenario
+        :param df: An dataframe which contains data relevant to the specified scenario
         :return: A list of surrogate models
         """
         surrogate_models = []
@@ -139,7 +137,7 @@ def generate_surrogates(
                     minimal_adjustment_set,
                     v,
                     4,
-                    df=data_collector.data,
+                    df=df,
                     expected_relationship=edge_metadata["expected"],
                 )
                 surrogate_models.append(surrogate)
diff --git a/tests/surrogate_tests/test_causal_surrogate_assisted.py b/tests/surrogate_tests/test_causal_surrogate_assisted.py
index 5d408a85..6668836d 100644
--- a/tests/surrogate_tests/test_causal_surrogate_assisted.py
+++ b/tests/surrogate_tests/test_causal_surrogate_assisted.py
@@ -1,5 +1,4 @@
 import unittest
-from causal_testing.data_collection.data_collector import ObservationalDataCollector
 from causal_testing.specification.causal_dag import CausalDAG
 from causal_testing.specification.causal_specification import CausalSpecification
 from causal_testing.specification.scenario import Scenario
@@ -69,7 +68,7 @@ def test_surrogate_model_generation(self):
         scenario = Scenario(variables={z, x, m, y})
         specification = CausalSpecification(scenario, causal_dag)
 
-        surrogate_models = c_s_a_test_case.generate_surrogates(specification, ObservationalDataCollector(scenario, df))
+        surrogate_models = c_s_a_test_case.generate_surrogates(specification, df)
         self.assertEqual(len(surrogate_models), 2)
 
         for surrogate in surrogate_models:
@@ -101,7 +100,7 @@ def test_causal_surrogate_assisted_execution(self):
 
         c_s_a_test_case = CausalSurrogateAssistedTestCase(specification, search_algorithm, simulator)
 
-        result, iterations, result_data = c_s_a_test_case.execute(ObservationalDataCollector(scenario, df))
+        result, iterations, result_data = c_s_a_test_case.execute(df)
 
         self.assertIsInstance(result, SimulationResult)
         self.assertEqual(iterations, 1)
@@ -131,7 +130,7 @@ def test_causal_surrogate_assisted_execution_failure(self):
 
         c_s_a_test_case = CausalSurrogateAssistedTestCase(specification, search_algorithm, simulator)
 
-        result, iterations, result_data = c_s_a_test_case.execute(ObservationalDataCollector(scenario, df), 1)
+        result, iterations, result_data = c_s_a_test_case.execute(df, 1)
 
         self.assertIsInstance(result, str)
         self.assertEqual(iterations, 1)
@@ -161,9 +160,7 @@ def test_causal_surrogate_assisted_execution_custom_aggregator(self):
 
         c_s_a_test_case = CausalSurrogateAssistedTestCase(specification, search_algorithm, simulator)
 
-        result, iterations, result_data = c_s_a_test_case.execute(
-            ObservationalDataCollector(scenario, df), custom_data_aggregator=data_double_aggregator
-        )
+        result, iterations, result_data = c_s_a_test_case.execute(df, custom_data_aggregator=data_double_aggregator)
 
         self.assertIsInstance(result, SimulationResult)
         self.assertEqual(iterations, 1)
@@ -197,7 +194,7 @@ def test_causal_surrogate_assisted_execution_incorrect_search_config(self):
         self.assertRaises(
             ValueError,
             c_s_a_test_case.execute,
-            data_collector=ObservationalDataCollector(scenario, df),
+            df=df,
             custom_data_aggregator=data_double_aggregator,
         )
 

From b8ace8edc237c5a186d4eb355ad579ee563439ea Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Mon, 17 Feb 2025 10:05:16 +0000
Subject: [PATCH 02/44] Removed data collector from causal test suite

---
 causal_testing/json_front/json_class.py       | 20 +++++-------
 .../testing/causal_test_adequacy.py           |  2 +-
 causal_testing/testing/causal_test_case.py    | 18 ++---------
 causal_testing/testing/causal_test_suite.py   | 18 +++++------
 .../covasim_/doubling_beta/example_beta.py    | 12 ++-----
 examples/lr91/example_max_conductances.py     | 13 ++++----
 .../example_max_conductances_test_suite.py    |  6 +---
 .../example_poisson_process.py                |  2 +-
 examples/poisson/example_run_causal_tests.py  |  1 -
 .../test_causal_test_adequacy.py              |  2 +-
 tests/testing_tests/test_causal_test_case.py  | 31 ++++++++-----------
 tests/testing_tests/test_causal_test_suite.py | 25 +--------------
 12 files changed, 47 insertions(+), 103 deletions(-)

diff --git a/causal_testing/json_front/json_class.py b/causal_testing/json_front/json_class.py
index fd617228..c81d510a 100644
--- a/causal_testing/json_front/json_class.py
+++ b/causal_testing/json_front/json_class.py
@@ -14,7 +14,6 @@
 import scipy
 from fitter import Fitter, get_common_distributions
 
-from causal_testing.data_collection.data_collector import ObservationalDataCollector
 from causal_testing.generation.abstract_causal_test_case import AbstractCausalTestCase
 from causal_testing.specification.causal_dag import CausalDAG
 from causal_testing.specification.causal_specification import CausalSpecification
@@ -56,8 +55,8 @@ def __init__(self, output_path: str, output_overwrite: bool = False):
         self.scenario = None
         self.causal_specification = None
         self.output_path = Path(output_path)
+        self.df = None
         self.check_file_exists(self.output_path, output_overwrite)
-        self.data_collector = None
 
     def set_paths(self, json_path: str, dag_path: str, data_paths: list[str] = None):
         """
@@ -70,7 +69,7 @@ def set_paths(self, json_path: str, dag_path: str, data_paths: list[str] = None)
             data_paths = []
         self.input_paths = JsonClassPaths(json_path=json_path, dag_path=dag_path, data_paths=data_paths)
 
-    def setup(self, scenario: Scenario, data=None, ignore_cycles=False):
+    def setup(self, scenario: Scenario, ignore_cycles=False):
         """Function to populate all the necessary parts of the json_class needed to execute tests"""
         self.scenario = scenario
         self._get_scenario_variables()
@@ -83,13 +82,12 @@ def setup(self, scenario: Scenario, data=None, ignore_cycles=False):
             self.test_plan = json.load(f)
         # Populate the data
         if self.input_paths.data_paths:
-            data = pd.concat([pd.read_csv(data_file, header=0) for data_file in self.input_paths.data_paths])
-        if data is None or len(data) == 0:
+            self.df = pd.concat([pd.read_csv(data_file, header=0) for data_file in self.input_paths.data_paths])
+        if self.df is None or len(self.df) == 0:
             raise ValueError(
                 "No data found. Please either provide a path to a file containing data or manually populate the .data "
                 "attribute with a dataframe before calling .setup()"
             )
-        self.data_collector = ObservationalDataCollector(self.scenario, data)
         self._populate_metas()
 
     def _create_abstract_test_case(self, test, mutates, effects):
@@ -97,7 +95,7 @@ def _create_abstract_test_case(self, test, mutates, effects):
         treatment_var = next(self.scenario.variables[v] for v in test["mutations"])
 
         if not treatment_var.distribution:
-            fitter = Fitter(self.data_collector.data[treatment_var.name], distributions=get_common_distributions())
+            fitter = Fitter(self.df[treatment_var.name], distributions=get_common_distributions())
             fitter.fit()
             (dist, params) = list(fitter.get_best(method="sumsquare_error").items())[0]
             treatment_var.distribution = getattr(scipy.stats, dist)(**params)
@@ -257,7 +255,7 @@ def _populate_metas(self):
         Populate data with meta-variable values and add distributions to Causal Testing Framework Variables
         """
         for meta in self.scenario.variables_of_type(Meta):
-            meta.populate(self.data_collector.data)
+            meta.populate(self.df)
 
     def _execute_test_case(
         self, causal_test_case: CausalTestCase, test: Mapping, f_flag: bool
@@ -273,9 +271,7 @@ def _execute_test_case(
         failed = False
 
         estimation_model = self._setup_test(causal_test_case=causal_test_case, test=test)
-        causal_test_result = causal_test_case.execute_test(
-            estimator=estimation_model, data_collector=self.data_collector
-        )
+        causal_test_result = causal_test_case.execute_test(estimator=estimation_model)
         test_passes = causal_test_case.expected_causal_effect.apply(causal_test_result)
 
         if "coverage" in test and test["coverage"]:
@@ -329,7 +325,7 @@ def _setup_test(self, causal_test_case: CausalTestCase, test: Mapping) -> Estima
         estimator_kwargs["control_value"] = causal_test_case.control_value
         estimator_kwargs["outcome"] = causal_test_case.outcome_variable.name
         estimator_kwargs["effect_modifiers"] = causal_test_case.effect_modifier_configuration
-        estimator_kwargs["df"] = self.data_collector.collect_data()
+        estimator_kwargs["df"] = self.df
         estimator_kwargs["alpha"] = test["alpha"] if "alpha" in test else 0.05
 
         estimation_model = test["estimator"](**estimator_kwargs)
diff --git a/causal_testing/testing/causal_test_adequacy.py b/causal_testing/testing/causal_test_adequacy.py
index 5fb043eb..48a5c381 100644
--- a/causal_testing/testing/causal_test_adequacy.py
+++ b/causal_testing/testing/causal_test_adequacy.py
@@ -105,7 +105,7 @@ def measure_adequacy(self):
             else:
                 estimator.df = estimator.df.sample(len(estimator.df), replace=True, random_state=i)
             try:
-                results.append(self.test_case.execute_test(estimator, None))
+                results.append(self.test_case.execute_test(estimator))
             except LinAlgError:
                 logger.warning("Adequacy LinAlgError")
                 continue
diff --git a/causal_testing/testing/causal_test_case.py b/causal_testing/testing/causal_test_case.py
index ea3795f2..b3a48009 100644
--- a/causal_testing/testing/causal_test_case.py
+++ b/causal_testing/testing/causal_test_case.py
@@ -2,13 +2,13 @@
 
 import logging
 from typing import Any
+import pandas as pd
 
 from causal_testing.specification.variable import Variable
 from causal_testing.testing.causal_test_outcome import CausalTestOutcome
 from causal_testing.testing.base_test_case import BaseTestCase
 from causal_testing.estimation.abstract_estimator import Estimator
 from causal_testing.testing.causal_test_result import CausalTestResult, TestValue
-from causal_testing.data_collection.data_collector import DataCollector
 
 
 logger = logging.getLogger(__name__)
@@ -58,25 +58,13 @@ def __init__(
         else:
             self.effect_modifier_configuration = {}
 
-    def execute_test(self, estimator: type(Estimator), data_collector: DataCollector) -> CausalTestResult:
+    def execute_test(self, estimator: type(Estimator)) -> CausalTestResult:
         """Execute a causal test case and return the causal test result.
 
-        :param estimator: A reference to an Estimator class.
-        :param data_collector: The data collector to be used which provides a dataframe for the Estimator
+        :param estimator: An Estimator class object
         :return causal_test_result: A CausalTestResult for the executed causal test case.
         """
-        if estimator.df is None:
-            estimator.df = data_collector.collect_data()
-
-        causal_test_result = self._return_causal_test_results(estimator)
-        return causal_test_result
-
-    def _return_causal_test_results(self, estimator) -> CausalTestResult:
-        """Depending on the estimator used, calculate the 95% confidence intervals and return in a causal_test_result
 
-        :param estimator: An Estimator class object
-        :return: a CausalTestResult object containing the confidence intervals
-        """
         if not hasattr(estimator, f"estimate_{self.estimate_type}"):
             raise AttributeError(f"{estimator.__class__} has no {self.estimate_type} method.")
         estimate_effect = getattr(estimator, f"estimate_{self.estimate_type}")
diff --git a/causal_testing/testing/causal_test_suite.py b/causal_testing/testing/causal_test_suite.py
index 14099143..797b6a1b 100644
--- a/causal_testing/testing/causal_test_suite.py
+++ b/causal_testing/testing/causal_test_suite.py
@@ -2,14 +2,14 @@
 https://causal-testing-framework.readthedocs.io/en/latest/test_suite.html"""
 
 import logging
-
-from collections import UserDict
 from typing import Type, Iterable
+from collections import UserDict
+import pandas as pd
+
 from causal_testing.testing.base_test_case import BaseTestCase
 from causal_testing.testing.causal_test_case import CausalTestCase
 from causal_testing.estimation.abstract_estimator import Estimator
 from causal_testing.testing.causal_test_result import CausalTestResult
-from causal_testing.data_collection.data_collector import DataCollector
 from causal_testing.specification.causal_specification import CausalSpecification
 
 logger = logging.getLogger(__name__)
@@ -47,17 +47,14 @@ def add_test_object(
         self.data[base_test_case] = test_object
 
     def execute_test_suite(
-        self, data_collector: DataCollector, causal_specification: CausalSpecification
+        self, causal_specification: CausalSpecification, df: pd.DataFrame
     ) -> dict[str, CausalTestResult]:
         """Execute a suite of causal tests and return the results in a list
-        :param data_collector: The data collector to be used for the test_suite. Can be observational, experimental or
-         custom
-        :param causal_specification:
+        :param causal_specification: A causal specification object which wraps up the scenario and causal DAG.
+        :param df: A dataframe containing the test data.
         :return: A dictionary where each key is the name of the estimators specified and the values are lists of
                 causal_test_result objects
         """
-        if data_collector.data.empty:
-            raise ValueError("No data has been loaded. Please call load_data prior to executing a causal test case.")
         test_suite_results = {}
         for edge in self:
             logger.info("treatment: %s", edge.treatment_variable)
@@ -79,8 +76,9 @@ def execute_test_suite(
                         test.control_value,
                         minimal_adjustment_set,
                         test.outcome_variable.name,
+                        df=df,
                     )
-                    causal_test_result = test.execute_test(estimator, data_collector)
+                    causal_test_result = test.execute_test(estimator)
                     causal_test_results.append(causal_test_result)
 
                 results[estimator_class.__name__] = causal_test_results
diff --git a/examples/covasim_/doubling_beta/example_beta.py b/examples/covasim_/doubling_beta/example_beta.py
index fb3ebb59..ff0a6b05 100644
--- a/examples/covasim_/doubling_beta/example_beta.py
+++ b/examples/covasim_/doubling_beta/example_beta.py
@@ -65,9 +65,7 @@ def doubling_beta_CATE_on_csv(
     )
 
     # Add squared terms for beta, since it has a quadratic relationship with cumulative infections
-    causal_test_result = causal_test_case.execute_test(
-        estimator=linear_regression_estimator, data_collector=data_collector
-    )
+    causal_test_result = causal_test_case.execute_test(estimator=linear_regression_estimator)
 
     # Repeat for association estimate (no adjustment)
     no_adjustment_linear_regression_estimator = LinearRegressionEstimator(
@@ -79,9 +77,7 @@ def doubling_beta_CATE_on_csv(
         df=past_execution_df,
         formula="cum_infections ~ beta + I(beta ** 2)",
     )
-    association_test_result = causal_test_case.execute_test(
-        estimator=no_adjustment_linear_regression_estimator, data_collector=data_collector
-    )
+    association_test_result = causal_test_case.execute_test(estimator=no_adjustment_linear_regression_estimator)
 
     # Store results for plotting
     results_dict["association"] = {
@@ -111,9 +107,7 @@ def doubling_beta_CATE_on_csv(
             df=counterfactual_past_execution_df,
             formula="cum_infections ~ beta + I(beta ** 2) + avg_age + contacts",
         )
-        counterfactual_causal_test_result = causal_test_case.execute_test(
-            estimator=linear_regression_estimator, data_collector=data_collector
-        )
+        counterfactual_causal_test_result = causal_test_case.execute_test(estimator=linear_regression_estimator)
 
         results_dict["counterfactual"] = {
             "ate": counterfactual_causal_test_result.test_value.value,
diff --git a/examples/lr91/example_max_conductances.py b/examples/lr91/example_max_conductances.py
index d235abee..de27ac19 100644
--- a/examples/lr91/example_max_conductances.py
+++ b/examples/lr91/example_max_conductances.py
@@ -5,7 +5,6 @@
 from causal_testing.specification.scenario import Scenario
 from causal_testing.specification.variable import Input, Output
 from causal_testing.specification.causal_specification import CausalSpecification
-from causal_testing.data_collection.data_collector import ObservationalDataCollector
 from causal_testing.testing.causal_test_case import CausalTestCase
 from causal_testing.testing.causal_test_outcome import Positive, Negative, NoEffect
 from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
@@ -134,17 +133,19 @@ def effects_on_APD90(observational_data_path, treatment_var, control_val, treatm
         treatment_value=treatment_val,
     )
 
-    # 7. Create a data collector
-    data_collector = ObservationalDataCollector(scenario, pd.read_csv(observational_data_path))
-
     # 8. Obtain the minimal adjustment set from the causal DAG
     minimal_adjustment_set = causal_dag.identification(base_test_case)
     linear_regression_estimator = LinearRegressionEstimator(
-        treatment_var.name, treatment_val, control_val, minimal_adjustment_set, "APD90"
+        treatment_var.name,
+        treatment_val,
+        control_val,
+        minimal_adjustment_set,
+        "APD90",
+        df=pd.read_csv(observational_data_path),
     )
 
     # 9. Run the causal test and print results
-    causal_test_result = causal_test_case.execute_test(linear_regression_estimator, data_collector)
+    causal_test_result = causal_test_case.execute_test(linear_regression_estimator)
     logger.info("%s", causal_test_result)
     return causal_test_result.test_value.value, causal_test_result.confidence_intervals
 
diff --git a/examples/lr91/example_max_conductances_test_suite.py b/examples/lr91/example_max_conductances_test_suite.py
index 79b49ad7..d244d5bf 100644
--- a/examples/lr91/example_max_conductances_test_suite.py
+++ b/examples/lr91/example_max_conductances_test_suite.py
@@ -5,7 +5,6 @@
 from causal_testing.specification.scenario import Scenario
 from causal_testing.specification.variable import Input, Output
 from causal_testing.specification.causal_specification import CausalSpecification
-from causal_testing.data_collection.data_collector import ObservationalDataCollector
 from causal_testing.testing.causal_test_case import CausalTestCase
 from causal_testing.testing.causal_test_outcome import Positive, Negative, NoEffect
 from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
@@ -143,11 +142,8 @@ def effects_on_APD90(observational_data_path, test_suite):
     # 5. Create a causal specification from the scenario and causal DAG
     causal_specification = CausalSpecification(scenario, causal_dag)
 
-    # 7. Create a data collector
-    data_collector = ObservationalDataCollector(scenario, pd.read_csv(observational_data_path))
-
     # 8. Run the causal test suite
-    causal_test_results = test_suite.execute_test_suite(data_collector, causal_specification)
+    causal_test_results = test_suite.execute_test_suite(causal_specification, pd.read_csv(observational_data_path))
     return causal_test_results
 
 
diff --git a/examples/poisson-line-process/example_poisson_process.py b/examples/poisson-line-process/example_poisson_process.py
index be8bc906..00ae042d 100644
--- a/examples/poisson-line-process/example_poisson_process.py
+++ b/examples/poisson-line-process/example_poisson_process.py
@@ -116,7 +116,7 @@ def causal_test_intensity_num_shapes(
         )
 
     # 9. Execute the test
-    causal_test_result = causal_test_case.execute_test(estimator, None)
+    causal_test_result = causal_test_case.execute_test(estimator)
 
     return causal_test_result
 
diff --git a/examples/poisson/example_run_causal_tests.py b/examples/poisson/example_run_causal_tests.py
index 2ae72e20..1174818f 100644
--- a/examples/poisson/example_run_causal_tests.py
+++ b/examples/poisson/example_run_causal_tests.py
@@ -7,7 +7,6 @@
 from causal_testing.testing.causal_test_outcome import ExactValue, Positive, Negative, NoEffect, CausalTestOutcome
 from causal_testing.testing.causal_test_result import CausalTestResult
 from causal_testing.json_front.json_class import JsonUtility
-from causal_testing.estimation.abstract_estimator import Estimator
 from causal_testing.specification.scenario import Scenario
 from causal_testing.specification.variable import Input, Output, Meta
 
diff --git a/tests/testing_tests/test_causal_test_adequacy.py b/tests/testing_tests/test_causal_test_adequacy.py
index 208bb007..061ef661 100644
--- a/tests/testing_tests/test_causal_test_adequacy.py
+++ b/tests/testing_tests/test_causal_test_adequacy.py
@@ -138,7 +138,7 @@ def test_data_adequacy_group_by(self):
             treatment_value=treatment_strategy,
             estimate_type="hazard_ratio",
         )
-        causal_test_result = causal_test_case.execute_test(estimation_model, None)
+        causal_test_result = causal_test_case.execute_test(estimation_model)
         adequacy_metric = DataAdequacy(causal_test_case, estimation_model, group_by="id")
         adequacy_metric.measure_adequacy()
         adequacy_dict = adequacy_metric.to_dict()
diff --git a/tests/testing_tests/test_causal_test_case.py b/tests/testing_tests/test_causal_test_case.py
index 600191d3..f1b123f4 100644
--- a/tests/testing_tests/test_causal_test_case.py
+++ b/tests/testing_tests/test_causal_test_case.py
@@ -8,7 +8,6 @@
 from causal_testing.specification.causal_specification import CausalSpecification, Scenario
 from causal_testing.specification.variable import Input, Output
 from causal_testing.specification.causal_dag import CausalDAG
-from causal_testing.data_collection.data_collector import ObservationalDataCollector
 from causal_testing.testing.causal_test_case import CausalTestCase
 from causal_testing.testing.causal_test_outcome import ExactValue
 from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
@@ -83,17 +82,13 @@ def setUp(self) -> None:
 
         # 4. Create dummy test data and write to csv
         np.random.seed(1)
-        df = pd.DataFrame({"D": list(np.random.normal(60, 10, 1000))})  # D = exogenous
-        df["A"] = [1 if d > 50 else 0 for d in df["D"]]
-        df["C"] = df["D"] + (4 * (df["A"] + 2))  # C = (4*(A+2)) + D
-        self.observational_data_csv_path = os.path.join(self.temp_dir_path, "observational_data.csv")
-        df.to_csv(self.observational_data_csv_path, index=False)
-
-        # 5. Create observational data collector
-        # Obsolete?
-        self.data_collector = ObservationalDataCollector(self.scenario, df)
-        self.data_collector.collect_data()
-        self.df = self.data_collector.collect_data()
+        self.df = pd.DataFrame({"D": list(np.random.normal(60, 10, 1000))})  # D = exogenous
+        self.df["A"] = [1 if d > 50 else 0 for d in self.df["D"]]
+        self.df["C"] = self.df["D"] + (4 * (self.df["A"] + 2))  # C = (4*(A+2)) + D
+        # self.observational_data_csv_path = os.path.join(self.temp_dir_path, "observational_data.csv")
+        # self.df.to_csv(self.observational_data_csv_path, index=False)
+
+        # 5. Create minimal adjustment set
         self.minimal_adjustment_set = self.causal_dag.identification(self.base_test_case)
         # 6. Easier to access treatment and outcome values
         self.treatment_value = 1
@@ -126,7 +121,7 @@ def test_execute_test_observational_linear_regression_estimator(self):
             "C",
             self.df,
         )
-        causal_test_result = self.causal_test_case.execute_test(estimation_model, self.data_collector)
+        causal_test_result = self.causal_test_case.execute_test(estimation_model)
         pd.testing.assert_series_equal(causal_test_result.test_value.value, pd.Series(4.0), atol=1e-10)
 
     def test_execute_test_observational_linear_regression_estimator_direct_effect(self):
@@ -153,7 +148,7 @@ def test_execute_test_observational_linear_regression_estimator_direct_effect(se
             "C",
             self.df,
         )
-        causal_test_result = causal_test_case.execute_test(estimation_model, self.data_collector)
+        causal_test_result = causal_test_case.execute_test(estimation_model)
         pd.testing.assert_series_equal(causal_test_result.test_value.value, pd.Series(4.0), atol=1e-10)
 
     def test_execute_test_observational_linear_regression_estimator_coefficient(self):
@@ -168,7 +163,7 @@ def test_execute_test_observational_linear_regression_estimator_coefficient(self
             self.df,
         )
         self.causal_test_case.estimate_type = "coefficient"
-        causal_test_result = self.causal_test_case.execute_test(estimation_model, self.data_collector)
+        causal_test_result = self.causal_test_case.execute_test(estimation_model)
         pd.testing.assert_series_equal(causal_test_result.test_value.value, pd.Series({"D": 0.0}), atol=1e-1)
 
     def test_execute_test_observational_linear_regression_estimator_risk_ratio(self):
@@ -183,7 +178,7 @@ def test_execute_test_observational_linear_regression_estimator_risk_ratio(self)
             self.df,
         )
         self.causal_test_case.estimate_type = "risk_ratio"
-        causal_test_result = self.causal_test_case.execute_test(estimation_model, self.data_collector)
+        causal_test_result = self.causal_test_case.execute_test(estimation_model)
         pd.testing.assert_series_equal(causal_test_result.test_value.value, pd.Series(0.0), atol=1)
 
     def test_invalid_estimate_type(self):
@@ -199,7 +194,7 @@ def test_invalid_estimate_type(self):
         )
         self.causal_test_case.estimate_type = "invalid"
         with self.assertRaises(AttributeError):
-            self.causal_test_case.execute_test(estimation_model, self.data_collector)
+            self.causal_test_case.execute_test(estimation_model)
 
     def test_execute_test_observational_linear_regression_estimator_squared_term(self):
         """Check that executing the causal test case returns the correct results for dummy data with a squared term
@@ -213,5 +208,5 @@ def test_execute_test_observational_linear_regression_estimator_squared_term(sel
             self.df,
             formula=f"C ~ A + {'+'.join(self.minimal_adjustment_set)} + (D ** 2)",
         )
-        causal_test_result = self.causal_test_case.execute_test(estimation_model, self.data_collector)
+        causal_test_result = self.causal_test_case.execute_test(estimation_model)
         pd.testing.assert_series_equal(causal_test_result.test_value.value, pd.Series(4.0), atol=1)
diff --git a/tests/testing_tests/test_causal_test_suite.py b/tests/testing_tests/test_causal_test_suite.py
index 151f7af2..a7c7704c 100644
--- a/tests/testing_tests/test_causal_test_suite.py
+++ b/tests/testing_tests/test_causal_test_suite.py
@@ -12,7 +12,6 @@
 from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
 from causal_testing.estimation.logistic_regression_estimator import LogisticRegressionEstimator
 from causal_testing.specification.causal_specification import CausalSpecification, Scenario
-from causal_testing.data_collection.data_collector import ObservationalDataCollector
 from causal_testing.specification.causal_dag import CausalDAG
 
 
@@ -64,8 +63,6 @@ def setUp(self) -> None:
         )
         self.causal_specification = CausalSpecification(self.scenario, self.causal_dag)
 
-        self.data_collector = ObservationalDataCollector(self.scenario, self.df)
-
     def tearDown(self) -> None:
         shutil.rmtree(self.temp_dir_path)
 
@@ -101,28 +98,8 @@ def test_return_single_test_object(self):
     def test_execute_test_suite_single_base_test_case(self):
         """Check that the test suite can return the correct results from dummy data for a single base_test-case"""
 
-        causal_test_results = self.test_suite.execute_test_suite(self.data_collector, self.causal_specification)
+        causal_test_results = self.test_suite.execute_test_suite(self.causal_specification, self.df)
         causal_test_case_result = causal_test_results[self.base_test_case]
         self.assertAlmostEqual(
             causal_test_case_result["LinearRegressionEstimator"][0].test_value.value[0], 4, delta=1e-10
         )
-
-    # Without CausalForestEstimator we now only have 2 estimators. Unfortunately LogicisticRegressionEstimator does not
-    # currently work with TestSuite. So for now removed test
-
-    # def test_execute_test_suite_multiple_estimators(self):
-    #     """Check that executing a test suite with multiple estimators returns correct results for the dummy data
-    #     for each estimator
-    #     """
-    #     estimators = [LinearRegressionEstimator, LogisticRegressionEstimator]
-    #     test_suite_2_estimators = CausalTestSuite()
-    #     test_list = [CausalTestCase(self.base_test_case, self.expected_causal_effect, 0, 1)]
-    #     test_suite_2_estimators.add_test_object(
-    #         base_test_case=self.base_test_case, causal_test_case_list=test_list, estimators_classes=estimators
-    #     )
-    #     causal_test_results = test_suite_2_estimators.execute_test_suite(self.data_collector, self.causal_specification)
-    #     causal_test_case_result = causal_test_results[self.base_test_case]
-    #     linear_regression_result = causal_test_case_result["LinearRegressionEstimator"][0]
-    #     logistic_regression_estimator = causal_test_case_result["LogisticRegressionEstimator"][0]
-    #     self.assertAlmostEqual(linear_regression_result.test_value.value, 4, delta=1e-1)
-    #     self.assertAlmostEqual(logistic_regression_estimator.test_value.value, 4, delta=1e-1)

From 2ceb6540c2839ba93421bde88dfefce432552ae8 Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Mon, 17 Feb 2025 10:10:39 +0000
Subject: [PATCH 03/44] Removed datacollector from testing

---
 .../covasim_/doubling_beta/example_beta.py    | 84 +++++--------------
 .../vaccinating_elderly/example_vaccine.py    | 13 ++-
 2 files changed, 25 insertions(+), 72 deletions(-)

diff --git a/examples/covasim_/doubling_beta/example_beta.py b/examples/covasim_/doubling_beta/example_beta.py
index ff0a6b05..6aa91b2b 100644
--- a/examples/covasim_/doubling_beta/example_beta.py
+++ b/examples/covasim_/doubling_beta/example_beta.py
@@ -6,7 +6,6 @@
 from causal_testing.specification.scenario import Scenario
 from causal_testing.specification.variable import Input, Output
 from causal_testing.specification.causal_specification import CausalSpecification
-from causal_testing.data_collection.data_collector import ObservationalDataCollector
 from causal_testing.testing.causal_test_case import CausalTestCase
 from causal_testing.testing.causal_test_outcome import Positive
 from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
@@ -52,7 +51,26 @@ def doubling_beta_CATE_on_csv(
 
     # Read in the observational data, perform identification
     past_execution_df = pd.read_csv(observational_data_path)
-    data_collector, _, causal_test_case, causal_specification = setup(past_execution_df)
+
+    # 2. Create variables
+    pop_size = Input("pop_size", int)
+    pop_infected = Input("pop_infected", int)
+    n_days = Input("n_days", int)
+    cum_infections = Output("cum_infections", int)
+    cum_deaths = Output("cum_deaths", int)
+    location = Input("location", str)
+    variants = Input("variants", str)
+    avg_age = Input("avg_age", float)
+    beta = Input("beta", float)
+    contacts = Input("contacts", float)
+
+    # 5. Create a base test case
+    base_test_case = BaseTestCase(treatment_variable=beta, outcome_variable=cum_infections)
+
+    # 6. Create a causal test case
+    causal_test_case = CausalTestCase(
+        base_test_case=base_test_case, expected_causal_effect=Positive, control_value=0.016, treatment_value=0.032
+    )
 
     linear_regression_estimator = LinearRegressionEstimator(
         "beta",
@@ -98,15 +116,6 @@ def doubling_beta_CATE_on_csv(
     # Repeat causal inference after deleting all rows with treatment value to obtain counterfactual inferences
     if simulate_counterfactuals:
         counterfactual_past_execution_df = past_execution_df[past_execution_df["beta"] != 0.032]
-        counterfactual_linear_regression_estimator = LinearRegressionEstimator(
-            "beta",
-            0.032,
-            0.016,
-            {"avg_age", "contacts"},
-            "cum_infections",
-            df=counterfactual_past_execution_df,
-            formula="cum_infections ~ beta + I(beta ** 2) + avg_age + contacts",
-        )
         counterfactual_causal_test_result = causal_test_case.execute_test(estimator=linear_regression_estimator)
 
         results_dict["counterfactual"] = {
@@ -215,59 +224,6 @@ def doubling_beta_CATEs(observational_data_path: str, simulate_counterfactual: b
     age_contact_fig.savefig(outpath_base_str + "age_contact_executions.pdf", format="pdf")
 
 
-def setup(observational_data):
-    # 1. Read in the Causal DAG
-    causal_dag = CausalDAG(f"{ROOT}/dag.dot")
-
-    # 2. Create variables
-    pop_size = Input("pop_size", int)
-    pop_infected = Input("pop_infected", int)
-    n_days = Input("n_days", int)
-    cum_infections = Output("cum_infections", int)
-    cum_deaths = Output("cum_deaths", int)
-    location = Input("location", str)
-    variants = Input("variants", str)
-    avg_age = Input("avg_age", float)
-    beta = Input("beta", float)
-    contacts = Input("contacts", float)
-
-    # 3. Create scenario by applying constraints over a subset of the input variables
-    scenario = Scenario(
-        variables={
-            pop_size,
-            pop_infected,
-            n_days,
-            cum_infections,
-            cum_deaths,
-            location,
-            variants,
-            avg_age,
-            beta,
-            contacts,
-        },
-        constraints={pop_size.z3 == 51633, pop_infected.z3 == 1000, n_days.z3 == 216},
-    )
-
-    # 4. Construct a causal specification from the scenario and causal DAG
-    causal_specification = CausalSpecification(scenario, causal_dag)
-
-    # 5. Create a base test case
-    base_test_case = BaseTestCase(treatment_variable=beta, outcome_variable=cum_infections)
-
-    # 6. Create a causal test case
-    causal_test_case = CausalTestCase(
-        base_test_case=base_test_case, expected_causal_effect=Positive, control_value=0.016, treatment_value=0.032
-    )
-
-    # 7. Create a data collector
-    data_collector = ObservationalDataCollector(scenario, observational_data)
-
-    # 8. Obtain the minimal adjustment set for the base test case from the causal DAG
-    minimal_adjustment_set = causal_dag.identification(base_test_case)
-
-    return data_collector, minimal_adjustment_set, causal_test_case, causal_specification
-
-
 def plot_doubling_beta_CATEs(results_dict, title, figure=None, axes=None, row=None, col=None):
     # Get the CATE as a percentage for association and causation
     ate = results_dict["causation"]["ate"][0]
diff --git a/examples/covasim_/vaccinating_elderly/example_vaccine.py b/examples/covasim_/vaccinating_elderly/example_vaccine.py
index 6c4ba0d2..b481b281 100644
--- a/examples/covasim_/vaccinating_elderly/example_vaccine.py
+++ b/examples/covasim_/vaccinating_elderly/example_vaccine.py
@@ -6,7 +6,6 @@
 from causal_testing.specification.scenario import Scenario
 from causal_testing.specification.variable import Input, Output
 from causal_testing.specification.causal_specification import CausalSpecification
-from causal_testing.data_collection.data_collector import ObservationalDataCollector
 from causal_testing.testing.causal_test_case import CausalTestCase
 from causal_testing.testing.causal_test_outcome import Positive, Negative, NoEffect
 from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
@@ -19,8 +18,8 @@
 
 
 def setup_test_case(verbose: bool = False):
-    """Run the causal test case for the effect of changing vaccine to prioritise elderly from an observational
-    data collector that was previously simulated.
+    """Run the causal test case for the effect of changing vaccine to prioritise elderly from observational
+    data that was previously simulated.
 
     :param verbose: Whether to print verbose details (causal test results).
     :return results_dict: A dictionary containing ATE, 95% CIs, and Test Pass/Fail
@@ -57,11 +56,9 @@ def setup_test_case(verbose: bool = False):
     # 4. Construct a causal specification from the scenario and causal DAG
     causal_specification = CausalSpecification(scenario, causal_dag)
 
-    # 5. Instantiate the observational data collector using the previously simulated data
+    # 5. Read the previously simulated data
     obs_df = pd.read_csv("simulated_data.csv")
 
-    data_collector = ObservationalDataCollector(scenario, obs_df)
-
     # 6. Express expected outcomes
     expected_outcome_effects = {
         cum_infections: Positive(),
@@ -90,7 +87,7 @@ def setup_test_case(verbose: bool = False):
         )
 
         # 9. Execute test and save results in dict
-        causal_test_result = causal_test_case.execute_test(linear_regression_estimator, data_collector)
+        causal_test_result = causal_test_case.execute_test(linear_regression_estimator, obs_df)
 
         if verbose:
             logging.info("Causation:\n%s", causal_test_result)
@@ -110,4 +107,4 @@ def setup_test_case(verbose: bool = False):
 
     test_results = setup_test_case(verbose=True)
 
-    logging.info("%s", test_results)
\ No newline at end of file
+    logging.info("%s", test_results)

From bb70338983f311c2f2fc8978c3e150c6c4199882 Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Mon, 17 Feb 2025 10:10:58 +0000
Subject: [PATCH 04/44] Removed data collection tests

---
 .../test_observational_data_collector.py      | 73 -------------------
 1 file changed, 73 deletions(-)
 delete mode 100644 tests/data_collection_tests/test_observational_data_collector.py

diff --git a/tests/data_collection_tests/test_observational_data_collector.py b/tests/data_collection_tests/test_observational_data_collector.py
deleted file mode 100644
index 97163853..00000000
--- a/tests/data_collection_tests/test_observational_data_collector.py
+++ /dev/null
@@ -1,73 +0,0 @@
-import unittest
-import os
-import shutil, tempfile
-import pandas as pd
-from causal_testing.data_collection.data_collector import ObservationalDataCollector
-from causal_testing.specification.causal_specification import Scenario
-from causal_testing.specification.variable import Input, Output, Meta
-from scipy.stats import uniform, rv_discrete
-from enum import Enum
-import random
-
-
-class TestObservationalDataCollector(unittest.TestCase):
-    def setUp(self) -> None:
-        class Color(Enum):
-            RED = "RED"
-            GREEN = "GREEN"
-            BLUE = "BLUE"
-
-        self.temp_dir_path = tempfile.mkdtemp()
-        self.dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot")
-        self.observational_df_path = os.path.join(self.temp_dir_path, "observational_data.csv")
-        # Y = 3*X1 + X2*X3 + 10
-        self.observational_df = pd.DataFrame(
-            {"X1": [1, 2, 3, 4], "X2": [5, 6, 7, 8], "X3": [10, 20, 30, 40], "Y2": ["RED", "GREEN", "BLUE", "BLUE"]}
-        )
-        self.observational_df["Y1"] = self.observational_df.apply(
-            lambda row: (3 * row.X1) + (row.X2 * row.X3) + 10, axis=1
-        )
-        self.observational_df.to_csv(self.observational_df_path)
-        self.observational_df["Y2"] = [Color[x] for x in self.observational_df["Y2"]]
-        self.X1 = Input("X1", int, uniform(1, 4))
-        self.X2 = Input("X2", int, rv_discrete(values=([7], [1])))
-        self.X3 = Input("X3", int, uniform(10, 40))
-        self.X4 = Input("X4", int, rv_discrete(values=([10], [1])))
-        self.Y1 = Output("Y1", int)
-        self.Y2 = Output("Y2", Color)
-
-    def test_not_all_variables_in_data(self):
-        scenario = Scenario({self.X1, self.X2, self.X3, self.X4})
-        observational_data_collector = ObservationalDataCollector(scenario, self.observational_df)
-        self.assertRaises(IndexError, observational_data_collector.collect_data)
-
-    def test_all_variables_in_data(self):
-        scenario = Scenario({self.X1, self.X2, self.X3, self.Y1, self.Y2})
-        observational_data_collector = ObservationalDataCollector(scenario, self.observational_df)
-        df = observational_data_collector.collect_data(index_col=0)
-        assert df.equals(self.observational_df), f"\n{df}\nwas not equal to\n{self.observational_df}"
-
-    def test_data_constraints(self):
-        scenario = Scenario({self.X1, self.X2, self.X3, self.Y1, self.Y2}, {self.X1.z3 > 2})
-        observational_data_collector = ObservationalDataCollector(scenario, self.observational_df)
-        df = observational_data_collector.collect_data(index_col=0)
-        expected = self.observational_df.loc[[2, 3]]
-        assert df.equals(expected), f"\n{df}\nwas not equal to\n{expected}"
-
-    def test_meta_population(self):
-        def populate_m(data):
-            data["M"] = data["X1"] * 2
-
-        meta = Meta("M", int, populate_m)
-        scenario = Scenario({self.X1, meta})
-        observational_data_collector = ObservationalDataCollector(scenario, self.observational_df)
-        observational_data_collector.collect_data()
-        data = observational_data_collector.collect_data()
-        assert all((m == 2 * x1 for x1, m in zip(data["X1"], data["M"])))
-
-    def tearDown(self) -> None:
-        shutil.rmtree(self.temp_dir_path)
-
-
-if __name__ == "__main__":
-    unittest.main()

From 736cf6d388bfde5418cfd5256b39f3ca21a5b344 Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Mon, 17 Feb 2025 11:43:34 +0000
Subject: [PATCH 05/44] Removed data collector from metamorphic relation

---
 .../metamorphic_relation.py                   | 139 +------
 .../test_metamorphic_relations.py             | 369 ------------------
 .../test_metamorphic_relations.py             | 247 ++++++++++++
 3 files changed, 266 insertions(+), 489 deletions(-)
 rename causal_testing/{specification => testing}/metamorphic_relation.py (61%)
 delete mode 100644 tests/specification_tests/test_metamorphic_relations.py
 create mode 100644 tests/testing_tests/test_metamorphic_relations.py

diff --git a/causal_testing/specification/metamorphic_relation.py b/causal_testing/testing/metamorphic_relation.py
similarity index 61%
rename from causal_testing/specification/metamorphic_relation.py
rename to causal_testing/testing/metamorphic_relation.py
index 4a7c70c9..1d296749 100644
--- a/causal_testing/specification/metamorphic_relation.py
+++ b/causal_testing/testing/metamorphic_relation.py
@@ -13,11 +13,9 @@
 from multiprocessing import Pool
 
 import networkx as nx
-import pandas as pd
-import numpy as np
 
 from causal_testing.specification.causal_specification import CausalDAG, Node
-from causal_testing.data_collection.data_collector import ExperimentalDataCollector
+from causal_testing.testing.base_test_case import BaseTestCase
 
 logger = logging.getLogger(__name__)
 
@@ -26,91 +24,11 @@
 class MetamorphicRelation:
     """Class representing a metamorphic relation."""
 
-    treatment_var: Node
-    output_var: Node
+    base_test_case: BaseTestCase
     adjustment_vars: Iterable[Node]
     dag: CausalDAG
     tests: Iterable = None
 
-    def generate_follow_up(self, n_tests: int, min_val: float, max_val: float, seed: int = 0):
-        """Generate numerical follow-up input configurations."""
-        np.random.seed(seed)
-
-        # Get set of variables to change, excluding the treatment itself
-        variables_to_change = {node for node in self.dag.graph.nodes if self.dag.graph.in_degree(node) == 0}
-        if self.adjustment_vars:
-            variables_to_change |= set(self.adjustment_vars)
-        if self.treatment_var in variables_to_change:
-            variables_to_change.remove(self.treatment_var)
-
-        # Assign random numerical values to the variables to change
-        test_inputs = pd.DataFrame(
-            np.random.randint(min_val, max_val, size=(n_tests, len(variables_to_change))),
-            columns=sorted(variables_to_change),
-        )
-
-        # Enumerate the possible source, follow-up pairs for the treatment
-        candidate_source_follow_up_pairs = np.array(list(combinations(range(int(min_val), int(max_val + 1)), 2)))
-
-        # Sample without replacement from the possible source, follow-up pairs
-        sampled_source_follow_up_indices = np.random.choice(
-            candidate_source_follow_up_pairs.shape[0], n_tests, replace=False
-        )
-
-        follow_up_input = f"{self.treatment_var}'"
-        source_follow_up_test_inputs = pd.DataFrame(
-            candidate_source_follow_up_pairs[sampled_source_follow_up_indices],
-            columns=sorted([self.treatment_var] + [follow_up_input]),
-        )
-        self.tests = [
-            MetamorphicTest(
-                source_inputs,
-                follow_up_inputs,
-                other_inputs,
-                self.output_var,
-                str(self),
-            )
-            for source_inputs, follow_up_inputs, other_inputs in zip(
-                source_follow_up_test_inputs[[self.treatment_var]].to_dict(orient="records"),
-                source_follow_up_test_inputs[[follow_up_input]]
-                .rename(columns={follow_up_input: self.treatment_var})
-                .to_dict(orient="records"),
-                (
-                    test_inputs.to_dict(orient="records")
-                    if not test_inputs.empty
-                    else [{}] * len(source_follow_up_test_inputs)
-                ),
-            )
-        ]
-
-    def execute_tests(self, data_collector: ExperimentalDataCollector):
-        """Execute the generated list of metamorphic tests, returning a dictionary of tests that pass and fail.
-
-        :param data_collector: An experimental data collector for the system-under-test.
-        """
-        test_results = {"pass": [], "fail": []}
-        for metamorphic_test in self.tests:
-            # Update the control and treatment configuration to take generated values for source and follow-up tests
-            control_input_config = metamorphic_test.source_inputs | metamorphic_test.other_inputs
-            treatment_input_config = metamorphic_test.follow_up_inputs | metamorphic_test.other_inputs
-            data_collector.control_input_configuration = control_input_config
-            data_collector.treatment_input_configuration = treatment_input_config
-            metamorphic_test_results_df = data_collector.collect_data()
-
-            # Apply assertion to control and treatment outputs
-            control_output = metamorphic_test_results_df.loc["control_0"][metamorphic_test.output]
-            treatment_output = metamorphic_test_results_df.loc["treatment_0"][metamorphic_test.output]
-
-            if not self.assertion(control_output, treatment_output):
-                test_results["fail"].append(metamorphic_test)
-            else:
-                test_results["pass"].append(metamorphic_test)
-        return test_results
-
-    @abstractmethod
-    def assertion(self, source_output, follow_up_output):
-        """An assertion that should be applied to an individual metamorphic test run."""
-
     @abstractmethod
     def to_json_stub(self, skip=True) -> dict:
         """Convert to a JSON frontend stub string for user customisation"""
@@ -123,10 +41,11 @@ def test_oracle(self, test_results):
 
     def __eq__(self, other):
         same_type = self.__class__ == other.__class__
-        same_treatment = self.treatment_var == other.treatment_var
-        same_output = self.output_var == other.output_var
+        same_treatment = self.base_test_case.treatment_variable == other.base_test_case.treatment_variable
+        same_outcome = self.base_test_case.outcome_variable == other.base_test_case.outcome_variable
+        same_effect = self.base_test_case.effect == other.base_test_case.effect
         same_adjustment_set = set(self.adjustment_vars) == set(other.adjustment_vars)
-        return same_type and same_treatment and same_output and same_adjustment_set
+        return same_type and same_treatment and same_outcome and same_effect and same_adjustment_set
 
 
 class ShouldCause(MetamorphicRelation):
@@ -149,14 +68,14 @@ def to_json_stub(self, skip=True) -> dict:
             "estimator": "LinearRegressionEstimator",
             "estimate_type": "coefficient",
             "effect": "direct",
-            "mutations": [self.treatment_var],
-            "expected_effect": {self.output_var: "SomeEffect"},
-            "formula": f"{self.output_var} ~ {' + '.join([self.treatment_var] + self.adjustment_vars)}",
+            "mutations": [self.base_test_case.treatment_variable],
+            "expected_effect": {self.base_test_case.outcome_variable: "SomeEffect"},
+            "formula": f"{self.base_test_case.outcome_variable} ~ {' + '.join([self.base_test_case.treatment_variable] + self.adjustment_vars)}",
             "skip": skip,
         }
 
     def __str__(self):
-        formatted_str = f"{self.treatment_var} --> {self.output_var}"
+        formatted_str = f"{self.base_test_case.treatment_variable} --> {self.base_test_case.outcome_variable}"
         if self.adjustment_vars:
             formatted_str += f" | {self.adjustment_vars}"
         return formatted_str
@@ -182,40 +101,20 @@ def to_json_stub(self, skip=True) -> dict:
             "estimator": "LinearRegressionEstimator",
             "estimate_type": "coefficient",
             "effect": "direct",
-            "mutations": [self.treatment_var],
-            "expected_effect": {self.output_var: "NoEffect"},
-            "formula": f"{self.output_var} ~ {' + '.join([self.treatment_var] + self.adjustment_vars)}",
+            "mutations": [self.base_test_case.treatment_variable],
+            "expected_effect": {self.base_test_case.outcome_variable: "NoEffect"},
+            "formula": f"{self.base_test_case.outcome_variable} ~ {' + '.join([self.base_test_case.treatment_variable] + self.adjustment_vars)}",
             "alpha": 0.05,
             "skip": skip,
         }
 
     def __str__(self):
-        formatted_str = f"{self.treatment_var} _||_ {self.output_var}"
+        formatted_str = f"{self.base_test_case.treatment_variable} _||_ {self.base_test_case.outcome_variable}"
         if self.adjustment_vars:
             formatted_str += f" | {self.adjustment_vars}"
         return formatted_str
 
 
-@dataclass(order=True)
-class MetamorphicTest:
-    """Class representing a metamorphic test case."""
-
-    source_inputs: dict
-    follow_up_inputs: dict
-    other_inputs: dict
-    output: str
-    relation: str
-
-    def __str__(self):
-        return (
-            f"Source inputs: {self.source_inputs}\n"
-            f"Follow-up inputs: {self.follow_up_inputs}\n"
-            f"Other inputs: {self.other_inputs}\n"
-            f"Output: {self.output}"
-            f"Metamorphic Relation: {self.relation}"
-        )
-
-
 def generate_metamorphic_relation(
     node_pair: tuple[str, str], dag: CausalDAG, nodes_to_ignore: set = None
 ) -> MetamorphicRelation:
@@ -241,30 +140,30 @@ def generate_metamorphic_relation(
         if u in nx.ancestors(dag.graph, v):
             adj_sets = dag.direct_effect_adjustment_sets([u], [v], nodes_to_ignore=nodes_to_ignore)
             if adj_sets:
-                metamorphic_relations.append(ShouldNotCause(u, v, list(adj_sets[0]), dag))
+                metamorphic_relations.append(ShouldNotCause(BaseTestCase(u, v), list(adj_sets[0]), dag))
 
         # Case 2: V --> ... --> U
         elif v in nx.ancestors(dag.graph, u):
             adj_sets = dag.direct_effect_adjustment_sets([v], [u], nodes_to_ignore=nodes_to_ignore)
             if adj_sets:
-                metamorphic_relations.append(ShouldNotCause(v, u, list(adj_sets[0]), dag))
+                metamorphic_relations.append(ShouldNotCause(BaseTestCase(v, u), list(adj_sets[0]), dag))
 
         # Case 3: V _||_ U (No directed walk from V to U but there may be a back-door path e.g. U <-- Z --> V).
         # Only make one MR since V _||_ U == U _||_ V
         else:
             adj_sets = dag.direct_effect_adjustment_sets([u], [v], nodes_to_ignore=nodes_to_ignore)
             if adj_sets:
-                metamorphic_relations.append(ShouldNotCause(u, v, list(adj_sets[0]), dag))
+                metamorphic_relations.append(ShouldNotCause(BaseTestCase(u, v), list(adj_sets[0]), dag))
 
     # Create a ShouldCause relation for each edge (u, v) or (v, u)
     elif (u, v) in dag.graph.edges:
         adj_sets = dag.direct_effect_adjustment_sets([u], [v], nodes_to_ignore=nodes_to_ignore)
         if adj_sets:
-            metamorphic_relations.append(ShouldCause(u, v, list(adj_sets[0]), dag))
+            metamorphic_relations.append(ShouldCause(BaseTestCase(u, v), list(adj_sets[0]), dag))
     else:
         adj_sets = dag.direct_effect_adjustment_sets([v], [u], nodes_to_ignore=nodes_to_ignore)
         if adj_sets:
-            metamorphic_relations.append(ShouldCause(v, u, list(adj_sets[0]), dag))
+            metamorphic_relations.append(ShouldCause(BaseTestCase(v, u), list(adj_sets[0]), dag))
     return metamorphic_relations
 
 
diff --git a/tests/specification_tests/test_metamorphic_relations.py b/tests/specification_tests/test_metamorphic_relations.py
deleted file mode 100644
index 7998b66c..00000000
--- a/tests/specification_tests/test_metamorphic_relations.py
+++ /dev/null
@@ -1,369 +0,0 @@
-import unittest
-import os
-import shutil, tempfile
-import pandas as pd
-from itertools import combinations
-
-from causal_testing.specification.causal_dag import CausalDAG
-from causal_testing.specification.causal_specification import Scenario
-from causal_testing.specification.metamorphic_relation import (
-    ShouldCause,
-    ShouldNotCause,
-    generate_metamorphic_relations,
-    generate_metamorphic_relation,
-)
-from causal_testing.data_collection.data_collector import ExperimentalDataCollector
-from causal_testing.specification.variable import Input, Output
-
-
-def single_input_program_under_test(X1, Z=None, M=None, Y=None):
-    if Z is None:
-        Z = 2 * X1 + -3
-    if M is None:
-        M = 3 * Z
-    if Y is None:
-        Y = M / 2
-    return {"X1": X1, "Z": Z, "M": M, "Y": Y}
-
-
-def program_under_test(X1, X2, X3, Z=None, M=None, Y=None):
-    if Z is None:
-        Z = 2 * X1 + -3 * X2 + 10
-    if M is None:
-        M = 3 * Z + X3
-    if Y is None:
-        Y = M / 2
-    return {"X1": X1, "X2": X2, "X3": X3, "Z": Z, "M": M, "Y": Y}
-
-
-def buggy_program_under_test(X1, X2, X3, Z=None, M=None, Y=None):
-    if Z is None:
-        Z = 2  # No effect of X1 or X2 on Z
-    if M is None:
-        M = 3 * Z + X3
-    if Y is None:
-        Y = M / 2
-    return {"X1": X1, "X2": X2, "X3": X3, "Z": Z, "M": M, "Y": Y}
-
-
-class SingleInputProgramUnderTestEDC(ExperimentalDataCollector):
-    def run_system_with_input_configuration(self, input_configuration: dict) -> pd.DataFrame:
-        results_dict = single_input_program_under_test(**input_configuration)
-        results_df = pd.DataFrame(results_dict, index=[0])
-        return results_df
-
-
-class ProgramUnderTestEDC(ExperimentalDataCollector):
-    def run_system_with_input_configuration(self, input_configuration: dict) -> pd.DataFrame:
-        results_dict = program_under_test(**input_configuration)
-        results_df = pd.DataFrame(results_dict, index=[0])
-        return results_df
-
-
-class BuggyProgramUnderTestEDC(ExperimentalDataCollector):
-    def run_system_with_input_configuration(self, input_configuration: dict) -> pd.DataFrame:
-        results_dict = buggy_program_under_test(**input_configuration)
-        results_df = pd.DataFrame(results_dict, index=[0])
-        return results_df
-
-
-class TestMetamorphicRelation(unittest.TestCase):
-    def setUp(self) -> None:
-        self.temp_dir_path = tempfile.mkdtemp()
-        self.dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot")
-        dag_dot = """digraph DAG { rankdir=LR; X1 -> Z; Z -> M; M -> Y; X2 -> Z; X3 -> M;}"""
-        with open(self.dag_dot_path, "w") as f:
-            f.write(dag_dot)
-        self.dcg_dot_path = os.path.join(self.temp_dir_path, "dcg.dot")
-        dcg_dot = """digraph dct { a -> b -> c -> d; d -> c; }"""
-        with open(self.dcg_dot_path, "w") as f:
-            f.write(dcg_dot)
-
-        X1 = Input("X1", float)
-        X2 = Input("X2", float)
-        X3 = Input("X3", float)
-        Z = Output("Z", float)
-        M = Output("M", float)
-        Y = Output("Y", float)
-        self.scenario = Scenario(variables={X1, X2, X3, Z, M, Y})
-        self.default_control_input_config = {"X1": 1, "X2": 2, "X3": 3}
-        self.default_treatment_input_config = {"X1": 2, "X2": 3, "X3": 3}
-        self.data_collector = ProgramUnderTestEDC(
-            self.scenario, self.default_control_input_config, self.default_treatment_input_config
-        )
-
-    def tearDown(self) -> None:
-        shutil.rmtree(self.temp_dir_path)
-
-    def test_should_cause_metamorphic_relations_correct_spec(self):
-        """Test if the ShouldCause MR passes all metamorphic tests where the DAG perfectly represents the program."""
-        causal_dag = CausalDAG(self.dag_dot_path)
-        for edge in causal_dag.graph.edges:
-            (u, v) = edge
-            adj_set = list(causal_dag.direct_effect_adjustment_sets([u], [v])[0])
-            should_cause_MR = ShouldCause(u, v, adj_set, causal_dag)
-            should_cause_MR.generate_follow_up(10, -10.0, 10.0, 1)
-            test_results = should_cause_MR.execute_tests(self.data_collector)
-            should_cause_MR.test_oracle(test_results)
-
-    def test_should_not_cause_json_stub(self):
-        """Test if the ShouldCause MR passes all metamorphic tests where the DAG perfectly represents the program
-        and there is only a single input."""
-        causal_dag = CausalDAG(self.dag_dot_path)
-        self.data_collector = SingleInputProgramUnderTestEDC(
-            self.scenario, self.default_control_input_config, self.default_treatment_input_config
-        )
-        causal_dag.graph.remove_nodes_from(["X2", "X3"])
-        adj_set = list(causal_dag.direct_effect_adjustment_sets(["X1"], ["Z"])[0])
-        should_not_cause_MR = ShouldNotCause("X1", "Z", adj_set, causal_dag)
-        self.assertEqual(
-            should_not_cause_MR.to_json_stub(),
-            {
-                "effect": "direct",
-                "estimate_type": "coefficient",
-                "estimator": "LinearRegressionEstimator",
-                "expected_effect": {"Z": "NoEffect"},
-                "formula": "Z ~ X1",
-                "mutations": ["X1"],
-                "name": "X1 _||_ Z",
-                "formula": "Z ~ X1",
-                "alpha": 0.05,
-                "skip": True,
-            },
-        )
-
-    def test_should_cause_json_stub(self):
-        """Test if the ShouldCause MR passes all metamorphic tests where the DAG perfectly represents the program
-        and there is only a single input."""
-        causal_dag = CausalDAG(self.dag_dot_path)
-        self.data_collector = SingleInputProgramUnderTestEDC(
-            self.scenario, self.default_control_input_config, self.default_treatment_input_config
-        )
-        causal_dag.graph.remove_nodes_from(["X2", "X3"])
-        adj_set = list(causal_dag.direct_effect_adjustment_sets(["X1"], ["Z"])[0])
-        should_cause_MR = ShouldCause("X1", "Z", adj_set, causal_dag)
-        self.assertEqual(
-            should_cause_MR.to_json_stub(),
-            {
-                "effect": "direct",
-                "estimate_type": "coefficient",
-                "estimator": "LinearRegressionEstimator",
-                "expected_effect": {"Z": "SomeEffect"},
-                "formula": "Z ~ X1",
-                "mutations": ["X1"],
-                "name": "X1 --> Z",
-                "skip": True,
-            },
-        )
-
-    def test_should_cause_metamorphic_relations_correct_spec_one_input(self):
-        """Test if the ShouldCause MR passes all metamorphic tests where the DAG perfectly represents the program
-        and there is only a single input."""
-        causal_dag = CausalDAG(self.dag_dot_path)
-        self.data_collector = SingleInputProgramUnderTestEDC(
-            self.scenario, self.default_control_input_config, self.default_treatment_input_config
-        )
-        causal_dag.graph.remove_nodes_from(["X2", "X3"])
-        adj_set = list(causal_dag.direct_effect_adjustment_sets(["X1"], ["Z"])[0])
-        should_cause_MR = ShouldCause("X1", "Z", adj_set, causal_dag)
-        should_cause_MR.generate_follow_up(10, -10.0, 10.0, 1)
-        test_results = should_cause_MR.execute_tests(self.data_collector)
-        should_cause_MR.test_oracle(test_results)
-
-    def test_should_not_cause_metamorphic_relations_correct_spec(self):
-        """Test if the ShouldNotCause MR passes all metamorphic tests where the DAG perfectly represents the program."""
-        causal_dag = CausalDAG(self.dag_dot_path)
-        for node_pair in combinations(causal_dag.graph.nodes, 2):
-            (u, v) = node_pair
-            # Get all pairs of nodes which don't form an edge
-            if ((u, v) not in causal_dag.graph.edges) and ((v, u) not in causal_dag.graph.edges):
-                # Check both directions if there is no causality
-                # This can be done more efficiently by ignoring impossible directions (output --> input)
-                adj_set_u_to_v = list(causal_dag.direct_effect_adjustment_sets([u], [v])[0])
-                u_should_not_cause_v_MR = ShouldNotCause(u, v, adj_set_u_to_v, causal_dag)
-                adj_set_v_to_u = list(causal_dag.direct_effect_adjustment_sets([v], [u])[0])
-                v_should_not_cause_u_MR = ShouldNotCause(v, u, adj_set_v_to_u, causal_dag)
-                u_should_not_cause_v_MR.generate_follow_up(10, -100, 100)
-                v_should_not_cause_u_MR.generate_follow_up(10, -100, 100)
-                u_should_not_cause_v_test_results = u_should_not_cause_v_MR.execute_tests(self.data_collector)
-                v_should_not_cause_u_test_results = v_should_not_cause_u_MR.execute_tests(self.data_collector)
-                u_should_not_cause_v_MR.test_oracle(u_should_not_cause_v_test_results)
-                v_should_not_cause_u_MR.test_oracle(v_should_not_cause_u_test_results)
-
-    def test_should_cause_metamorphic_relation_missing_relationship(self):
-        """Test whether the ShouldCause MR catches missing relationships in the DAG."""
-        causal_dag = CausalDAG(self.dag_dot_path)
-
-        # Replace the data collector with one that runs a buggy program in which X1 and X2 do not affect Z
-        self.data_collector = BuggyProgramUnderTestEDC(
-            self.scenario, self.default_control_input_config, self.default_treatment_input_config
-        )
-        X1_should_cause_Z_MR = ShouldCause("X1", "Z", None, causal_dag)
-        X2_should_cause_Z_MR = ShouldCause("X2", "Z", None, causal_dag)
-        X1_should_cause_Z_MR.generate_follow_up(10, -100, 100, 1)
-        X2_should_cause_Z_MR.generate_follow_up(10, -100, 100, 1)
-        X1_should_cause_Z_test_results = X1_should_cause_Z_MR.execute_tests(self.data_collector)
-        X2_should_cause_Z_test_results = X2_should_cause_Z_MR.execute_tests(self.data_collector)
-        self.assertRaises(AssertionError, X1_should_cause_Z_MR.test_oracle, X1_should_cause_Z_test_results)
-        self.assertRaises(AssertionError, X2_should_cause_Z_MR.test_oracle, X2_should_cause_Z_test_results)
-
-    def test_all_metamorphic_relations_implied_by_dag(self):
-        dag = CausalDAG(self.dag_dot_path)
-        dag.add_edge("Z", "Y")  # Add a direct path from Z to Y so M becomes a mediator
-        metamorphic_relations = generate_metamorphic_relations(dag)
-        should_cause_relations = [mr for mr in metamorphic_relations if isinstance(mr, ShouldCause)]
-        should_not_cause_relations = [mr for mr in metamorphic_relations if isinstance(mr, ShouldNotCause)]
-
-        # Check all ShouldCause relations are present and no extra
-        expected_should_cause_relations = [
-            ShouldCause("X1", "Z", [], dag),
-            ShouldCause("Z", "M", [], dag),
-            ShouldCause("M", "Y", ["Z"], dag),
-            ShouldCause("Z", "Y", ["M"], dag),
-            ShouldCause("X2", "Z", [], dag),
-            ShouldCause("X3", "M", [], dag),
-        ]
-
-        extra_sc_relations = [scr for scr in should_cause_relations if scr not in expected_should_cause_relations]
-        missing_sc_relations = [escr for escr in expected_should_cause_relations if escr not in should_cause_relations]
-
-        self.assertEqual(extra_sc_relations, [])
-        self.assertEqual(missing_sc_relations, [])
-
-        # Check all ShouldNotCause relations are present and no extra
-        expected_should_not_cause_relations = [
-            ShouldNotCause("X1", "X2", [], dag),
-            ShouldNotCause("X1", "X3", [], dag),
-            ShouldNotCause("X1", "M", ["Z"], dag),
-            ShouldNotCause("X1", "Y", ["Z"], dag),
-            ShouldNotCause("X2", "X3", [], dag),
-            ShouldNotCause("X2", "M", ["Z"], dag),
-            ShouldNotCause("X2", "Y", ["Z"], dag),
-            ShouldNotCause("X3", "Y", ["M", "Z"], dag),
-            ShouldNotCause("Z", "X3", [], dag),
-        ]
-
-        extra_snc_relations = [
-            sncr for sncr in should_not_cause_relations if sncr not in expected_should_not_cause_relations
-        ]
-        missing_snc_relations = [
-            esncr for esncr in expected_should_not_cause_relations if esncr not in should_not_cause_relations
-        ]
-
-        self.assertEqual(extra_snc_relations, [])
-        self.assertEqual(missing_snc_relations, [])
-
-    def test_all_metamorphic_relations_implied_by_dag_parallel(self):
-        dag = CausalDAG(self.dag_dot_path)
-        dag.add_edge("Z", "Y")  # Add a direct path from Z to Y so M becomes a mediator
-        metamorphic_relations = generate_metamorphic_relations(dag, threads=2)
-        should_cause_relations = [mr for mr in metamorphic_relations if isinstance(mr, ShouldCause)]
-        should_not_cause_relations = [mr for mr in metamorphic_relations if isinstance(mr, ShouldNotCause)]
-
-        # Check all ShouldCause relations are present and no extra
-        expected_should_cause_relations = [
-            ShouldCause("X1", "Z", [], dag),
-            ShouldCause("Z", "M", [], dag),
-            ShouldCause("M", "Y", ["Z"], dag),
-            ShouldCause("Z", "Y", ["M"], dag),
-            ShouldCause("X2", "Z", [], dag),
-            ShouldCause("X3", "M", [], dag),
-        ]
-
-        extra_sc_relations = [scr for scr in should_cause_relations if scr not in expected_should_cause_relations]
-        missing_sc_relations = [escr for escr in expected_should_cause_relations if escr not in should_cause_relations]
-
-        self.assertEqual(extra_sc_relations, [])
-        self.assertEqual(missing_sc_relations, [])
-
-        # Check all ShouldNotCause relations are present and no extra
-        expected_should_not_cause_relations = [
-            ShouldNotCause("X1", "X2", [], dag),
-            ShouldNotCause("X1", "X3", [], dag),
-            ShouldNotCause("X1", "M", ["Z"], dag),
-            ShouldNotCause("X1", "Y", ["Z"], dag),
-            ShouldNotCause("X2", "X3", [], dag),
-            ShouldNotCause("X2", "M", ["Z"], dag),
-            ShouldNotCause("X2", "Y", ["Z"], dag),
-            ShouldNotCause("X3", "Y", ["M", "Z"], dag),
-            ShouldNotCause("Z", "X3", [], dag),
-        ]
-
-        extra_snc_relations = [
-            sncr for sncr in should_not_cause_relations if sncr not in expected_should_not_cause_relations
-        ]
-        missing_snc_relations = [
-            esncr for esncr in expected_should_not_cause_relations if esncr not in should_not_cause_relations
-        ]
-
-        self.assertEqual(extra_snc_relations, [])
-        self.assertEqual(missing_snc_relations, [])
-
-    def test_all_metamorphic_relations_implied_by_dag_ignore_cycles(self):
-        dag = CausalDAG(self.dcg_dot_path, ignore_cycles=True)
-        metamorphic_relations = generate_metamorphic_relations(dag, threads=2, nodes_to_ignore=set(dag.cycle_nodes()))
-        should_cause_relations = [mr for mr in metamorphic_relations if isinstance(mr, ShouldCause)]
-        should_not_cause_relations = [mr for mr in metamorphic_relations if isinstance(mr, ShouldNotCause)]
-
-        # Check all ShouldCause relations are present and no extra
-
-        self.assertEqual(
-            should_cause_relations,
-            [
-                ShouldCause("a", "b", [], dag),
-            ],
-        )
-        self.assertEqual(
-            should_not_cause_relations,
-            [],
-        )
-
-    def test_generate_metamorphic_relation_(self):
-        dag = CausalDAG(self.dag_dot_path)
-        [metamorphic_relation] = generate_metamorphic_relation(("X1", "Z"), dag)
-        self.assertEqual(
-            metamorphic_relation,
-            ShouldCause("X1", "Z", [], dag),
-        )
-
-    def test_equivalent_metamorphic_relations(self):
-        dag = CausalDAG(self.dag_dot_path)
-        sc_mr_a = ShouldCause("X", "Y", ["A", "B", "C"], dag)
-        sc_mr_b = ShouldCause("X", "Y", ["A", "B", "C"], dag)
-        self.assertEqual(sc_mr_a == sc_mr_b, True)
-
-    def test_equivalent_metamorphic_relations_empty_adjustment_set(self):
-        dag = CausalDAG(self.dag_dot_path)
-        sc_mr_a = ShouldCause("X", "Y", [], dag)
-        sc_mr_b = ShouldCause("X", "Y", [], dag)
-        self.assertEqual(sc_mr_a == sc_mr_b, True)
-
-    def test_equivalent_metamorphic_relations_different_order_adjustment_set(self):
-        dag = CausalDAG(self.dag_dot_path)
-        sc_mr_a = ShouldCause("X", "Y", ["A", "B", "C"], dag)
-        sc_mr_b = ShouldCause("X", "Y", ["C", "A", "B"], dag)
-        self.assertEqual(sc_mr_a == sc_mr_b, True)
-
-    def test_different_metamorphic_relations_empty_adjustment_set_different_outcome(self):
-        dag = CausalDAG(self.dag_dot_path)
-        sc_mr_a = ShouldCause("X", "Z", [], dag)
-        sc_mr_b = ShouldCause("X", "Y", [], dag)
-        self.assertEqual(sc_mr_a == sc_mr_b, False)
-
-    def test_different_metamorphic_relations_empty_adjustment_set_different_treatment(self):
-        dag = CausalDAG(self.dag_dot_path)
-        sc_mr_a = ShouldCause("X", "Y", [], dag)
-        sc_mr_b = ShouldCause("Z", "Y", [], dag)
-        self.assertEqual(sc_mr_a == sc_mr_b, False)
-
-    def test_different_metamorphic_relations_empty_adjustment_set_adjustment_set(self):
-        dag = CausalDAG(self.dag_dot_path)
-        sc_mr_a = ShouldCause("X", "Y", ["A"], dag)
-        sc_mr_b = ShouldCause("X", "Y", [], dag)
-        self.assertEqual(sc_mr_a == sc_mr_b, False)
-
-    def test_different_metamorphic_relations_different_type(self):
-        dag = CausalDAG(self.dag_dot_path)
-        sc_mr_a = ShouldCause("X", "Y", [], dag)
-        sc_mr_b = ShouldNotCause("X", "Y", [], dag)
-        self.assertEqual(sc_mr_a == sc_mr_b, False)
diff --git a/tests/testing_tests/test_metamorphic_relations.py b/tests/testing_tests/test_metamorphic_relations.py
new file mode 100644
index 00000000..dd9c3694
--- /dev/null
+++ b/tests/testing_tests/test_metamorphic_relations.py
@@ -0,0 +1,247 @@
+import unittest
+import os
+import shutil, tempfile
+import pandas as pd
+from itertools import combinations
+
+from causal_testing.specification.causal_dag import CausalDAG
+from causal_testing.specification.causal_specification import Scenario
+from causal_testing.testing.metamorphic_relation import (
+    ShouldCause,
+    ShouldNotCause,
+    generate_metamorphic_relations,
+    generate_metamorphic_relation,
+)
+from causal_testing.specification.variable import Input, Output
+from causal_testing.testing.base_test_case import BaseTestCase
+
+
+class TestMetamorphicRelation(unittest.TestCase):
+    def setUp(self) -> None:
+        self.temp_dir_path = tempfile.mkdtemp()
+        self.dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot")
+        dag_dot = """digraph DAG { rankdir=LR; X1 -> Z; Z -> M; M -> Y; X2 -> Z; X3 -> M;}"""
+        with open(self.dag_dot_path, "w") as f:
+            f.write(dag_dot)
+        self.dcg_dot_path = os.path.join(self.temp_dir_path, "dcg.dot")
+        dcg_dot = """digraph dct { a -> b -> c -> d; d -> c; }"""
+        with open(self.dcg_dot_path, "w") as f:
+            f.write(dcg_dot)
+
+        X1 = Input("X1", float)
+        X2 = Input("X2", float)
+        X3 = Input("X3", float)
+        Z = Output("Z", float)
+        M = Output("M", float)
+        Y = Output("Y", float)
+        self.scenario = Scenario(variables={X1, X2, X3, Z, M, Y})
+        self.default_control_input_config = {"X1": 1, "X2": 2, "X3": 3}
+        self.default_treatment_input_config = {"X1": 2, "X2": 3, "X3": 3}
+
+    def tearDown(self) -> None:
+        shutil.rmtree(self.temp_dir_path)
+
+    def test_should_not_cause_json_stub(self):
+        """Test if the ShouldCause MR passes all metamorphic tests where the DAG perfectly represents the program
+        and there is only a single input."""
+        causal_dag = CausalDAG(self.dag_dot_path)
+        causal_dag.graph.remove_nodes_from(["X2", "X3"])
+        adj_set = list(causal_dag.direct_effect_adjustment_sets(["X1"], ["Z"])[0])
+        should_not_cause_MR = ShouldNotCause(BaseTestCase("X1", "Z"), adj_set, causal_dag)
+        self.assertEqual(
+            should_not_cause_MR.to_json_stub(),
+            {
+                "effect": "direct",
+                "estimate_type": "coefficient",
+                "estimator": "LinearRegressionEstimator",
+                "expected_effect": {"Z": "NoEffect"},
+                "formula": "Z ~ X1",
+                "mutations": ["X1"],
+                "name": "X1 _||_ Z",
+                "formula": "Z ~ X1",
+                "alpha": 0.05,
+                "skip": True,
+            },
+        )
+
+    def test_should_cause_json_stub(self):
+        """Test if the ShouldCause MR passes all metamorphic tests where the DAG perfectly represents the program
+        and there is only a single input."""
+        causal_dag = CausalDAG(self.dag_dot_path)
+        causal_dag.graph.remove_nodes_from(["X2", "X3"])
+        adj_set = list(causal_dag.direct_effect_adjustment_sets(["X1"], ["Z"])[0])
+        should_cause_MR = ShouldCause(BaseTestCase("X1", "Z"), adj_set, causal_dag)
+        self.assertEqual(
+            should_cause_MR.to_json_stub(),
+            {
+                "effect": "direct",
+                "estimate_type": "coefficient",
+                "estimator": "LinearRegressionEstimator",
+                "expected_effect": {"Z": "SomeEffect"},
+                "formula": "Z ~ X1",
+                "mutations": ["X1"],
+                "name": "X1 --> Z",
+                "skip": True,
+            },
+        )
+
+    def test_all_metamorphic_relations_implied_by_dag(self):
+        dag = CausalDAG(self.dag_dot_path)
+        dag.add_edge("Z", "Y")  # Add a direct path from Z to Y so M becomes a mediator
+        metamorphic_relations = generate_metamorphic_relations(dag)
+        should_cause_relations = [mr for mr in metamorphic_relations if isinstance(mr, ShouldCause)]
+        should_not_cause_relations = [mr for mr in metamorphic_relations if isinstance(mr, ShouldNotCause)]
+
+        # Check all ShouldCause relations are present and no extra
+        expected_should_cause_relations = [
+            ShouldCause(BaseTestCase("X1", "Z"), [], dag),
+            ShouldCause(BaseTestCase("Z", "M"), [], dag),
+            ShouldCause(BaseTestCase("M", "Y"), ["Z"], dag),
+            ShouldCause(BaseTestCase("Z", "Y"), ["M"], dag),
+            ShouldCause(BaseTestCase("X2", "Z"), [], dag),
+            ShouldCause(BaseTestCase("X3", "M"), [], dag),
+        ]
+
+        extra_sc_relations = [scr for scr in should_cause_relations if scr not in expected_should_cause_relations]
+        missing_sc_relations = [escr for escr in expected_should_cause_relations if escr not in should_cause_relations]
+
+        self.assertEqual(extra_sc_relations, [])
+        self.assertEqual(missing_sc_relations, [])
+
+        # Check all ShouldNotCause relations are present and no extra
+        expected_should_not_cause_relations = [
+            ShouldNotCause(BaseTestCase("X1", "X2"), [], dag),
+            ShouldNotCause(BaseTestCase("X1", "X3"), [], dag),
+            ShouldNotCause(BaseTestCase("X1", "M"), ["Z"], dag),
+            ShouldNotCause(BaseTestCase("X1", "Y"), ["Z"], dag),
+            ShouldNotCause(BaseTestCase("X2", "X3"), [], dag),
+            ShouldNotCause(BaseTestCase("X2", "M"), ["Z"], dag),
+            ShouldNotCause(BaseTestCase("X2", "Y"), ["Z"], dag),
+            ShouldNotCause(BaseTestCase("X3", "Y"), ["M", "Z"], dag),
+            ShouldNotCause(BaseTestCase("Z", "X3"), [], dag),
+        ]
+
+        extra_snc_relations = [
+            sncr for sncr in should_not_cause_relations if sncr not in expected_should_not_cause_relations
+        ]
+        missing_snc_relations = [
+            esncr for esncr in expected_should_not_cause_relations if esncr not in should_not_cause_relations
+        ]
+
+        self.assertEqual(extra_snc_relations, [])
+        self.assertEqual(missing_snc_relations, [])
+
+    def test_all_metamorphic_relations_implied_by_dag_parallel(self):
+        dag = CausalDAG(self.dag_dot_path)
+        dag.add_edge("Z", "Y")  # Add a direct path from Z to Y so M becomes a mediator
+        metamorphic_relations = generate_metamorphic_relations(dag, threads=2)
+        should_cause_relations = [mr for mr in metamorphic_relations if isinstance(mr, ShouldCause)]
+        should_not_cause_relations = [mr for mr in metamorphic_relations if isinstance(mr, ShouldNotCause)]
+
+        # Check all ShouldCause relations are present and no extra
+        expected_should_cause_relations = [
+            ShouldCause(BaseTestCase("X1", "Z"), [], dag),
+            ShouldCause(BaseTestCase("Z", "M"), [], dag),
+            ShouldCause(BaseTestCase("M", "Y"), ["Z"], dag),
+            ShouldCause(BaseTestCase("Z", "Y"), ["M"], dag),
+            ShouldCause(BaseTestCase("X2", "Z"), [], dag),
+            ShouldCause(BaseTestCase("X3", "M"), [], dag),
+        ]
+
+        extra_sc_relations = [scr for scr in should_cause_relations if scr not in expected_should_cause_relations]
+        missing_sc_relations = [escr for escr in expected_should_cause_relations if escr not in should_cause_relations]
+
+        self.assertEqual(extra_sc_relations, [])
+        self.assertEqual(missing_sc_relations, [])
+
+        # Check all ShouldNotCause relations are present and no extra
+        expected_should_not_cause_relations = [
+            ShouldNotCause(BaseTestCase("X1", "X2"), [], dag),
+            ShouldNotCause(BaseTestCase("X1", "X3"), [], dag),
+            ShouldNotCause(BaseTestCase("X1", "M"), ["Z"], dag),
+            ShouldNotCause(BaseTestCase("X1", "Y"), ["Z"], dag),
+            ShouldNotCause(BaseTestCase("X2", "X3"), [], dag),
+            ShouldNotCause(BaseTestCase("X2", "M"), ["Z"], dag),
+            ShouldNotCause(BaseTestCase("X2", "Y"), ["Z"], dag),
+            ShouldNotCause(BaseTestCase("X3", "Y"), ["M", "Z"], dag),
+            ShouldNotCause(BaseTestCase("Z", "X3"), [], dag),
+        ]
+
+        extra_snc_relations = [
+            sncr for sncr in should_not_cause_relations if sncr not in expected_should_not_cause_relations
+        ]
+        missing_snc_relations = [
+            esncr for esncr in expected_should_not_cause_relations if esncr not in should_not_cause_relations
+        ]
+
+        self.assertEqual(extra_snc_relations, [])
+        self.assertEqual(missing_snc_relations, [])
+
+    def test_all_metamorphic_relations_implied_by_dag_ignore_cycles(self):
+        dag = CausalDAG(self.dcg_dot_path, ignore_cycles=True)
+        metamorphic_relations = generate_metamorphic_relations(dag, threads=2, nodes_to_ignore=set(dag.cycle_nodes()))
+        should_cause_relations = [mr for mr in metamorphic_relations if isinstance(mr, ShouldCause)]
+        should_not_cause_relations = [mr for mr in metamorphic_relations if isinstance(mr, ShouldNotCause)]
+
+        # Check all ShouldCause relations are present and no extra
+
+        self.assertEqual(
+            should_cause_relations,
+            [
+                ShouldCause(BaseTestCase("a", "b"), [], dag),
+            ],
+        )
+        self.assertEqual(
+            should_not_cause_relations,
+            [],
+        )
+
+    def test_generate_metamorphic_relation_(self):
+        dag = CausalDAG(self.dag_dot_path)
+        [metamorphic_relation] = generate_metamorphic_relation(("X1", "Z"), dag)
+        self.assertEqual(
+            metamorphic_relation,
+            ShouldCause(BaseTestCase("X1", "Z"), [], dag),
+        )
+
+    def test_equivalent_metamorphic_relations(self):
+        dag = CausalDAG(self.dag_dot_path)
+        sc_mr_a = ShouldCause(BaseTestCase("X", "Y"), ["A", "B", "C"], dag)
+        sc_mr_b = ShouldCause(BaseTestCase("X", "Y"), ["A", "B", "C"], dag)
+        self.assertEqual(sc_mr_a == sc_mr_b, True)
+
+    def test_equivalent_metamorphic_relations_empty_adjustment_set(self):
+        dag = CausalDAG(self.dag_dot_path)
+        sc_mr_a = ShouldCause(BaseTestCase("X", "Y"), [], dag)
+        sc_mr_b = ShouldCause(BaseTestCase("X", "Y"), [], dag)
+        self.assertEqual(sc_mr_a == sc_mr_b, True)
+
+    def test_equivalent_metamorphic_relations_different_order_adjustment_set(self):
+        dag = CausalDAG(self.dag_dot_path)
+        sc_mr_a = ShouldCause(BaseTestCase("X", "Y"), ["A", "B", "C"], dag)
+        sc_mr_b = ShouldCause(BaseTestCase("X", "Y"), ["C", "A", "B"], dag)
+        self.assertEqual(sc_mr_a == sc_mr_b, True)
+
+    def test_different_metamorphic_relations_empty_adjustment_set_different_outcome(self):
+        dag = CausalDAG(self.dag_dot_path)
+        sc_mr_a = ShouldCause(BaseTestCase("X", "Z"), [], dag)
+        sc_mr_b = ShouldCause(BaseTestCase("X", "Y"), [], dag)
+        self.assertEqual(sc_mr_a == sc_mr_b, False)
+
+    def test_different_metamorphic_relations_empty_adjustment_set_different_treatment(self):
+        dag = CausalDAG(self.dag_dot_path)
+        sc_mr_a = ShouldCause(BaseTestCase("X", "Y"), [], dag)
+        sc_mr_b = ShouldCause(BaseTestCase("Z", "Y"), [], dag)
+        self.assertEqual(sc_mr_a == sc_mr_b, False)
+
+    def test_different_metamorphic_relations_empty_adjustment_set_adjustment_set(self):
+        dag = CausalDAG(self.dag_dot_path)
+        sc_mr_a = ShouldCause(BaseTestCase("X", "Y"), ["A"], dag)
+        sc_mr_b = ShouldCause(BaseTestCase("X", "Y"), [], dag)
+        self.assertEqual(sc_mr_a == sc_mr_b, False)
+
+    def test_different_metamorphic_relations_different_type(self):
+        dag = CausalDAG(self.dag_dot_path)
+        sc_mr_a = ShouldCause(BaseTestCase("X", "Y"), [], dag)
+        sc_mr_b = ShouldNotCause(BaseTestCase("X", "Y"), [], dag)
+        self.assertEqual(sc_mr_a == sc_mr_b, False)

From b3b5261a36823e26cae1633a1f43bf474cb31744 Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Mon, 17 Feb 2025 11:43:57 +0000
Subject: [PATCH 06/44] removed data collector classes

---
 causal_testing/data_collection/__init__.py    |   0
 .../data_collection/data_collector.py         | 161 ------------------
 2 files changed, 161 deletions(-)
 delete mode 100644 causal_testing/data_collection/__init__.py
 delete mode 100644 causal_testing/data_collection/data_collector.py

diff --git a/causal_testing/data_collection/__init__.py b/causal_testing/data_collection/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/causal_testing/data_collection/data_collector.py b/causal_testing/data_collection/data_collector.py
deleted file mode 100644
index d628bfad..00000000
--- a/causal_testing/data_collection/data_collector.py
+++ /dev/null
@@ -1,161 +0,0 @@
-"""This module contains the DataCollector abstract class, as well as its concrete extensions: ExperimentalDataCollector
-and ObservationalDataCollector"""
-
-import logging
-from abc import ABC, abstractmethod
-from enum import Enum
-
-import pandas as pd
-import z3
-
-from causal_testing.specification.causal_specification import Scenario
-
-logger = logging.getLogger(__name__)
-
-
-class DataCollector(ABC):
-    """A data collector is a mechanism which generates or collects data from a system for a given scenario."""
-
-    def __init__(self, scenario: Scenario):
-        self.scenario = scenario
-
-    @abstractmethod
-    def collect_data(self, **kwargs) -> pd.DataFrame:
-        """
-        Populate the dataframe with execution data.
-        :return df: A pandas dataframe containing execution data for the system-under-test.
-        """
-
-    def filter_valid_data(self, data: pd.DataFrame, check_pos: bool = True) -> pd.DataFrame:
-        """Check is execution data is valid for the scenario-under-test.
-
-        Data is invalid if it does not meet the constraints specified in the scenario-under-test.
-
-        :param data: A pandas dataframe containing execution data from the system-under-test.
-        :param check_pos: Whether to check the data for positivity violations (defaults to true).
-        :return satisfying_data: A pandas dataframe containing execution data that satisfy the constraints specified
-        in the scenario-under-test.
-        """
-
-        # Check positivity
-        scenario_variables = set(self.scenario.variables) - {x.name for x in self.scenario.hidden_variables()}
-
-        if check_pos and not (scenario_variables - {x.name for x in self.scenario.hidden_variables()}).issubset(
-            set(data.columns)
-        ):
-            missing_variables = scenario_variables - set(data.columns)
-            raise IndexError(
-                f"Missing columns: missing data for variables {missing_variables}. Should they be marked as hidden?"
-            )
-
-        # Quick out if we don't have any constraints
-        if len(self.scenario.constraints) == 0:
-            return data
-
-        # For each row, does it satisfy the constraints?
-        solver = z3.Solver()
-        for c in self.scenario.constraints:
-            solver.assert_and_track(c, f"background: {c}")
-        sat = []
-        unsat_core = None
-        for _, row in data.iterrows():
-            solver.push()
-            # Check that the row does not violate any scenario constraints
-            # Need to explicitly cast variables to their specified type. Z3 will not take e.g. np.int64 to be an int.
-            # Check that the row does not violate any scenario constraints
-            model = [
-                self.scenario.variables[var].z3
-                == self.scenario.variables[var].z3_val(self.scenario.variables[var].z3, row[var])
-                for var in self.scenario.variables
-                if var in row and not pd.isnull(row[var])
-            ]
-            for c in model:
-                solver.assert_and_track(c, f"model: {c}")
-            check = solver.check()
-            if check == z3.unsat and unsat_core is None:
-                unsat_core = solver.unsat_core()
-            sat.append(check == z3.sat)
-            solver.pop()
-
-        # Strip out rows which violate the constraints
-        satisfying_data = data.copy()
-        satisfying_data["sat"] = sat
-        satisfying_data = satisfying_data.loc[satisfying_data["sat"]]
-        satisfying_data = satisfying_data.drop("sat", axis=1)
-
-        # How many rows did we drop?
-        size_diff = len(data) - len(satisfying_data)
-        if size_diff > 0:
-            logger.warning(
-                f"Discarded {size_diff}/{len(data)} values due to constraint violations.\n For example {unsat_core}",
-            )
-        return satisfying_data
-
-
-class ExperimentalDataCollector(DataCollector):
-    """A data collector that generates data directly by running the system-under-test in the desired conditions.
-
-    Users should implement these methods to collect data from their system.
-    """
-
-    def __init__(
-        self,
-        scenario: Scenario,
-        control_input_configuration: dict,
-        treatment_input_configuration: dict,
-        n_repeats: int = 1,
-    ):
-        super().__init__(scenario)
-        self.control_input_configuration = control_input_configuration
-        self.treatment_input_configuration = treatment_input_configuration
-        self.n_repeats = n_repeats
-
-    def collect_data(self, **kwargs) -> pd.DataFrame:
-        """Run the system-under-test with control and treatment input configurations to obtain experimental data in
-        which the causal effect of interest is isolated by design.
-
-        :return: A pandas dataframe containing execution data for the system-under-test in both control and treatment
-        executions.
-        """
-        control_results_df = self.run_system_with_input_configuration(self.control_input_configuration)
-        control_results_df.rename(lambda x: f"control_{x}", inplace=True)
-        treatment_results_df = self.run_system_with_input_configuration(self.treatment_input_configuration)
-        treatment_results_df.rename(lambda x: f"treatment_{x}", inplace=True)
-        results_df = pd.concat([control_results_df, treatment_results_df], ignore_index=False)
-        return results_df
-
-    @abstractmethod
-    def run_system_with_input_configuration(self, input_configuration: dict) -> pd.DataFrame:
-        """Run the system with a given input configuration and return the resulting execution data.
-
-        :param input_configuration: A dictionary which maps a subset of inputs to values.
-        :return: A pandas dataframe containing execution data obtained by executing the system-under-test with the
-        specified input configuration.
-        """
-
-
-class ObservationalDataCollector(DataCollector):
-    """A data collector that extracts data that is relevant to the specified scenario from a dataframe of execution
-    data."""
-
-    def __init__(self, scenario: Scenario, data: pd.DataFrame):
-        super().__init__(scenario)
-        self.data = data
-
-    def collect_data(self, **kwargs) -> pd.DataFrame:
-        """Read a pandas dataframe and filter to remove
-        any data which is invalid for the scenario-under-test.
-
-        Data is invalid if it does not meet the constraints outlined in the scenario-under-test (Scenario).
-
-        :return: A pandas dataframe containing execution data that is valid for the scenario-under-test.
-        """
-        execution_data_df = self.data
-        for meta in self.scenario.metas():
-            if meta.name not in self.data:
-                meta.populate(execution_data_df)
-        scenario_execution_data_df = self.filter_valid_data(execution_data_df)
-        for var_name, var in self.scenario.variables.items():
-            if issubclass(var.datatype, Enum):
-                scenario_execution_data_df[var_name] = [var.datatype(x) for x in scenario_execution_data_df[var_name]]
-        return scenario_execution_data_df

From 73eb5f1f32de4f64145306ab5d07f5013f21fe73 Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Mon, 17 Feb 2025 11:52:18 +0000
Subject: [PATCH 07/44] Removed unnecessary methods and arguments from
 metamorphic relation

---
 .../testing/metamorphic_relation.py           | 43 +--------
 .../test_metamorphic_relations.py             | 96 +++++++++----------
 2 files changed, 53 insertions(+), 86 deletions(-)

diff --git a/causal_testing/testing/metamorphic_relation.py b/causal_testing/testing/metamorphic_relation.py
index 1d296749..813c2fb5 100644
--- a/causal_testing/testing/metamorphic_relation.py
+++ b/causal_testing/testing/metamorphic_relation.py
@@ -4,7 +4,6 @@
 """
 
 from dataclasses import dataclass
-from abc import abstractmethod
 from typing import Iterable
 from itertools import combinations
 import argparse
@@ -26,18 +25,6 @@ class MetamorphicRelation:
 
     base_test_case: BaseTestCase
     adjustment_vars: Iterable[Node]
-    dag: CausalDAG
-    tests: Iterable = None
-
-    @abstractmethod
-    def to_json_stub(self, skip=True) -> dict:
-        """Convert to a JSON frontend stub string for user customisation"""
-
-    @abstractmethod
-    def test_oracle(self, test_results):
-        """A test oracle that assert whether the MR holds or not based on ALL test results.
-
-        This method must raise an assertion, not return a bool."""
 
     def __eq__(self, other):
         same_type = self.__class__ == other.__class__
@@ -51,16 +38,6 @@ def __eq__(self, other):
 class ShouldCause(MetamorphicRelation):
     """Class representing a should cause metamorphic relation."""
 
-    def assertion(self, source_output, follow_up_output):
-        """If there is a causal effect, the outputs should not be the same."""
-        return source_output != follow_up_output
-
-    def test_oracle(self, test_results):
-        """A single passing test is sufficient to show presence of a causal effect."""
-        assert len(test_results["fail"]) < len(
-            self.tests
-        ), f"{str(self)}: {len(test_results['fail'])}/{len(self.tests)} tests failed."
-
     def to_json_stub(self, skip=True) -> dict:
         """Convert to a JSON frontend stub string for user customisation"""
         return {
@@ -84,16 +61,6 @@ def __str__(self):
 class ShouldNotCause(MetamorphicRelation):
     """Class representing a should cause metamorphic relation."""
 
-    def assertion(self, source_output, follow_up_output):
-        """If there is a causal effect, the outputs should not be the same."""
-        return source_output == follow_up_output
-
-    def test_oracle(self, test_results):
-        """A single passing test is sufficient to show presence of a causal effect."""
-        assert (
-            len(test_results["fail"]) == 0
-        ), f"{str(self)}: {len(test_results['fail'])}/{len(self.tests)} tests failed."
-
     def to_json_stub(self, skip=True) -> dict:
         """Convert to a JSON frontend stub string for user customisation"""
         return {
@@ -140,30 +107,30 @@ def generate_metamorphic_relation(
         if u in nx.ancestors(dag.graph, v):
             adj_sets = dag.direct_effect_adjustment_sets([u], [v], nodes_to_ignore=nodes_to_ignore)
             if adj_sets:
-                metamorphic_relations.append(ShouldNotCause(BaseTestCase(u, v), list(adj_sets[0]), dag))
+                metamorphic_relations.append(ShouldNotCause(BaseTestCase(u, v), list(adj_sets[0])))
 
         # Case 2: V --> ... --> U
         elif v in nx.ancestors(dag.graph, u):
             adj_sets = dag.direct_effect_adjustment_sets([v], [u], nodes_to_ignore=nodes_to_ignore)
             if adj_sets:
-                metamorphic_relations.append(ShouldNotCause(BaseTestCase(v, u), list(adj_sets[0]), dag))
+                metamorphic_relations.append(ShouldNotCause(BaseTestCase(v, u), list(adj_sets[0])))
 
         # Case 3: V _||_ U (No directed walk from V to U but there may be a back-door path e.g. U <-- Z --> V).
         # Only make one MR since V _||_ U == U _||_ V
         else:
             adj_sets = dag.direct_effect_adjustment_sets([u], [v], nodes_to_ignore=nodes_to_ignore)
             if adj_sets:
-                metamorphic_relations.append(ShouldNotCause(BaseTestCase(u, v), list(adj_sets[0]), dag))
+                metamorphic_relations.append(ShouldNotCause(BaseTestCase(u, v), list(adj_sets[0])))
 
     # Create a ShouldCause relation for each edge (u, v) or (v, u)
     elif (u, v) in dag.graph.edges:
         adj_sets = dag.direct_effect_adjustment_sets([u], [v], nodes_to_ignore=nodes_to_ignore)
         if adj_sets:
-            metamorphic_relations.append(ShouldCause(BaseTestCase(u, v), list(adj_sets[0]), dag))
+            metamorphic_relations.append(ShouldCause(BaseTestCase(u, v), list(adj_sets[0])))
     else:
         adj_sets = dag.direct_effect_adjustment_sets([v], [u], nodes_to_ignore=nodes_to_ignore)
         if adj_sets:
-            metamorphic_relations.append(ShouldCause(BaseTestCase(v, u), list(adj_sets[0]), dag))
+            metamorphic_relations.append(ShouldCause(BaseTestCase(v, u), list(adj_sets[0])))
     return metamorphic_relations
 
 
diff --git a/tests/testing_tests/test_metamorphic_relations.py b/tests/testing_tests/test_metamorphic_relations.py
index dd9c3694..9055c8c4 100644
--- a/tests/testing_tests/test_metamorphic_relations.py
+++ b/tests/testing_tests/test_metamorphic_relations.py
@@ -47,7 +47,7 @@ def test_should_not_cause_json_stub(self):
         causal_dag = CausalDAG(self.dag_dot_path)
         causal_dag.graph.remove_nodes_from(["X2", "X3"])
         adj_set = list(causal_dag.direct_effect_adjustment_sets(["X1"], ["Z"])[0])
-        should_not_cause_MR = ShouldNotCause(BaseTestCase("X1", "Z"), adj_set, causal_dag)
+        should_not_cause_MR = ShouldNotCause(BaseTestCase("X1", "Z"), adj_set)
         self.assertEqual(
             should_not_cause_MR.to_json_stub(),
             {
@@ -70,7 +70,7 @@ def test_should_cause_json_stub(self):
         causal_dag = CausalDAG(self.dag_dot_path)
         causal_dag.graph.remove_nodes_from(["X2", "X3"])
         adj_set = list(causal_dag.direct_effect_adjustment_sets(["X1"], ["Z"])[0])
-        should_cause_MR = ShouldCause(BaseTestCase("X1", "Z"), adj_set, causal_dag)
+        should_cause_MR = ShouldCause(BaseTestCase("X1", "Z"), adj_set)
         self.assertEqual(
             should_cause_MR.to_json_stub(),
             {
@@ -94,12 +94,12 @@ def test_all_metamorphic_relations_implied_by_dag(self):
 
         # Check all ShouldCause relations are present and no extra
         expected_should_cause_relations = [
-            ShouldCause(BaseTestCase("X1", "Z"), [], dag),
-            ShouldCause(BaseTestCase("Z", "M"), [], dag),
-            ShouldCause(BaseTestCase("M", "Y"), ["Z"], dag),
-            ShouldCause(BaseTestCase("Z", "Y"), ["M"], dag),
-            ShouldCause(BaseTestCase("X2", "Z"), [], dag),
-            ShouldCause(BaseTestCase("X3", "M"), [], dag),
+            ShouldCause(BaseTestCase("X1", "Z"), []),
+            ShouldCause(BaseTestCase("Z", "M"), []),
+            ShouldCause(BaseTestCase("M", "Y"), ["Z"]),
+            ShouldCause(BaseTestCase("Z", "Y"), ["M"]),
+            ShouldCause(BaseTestCase("X2", "Z"), []),
+            ShouldCause(BaseTestCase("X3", "M"), []),
         ]
 
         extra_sc_relations = [scr for scr in should_cause_relations if scr not in expected_should_cause_relations]
@@ -110,15 +110,15 @@ def test_all_metamorphic_relations_implied_by_dag(self):
 
         # Check all ShouldNotCause relations are present and no extra
         expected_should_not_cause_relations = [
-            ShouldNotCause(BaseTestCase("X1", "X2"), [], dag),
-            ShouldNotCause(BaseTestCase("X1", "X3"), [], dag),
-            ShouldNotCause(BaseTestCase("X1", "M"), ["Z"], dag),
-            ShouldNotCause(BaseTestCase("X1", "Y"), ["Z"], dag),
-            ShouldNotCause(BaseTestCase("X2", "X3"), [], dag),
-            ShouldNotCause(BaseTestCase("X2", "M"), ["Z"], dag),
-            ShouldNotCause(BaseTestCase("X2", "Y"), ["Z"], dag),
-            ShouldNotCause(BaseTestCase("X3", "Y"), ["M", "Z"], dag),
-            ShouldNotCause(BaseTestCase("Z", "X3"), [], dag),
+            ShouldNotCause(BaseTestCase("X1", "X2"), []),
+            ShouldNotCause(BaseTestCase("X1", "X3"), []),
+            ShouldNotCause(BaseTestCase("X1", "M"), ["Z"]),
+            ShouldNotCause(BaseTestCase("X1", "Y"), ["Z"]),
+            ShouldNotCause(BaseTestCase("X2", "X3"), []),
+            ShouldNotCause(BaseTestCase("X2", "M"), ["Z"]),
+            ShouldNotCause(BaseTestCase("X2", "Y"), ["Z"]),
+            ShouldNotCause(BaseTestCase("X3", "Y"), ["M", "Z"]),
+            ShouldNotCause(BaseTestCase("Z", "X3"), []),
         ]
 
         extra_snc_relations = [
@@ -140,12 +140,12 @@ def test_all_metamorphic_relations_implied_by_dag_parallel(self):
 
         # Check all ShouldCause relations are present and no extra
         expected_should_cause_relations = [
-            ShouldCause(BaseTestCase("X1", "Z"), [], dag),
-            ShouldCause(BaseTestCase("Z", "M"), [], dag),
-            ShouldCause(BaseTestCase("M", "Y"), ["Z"], dag),
-            ShouldCause(BaseTestCase("Z", "Y"), ["M"], dag),
-            ShouldCause(BaseTestCase("X2", "Z"), [], dag),
-            ShouldCause(BaseTestCase("X3", "M"), [], dag),
+            ShouldCause(BaseTestCase("X1", "Z"), []),
+            ShouldCause(BaseTestCase("Z", "M"), []),
+            ShouldCause(BaseTestCase("M", "Y"), ["Z"]),
+            ShouldCause(BaseTestCase("Z", "Y"), ["M"]),
+            ShouldCause(BaseTestCase("X2", "Z"), []),
+            ShouldCause(BaseTestCase("X3", "M"), []),
         ]
 
         extra_sc_relations = [scr for scr in should_cause_relations if scr not in expected_should_cause_relations]
@@ -156,15 +156,15 @@ def test_all_metamorphic_relations_implied_by_dag_parallel(self):
 
         # Check all ShouldNotCause relations are present and no extra
         expected_should_not_cause_relations = [
-            ShouldNotCause(BaseTestCase("X1", "X2"), [], dag),
-            ShouldNotCause(BaseTestCase("X1", "X3"), [], dag),
-            ShouldNotCause(BaseTestCase("X1", "M"), ["Z"], dag),
-            ShouldNotCause(BaseTestCase("X1", "Y"), ["Z"], dag),
-            ShouldNotCause(BaseTestCase("X2", "X3"), [], dag),
-            ShouldNotCause(BaseTestCase("X2", "M"), ["Z"], dag),
-            ShouldNotCause(BaseTestCase("X2", "Y"), ["Z"], dag),
-            ShouldNotCause(BaseTestCase("X3", "Y"), ["M", "Z"], dag),
-            ShouldNotCause(BaseTestCase("Z", "X3"), [], dag),
+            ShouldNotCause(BaseTestCase("X1", "X2"), []),
+            ShouldNotCause(BaseTestCase("X1", "X3"), []),
+            ShouldNotCause(BaseTestCase("X1", "M"), ["Z"]),
+            ShouldNotCause(BaseTestCase("X1", "Y"), ["Z"]),
+            ShouldNotCause(BaseTestCase("X2", "X3"), []),
+            ShouldNotCause(BaseTestCase("X2", "M"), ["Z"]),
+            ShouldNotCause(BaseTestCase("X2", "Y"), ["Z"]),
+            ShouldNotCause(BaseTestCase("X3", "Y"), ["M", "Z"]),
+            ShouldNotCause(BaseTestCase("Z", "X3"), []),
         ]
 
         extra_snc_relations = [
@@ -188,7 +188,7 @@ def test_all_metamorphic_relations_implied_by_dag_ignore_cycles(self):
         self.assertEqual(
             should_cause_relations,
             [
-                ShouldCause(BaseTestCase("a", "b"), [], dag),
+                ShouldCause(BaseTestCase("a", "b"), []),
             ],
         )
         self.assertEqual(
@@ -201,47 +201,47 @@ def test_generate_metamorphic_relation_(self):
         [metamorphic_relation] = generate_metamorphic_relation(("X1", "Z"), dag)
         self.assertEqual(
             metamorphic_relation,
-            ShouldCause(BaseTestCase("X1", "Z"), [], dag),
+            ShouldCause(BaseTestCase("X1", "Z"), []),
         )
 
     def test_equivalent_metamorphic_relations(self):
         dag = CausalDAG(self.dag_dot_path)
-        sc_mr_a = ShouldCause(BaseTestCase("X", "Y"), ["A", "B", "C"], dag)
-        sc_mr_b = ShouldCause(BaseTestCase("X", "Y"), ["A", "B", "C"], dag)
+        sc_mr_a = ShouldCause(BaseTestCase("X", "Y"), ["A", "B", "C"])
+        sc_mr_b = ShouldCause(BaseTestCase("X", "Y"), ["A", "B", "C"])
         self.assertEqual(sc_mr_a == sc_mr_b, True)
 
     def test_equivalent_metamorphic_relations_empty_adjustment_set(self):
         dag = CausalDAG(self.dag_dot_path)
-        sc_mr_a = ShouldCause(BaseTestCase("X", "Y"), [], dag)
-        sc_mr_b = ShouldCause(BaseTestCase("X", "Y"), [], dag)
+        sc_mr_a = ShouldCause(BaseTestCase("X", "Y"), [])
+        sc_mr_b = ShouldCause(BaseTestCase("X", "Y"), [])
         self.assertEqual(sc_mr_a == sc_mr_b, True)
 
     def test_equivalent_metamorphic_relations_different_order_adjustment_set(self):
         dag = CausalDAG(self.dag_dot_path)
-        sc_mr_a = ShouldCause(BaseTestCase("X", "Y"), ["A", "B", "C"], dag)
-        sc_mr_b = ShouldCause(BaseTestCase("X", "Y"), ["C", "A", "B"], dag)
+        sc_mr_a = ShouldCause(BaseTestCase("X", "Y"), ["A", "B", "C"])
+        sc_mr_b = ShouldCause(BaseTestCase("X", "Y"), ["C", "A", "B"])
         self.assertEqual(sc_mr_a == sc_mr_b, True)
 
     def test_different_metamorphic_relations_empty_adjustment_set_different_outcome(self):
         dag = CausalDAG(self.dag_dot_path)
-        sc_mr_a = ShouldCause(BaseTestCase("X", "Z"), [], dag)
-        sc_mr_b = ShouldCause(BaseTestCase("X", "Y"), [], dag)
+        sc_mr_a = ShouldCause(BaseTestCase("X", "Z"), [])
+        sc_mr_b = ShouldCause(BaseTestCase("X", "Y"), [])
         self.assertEqual(sc_mr_a == sc_mr_b, False)
 
     def test_different_metamorphic_relations_empty_adjustment_set_different_treatment(self):
         dag = CausalDAG(self.dag_dot_path)
-        sc_mr_a = ShouldCause(BaseTestCase("X", "Y"), [], dag)
-        sc_mr_b = ShouldCause(BaseTestCase("Z", "Y"), [], dag)
+        sc_mr_a = ShouldCause(BaseTestCase("X", "Y"), [])
+        sc_mr_b = ShouldCause(BaseTestCase("Z", "Y"), [])
         self.assertEqual(sc_mr_a == sc_mr_b, False)
 
     def test_different_metamorphic_relations_empty_adjustment_set_adjustment_set(self):
         dag = CausalDAG(self.dag_dot_path)
-        sc_mr_a = ShouldCause(BaseTestCase("X", "Y"), ["A"], dag)
-        sc_mr_b = ShouldCause(BaseTestCase("X", "Y"), [], dag)
+        sc_mr_a = ShouldCause(BaseTestCase("X", "Y"), ["A"])
+        sc_mr_b = ShouldCause(BaseTestCase("X", "Y"), [])
         self.assertEqual(sc_mr_a == sc_mr_b, False)
 
     def test_different_metamorphic_relations_different_type(self):
         dag = CausalDAG(self.dag_dot_path)
-        sc_mr_a = ShouldCause(BaseTestCase("X", "Y"), [], dag)
-        sc_mr_b = ShouldNotCause(BaseTestCase("X", "Y"), [], dag)
+        sc_mr_a = ShouldCause(BaseTestCase("X", "Y"), [])
+        sc_mr_b = ShouldNotCause(BaseTestCase("X", "Y"), [])
         self.assertEqual(sc_mr_a == sc_mr_b, False)

From 127a2f40f0fba5f507704166970a6b0a14015047 Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Mon, 17 Feb 2025 12:28:20 +0000
Subject: [PATCH 08/44] Added experimental estimator to keep functionality of
 experimental data collector

---
 .../estimation/experimental_estimator.py      | 102 ++++++++++++++++++
 .../test_experimental_estimator.py            |  45 ++++++++
 2 files changed, 147 insertions(+)
 create mode 100644 causal_testing/estimation/experimental_estimator.py
 create mode 100644 tests/estimation_tests/test_experimental_estimator.py

diff --git a/causal_testing/estimation/experimental_estimator.py b/causal_testing/estimation/experimental_estimator.py
new file mode 100644
index 00000000..9ca8c1fa
--- /dev/null
+++ b/causal_testing/estimation/experimental_estimator.py
@@ -0,0 +1,102 @@
+"""This module contains the ExperimentalEstimator class for directly interacting with the system under test."""
+
+import pandas as pd
+from typing import Any
+from abc import abstractmethod
+
+from causal_testing.estimation.abstract_estimator import Estimator
+
+
+class ExperimentalEstimator(Estimator):
+    """A Logistic Regression Estimator is a parametric estimator which restricts the variables in the data to a linear
+    combination of parameters and functions of the variables (note these functions need not be linear). It is designed
+    for estimating categorical outcomes.
+    """
+
+    def __init__(
+        # pylint: disable=too-many-arguments
+        self,
+        treatment: str,
+        treatment_value: float,
+        control_value: float,
+        adjustment_set: dict[str:Any],
+        outcome: str,
+        effect_modifiers: dict[str:Any] = None,
+        alpha: float = 0.05,
+        repeats: int = 200,
+    ):
+        super().__init__(
+            treatment=treatment,
+            treatment_value=treatment_value,
+            control_value=control_value,
+            adjustment_set=adjustment_set,
+            outcome=outcome,
+            effect_modifiers=effect_modifiers,
+            alpha=alpha,
+        )
+        if effect_modifiers is None:
+            self.effect_modifiers = {}
+        self.repeats = repeats
+
+    def add_modelling_assumptions(self):
+        """
+        Add modelling assumptions to the estimator. This is a list of strings which list the modelling assumptions that
+        must hold if the resulting causal inference is to be considered valid.
+        """
+        self.modelling_assumptions.append(
+            "The supplied number of repeats must be sufficient for statistical significance"
+        )
+
+    @abstractmethod
+    def run_system(self, configuration: dict) -> dict:
+        """
+        Runs the system under test with the supplied configuration and supplies the outputs as a dict.
+        :param configuration: The run configuration arguments.
+        :returns: The resulting output as a dict.
+        """
+
+    def estimate_ate(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
+        """Estimate the average treatment effect of the treatment on the outcome. That is, the change in outcome caused
+        by changing the treatment variable from the control value to the treatment value.
+
+        :return: The average treatment effect and the bootstrapped confidence intervals.
+        """
+        control_configuration = self.adjustment_set | self.effect_modifiers | {self.treatment: self.control_value}
+        treatment_configuration = self.adjustment_set | self.effect_modifiers | {self.treatment: self.treatment_value}
+
+        control_outcomes = pd.DataFrame([self.run_system(control_configuration) for _ in range(self.repeats)])
+        treatment_outcomes = pd.DataFrame([self.run_system(treatment_configuration) for _ in range(self.repeats)])
+
+        difference = (treatment_outcomes[self.outcome] - control_outcomes[self.outcome]).sort_values().reset_index()
+
+        ci_low_index = round(self.repeats * (self.alpha / 2))
+        ci_low = difference.iloc[ci_low_index]
+        ci_high = difference.iloc[self.repeats - ci_low_index]
+
+        return pd.Series({self.treatment: difference.mean()[self.outcome]}), [
+            pd.Series({self.treatment: ci_low[self.outcome]}),
+            pd.Series({self.treatment: ci_high[self.outcome]}),
+        ]
+
+    def estimate_risk_ratio(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
+        """Estimate the risk ratio of the treatment on the outcome. That is, the change in outcome caused
+        by changing the treatment variable from the control value to the treatment value.
+
+        :return: The average treatment effect and the bootstrapped confidence intervals.
+        """
+        control_configuration = self.adjustment_set | self.effect_modifiers | {self.treatment: self.control_value}
+        treatment_configuration = self.adjustment_set | self.effect_modifiers | {self.treatment: self.treatment_value}
+
+        control_outcomes = pd.DataFrame([self.run_system(control_configuration) for _ in range(self.repeats)])
+        treatment_outcomes = pd.DataFrame([self.run_system(treatment_configuration) for _ in range(self.repeats)])
+
+        difference = (treatment_outcomes[self.outcome] / control_outcomes[self.outcome]).sort_values().reset_index()
+
+        ci_low_index = round(self.repeats * (self.alpha / 2))
+        ci_low = difference.iloc[ci_low_index]
+        ci_high = difference.iloc[self.repeats - ci_low_index]
+
+        return pd.Series({self.treatment: difference.mean()[self.outcome]}), [
+            pd.Series({self.treatment: ci_low[self.outcome]}),
+            pd.Series({self.treatment: ci_high[self.outcome]}),
+        ]
diff --git a/tests/estimation_tests/test_experimental_estimator.py b/tests/estimation_tests/test_experimental_estimator.py
new file mode 100644
index 00000000..062a2c45
--- /dev/null
+++ b/tests/estimation_tests/test_experimental_estimator.py
@@ -0,0 +1,45 @@
+import unittest
+from causal_testing.estimation.experimental_estimator import ExperimentalEstimator
+
+
+class ConcreteExperimentalEstimator(ExperimentalEstimator):
+    def run_system(self, configuration):
+        return {"Y": 2 * configuration["X"]}
+
+
+class TestExperimentalEstimator(unittest.TestCase):
+    """
+    Test the experimental estimator.
+    """
+
+    def test_estimate_ate(self):
+        estimator = ConcreteExperimentalEstimator(
+            treatment="X",
+            treatment_value=2,
+            control_value=1,
+            adjustment_set={},
+            outcome="Y",
+            effect_modifiers={},
+            alpha=0.05,
+            repeats=200,
+        )
+        ate, [ci_low, ci_high] = estimator.estimate_ate()
+        self.assertEqual(ate["X"], 2)
+        self.assertEqual(ci_low["X"], 2)
+        self.assertEqual(ci_high["X"], 2)
+
+    def test_estimate_risk_ratio(self):
+        estimator = ConcreteExperimentalEstimator(
+            treatment="X",
+            treatment_value=2,
+            control_value=1,
+            adjustment_set={},
+            outcome="Y",
+            effect_modifiers={},
+            alpha=0.05,
+            repeats=200,
+        )
+        rr, [ci_low, ci_high] = estimator.estimate_risk_ratio()
+        self.assertEqual(rr["X"], 2)
+        self.assertEqual(ci_low["X"], 2)
+        self.assertEqual(ci_high["X"], 2)

From 4b7de4cfd89ba7b9e8c11816ef25ccc21bc583ef Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Mon, 17 Feb 2025 12:59:38 +0000
Subject: [PATCH 09/44] pylint

---
 causal_testing/estimation/experimental_estimator.py |  2 +-
 causal_testing/testing/causal_test_case.py          |  1 -
 causal_testing/testing/metamorphic_relation.py      | 10 ++++++++--
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/causal_testing/estimation/experimental_estimator.py b/causal_testing/estimation/experimental_estimator.py
index 9ca8c1fa..5adb7958 100644
--- a/causal_testing/estimation/experimental_estimator.py
+++ b/causal_testing/estimation/experimental_estimator.py
@@ -1,8 +1,8 @@
 """This module contains the ExperimentalEstimator class for directly interacting with the system under test."""
 
-import pandas as pd
 from typing import Any
 from abc import abstractmethod
+import pandas as pd
 
 from causal_testing.estimation.abstract_estimator import Estimator
 
diff --git a/causal_testing/testing/causal_test_case.py b/causal_testing/testing/causal_test_case.py
index b3a48009..586c2b84 100644
--- a/causal_testing/testing/causal_test_case.py
+++ b/causal_testing/testing/causal_test_case.py
@@ -2,7 +2,6 @@
 
 import logging
 from typing import Any
-import pandas as pd
 
 from causal_testing.specification.variable import Variable
 from causal_testing.testing.causal_test_outcome import CausalTestOutcome
diff --git a/causal_testing/testing/metamorphic_relation.py b/causal_testing/testing/metamorphic_relation.py
index 813c2fb5..50b295be 100644
--- a/causal_testing/testing/metamorphic_relation.py
+++ b/causal_testing/testing/metamorphic_relation.py
@@ -47,7 +47,10 @@ def to_json_stub(self, skip=True) -> dict:
             "effect": "direct",
             "mutations": [self.base_test_case.treatment_variable],
             "expected_effect": {self.base_test_case.outcome_variable: "SomeEffect"},
-            "formula": f"{self.base_test_case.outcome_variable} ~ {' + '.join([self.base_test_case.treatment_variable] + self.adjustment_vars)}",
+            "formula": (
+                f"{self.base_test_case.outcome_variable} ~ "
+                f"{' + '.join([self.base_test_case.treatment_variable] + self.adjustment_vars)}"
+            ),
             "skip": skip,
         }
 
@@ -70,7 +73,10 @@ def to_json_stub(self, skip=True) -> dict:
             "effect": "direct",
             "mutations": [self.base_test_case.treatment_variable],
             "expected_effect": {self.base_test_case.outcome_variable: "NoEffect"},
-            "formula": f"{self.base_test_case.outcome_variable} ~ {' + '.join([self.base_test_case.treatment_variable] + self.adjustment_vars)}",
+            "formula": (
+                f"{self.base_test_case.outcome_variable} ~ "
+                f"{' + '.join([self.base_test_case.treatment_variable] + self.adjustment_vars)}"
+            ),
             "alpha": 0.05,
             "skip": skip,
         }

From a80ccefc8dd0a7bd20a8c775efa126f4d5d18345 Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Mon, 17 Feb 2025 13:15:58 +0000
Subject: [PATCH 10/44] pylint

---
 causal_testing/estimation/experimental_estimator.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/causal_testing/estimation/experimental_estimator.py b/causal_testing/estimation/experimental_estimator.py
index 5adb7958..3d2c6ad4 100644
--- a/causal_testing/estimation/experimental_estimator.py
+++ b/causal_testing/estimation/experimental_estimator.py
@@ -25,6 +25,7 @@ def __init__(
         alpha: float = 0.05,
         repeats: int = 200,
     ):
+        # pylint: disable=R0801
         super().__init__(
             treatment=treatment,
             treatment_value=treatment_value,

From 9cfff7d306db2cf55c01281c0f78b697cda6036a Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Mon, 17 Feb 2025 13:18:44 +0000
Subject: [PATCH 11/44] codecov

---
 tests/estimation_tests/test_experimental_estimator.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/estimation_tests/test_experimental_estimator.py b/tests/estimation_tests/test_experimental_estimator.py
index 062a2c45..af306009 100644
--- a/tests/estimation_tests/test_experimental_estimator.py
+++ b/tests/estimation_tests/test_experimental_estimator.py
@@ -19,7 +19,6 @@ def test_estimate_ate(self):
             control_value=1,
             adjustment_set={},
             outcome="Y",
-            effect_modifiers={},
             alpha=0.05,
             repeats=200,
         )

From 2d85097f03b46cc29712562746be9ce71267e32b Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Mon, 17 Feb 2025 13:30:43 +0000
Subject: [PATCH 12/44] Clarified experiemental estimator test.

---
 .../test_experimental_estimator.py            | 23 +++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/tests/estimation_tests/test_experimental_estimator.py b/tests/estimation_tests/test_experimental_estimator.py
index af306009..26d8b9e0 100644
--- a/tests/estimation_tests/test_experimental_estimator.py
+++ b/tests/estimation_tests/test_experimental_estimator.py
@@ -2,9 +2,28 @@
 from causal_testing.estimation.experimental_estimator import ExperimentalEstimator
 
 
+class SystemUnderTest:
+    """
+    Basic example of a system under test.
+    """
+
+    def run(self, x):
+        return x * 2
+
+
 class ConcreteExperimentalEstimator(ExperimentalEstimator):
-    def run_system(self, configuration):
-        return {"Y": 2 * configuration["X"]}
+    """
+    Concrete experimental estimator class which integrates with the system under test.
+    """
+
+    def run_system(self, configuration: dict):
+        """
+        Sets up the system under test, runs with the given configuration, and returns the result in the correct format.
+        :param configuration: The configuration.
+        :returns: Dictionary with the output.
+        """
+        sut = SystemUnderTest()
+        return {"Y": sut.run(configuration["x"])}
 
 
 class TestExperimentalEstimator(unittest.TestCase):

From b9d2504f9fcbd359176e88352d9fef7ebe9cf679 Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Mon, 17 Feb 2025 13:35:13 +0000
Subject: [PATCH 13/44] fixed pytest error

---
 tests/estimation_tests/test_experimental_estimator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/estimation_tests/test_experimental_estimator.py b/tests/estimation_tests/test_experimental_estimator.py
index 26d8b9e0..1ed84cd3 100644
--- a/tests/estimation_tests/test_experimental_estimator.py
+++ b/tests/estimation_tests/test_experimental_estimator.py
@@ -23,7 +23,7 @@ def run_system(self, configuration: dict):
         :returns: Dictionary with the output.
         """
         sut = SystemUnderTest()
-        return {"Y": sut.run(configuration["x"])}
+        return {"Y": sut.run(configuration["X"])}
 
 
 class TestExperimentalEstimator(unittest.TestCase):

From 5b78c166557e478ac542f387e2cf8ab481d9a2a3 Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Mon, 17 Feb 2025 15:29:43 +0000
Subject: [PATCH 14/44] pylint

---
 causal_testing/json_front/json_class.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/causal_testing/json_front/json_class.py b/causal_testing/json_front/json_class.py
index f3881c7c..9f43c9e3 100644
--- a/causal_testing/json_front/json_class.py
+++ b/causal_testing/json_front/json_class.py
@@ -11,7 +11,6 @@
 from statistics import StatisticsError
 
 import pandas as pd
-import numpy as np
 import scipy
 from fitter import Fitter, get_common_distributions
 
@@ -21,7 +20,7 @@
 from causal_testing.specification.scenario import Scenario
 from causal_testing.specification.variable import Input, Meta, Output
 from causal_testing.testing.causal_test_case import CausalTestCase
-from causal_testing.testing.causal_test_result import CausalTestResult, TestValue
+from causal_testing.testing.causal_test_result import CausalTestResult
 from causal_testing.testing.base_test_case import BaseTestCase
 from causal_testing.testing.causal_test_adequacy import DataAdequacy
 

From ee98a2a2026532c18aaa7f28856bb21ffdac4762 Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Mon, 17 Feb 2025 15:42:43 +0000
Subject: [PATCH 15/44] Updated docs

---
 docs/source/description.rst                   | 14 ----------
 docs/source/index.rst                         |  1 -
 docs/source/modules/data_collector.rst        | 28 -------------------
 docs/source/usage.rst                         | 22 +++++++++------
 .../covasim_/vaccinating_elderly/README.md    |  9 +++---
 5 files changed, 18 insertions(+), 56 deletions(-)
 delete mode 100644 docs/source/modules/data_collector.rst

diff --git a/docs/source/description.rst b/docs/source/description.rst
index c15f78f6..37f3a9bd 100644
--- a/docs/source/description.rst
+++ b/docs/source/description.rst
@@ -34,18 +34,4 @@ The Causal Testing Framework consists of 3 main components: 1) Causal Specificat
       test should pass or fail based on the results. In the simplest case, this takes the form of an assertion which
       compares the point estimate to the expected causal effect specified in the causal test case.
 
-
-
-#.
-   :doc:`Data Collection <../modules/data_collector>`\ : Data for the system-under-test can be collected in two
-   ways: experimentally or observationally. The former involves executing the system-under-test under controlled
-   conditions which, by design, isolate the causal effect of interest (accurate but expensive), while the latter
-   involves collecting suitable previous execution data and utilising our causal knowledge to draw causal inferences (
-   potentially less accurate but efficient). To collect experimental data, the user must implement a single method which
-   runs the system-under-test with a given input configuration. On the other hand, when dealing with observational data,
-   we automatically check whether the data is suitable for the identified estimand in two steps. First, confirm whether
-   the data contains a column for each variable in the causal DAG. Second, we check
-   for `positivity violations <https://www.youtube.com/watch?v=4xc8VkrF98w>`_. If there are positivity violations, we can
-   provide instructions for an execution that will fill the gap (future work).
-
 For more information on each of these steps, follow the link to their respective documentation.
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 6bf9041b..a7656534 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -106,7 +106,6 @@ system-under-test that is expected to cause a change to some output(s).
    :maxdepth: 1
    :caption: Module Descriptions
 
-   /modules/data_collector
    /modules/causal_specification
    /modules/causal_tests
 
diff --git a/docs/source/modules/data_collector.rst b/docs/source/modules/data_collector.rst
deleted file mode 100644
index be1e73c7..00000000
--- a/docs/source/modules/data_collector.rst
+++ /dev/null
@@ -1,28 +0,0 @@
-Data Collection
-===============
-
-For causal testing, we require data for the scenario-under-test. This data can be collected in 2 ways: experimentally
-and observationally.
-
-Experimental Data Collector
-****************************
-- Experimental data collection involves running the system-under-test with two specific input configurations, one with the
-  intervention and one without. We refer to these as the treatment and control configurations, respectively. The only
-  difference between these two input configurations is the intervention and therefore the observed difference in outcome
-  is the causal effect. If the system-under-test is non-deterministic, each input configuration should be ran multiple
-  times to observe the difference in the distributions of outputs.
-
-Observational Data Collector
-*****************************
-
-- Observational data collection involves collecting past execution data for the system-under-test that was not ran under
-  the experimental conditions necessary to isolate the causal effect. Instead, we will use the causal knowledge encoded
-  in the causal specification's causal DAG to identify and appropriately mitigate any sources of bias in the data. That
-  way, we can still obtain the causal effect of the intervention but avoid running costly experiments.
-
-- We cannot use any data as observational data, though. We need to ensure that the data is representative of the
-  scenario-under-test. To achieve this, we filter any provided data using the defined constraints by checking whether the
-  data for a variables falls within the specified distribution or meets the exact specified value.
-
-- This package should contain methods which collect the data for causal inference. Users must implement these methods in a way that generates (experimental) or collects
-  (observational) data for the scenario-under-test. For the observational case, we should also provide helper methods which filter the data.
\ No newline at end of file
diff --git a/docs/source/usage.rst b/docs/source/usage.rst
index f2f95bf8..c7ccebee 100644
--- a/docs/source/usage.rst
+++ b/docs/source/usage.rst
@@ -65,19 +65,23 @@ the given output and input and the desired effect. This information is the minim
 
 Before we can run our test case, we first need data. There are two ways to acquire this: 1. run the model with the
 specific input configurations we're interested in, 2. use data from previous model runs. For a small number of specific
-tests where accuracy is critical, the first approach will yield the best results. To do this, you need to instantiate
-the ``ExperimentalDataCollector`` class.
+tests where accuracy is critical, the first approach will yield the best results. To do this, you can use the
+`ExperimentalEstimator` class. This will run the system directly and calculate the causal effect estimate from this.
 
-Where there are many test cases using pre-existing data is likely to be faster. If the program's behaviour can be
+Where there are many test cases, using pre-existing data is likely to be faster. If the program's behaviour can be
 estimated statistically, the results should still be reliable as long as there is enough data for the estimator to work
 as intended. This will vary depending on the program and the estimator. To use this method, simply instantiate
-the ``ObservationalDataCollector`` class with the modelling scenario and a path to the CSV file containing the runtime
-data, e.g.
+one of the other estimator classes with a Pandas dataframe containing the runtime data, e.g.
 
 .. code-block:: python
-
-   obs_df = pd.read_csv('results/data.csv')
-   data_collector = ObservationalDataCollector(modelling_scenario, obs_df)
+   estimator = LinearRegressionEstimator(
+         treatment_variable,
+         treatment_value,
+         control_value,
+         minimal_adjustment_set,
+         outcome_variable,
+         df=pd.read_csv(observational_data_path),
+     )
 
 
 Whether using fresh or pre-existing data, a key aspect of causal inference is estimation. To actually execute a test, we
@@ -99,7 +103,7 @@ various information. Here, we simply assert that the observed result is (on aver
 
 .. code-block:: python
 
-   causal_test_result = causal_test_case.execute_test(estimation_model, data_collector)
+   causal_test_result = causal_test_case.execute_test(estimation_model)
    test_passes = causal_test_case.expected_causal_effect.apply(causal_test_result)
    assert test_passes, "Expected to see a positive change in y."
 
diff --git a/examples/covasim_/vaccinating_elderly/README.md b/examples/covasim_/vaccinating_elderly/README.md
index e1fd0d2d..4a8422fa 100644
--- a/examples/covasim_/vaccinating_elderly/README.md
+++ b/examples/covasim_/vaccinating_elderly/README.md
@@ -13,15 +13,16 @@ four test cases: one focusing on each of the four previously mentioned outputs.
 
 Further details are provided in Section 5.3 (Prioritising the elderly for vaccination) of the paper.
 
-**Note**: this version of the CTF utilises the observational data collector in order to separate the software execution 
-and testing. Older versions of this framework simulate the data using the custom experimental data collector and the 
-`covasim` package (version 3.0.7) as outlined below. 
+>[!NOTE]
+>This version of the CTF uses observational data to separate the software execution and testing.
+Older versions of this framework simulate the data using a custom experimental data collector and the `covasim`
+package (version 3.0.7) as outlined below.
 
 ## How to run
 To run this case study:
 1. Ensure all project dependencies are installed by running `pip install .` from the top
 level of this directory (instructions are provided in the project README).
-2. Additionally, in order to run Covasim, install version 3.0.7 by running `pip install covasim==3.0.7`.
+2. If necessary, install version 3.0.7 by running `pip install covasim==3.0.7`.
 3. Change directory to `causal_testing/examples/covasim_/vaccinating_elderly`.
 4. Run the command `python example_vaccine.py`.
 

From 7e33f80baa36fcce161d909b7f838c37b76b137c Mon Sep 17 00:00:00 2001
From: f-allian <allianfarhad@gmail.com>
Date: Mon, 17 Feb 2025 17:15:32 +0000
Subject: [PATCH 16/44] fix: schematic diagram on homepage

---
 docs/source/conf.py   | 2 +-
 docs/source/index.rst | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/docs/source/conf.py b/docs/source/conf.py
index d9ab4e48..b6ffe62f 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -50,7 +50,7 @@
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = ['_static', os.path.abspath('../../images')] # add /images directory to static path
 
 html_css_files = ['css/custom.css']
 
diff --git a/docs/source/index.rst b/docs/source/index.rst
index a7656534..765c65b9 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -33,6 +33,7 @@ system-under-test that is expected to cause a change to some output(s).
    .zoom-container {
        cursor: zoom-in;
        transition: transform 1s ease-in-out;
+       background-color: white;
    }
 
    .zoom-container.zoomed {
@@ -44,6 +45,7 @@ system-under-test that is expected to cause a change to some output(s).
        max-width: 100%;
        max-height: 100%;
        margin: auto;
+       background-color: white;
    }
 
    .zoom-container:hover {
@@ -83,9 +85,9 @@ system-under-test that is expected to cause a change to some output(s).
 
 .. container:: zoom-container
 
-   .. image:: /images/schematic.png
+   .. image:: ../../images/schematic.png
       :class: zoomable-image
-      :alt: Zoomable Image
+      :alt: Schematic diagram of the Causal Testing Framework
 
 
 .. toctree::

From 61d1e3d536f431d3cb8acd3d0957f3cd6b644e6c Mon Sep 17 00:00:00 2001
From: f-allian <allianfarhad@gmail.com>
Date: Mon, 17 Feb 2025 18:38:06 +0000
Subject: [PATCH 17/44] fix: remove mentions of data collector

---
 docs/source/description.rst | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/docs/source/description.rst b/docs/source/description.rst
index 37f3a9bd..a2ea82ff 100644
--- a/docs/source/description.rst
+++ b/docs/source/description.rst
@@ -1,11 +1,11 @@
 Background
 =====================================
 
-The Causal Testing Framework consists of 3 main components: 1) Causal Specification, 2) Causal Test Case and 3) Data Collection.
+The Causal Testing Framework consists the following two components: 1) Causal Specification and 2) Causal Test Case.
 
 #.
    :doc:`Causal Specification <../modules/causal_specification>`\ : To apply graphical CI
-   techniques for testing, we need a causal DAG which depicts causal relationships amongst inputs and outputs. To
+   techniques for testing, we need a causal DAG, which depicts causal relationships amongst inputs and outputs. To
    collect this information, users must create a *causal specification*. This comprises a set of scenarios which place
    constraints over input variables that capture the use-case of interest, a causal DAG corresponding to this scenario,
    and a series of high-level functional requirements that the user wishes to test. In causal testing, these
@@ -14,7 +14,7 @@ The Causal Testing Framework consists of 3 main components: 1) Causal Specificat
 
 
 #.
-   :doc:`Causal Tests <../modules/causal_tests>`\ : With a causal specification in hand, we can now go about designing
+   :doc:`Causal Tests <../modules/causal_tests>`\ : With a causal specification in hand, we can now design
    a series of test cases that interrogate the causal relationships of interest in the scenario-under-test. Informally,
    a causal test case is a triple ``(M, X, Delta, Y)``, where ``M`` is the modelling scenario, ``X`` is an input configuration,
    ``Delta`` is an intervention which should be applied to ``X``, and ``Y`` is the expected *causal effect* of that intervention on
@@ -24,14 +24,13 @@ The Causal Testing Framework consists of 3 main components: 1) Causal Specificat
 
    a. Using the causal DAG, identify an estimand for the effect of the intervention on the output of interest. That is,
       a statistical procedure capable of estimating the causal effect of the intervention on the output.
-   #. Collect the data to which the statistical procedure will be applied (see Data collection below).
-   #. Apply a statistical model (e.g. linear regression or causal forest) to the data to obtain a point estimate for
+   #. Apply a statistical model (e.g. linear regression or logistic regression) to the data to obtain a point estimate for
       the causal effect. Depending on the estimator used, confidence intervals may also be obtained at a specified
-      confidence level e.g. 0.05 corresponds to 95% confidence intervals (optional).
+      significance level, e.g. 0.05 corresponds to 95% confidence intervals (optional).
    #. Return the casual test result including a point estimate and 95% confidence intervals, usually quantifying the
       average treatment effect (ATE).
    #. Implement and apply a test oracle to the causal test result - that is, a procedure that determines whether the
       test should pass or fail based on the results. In the simplest case, this takes the form of an assertion which
       compares the point estimate to the expected causal effect specified in the causal test case.
 
-For more information on each of these steps, follow the link to their respective documentation.
+For more information on each of these steps, follow the links above to their respective documentation.

From 9dd94f80a9a452cc801b2b38612775431f295720 Mon Sep 17 00:00:00 2001
From: f-allian <allianfarhad@gmail.com>
Date: Mon, 17 Feb 2025 18:38:38 +0000
Subject: [PATCH 18/44] fix: misc typos

---
 docs/source/dev/actions_and_webhooks.rst |  2 +-
 docs/source/frontends/json_front_end.rst |  4 ++--
 docs/source/frontends/test_suite.rst     | 15 ++++-----------
 3 files changed, 7 insertions(+), 14 deletions(-)

diff --git a/docs/source/dev/actions_and_webhooks.rst b/docs/source/dev/actions_and_webhooks.rst
index bb95dd0e..7dc142ea 100644
--- a/docs/source/dev/actions_and_webhooks.rst
+++ b/docs/source/dev/actions_and_webhooks.rst
@@ -4,7 +4,7 @@ Github Actions and Webhooks
 Actions
 --------------
 
-Currently, this project makes use of 4 `Github Actions <https://github.com/features/actions>`_,
+Currently, this project makes use of 5 `Github Actions <https://github.com/features/actions>`_,
 which can be found in the
 `.github/workflows <https://github.com/CITCOM-project/CausalTestingFramework/tree/main/.github/workflows>`_ directory.
 
diff --git a/docs/source/frontends/json_front_end.rst b/docs/source/frontends/json_front_end.rst
index 7d0fe957..33c9de08 100644
--- a/docs/source/frontends/json_front_end.rst
+++ b/docs/source/frontends/json_front_end.rst
@@ -60,7 +60,7 @@ The second method of specifying a test is to specify the test in a concrete form
 
 Alternatively, a ``causal_tests.json`` file can be created from a ``dag.dot`` file using the ``causal_testing/specification/metamorphic_relation.py`` script as follows::
 
-  python causal_testing/specification/metamorphic_relation.py --dag_path dag.dot --output_path causal_tests.json
+  python causal_testing/testing/metamorphic_relation.py --dag_path dag.dot --output_path causal_tests.json
 
 Run Commands
 ************
@@ -88,4 +88,4 @@ Runtime Data
 There are currently 2 methods to inputting your runtime data into the JSON frontend:
 
 #. Providing one or more file paths to `.csv` files containing your data
-#. Setting a dataframe to the .data attribute of the JSONUtility instance, this must be done before the setup method is called.
\ No newline at end of file
+#. Setting a dataframe to the `.data` attribute of the JSONUtility instance, this must be done before the setup method is called.
\ No newline at end of file
diff --git a/docs/source/frontends/test_suite.rst b/docs/source/frontends/test_suite.rst
index 8ddd0afa..a5a488bd 100644
--- a/docs/source/frontends/test_suite.rst
+++ b/docs/source/frontends/test_suite.rst
@@ -7,9 +7,7 @@ This structure is defined by the parameters in the class: :class:`causal_testing
 A current limitation of the Test Suite is that it requires references to the estimator class, not instances (objects) of
 estimator classes, which prevents the usage of some of the features of an estimator.
 
-Class
---------------------
-The test_suite class is an extension of the python UserDict_, meaning it simulates a standard Python dictionary where
+The test_suite class is an extension of the Python UserDict_, meaning it simulates a standard Python dictionary where
 any dictionary method can be used. The class also features a setter to make adding new test cases quicker and less
 error prone :meth:`causal_testing.testing.causal_test_suite.CausalTestSuite.add_test_object`.
 
@@ -23,14 +21,9 @@ the value is a test object in the format of another dictionary:
 Each ``base_test_case`` contains the treatment and outcome variables, and only causal_test_cases testing this relationship
 should be placed in the test object for that ``base_test_case``.
 
-.. _UserDict: https://docs.python.org/3/library/collections.html#collections.UserDict
 
-Execution
------------------------
-The test_suite can be executed by a call to the :meth:`causal_testing.testing.causal_test_engine.CausalTestEngine.execute_test_suite`.
-Here the causal_test_engine will iterate over all the test objects and execute each `test` once per `estimator` and per
-`estimate_type`.
+Following this, users can similarly execute a suite of causal tests and return the results in a list by executing the
+class's :meth:`causal_testing.testing.causal_test_suite.CausalTestSuite.execute_test_suite` method.
+
 
-This structure allows for some optimisations in running cost by only performing certain actions like identification
-when necessary and not for every `causal_test_case`.
 

From bedf7692f2c5897ebe88fda49bc41bffc8a00983 Mon Sep 17 00:00:00 2001
From: f-allian <allianfarhad@gmail.com>
Date: Mon, 17 Feb 2025 18:38:58 +0000
Subject: [PATCH 19/44] add: note about 32-bit systems

---
 docs/source/installation.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/docs/source/installation.rst b/docs/source/installation.rst
index 7846a861..d28841be 100644
--- a/docs/source/installation.rst
+++ b/docs/source/installation.rst
@@ -13,6 +13,9 @@ The Causal Testing Framework can be installed through either the `Python Package
 
 .. _Python Package Index (PyPI): https://dl.acm.org/doi/10.1145/3607184
 
+.. note::
+   We recommend you use a 64 bit OS (standard in most modern machines) as we have had reports of the installation crashing on some 32 bit Debian installations.
+
 Method 1: Installing via pip
 ..............................
 

From 3310bbe02bbde1985f2e50aa00b8da5d8e5d789a Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Tue, 18 Feb 2025 08:36:40 +0000
Subject: [PATCH 20/44] Removed all mention of the causal test engine

---
 causal_testing/testing/causal_test_suite.py   | 3 +--
 docs/source/usage.rst                         | 2 +-
 examples/lr91/README.md                       | 4 ++--
 tests/testing_tests/test_causal_test_case.py  | 6 ++----
 tests/testing_tests/test_causal_test_suite.py | 2 +-
 5 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/causal_testing/testing/causal_test_suite.py b/causal_testing/testing/causal_test_suite.py
index 797b6a1b..5539210d 100644
--- a/causal_testing/testing/causal_test_suite.py
+++ b/causal_testing/testing/causal_test_suite.py
@@ -39,8 +39,7 @@ def add_test_object(
 
         :param base_test_case: A BaseTestCase object consisting of a treatment variable, outcome variable and effect
         :param causal_test_case_list: A list of causal test cases to be executed
-        :param estimators_classes: A list of estimator class references, the execute_test_suite function in the
-            TestEngine will produce a list of test results for each estimator
+        :param estimators_classes: A list of estimators, one for each causal test case
         :param estimate_type: A string which denotes the type of estimate to return
         """
         test_object = {"tests": causal_test_case_list, "estimators": estimators_classes, "estimate_type": estimate_type}
diff --git a/docs/source/usage.rst b/docs/source/usage.rst
index c7ccebee..92e03533 100644
--- a/docs/source/usage.rst
+++ b/docs/source/usage.rst
@@ -107,4 +107,4 @@ various information. Here, we simply assert that the observed result is (on aver
    test_passes = causal_test_case.expected_causal_effect.apply(causal_test_result)
    assert test_passes, "Expected to see a positive change in y."
 
-Multiple tests can be executed at once using the test engines :doc:`Test Suite </frontends/test_suite>` feature.
+Multiple tests can be executed at once using a causal test suite :doc:`Test Suite </frontends/test_suite>` feature.
diff --git a/examples/lr91/README.md b/examples/lr91/README.md
index 4036c468..8b2c1d6b 100644
--- a/examples/lr91/README.md
+++ b/examples/lr91/README.md
@@ -11,8 +11,8 @@ can be found in Section 5.2 of the paper.
 
 ## How to run
 There are two versions of this case study:
-1. `causal_test_max_conductances.py` which has a for loop to iteratively call the `causal_test_engine`
-2. `causal_test_max_conductances_test_suite.py`, which uses the `causal_test_suite` object to interact with the `causal_test_engine`
+1. `causal_test_max_conductances.py` which has a for loop to iteratively build and execute each test case one at a time.
+2. `causal_test_max_conductances_test_suite.py`, which uses the `causal_test_suite` object to execute all the tests at once.
 
 To run this case study:
 1. Ensure all project dependencies are installed by running `pip install .` in the top level directory
diff --git a/tests/testing_tests/test_causal_test_case.py b/tests/testing_tests/test_causal_test_case.py
index f1b123f4..2b9c086d 100644
--- a/tests/testing_tests/test_causal_test_case.py
+++ b/tests/testing_tests/test_causal_test_case.py
@@ -46,10 +46,8 @@ def test_str(self):
 
 
 class TestCausalTestExecution(unittest.TestCase):
-    """Test the causal test execution workflow using observational data.
-
-    The causal test engine (CTE) is the main workflow for the causal testing framework. The CTE takes a causal test case
-    and a causal specification and computes the causal effect of the intervention on the outcome of interest.
+    """
+    Test the causal test execution workflow using observational data.
     """
 
     def setUp(self) -> None:
diff --git a/tests/testing_tests/test_causal_test_suite.py b/tests/testing_tests/test_causal_test_suite.py
index a7c7704c..a3a5fc6b 100644
--- a/tests/testing_tests/test_causal_test_suite.py
+++ b/tests/testing_tests/test_causal_test_suite.py
@@ -16,7 +16,7 @@
 
 
 class TestCausalTestSuite(unittest.TestCase):
-    """Test the Test Suite object and it's implementation in the test engine using dummy data."""
+    """Test the Test Suite object using dummy data."""
 
     def setUp(self) -> None:
         # 1. Create dummy Scenario and BaseTestCase

From 2786fdc8f3e68535cff4bc8bf95e90d818671d31 Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Tue, 18 Feb 2025 08:44:43 +0000
Subject: [PATCH 21/44] Fixed metamorphic relation

---
 causal_testing/testing/metamorphic_relation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/causal_testing/testing/metamorphic_relation.py b/causal_testing/testing/metamorphic_relation.py
index 50b295be..5196399f 100644
--- a/causal_testing/testing/metamorphic_relation.py
+++ b/causal_testing/testing/metamorphic_relation.py
@@ -226,7 +226,7 @@ def generate_metamorphic_relations(
     tests = [
         relation.to_json_stub(skip=False)
         for relation in relations
-        if len(list(causal_dag.graph.predecessors(relation.output_var))) > 0
+        if len(list(causal_dag.graph.predecessors(relation.base_test_case.outcome_variable))) > 0
     ]
 
     logger.info(f"Generated {len(tests)} tests. Saving to {args.output_path}.")

From c38706d4aaa7786615b19bdca84ef0e901dd955b Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Tue, 18 Feb 2025 09:02:28 +0000
Subject: [PATCH 22/44] Fixed default behaviour of MR generation

---
 causal_testing/specification/causal_dag.py    | 24 ++++++++++++-------
 .../surrogate/causal_surrogate_assisted.py    |  2 +-
 .../testing/causal_test_adequacy.py           |  4 ++--
 .../testing/metamorphic_relation.py           | 12 +++++-----
 tests/specification_tests/test_causal_dag.py  | 20 ++++++++--------
 5 files changed, 35 insertions(+), 27 deletions(-)

diff --git a/causal_testing/specification/causal_dag.py b/causal_testing/specification/causal_dag.py
index f00d4ad8..b8c5f5dc 100644
--- a/causal_testing/specification/causal_dag.py
+++ b/causal_testing/specification/causal_dag.py
@@ -151,6 +151,14 @@ def __init__(self, dot_path: str = None, ignore_cycles: bool = False, **attr):
             else:
                 raise nx.HasACycle("Invalid Causal DAG: contains a cycle.")
 
+    @property
+    def nodes(self):
+        return self.graph.nodes
+
+    @property
+    def edges(self):
+        return self.graph.edges
+
     def check_iv_assumptions(self, treatment, outcome, instrument) -> bool:
         """
         Checks the three instrumental variable assumptions, raising a
@@ -170,7 +178,7 @@ def check_iv_assumptions(self, treatment, outcome, instrument) -> bool:
 
         # (iii) Instrument and outcome do not share causes
 
-        for cause in self.graph.nodes:
+        for cause in self.nodes:
             # Exclude self-cycles due to breaking changes in NetworkX > 3.2
             outcome_paths = (
                 list(nx.all_simple_paths(self.graph, source=cause, target=outcome)) if cause != outcome else []
@@ -222,8 +230,8 @@ def get_proper_backdoor_graph(self, treatments: list[str], outcomes: list[str])
         :return: A CausalDAG corresponding to the proper back-door graph.
         """
         for var in treatments + outcomes:
-            if var not in self.graph.nodes:
-                raise IndexError(f"{var} not a node in Causal DAG.\nValid nodes are{self.graph.nodes}.")
+            if var not in self.nodes:
+                raise IndexError(f"{var} not a node in Causal DAG.\nValid nodes are{self.nodes}.")
 
         proper_backdoor_graph = self.copy()
         nodes_on_proper_causal_path = proper_backdoor_graph.proper_causal_pathway(treatments, outcomes)
@@ -255,7 +263,7 @@ def get_ancestor_graph(self, treatments: list[str], outcomes: list[str]) -> Caus
             *[nx.ancestors(ancestor_graph.graph, outcome).union({outcome}) for outcome in outcomes]
         )
         variables_to_keep = treatment_ancestors.union(outcome_ancestors)
-        variables_to_remove = set(self.graph.nodes).difference(variables_to_keep)
+        variables_to_remove = set(self.nodes).difference(variables_to_keep)
         ancestor_graph.graph.remove_nodes_from(variables_to_remove)
         return ancestor_graph
 
@@ -273,7 +281,7 @@ def get_indirect_graph(self, treatments: list[str], outcomes: list[str]) -> Caus
         ee = []
         for s in treatments:
             for t in outcomes:
-                if (s, t) in gback.graph.edges:
+                if (s, t) in gback.edges:
                     ee.append((s, t))
         for v1, v2 in ee:
             gback.graph.remove_edge(v1, v2)
@@ -451,7 +459,7 @@ def constructive_backdoor_criterion(
                 ]
             )
 
-            if not set(covariates).issubset(set(self.graph.nodes).difference(descendents_of_proper_casual_paths)):
+            if not set(covariates).issubset(set(self.nodes).difference(descendents_of_proper_casual_paths)):
                 logger.info(
                     "Failed Condition 1: Z=%s **is** a descendent of some variable on a proper causal "
                     "path between X=%s and Y=%s.",
@@ -566,9 +574,9 @@ def to_dot_string(self) -> str:
         :return DOT string of the DAG.
         """
         dotstring = "digraph G {\n"
-        dotstring += "".join([f"{a} -> {b};\n" for a, b in self.graph.edges])
+        dotstring += "".join([f"{a} -> {b};\n" for a, b in self.edges])
         dotstring += "}"
         return dotstring
 
     def __str__(self):
-        return f"Nodes: {self.graph.nodes}\nEdges: {self.graph.edges}"
+        return f"Nodes: {self.nodes}\nEdges: {self.edges}"
diff --git a/causal_testing/surrogate/causal_surrogate_assisted.py b/causal_testing/surrogate/causal_surrogate_assisted.py
index 56642770..ffeadbd2 100644
--- a/causal_testing/surrogate/causal_surrogate_assisted.py
+++ b/causal_testing/surrogate/causal_surrogate_assisted.py
@@ -121,7 +121,7 @@ def generate_surrogates(
         """
         surrogate_models = []
 
-        for u, v in specification.causal_dag.graph.edges:
+        for u, v in specification.causal_dag.edges:
             edge_metadata = specification.causal_dag.graph.adj[u][v]
             if "included" in edge_metadata:
                 from_var = specification.scenario.variables.get(u)
diff --git a/causal_testing/testing/causal_test_adequacy.py b/causal_testing/testing/causal_test_adequacy.py
index 48a5c381..aa8222c6 100644
--- a/causal_testing/testing/causal_test_adequacy.py
+++ b/causal_testing/testing/causal_test_adequacy.py
@@ -38,11 +38,11 @@ def measure_adequacy(self):
         """
         Calculate the adequacy measurement, and populate the `dag_adequacy` field.
         """
-        self.pairs_to_test = set(combinations(self.causal_dag.graph.nodes(), 2))
+        self.pairs_to_test = set(combinations(self.causal_dag.nodes, 2))
         self.tested_pairs = set()
 
         for n1, n2 in self.pairs_to_test:
-            if (n1, n2) in self.causal_dag.graph.edges():
+            if (n1, n2) in self.causal_dag.edges():
                 if any((t.treatment_variable, t.outcome_variable) == (n1, n2) for t in self.test_suite):
                     self.tested_pairs.add((n1, n2))
             else:
diff --git a/causal_testing/testing/metamorphic_relation.py b/causal_testing/testing/metamorphic_relation.py
index 5196399f..8165e0fb 100644
--- a/causal_testing/testing/metamorphic_relation.py
+++ b/causal_testing/testing/metamorphic_relation.py
@@ -108,7 +108,7 @@ def generate_metamorphic_relation(
     metamorphic_relations = []
 
     # Create a ShouldNotCause relation for each pair of nodes that are not directly connected
-    if ((u, v) not in dag.graph.edges) and ((v, u) not in dag.graph.edges):
+    if ((u, v) not in dag.edges) and ((v, u) not in dag.edges):
         # Case 1: U --> ... --> V
         if u in nx.ancestors(dag.graph, v):
             adj_sets = dag.direct_effect_adjustment_sets([u], [v], nodes_to_ignore=nodes_to_ignore)
@@ -129,7 +129,7 @@ def generate_metamorphic_relation(
                 metamorphic_relations.append(ShouldNotCause(BaseTestCase(u, v), list(adj_sets[0])))
 
     # Create a ShouldCause relation for each edge (u, v) or (v, u)
-    elif (u, v) in dag.graph.edges:
+    elif (u, v) in dag.edges:
         adj_sets = dag.direct_effect_adjustment_sets([u], [v], nodes_to_ignore=nodes_to_ignore)
         if adj_sets:
             metamorphic_relations.append(ShouldCause(BaseTestCase(u, v), list(adj_sets[0])))
@@ -160,7 +160,7 @@ def generate_metamorphic_relations(
         nodes_to_ignore = {}
 
     if nodes_to_test is None:
-        nodes_to_test = dag.graph.nodes
+        nodes_to_test = dag.nodes
 
     if not threads:
         metamorphic_relations = [
@@ -205,9 +205,9 @@ def generate_metamorphic_relations(
 
     causal_dag = CausalDAG(args.dag_path, ignore_cycles=args.ignore_cycles)
 
-    dag_nodes_to_test = set(
-        k for k, v in nx.get_node_attributes(causal_dag.graph, "test", default=True).items() if v == "True"
-    )
+    dag_nodes_to_test = [
+        node for node in causal_dag.nodes if nx.get_node_attributes(causal_dag.graph, "test", default=True)[node]
+    ]
 
     if not causal_dag.is_acyclic() and args.ignore_cycles:
         logger.warning(
diff --git a/tests/specification_tests/test_causal_dag.py b/tests/specification_tests/test_causal_dag.py
index bd01d11c..87ae3963 100644
--- a/tests/specification_tests/test_causal_dag.py
+++ b/tests/specification_tests/test_causal_dag.py
@@ -86,7 +86,7 @@ def test_valid_causal_dag(self):
         """Test whether the Causal DAG is valid."""
         causal_dag = CausalDAG(self.dag_dot_path)
         print(causal_dag)
-        assert list(causal_dag.graph.nodes) == ["A", "B", "C", "D"] and list(causal_dag.graph.edges) == [
+        assert list(causal_dag.nodes) == ["A", "B", "C", "D"] and list(causal_dag.edges) == [
             ("A", "B"),
             ("B", "C"),
             ("D", "A"),
@@ -101,7 +101,7 @@ def test_invalid_causal_dag(self):
     def test_empty_casual_dag(self):
         """Test whether an empty dag can be created."""
         causal_dag = CausalDAG()
-        assert list(causal_dag.graph.nodes) == [] and list(causal_dag.graph.edges) == []
+        assert list(causal_dag.nodes) == [] and list(causal_dag.edges) == []
 
     def test_to_dot_string(self):
         causal_dag = CausalDAG(self.dag_dot_path)
@@ -174,10 +174,10 @@ def setUp(self) -> None:
     def test_get_indirect_graph(self):
         causal_dag = CausalDAG(self.dag_dot_path)
         indirect_graph = causal_dag.get_indirect_graph(["D1"], ["Y"])
-        original_edges = list(causal_dag.graph.edges)
+        original_edges = list(causal_dag.edges)
         original_edges.remove(("D1", "Y"))
-        self.assertEqual(list(indirect_graph.graph.edges), original_edges)
-        self.assertEqual(indirect_graph.graph.nodes, causal_dag.graph.nodes)
+        self.assertEqual(list(indirect_graph.edges), original_edges)
+        self.assertEqual(indirect_graph.nodes, causal_dag.nodes)
 
     def test_proper_backdoor_graph(self):
         """Test whether converting a Causal DAG to a proper back-door graph works correctly."""
@@ -195,7 +195,7 @@ def test_proper_backdoor_graph(self):
                 ("Z", "Y"),
             ]
         )
-        self.assertTrue(set(proper_backdoor_graph.graph.edges).issubset(edges))
+        self.assertTrue(set(proper_backdoor_graph.edges).issubset(edges))
 
     def test_constructive_backdoor_criterion_should_hold(self):
         """Test whether the constructive criterion holds when it should."""
@@ -246,9 +246,9 @@ def test_get_ancestor_graph_of_causal_dag(self):
         causal_dag = CausalDAG(self.dag_dot_path)
         xs, ys = ["X1", "X2"], ["Y"]
         ancestor_graph = causal_dag.get_ancestor_graph(xs, ys)
-        self.assertEqual(list(ancestor_graph.graph.nodes), ["X1", "X2", "D1", "Y", "Z"])
+        self.assertEqual(list(ancestor_graph.nodes), ["X1", "X2", "D1", "Y", "Z"])
         self.assertEqual(
-            list(ancestor_graph.graph.edges),
+            list(ancestor_graph.edges),
             [("X1", "X2"), ("X2", "D1"), ("D1", "Y"), ("Z", "X2"), ("Z", "Y")],
         )
 
@@ -258,9 +258,9 @@ def test_get_ancestor_graph_of_proper_backdoor_graph(self):
         xs, ys = ["X1", "X2"], ["Y"]
         proper_backdoor_graph = causal_dag.get_proper_backdoor_graph(xs, ys)
         ancestor_graph = proper_backdoor_graph.get_ancestor_graph(xs, ys)
-        self.assertEqual(list(ancestor_graph.graph.nodes), ["X1", "X2", "D1", "Y", "Z"])
+        self.assertEqual(list(ancestor_graph.nodes), ["X1", "X2", "D1", "Y", "Z"])
         self.assertEqual(
-            list(ancestor_graph.graph.edges),
+            list(ancestor_graph.edges),
             [("X1", "X2"), ("D1", "Y"), ("Z", "X2"), ("Z", "Y")],
         )
 

From 9a82172a3913d1d2a50ed2b2664bd5323a59dc05 Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Tue, 18 Feb 2025 09:06:21 +0000
Subject: [PATCH 23/44] Pylint

---
 causal_testing/specification/causal_dag.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/causal_testing/specification/causal_dag.py b/causal_testing/specification/causal_dag.py
index b8c5f5dc..33fba9c8 100644
--- a/causal_testing/specification/causal_dag.py
+++ b/causal_testing/specification/causal_dag.py
@@ -152,11 +152,19 @@ def __init__(self, dot_path: str = None, ignore_cycles: bool = False, **attr):
                 raise nx.HasACycle("Invalid Causal DAG: contains a cycle.")
 
     @property
-    def nodes(self):
+    def nodes(self) -> list:
+        """
+        Get the nodes of the DAG.
+        :returns: The nodes of the DAG.
+        """
         return self.graph.nodes
 
     @property
-    def edges(self):
+    def edges(self) -> list:
+        """
+        Get the edges of the DAG.
+        :returns: The edges of the DAG.
+        """
         return self.graph.edges
 
     def check_iv_assumptions(self, treatment, outcome, instrument) -> bool:

From 1cef1fb5e9a0024a7a09dddf75be1799a578ede8 Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Tue, 18 Feb 2025 11:27:57 +0000
Subject: [PATCH 24/44] Examples

---
 .../example_json_frontend.py                  |  64 ++++++
 .../example_pure_python.py                    | 216 ++++++++++++++++++
 2 files changed, 280 insertions(+)
 create mode 100644 examples/poisson-line-process/example_json_frontend.py
 create mode 100644 examples/poisson-line-process/example_pure_python.py

diff --git a/examples/poisson-line-process/example_json_frontend.py b/examples/poisson-line-process/example_json_frontend.py
new file mode 100644
index 00000000..43676ef7
--- /dev/null
+++ b/examples/poisson-line-process/example_json_frontend.py
@@ -0,0 +1,64 @@
+import logging
+
+from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
+from causal_testing.testing.causal_test_outcome import ExactValue, Positive, Negative, NoEffect
+from causal_testing.json_front.json_class import JsonUtility
+from causal_testing.specification.scenario import Scenario
+from causal_testing.specification.variable import Input, Output
+
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.DEBUG, format="%(message)s")
+
+effects = {
+    "Positive": Positive(),
+    "Negative": Negative(),
+    "ExactValue4_05": ExactValue(4, atol=0.5),
+    "NoEffect": NoEffect(),
+}
+
+estimators = {
+    "LinearRegressionEstimator": LinearRegressionEstimator,
+}
+
+# 2. Create variables
+width = Input("width", float)
+height = Input("height", float)
+intensity = Input("intensity", float)
+
+num_lines_abs = Output("num_lines_abs", float)
+num_lines_unit = Output("num_lines_unit", float)
+num_shapes_abs = Output("num_shapes_abs", float)
+num_shapes_unit = Output("num_shapes_unit", float)
+
+# 3. Create scenario by applying constraints over a subset of the input variables
+scenario = Scenario(
+    variables={
+        width,
+        height,
+        intensity,
+        num_lines_abs,
+        num_lines_unit,
+        num_shapes_abs,
+        num_shapes_unit,
+    }
+)
+scenario.setup_treatment_variables()
+
+mutates = {
+    "Increase": lambda x: scenario.treatment_variables[x].z3 > scenario.variables[x].z3,
+    "ChangeByFactor(2)": lambda x: scenario.treatment_variables[x].z3 == scenario.variables[x].z3 * 2,
+}
+
+
+if __name__ == "__main__":
+    args = JsonUtility.get_args()
+    json_utility = JsonUtility(args.log_path)  # Create an instance of the extended JsonUtility class
+    json_utility.set_paths(
+        args.json_path, args.dag_path, args.data_path
+    )  # Set the path to the data.csv, dag.dot and causal_tests.json file
+
+    # Load the Causal Variables into the JsonUtility class ready to be used in the tests
+    json_utility.setup(scenario=scenario)  # Sets up all the necessary parts of the json_class needed to execute tests
+
+    json_utility.run_json_tests(effects=effects, mutates=mutates, estimators=estimators, f_flag=args.f)
diff --git a/examples/poisson-line-process/example_pure_python.py b/examples/poisson-line-process/example_pure_python.py
new file mode 100644
index 00000000..67a2f4b2
--- /dev/null
+++ b/examples/poisson-line-process/example_pure_python.py
@@ -0,0 +1,216 @@
+import os
+import logging
+
+import pandas as pd
+
+from causal_testing.specification.causal_dag import CausalDAG
+from causal_testing.specification.scenario import Scenario
+from causal_testing.specification.variable import Input, Output
+from causal_testing.specification.causal_specification import CausalSpecification
+from causal_testing.testing.causal_test_case import CausalTestCase
+from causal_testing.testing.causal_test_outcome import ExactValue, Positive
+from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
+from causal_testing.estimation.abstract_estimator import Estimator
+from causal_testing.testing.base_test_case import BaseTestCase
+from causal_testing.testing.causal_test_suite import CausalTestSuite
+
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.DEBUG, format="%(message)s")
+
+
+class EmpiricalMeanEstimator(Estimator):
+    def add_modelling_assumptions(self):
+        """
+        Add modelling assumptions to the estimator. This is a list of strings which list the modelling assumptions that
+        must hold if the resulting causal inference is to be considered valid.
+        """
+        self.modelling_assumptions += "The data must contain runs with the exact configuration of interest."
+
+    def estimate_ate(self) -> float:
+        """Estimate the outcomes under control and treatment.
+        :return: The empirical average treatment effect.
+        """
+        control_results = self.df.where(self.df[self.treatment] == self.control_value)[self.outcome].dropna()
+        treatment_results = self.df.where(self.df[self.treatment] == self.treatment_value)[self.outcome].dropna()
+        return treatment_results.mean() - control_results.mean(), None
+
+    def estimate_risk_ratio(self) -> float:
+        """Estimate the outcomes under control and treatment.
+        :return: The empirical average treatment effect.
+        """
+        control_results = self.df.where(self.df[self.treatment] == self.control_value)[self.outcome].dropna()
+        treatment_results = self.df.where(self.df[self.treatment] == self.treatment_value)[self.outcome].dropna()
+        return treatment_results.mean() / control_results.mean(), None
+
+
+# 1. Read in the Causal DAG
+ROOT = os.path.realpath(os.path.dirname(__file__))
+causal_dag = CausalDAG(f"{ROOT}/dag.dot")
+
+# 2. Create variables
+width = Input("width", float)
+height = Input("height", float)
+intensity = Input("intensity", float)
+
+num_lines_abs = Output("num_lines_abs", float)
+num_lines_unit = Output("num_lines_unit", float)
+num_shapes_abs = Output("num_shapes_abs", float)
+num_shapes_unit = Output("num_shapes_unit", float)
+
+# 3. Create scenario
+scenario = Scenario(
+    variables={
+        width,
+        height,
+        intensity,
+        num_lines_abs,
+        num_lines_unit,
+        num_shapes_abs,
+        num_shapes_unit,
+    }
+)
+
+# 4. Construct a causal specification from the scenario and causal DAG
+causal_specification = CausalSpecification(scenario, causal_dag)
+
+observational_data_path = f"{ROOT}/data/random/data_random_1000.csv"
+
+
+def causal_test_intensity_num_shapes(
+    observational_data_path,
+    causal_test_case,
+    square_terms=[],
+    inverse_terms=[],
+    empirical=False,
+):
+    # 8. Set up an estimator
+    data = pd.read_csv(observational_data_path, index_col=0).astype(float)
+
+    treatment = causal_test_case.treatment_variable.name
+    outcome = causal_test_case.outcome_variable.name
+
+    estimator = None
+    if empirical:
+        estimator = EmpiricalMeanEstimator(
+            treatment=[treatment],
+            control_value=causal_test_case.control_value,
+            treatment_value=causal_test_case.treatment_value,
+            adjustment_set=set(),
+            outcome=[outcome],
+            df=data,
+            effect_modifiers=causal_test_case.effect_modifier_configuration,
+        )
+    else:
+        square_terms = [f"I({t} ** 2)" for t in square_terms]
+        inverse_terms = [f"I({t} ** -1)" for t in inverse_terms]
+        estimator = LinearRegressionEstimator(
+            treatment=treatment,
+            control_value=causal_test_case.control_value,
+            treatment_value=causal_test_case.treatment_value,
+            adjustment_set=set(),
+            outcome=outcome,
+            df=data,
+            effect_modifiers=causal_test_case.effect_modifier_configuration,
+            formula=f"{outcome} ~ {treatment} + {'+'.join(square_terms + inverse_terms + list([e for e in causal_test_case.effect_modifier_configuration]))} -1",
+        )
+
+    # 9. Execute the test
+    causal_test_result = causal_test_case.execute_test(estimator)
+
+    return causal_test_result
+
+
+def test_poisson_intensity_num_shapes(save=False):
+    intensity_num_shapes_results = []
+    base_test_case = BaseTestCase(treatment_variable=intensity, outcome_variable=num_shapes_unit)
+    for wh in range(1, 11):
+        smt_data_path = f"{ROOT}/data/smt_100/data_smt_wh{wh}_100.csv"
+        causal_test_case_list = [
+            CausalTestCase(
+                base_test_case=base_test_case,
+                expected_causal_effect=ExactValue(4, atol=0.5),
+                treatment_value=treatment_value,
+                control_value=control_value,
+                estimate_type="risk_ratio",
+            )
+            for control_value, treatment_value in [(1, 2), (2, 4), (4, 8), (8, 16)]
+        ]
+        test_suite = CausalTestSuite()
+        test_suite.add_test_object(
+            base_test_case,
+            causal_test_case_list=causal_test_case_list,
+            estimators=[LinearRegressionEstimator, EmpiricalMeanEstimator],
+        )
+        test_suite_results = test_suite.execute_test_suite(
+            causal_specification, pd.read_csv(smt_data_path, index_col=0).astype(float)
+        )
+
+        smt_risk_ratios = [
+            causal_test_result.test_value.value
+            for causal_test_result in test_suite_results[base_test_case]["EmpiricalMeanEstimator"]
+        ]
+
+        intensity_num_shapes_results += [
+            {
+                "width": wh,
+                "height": wh,
+                "control": obs_causal_test_result.estimator.control_value,
+                "treatment": obs_causal_test_result.estimator.treatment_value,
+                "smt_risk_ratio": smt_causal_test_result.test_value.value,
+                "obs_risk_ratio": obs_causal_test_result.test_value.value[0],
+            }
+            for obs_causal_test_result, smt_causal_test_result in zip(
+                test_suite_results[base_test_case]["LinearRegressionEstimator"],
+                test_suite_results[base_test_case]["EmpiricalMeanEstimator"],
+            )
+        ]
+    intensity_num_shapes_results = pd.DataFrame(intensity_num_shapes_results)
+    if save:
+        intensity_num_shapes_results.to_csv("intensity_num_shapes_results_random_1000.csv")
+    logger.info("%s", intensity_num_shapes_results)
+
+
+def test_poisson_width_num_shapes(save=False):
+    base_test_case = BaseTestCase(treatment_variable=width, outcome_variable=num_shapes_unit)
+    causal_test_case_list = [
+        CausalTestCase(
+            base_test_case=base_test_case,
+            expected_causal_effect=Positive(),
+            control_value=float(w),
+            treatment_value=w + 1.0,
+            estimate_type="ate_calculated",
+            effect_modifier_configuration={"intensity": i},
+        )
+        for i in range(17)
+        for w in range(1, 10)
+    ]
+    test_suite = CausalTestSuite()
+    test_suite.add_test_object(
+        base_test_case,
+        causal_test_case_list=causal_test_case_list,
+        estimators=[LinearRegressionEstimator],
+    )
+    test_suite_results = test_suite.execute_test_suite(
+        causal_specification, pd.read_csv(observational_data_path, index_col=0).astype(float)
+    )
+    width_num_shapes_results = [
+        {
+            "control": causal_test_result.estimator.control_value,
+            "treatment": causal_test_result.estimator.treatment_value,
+            "intensity": causal_test_result.effect_modifier_configuration["intensity"],
+            "ate": causal_test_result.test_value.value[0],
+            "ci_low": causal_test_result.confidence_intervals[0][0],
+            "ci_high": causal_test_result.confidence_intervals[1][0],
+        }
+        for causal_test_result in test_suite_results[base_test_case]["LinearRegressionEstimator"]
+    ]
+    width_num_shapes_results = pd.DataFrame(width_num_shapes_results)
+    if save:
+        width_num_shapes_results.to_csv("width_num_shapes_results_random_1000.csv")
+    logger.info("%s", width_num_shapes_results)
+
+
+if __name__ == "__main__":
+    test_poisson_intensity_num_shapes(save=False)
+    # test_poisson_width_num_shapes(save=True)

From 78d1c3d586f3735866e0c510c82457de91f07426 Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Tue, 18 Feb 2025 11:34:50 +0000
Subject: [PATCH 25/44] causal test case

---
 causal_testing/testing/causal_test_case.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/causal_testing/testing/causal_test_case.py b/causal_testing/testing/causal_test_case.py
index 586c2b84..d24b4089 100644
--- a/causal_testing/testing/causal_test_case.py
+++ b/causal_testing/testing/causal_test_case.py
@@ -32,6 +32,7 @@ def __init__(
         estimate_type: str = "ate",
         estimate_params: dict = None,
         effect_modifier_configuration: dict[Variable:Any] = None,
+        estimator: type(Estimator) = None,
     ):
         """
         :param base_test_case: A BaseTestCase object consisting of a treatment variable, outcome variable and effect
@@ -40,6 +41,7 @@ def __init__(
         :param treatment_value: The treatment value for the treatment variable (after intervention).
         :param estimate_type: A string which denotes the type of estimate to return
         :param effect_modifier_configuration:
+        :param estimator: An Estimator class instance
         """
         self.base_test_case = base_test_case
         self.control_value = control_value
@@ -48,6 +50,7 @@ def __init__(
         self.treatment_variable = base_test_case.treatment_variable
         self.treatment_value = treatment_value
         self.estimate_type = estimate_type
+        self.estimator = estimator
         if estimate_params is None:
             self.estimate_params = {}
         self.effect = base_test_case.effect
@@ -57,19 +60,18 @@ def __init__(
         else:
             self.effect_modifier_configuration = {}
 
-    def execute_test(self, estimator: type(Estimator)) -> CausalTestResult:
+    def execute_test(self) -> CausalTestResult:
         """Execute a causal test case and return the causal test result.
 
-        :param estimator: An Estimator class object
         :return causal_test_result: A CausalTestResult for the executed causal test case.
         """
 
-        if not hasattr(estimator, f"estimate_{self.estimate_type}"):
-            raise AttributeError(f"{estimator.__class__} has no {self.estimate_type} method.")
-        estimate_effect = getattr(estimator, f"estimate_{self.estimate_type}")
+        if not hasattr(self.estimator, f"estimate_{self.estimate_type}"):
+            raise AttributeError(f"{self.estimator.__class__} has no {self.estimate_type} method.")
+        estimate_effect = getattr(self.estimator, f"estimate_{self.estimate_type}")
         effect, confidence_intervals = estimate_effect(**self.estimate_params)
         return CausalTestResult(
-            estimator=estimator,
+            estimator=self.estimator,
             test_value=TestValue(self.estimate_type, effect),
             effect_modifier_configuration=self.effect_modifier_configuration,
             confidence_intervals=confidence_intervals,

From 6071bd343cac923550a5b1969c393c2ca454b683 Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Tue, 18 Feb 2025 12:44:29 +0000
Subject: [PATCH 26/44] Reworked examples

---
 causal_testing/testing/causal_test_case.py    |    6 +-
 causal_testing/testing/causal_test_suite.py   |    8 +-
 .../vaccinating_elderly/example_vaccine.py    |    1 -
 examples/poisson-line-process/README.md       |    3 +-
 .../causal_tests.json                         |    9 +-
 examples/poisson-line-process/dag.dot         |    1 -
 .../example_poisson_process.py                |  210 ----
 .../example_pure_python.py                    |  157 ++-
 examples/poisson/README.md                    |   11 -
 examples/poisson/dag.dot                      |   15 -
 examples/poisson/data.csv                     | 1001 -----------------
 examples/poisson/example_run_causal_tests.py  |  183 ---
 .../test_causal_test_adequacy.py              |    8 +-
 width_num_shapes_results_random_1000.csv      |  145 +++
 14 files changed, 229 insertions(+), 1529 deletions(-)
 rename examples/{poisson => poisson-line-process}/causal_tests.json (95%)
 delete mode 100644 examples/poisson-line-process/example_poisson_process.py
 delete mode 100644 examples/poisson/README.md
 delete mode 100644 examples/poisson/dag.dot
 delete mode 100644 examples/poisson/data.csv
 delete mode 100644 examples/poisson/example_run_causal_tests.py
 create mode 100644 width_num_shapes_results_random_1000.csv

diff --git a/causal_testing/testing/causal_test_case.py b/causal_testing/testing/causal_test_case.py
index d24b4089..ce1c4e8d 100644
--- a/causal_testing/testing/causal_test_case.py
+++ b/causal_testing/testing/causal_test_case.py
@@ -39,9 +39,9 @@ def __init__(
         :param expected_causal_effect: The expected causal effect (Positive, Negative, No Effect).
         :param control_value: The control value for the treatment variable (before intervention).
         :param treatment_value: The treatment value for the treatment variable (after intervention).
-        :param estimate_type: A string which denotes the type of estimate to return
-        :param effect_modifier_configuration:
-        :param estimator: An Estimator class instance
+        :param estimate_type: A string which denotes the type of estimate to return.
+        :param effect_modifier_configuration: The assignment of the effect modifiers to use for estimates.
+        :param estimator: An Estimator class object
         """
         self.base_test_case = base_test_case
         self.control_value = control_value
diff --git a/causal_testing/testing/causal_test_suite.py b/causal_testing/testing/causal_test_suite.py
index 5539210d..d5b93bc3 100644
--- a/causal_testing/testing/causal_test_suite.py
+++ b/causal_testing/testing/causal_test_suite.py
@@ -31,7 +31,7 @@ def add_test_object(
         self,
         base_test_case: BaseTestCase,
         causal_test_case_list: Iterable[CausalTestCase],
-        estimators_classes: Iterable[Type[Estimator]],
+        estimators: Iterable[Type[Estimator]],
         estimate_type: str = "ate",
     ):
         """
@@ -39,10 +39,12 @@ def add_test_object(
 
         :param base_test_case: A BaseTestCase object consisting of a treatment variable, outcome variable and effect
         :param causal_test_case_list: A list of causal test cases to be executed
-        :param estimators_classes: A list of estimators, one for each causal test case
+        :param estimators: A list of estimator classes (NOT instances) to be used to execute each of the test cases.
+        Each estimator will be applied to each test case, so this will typically just be a single element list.
+        However, if you want to compare the outputs of different estimators, you may include more than one class here.
         :param estimate_type: A string which denotes the type of estimate to return
         """
-        test_object = {"tests": causal_test_case_list, "estimators": estimators_classes, "estimate_type": estimate_type}
+        test_object = {"tests": causal_test_case_list, "estimators": estimators, "estimate_type": estimate_type}
         self.data[base_test_case] = test_object
 
     def execute_test_suite(
diff --git a/examples/covasim_/vaccinating_elderly/example_vaccine.py b/examples/covasim_/vaccinating_elderly/example_vaccine.py
index b481b281..50936a0a 100644
--- a/examples/covasim_/vaccinating_elderly/example_vaccine.py
+++ b/examples/covasim_/vaccinating_elderly/example_vaccine.py
@@ -50,7 +50,6 @@ def setup_test_case(verbose: bool = False):
             cum_vaccinations,
             max_doses,
         },
-        constraints={pop_size.z3 == 50000, pop_infected.z3 == 1000, n_days.z3 == 50},
     )
 
     # 4. Construct a causal specification from the scenario and causal DAG
diff --git a/examples/poisson-line-process/README.md b/examples/poisson-line-process/README.md
index 10d346cf..ac6dfec2 100644
--- a/examples/poisson-line-process/README.md
+++ b/examples/poisson-line-process/README.md
@@ -6,6 +6,7 @@ To run this case study:
 1. Ensure all project dependencies are installed by running `pip install .` in the top level directory
    (instructions are provided in the project README).
 2. Change directory to `causal_testing/examples/poisson-line-process`.
-3. Run the command `python example_poisson_process.py`
+3. Run the command `python example_pure_python.py` to demonstrate causal testing using pure python.
+3. Run the command `python example_json_frontend.py` to demonstrate the same causal tests using JSON.
 
 This should print a series of causal test results and produce two CSV files. `intensity_num_shapes_results_random_1000.csv` corresponds to table 1, and `width_num_shapes_results_random_1000.csv` relates to our findings regarding the relationship of width and `P_u`.
diff --git a/examples/poisson/causal_tests.json b/examples/poisson-line-process/causal_tests.json
similarity index 95%
rename from examples/poisson/causal_tests.json
rename to examples/poisson-line-process/causal_tests.json
index 08bf659a..69b107bf 100644
--- a/examples/poisson/causal_tests.json
+++ b/examples/poisson-line-process/causal_tests.json
@@ -3,7 +3,8 @@
     {
       "name": "width__num_lines_abs",
       "mutations": {"width": "Increase"},
-      "estimator": "WidthHeightEstimator",
+      "estimator": "LinearRegressionEstimator",
+      "formula": "num_lines_abs ~ I(intensity * (width + height))",
       "estimate_type": "ate",
       "effect_modifiers": ["intensity", "height"],
       "expected_effect": {"num_lines_abs": "PoissonWidthHeight"},
@@ -138,7 +139,8 @@
     {
       "name": "height__num_lines_abs",
       "mutations": {"height": "Increase"},
-      "estimator": "WidthHeightEstimator",
+      "estimator": "LinearRegressionEstimator",
+      "formula": "num_lines_abs ~ I(intensity * (width + height))",
       "estimate_type": "ate",
       "effect_modifiers": ["intensity", "width"],
       "expected_effect": {"num_lines_abs": "PoissonWidthHeight"},
@@ -192,7 +194,8 @@
     {
       "name": "intensity__num_lines_abs",
       "mutations": {"intensity": "Increase"},
-      "estimator": "WidthHeightEstimator",
+      "estimator": "LinearRegressionEstimator",
+      "formula": "num_lines_abs ~ I(intensity * (width + height))",
       "effect_modifiers": ["height", "width"],
       "estimate_type": "ate",
       "expected_effect": {"num_lines_abs": "PoissonIntensity"},
diff --git a/examples/poisson-line-process/dag.dot b/examples/poisson-line-process/dag.dot
index 44f5eb9f..adcc1af0 100644
--- a/examples/poisson-line-process/dag.dot
+++ b/examples/poisson-line-process/dag.dot
@@ -1,5 +1,4 @@
 digraph poisson_line_process {
-  rankdir=LR;
   width -> num_lines_abs;
   width -> num_shapes_abs;
   width -> num_lines_unit;
diff --git a/examples/poisson-line-process/example_poisson_process.py b/examples/poisson-line-process/example_poisson_process.py
deleted file mode 100644
index 00ae042d..00000000
--- a/examples/poisson-line-process/example_poisson_process.py
+++ /dev/null
@@ -1,210 +0,0 @@
-from causal_testing.specification.causal_dag import CausalDAG
-from causal_testing.specification.scenario import Scenario
-from causal_testing.specification.variable import Input, Output
-from causal_testing.specification.causal_specification import CausalSpecification
-from causal_testing.testing.causal_test_case import CausalTestCase
-from causal_testing.testing.causal_test_outcome import ExactValue, Positive
-from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
-from causal_testing.estimation.abstract_estimator import Estimator
-from causal_testing.testing.base_test_case import BaseTestCase
-
-import pandas as pd
-import os
-import logging
-
-logger = logging.getLogger(__name__)
-logging.basicConfig(level=logging.DEBUG, format="%(message)s")
-
-
-class EmpiricalMeanEstimator(Estimator):
-    def add_modelling_assumptions(self):
-        """
-        Add modelling assumptions to the estimator. This is a list of strings which list the modelling assumptions that
-        must hold if the resulting causal inference is to be considered valid.
-        """
-        self.modelling_assumptions += "The data must contain runs with the exact configuration of interest."
-
-    def estimate_ate(self) -> float:
-        """Estimate the outcomes under control and treatment.
-        :return: The empirical average treatment effect.
-        """
-        control_results = self.df.where(self.df[self.treatment[0]] == self.control_value)[self.outcome].dropna()
-        treatment_results = self.df.where(self.df[self.treatment[0]] == self.treatment_value)[self.outcome].dropna()
-        return treatment_results.mean()[0] - control_results.mean()[0], None
-
-    def estimate_risk_ratio(self) -> float:
-        """Estimate the outcomes under control and treatment.
-        :return: The empirical average treatment effect.
-        """
-        control_results = self.df.where(self.df[self.treatment[0]] == self.control_value)[self.outcome].dropna()
-        treatment_results = self.df.where(self.df[self.treatment[0]] == self.treatment_value)[self.outcome].dropna()
-        return treatment_results.mean()[0] / control_results.mean()[0], None
-
-
-# 1. Read in the Causal DAG
-ROOT = os.path.realpath(os.path.dirname(__file__))
-causal_dag = CausalDAG(f"{ROOT}/dag.dot")
-
-# 2. Create variables
-width = Input("width", float)
-height = Input("height", float)
-intensity = Input("intensity", float)
-
-num_lines_abs = Output("num_lines_abs", float)
-num_lines_unit = Output("num_lines_unit", float)
-num_shapes_abs = Output("num_shapes_abs", float)
-num_shapes_unit = Output("num_shapes_unit", float)
-
-# 3. Create scenario by applying constraints over a subset of the input variables
-scenario = Scenario(
-    variables={
-        width,
-        height,
-        intensity,
-        num_lines_abs,
-        num_lines_unit,
-        num_shapes_abs,
-        num_shapes_unit,
-    }
-)
-
-# 4. Construct a causal specification from the scenario and causal DAG
-causal_specification = CausalSpecification(scenario, causal_dag)
-
-observational_data_path = f"{ROOT}/data/random/data_random_1000.csv"
-
-
-def causal_test_intensity_num_shapes(
-    observational_data_path,
-    causal_test_case,
-    square_terms=[],
-    inverse_terms=[],
-    empirical=False,
-):
-    # 7. Obtain the minimal adjustment set for the causal test case from the causal DAG
-    minimal_adjustment_set = causal_dag.identification(causal_test_case.base_test_case)
-
-    # 8. Set up an estimator
-    data = pd.read_csv(observational_data_path, index_col=0).astype(float)
-
-    treatment = causal_test_case.treatment_variable.name
-    outcome = causal_test_case.outcome_variable.name
-
-    estimator = None
-    if empirical:
-        estimator = EmpiricalMeanEstimator(
-            treatment=[treatment],
-            control_value=causal_test_case.control_value,
-            treatment_value=causal_test_case.treatment_value,
-            adjustment_set=set(),
-            outcome=[outcome],
-            df=data,
-            effect_modifiers=causal_test_case.effect_modifier_configuration,
-        )
-    else:
-        square_terms = [f"I({t} ** 2)" for t in square_terms]
-        inverse_terms = [f"I({t} ** -1)" for t in inverse_terms]
-        estimator = LinearRegressionEstimator(
-            treatment=treatment,
-            control_value=causal_test_case.control_value,
-            treatment_value=causal_test_case.treatment_value,
-            adjustment_set=set(),
-            outcome=outcome,
-            df=data,
-            effect_modifiers=causal_test_case.effect_modifier_configuration,
-            formula=f"{outcome} ~ {treatment} + {'+'.join(square_terms + inverse_terms + list([e for e in causal_test_case.effect_modifier_configuration]))} -1",
-        )
-
-    # 9. Execute the test
-    causal_test_result = causal_test_case.execute_test(estimator)
-
-    return causal_test_result
-
-
-def test_poisson_intensity_num_shapes(save=False):
-    intensity_num_shapes_results = []
-    for wh in range(1, 11):
-        smt_data_path = f"{ROOT}/data/smt_100/data_smt_wh{wh}_100.csv"
-        for control_value, treatment_value in [(1, 2), (2, 4), (4, 8), (8, 16)]:
-            logger.info("%s CAUSAL TEST %s", "=" * 33, "=" * 33)
-            logger.info("WIDTH = HEIGHT = %s", wh)
-            logger.info("Identifying")
-            base_test_case = BaseTestCase(treatment_variable=intensity, outcome_variable=num_shapes_unit)
-            causal_test_case = CausalTestCase(
-                base_test_case=base_test_case,
-                expected_causal_effect=ExactValue(4, atol=0.5),
-                treatment_value=treatment_value,
-                control_value=control_value,
-                estimate_type="risk_ratio",
-            )
-            obs_causal_test_result = causal_test_intensity_num_shapes(
-                observational_data_path,
-                causal_test_case,
-                square_terms=["intensity"],
-                empirical=False,
-            )
-            logger.info("Observational %s", obs_causal_test_result)
-            smt_causal_test_result = causal_test_intensity_num_shapes(
-                smt_data_path, causal_test_case, square_terms=["intensity"], empirical=True
-            )
-            logger.info("RCT %s", smt_causal_test_result)
-
-            results = {
-                "width": wh,
-                "height": wh,
-                "control": control_value,
-                "treatment": treatment_value,
-                "smt_risk_ratio": smt_causal_test_result.test_value.value,
-                "obs_risk_ratio": obs_causal_test_result.test_value.value,
-            }
-            intensity_num_shapes_results.append(results)
-
-    intensity_num_shapes_results = pd.DataFrame(intensity_num_shapes_results)
-    if save:
-        intensity_num_shapes_results.to_csv("intensity_num_shapes_results_random_1000.csv")
-    logger.info("%s", intensity_num_shapes_results)
-
-
-def test_poisson_width_num_shapes(save=False):
-    width_num_shapes_results = []
-    for i in range(17):
-        for w in range(1, 10):
-            logger.info("%s CAUSAL TEST %s", "=" * 33, "=" * 33)
-            logger.info("Identifying")
-            # 5. Create a causal test case
-            control_value = float(w)
-            treatment_value = w + 1.0
-            base_test_case = BaseTestCase(treatment_variable=width, outcome_variable=num_shapes_unit)
-            causal_test_case = CausalTestCase(
-                base_test_case=base_test_case,
-                expected_causal_effect=Positive(),
-                control_value=control_value,
-                treatment_value=treatment_value,
-                estimate_type="ate_calculated",
-                effect_modifier_configuration={"intensity": i},
-            )
-            causal_test_result = causal_test_intensity_num_shapes(
-                observational_data_path,
-                causal_test_case,
-                square_terms=["intensity"],
-                inverse_terms=["width"],
-            )
-            logger.info("%s", causal_test_result)
-            results = {
-                "control": control_value,
-                "treatment": treatment_value,
-                "intensity": i,
-                "ate": causal_test_result.test_value.value,
-                "ci_low": causal_test_result.confidence_intervals[0][0],
-                "ci_high": causal_test_result.confidence_intervals[1][0],
-            }
-            width_num_shapes_results.append(results)
-    width_num_shapes_results = pd.DataFrame(width_num_shapes_results)
-    if save:
-        width_num_shapes_results.to_csv("width_num_shapes_results_random_1000.csv")
-    logger.info("%s", width_num_shapes_results)
-
-
-if __name__ == "__main__":
-    # test_poisson_intensity_num_shapes(save=True)
-    test_poisson_width_num_shapes(save=True)
diff --git a/examples/poisson-line-process/example_pure_python.py b/examples/poisson-line-process/example_pure_python.py
index 67a2f4b2..d5b5005d 100644
--- a/examples/poisson-line-process/example_pure_python.py
+++ b/examples/poisson-line-process/example_pure_python.py
@@ -77,94 +77,67 @@ def estimate_risk_ratio(self) -> float:
 observational_data_path = f"{ROOT}/data/random/data_random_1000.csv"
 
 
-def causal_test_intensity_num_shapes(
-    observational_data_path,
-    causal_test_case,
-    square_terms=[],
-    inverse_terms=[],
-    empirical=False,
-):
-    # 8. Set up an estimator
-    data = pd.read_csv(observational_data_path, index_col=0).astype(float)
-
-    treatment = causal_test_case.treatment_variable.name
-    outcome = causal_test_case.outcome_variable.name
-
-    estimator = None
-    if empirical:
-        estimator = EmpiricalMeanEstimator(
-            treatment=[treatment],
-            control_value=causal_test_case.control_value,
-            treatment_value=causal_test_case.treatment_value,
-            adjustment_set=set(),
-            outcome=[outcome],
-            df=data,
-            effect_modifiers=causal_test_case.effect_modifier_configuration,
-        )
-    else:
-        square_terms = [f"I({t} ** 2)" for t in square_terms]
-        inverse_terms = [f"I({t} ** -1)" for t in inverse_terms]
-        estimator = LinearRegressionEstimator(
-            treatment=treatment,
-            control_value=causal_test_case.control_value,
-            treatment_value=causal_test_case.treatment_value,
-            adjustment_set=set(),
-            outcome=outcome,
-            df=data,
-            effect_modifiers=causal_test_case.effect_modifier_configuration,
-            formula=f"{outcome} ~ {treatment} + {'+'.join(square_terms + inverse_terms + list([e for e in causal_test_case.effect_modifier_configuration]))} -1",
-        )
-
-    # 9. Execute the test
-    causal_test_result = causal_test_case.execute_test(estimator)
-
-    return causal_test_result
-
-
 def test_poisson_intensity_num_shapes(save=False):
     intensity_num_shapes_results = []
     base_test_case = BaseTestCase(treatment_variable=intensity, outcome_variable=num_shapes_unit)
-    for wh in range(1, 11):
-        smt_data_path = f"{ROOT}/data/smt_100/data_smt_wh{wh}_100.csv"
-        causal_test_case_list = [
+    observational_df = pd.read_csv(observational_data_path, index_col=0).astype(float)
+    causal_test_cases = [
+        (
             CausalTestCase(
                 base_test_case=base_test_case,
                 expected_causal_effect=ExactValue(4, atol=0.5),
                 treatment_value=treatment_value,
                 control_value=control_value,
                 estimate_type="risk_ratio",
-            )
-            for control_value, treatment_value in [(1, 2), (2, 4), (4, 8), (8, 16)]
-        ]
-        test_suite = CausalTestSuite()
-        test_suite.add_test_object(
-            base_test_case,
-            causal_test_case_list=causal_test_case_list,
-            estimators=[LinearRegressionEstimator, EmpiricalMeanEstimator],
-        )
-        test_suite_results = test_suite.execute_test_suite(
-            causal_specification, pd.read_csv(smt_data_path, index_col=0).astype(float)
+                estimator=EmpiricalMeanEstimator(
+                    treatment=base_test_case.treatment_variable.name,
+                    treatment_value=treatment_value,
+                    control_value=control_value,
+                    adjustment_set=causal_specification.causal_dag.identification(base_test_case),
+                    outcome=base_test_case.outcome_variable.name,
+                    df=pd.read_csv(f"{ROOT}/data/smt_100/data_smt_wh{wh}_100.csv", index_col=0).astype(float),
+                    effect_modifiers=None,
+                    alpha=0.05,
+                    query="",
+                ),
+            ),
+            CausalTestCase(
+                base_test_case=base_test_case,
+                expected_causal_effect=ExactValue(4, atol=0.5),
+                treatment_value=treatment_value,
+                control_value=control_value,
+                estimate_type="risk_ratio",
+                estimator=LinearRegressionEstimator(
+                    treatment=base_test_case.treatment_variable.name,
+                    treatment_value=treatment_value,
+                    control_value=control_value,
+                    adjustment_set=causal_specification.causal_dag.identification(base_test_case),
+                    outcome=base_test_case.outcome_variable.name,
+                    df=observational_df,
+                    effect_modifiers=None,
+                    formula="num_shapes_unit ~ I(intensity ** 2) + intensity - 1",
+                    alpha=0.05,
+                    query="",
+                ),
+            ),
         )
+        for control_value, treatment_value in [(1, 2), (2, 4), (4, 8), (8, 16)]
+        for wh in range(1, 11)
+    ]
 
-        smt_risk_ratios = [
-            causal_test_result.test_value.value
-            for causal_test_result in test_suite_results[base_test_case]["EmpiricalMeanEstimator"]
-        ]
-
-        intensity_num_shapes_results += [
-            {
-                "width": wh,
-                "height": wh,
-                "control": obs_causal_test_result.estimator.control_value,
-                "treatment": obs_causal_test_result.estimator.treatment_value,
-                "smt_risk_ratio": smt_causal_test_result.test_value.value,
-                "obs_risk_ratio": obs_causal_test_result.test_value.value[0],
-            }
-            for obs_causal_test_result, smt_causal_test_result in zip(
-                test_suite_results[base_test_case]["LinearRegressionEstimator"],
-                test_suite_results[base_test_case]["EmpiricalMeanEstimator"],
-            )
-        ]
+    test_results = [(smt.execute_test(), observational.execute_test()) for smt, observational in causal_test_cases]
+
+    intensity_num_shapes_results += [
+        {
+            "width": obs_causal_test_result.estimator.control_value,
+            "height": obs_causal_test_result.estimator.treatment_value,
+            "control": obs_causal_test_result.estimator.control_value,
+            "treatment": obs_causal_test_result.estimator.treatment_value,
+            "smt_risk_ratio": smt_causal_test_result.test_value.value,
+            "obs_risk_ratio": obs_causal_test_result.test_value.value[0],
+        }
+        for smt_causal_test_result, obs_causal_test_result in test_results
+    ]
     intensity_num_shapes_results = pd.DataFrame(intensity_num_shapes_results)
     if save:
         intensity_num_shapes_results.to_csv("intensity_num_shapes_results_random_1000.csv")
@@ -173,7 +146,8 @@ def test_poisson_intensity_num_shapes(save=False):
 
 def test_poisson_width_num_shapes(save=False):
     base_test_case = BaseTestCase(treatment_variable=width, outcome_variable=num_shapes_unit)
-    causal_test_case_list = [
+    df = pd.read_csv(observational_data_path, index_col=0).astype(float)
+    causal_test_cases = [
         CausalTestCase(
             base_test_case=base_test_case,
             expected_causal_effect=Positive(),
@@ -181,19 +155,22 @@ def test_poisson_width_num_shapes(save=False):
             treatment_value=w + 1.0,
             estimate_type="ate_calculated",
             effect_modifier_configuration={"intensity": i},
+            estimator=LinearRegressionEstimator(
+                treatment=base_test_case.treatment_variable.name,
+                treatment_value=w + 1.0,
+                control_value=float(w),
+                adjustment_set=causal_specification.causal_dag.identification(base_test_case),
+                outcome=base_test_case.outcome_variable.name,
+                df=df,
+                effect_modifiers={"intensity": i},
+                formula="num_shapes_unit ~ width + I(intensity ** 2)+I(width ** -1)+intensity-1",
+                alpha=0.05,
+            ),
         )
-        for i in range(17)
+        for i in range(1, 17)
         for w in range(1, 10)
     ]
-    test_suite = CausalTestSuite()
-    test_suite.add_test_object(
-        base_test_case,
-        causal_test_case_list=causal_test_case_list,
-        estimators=[LinearRegressionEstimator],
-    )
-    test_suite_results = test_suite.execute_test_suite(
-        causal_specification, pd.read_csv(observational_data_path, index_col=0).astype(float)
-    )
+    test_results = [test.execute_test() for test in causal_test_cases]
     width_num_shapes_results = [
         {
             "control": causal_test_result.estimator.control_value,
@@ -203,7 +180,7 @@ def test_poisson_width_num_shapes(save=False):
             "ci_low": causal_test_result.confidence_intervals[0][0],
             "ci_high": causal_test_result.confidence_intervals[1][0],
         }
-        for causal_test_result in test_suite_results[base_test_case]["LinearRegressionEstimator"]
+        for causal_test_result in test_results
     ]
     width_num_shapes_results = pd.DataFrame(width_num_shapes_results)
     if save:
@@ -213,4 +190,4 @@ def test_poisson_width_num_shapes(save=False):
 
 if __name__ == "__main__":
     test_poisson_intensity_num_shapes(save=False)
-    # test_poisson_width_num_shapes(save=True)
+    test_poisson_width_num_shapes(save=True)
diff --git a/examples/poisson/README.md b/examples/poisson/README.md
deleted file mode 100644
index 1b8fe514..00000000
--- a/examples/poisson/README.md
+++ /dev/null
@@ -1,11 +0,0 @@
-# Poisson Line Process Case Study: Statistical Metamorphic Testing
-Here we demonstrate how the same test suite as in `poisson-line-process` can be coded using the JSON front end.
-
-## How to run
-To run this case study:
-1. Ensure all project dependencies are installed by running `pip install .` in the top level directory
-   (instructions are provided in the project README).
-2. Change directory to `causal_testing/examples/poisson`.
-3. Run the command `python example_run_causal_tests.py --data_path data.csv --dag_path dag.dot --json_path causal_tests.json`
-
-This should print a series of causal test results and produce two CSV files. `intensity_num_shapes_results_random_1000.csv` corresponds to table 1, and `width_num_shapes_results_random_1000.csv` relates to our findings regarding the relationship of width and `P_u`.
diff --git a/examples/poisson/dag.dot b/examples/poisson/dag.dot
deleted file mode 100644
index fa11a288..00000000
--- a/examples/poisson/dag.dot
+++ /dev/null
@@ -1,15 +0,0 @@
-digraph poisson_line_process {
-  width -> num_lines_abs [color="green"];
-  width -> num_shapes_abs;
-  width -> num_lines_unit [color="green"];
-  width -> num_shapes_unit;
-  height -> num_lines_abs [color="green"];
-  height -> num_shapes_abs;
-  height -> num_lines_unit [color="green"];
-  height -> num_shapes_unit;
-  num_lines_abs -> num_lines_unit;
-  num_shapes_abs -> num_shapes_unit [color="orange"];
-  intensity -> num_lines_abs;
-  intensity -> num_shapes_abs [color="orange"];
-  num_lines_abs -> num_shapes_abs;
-}
\ No newline at end of file
diff --git a/examples/poisson/data.csv b/examples/poisson/data.csv
deleted file mode 100644
index d089bc26..00000000
--- a/examples/poisson/data.csv
+++ /dev/null
@@ -1,1001 +0,0 @@
-,width,height,intensity,num_lines_abs,num_shapes_abs
-0,4.90826588596665,8.36778313271925,10.6841029789477,295,8315
-1,9.23956189066697,0.632305830593579,12.8265405262846,258,1790
-2,5.14489487209122,9.6335297107609,15.2193488932789,440,17782
-3,3.48945276027756,4.53577468407476,8.3623932403167,140,1373
-4,7.9856504541106,1.09028977005441,1.03227646465512,20,51
-5,8.2420552715115,7.83530249558924,3.47898628640799,106,798
-6,6.67213554740588,3.32078006497174,12.3055003809639,222,4088
-7,2.76161961532872,6.18651851039985,2.21877655408086,24,71
-8,0.2376375439924,5.04077514155476,7.89876798197878,92,175
-9,1.72821634861186,2.59089294318824,5.02003351063359,33,78
-0,4.35597267865126,5.49161587511683,0.80501801372814,17,24
-1,0.757846110464369,3.47347177078057,15.7703837459013,131,939
-2,7.01439348862976,6.17320186991002,10.6539098105903,311,8648
-3,5.11607264050692,2.43385164923797,8.26069508333029,118,1255
-4,8.04600264202175,4.39850473439737,12.9129099958407,299,8596
-5,3.04072880231897,8.61585009805222,2.62787084513011,70,305
-6,9.85546058401101,9.63509365086764,3.24241809686659,157,2472
-7,6.70365785938002,7.04530400977204,12.1372409300375,330,11545
-8,1.47417382908733,0.374612614626471,6.30436838627993,27,47
-9,2.097834160651,1.62585991571424,7.32075868460141,55,206
-0,7.61341519593579,8.59413071535214,1.30064161460812,39,92
-1,9.41824303629062,0.380890856631021,4.79954827725781,94,146
-2,0.932728483354013,1.9699143978146,11.1946189393182,63,270
-3,2.86606388950041,9.84211892313571,7.2886907289642,179,1969
-4,4.04521867010619,4.83832870471114,2.83037986428882,51,284
-5,5.02636697449725,7.46869315979497,6.31162516781188,145,1717
-6,6.37646336687805,6.41481950233767,15.7594358250839,395,14003
-7,3.81055333078183,3.27340707193071,8.39575696278912,116,1279
-8,8.98727612931495,2.05637549665093,11.9208706164961,250,4152
-9,1.15041689110353,5.86472237625505,13.0066550642424,193,2086
-0,8.32367923640424,7.53258943255159,1.2163645758235,34,134
-1,9.42543643861642,0.051823536822427,2.55302198497255,56,45
-2,4.50761037868446,8.33660427819392,10.3545219016803,249,5721
-3,6.24240973241508,3.13441467693897,15.0589454626356,301,7603
-4,1.1148368247392,9.06337497047277,8.55818922646879,172,1104
-5,2.61062004244163,2.98996023238995,4.68724663079652,45,156
-6,7.28863055324026,6.32235384497472,14.1289910524604,411,13347
-7,3.58123822142261,4.80987444585464,7.94404305706642,131,1773
-8,0.154362715274202,5.25464065476376,11.3988755575769,145,299
-9,5.48114010185482,1.68150272222393,5.96938796032583,93,495
-0,5.37330931627975,0.231074796588071,5.95562709096905,70,87
-1,2.27074467314355,8.67189274359873,1.36911434897006,31,64
-2,0.643999543239016,9.01971053775436,12.5283517833072,249,1342
-3,1.4087341710981,7.10410858198457,3.83549364738953,64,242
-4,8.02538635566035,2.79991608537319,9.06893621851303,210,3281
-5,6.15615259736619,6.17854466205433,6.72797487266531,148,2265
-6,3.7159722288474,1.65274610785187,10.0690363684176,108,955
-7,4.65892394191015,5.23818278104673,14.2341373096338,272,6660
-8,9.02709599250348,4.09944139275935,1.62080307761718,59,424
-9,7.22197216193295,3.24317219099945,14.5368136493671,327,8964
-0,5.8747016726842,0.873677114186339,14.8740929927296,194,1963
-1,1.52893713402721,5.9840834691993,11.5737724016799,203,2723
-2,6.9390676985129,7.76827341386452,1.66734910341818,54,398
-3,4.79878323577367,1.41776678216733,9.62859829557346,119,978
-4,0.997934110533338,4.42135700227707,4.78035582357765,57,264
-5,2.35071181545171,9.73758230158889,8.68443701397738,211,2506
-6,9.76718828893113,3.2387771457683,5.41492263545549,152,1736
-7,3.40193091360924,2.11047411313139,1.08743565230891,16,34
-8,8.47987562030391,6.35462215764078,13.1492062205841,399,16001
-9,7.62750546321837,8.28723899165408,7.91993789432036,260,7723
-0,2.89409920367913,3.86862316795561,4.47445966556188,63,416
-1,9.22995460589109,7.77659685279174,7.17043576242008,272,7181
-2,3.21310470402508,5.77092184464405,1.78769310233983,38,217
-3,8.03113408288451,0.844783228116888,11.4002972675241,206,1942
-4,7.65166682537589,6.761023990943,1.09690445956635,29,67
-5,6.36852634372376,9.62622032163142,15.0884894318391,507,26135
-6,1.86435824983964,8.13124487768239,9.92083956272054,159,1938
-7,5.47320990669156,1.03252617949125,5.5865512747863,70,335
-8,4.96819342791472,2.92084784781569,8.10273429932022,112,1001
-9,0.185525515703121,4.61665031452074,13.731154243063,122,226
-0,4.20584121003677,3.31615610497675,10.3275837690142,132,1606
-1,1.11191961939773,1.50723480868839,7.2249536457268,46,109
-2,6.26974961151699,8.04157285905039,8.78215452841107,245,5509
-3,0.057086856089315,6.14834320096209,4.26698281207377,49,26
-4,8.53116952800103,5.98663012295885,11.4234420076101,325,10811
-5,2.93660569229496,9.96511869643605,12.8479577437963,332,8179
-6,3.03934354066851,4.00493998093441,0.492687906545905,8,4
-7,9.12210991401003,2.95181178542324,5.92748922037863,130,1498
-8,7.45219902828343,7.6391199378155,14.7229655233967,449,18886
-9,5.93387501752791,0.867918294520022,2.67749189331987,30,29
-0,6.69243615132437,8.19186732471874,4.60812654009833,134,1956
-1,1.99125599370219,6.32337156211552,7.44140379047551,131,1028
-2,5.12839428945413,3.22665640222699,8.85273245848214,128,1481
-3,8.10410964938417,4.35499630583194,11.7189340049846,313,7511
-4,9.72433881712874,9.06942384274609,13.3328030608931,492,24567
-5,0.578386917006409,5.51905979090827,10.6711791283171,147,781
-6,7.27416066655981,7.06761256359222,6.39062297795387,200,3728
-7,2.07941936898899,0.800356507822361,15.458942703759,87,692
-8,3.08565824959643,2.23371208191043,2.49245346779204,29,86
-9,4.89419087466412,1.54001191460451,1.16904081638279,11,19
-0,2.45944676840599,6.03842634896233,0.240270401307267,3,4
-1,8.78501654383706,3.90176199213542,9.17989402995438,242,5061
-2,6.89208468894023,0.461477464624734,13.6195550733003,211,1618
-3,5.95533468650393,9.63720147680828,15.4883644467334,506,19761
-4,4.7869033862553,2.65935392220578,11.2666766409638,157,2702
-5,0.315406895748013,5.89511774070654,9.73566722567329,135,378
-6,1.68813470608654,4.63666968216721,2.74611742090029,33,142
-7,9.43760312405539,7.61393358476006,4.91533493759788,169,3517
-8,7.25467062204605,1.06665204056069,6.51401076442441,123,489
-9,3.84087157942592,8.5184080219452,3.21937356037582,77,390
-0,6.08860042967962,7.11833619092339,0.038706822226168,3,2
-1,7.22061195271121,4.40990488969629,4.53071526988124,104,933
-2,2.59822529406374,8.83980228563562,11.6369132959565,265,4906
-3,3.73566311423919,5.38383295193304,13.6289260259821,229,4677
-4,4.99834751139295,6.57187227122719,14.877960917742,345,12004
-5,9.93311333134356,9.5877693608622,11.1050867921845,420,16564
-6,0.642565199535192,3.18447625313364,6.81487481418524,65,172
-7,8.42124805329407,0.362235441652288,2.2874509003823,44,29
-8,5.63617736470823,1.33451128874193,6.19636840077536,98,614
-9,1.78565161808023,2.02619670853206,9.34709370686689,57,426
-0,2.16223340578248,2.81906351386166,3.7363328757934,45,204
-1,5.01260751997576,1.54079375498554,11.4780185490172,172,2401
-2,0.559755683638383,4.71024279862331,9.13922243252925,84,246
-3,8.52739962212268,6.31435011318375,15.7215651403329,503,22041
-4,1.71935362236916,7.47116789893282,1.76101996563765,32,52
-5,9.8902580530178,0.821636897710472,9.98379858040951,224,1435
-6,6.07940658464303,3.45926519717321,5.02715518361759,101,1231
-7,4.73149649034036,9.35779826532544,13.3567062585315,396,12064
-8,3.18741194409758,5.4942124472503,0.72056156624866,9,9
-9,7.85817705315716,8.82824925243722,7.59812213914129,249,5547
-0,6.50741193871781,1.90761100255656,3.61217155253395,58,373
-1,0.874222524685176,4.71403264446686,4.89523952055643,42,188
-2,8.49354658034012,0.607905214090704,13.766072724719,262,1490
-3,5.70225876603167,8.30937271786278,2.69854386158817,75,531
-4,3.99281683899907,9.8237903842213,9.78318022090545,239,5176
-5,7.13148911989798,3.95506051148499,15.014142212872,351,12587
-6,2.27473152551952,5.82119828255023,8.72997841432887,151,1905
-7,1.39457607559765,6.00156510519147,0.590484834817466,4,1
-8,9.42182817853883,2.63640137018341,6.59364000077631,161,1454
-9,4.41102107526555,7.0511353814421,11.8703164643,279,6444
-0,2.28578355552587,3.8275189221063,3.29993214979051,49,181
-1,1.20322309108609,9.76452779465472,15.8135436207777,374,5305
-2,4.7617981913777,1.57352895145557,0.717310488591076,13,18
-3,6.38654050682303,7.95604714361193,10.4166919824068,311,11200
-4,7.51127546220075,8.2004745155284,12.2025480177491,385,12514
-5,9.49232548000118,0.109264210309621,14.2822121707681,279,379
-6,8.57727902923313,4.85396196695542,1.63058575108762,37,164
-7,3.86557714562455,6.43914981663375,7.16294954930851,169,2785
-8,5.98073934358343,2.84696885420706,9.1003552678889,141,1460
-9,0.407584208573817,5.89309020519542,5.95633113231717,68,136
-0,5.64483260402231,4.26759267672797,2.44950028545935,51,247
-1,3.3951309076115,7.83548022574219,10.5522106798491,232,4115
-2,6.71320688328262,9.01469338865496,4.81587811380454,139,2071
-3,0.199215274894053,6.37909404867194,15.1425522551256,180,457
-4,7.89021500263617,2.33734554676909,4.74159878542014,96,851
-5,2.28740959646558,0.019327630917601,12.0304516482073,46,26
-6,4.36778598775615,3.12436839900051,9.08406762793432,134,1801
-7,1.05809195208035,5.41364267491864,13.298982172667,157,1519
-8,8.11151221232128,1.49286458444745,7.63838620504442,172,1402
-9,9.51586190271233,8.40428974725487,1.23667437100178,48,310
-0,2.65663107370788,6.866770354506,3.0305508840532,77,403
-1,4.24039891289351,9.28247874397703,12.7143304210406,350,11059
-2,7.19159294573482,4.9504591705753,9.42809316397836,235,4752
-3,3.91823908667544,1.58162229298919,15.0711167903317,146,1661
-4,6.10180368294954,5.43661418976877,4.44858480481101,96,1072
-5,9.50595790773493,7.58008883466945,13.5621371732818,451,19459
-6,5.22085278296301,0.516698385226268,5.59606376343376,62,97
-7,8.03892996332567,3.75877625375732,9.92748817449165,271,5760
-8,1.03601881829143,8.28249606597285,0.945808485213626,20,21
-9,0.175221772105827,2.3530503916003,6.69781743640536,35,92
-0,1.19899213895944,7.17936468201221,9.92853270193088,165,1662
-1,5.41833386410228,0.970676545409744,5.57286351609422,72,267
-2,0.750939798363318,1.11330350472746,2.02965400970817,4,5
-3,4.10137289755316,6.40360100236159,8.4599386638356,188,3606
-4,6.27785277608971,4.73788498483461,4.25080970155929,108,1278
-5,9.27631910846585,8.70455442802745,1.54965973071983,56,322
-6,3.43201892759451,5.42272863689393,15.365819520895,247,5686
-7,8.98036874094218,3.34652416795272,11.3231671473039,266,4927
-8,2.06750254668382,9.39761237855287,12.9209337821665,292,3814
-9,7.5187009943806,2.26427626315864,7.92227715335291,148,1402
-0,3.16036540611974,0.514933349913259,5.63075931324833,31,65
-1,7.67364541747491,1.16136280105538,0.972404292953044,13,15
-2,2.17925325573928,7.08355433751069,1.99256370266594,33,79
-3,1.69394959051016,9.01912896643729,6.49237145476019,156,1117
-4,5.22959816075397,5.16574817696522,13.4256525906038,276,6336
-5,8.11755130015022,2.89085553498315,14.7747253934821,328,7736
-6,9.16528449014296,4.24153684170168,11.5512862771677,300,7403
-7,6.00199213521907,3.35427132721722,9.53578048434153,183,3429
-8,4.71873961785353,8.10534548448133,10.5860807774812,240,5827
-9,0.732401049058379,6.22236130474817,4.09100550907535,57,115
-0,3.32302734575409,3.06725591396737,13.4579064287913,158,2133
-1,1.89791918695767,0.09491793682167,6.60812907172632,25,23
-2,9.88811818893404,4.99889261203329,9.63588142265929,269,5548
-3,4.32529061836175,5.32663924942591,8.57668292342638,168,2562
-4,6.90096089569047,2.74818391204885,2.85398120172038,66,349
-5,0.996157673498779,8.80666505302682,12.1058891059601,239,2024
-6,2.82541566412395,9.85802327100723,5.30044478273657,159,1624
-7,7.84487134775523,7.99762829740216,15.4469472087884,485,21656
-8,5.24900873610909,6.24149648525687,0.371229088494097,5,3
-9,8.57670239885776,1.0403508268597,3.22301031371534,59,153
-0,9.31387549672247,0.483180863313669,7.92231172106095,168,687
-1,5.5380227053639,8.8382304210818,1.88078660381017,52,220
-2,3.8401080784392,7.36140377906719,0.908363708689687,22,45
-3,7.98945113096506,9.85979467481165,5.7269031466591,186,3265
-4,4.88951931104376,3.40680480095172,8.78339913928039,143,1802
-5,6.37146726262438,1.32823310567834,12.2315923977229,177,1948
-6,0.19516651721865,2.4539899111997,9.96772276127619,47,87
-7,8.48939253740659,5.76244695867487,13.6842596284428,373,12579
-8,1.74164145183604,6.12600202786399,14.995477208395,256,4828
-9,2.4925690707533,4.19641921587202,4.2586676154067,52,242
-0,3.89556284813184,4.11989109347271,10.4747130913154,158,2569
-1,1.90917526405359,7.73492584739489,12.6438608532578,254,3912
-2,7.31387807651503,9.53145491418931,2.1503322984923,73,380
-3,4.69105581273926,6.28794534267599,13.9728001715562,322,9239
-4,5.27163318127437,3.49281958453282,7.45385816790363,126,1186
-5,8.19060983307445,0.299942069777023,1.49048786289517,20,18
-6,0.184632457347369,1.59608195523831,4.51356962767863,23,20
-7,6.34210762086359,5.43384392285524,5.70656841424874,133,1835
-8,2.42995761977982,2.16440552907888,15.4512487971746,135,1719
-9,9.83094917105507,8.1165830745557,9.43741834653446,316,9498
-0,0.93852978347313,5.7572829782755,14.4379324058312,187,1826
-1,1.18118327905008,3.89796464684779,7.73591552911105,85,626
-2,5.62275340323984,6.08412766225938,3.15950495367768,73,416
-3,9.22267607912309,9.3931039075594,9.81673525887151,371,11867
-4,4.30721641702127,2.10024556372288,11.5703603420245,146,2284
-5,2.54645854172812,1.0165924806326,14.1894552383151,107,1112
-6,6.41719341856458,8.66171868264396,6.2823420767299,180,3980
-7,7.16022560097651,0.602449441966588,3.8714038124909,71,222
-8,3.17093640242085,4.16301911381489,0.080995955630519,0,0
-9,8.4181447881628,7.23381283309306,8.06302693390851,254,7221
-0,3.56391956675562,6.29335342144101,3.63512729503712,73,434
-1,9.50027853520776,7.52547030735589,11.5486262074028,370,11209
-2,6.06939408590418,0.69783278550517,2.6589353361824,39,73
-3,7.08987685173616,4.90026411120207,1.01393855962485,21,28
-4,4.60091200565909,2.79219119480168,8.94953821232756,126,1716
-5,5.34095857143556,3.67636106787272,6.42598950913513,110,1156
-6,8.91726806657768,5.67963318239514,15.566044887121,454,18270
-7,0.406590537359472,8.94592464403209,10.1176486599768,191,887
-8,2.14328145870264,1.29582409749654,13.8640895824106,93,854
-9,1.71471883350204,9.00111209419619,5.69052548560345,119,1023
-0,0.540278893187421,7.0806154051696,10.0433107995414,137,579
-1,7.07928028463119,1.93956858902517,1.50540827129473,22,35
-2,2.56585056031325,0.755133617445196,8.19895334628939,51,215
-3,6.90801854542166,9.58662391828366,14.2002137897938,480,20046
-4,5.39631262549822,5.82843867093899,3.15350439936739,60,439
-5,8.2963258018998,3.08018675681211,6.6834456371562,151,1684
-6,9.14346945701802,2.47683887773312,15.5553057027261,388,10158
-7,1.15144552193536,8.63009312686167,3.2644568300448,51,146
-8,3.43277148434182,4.82865290611829,11.8486765202226,196,3461
-9,4.59567982416153,6.78346956004927,5.62605327871628,105,822
-0,4.90704196041502,8.16145783406424,0.417859892017376,10,16
-1,7.99153277582739,4.47173983751844,12.3119249909489,298,7210
-2,9.20831539000852,9.55347177620985,13.7078255106497,494,19631
-3,3.35543933736406,2.57028693873139,10.8739897751302,114,1150
-4,8.66921902992487,7.21010719291235,7.57604029790372,238,6621
-5,1.48380253986065,6.74227139225229,2.57617267525798,36,90
-6,6.41838953337833,5.02519909482107,5.5810842804973,141,1714
-7,5.35781124933559,0.354831855543163,3.41309140106804,37,53
-8,2.59378063194345,1.78021264853563,14.8181328286166,125,1879
-9,0.575593428640082,3.5638929210554,8.67055915666734,76,287
-0,7.05363479152083,0.511113638489501,5.38077254464619,91,222
-1,1.71746499061191,9.60431298224032,11.0068472898512,258,4012
-2,3.47547571523273,6.46089833820658,7.56059456037577,146,1843
-3,6.84909801995868,8.82870898381801,14.5801447464467,455,15743
-4,0.383397141058259,1.82956271568305,2.25244485504986,12,15
-5,4.16827858850771,4.70938882182439,4.61820857913623,83,1125
-6,5.83644219246987,7.10540137739646,11.2755147227158,289,8598
-7,8.54890963164007,3.83885386190407,14.3382665914273,368,10038
-8,9.18919579239036,5.67101298053291,1.0866601093113,33,71
-9,2.72038399858645,2.6970241660043,9.0102015642294,107,1368
-0,6.79644056152348,7.95564946726649,11.170012123826,319,7975
-1,0.739469233238727,1.95363260782969,5.41058639857462,28,96
-2,9.64553435641052,9.35232705425254,14.0403752622385,507,24833
-3,5.15577326058017,8.04310010894833,8.54814573249781,252,5855
-4,3.54704534561558,0.179094939328182,15.4042702034541,98,280
-5,4.08744650179989,3.39221779343354,3.50722573558406,55,176
-6,1.37736317679665,4.94777969796923,11.4336336308751,143,1523
-7,8.41351653028996,2.14119906353719,3.10090262330901,57,266
-8,7.03981143944575,5.76351584366821,7.86823564464709,204,4219
-9,2.27398809946997,6.10868253937469,1.41995030434061,19,26
-0,3.16549977466366,4.66692744745868,2.08444020523515,34,175
-1,5.70660412899422,8.43539632339975,8.85195345261424,251,5185
-2,7.53534776806166,6.95312011439414,13.7030665404476,372,13661
-3,4.63532334670832,3.71924520057583,0.960933115179417,26,89
-4,9.19648696695621,0.93016520286255,5.06600852855952,101,496
-5,8.21174586205242,7.52758797327217,15.0075798477146,458,19043
-6,0.041497325509906,5.25890427479314,7.38751076338715,77,43
-7,1.32206753519573,9.05282668636472,4.75171018341406,97,426
-8,2.55974466133368,2.72608485148385,10.7642098203195,101,1580
-9,6.85793491399733,1.12130299800553,12.6761660113503,200,1747
-0,7.33217252206173,5.30449042435403,7.91629788330285,190,3948
-1,2.70080841800174,7.9829468795547,14.7500347334574,302,6461
-2,3.76531629952539,4.64277741347993,11.7219528111695,197,3098
-3,9.92940348384915,1.7176761577935,4.40990757457085,98,471
-4,6.20480240905621,3.97661763731501,0.630008226504651,14,48
-5,5.79799803521922,2.37720178245764,2.57313820282631,33,98
-6,1.73854605339696,8.80184016845281,10.3113176017111,220,2640
-7,8.0637945562536,0.434530963304495,5.98779189934365,91,191
-8,4.38722314121021,9.86953367181498,8.36533000378978,233,4106
-9,0.282765457060099,6.1806575413113,12.893532779579,180,588
-0,0.384515813706267,2.2243829797998,1.82312037792826,11,8
-1,3.34695895037759,1.50450292405484,4.11073034100708,50,208
-2,6.43180709510778,0.260849245962089,15.7887674027694,198,810
-3,9.78177982592873,8.00908703495122,12.1329692116478,397,13658
-4,7.48417303216341,6.6482975272988,7.5488226850032,240,6253
-5,1.79401445026596,5.5110938214724,10.1952697084995,144,1479
-6,5.57648890313987,4.44551781449316,4.94791922725931,88,814
-7,8.34206943465756,9.96696851461832,13.5326404722033,497,20317
-8,2.80997874368347,7.57119918396291,0.631167025342172,21,21
-9,4.23828967055309,3.86343157778938,9.38729629180632,145,2106
-0,6.26004804025243,1.34096197364299,12.7609630706506,193,2581
-1,9.38505805571896,4.99767226326466,6.50444281210634,203,3304
-2,0.942982784071135,0.263551479496778,15.7149429164046,41,117
-3,1.61037605978169,5.27913047077427,13.8340958938518,185,2465
-4,4.83497378568296,6.9306722555624,5.83574741499695,145,1786
-5,8.71035150071233,8.02464440463632,1.80535988258351,55,243
-6,7.11250313052663,9.6966889878732,10.640299466313,393,12791
-7,2.73592040637395,2.72532639073588,9.571972128962,106,1239
-8,3.27509888155912,7.8363785774104,4.65890971126312,107,1045
-9,5.63910784699471,3.97443931063848,0.702609057287761,15,45
-0,5.82147125277619,6.8909738846907,11.5897836671715,289,7157
-1,9.3540645623551,1.56949725851858,9.27768934100896,211,2279
-2,0.655820767327393,7.83239104170191,6.57124661473766,118,308
-3,7.53727567366804,9.99162434296123,14.0491188866746,488,20165
-4,2.67712426348256,0.163616204138276,1.45631379863151,7,7
-5,8.68267115246276,4.69177763914198,10.2125334316295,260,6682
-6,1.62873401505999,2.67429357370002,4.20901647197293,37,142
-7,4.26610136853731,3.97995544735828,5.48822837135492,95,1097
-8,3.03900642076282,8.9856007339528,15.100424087981,358,10374
-9,6.45556678199011,5.68667861172413,2.09659342257299,54,159
-0,5.40633223096377,7.10948236606899,0.125802737583613,3,6
-1,2.92658538321838,8.96583666880759,14.9294636252483,374,10013
-2,6.71042181501241,6.17478498788337,13.1972599674094,348,12241
-3,9.41892362479292,2.64803031805521,7.98082233930468,186,2457
-4,1.51727216975431,3.24545446253893,10.223023647467,84,536
-5,8.0842130111771,9.71756138264854,9.17218031878464,327,9573
-6,4.98457589637611,1.77682136724028,12.2955602622096,153,1586
-7,0.666407304694919,5.67341972891143,1.9393299799638,27,53
-8,3.28540406846342,0.614499460255193,3.58239628833848,34,98
-9,7.58580839421524,4.89246909395201,5.25005530168485,147,2123
-0,6.41893297798395,3.35252924130052,15.9623834578366,342,10306
-1,4.77586506520279,8.72268357447449,3.80201492692868,99,1084
-2,5.33873295577443,6.31922529778619,9.4772576336223,204,4467
-3,0.563737101445399,2.90591167123465,5.67171948107749,34,39
-4,7.20497923282591,4.12663712551838,10.031587945398,236,5578
-5,3.74199539112617,1.1785197892726,1.12383795372896,10,17
-6,8.05227910326876,5.4930301251588,13.8542428905198,412,14925
-7,1.8525426448755,7.08193843903815,12.782600510574,233,2225
-8,9.19819638642167,9.19598018470176,2.02407565985485,88,597
-9,2.87439803170833,0.943766382212014,7.47529520425878,51,165
-0,2.35000686847318,9.82686784717336,11.4284570186686,266,4181
-1,8.92837255198079,6.56265542290274,2.22309548812428,71,449
-2,9.06562608817782,3.76218751119408,14.3227238502249,371,11988
-3,4.54941183206097,7.71360736626347,0.511553358943767,15,25
-4,6.59086626739511,8.20751149538488,9.3889570557873,271,6636
-5,5.65272812377722,1.56881273643387,4.61000109069102,71,326
-6,1.25566622333181,4.86772315280509,14.9602984389311,192,2325
-7,3.36456642751986,5.43211623680193,4.93321225288101,98,851
-8,7.84368052966498,0.966590770223317,7.55929265465411,126,480
-9,0.19494276897016,2.45221510010172,10.0984196784224,40,81
-0,0.841096170616101,2.26522852250267,13.8527198579274,89,752
-1,5.99684928428144,3.68609115394349,7.71823421059946,155,1448
-2,4.43034317368907,5.94879049769192,12.2360316729123,266,5660
-3,6.32029874249674,7.26804718737864,6.35031103547887,193,3164
-4,3.906653296952,8.81146593915168,9.14233271658364,223,4426
-5,2.19124212907612,6.05535981693841,15.8794537753316,244,4535
-6,8.61542891503293,1.53812101003981,0.827411027217021,18,38
-7,9.40571442418095,4.0705500637099,3.38730306077098,100,695
-8,1.04108236495556,9.93403299113064,2.44508435427683,51,88
-9,7.3472884743057,0.76635128055739,9.80299652490095,157,561
-0,7.41201773895166,4.75635605474364,3.34203722712997,81,481
-1,5.13839758596812,5.79413456173227,6.18399196193244,163,3477
-2,4.66576524026537,2.49459705276376,10.1129918504804,159,2427
-3,3.52467230668562,8.8947475708313,1.88533041988515,42,134
-4,8.63937480054278,3.2676684371943,6.78299173814728,174,2756
-5,2.33799878102849,9.41059869723827,13.4687496128222,314,5531
-6,0.906866577339836,6.89859691056897,14.8019072073671,231,2280
-7,1.58736986857558,7.89625671647158,0.512908137626398,13,15
-8,9.08540248429087,0.685712768824681,11.4159655777146,203,1362
-9,6.44980399070443,1.85898054712222,8.4738828248944,131,951
-0,0.773995087294009,4.48222523033733,15.8904710513704,174,1196
-1,3.05207240622127,2.52032362619139,12.9424155758336,148,1914
-2,2.01282164951061,9.97524453528396,12.0184410252952,297,3756
-3,1.79554439192665,5.96577044388429,7.869634448709,126,770
-4,4.47714637598905,0.215269045785834,6.24658185676721,63,97
-5,7.13088113855358,7.81110456630532,9.85044674146519,308,9016
-6,8.2355659345945,6.19724266616702,8.58662032015433,244,5086
-7,5.93250395309738,3.21353747788395,4.19059424007471,63,394
-8,9.8595029756249,1.67609303416458,0.255031815634243,4,5
-9,6.70978505022397,8.40500109858755,3.14883547527318,88,883
-0,8.94690289946052,0.038799454728888,14.8007727147996,283,283
-1,7.24071258566351,3.18677252823113,14.0894493480535,292,6269
-2,3.23994890372363,4.83124580508994,10.0474781704743,148,1873
-3,4.08337905376876,2.76676835633834,1.90643411528024,24,71
-4,9.92850775194378,9.35064269145715,5.60682153726885,215,4269
-5,0.579737114939772,6.37681062777421,1.38503205819637,21,22
-6,1.66703685712535,8.53355443355358,11.58615952614,246,3955
-7,6.91157688726305,7.00024100468513,6.52565798846848,170,2674
-8,5.34562016890079,5.24124433603803,8.57024757735946,167,3136
-9,2.62150731113808,1.20823180860821,4.39624291930396,33,63
-0,0.48301411284395,0.233624763648567,10.4958248509533,23,29
-1,7.88683153946896,8.40956092726251,0.778248056857885,21,41
-2,2.37834758168265,2.78008402373496,15.207892486543,168,2094
-3,9.74972574678354,5.7712248152904,5.08225905668462,158,2188
-4,1.09681873098722,9.22810037790238,14.1003726468875,308,3649
-5,8.47605670215106,1.92222331815486,8.86681651190563,161,1580
-6,6.99168614039274,4.59256818276483,6.60972777872753,160,2586
-7,5.77043780279234,7.37992429096201,4.42585136253245,131,1640
-8,3.21052848844061,3.50193558078086,12.6919103654038,156,2356
-9,4.50144147369791,6.60507125343774,3.11569103877914,57,309
-0,2.3293212774825,3.70449363641168,7.47869911812738,96,696
-1,6.0422004685813,8.45384656380691,2.61323026773998,80,704
-2,0.49629512550064,7.54061099432473,3.35919396188902,43,49
-3,8.54446133214917,4.84960017498246,10.5666783010761,263,6467
-4,5.16844055113192,2.48013493963837,11.3935370595816,173,3493
-5,3.76046460627607,9.37320736910513,8.12591628031264,193,3567
-6,4.98492910844149,1.49324749302599,13.6436974330278,183,2076
-7,9.94577471700846,5.57912980403505,5.44445685447439,160,1978
-8,7.19710603882365,0.189077653778349,0.942096432249957,18,14
-9,1.87536323990076,6.90090773026699,14.5427554688916,250,3840
-0,0.084648753584629,7.1918731844045,7.91307765928129,112,112
-1,6.11927167203974,5.06121665814357,3.85428357721068,74,509
-2,3.2245823826539,0.590916092827227,15.459151743121,106,448
-3,4.50019793552279,1.98921668342059,0.743767649301233,16,25
-4,7.84044275054602,9.16682970495026,9.00894191569367,305,8360
-5,2.70129334208838,2.04668481989927,11.9193582496124,132,1808
-6,8.78904060420205,6.77953168182393,9.90181159427852,305,9192
-7,9.35097562392609,8.25258600668473,13.6126358656054,464,20912
-8,5.40465374164855,4.82964380535293,2.44573472229991,51,202
-9,1.39368188599061,3.70393666703452,6.09696615721626,58,221
-0,1.03342653309322,4.55405838346021,10.4439303419548,113,548
-1,3.36675207642594,3.93083286417581,3.42580178927891,53,316
-2,9.23707039117418,2.00534898869422,8.00126321774915,194,1957
-3,8.44727296815712,8.7482388061809,2.89622414519274,107,945
-4,4.07237639319412,9.90305065310251,12.940119300778,348,7548
-5,7.95413164157107,7.13972056697895,7.41415970997652,228,4064
-6,6.22325540106936,6.09612585173215,6.26338632722431,148,2296
-7,0.922307775483585,5.42457260718596,11.5338430610535,129,663
-8,5.42589315503668,0.933678906132935,15.4644365379791,178,1552
-9,2.98552413325522,1.54238656019044,0.125917722732015,0,0
-0,3.15049549807259,4.57702040169056,3.00904092394332,43,150
-1,8.50357007440356,1.22114761011069,7.06065790334274,113,604
-2,0.801642589831367,2.26911975263801,10.4599900058772,73,451
-3,6.4415340103789,8.80313308893625,12.8788794598547,388,11882
-4,1.4906135758982,5.04414556208746,11.2037559581879,153,1468
-5,5.84876758528131,6.99209218702882,14.7916797968028,383,13903
-6,9.70605811463803,7.13035167971479,4.28893294867003,148,2421
-7,2.49045440851978,9.79606852539694,0.194023829374619,2,1
-8,7.25769564818918,3.39477531677168,8.18201913408728,187,2779
-9,4.88492674684497,0.492216183287467,5.45753697285745,72,249
-0,7.43558915801271,3.31378214330573,9.38028026071316,196,3298
-1,3.4148468503016,0.141844397616985,3.90931251323632,28,33
-2,1.73272341541331,5.21380121066273,12.1373808808447,178,2495
-3,4.68587627508646,2.1055136464012,14.361725331956,183,2627
-4,8.83327128203585,8.68306957519012,5.65168698215707,188,3903
-5,5.2499589812395,7.66072739310915,15.8485149254017,388,13373
-6,6.60952850975406,4.95060637157987,1.7328620124963,50,223
-7,0.585194785392854,6.72967841364724,6.70568298993389,99,395
-8,2.36973781087269,9.05204583875613,0.705207909506109,22,72
-9,9.48605754190268,1.88424517392895,10.5356108933397,260,2723
-0,3.92145773621653,1.08559955282277,6.94796035069809,59,289
-1,9.97850596693269,9.18345788475887,0.172739916977337,10,10
-2,6.01067012700616,5.7584036730342,1.71143338839723,40,232
-3,8.62274127637748,7.02198652203932,10.5422976361001,348,12096
-4,5.41283939775891,4.81687798589378,8.83304856597458,187,3713
-5,4.85985449033512,8.01394901869191,12.3875216145938,306,7092
-6,1.25181470200913,6.65711883906127,5.6155768160063,79,342
-7,7.98102948911508,0.365093920231806,3.87591326404941,60,85
-8,0.613707969543747,3.6114948527192,15.036939485178,139,692
-9,2.79353740222883,2.69603140698911,12.8423150441563,157,2393
-0,5.19908231282796,2.27005846610156,10.6776268589076,168,1855
-1,6.58770322834148,4.20683711061661,14.584017498925,352,9698
-2,4.06404352386479,6.65402307765876,9.11171316779475,174,2505
-3,2.86243413014116,7.41621703103231,13.5064756622077,276,5657
-4,0.961499679237147,9.17965928902991,3.11197565688935,69,208
-5,9.24674742066738,8.68910908148682,6.62657816098864,244,5411
-6,1.45009526859086,5.56507228303669,5.82853342530518,87,483
-7,3.0221344510295,3.8586720549295,0.611168072740713,7,11
-8,8.64071022472722,1.11747589887649,4.73090347294541,85,292
-9,7.41392605805259,0.785666601944456,11.7279598324644,189,1146
-0,6.41813159979362,0.996012783382248,4.27875141191584,67,211
-1,2.91101569997922,6.17999961661069,0.354389678175424,8,5
-2,5.99316373893348,1.50254760672202,6.45567871978936,116,1434
-3,9.10609204994795,5.84841772643051,11.8547993180566,317,8416
-4,3.23305318798486,4.02040161068821,5.26270904314539,76,720
-5,7.95037975148209,7.60448898225846,9.29436439938193,280,7730
-6,0.958327686481387,2.27702544886962,2.88058270152855,21,36
-7,1.09770727538676,8.86233491643973,13.5312383235082,280,3534
-8,8.01499850037714,9.28346114926408,10.5704195605172,376,11942
-9,4.16110002325642,3.68675572028191,15.3668152255791,240,4588
-0,1.89581210139858,7.76361529896166,13.1249129486721,257,4371
-1,7.05778819701986,8.25826435788716,3.84312307938435,119,1538
-2,9.14065333923367,3.48078616817973,6.15319180532122,156,2601
-3,8.71663193670203,5.99201596939684,10.1540256812607,333,10183
-4,2.43728164824462,9.68665042335404,1.04739390962474,18,27
-5,6.40354862081369,0.841356771305199,9.13490287168557,136,842
-6,5.13696663454424,2.41484760656055,15.3378917665413,238,5599
-7,3.50677004071779,4.76786092312586,11.7715871490722,185,2757
-8,4.12654531119638,1.04050917294132,7.32328534674167,83,408
-9,0.47662045631054,6.12291118254224,2.14762494846359,31,25
-0,6.11644552408471,6.75487252890856,8.40349117091083,220,4488
-1,1.59647457754395,0.648695042533035,14.7259478292443,65,291
-2,3.27497919197364,5.6374453787179,3.74461574028408,75,393
-3,0.079645129034766,2.55342770078707,7.16250870972803,40,44
-4,9.42960274344855,8.01138966808855,10.0737048160623,370,12288
-5,7.55432259808365,9.69650718347689,13.623887192628,452,16277
-6,5.32874598834961,3.37604179110916,1.8645926808211,18,54
-7,8.72043370222578,7.94260971015865,12.7643405005208,443,15986
-8,4.64377060810125,4.81798477107721,4.97267928794399,101,1064
-9,2.33281836109167,1.58769446953131,1.491110623447,21,69
-0,7.54346986998136,0.908980891914864,8.43345475719499,122,706
-1,0.497153815882872,5.63793048957967,7.76865305936686,110,555
-2,4.13478578491744,3.76916986952359,12.8510156723337,190,3297
-3,5.49607547850053,9.36856246902771,11.3217599823692,338,10131
-4,1.16910390042065,4.96001188081913,3.63826379300785,41,120
-5,3.12613578629576,6.82046129015832,14.7197967594391,286,6809
-6,9.93264571734117,1.85394846301454,0.370975382212557,12,8
-7,2.6049500131477,2.17219426742575,6.07670910280528,62,333
-8,8.65949005812458,7.27096320787122,10.5010025502273,320,9536
-9,6.40179367033336,8.28562659968358,1.60923305806875,48,194
-0,3.69567766321534,8.67400137106985,1.77981275988594,47,272
-1,0.765784292715724,9.54150628212249,11.7563805368949,263,1863
-2,5.44120110221293,6.84342779570905,14.2022735097672,330,9442
-3,8.71647626264217,1.76174270646952,8.32264833802703,171,1356
-4,6.38649372286391,5.05829720386236,4.92518735772804,119,1273
-5,4.2112629169382,7.99842123392377,7.66099030391235,198,3490
-6,1.63267703484136,4.75240245371947,4.53677784902927,63,413
-7,9.97601392559879,2.86390331629411,9.86024354910384,271,4756
-8,7.31060097139533,3.65119771209535,15.7669535724523,389,11363
-9,2.46096959221254,0.856285128046762,1.09882847295547,7,6
-0,3.54453129290451,3.32233845808094,1.7252391691678,18,34
-1,4.53231026337991,2.61039386664405,4.69346131271929,60,400
-2,0.075871641224048,6.93910869831024,14.9810121444151,224,307
-3,8.69821545349447,9.67174484047748,14.1515346673214,553,27465
-4,6.62377219065085,5.07474590363632,5.46941334034124,131,1682
-5,9.1890172827465,8.46068497348213,7.10536818032403,260,6998
-6,7.46943486215404,1.89082413151063,9.01478894556407,182,1934
-7,5.72032291647856,7.09446242580109,12.4391638708416,307,7735
-8,1.52943574054128,0.988875712137543,0.054062641517198,0,0
-9,2.39548907618357,4.94906939364542,10.5490495928434,159,2293
-0,4.96721320680827,7.78073273002867,5.10086574814206,146,2104
-1,6.43865538135037,9.21500476827003,1.37546487626636,49,260
-2,8.39335283874776,5.71563555055776,10.012408345005,303,7936
-3,7.84655077581986,0.800054407951371,11.5622873681402,192,1415
-4,3.4269053206363,8.3461072407824,13.569142490375,328,10595
-5,2.55201817229565,2.17260911290227,8.79816163799084,82,493
-6,9.23094166459874,4.00263215557959,1.94413636605615,53,282
-7,5.74350253577908,3.45090365309315,6.83497716276147,136,1698
-8,1.53198831827078,1.35243273153884,14.6715283826484,93,688
-9,0.809034550240342,6.55490983382541,4.34953628531707,67,240
-0,2.28606005165929,1.42287646846678,1.14727670193137,9,20
-1,5.5168367677345,3.56575156422458,4.93483289468923,77,665
-2,9.26336361899808,2.36545148465327,15.6989787197321,343,7626
-3,4.20986467727912,9.93614633543211,12.3502511113677,349,10805
-4,3.59909932510275,8.05490489091078,2.76737063877655,63,308
-5,0.088929522755024,7.67339147902984,8.16038619242815,127,107
-6,7.05816775872092,4.89128266482775,10.748416871706,289,7730
-7,1.37224529518607,0.803884708735253,7.48770900035195,32,144
-8,6.72066445450786,5.18404103482145,14.129475985315,362,11974
-9,8.55480675827395,6.29592634898029,3.26655353001035,105,832
-0,5.80991312030609,2.25815816765848,14.2981484054372,229,4664
-1,7.46020661815662,0.037747316967433,6.09179350366798,92,60
-2,8.90376726139911,9.58319516188305,1.0169495712131,37,110
-3,0.40984767311835,1.22996141540103,8.4859535257459,25,89
-4,9.52493408424347,4.14813392201638,3.17271187459235,85,714
-5,6.47712976739568,5.65382996382768,10.0386178675071,245,5720
-6,1.44762387266552,3.68120925782155,3.52425107518469,29,48
-7,3.77519076929391,6.75215489225767,11.9074465820585,235,5815
-8,2.52676890726949,7.50357689633859,6.49094491313982,122,1297
-9,4.31671712202522,8.89405460520389,15.9655911434749,388,11785
-0,3.5125909252673,0.546098808003291,9.11844386246735,73,174
-1,5.33060080997,6.21400711317387,2.00465888889006,46,197
-2,2.86051148658744,4.47552652162619,14.4647805189253,221,4902
-3,4.47376015373872,5.3181752095087,0.644623326330609,18,21
-4,1.799578644102,8.26670343063721,11.0963484962734,210,2412
-5,0.580619760482354,3.96698501802804,4.8188241509872,36,62
-6,7.77309014161711,9.16007410548673,12.1406171070877,438,16780
-7,9.33267298653032,7.7873258374574,7.20223436575252,236,5860
-8,6.83141662193031,1.98416495948321,14.1974146629937,247,3848
-9,8.27457231555102,2.01990148374008,3.334402743233,78,533
-0,4.79371742578521,5.18741128798378,8.24440069066819,176,3005
-1,7.78520286791283,8.44822658210823,14.5326834440156,479,23192
-2,9.23731770420389,4.60102587748869,7.42684064710562,218,4298
-3,3.28866630267546,2.3273766801154,10.3550997852085,107,1058
-4,2.9379878654595,0.780944818950487,12.6526249606605,95,541
-5,5.82567144597948,7.84044958163881,13.3811027137454,361,12049
-6,1.69779703306252,9.32360946943407,5.25944362150021,107,470
-7,6.0822332215106,1.83865715449246,2.40117584983072,37,93
-8,8.41826408090258,6.10357656610302,3.92951389722986,129,1425
-9,0.559301872076827,3.83772909862266,0.551084873947501,6,4
-0,5.77676827135007,0.193700292785702,2.45848747361728,39,31
-1,4.62584666605061,4.20838367900883,4.40300199099071,73,645
-2,9.40914378791762,7.66130396228687,14.6241032949743,500,21879
-3,2.25402114909556,1.91125615183429,10.1572393845811,85,597
-4,1.85972878350223,6.40375661047835,0.623278663170436,10,14
-5,8.70018825929405,9.20774927301219,5.61807319399366,205,4531
-6,3.54773990946914,8.64874693723665,9.48652217641092,220,3488
-7,6.60956853389846,5.87118704635032,7.79887449635548,208,4358
-8,7.09106618208218,2.74831880112249,13.0152407876627,298,7279
-9,0.473919760911192,3.86875723053659,11.4764634430119,98,344
-0,2.47246211391489,8.95904399687141,6.05864402572215,142,1338
-1,9.69831332394056,1.50240279348013,0.542297951899242,13,20
-2,6.43369693310696,9.9169787490185,10.9109340036782,338,11435
-3,5.20227318174203,0.706765281861631,13.0091172059973,171,1121
-4,7.21393378280105,3.72243342302241,3.47739705081201,70,490
-5,3.00580376884294,5.08504362130607,14.9454024633343,259,5312
-6,8.30095771555867,4.21999112689693,7.05357982221164,184,3109
-7,1.74891764047852,6.00881483329447,8.55792449734877,153,1291
-8,4.79416270883761,2.71612764047674,2.82183525203715,35,114
-9,0.20246754209575,7.34443828842851,11.3551222282027,161,361
-0,9.17152269378719,2.31505596931865,0.625147907607399,16,13
-1,2.77154645731238,4.52367239999899,14.314502148766,194,3479
-2,0.223412544929625,8.48537237120455,8.39458787630508,172,343
-3,6.08084680164325,7.35390582625686,10.6693771233878,264,7212
-4,3.87493332435065,0.891455064556372,4.38423902991615,44,136
-5,4.85875125919089,9.16587792466254,7.11700591914426,178,3151
-6,1.4035703924496,3.1495086395207,6.30004157881533,57,202
-7,5.70239614254043,6.57690953532275,2.77865077635006,50,273
-8,7.55706593268392,5.65749422998494,11.9326793277774,326,10828
-9,8.85295611367375,1.19461925432457,15.3109363228418,326,4705
-0,9.77565463532603,8.76701252998077,6.43568272783783,237,5040
-1,3.28814948903338,7.94197443214359,4.17520819591909,101,971
-2,6.32397610656154,4.21618857387048,0.58853155891353,10,12
-3,4.6788782323906,0.771484874816926,5.46505760240072,59,149
-4,7.56319809244816,6.0202175201478,12.53126518275,343,10077
-5,5.1622295710965,5.67076002171153,13.2957436344443,295,8271
-6,8.30992990922917,9.46928227812024,3.10887809702019,92,643
-7,1.89998327687865,2.34676007192007,8.28741412219543,68,495
-8,0.688174436722263,3.86240435229309,10.6603558773891,92,354
-9,2.98597431768552,1.85426769265753,15.475368107222,162,2583
-0,8.10816035223518,7.60290881142469,9.33438521935855,301,7275
-1,1.50887625140933,0.579072527685995,9.82445602046075,54,200
-2,9.46487093015434,9.23190112176892,15.8185025802098,581,29446
-3,3.95158030943987,5.04669781788511,5.96763621973785,102,1158
-4,2.94483545302074,2.94518857975954,0.263472420011688,3,1
-5,4.77960864254247,1.31373525884465,7.30530878857515,103,682
-6,0.60210183012735,3.44258902332446,3.70025010296835,37,74
-7,7.45496031739301,8.11463994358571,13.0851729582585,425,16649
-8,5.35617853432842,4.83692432990929,2.97469151905777,67,402
-9,6.94708754697556,6.02763183285593,11.8456364264116,294,7877
-0,6.94031615323926,3.30835507063231,0.067271673721512,1,0
-1,4.29367641335844,5.88449653479386,4.15718041576304,106,1403
-2,3.41570357828493,0.676233735040477,6.1677120847875,42,202
-3,9.78695681047904,4.34091640919209,10.2105190205208,275,6840
-4,5.54602459332075,6.54353437837412,12.3840575119747,323,10927
-5,0.547028146424448,1.84053587994919,15.4043206617612,72,268
-6,7.05334016300518,2.88336829803091,8.6010379767888,178,2055
-7,1.39859966271846,9.16039387500615,13.0256956791877,237,2653
-8,2.27073373242173,8.33957314120817,7.53491816444155,170,1449
-9,8.99933512283112,7.33522318021357,2.96730427030598,103,781
-0,6.33670566889753,3.16424106722315,14.8473509928372,276,6102
-1,0.144475696594605,7.23679803459612,0.913189986773312,14,3
-2,8.23297221207804,9.50252962670166,4.43544400769194,172,2782
-3,3.90548947749133,0.271177785711203,7.88481398683929,75,119
-4,5.03689328891077,6.94760250160772,8.41780343109292,192,3620
-5,7.21980071196192,4.46386522142885,5.33577112159837,136,1493
-6,2.30956935223823,1.05879601654464,10.3428262724575,61,272
-7,9.61688407196639,5.72926640583663,14.2834797403776,414,15532
-8,4.53410169008587,2.32884528820899,3.04221897089705,37,114
-9,1.21826948285339,8.65348893205825,11.343805248161,228,2869
-0,9.28793774817142,0.910612507372605,12.7790619427491,262,2398
-1,0.889304723828218,8.01483016243171,5.43686907856655,105,234
-2,7.88074857707419,5.80910321682305,2.13105005583599,59,386
-3,8.75765761660297,9.93846205312467,13.799079883495,464,20339
-4,4.56347047381285,3.78930044910815,4.52097387601215,79,537
-5,2.12000274961689,6.13019445987037,9.4988383192368,165,1951
-6,1.5319717344965,2.08127910484089,7.68499866588646,61,393
-7,3.35679625565493,1.0208452398077,0.682678368985288,4,3
-8,6.79221703315535,7.62874522582807,15.4158627764993,416,16107
-9,5.75812920649685,4.62967265185989,10.9700789227458,213,4724
-0,7.50239852784354,0.623507809956222,2.04442247916361,32,32
-1,8.33867960445703,4.35771676494243,10.1277052941702,255,5912
-2,9.75701186774736,3.84440561229207,15.4405848967927,442,14484
-3,1.54389369649264,9.45329973864654,7.61119161518345,178,1561
-4,2.87684777415411,7.72644135810742,13.9992425740927,278,5077
-5,0.543543914649442,5.09553620763624,12.0157984900902,141,1065
-6,4.23826241333993,1.39602434630235,1.29731286054104,15,21
-7,6.85109188223214,6.9832414364724,5.49932493241196,172,3482
-8,5.55721904493004,8.80846028947715,3.3982089044729,95,695
-9,3.20684103814356,2.15994080236693,8.9031069466461,110,1268
-0,9.61688578326082,8.0752722410036,12.1838946846674,416,15266
-1,3.50900218353674,9.07336075951692,11.1635003826119,306,7582
-2,2.99159777552998,7.19583924189423,14.5781029143641,272,6211
-3,4.98321598665375,4.48525325459323,3.5394006379551,57,264
-4,6.63502456614161,6.78477432116279,0.756951260959,16,18
-5,8.44287050610589,3.48549589116587,6.34978210455349,148,1647
-6,5.49114096913466,5.33547661056087,9.23489564203199,181,3011
-7,7.68247906623686,2.08235299033676,7.83716670906053,151,1429
-8,0.101574544235381,1.55029552318313,13.3339437575613,45,87
-9,1.32835207381072,0.271268586483177,3.18726155804575,15,31
-0,9.68274383962704,5.74373681495245,2.64788437563542,74,481
-1,0.940717079639108,9.52805842189212,9.40817066753025,209,1179
-2,8.66345832301609,6.41122736263166,14.7423387693453,420,18184
-3,7.87127290761739,2.26879709616698,12.7552673506826,273,4016
-4,2.82748095975701,0.998698481841287,6.6284557602233,56,284
-5,4.360117715489,7.07171052088493,4.96180030109065,115,1385
-6,1.6001239399395,1.04356068057242,9.77953846279564,55,221
-7,6.15626133981136,8.21725740049386,0.917004677236104,30,60
-8,5.5355258826956,4.97107245862782,4.44955904114888,115,1146
-9,3.23042197181434,3.97923816831998,13.3866674845994,196,4069
-0,2.96731917223059,1.80192039225598,12.6103468238073,126,1358
-1,7.81140750270066,4.91507676527881,15.4219185518131,379,12858
-2,6.10972380124712,2.75659393401413,2.22025584089408,36,109
-3,0.121751997647678,8.82242231001628,5.48121858388849,100,105
-4,8.59447385491262,5.21649056726612,3.32051429146785,98,728
-5,9.34488917417318,9.2635729509186,9.65033151239364,386,14270
-6,3.03490585907311,6.20852754029919,13.9127609667025,260,5173
-7,5.54164150783305,0.765063431365564,7.10148622721789,89,268
-8,1.38784509968585,7.9479170789345,0.883544108887407,22,44
-9,4.10107649237174,3.7168372067339,8.89225799619587,128,1789
-0,3.02828492829098,0.150736993031168,10.7483253787349,74,95
-1,6.13706794254045,9.620559195419,2.57808882728862,81,765
-2,7.17320363075991,6.41986959087031,13.0856526472635,361,11594
-3,5.05441012775018,3.6132833468419,8.60183569601679,141,1792
-4,8.65665695662285,1.76167490539418,15.1485408209995,340,5292
-5,4.83700779580312,4.54570301603295,7.07102374074942,142,2097
-6,0.373315406365941,5.62763035490179,5.08523969331527,61,106
-7,1.8745764767182,2.93525031439433,12.6790881468548,132,2182
-8,2.21334657456013,7.29906430960884,1.21971001871036,30,73
-9,9.32539042356124,8.35916393863395,3.97016933137256,143,1683
-0,7.45210220576431,4.28262560595149,0.711011779681704,16,49
-1,9.81748093509014,2.67342080024687,6.15907563112782,163,1662
-2,8.53372066135208,9.01834866167363,8.98535023107398,306,8481
-3,3.33848051036798,0.75568177704952,10.0677272253482,84,438
-4,1.65795082811941,8.26534247480693,11.8732163324423,238,3479
-5,4.71772225474349,6.3100218462161,4.33071100854802,98,1019
-6,6.55804897562949,7.52790982794392,14.3572171893822,401,15649
-7,5.05661497313472,3.21592560093896,15.8290219494071,265,7025
-8,0.169882875160825,5.88447131357553,6.9478924934118,85,99
-9,2.28529772089023,1.08536877932657,3.19711660686057,18,36
-0,4.22301264141188,7.654831358394,2.66921509953308,70,357
-1,8.52563102800037,4.04339669415605,14.6919621297588,365,11831
-2,9.09125056928225,6.06641640448144,5.78153188261001,172,2281
-3,2.43644839882311,9.55182822422359,3.85913548783328,91,580
-4,5.58179126477771,1.10493918032795,10.6460792149338,144,1132
-5,1.52207357113733,5.02234678842686,9.5920844039668,100,641
-6,0.330908583078026,0.171512997648022,7.9005071443721,6,6
-7,3.54244580607721,8.56484917099957,11.2068508125378,258,5183
-8,7.95880095425885,2.70334095451893,14.2167020229797,314,7070
-9,6.20601740429138,3.20247856414138,0.106385272279626,1,0
-0,3.53251486975652,5.51680059469062,0.489215179429607,5,6
-1,4.11424845325806,0.213447881326042,5.53533920940711,46,65
-2,5.54927500812825,8.53642954589836,1.89849265951299,62,345
-3,8.00133363949079,1.83536235417889,4.21815090450894,79,584
-4,2.32384963074584,3.61579255210138,15.2001477372516,149,1795
-5,6.21993431383058,6.633788419545,8.16528076450125,212,4272
-6,1.97121878996078,2.54103028906281,9.91623644979138,89,904
-7,7.72724231956998,4.92726563263141,7.16741112991924,187,3566
-8,0.349653380462238,7.9319026844206,14.2201423851502,219,838
-9,9.09284508653062,9.51272320107355,12.1062251530098,458,20384
-0,3.15545229215646,1.6946720036442,6.16975253157821,55,230
-1,6.59594159573071,6.00810094164732,3.467876716394,67,421
-2,0.25054442427132,4.61671233076714,13.224856968982,152,455
-3,9.33384206434339,5.09372486605972,2.31269388045765,70,345
-4,5.47125071662353,2.78093317979049,15.5395982269504,277,6512
-5,2.02677058570313,0.991507688278502,10.2014143276731,67,460
-6,8.09193793592084,3.53119879424151,9.48437164765281,207,2664
-7,1.68592922070822,9.68277950019413,11.35113128203,275,4142
-8,7.42896998071751,8.23731416089475,0.593286716071756,23,71
-9,4.8647902219999,7.43759774375379,7.53432659748548,192,3899
-0,3.44948196518543,5.77280449286174,3.46246391532226,65,389
-1,9.2416712259726,8.24968106433933,1.64153762936627,57,282
-2,7.00141822013392,4.60890486103004,6.54397689015939,163,2428
-3,5.55996414714584,0.687773035545481,13.2584758376861,154,1044
-4,2.59065522660864,7.47123668729165,11.1556156827867,199,2673
-5,4.97204896477571,1.87635211302719,12.0936258450573,168,2249
-6,1.36147833409428,3.44379484097973,14.9972187712468,154,1528
-7,8.39413324282831,2.28490862304177,9.42194575138039,202,2149
-8,6.20179958134219,6.89601552296162,0.725018130365599,18,46
-9,0.070534295237322,9.86156428424996,5.30438180544163,109,88
-0,4.39865928751841,2.12813504025052,13.0236346577496,172,2354
-1,8.13688031969183,3.5121038554638,12.1334646964162,303,7381
-2,9.68760142892073,4.27906027717994,7.97601559652711,229,3415
-3,5.99290349089926,6.16116368789616,4.95400093318036,113,1050
-4,2.39564682119671,7.42628090625454,14.8571702857053,296,6436
-5,3.62455863591739,9.13101455443361,10.9911345864606,279,4189
-6,0.962975942364486,5.69898510817449,0.115635219608379,4,4
-7,7.83153497244688,8.45624004405334,4.60141715418712,148,2727
-8,1.46355222330269,0.531144632564664,2.4388575733996,8,8
-9,6.73748948445992,1.14637593866816,8.65322683828228,153,1267
-0,7.83108440735659,3.81943808179327,7.07038989649637,141,1407
-1,9.91173770827735,6.2759225530681,9.30367387625596,293,8332
-2,4.25434008757761,2.35070232743073,4.62108465660541,55,134
-3,2.77252642316259,9.21675771132199,9.99169718409744,222,3687
-4,1.26547322499765,7.58551404248108,13.7189915587824,241,3430
-5,3.20874253342839,1.34809554750787,2.82938398830816,28,34
-6,0.944266159655551,8.65739303949466,11.6621270878714,252,2166
-7,5.01930441671787,5.01564731794207,5.45441237877804,103,939
-8,8.96538870366343,0.06437488351504,0.838358724297142,20,13
-9,6.14926189698881,4.09966115273125,14.6157997542529,345,12626
-0,8.42006266957945,5.46822640409263,0.082487797226002,0,0
-1,4.06114618104769,7.88832581470262,3.1339892750535,74,460
-2,3.48933083162424,0.766697823833483,8.44345698215071,69,436
-3,5.81334332904511,4.81194225130181,4.59739110343907,99,1074
-4,7.09875296868169,6.81326230135065,13.4220721903382,355,11683
-5,9.06959184739631,9.64271715474556,6.59056009217497,227,6008
-6,2.15363152039327,1.45801462431595,14.5287091203349,108,1142
-7,1.76440011138931,3.86467775258565,9.85041263852873,114,1429
-8,6.72399538414474,8.92600741760325,5.19982911178778,159,2602
-9,0.565017768906111,2.02380604325834,11.9816293662893,53,203
-0,1.87488196744214,9.93565512940955,8.01368763218907,191,2074
-1,6.70652963230319,2.5861203488055,2.93377209390118,57,235
-2,0.76050200441724,7.29814297114814,3.33989231598983,65,164
-3,9.5231780098395,1.14913510784754,7.40920799686523,151,891
-4,2.92423770081413,4.82952316550075,5.60651991494414,82,841
-5,5.82503891062336,3.83652019444758,10.4427029742822,224,4983
-6,8.84388496566563,6.81450796813053,15.8827405524924,489,18807
-7,3.67300910096305,5.64508269196679,12.850584018034,236,4748
-8,7.30457994708725,0.366015387591876,1.23742755940604,21,28
-9,4.03562979539014,8.30407028478026,11.2832838373877,283,5692
-0,0.30774951660782,6.72147708273402,8.29415316302288,97,243
-1,3.12642622715503,1.06197100697941,11.1062578902696,92,566
-2,1.76080754143108,5.46828856767287,5.65176180242742,96,508
-3,5.85078038263068,8.8276711355692,12.56376873662,346,12071
-4,7.25700623222243,9.4512153132784,13.3473520092151,411,17681
-5,2.40709195615802,4.14908974876825,14.9364613073272,201,3197
-6,8.49856921696356,7.98503384500596,1.57686476907846,53,285
-7,6.53332684275294,0.889285462074263,2.43493287759176,43,54
-8,4.65949017108765,3.77131363322501,3.42511267817719,56,414
-9,9.04160323335532,2.41151160289921,6.75147948479153,162,1988
-0,9.96933012535166,5.89105061693053,0.39061891329105,12,17
-1,0.125378884108188,6.5936750743979,5.11768384565935,73,72
-2,7.16055781076317,1.8059056470189,7.2428846701695,122,690
-3,6.67673669867989,8.42743107116022,2.45953308703182,85,656
-4,3.54143447660984,2.11339832734491,11.9037131805332,139,1198
-5,1.01846590489324,0.433727062646016,10.8350274341204,29,100
-6,5.06197503598952,7.4572997979969,4.02664704116576,94,904
-7,8.8880803822588,4.5509441938406,8.97551272823197,215,3884
-8,4.86887079553939,9.10558271290443,13.9579942166525,360,12228
-9,2.35727092199605,3.99140186304961,14.4400401625022,179,2757
-0,0.257567725649517,0.738662053561825,13.0032852003808,27,72
-1,4.54389842071315,4.90728956145257,15.2040506948113,272,6323
-2,3.73955114873499,2.9210295160538,5.69804082376994,64,407
-3,6.60500306133803,1.5351067800006,6.94040921838974,109,850
-4,2.39212785498679,6.51150836476564,9.71464098277037,178,2175
-5,5.11832407300314,8.26567846550284,12.7096056570132,334,8918
-6,8.78077323488719,7.93579983152929,4.43736498157349,139,1617
-7,7.36378605159906,9.45791916719962,9.34220972880819,286,9100
-8,1.73605401448279,5.12759123928738,2.18698943085797,29,74
-9,9.51297884191434,3.43270491462655,1.21732964803112,27,54
-0,2.22597838465518,5.46587172890768,6.02469154679164,88,836
-1,0.639468439492104,2.05768328458354,2.79077641349958,15,40
-2,4.45700230937694,0.186405414068111,11.0453456219309,101,203
-3,6.40014546728456,3.56050873407797,0.013577619333509,1,0
-4,3.59285200489222,7.86618891526558,14.7466947903824,348,13825
-5,8.49126762849,9.88280567918849,9.32349874279251,318,11478
-6,7.30348420855156,4.06994401013863,13.8729280813231,289,8387
-7,9.55906128776525,6.41910508628671,7.83102376591759,230,5040
-8,1.66949857655069,8.58300648300455,11.2914874472484,258,3867
-9,5.02669315858685,1.14706990455034,4.15514234514631,44,136
-0,7.00055418458525,1.93614698659853,14.9576092797305,274,4518
-1,8.60831940140007,8.84802790265728,2.93279638758307,112,1494
-2,0.362148783480237,0.920300270572748,9.11621571653226,20,29
-3,6.75231097067346,3.44913968845939,12.306218312344,230,4252
-4,2.3751763983131,2.53643039678949,0.521336951751998,2,1
-5,5.48600344823823,9.74209259255824,7.65696286787684,243,5487
-6,1.40708497281539,4.69614592493854,14.2406016357562,180,2534
-7,9.65057479110935,6.89067859560547,9.9496775384329,353,10767
-8,4.35613916201497,5.7163310272567,5.80079493938673,117,1367
-9,3.96376764994231,7.99788470822846,3.82741572656444,96,608
-0,7.56620482350714,0.169557994370136,8.0875834174559,120,176
-1,0.237924162244983,6.81747643930492,7.64167833419101,120,248
-2,3.98865655491401,3.36888462237401,0.981930370959323,18,77
-3,5.50086995338217,4.29380202881551,14.5112124744243,244,5386
-4,8.19416207741451,1.8281251918569,13.6137246749899,291,5051
-5,4.47706674956727,8.63852504619167,11.3319329247743,310,8331
-6,9.86641512952482,9.89825908367483,2.77751552275716,112,1225
-7,2.0218067931244,7.6298837036712,4.07440274223661,73,356
-8,6.67955411910729,2.74081765793212,6.32571765985968,115,1282
-9,1.54796779679393,5.83644589625961,9.89301659451483,148,1368
-0,4.42835028381315,0.460509159908179,14.4416472797074,131,526
-1,0.46472195619875,3.4292107795417,1.97926641426989,20,25
-2,2.97761536752375,5.89733896556877,4.43870478674292,93,766
-3,6.67799801524258,2.28845835041813,9.98406147002264,180,2081
-4,3.40724641726207,7.24901996210677,7.5665652537518,148,2540
-5,9.02605788598559,6.93919743282576,8.40305322458788,261,5731
-6,8.5653851600992,9.80924583675348,11.8324679967211,438,17331
-7,5.09607597563836,8.8206255930672,1.57305966385825,52,383
-8,7.83444995734776,1.19519381427593,13.1916987700546,249,2547
-9,1.75251866057036,4.14329599143094,5.40219300367723,62,207
-0,7.11346050152477,9.06852598759699,10.7692196829653,360,13006
-1,5.05341667193451,8.86799393333611,14.8785283386487,421,14060
-2,4.6082976137419,1.79258716003357,2.32379868323728,30,109
-3,6.71535321250021,7.57464207062184,5.00426661597169,135,1416
-4,9.37280998920321,4.08366341005118,9.28407728543133,267,6586
-5,8.67967586749064,2.27331235865372,7.30485100307849,171,2056
-6,3.71295881555953,3.29771984805852,12.6425370019358,170,2758
-7,2.93534228139691,6.56074319601542,13.5927545009045,280,6050
-8,0.064873637835107,5.25804389403672,4.70628107490861,53,41
-9,1.19229172323108,0.072959863912955,1.42698912701012,4,3
-0,3.60851234252535,5.00440160900379,4.87145604620356,78,496
-1,1.39348055058863,7.00033015925948,0.53619882671396,8,18
-2,5.96482312547225,8.47226346079522,11.8262976323566,349,10968
-3,4.57168232609927,1.02929370500537,11.1838743860399,125,708
-4,7.93778763045879,6.97453255366238,14.732617395071,445,15890
-5,8.00759141617391,0.222065174541392,8.87164738459216,133,256
-6,2.57267058319112,2.47266472936484,4.77970487871781,59,294
-7,0.717186889725271,9.06113763001222,14.0672307327042,271,2398
-8,6.74508017116222,4.52204841617126,7.98938800141134,182,2501
-9,9.90497688178168,3.37995612514741,3.14927730345627,82,474
-0,6.0503629989934,5.85614957259594,7.29504338543719,166,2282
-1,0.653339924418811,6.90956894334459,1.67324214308596,23,46
-2,9.85142639441501,9.44966166311292,11.0910758970969,392,15028
-3,8.9424578385333,3.28042986157358,9.00469998927914,202,3296
-4,5.99732061163977,2.14083342765301,6.39049917341298,109,910
-5,1.28217864564923,0.683764824777804,13.6597702476366,66,382
-6,7.57537006482572,4.31223183962074,15.5361774270499,382,13025
-7,3.45071798553215,7.90953856991339,3.28993529258596,95,858
-8,2.56381748057517,8.67101247484487,12.2107778829175,277,5401
-9,4.15745295718985,1.64568532517424,1.22343937854457,13,34
-0,9.08004132409144,4.36122273523429,0.940948466422332,32,143
-1,1.75549906869679,1.26048757905735,12.4100718781766,82,747
-2,3.00950620241811,6.85755312709991,15.0613000982418,286,6398
-3,5.95900844360873,8.37465309165326,5.47237221248234,172,2218
-4,6.51525536411258,0.016833319860562,9.74125199773587,116,85
-5,2.98301643070615,2.39028910083933,6.72310538321917,71,409
-6,7.05799757247271,9.65452226901094,13.0474804850047,416,15036
-7,8.58352954886591,5.25435953880638,2.37070042985846,57,332
-8,4.34932703597591,7.43198759242516,8.8468088655212,199,3918
-9,0.843482492814286,3.05671513828223,4.71830421005482,43,154
-0,1.75015482379492,2.02628477233761,13.0556568993817,89,605
-1,0.631174204009007,7.45062104793028,5.41286136066922,88,137
-2,5.31632205700948,5.75855522846679,3.44886414640078,69,380
-3,7.16095548745673,1.54614447046338,3.14189946553064,54,172
-4,8.40934296382894,8.65147608792907,12.7148636427603,426,14289
-5,6.66157674057406,4.51667639260534,6.93777543395297,139,1720
-6,9.63370974342311,9.99187454468231,10.6235963392603,396,14427
-7,2.38264115506557,3.05462153648904,8.85400531295006,89,638
-8,3.61328639383313,0.491044821772377,15.7049391688252,130,743
-9,4.81374829556755,6.09141654911452,1.00391297570711,20,23
-0,0.74139497227975,7.31416741279196,0.925025078019392,16,17
-1,8.32835735232721,2.37421279573906,3.284138976343,73,499
-2,4.49326443410511,9.62063695212076,8.40850208187403,214,3312
-3,1.9161510268144,1.76252567709398,15.4213668878168,116,1158
-4,9.95565542524153,8.73555819826792,6.17469102417708,246,5442
-5,5.92706911605118,5.81442356707915,14.2671734354902,344,12268
-6,6.26244806696384,6.0321844032154,7.20067119333613,182,2757
-7,2.23246904021676,0.047733789678756,2.47275877238112,9,5
-8,3.29766450592931,4.78938296068863,10.9328794097104,180,3116
-9,7.44131111238846,3.53307882232463,12.0601993611958,251,5883
-0,0.611353030775142,5.28554293786953,15.9244254354453,180,935
-1,2.52282752731093,8.15705888409914,13.8633270848145,296,6656
-2,4.00769780295471,0.449087957168577,7.90670273350712,84,257
-3,6.83006537219578,3.35583037866766,3.51945549863837,66,487
-4,5.70412235423244,4.68222637670599,11.2651196724061,233,5605
-5,7.6618292896781,6.96643481008432,0.781135965242387,19,27
-6,1.06878065769766,7.36300144998256,8.7209974633122,146,794
-7,8.36205585017249,9.06222417265224,5.33038763720781,156,1851
-8,3.93538226073279,2.89761400834239,11.1322503590219,152,1775
-9,9.86258924134136,1.03794973023987,2.91613397145829,68,134
-0,0.353995569544667,8.45771405149823,6.54341527171333,122,217
-1,1.55436404082841,4.64282368827543,5.8928798368473,75,325
-2,2.61540567742777,5.1030371309751,3.13962252485627,57,231
-3,8.32158394036247,7.90446375855233,3.68092337288644,117,1116
-4,9.5702481249474,2.56860421356993,12.578316674247,316,5262
-5,7.23665537947971,1.78033824762516,11.0938520726532,197,2255
-6,4.14337567106824,3.79444120092139,9.19533793503172,143,2373
-7,6.37397025927733,9.25648052636811,0.370125299691133,14,56
-8,5.73928962874644,0.529174453181849,14.9344011501225,190,1135
-9,3.91618714807389,6.96047975156098,12.8572948710684,271,6051
-0,9.1806437332247,8.7603873821805,11.9878763030378,447,17499
-1,0.594552562360948,7.38906070961865,1.37016535810161,28,13
-2,8.42662537347639,3.75162055084481,8.83710760577391,209,3287
-3,1.33482785155535,6.38677020860296,3.7799357577833,70,252
-4,5.56563345143751,0.898706829574793,1.99637786245619,37,54
-5,6.78238309322355,5.17722238330647,7.05388156606752,158,2419
-6,2.02305278344521,4.29234742421515,10.425177767063,145,1315
-7,3.96940011256619,1.55133754269908,13.9937837715252,151,1665
-8,7.80988158570571,9.31921103588727,4.99367804187574,171,2505
-9,4.75115370364558,2.42423724529527,14.7545747011826,222,3609
-0,1.92354275693623,5.53473971541351,12.7719837308705,165,1850
-1,6.99588627575972,4.69372606146244,1.85072695647352,58,251
-2,0.599841221143114,9.8964852726425,15.2652993132769,323,2663
-3,9.03319631506556,0.146871875833207,8.15819384808584,141,176
-4,4.08159163038693,3.22973465411853,3.70959410122221,57,377
-5,2.48158905510788,6.10731866047401,5.64404147787936,83,627
-6,5.40617201291149,1.13963839922991,1.51461079354454,19,33
-7,8.60617730578177,7.51258396693423,7.02596515405357,224,4214
-8,7.43884780971136,8.32117925896747,9.70385071394347,310,6900
-9,3.93407684927148,2.42317283952675,13.4210583167365,162,2493
-0,0.045331302047994,3.03291376866117,9.19408987118018,69,46
-1,8.36161634128183,7.60795718381004,10.3950038266989,323,8507
-2,3.75648384909992,6.94909708892843,7.13898313440027,149,1890
-3,1.36891437335362,4.98972313255472,1.02336279762099,14,23
-4,6.71517195260958,2.484164247392,13.5291300550725,256,4454
-5,5.36947872363373,8.4680816210851,15.132446962025,462,18094
-6,7.42323944149019,1.6449637707452,11.2260894428584,198,2313
-7,9.56389211282927,5.55563475720497,5.07850261985169,162,2751
-8,4.95068193680708,9.63408805761412,3.9816218239932,113,856
-9,2.10886044664666,0.873999425537894,2.91624553448059,18,18
-0,0.898238598200147,0.971477070270274,7.80679846892446,31,101
-1,4.46712309269314,4.94342736625028,11.459966645403,177,2813
-2,7.39212814696927,3.26598253937994,9.70302871434598,186,3009
-3,5.72387103831472,9.22633827843749,14.8306983042747,461,20544
-4,2.90961379688291,8.95749053102122,2.57111266054113,60,351
-5,6.02503633623744,5.45862848743147,12.9969405654452,290,7987
-6,3.4482388465554,6.20829772308469,3.95455147564287,81,582
-7,9.61078135607795,7.70699696363042,9.00373937668051,320,9421
-8,8.31856309812599,1.28442032390379,6.15797918814908,122,639
-9,1.77240772212324,2.26724697507105,0.367791345923607,3,7
-0,3.04754760888116,6.46191929341064,10.0738789033626,190,3061
-1,7.64121499023502,8.14869931829755,15.7934730573598,510,22645
-2,6.06443762555269,7.72382868182295,4.82932186967232,137,1946
-3,0.733830119421456,5.18676841627001,13.6724055140192,144,1175
-4,9.53909014502225,3.73854987437011,2.76452107067831,62,274
-5,8.19858297822608,2.47935228921583,7.39496973677532,149,1286
-6,5.53952294351265,9.4537417590943,12.4168515233671,357,11084
-7,1.1829977166021,4.85143503785748,4.36110695826359,53,203
-8,2.43873720751041,1.70860352434067,1.10724911224498,12,19
-9,4.79956031624223,0.032910796954939,9.31446377748104,87,67
diff --git a/examples/poisson/example_run_causal_tests.py b/examples/poisson/example_run_causal_tests.py
deleted file mode 100644
index 1174818f..00000000
--- a/examples/poisson/example_run_causal_tests.py
+++ /dev/null
@@ -1,183 +0,0 @@
-import numpy as np
-import pandas as pd
-import scipy
-import os
-
-from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
-from causal_testing.testing.causal_test_outcome import ExactValue, Positive, Negative, NoEffect, CausalTestOutcome
-from causal_testing.testing.causal_test_result import CausalTestResult
-from causal_testing.json_front.json_class import JsonUtility
-from causal_testing.specification.scenario import Scenario
-from causal_testing.specification.variable import Input, Output, Meta
-
-import logging
-
-logger = logging.getLogger(__name__)
-logging.basicConfig(level=logging.DEBUG, format="%(message)s")
-
-
-class WidthHeightEstimator(LinearRegressionEstimator):
-    """
-    Extension of LinearRegressionEstimator class to include scenario specific user code
-    """
-
-    def add_modelling_assumptions(self):
-        """
-        Add modelling assumptions to the estimator. This is a list of strings which list the modelling assumptions that
-        must hold if the resulting causal inference is to be considered valid.
-        """
-        self.modelling_assumptions += "The output varies according to 2i(w+h)"
-
-    def estimate_ate(self) -> (float, [float, float], float):
-        """Estimate the conditional average treatment effect of the treatment on the outcome. That is, the change
-        in outcome caused by changing the treatment variable from the control value to the treatment value.
-        :return: The conditional average treatment effect and the 95% Wald confidence intervals.
-        """
-        assert (
-            self.effect_modifiers
-        ), f"Must have at least one effect modifier to compute CATE - {self.effect_modifiers}."
-        x = pd.DataFrame()
-        x[self.treatment[0]] = [self.treatment_values, self.control_values]
-        x["Intercept"] = 1
-        for k, v in self.effect_modifiers.items():
-            self.adjustment_set.add(k)
-            x[k] = v
-        if hasattr(self, "product_terms"):
-            for a, b in self.product_terms:
-                x[f"{a}*{b}"] = x[a] * x[b]
-
-        x.drop(["width", "height"], axis=1, inplace=True)
-        self.adjustment_set = {"width*intensity", "height*intensity"}
-
-        logger.info("%s", x)
-        logger.info("%s", self.adjustment_set)
-
-        model = self._run_linear_regression()
-        logger.info("%s", model.summary())
-        y = model.predict(x)
-        treatment_outcome = y.iloc[0]
-        control_outcome = y.iloc[1]
-
-        return treatment_outcome - control_outcome, None
-
-
-class PoissonWidthHeight(CausalTestOutcome):
-    """An extension of TestOutcome representing that the expected causal effect should be positive."""
-
-    def __init__(self, atol=0.5):
-        self.atol = atol
-        self.i2c = None
-
-    def apply(self, res: CausalTestResult) -> bool:
-        # TODO: confidence intervals?
-        logger.info("=== APPLYING ===")
-        logger.info("effect_modifier_configuration", res.effect_modifier_configuration)
-        effect_modifier_configuration = {k.name: v for k, v in res.effect_modifier_configuration.items()}
-        c = res.treatment_value - res.control_value
-        i = effect_modifier_configuration["intensity"]
-        self.i2c = i * 2 * c
-        logger.info("2ic: 2 * %s * %s = %s", i, c, self.i2c)
-        logger.info("ate: %s", res.test_value.value)
-        return np.isclose(res.test_value.value, self.i2c, atol=self.atol)
-
-    def __str__(self):
-        if self.i2c is None:
-            return f"PoissonWidthHeight±{self.atol}"
-        return f"PoissonWidthHeight:{self.i2c}±{self.atol}"
-
-
-def populate_width_height(data):
-    data["width_plus_height"] = data["width"] + data["height"]
-
-
-def populate_num_lines_unit(data):
-    area = data["width"] * data["height"]
-    data["num_lines_unit"] = data["num_lines_abs"] / area
-
-
-def populate_num_shapes_unit(data):
-    area = data["width"] * data["height"]
-    data["num_shapes_unit"] = data["num_shapes_abs"] / area
-
-
-inputs = [
-    {"name": "width", "datatype": float, "distribution": scipy.stats.uniform(0, 10)},
-    {"name": "height", "datatype": float, "distribution": scipy.stats.uniform(0, 10)},
-    {"name": "intensity", "datatype": float, "distribution": scipy.stats.uniform(0, 10)},
-]
-
-outputs = [{"name": "num_lines_abs", "datatype": float}, {"name": "num_shapes_abs", "datatype": float}]
-
-metas = [
-    {"name": "num_lines_unit", "datatype": float, "populate": populate_num_lines_unit},
-    {"name": "num_shapes_unit", "datatype": float, "populate": populate_num_shapes_unit},
-    {"name": "width_plus_height", "datatype": float, "populate": populate_width_height},
-]
-
-constraints = ["width > 0", "height > 0", "intensity > 0"]
-
-effects = {
-    "PoissonWidthHeight": PoissonWidthHeight(),
-    "Positive": Positive(),
-    "Negative": Negative(),
-    "ExactValue4_05": ExactValue(4, atol=0.5),
-    "NoEffect": NoEffect(),
-}
-
-estimators = {
-    "WidthHeightEstimator": WidthHeightEstimator,
-    "LinearRegressionEstimator": LinearRegressionEstimator,
-}
-
-# Create input structure required to create a modelling scenario
-modelling_inputs = (
-    [Input(i["name"], i["datatype"], i["distribution"]) for i in inputs]
-    + [Output(i["name"], i["datatype"]) for i in outputs]
-    + ([Meta(i["name"], i["datatype"], i["populate"]) for i in metas] if metas else list())
-)
-
-# Create modelling scenario to access z3 variable mirrors
-modelling_scenario = Scenario(modelling_inputs, None)
-modelling_scenario.setup_treatment_variables()
-
-mutates = {
-    "Increase": lambda x: modelling_scenario.treatment_variables[x].z3 > modelling_scenario.variables[x].z3,
-    "ChangeByFactor(2)": lambda x: modelling_scenario.treatment_variables[x].z3
-    == modelling_scenario.variables[x].z3 * 2,
-}
-
-
-def test_run_causal_tests():
-    ROOT = os.path.realpath(os.path.dirname(__file__))
-
-    log_path = f"{ROOT}/json_frontend.log"
-    json_path = f"{ROOT}/causal_tests.json"
-    dag_path = f"{ROOT}/dag.dot"
-    data_path = f"{ROOT}/data.csv"
-
-    json_utility = JsonUtility(log_path, output_overwrite=True)  # Create an instance of the extended JsonUtility class
-    json_utility.set_paths(
-        json_path, dag_path, [data_path]
-    )  # Set the path to the data.csv, dag.dot and causal_tests.json file
-
-    # Load the Causal Variables into the JsonUtility class ready to be used in the tests
-    json_utility.setup(
-        scenario=modelling_scenario
-    )  # Sets up all the necessary parts of the json_class needed to execute tests
-
-    json_utility.run_json_tests(effects=effects, mutates=mutates, estimators=estimators, f_flag=False)
-
-
-if __name__ == "__main__":
-    args = JsonUtility.get_args()
-    json_utility = JsonUtility(args.log_path)  # Create an instance of the extended JsonUtility class
-    json_utility.set_paths(
-        args.json_path, args.dag_path, args.data_path
-    )  # Set the path to the data.csv, dag.dot and causal_tests.json file
-
-    # Load the Causal Variables into the JsonUtility class ready to be used in the tests
-    json_utility.setup(
-        scenario=modelling_scenario
-    )  # Sets up all the necessary parts of the json_class needed to execute tests
-
-    json_utility.run_json_tests(effects=effects, mutates=mutates, estimators=estimators, f_flag=args.f)
diff --git a/tests/testing_tests/test_causal_test_adequacy.py b/tests/testing_tests/test_causal_test_adequacy.py
index 061ef661..e4bed6a8 100644
--- a/tests/testing_tests/test_causal_test_adequacy.py
+++ b/tests/testing_tests/test_causal_test_adequacy.py
@@ -59,15 +59,9 @@ def test_data_adequacy_numeric(self):
         }
         self.json_class.test_plan = example_test
         effects = {"NoEffect": NoEffect()}
-        mutates = {
-            "Increase": lambda x: self.json_class.scenario.treatment_variables[x].z3
-            > self.json_class.scenario.variables[x].z3
-        }
         estimators = {"LinearRegressionEstimator": LinearRegressionEstimator}
 
-        test_results = self.json_class.run_json_tests(
-            effects=effects, estimators=estimators, f_flag=False, mutates=mutates
-        )
+        test_results = self.json_class.run_json_tests(effects=effects, estimators=estimators, f_flag=False)
         self.assertEqual(
             test_results[0]["result"].adequacy.to_dict(),
             {"kurtosis": {"test_input": 0.0}, "bootstrap_size": 100, "passing": 100, "successful": 100},
diff --git a/width_num_shapes_results_random_1000.csv b/width_num_shapes_results_random_1000.csv
new file mode 100644
index 00000000..5ad081cf
--- /dev/null
+++ b/width_num_shapes_results_random_1000.csv
@@ -0,0 +1,145 @@
+,control,treatment,intensity,ate,ci_low,ci_high
+0,1.0,2.0,1,-7.378642492215972,-13.91823865204362,-0.8390463323883246
+1,2.0,3.0,1,-2.7096586545197052,-9.80288285818825,4.383565549148839
+2,3.0,4.0,1,-1.5424126950956385,-11.120887611821669,8.03606222163039
+3,4.0,5.0,1,-1.0755143113260122,-13.708422107725262,11.557393485073238
+4,5.0,6.0,1,-0.842065119441199,-16.741293716403792,15.057163477521392
+5,6.0,7.0,1,-0.7086655812213047,-19.97292732259147,18.555596160148863
+6,7.0,8.0,1,-0.6252908698338722,-23.308418384039193,22.057836644371445
+7,8.0,9.0,1,-0.5697077289089161,-26.70434658441146,25.564931126593628
+8,9.0,10.0,1,-0.5307995302614472,-30.138282420998813,29.076683360475922
+9,1.0,2.0,2,-7.378642492215974,-16.38113570509907,1.6238507206671215
+10,2.0,3.0,2,-2.709658654519707,-11.12501848472749,5.705701175688077
+11,3.0,4.0,2,-1.542412695095635,-10.885940125445726,7.801114735254458
+12,4.0,5.0,2,-1.075514311326014,-12.37290328513123,10.2218746624792
+13,5.0,6.0,2,-0.8420651194411981,-14.717521484023761,13.033391245141367
+14,6.0,7.0,2,-0.7086655812213039,-17.50973658032815,16.092405417885537
+15,7.0,8.0,2,-0.6252908698338722,-20.549295407558176,19.29871366789043
+16,8.0,9.0,2,-0.5697077289089165,-23.735302054174575,22.595886596356742
+17,9.0,10.0,2,-0.5307995302614472,-27.013708674772325,25.95210961424943
+18,1.0,2.0,3,-7.378642492215974,-20.043311732745323,5.286026748313375
+19,2.0,3.0,3,-2.709658654519707,-14.257652928983816,8.838335619944402
+20,3.0,4.0,3,-1.542412695095635,-12.869624916374846,9.784799526183576
+21,4.0,5.0,3,-1.0755143113260104,-13.130648317524162,10.979619694872142
+22,5.0,6.0,3,-0.8420651194412017,-14.473331702582346,12.789201463699946
+23,6.0,7.0,3,-0.7086655812213039,-16.53982963725465,15.122498474812037
+24,7.0,8.0,3,-0.6252908698338757,-19.069141710952177,17.81855997128443
+25,8.0,9.0,3,-0.5697077289089165,-21.891675412501034,20.752259954683193
+26,9.0,10.0,3,-0.5307995302614472,-24.90395032760046,23.842351267077564
+27,1.0,2.0,4,-7.378642492215974,-23.373997163493392,8.616712179061437
+28,2.0,3.0,4,-2.709658654519707,-17.345789724839037,11.92647241579963
+29,3.0,4.0,4,-1.542412695095635,-15.337988269660514,12.253162879469244
+30,4.0,5.0,4,-1.0755143113260104,-14.765501080503213,12.614472457851193
+31,5.0,6.0,4,-0.8420651194412017,-15.241135322080474,13.557005083198078
+32,6.0,7.0,4,-0.7086655812213039,-16.55358588394173,15.136254721499121
+33,7.0,8.0,4,-0.6252908698338757,-18.491752099772434,17.241170360104668
+34,8.0,9.0,4,-0.5697077289089165,-20.870158913539562,19.730743455721743
+35,9.0,10.0,4,-0.5307995302614472,-23.550598680286186,22.488999619763284
+36,1.0,2.0,5,-7.378642492215974,-26.035120136794106,11.277835152362158
+37,2.0,3.0,5,-2.709658654519714,-19.863479936076985,14.444162627037556
+38,3.0,4.0,5,-1.542412695095635,-17.49695503693026,14.41212964673899
+39,4.0,5.0,5,-1.0755143113260175,-16.40311483898148,14.252086216329445
+40,5.0,6.0,5,-0.8420651194411874,-16.261852314806973,14.577722075924598
+41,6.0,7.0,5,-0.708665581221311,-16.960028525631117,15.542697363188495
+42,7.0,8.0,5,-0.6252908698338757,-18.36211115362275,17.111529413954997
+43,8.0,9.0,5,-0.5697077289089094,-20.30936425354075,19.16994879572293
+44,9.0,10.0,5,-0.5307995302614472,-22.65598617725834,21.594387116735447
+45,1.0,2.0,6,-7.378642492215988,-27.948775665159488,13.191490680727526
+46,2.0,3.0,6,-2.7096586545197,-21.671785689375824,16.252468380336424
+47,3.0,4.0,6,-1.542412695095635,-19.070061978940473,15.985236588749174
+48,4.0,5.0,6,-1.0755143113260175,-17.635327009716008,15.484298387063987
+49,5.0,6.0,6,-0.8420651194411874,-17.07200388946137,15.387873650578996
+50,6.0,7.0,6,-0.708665581221311,-17.31952432610808,15.902193163665459
+51,7.0,8.0,6,-0.6252908698338757,-18.300276563559066,17.049694823891315
+52,8.0,9.0,6,-0.5697077289089094,-19.891016782483533,18.751601324665728
+53,9.0,10.0,6,-0.5307995302614472,-21.953616473882548,20.892017413359625
+54,1.0,2.0,7,-7.378642492215988,-29.117265138277418,14.359980153845441
+55,2.0,3.0,7,-2.7096586545197,-22.75075933458973,17.33144202555033
+56,3.0,4.0,7,-1.542412695095635,-19.977859545238175,16.893034155046905
+57,4.0,5.0,7,-1.0755143113260033,-18.30608567018311,16.155057047531102
+58,5.0,6.0,7,-0.8420651194412017,-17.45218176477536,15.768051525892957
+59,6.0,7.0,7,-0.708665581221311,-17.385478541472793,15.968147379030171
+60,7.0,8.0,7,-0.6252908698338615,-18.066675373156556,16.816093633488833
+61,8.0,9.0,7,-0.5697077289089236,-19.40070948795028,18.261294030132433
+62,9.0,10.0,7,-0.5307995302614472,-21.25887106921732,20.197272008694426
+63,1.0,2.0,8,-7.378642492215988,-29.589225422873483,14.831940438441507
+64,2.0,3.0,8,-2.7096586545197,-23.14075432553625,17.72143701649685
+65,3.0,4.0,8,-1.542412695095635,-20.235252891690664,17.150427501499394
+66,4.0,5.0,8,-1.0755143113260033,-18.39286072049805,16.241832097846043
+67,5.0,6.0,8,-0.8420651194412017,-17.341020322637092,15.65689008375469
+68,6.0,7.0,8,-0.708665581221311,-17.06994694543087,15.65261578298825
+69,7.0,8.0,8,-0.6252908698338615,-17.565333348018214,16.31475160835049
+70,8.0,9.0,8,-0.5697077289089236,-18.749087451671272,17.609671993853425
+71,9.0,10.0,8,-0.5307995302614472,-20.495979946318016,19.43438088579512
+72,1.0,2.0,9,-7.378642492215988,-29.462760189589744,14.705475205157768
+73,2.0,3.0,9,-2.7096586545196715,-22.939924975757663,17.52060766671832
+74,3.0,4.0,9,-1.5424126950956634,-19.933724348774604,16.848898958583277
+75,4.0,5.0,9,-1.0755143113260033,-17.974722820937814,15.823694198285807
+76,5.0,6.0,9,-0.8420651194412017,-16.802840553585952,15.118710314703549
+77,6.0,7.0,9,-0.7086655812213394,-16.425883438085293,15.008552275642614
+78,7.0,8.0,9,-0.6252908698338615,-16.844793222974317,15.594211483306594
+79,8.0,9.0,9,-0.5697077289088952,-17.986034056166602,16.84661859834881
+80,9.0,10.0,9,-0.5307995302614472,-19.718307331231983,18.65670827070909
+81,1.0,2.0,10,-7.378642492215988,-28.908492866963968,14.151207882531992
+82,2.0,3.0,10,-2.7096586545196715,-22.329186135144028,16.909868826104685
+83,3.0,4.0,10,-1.5424126950956634,-19.265301523111248,16.18047613291992
+84,4.0,5.0,10,-1.0755143113260033,-17.254314186285058,15.103285563633051
+85,5.0,6.0,10,-0.8420651194412017,-16.049664005016837,14.365533766134433
+86,6.0,7.0,10,-0.7086655812213394,-15.672387298506294,14.255056136063615
+87,7.0,8.0,10,-0.6252908698338615,-16.12682189679697,14.876240157129246
+88,8.0,9.0,10,-0.5697077289088952,-17.329661181871813,16.190245724054023
+89,9.0,10.0,10,-0.5307995302614472,-19.134199065817313,18.07260000529442
+90,1.0,2.0,11,-7.378642492215988,-28.211037397554605,13.45375241312263
+91,2.0,3.0,11,-2.7096586545196715,-21.62080993571135,16.201492626672007
+92,3.0,4.0,11,-1.5424126950956634,-18.578314574480032,15.493489184288705
+93,4.0,5.0,11,-1.0755143113260033,-16.619351549555233,14.468322926903227
+94,5.0,6.0,11,-0.8420651194412017,-15.504259980146628,13.820129741264225
+95,6.0,7.0,11,-0.7086655812213394,-15.252130048314598,13.834798885871919
+96,7.0,8.0,11,-0.6252908698338615,-15.850915487393195,14.600333747725472
+97,8.0,9.0,11,-0.5697077289088952,-17.19471919807603,16.05530374025824
+98,9.0,10.0,11,-0.5307995302614472,-19.121187130685882,18.059588070162988
+99,1.0,2.0,12,-7.378642492215988,-27.816475373158596,13.05919038872662
+100,2.0,3.0,12,-2.7096586545196715,-21.311564093961522,15.892246784922179
+101,3.0,4.0,12,-1.5424126950956634,-18.4316147555852,15.346789365393875
+102,4.0,5.0,12,-1.0755143113260033,-16.68850845259334,14.537479829941333
+103,5.0,6.0,12,-0.8420651194412017,-15.82369961782706,14.139569378944657
+104,6.0,7.0,12,-0.7086655812213394,-15.821709691361946,14.404378528919267
+105,7.0,8.0,12,-0.6252908698338615,-16.633085203954522,15.382503464286799
+106,8.0,9.0,12,-0.5697077289088952,-18.132790226794157,16.993374768976366
+107,9.0,10.0,12,-0.5307995302614472,-20.161775051364373,19.10017599084148
+108,1.0,2.0,13,-7.378642492215931,-28.324569726565073,13.56728474213321
+109,2.0,3.0,13,-2.709658654519785,-22.052605303921496,16.633287994881925
+110,3.0,4.0,13,-1.5424126950956634,-19.518911869390536,16.43408647919921
+111,4.0,5.0,13,-1.0755143113259464,-18.16831866989253,16.017290047240635
+112,5.0,6.0,13,-0.8420651194412585,-17.683477997062027,15.99934775817951
+113,6.0,7.0,13,-0.7086655812212257,-17.988597888701065,16.571266726258614
+114,7.0,8.0,13,-0.6252908698338615,-19.000371980537352,17.74979024086963
+115,8.0,9.0,13,-0.569707728908952,-20.599229033232973,19.45981357541507
+116,9.0,10.0,13,-0.5307995302614472,-22.65487001642896,21.593270955906064
+117,1.0,2.0,14,-7.378642492215931,-30.323636635308162,15.5663516508763
+118,2.0,3.0,14,-2.709658654519785,-24.41936770763448,19.000050398594908
+119,3.0,4.0,14,-1.5424126950956634,-22.356813121236883,19.271987731045556
+120,4.0,5.0,14,-1.0755143113259464,-21.47471564166449,19.323687019012596
+121,5.0,6.0,14,-0.8420651194412585,-21.383064154658655,19.698933915776138
+122,6.0,7.0,14,-0.7086655812212257,-21.9582335783723,20.54090241592985
+123,7.0,8.0,14,-0.6252908698338615,-23.107653715885363,21.85707197621764
+124,8.0,9.0,14,-0.569707728908952,-24.735404733257155,23.59598927543925
+125,9.0,10.0,14,-0.5307995302614472,-26.747463262163137,25.685864201640243
+126,1.0,2.0,15,-7.378642492215931,-34.13087918226529,19.373594197833427
+127,2.0,3.0,15,-2.709658654519785,-28.636980229638084,23.217662920598514
+128,3.0,4.0,15,-1.5424126950956634,-27.04241542640932,23.957590036217994
+129,4.0,5.0,15,-1.0755143113259464,-26.58007596614152,24.42904734348963
+130,5.0,6.0,15,-0.8420651194412585,-26.807796364306228,25.12366612542371
+131,6.0,7.0,15,-0.7086655812212257,-27.580783710990545,26.163452548548094
+132,7.0,8.0,15,-0.6252908698338615,-28.81249696408338,27.561915224415657
+133,8.0,9.0,15,-0.569707728908952,-30.430425062116228,29.291009604298324
+134,9.0,10.0,15,-0.5307995302614472,-32.36915735084904,31.307558290326142
+135,1.0,2.0,16,-7.378642492215931,-39.72917583196772,24.97189084753586
+136,2.0,3.0,16,-2.709658654519785,-34.60204449801245,29.182727188972876
+137,3.0,4.0,16,-1.5424126950956634,-33.39313515126685,30.30830976107552
+138,4.0,5.0,16,-1.0755143113259464,-33.25518652597691,31.104157903325017
+139,5.0,6.0,16,-0.8420651194412585,-33.71724187890618,32.03311164002366
+140,6.0,7.0,16,-0.7086655812212257,-34.628456482555634,33.21112532011318
+141,7.0,8.0,16,-0.6252908698338615,-35.91079604448066,34.66021430481294
+142,8.0,9.0,16,-0.569707728908952,-37.50821676227747,36.368801304459566
+143,9.0,10.0,16,-0.5307995302614472,-39.374044276785185,38.31244521626229

From a15e15bac55ae534d1cec990227f90ee3a7ee4ac Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Tue, 18 Feb 2025 12:44:57 +0000
Subject: [PATCH 27/44] Removed causal test suite from poisson

---
 examples/poisson-line-process/example_pure_python.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/examples/poisson-line-process/example_pure_python.py b/examples/poisson-line-process/example_pure_python.py
index d5b5005d..8959954e 100644
--- a/examples/poisson-line-process/example_pure_python.py
+++ b/examples/poisson-line-process/example_pure_python.py
@@ -12,7 +12,6 @@
 from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
 from causal_testing.estimation.abstract_estimator import Estimator
 from causal_testing.testing.base_test_case import BaseTestCase
-from causal_testing.testing.causal_test_suite import CausalTestSuite
 
 
 logger = logging.getLogger(__name__)

From 4b142124e36976072a29e6d93ebd67e42cd74598 Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Tue, 18 Feb 2025 15:13:27 +0000
Subject: [PATCH 28/44] Tests passing again

---
 causal_testing/json_front/json_class.py       |  85 +-------
 causal_testing/testing/causal_test_case.py    |  31 +--
 .../covasim_/doubling_beta/example_beta.py    |  48 +++--
 examples/lr91/example_max_conductances.py     |  23 +-
 .../example_max_conductances_test_suite.py    | 195 -----------------
 .../example_json_frontend.py                  |  64 ------
 .../example_pure_python.py                    |   6 -
 .../test_abstract_test_case.py                | 199 ------------------
 tests/json_front_tests/test_json_class.py     |  46 +---
 .../test_causal_test_adequacy.py              |   4 +-
 tests/testing_tests/test_causal_test_case.py  |  39 ++--
 tests/testing_tests/test_causal_test_suite.py | 105 ---------
 12 files changed, 90 insertions(+), 755 deletions(-)
 delete mode 100644 examples/lr91/example_max_conductances_test_suite.py
 delete mode 100644 examples/poisson-line-process/example_json_frontend.py
 delete mode 100644 tests/generation_tests/test_abstract_test_case.py
 delete mode 100644 tests/testing_tests/test_causal_test_suite.py

diff --git a/causal_testing/json_front/json_class.py b/causal_testing/json_front/json_class.py
index 9f43c9e3..a6731857 100644
--- a/causal_testing/json_front/json_class.py
+++ b/causal_testing/json_front/json_class.py
@@ -11,10 +11,7 @@
 from statistics import StatisticsError
 
 import pandas as pd
-import scipy
-from fitter import Fitter, get_common_distributions
 
-from causal_testing.generation.abstract_causal_test_case import AbstractCausalTestCase
 from causal_testing.specification.causal_dag import CausalDAG
 from causal_testing.specification.causal_specification import CausalSpecification
 from causal_testing.specification.scenario import Scenario
@@ -90,33 +87,6 @@ def setup(self, scenario: Scenario, ignore_cycles=False):
             )
         self._populate_metas()
 
-    def _create_abstract_test_case(self, test, mutates, effects):
-        assert len(test["mutations"]) == 1
-        treatment_var = next(self.scenario.variables[v] for v in test["mutations"])
-
-        if not treatment_var.distribution:
-            fitter = Fitter(self.df[treatment_var.name], distributions=get_common_distributions())
-            fitter.fit()
-            (dist, params) = list(fitter.get_best(method="sumsquare_error").items())[0]
-            treatment_var.distribution = getattr(scipy.stats, dist)(**params)
-            self._append_to_file(treatment_var.name + f" {dist}({params})", logging.INFO)
-
-        abstract_test = AbstractCausalTestCase(
-            scenario=self.scenario,
-            intervention_constraints=[mutates[v](k) for k, v in test["mutations"].items()],
-            treatment_variable=treatment_var,
-            expected_causal_effect={
-                self.scenario.variables[variable]: effects[effect]
-                for variable, effect in test["expected_effect"].items()
-            },
-            effect_modifiers=(
-                {self.scenario.variables[v] for v in test["effect_modifiers"]} if "effect_modifiers" in test else {}
-            ),
-            estimate_type=test["estimate_type"],
-            effect=test.get("effect", "total"),
-        )
-        return abstract_test
-
     def run_json_tests(self, effects: dict, estimators: dict, f_flag: bool = False, mutates: dict = None):
         """Runs and evaluates each test case specified in the JSON input
 
@@ -139,9 +109,7 @@ def run_json_tests(self, effects: dict, estimators: dict, f_flag: bool = False,
                         test=test, f_flag=f_flag, effects=effects, estimate_type=test["estimate_type"]
                     )
                 else:
-                    failed, msg = self._run_metamorphic_tests(
-                        test=test, f_flag=f_flag, effects=effects, mutates=mutates
-                    )
+                    raise NotImplementedError("Tried to call deprecated method _run_metamorphic_tests")
             test["failed"] = failed
             test["result"] = msg
         return self.test_plan["tests"]
@@ -189,8 +157,6 @@ def _run_concrete_metamorphic_test(self, test: dict, f_flag: bool, effects: dict
         causal_test_case = CausalTestCase(
             base_test_case=base_test_case,
             expected_causal_effect=effects[test["expected_effect"][outcome_variable]],
-            control_value=test["control_value"],
-            treatment_value=test["treatment_value"],
             estimate_type=test["estimate_type"],
         )
         failed, msg = self._execute_test_case(causal_test_case=causal_test_case, test=test, f_flag=f_flag)
@@ -205,41 +171,6 @@ def _run_concrete_metamorphic_test(self, test: dict, f_flag: bool, effects: dict
         self._append_to_file(msg, logging.INFO)
         return failed, msg
 
-    def _run_metamorphic_tests(self, test: dict, f_flag: bool, effects: dict, mutates: dict):
-        """Builds structures and runs test case for tests with an estimate_type of 'ate'.
-
-        :param test: Single JSON test definition stored in a mapping (dict)
-        :param f_flag: Failure flag that if True the script will stop executing when a test fails.
-        :param effects: Dictionary mapping effect class instances to string representations.
-        :param mutates: Dictionary mapping mutation functions to string representations.
-        :return: String containing the message to be outputted
-        """
-        if "sample_size" in test:
-            sample_size = test["sample_size"]
-        else:
-            sample_size = 5
-        if "target_ks_score" in test:
-            target_ks_score = test["target_ks_score"]
-        else:
-            target_ks_score = 0.05
-        abstract_test = self._create_abstract_test_case(test, mutates, effects)
-        concrete_tests, _ = abstract_test.generate_concrete_tests(
-            sample_size=sample_size, target_ks_score=target_ks_score
-        )
-        failures, _ = self._execute_tests(concrete_tests, test, f_flag)
-
-        msg = (
-            f"Executing test: {test['name']} \n"
-            + "  abstract_test \n"
-            + f"  {abstract_test} \n"
-            + f"  {abstract_test.treatment_variable.name},"
-            + f"  {abstract_test.treatment_variable.distribution} \n"
-            + f"  Number of concrete tests for test case: {str(len(concrete_tests))} \n"
-            + f"  {failures}/{len(concrete_tests)} failed for {test['name']}"
-        )
-        self._append_to_file(msg, logging.INFO)
-        return failures, msg
-
     def _execute_tests(self, concrete_tests, test, f_flag):
         failures = 0
         details = []
@@ -274,7 +205,8 @@ def _execute_test_case(
         failed = False
 
         estimation_model = self._setup_test(causal_test_case=causal_test_case, test=test)
-        causal_test_result = causal_test_case.execute_test(estimator=estimation_model)
+        causal_test_case.estimator = estimation_model
+        causal_test_result = causal_test_case.execute_test()
         test_passes = causal_test_case.expected_causal_effect.apply(causal_test_result)
 
         if "coverage" in test and test["coverage"]:
@@ -307,6 +239,7 @@ def _setup_test(self, causal_test_case: CausalTestCase, test: Mapping) -> Estima
                 - estimation_model - Estimator instance for the test being run
         """
         estimator_kwargs = {}
+        treatment_variable = next(self.scenario.variables[v] for v in test["mutations"])
         if "formula" in test:
             if test["estimator"] != (LinearRegressionEstimator or LogisticRegressionEstimator):
                 raise TypeError(
@@ -319,14 +252,14 @@ def _setup_test(self, causal_test_case: CausalTestCase, test: Mapping) -> Estima
             minimal_adjustment_set = self.causal_specification.causal_dag.identification(
                 causal_test_case.base_test_case
             )
-            minimal_adjustment_set = minimal_adjustment_set - {causal_test_case.treatment_variable}
+            minimal_adjustment_set = minimal_adjustment_set - {treatment_variable}
             estimator_kwargs["adjustment_set"] = minimal_adjustment_set
 
         estimator_kwargs["query"] = test["query"] if "query" in test else ""
-        estimator_kwargs["treatment"] = causal_test_case.treatment_variable.name
-        estimator_kwargs["treatment_value"] = causal_test_case.treatment_value
-        estimator_kwargs["control_value"] = causal_test_case.control_value
-        estimator_kwargs["outcome"] = causal_test_case.outcome_variable.name
+        estimator_kwargs["treatment"] = treatment_variable.name
+        estimator_kwargs["treatment_value"] = test.get("treatment_value")
+        estimator_kwargs["control_value"] = test.get("control_value")
+        estimator_kwargs["outcome"] = next(v for v in test["expected_effect"])
         estimator_kwargs["effect_modifiers"] = causal_test_case.effect_modifier_configuration
         estimator_kwargs["df"] = self.df
         estimator_kwargs["alpha"] = test["alpha"] if "alpha" in test else 0.05
diff --git a/causal_testing/testing/causal_test_case.py b/causal_testing/testing/causal_test_case.py
index ce1c4e8d..30343560 100644
--- a/causal_testing/testing/causal_test_case.py
+++ b/causal_testing/testing/causal_test_case.py
@@ -27,8 +27,6 @@ def __init__(
         self,
         base_test_case: BaseTestCase,
         expected_causal_effect: CausalTestOutcome,
-        control_value: Any = None,
-        treatment_value: Any = None,
         estimate_type: str = "ate",
         estimate_params: dict = None,
         effect_modifier_configuration: dict[Variable:Any] = None,
@@ -37,18 +35,14 @@ def __init__(
         """
         :param base_test_case: A BaseTestCase object consisting of a treatment variable, outcome variable and effect
         :param expected_causal_effect: The expected causal effect (Positive, Negative, No Effect).
-        :param control_value: The control value for the treatment variable (before intervention).
-        :param treatment_value: The treatment value for the treatment variable (after intervention).
         :param estimate_type: A string which denotes the type of estimate to return.
         :param effect_modifier_configuration: The assignment of the effect modifiers to use for estimates.
         :param estimator: An Estimator class object
         """
         self.base_test_case = base_test_case
-        self.control_value = control_value
         self.expected_causal_effect = expected_causal_effect
         self.outcome_variable = base_test_case.outcome_variable
         self.treatment_variable = base_test_case.treatment_variable
-        self.treatment_value = treatment_value
         self.estimate_type = estimate_type
         self.estimator = estimator
         if estimate_params is None:
@@ -60,27 +54,34 @@ def __init__(
         else:
             self.effect_modifier_configuration = {}
 
-    def execute_test(self) -> CausalTestResult:
-        """Execute a causal test case and return the causal test result.
+    def execute_test(self, estimator: type(Estimator) = None) -> CausalTestResult:
+        """
+        Execute a causal test case and return the causal test result.
+        :param estimator: An alternative estimator. Defaults to `self.estimator`. This parameter is useful when you want
+        to execute a test with different data or a different equational form, but don't want to redefine the whole test
+        case.
 
         :return causal_test_result: A CausalTestResult for the executed causal test case.
         """
 
-        if not hasattr(self.estimator, f"estimate_{self.estimate_type}"):
-            raise AttributeError(f"{self.estimator.__class__} has no {self.estimate_type} method.")
-        estimate_effect = getattr(self.estimator, f"estimate_{self.estimate_type}")
+        if estimator is None:
+            estimator = self.estimator
+
+        if not hasattr(estimator, f"estimate_{self.estimate_type}"):
+            raise AttributeError(f"{estimator.__class__} has no {self.estimate_type} method.")
+        estimate_effect = getattr(estimator, f"estimate_{self.estimate_type}")
         effect, confidence_intervals = estimate_effect(**self.estimate_params)
         return CausalTestResult(
-            estimator=self.estimator,
+            estimator=estimator,
             test_value=TestValue(self.estimate_type, effect),
             effect_modifier_configuration=self.effect_modifier_configuration,
             confidence_intervals=confidence_intervals,
         )
 
     def __str__(self):
-        treatment_config = {self.treatment_variable.name: self.treatment_value}
-        control_config = {self.treatment_variable.name: self.control_value}
-        outcome_variable = {self.outcome_variable}
+        treatment_config = {self.treatment_variable.name: self.estimator.treatment_value}
+        control_config = {self.treatment_variable.name: self.estimator.control_value}
+        outcome_variable = {self.outcome_variable.name}
         return (
             f"Running {treatment_config} instead of {control_config} should cause the following "
             f"changes to {outcome_variable}: {self.expected_causal_effect}."
diff --git a/examples/covasim_/doubling_beta/example_beta.py b/examples/covasim_/doubling_beta/example_beta.py
index 6aa91b2b..b696c658 100644
--- a/examples/covasim_/doubling_beta/example_beta.py
+++ b/examples/covasim_/doubling_beta/example_beta.py
@@ -69,33 +69,37 @@ def doubling_beta_CATE_on_csv(
 
     # 6. Create a causal test case
     causal_test_case = CausalTestCase(
-        base_test_case=base_test_case, expected_causal_effect=Positive, control_value=0.016, treatment_value=0.032
-    )
-
-    linear_regression_estimator = LinearRegressionEstimator(
-        "beta",
-        0.032,
-        0.016,
-        {"avg_age", "contacts"},  # We use custom adjustment set
-        "cum_infections",
-        df=past_execution_df,
-        formula="cum_infections ~ beta + I(beta ** 2) + avg_age + contacts",
+        base_test_case=base_test_case,
+        expected_causal_effect=Positive,
+        estimator=LinearRegressionEstimator(
+            "beta",
+            0.032,
+            0.016,
+            {"avg_age", "contacts"},  # We use custom adjustment set
+            "cum_infections",
+            df=past_execution_df,
+            formula="cum_infections ~ beta + I(beta ** 2) + avg_age + contacts",
+        ),
     )
 
     # Add squared terms for beta, since it has a quadratic relationship with cumulative infections
-    causal_test_result = causal_test_case.execute_test(estimator=linear_regression_estimator)
+    causal_test_result = causal_test_case.execute_test()
 
     # Repeat for association estimate (no adjustment)
-    no_adjustment_linear_regression_estimator = LinearRegressionEstimator(
-        "beta",
-        0.032,
-        0.016,
-        set(),
-        "cum_infections",
-        df=past_execution_df,
-        formula="cum_infections ~ beta + I(beta ** 2)",
+    causal_test_case = CausalTestCase(
+        base_test_case=base_test_case,
+        expected_causal_effect=Positive,
+        estimator=LinearRegressionEstimator(
+            "beta",
+            0.032,
+            0.016,
+            set(),
+            "cum_infections",
+            df=past_execution_df,
+            formula="cum_infections ~ beta + I(beta ** 2)",
+        ),
     )
-    association_test_result = causal_test_case.execute_test(estimator=no_adjustment_linear_regression_estimator)
+    association_test_result = causal_test_case.execute_test()
 
     # Store results for plotting
     results_dict["association"] = {
@@ -116,7 +120,7 @@ def doubling_beta_CATE_on_csv(
     # Repeat causal inference after deleting all rows with treatment value to obtain counterfactual inferences
     if simulate_counterfactuals:
         counterfactual_past_execution_df = past_execution_df[past_execution_df["beta"] != 0.032]
-        counterfactual_causal_test_result = causal_test_case.execute_test(estimator=linear_regression_estimator)
+        counterfactual_causal_test_result = causal_test_case.execute_test()
 
         results_dict["counterfactual"] = {
             "ate": counterfactual_causal_test_result.test_value.value,
diff --git a/examples/lr91/example_max_conductances.py b/examples/lr91/example_max_conductances.py
index de27ac19..1fbc8779 100644
--- a/examples/lr91/example_max_conductances.py
+++ b/examples/lr91/example_max_conductances.py
@@ -129,23 +129,18 @@ def effects_on_APD90(observational_data_path, treatment_var, control_val, treatm
     causal_test_case = CausalTestCase(
         base_test_case=base_test_case,
         expected_causal_effect=expected_causal_effect,
-        control_value=control_val,
-        treatment_value=treatment_val,
-    )
-
-    # 8. Obtain the minimal adjustment set from the causal DAG
-    minimal_adjustment_set = causal_dag.identification(base_test_case)
-    linear_regression_estimator = LinearRegressionEstimator(
-        treatment_var.name,
-        treatment_val,
-        control_val,
-        minimal_adjustment_set,
-        "APD90",
-        df=pd.read_csv(observational_data_path),
+        estimator=LinearRegressionEstimator(
+            treatment=treatment_var.name,
+            treatment_value=treatment_val,
+            control_value=control_val,
+            adjustment_set=causal_dag.identification(base_test_case),
+            outcome="APD90",
+            df=pd.read_csv(observational_data_path),
+        ),
     )
 
     # 9. Run the causal test and print results
-    causal_test_result = causal_test_case.execute_test(linear_regression_estimator)
+    causal_test_result = causal_test_case.execute_test()
     logger.info("%s", causal_test_result)
     return causal_test_result.test_value.value, causal_test_result.confidence_intervals
 
diff --git a/examples/lr91/example_max_conductances_test_suite.py b/examples/lr91/example_max_conductances_test_suite.py
deleted file mode 100644
index d244d5bf..00000000
--- a/examples/lr91/example_max_conductances_test_suite.py
+++ /dev/null
@@ -1,195 +0,0 @@
-import pandas as pd
-import numpy as np
-import matplotlib.pyplot as plt
-from causal_testing.specification.causal_dag import CausalDAG
-from causal_testing.specification.scenario import Scenario
-from causal_testing.specification.variable import Input, Output
-from causal_testing.specification.causal_specification import CausalSpecification
-from causal_testing.testing.causal_test_case import CausalTestCase
-from causal_testing.testing.causal_test_outcome import Positive, Negative, NoEffect
-from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
-from causal_testing.testing.base_test_case import BaseTestCase
-from causal_testing.testing.causal_test_suite import CausalTestSuite
-from matplotlib.pyplot import rcParams
-
-import os
-
-# Uncommenting the code below will make all graphs publication quality but requires a suitable latex installation
-
-# rc_fonts = {
-#     "font.size": 8,
-#     "figure.figsize": (5, 4),
-#     "text.usetex": True,
-#     "font.family": "serif",
-#     "text.latex.preamble": r"\usepackage{libertine}",
-# }
-# rcParams.update(rc_fonts)
-ROOT = os.path.realpath(os.path.dirname(__file__))
-OBSERVATIONAL_DATA_PATH = f"{ROOT}/data/normalised_results.csv"
-
-
-def test_sensitivity_analysis():
-    """Perform causal testing to evaluate the effect of six conductance inputs on one output, APD90, over the defined
-    (normalised) design distribution to quantify the extent to which each input affects the output, and plot as a
-    graph.
-    """
-    # Read in the 200 model runs and define mean value and expected effect
-    model_runs = pd.read_csv(f"{ROOT}/data/results.csv")
-    conductance_means = {
-        "G_K": (0.5, Positive),
-        "G_b": (0.5, Positive),
-        "G_K1": (0.5, Positive),
-        "G_si": (0.5, Negative),
-        "G_Na": (0.5, NoEffect),
-        "G_Kp": (0.5, NoEffect),
-    }
-
-    # Normalise the inputs as per the original study
-    normalised_df = normalise_data(model_runs, columns=list(conductance_means.keys()))
-    normalised_df.to_csv(f"{ROOT}/data/normalised_results.csv")
-
-    # For each input, perform 10 causal tests that change the input from its mean value (0.5) to the equidistant values
-    # [0, 0.1, 0.2, ..., 0.9, 1] over the input space of each input, as defined by the normalised design distribution.
-    # For each input, this will yield 10 causal test results that measure the extent the input causes APD90 to change,
-    # enabling us to compare the magnitude and direction of each inputs' effect.
-    treatment_values = np.linspace(0, 1, 11)
-    results = {"G_K": {}, "G_b": {}, "G_K1": {}, "G_si": {}, "G_Na": {}, "G_Kp": {}}
-
-    apd90 = Output("APD90", int)
-    outcome_variable = apd90
-    test_suite = CausalTestSuite()
-    estimator_list = [LinearRegressionEstimator]
-
-    # For each parameter in conductance_means, setup variables and add a test case to the test suite
-    for conductance_param, mean_and_oracle in conductance_means.items():
-        treatment_variable = Input(conductance_param, float)
-        base_test_case = BaseTestCase(treatment_variable, outcome_variable)
-        test_list = []
-        control_value = 0.5
-        mean, oracle = mean_and_oracle
-        for treatment_value in treatment_values:
-            test_list.append(CausalTestCase(base_test_case, oracle, control_value, treatment_value))
-        test_suite.add_test_object(
-            base_test_case=base_test_case,
-            causal_test_case_list=test_list,
-            estimators_classes=estimator_list,
-            estimate_type="ate",
-        )
-
-    causal_test_results = effects_on_APD90(OBSERVATIONAL_DATA_PATH, test_suite)
-
-    # Extract data from causal_test_results needed for plotting
-    for base_test_case in causal_test_results:
-        # Place results of test_suite into format required for plotting
-        results[base_test_case.treatment_variable.name] = {
-            "ate": [
-                result.test_value.value for result in causal_test_results[base_test_case]["LinearRegressionEstimator"]
-            ],
-            "cis": [
-                result.confidence_intervals
-                for result in causal_test_results[base_test_case]["LinearRegressionEstimator"]
-            ],
-        }
-
-    plot_ates_with_cis(results, treatment_values)
-
-
-def effects_on_APD90(observational_data_path, test_suite):
-    """Perform causal testing for the scenario in which we investigate the causal effect of a given input on APD90.
-
-    :param: test_suite: A CausalTestSuite object containing a dictionary of base_test_cases and the treatment/outcome
-                        values to be tested
-    :return: causal_test_results containing a list of causal_test_result objects
-    """
-    # 1. Define Causal DAG
-    causal_dag = CausalDAG(f"{ROOT}/dag.dot")
-
-    # 2. Specify all inputs
-    g_na = Input("G_Na", float)
-    g_si = Input("G_si", float)
-    g_k = Input("G_K", float)
-    g_k1 = Input("G_K1", float)
-    g_kp = Input("G_Kp", float)
-    g_b = Input("G_b", float)
-
-    # 3. Specify all outputs
-    max_voltage = Output("max_voltage", float)
-    rest_voltage = Output("rest_voltage", float)
-    max_voltage_gradient = Output("max_voltage_gradient", float)
-    dome_voltage = Output("dome_voltage", float)
-    apd50 = Output("APD50", int)
-    apd90 = Output("APD90", int)
-
-    # 4. Create scenario by applying constraints over a subset of the inputs
-    scenario = Scenario(
-        variables={
-            g_na,
-            g_si,
-            g_k,
-            g_k1,
-            g_kp,
-            g_b,
-            max_voltage,
-            rest_voltage,
-            max_voltage_gradient,
-            dome_voltage,
-            apd50,
-            apd90,
-        },
-        constraints=set(),
-    )
-
-    # 5. Create a causal specification from the scenario and causal DAG
-    causal_specification = CausalSpecification(scenario, causal_dag)
-
-    # 8. Run the causal test suite
-    causal_test_results = test_suite.execute_test_suite(causal_specification, pd.read_csv(observational_data_path))
-    return causal_test_results
-
-
-def plot_ates_with_cis(results_dict: dict, xs: list, save: bool = False, show=False):
-    """Plot the average treatment effects for a given treatment against a list of x-values with confidence intervals.
-
-    :param results_dict: A dictionary containing results for sensitivity analysis of each input parameter.
-    :param xs: Values to be plotted on the x-axis.
-    :param save: Whether to save the plot.
-    """
-    fig, axes = plt.subplots()
-    input_colors = {"G_Na": "red", "G_si": "green", "G_K": "blue", "G_K1": "magenta", "G_Kp": "cyan", "G_b": "yellow"}
-    for treatment, test_results in results_dict.items():
-        ates = test_results["ate"]
-        cis = test_results["cis"]
-        before_underscore, after_underscore = treatment.split("_")
-        after_underscore_braces = f"{{{after_underscore}}}"
-        latex_compatible_treatment_str = rf"${before_underscore}_{after_underscore_braces}$"
-        cis_low = [c[0][0] for c in cis]
-        cis_high = [c[1][0] for c in cis]
-        axes.fill_between(
-            xs, cis_low, cis_high, alpha=0.2, color=input_colors[treatment], label=latex_compatible_treatment_str
-        )
-        axes.plot(xs, ates, color=input_colors[treatment], linewidth=1)
-        axes.plot(xs, [0] * len(xs), color="black", alpha=0.5, linestyle="--", linewidth=1)
-    axes.set_ylabel(r"ATE: Change in $APD_{90} (ms)$")
-    axes.set_xlabel(r"Treatment value")
-    axes.set_ylim(-80, 80)
-    axes.set_xlim(min(xs), max(xs))
-    box = axes.get_position()
-    axes.set_position([box.x0, box.y0 + box.height * 0.3, box.width * 0.85, box.height * 0.7])
-    plt.legend(loc="center left", bbox_to_anchor=(1.01, 0.5), fancybox=True, ncol=1, title=r"Input (95\% CIs)")
-    if save:
-        plt.savefig(f"APD90_sensitivity.pdf", format="pdf")
-    if show:
-        plt.show()
-
-
-def normalise_data(df, columns=None):
-    """Normalise the data in the dataframe into the range [0, 1]."""
-    if columns:
-        df[columns] = (df[columns] - df[columns].min()) / (df[columns].max() - df[columns].min())
-        return df
-    else:
-        return (df - df.min()) / (df.max() - df.min())
-
-
-if __name__ == "__main__":
-    test_sensitivity_analysis()
diff --git a/examples/poisson-line-process/example_json_frontend.py b/examples/poisson-line-process/example_json_frontend.py
deleted file mode 100644
index 43676ef7..00000000
--- a/examples/poisson-line-process/example_json_frontend.py
+++ /dev/null
@@ -1,64 +0,0 @@
-import logging
-
-from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
-from causal_testing.testing.causal_test_outcome import ExactValue, Positive, Negative, NoEffect
-from causal_testing.json_front.json_class import JsonUtility
-from causal_testing.specification.scenario import Scenario
-from causal_testing.specification.variable import Input, Output
-
-
-logger = logging.getLogger(__name__)
-logging.basicConfig(level=logging.DEBUG, format="%(message)s")
-
-effects = {
-    "Positive": Positive(),
-    "Negative": Negative(),
-    "ExactValue4_05": ExactValue(4, atol=0.5),
-    "NoEffect": NoEffect(),
-}
-
-estimators = {
-    "LinearRegressionEstimator": LinearRegressionEstimator,
-}
-
-# 2. Create variables
-width = Input("width", float)
-height = Input("height", float)
-intensity = Input("intensity", float)
-
-num_lines_abs = Output("num_lines_abs", float)
-num_lines_unit = Output("num_lines_unit", float)
-num_shapes_abs = Output("num_shapes_abs", float)
-num_shapes_unit = Output("num_shapes_unit", float)
-
-# 3. Create scenario by applying constraints over a subset of the input variables
-scenario = Scenario(
-    variables={
-        width,
-        height,
-        intensity,
-        num_lines_abs,
-        num_lines_unit,
-        num_shapes_abs,
-        num_shapes_unit,
-    }
-)
-scenario.setup_treatment_variables()
-
-mutates = {
-    "Increase": lambda x: scenario.treatment_variables[x].z3 > scenario.variables[x].z3,
-    "ChangeByFactor(2)": lambda x: scenario.treatment_variables[x].z3 == scenario.variables[x].z3 * 2,
-}
-
-
-if __name__ == "__main__":
-    args = JsonUtility.get_args()
-    json_utility = JsonUtility(args.log_path)  # Create an instance of the extended JsonUtility class
-    json_utility.set_paths(
-        args.json_path, args.dag_path, args.data_path
-    )  # Set the path to the data.csv, dag.dot and causal_tests.json file
-
-    # Load the Causal Variables into the JsonUtility class ready to be used in the tests
-    json_utility.setup(scenario=scenario)  # Sets up all the necessary parts of the json_class needed to execute tests
-
-    json_utility.run_json_tests(effects=effects, mutates=mutates, estimators=estimators, f_flag=args.f)
diff --git a/examples/poisson-line-process/example_pure_python.py b/examples/poisson-line-process/example_pure_python.py
index 8959954e..19fedc0c 100644
--- a/examples/poisson-line-process/example_pure_python.py
+++ b/examples/poisson-line-process/example_pure_python.py
@@ -85,8 +85,6 @@ def test_poisson_intensity_num_shapes(save=False):
             CausalTestCase(
                 base_test_case=base_test_case,
                 expected_causal_effect=ExactValue(4, atol=0.5),
-                treatment_value=treatment_value,
-                control_value=control_value,
                 estimate_type="risk_ratio",
                 estimator=EmpiricalMeanEstimator(
                     treatment=base_test_case.treatment_variable.name,
@@ -103,8 +101,6 @@ def test_poisson_intensity_num_shapes(save=False):
             CausalTestCase(
                 base_test_case=base_test_case,
                 expected_causal_effect=ExactValue(4, atol=0.5),
-                treatment_value=treatment_value,
-                control_value=control_value,
                 estimate_type="risk_ratio",
                 estimator=LinearRegressionEstimator(
                     treatment=base_test_case.treatment_variable.name,
@@ -150,8 +146,6 @@ def test_poisson_width_num_shapes(save=False):
         CausalTestCase(
             base_test_case=base_test_case,
             expected_causal_effect=Positive(),
-            control_value=float(w),
-            treatment_value=w + 1.0,
             estimate_type="ate_calculated",
             effect_modifier_configuration={"intensity": i},
             estimator=LinearRegressionEstimator(
diff --git a/tests/generation_tests/test_abstract_test_case.py b/tests/generation_tests/test_abstract_test_case.py
deleted file mode 100644
index fd40f3de..00000000
--- a/tests/generation_tests/test_abstract_test_case.py
+++ /dev/null
@@ -1,199 +0,0 @@
-import unittest
-import os
-import shutil, tempfile
-import pandas as pd
-import numpy as np
-from causal_testing.generation.abstract_causal_test_case import AbstractCausalTestCase
-from causal_testing.generation.enum_gen import EnumGen
-from causal_testing.specification.causal_specification import Scenario
-from causal_testing.specification.variable import Input, Output
-from scipy.stats import uniform, rv_discrete
-from causal_testing.testing.causal_test_outcome import Positive
-from z3 import And
-from enum import Enum
-
-
-class Car(Enum):
-    isetta = "vehicle.bmw.isetta"
-    mkz2017 = "vehicle.lincoln.mkz2017"
-
-    def __gt__(self, other):
-        if self.__class__ is other.__class__:
-            return self.value > other.value
-        return NotImplemented
-
-
-class TestAbstractTestCase(unittest.TestCase):
-    """
-    Class to test abstract test cases.
-    """
-
-    def setUp(self) -> None:
-        self.temp_dir_path = tempfile.mkdtemp()
-        self.dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot")
-        self.observational_df_path = os.path.join(self.temp_dir_path, "observational_data.csv")
-        # Y = 3*X1 + X2*X3 + 10
-        self.observational_df = pd.DataFrame({"X1": [1, 2, 3, 4], "X2": [5, 6, 7, 8], "X3": [10, 20, 30, 40]})
-        self.observational_df["Y"] = self.observational_df.apply(
-            lambda row: (3 * row.X1) + (row.X2 * row.X3) + 10, axis=1
-        )
-        self.observational_df.to_csv(self.observational_df_path)
-        self.X1 = Input("X1", float, uniform(1, 4))
-        self.X2 = Input("X2", int, rv_discrete(values=([7], [1])))
-        self.X3 = Input("X3", float, uniform(10, 40))
-        self.X4 = Input("X4", int, rv_discrete(values=([10], [1])))
-        self.X5 = Input("X5", bool, rv_discrete(values=(range(2), [0.5, 0.5])))
-        self.Car = Input("Car", Car, EnumGen(Car))
-        self.Y = Output("Y", int)
-
-    def test_generate_concrete_test_cases(self):
-        scenario = Scenario({self.X1, self.X2, self.X3, self.X4})
-        scenario.setup_treatment_variables()
-        abstract = AbstractCausalTestCase(
-            scenario=scenario,
-            intervention_constraints={scenario.treatment_variables[self.X1.name].z3 > self.X1.z3},
-            treatment_variable=self.X1,
-            expected_causal_effect={self.Y: Positive()},
-            effect_modifiers=None,
-        )
-        concrete_tests, runs = abstract.generate_concrete_tests(2)
-        assert len(concrete_tests) == 2, "Expected 2 concrete tests"
-        assert len(runs) == 2, "Expected 2 runs"
-
-    def test_generate_boolean_concrete_test_cases(self):
-        scenario = Scenario({self.X1, self.X2, self.X3, self.X5})
-        scenario.setup_treatment_variables()
-        abstract = AbstractCausalTestCase(
-            scenario=scenario,
-            intervention_constraints={
-                scenario.treatment_variables[self.X5.name].z3 != scenario.variables[self.X5.name].z3
-            },
-            treatment_variable=self.X5,
-            expected_causal_effect={self.Y: Positive()},
-            effect_modifiers=None,
-        )
-        concrete_tests, runs = abstract.generate_concrete_tests(2)
-        assert len(concrete_tests) == 2, "Expected 2 concrete test"
-        assert len(runs) == 2, "Expected 2 run"
-
-    def test_generate_enum_concrete_test_cases(self):
-        scenario = Scenario({self.Car})
-        scenario.setup_treatment_variables()
-        abstract = AbstractCausalTestCase(
-            scenario=scenario,
-            intervention_constraints={
-                scenario.treatment_variables[self.Car.name].z3 != scenario.variables[self.Car.name].z3
-            },
-            treatment_variable=self.Car,
-            expected_causal_effect={self.Y: Positive()},
-            effect_modifiers=None,
-        )
-        concrete_tests, runs = abstract.generate_concrete_tests(10)
-        assert len(concrete_tests) == 2, "Expected 2 concrete tests"
-        assert len(runs) == 2, "Expected 2 runs"
-
-    def test_str(self):
-        scenario = Scenario({self.X1, self.X2, self.X3, self.X4})
-        scenario.setup_treatment_variables()
-        abstract = AbstractCausalTestCase(
-            scenario=scenario,
-            intervention_constraints={scenario.treatment_variables[self.X1.name].z3 > self.X1.z3},
-            treatment_variable=self.X1,
-            expected_causal_effect={self.Y: Positive()},
-            effect_modifiers=None,
-        )
-        assert (
-            str(abstract) == "When we apply intervention {X1' > X1}, the effect on Output: Y::int should be Positive"
-        ), f"Unexpected string {str(abstract)}"
-
-    def test_datapath(self):
-        scenario = Scenario({self.X1, self.X2, self.X3, self.X4})
-        scenario.setup_treatment_variables()
-        abstract = AbstractCausalTestCase(
-            scenario=scenario,
-            intervention_constraints={scenario.treatment_variables[self.X1.name].z3 > self.X1.z3},
-            treatment_variable=self.X1,
-            expected_causal_effect={self.Y: Positive()},
-            effect_modifiers=None,
-        )
-        assert abstract.datapath() == "X1X1_Y_Positive.csv", f"Unexpected datapath {abstract.datapath()}"
-
-    def test_generate_concrete_test_cases_with_constraints(self):
-        scenario = Scenario({self.X1, self.X2, self.X3, self.X4}, {self.X1 < self.X2})
-        scenario.setup_treatment_variables()
-        abstract = AbstractCausalTestCase(
-            scenario=scenario,
-            intervention_constraints={scenario.treatment_variables[self.X1.name].z3 > self.X1.z3},
-            treatment_variable=self.X1,
-            expected_causal_effect={self.Y: Positive()},
-            effect_modifiers=None,
-        )
-        concrete_tests, runs = abstract.generate_concrete_tests(2)
-        assert len(concrete_tests) == 2, "Expected 2 concrete tests"
-        assert len(runs) == 2, "Expected 2 runs"
-
-    def test_generate_concrete_test_cases_with_effect_modifiers(self):
-        scenario = Scenario({self.X1, self.X2, self.X3, self.X4})
-        scenario.setup_treatment_variables()
-        abstract = AbstractCausalTestCase(
-            scenario=scenario,
-            intervention_constraints={scenario.treatment_variables[self.X1.name].z3 > self.X1.z3},
-            treatment_variable=self.X1,
-            expected_causal_effect={self.Y: Positive()},
-            effect_modifiers={self.X2},
-        )
-        concrete_tests, runs = abstract.generate_concrete_tests(2)
-        assert len(concrete_tests) == 2, "Expected 2 concrete tests"
-        assert len(runs) == 2, "Expected 2 runs"
-
-    def test_generate_concrete_test_cases_rct(self):
-        scenario = Scenario({self.X1, self.X2, self.X3, self.X4})
-        scenario.setup_treatment_variables()
-        abstract = AbstractCausalTestCase(
-            scenario=scenario,
-            intervention_constraints={scenario.treatment_variables[self.X1.name].z3 > self.X1.z3},
-            treatment_variable=self.X1,
-            expected_causal_effect={self.Y: Positive()},
-            effect_modifiers=None,
-        )
-        concrete_tests, runs = abstract.generate_concrete_tests(2, rct=True)
-        assert len(concrete_tests) == 2, "Expected 2 concrete tests"
-        assert len(runs) == 4, "Expected 4 runs"
-
-    def test_infeasible_constraints(self):
-        scenario = Scenario({self.X1, self.X2, self.X3, self.X4}, [self.X1.z3 > 2])
-        scenario.setup_treatment_variables()
-        abstract = AbstractCausalTestCase(
-            scenario=scenario,
-            intervention_constraints={scenario.treatment_variables[self.X1.name].z3 > self.X1.z3},
-            treatment_variable=self.X1,
-            expected_causal_effect={self.Y: Positive()},
-            effect_modifiers=None,
-        )
-        HARD_MAX = 10
-        NUM_STRATA = 4
-
-        with self.assertWarns(Warning):
-            concrete_tests, runs = abstract.generate_concrete_tests(4, rct=True, target_ks_score=0.1, hard_max=HARD_MAX)
-        self.assertTrue(all((x > 2 for x in runs["X1"])))
-        self.assertTrue(len(concrete_tests) <= HARD_MAX * NUM_STRATA)
-
-    def test_feasible_constraints(self):
-        scenario = Scenario({self.X1, self.X2, self.X3, self.X4})
-        scenario.setup_treatment_variables()
-        abstract = AbstractCausalTestCase(
-            scenario=scenario,
-            intervention_constraints={scenario.treatment_variables[self.X1.name].z3 > self.X1.z3},
-            treatment_variable=self.X1,
-            expected_causal_effect={self.Y: Positive()},
-            effect_modifiers=None,
-        )
-        concrete_tests, _ = abstract.generate_concrete_tests(4, rct=True, target_ks_score=0.1, hard_max=1000)
-        assert len(concrete_tests) < 1000
-
-    def tearDown(self) -> None:
-        shutil.rmtree(self.temp_dir_path)
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/json_front_tests/test_json_class.py b/tests/json_front_tests/test_json_class.py
index ab565da7..07f06f65 100644
--- a/tests/json_front_tests/test_json_class.py
+++ b/tests/json_front_tests/test_json_class.py
@@ -1,4 +1,5 @@
 import unittest
+import pytest
 from pathlib import Path
 from statistics import StatisticsError
 import scipy
@@ -13,6 +14,7 @@
 from causal_testing.specification.causal_specification import CausalSpecification
 
 
+@pytest.mark.skip(reason="json frontend under reconstruction")
 class TestJsonClass(unittest.TestCase):
     """Test the JSON frontend for the Causal Testing Framework (CTF)
 
@@ -88,7 +90,7 @@ def test_f_flag(self):
                     "name": "test1",
                     "mutations": {"test_input": "Increase"},
                     "estimator": "LinearRegressionEstimator",
-                    "estimate_type": "ate",
+                    "estimate_type": "coefficient",
                     "effect_modifiers": [],
                     "expected_effect": {"test_output": "NoEffect"},
                     "skip": False,
@@ -165,7 +167,7 @@ def test_generate_tests_from_json_no_dist(self):
                     "name": "test1",
                     "mutations": {"test_input_no_dist": "Increase"},
                     "estimator": "LinearRegressionEstimator",
-                    "estimate_type": "ate",
+                    "estimate_type": "coefficient",
                     "effect_modifiers": [],
                     "expected_effect": {"test_output": "NoEffect"},
                     "skip": False,
@@ -194,7 +196,7 @@ def test_formula_in_json_test(self):
                     "name": "test1",
                     "mutations": {"test_input": "Increase"},
                     "estimator": "LinearRegressionEstimator",
-                    "estimate_type": "ate",
+                    "estimate_type": "coefficient",
                     "effect_modifiers": [],
                     "expected_effect": {"test_output": "Positive"},
                     "skip": False,
@@ -224,7 +226,7 @@ def test_run_concrete_json_testcase(self):
                     "control_value": 0,
                     "treatment_value": 1,
                     "estimator": "LinearRegressionEstimator",
-                    "estimate_type": "ate",
+                    "estimate_type": "coefficient",
                     "expected_effect": {"test_output": "NoEffect"},
                     "skip": False,
                 }
@@ -239,38 +241,6 @@ def test_run_concrete_json_testcase(self):
             temp_out = reader.readlines()
         self.assertIn("FAILED", temp_out[-1])
 
-    def test_concrete_generate_params(self):
-        example_test = {
-            "tests": [
-                {
-                    "name": "test1",
-                    "mutations": {"test_input": "Increase"},
-                    "estimator": "LinearRegressionEstimator",
-                    "estimate_type": "ate",
-                    "effect_modifiers": [],
-                    "expected_effect": {"test_output": "NoEffect"},
-                    "sample_size": 5,
-                    "target_ks_score": 0.05,
-                    "skip": False,
-                }
-            ]
-        }
-        self.json_class.test_plan = example_test
-        effects = {"NoEffect": NoEffect()}
-        mutates = {
-            "Increase": lambda x: self.json_class.scenario.treatment_variables[x].z3
-            > self.json_class.scenario.variables[x].z3
-        }
-        estimators = {"LinearRegressionEstimator": LinearRegressionEstimator}
-
-        self.json_class.run_json_tests(effects=effects, estimators=estimators, f_flag=False, mutates=mutates)
-
-        # Test that the final log message prints that failed tests are printed, which is expected behaviour for this
-        # scenario
-        with open("temp_out.txt", "r") as reader:
-            temp_out = reader.readlines()
-        self.assertIn("failed", temp_out[-1])
-
     def test_no_data_provided(self):
         example_test = {
             "tests": [
@@ -278,7 +248,7 @@ def test_no_data_provided(self):
                     "name": "test1",
                     "mutations": {"test_input": "Increase"},
                     "estimator": "LinearRegressionEstimator",
-                    "estimate_type": "ate",
+                    "estimate_type": "coefficient",
                     "effect_modifiers": [],
                     "expected_effect": {"test_output": "NoEffect"},
                     "skip": False,
@@ -302,7 +272,7 @@ def add_modelling_assumptions(self):
                     "name": "test1",
                     "mutations": {"test_input": "Increase"},
                     "estimator": "ExampleEstimator",
-                    "estimate_type": "ate",
+                    "estimate_type": "coefficient",
                     "effect_modifiers": [],
                     "expected_effect": {"test_output": "Positive"},
                     "skip": False,
diff --git a/tests/testing_tests/test_causal_test_adequacy.py b/tests/testing_tests/test_causal_test_adequacy.py
index e4bed6a8..f3e72aca 100644
--- a/tests/testing_tests/test_causal_test_adequacy.py
+++ b/tests/testing_tests/test_causal_test_adequacy.py
@@ -128,11 +128,9 @@ def test_data_adequacy_group_by(self):
         causal_test_case = CausalTestCase(
             base_test_case=base_test_case,
             expected_causal_effect=SomeEffect(),
-            control_value=control_strategy,
-            treatment_value=treatment_strategy,
             estimate_type="hazard_ratio",
+            estimator=estimation_model,
         )
-        causal_test_result = causal_test_case.execute_test(estimation_model)
         adequacy_metric = DataAdequacy(causal_test_case, estimation_model, group_by="id")
         adequacy_metric.measure_adequacy()
         adequacy_dict = adequacy_metric.to_dict()
diff --git a/tests/testing_tests/test_causal_test_case.py b/tests/testing_tests/test_causal_test_case.py
index 2b9c086d..0edc3321 100644
--- a/tests/testing_tests/test_causal_test_case.py
+++ b/tests/testing_tests/test_causal_test_case.py
@@ -33,15 +33,20 @@ def setUp(self) -> None:
         self.causal_test_case = CausalTestCase(
             base_test_case=self.base_test_case,
             expected_causal_effect=self.expected_causal_effect,
-            control_value=0,
-            treatment_value=1,
+            estimator=LinearRegressionEstimator(
+                treatment="A",
+                adjustment_set=set(),
+                outcome="C",
+                control_value=0,
+                treatment_value=1,
+            ),
         )
 
     def test_str(self):
+        print(str(self.causal_test_case))
         self.assertEqual(
             str(self.causal_test_case),
-            "Running {'A': 1} instead of {'A': 0} should cause the following changes to"
-            " {Output: C::float}: ExactValue: 4±0.2.",
+            "Running {'A': 1} instead of {'A': 0} should cause the following changes to {'C'}: ExactValue: 4±0.2.",
         )
 
 
@@ -74,8 +79,8 @@ def setUp(self) -> None:
         self.causal_test_case = CausalTestCase(
             base_test_case=self.base_test_case,
             expected_causal_effect=self.expected_causal_effect,
-            control_value=0,
-            treatment_value=1,
+            # control_value=0,
+            # treatment_value=1,
         )
 
         # 4. Create dummy test data and write to csv
@@ -126,27 +131,25 @@ def test_execute_test_observational_linear_regression_estimator_direct_effect(se
         """Check that executing the causal test case returns the correct results for dummy data using a linear
         regression estimator."""
         base_test_case = BaseTestCase(treatment_variable=self.A, outcome_variable=self.C, effect="direct")
+        estimation_model = LinearRegressionEstimator(
+            "A",
+            self.treatment_value,
+            self.control_value,
+            self.causal_dag.identification(base_test_case),
+            "C",
+            self.df,
+        )
 
         causal_test_case = CausalTestCase(
             base_test_case=base_test_case,
             expected_causal_effect=self.expected_causal_effect,
-            control_value=0,
-            treatment_value=1,
+            estimator=estimation_model,
         )
 
-        minimal_adjustment_set = self.causal_dag.identification(base_test_case)
         # 6. Easier to access treatment and outcome values
         self.treatment_value = 1
         self.control_value = 0
-        estimation_model = LinearRegressionEstimator(
-            "A",
-            self.treatment_value,
-            self.control_value,
-            minimal_adjustment_set,
-            "C",
-            self.df,
-        )
-        causal_test_result = causal_test_case.execute_test(estimation_model)
+        causal_test_result = causal_test_case.execute_test()
         pd.testing.assert_series_equal(causal_test_result.test_value.value, pd.Series(4.0), atol=1e-10)
 
     def test_execute_test_observational_linear_regression_estimator_coefficient(self):
diff --git a/tests/testing_tests/test_causal_test_suite.py b/tests/testing_tests/test_causal_test_suite.py
deleted file mode 100644
index a3a5fc6b..00000000
--- a/tests/testing_tests/test_causal_test_suite.py
+++ /dev/null
@@ -1,105 +0,0 @@
-import unittest
-import os
-import tempfile
-import numpy as np
-import shutil
-import pandas as pd
-from causal_testing.testing.causal_test_suite import CausalTestSuite
-from causal_testing.testing.causal_test_case import CausalTestCase
-from causal_testing.testing.base_test_case import BaseTestCase
-from causal_testing.specification.variable import Input, Output
-from causal_testing.testing.causal_test_outcome import ExactValue
-from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
-from causal_testing.estimation.logistic_regression_estimator import LogisticRegressionEstimator
-from causal_testing.specification.causal_specification import CausalSpecification, Scenario
-from causal_testing.specification.causal_dag import CausalDAG
-
-
-class TestCausalTestSuite(unittest.TestCase):
-    """Test the Test Suite object using dummy data."""
-
-    def setUp(self) -> None:
-        # 1. Create dummy Scenario and BaseTestCase
-        A = Input("A", float)
-        self.A = A
-        C = Output("C", float)
-        self.C = C
-        D = Output("D", float)
-        self.D = D
-        self.base_test_case = BaseTestCase(A, C)
-        self.scenario = Scenario({A, C, D})
-
-        # 2. Create DAG and dummy data and write to csvs
-        self.temp_dir_path = tempfile.mkdtemp()
-        dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot")
-        dag_dot = """digraph G { A -> C; D -> A; D -> C}"""
-        with open(dag_dot_path, "w") as file:
-            file.write(dag_dot)
-
-        np.random.seed(1)
-        df = pd.DataFrame({"D": list(np.random.normal(60, 10, 1000))})  # D = exogenous
-        df["A"] = [1 if d > 50 else 0 for d in df["D"]]
-        df["C"] = df["D"] + (4 * (df["A"] + 2))  # C = (4*(A+2)) + D
-        self.df = df
-        self.causal_dag = CausalDAG(dag_dot_path)
-
-        # 3. Specify data structures required for test suite
-        self.expected_causal_effect = ExactValue(4)
-        test_list = [
-            CausalTestCase(
-                self.base_test_case,
-                self.expected_causal_effect,
-                0,
-                1,
-            ),
-            CausalTestCase(self.base_test_case, self.expected_causal_effect, 0, 2),
-        ]
-        self.estimators = [LinearRegressionEstimator]
-
-        # 3. Create test_suite and add a test
-        self.test_suite = CausalTestSuite()
-        self.test_suite.add_test_object(
-            base_test_case=self.base_test_case, causal_test_case_list=test_list, estimators_classes=self.estimators
-        )
-        self.causal_specification = CausalSpecification(self.scenario, self.causal_dag)
-
-    def tearDown(self) -> None:
-        shutil.rmtree(self.temp_dir_path)
-
-    def test_adding_test_object(self):
-        "test an object can be added to the test_suite using the add_test_object function"
-        test_suite = CausalTestSuite()
-        test_list = [CausalTestCase(self.base_test_case, self.expected_causal_effect, 0, 1)]
-        estimators = [LinearRegressionEstimator]
-        test_suite.add_test_object(
-            base_test_case=self.base_test_case, causal_test_case_list=test_list, estimators_classes=estimators
-        )
-        manual_test_object = {
-            self.base_test_case: {"tests": test_list, "estimators": estimators, "estimate_type": "ate"}
-        }
-        self.assertEqual(test_suite, manual_test_object)
-
-    def test_return_single_test_object(self):
-        """Test that a single test case can be returned from the test_suite"""
-        base_test_case = BaseTestCase(self.A, self.D)
-
-        test_list = [CausalTestCase(self.base_test_case, self.expected_causal_effect, 0, 1)]
-        estimators = [LinearRegressionEstimator]
-        self.test_suite.add_test_object(
-            base_test_case=base_test_case, causal_test_case_list=test_list, estimators_classes=estimators
-        )
-
-        manual_test_case = {"tests": test_list, "estimators": estimators, "estimate_type": "ate"}
-
-        test_case = self.test_suite[base_test_case]
-
-        self.assertEqual(test_case, manual_test_case)
-
-    def test_execute_test_suite_single_base_test_case(self):
-        """Check that the test suite can return the correct results from dummy data for a single base_test-case"""
-
-        causal_test_results = self.test_suite.execute_test_suite(self.causal_specification, self.df)
-        causal_test_case_result = causal_test_results[self.base_test_case]
-        self.assertAlmostEqual(
-            causal_test_case_result["LinearRegressionEstimator"][0].test_value.value[0], 4, delta=1e-10
-        )

From 31a70b1a2a45a1ceda953e55621867c6436e8a3c Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Tue, 18 Feb 2025 15:17:32 +0000
Subject: [PATCH 29/44] Removed causal test suite

---
 .../testing/causal_test_adequacy.py           |  3 +-
 causal_testing/testing/causal_test_suite.py   | 87 -------------------
 .../test_causal_test_adequacy.py              | 15 +---
 3 files changed, 4 insertions(+), 101 deletions(-)
 delete mode 100644 causal_testing/testing/causal_test_suite.py

diff --git a/causal_testing/testing/causal_test_adequacy.py b/causal_testing/testing/causal_test_adequacy.py
index aa8222c6..8a12b4e2 100644
--- a/causal_testing/testing/causal_test_adequacy.py
+++ b/causal_testing/testing/causal_test_adequacy.py
@@ -9,7 +9,6 @@
 from numpy.linalg import LinAlgError
 from lifelines.exceptions import ConvergenceError
 
-from causal_testing.testing.causal_test_suite import CausalTestSuite
 from causal_testing.specification.causal_dag import CausalDAG
 from causal_testing.estimation.abstract_estimator import Estimator
 from causal_testing.testing.causal_test_case import CausalTestCase
@@ -25,7 +24,7 @@ class DAGAdequacy:
     def __init__(
         self,
         causal_dag: CausalDAG,
-        test_suite: CausalTestSuite,
+        test_suite: list[CausalTestCase],
     ):
         self.causal_dag = causal_dag
         self.test_suite = test_suite
diff --git a/causal_testing/testing/causal_test_suite.py b/causal_testing/testing/causal_test_suite.py
deleted file mode 100644
index d5b93bc3..00000000
--- a/causal_testing/testing/causal_test_suite.py
+++ /dev/null
@@ -1,87 +0,0 @@
-"""This module contains the CausalTestSuite class, for details on using it:
-https://causal-testing-framework.readthedocs.io/en/latest/test_suite.html"""
-
-import logging
-from typing import Type, Iterable
-from collections import UserDict
-import pandas as pd
-
-from causal_testing.testing.base_test_case import BaseTestCase
-from causal_testing.testing.causal_test_case import CausalTestCase
-from causal_testing.estimation.abstract_estimator import Estimator
-from causal_testing.testing.causal_test_result import CausalTestResult
-from causal_testing.specification.causal_specification import CausalSpecification
-
-logger = logging.getLogger(__name__)
-
-
-class CausalTestSuite(UserDict):
-    """
-    A CausalTestSuite is an extension of the UserDict class, therefore it behaves as a python dictionary with the added
-    functionality of this class.
-    The dictionary structure should be the keys are base_test_cases representing the treatment and outcome Variables,
-    and the values are test objects. Test Objects hold a causal_test_case_list which is a list of causal_test_cases
-    which provide control and treatment values, and an iterator of Estimator Class References
-
-    This dictionary can be fed into the execute_test_suite function which will iterate over all the
-    base_test_case's and execute each causal_test_case with each iterator.
-    """
-
-    def add_test_object(
-        self,
-        base_test_case: BaseTestCase,
-        causal_test_case_list: Iterable[CausalTestCase],
-        estimators: Iterable[Type[Estimator]],
-        estimate_type: str = "ate",
-    ):
-        """
-        A setter object to allow for the easy construction of the dictionary test suite structure
-
-        :param base_test_case: A BaseTestCase object consisting of a treatment variable, outcome variable and effect
-        :param causal_test_case_list: A list of causal test cases to be executed
-        :param estimators: A list of estimator classes (NOT instances) to be used to execute each of the test cases.
-        Each estimator will be applied to each test case, so this will typically just be a single element list.
-        However, if you want to compare the outputs of different estimators, you may include more than one class here.
-        :param estimate_type: A string which denotes the type of estimate to return
-        """
-        test_object = {"tests": causal_test_case_list, "estimators": estimators, "estimate_type": estimate_type}
-        self.data[base_test_case] = test_object
-
-    def execute_test_suite(
-        self, causal_specification: CausalSpecification, df: pd.DataFrame
-    ) -> dict[str, CausalTestResult]:
-        """Execute a suite of causal tests and return the results in a list
-        :param causal_specification: A causal specification object which wraps up the scenario and causal DAG.
-        :param df: A dataframe containing the test data.
-        :return: A dictionary where each key is the name of the estimators specified and the values are lists of
-                causal_test_result objects
-        """
-        test_suite_results = {}
-        for edge in self:
-            logger.info("treatment: %s", edge.treatment_variable)
-            logger.info("outcome: %s", edge.outcome_variable)
-            minimal_adjustment_set = causal_specification.causal_dag.identification(edge)
-            minimal_adjustment_set = minimal_adjustment_set - set(edge.treatment_variable.name)
-            minimal_adjustment_set = minimal_adjustment_set - set(edge.outcome_variable.name)
-
-            estimators = self[edge]["estimators"]
-            tests = self[edge]["tests"]
-            results = {}
-            for estimator_class in estimators:
-                causal_test_results = []
-
-                for test in tests:
-                    estimator = estimator_class(
-                        test.treatment_variable.name,
-                        test.treatment_value,
-                        test.control_value,
-                        minimal_adjustment_set,
-                        test.outcome_variable.name,
-                        df=df,
-                    )
-                    causal_test_result = test.execute_test(estimator)
-                    causal_test_results.append(causal_test_result)
-
-                results[estimator_class.__name__] = causal_test_results
-            test_suite_results[edge] = results
-        return test_suite_results
diff --git a/tests/testing_tests/test_causal_test_adequacy.py b/tests/testing_tests/test_causal_test_adequacy.py
index f3e72aca..c3a0227d 100644
--- a/tests/testing_tests/test_causal_test_adequacy.py
+++ b/tests/testing_tests/test_causal_test_adequacy.py
@@ -8,7 +8,6 @@
 from causal_testing.estimation.ipcw_estimator import IPCWEstimator
 from causal_testing.testing.base_test_case import BaseTestCase
 from causal_testing.testing.causal_test_case import CausalTestCase
-from causal_testing.testing.causal_test_suite import CausalTestSuite
 from causal_testing.testing.causal_test_adequacy import DAGAdequacy
 from causal_testing.testing.causal_test_outcome import NoEffect, SomeEffect
 from causal_testing.json_front.json_class import JsonUtility, CausalVariables
@@ -93,8 +92,6 @@ def test_data_adequacy_cateogorical(self):
         test_results = self.json_class.run_json_tests(
             effects=effects, estimators=estimators, f_flag=False, mutates=mutates
         )
-        print("RESULT")
-        print(test_results[0]["result"])
         self.assertEqual(
             test_results[0]["result"].adequacy.to_dict(),
             {"kurtosis": {"test_input_no_dist[T.b]": 0.0}, "bootstrap_size": 100, "passing": 100, "successful": 100},
@@ -152,11 +149,9 @@ def test_dag_adequacy_dependent(self):
             expected_causal_effect=None,
             estimate_type=None,
         )
-        test_suite = CausalTestSuite()
-        test_suite.add_test_object(base_test_case, causal_test_case, None, None)
+        test_suite = [causal_test_case]
         dag_adequacy = DAGAdequacy(self.json_class.causal_specification.causal_dag, test_suite)
         dag_adequacy.measure_adequacy()
-        print(dag_adequacy.to_dict())
         self.assertEqual(
             dag_adequacy.to_dict(),
             {
@@ -201,11 +196,9 @@ def test_dag_adequacy_independent(self):
             expected_causal_effect=None,
             estimate_type=None,
         )
-        test_suite = CausalTestSuite()
-        test_suite.add_test_object(base_test_case, causal_test_case, None, None)
+        test_suite = [causal_test_case]
         dag_adequacy = DAGAdequacy(self.json_class.causal_specification.causal_dag, test_suite)
         dag_adequacy.measure_adequacy()
-        print(dag_adequacy.to_dict())
         self.assertEqual(
             dag_adequacy.to_dict(),
             {
@@ -250,11 +243,9 @@ def test_dag_adequacy_independent_other_way(self):
             expected_causal_effect=None,
             estimate_type=None,
         )
-        test_suite = CausalTestSuite()
-        test_suite.add_test_object(base_test_case, causal_test_case, None, None)
+        test_suite = [causal_test_case]
         dag_adequacy = DAGAdequacy(self.json_class.causal_specification.causal_dag, test_suite)
         dag_adequacy.measure_adequacy()
-        print(dag_adequacy.to_dict())
         self.assertEqual(
             dag_adequacy.to_dict(),
             {

From 7c0f0f2959b0742e3f062b19881bcff34581ebf8 Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Tue, 18 Feb 2025 15:20:28 +0000
Subject: [PATCH 30/44] Removed abstract causal test case + pylint

---
 causal_testing/generation/__init__.py         |   0
 .../generation/abstract_causal_test_case.py   | 279 ------------------
 causal_testing/generation/enum_gen.py         |  44 ---
 causal_testing/json_front/json_class.py       |   3 +-
 .../test_causal_test_adequacy.py              |   8 +-
 5 files changed, 2 insertions(+), 332 deletions(-)
 delete mode 100644 causal_testing/generation/__init__.py
 delete mode 100644 causal_testing/generation/abstract_causal_test_case.py
 delete mode 100644 causal_testing/generation/enum_gen.py

diff --git a/causal_testing/generation/__init__.py b/causal_testing/generation/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/causal_testing/generation/abstract_causal_test_case.py b/causal_testing/generation/abstract_causal_test_case.py
deleted file mode 100644
index 1bf30f57..00000000
--- a/causal_testing/generation/abstract_causal_test_case.py
+++ /dev/null
@@ -1,279 +0,0 @@
-"""This module contains the class AbstractCausalTestCase, which generates concrete test cases"""
-
-import itertools
-import logging
-from enum import Enum
-from typing import Iterable
-
-import lhsmdu
-import pandas as pd
-import z3
-from scipy import stats
-
-
-from causal_testing.specification.scenario import Scenario
-from causal_testing.specification.variable import Variable
-from causal_testing.testing.causal_test_case import CausalTestCase
-from causal_testing.testing.causal_test_outcome import CausalTestOutcome
-from causal_testing.testing.base_test_case import BaseTestCase
-
-
-logger = logging.getLogger(__name__)
-
-
-class AbstractCausalTestCase:
-    """
-    An abstract test case serves as a generator for concrete test cases. Instead of having concrete control
-    and treatment values, we instead just specify the intervention and the treatment variables. This then
-    enables potentially infinite concrete test cases to be generated between different values of the treatment.
-    """
-
-    def __init__(
-        # pylint: disable=too-many-arguments
-        self,
-        scenario: Scenario,
-        intervention_constraints: set[z3.ExprRef],
-        treatment_variable: Variable,
-        expected_causal_effect: dict[Variable:CausalTestOutcome],
-        effect_modifiers: set[Variable] = None,
-        estimate_type: str = "ate",
-        effect: str = "total",
-    ):
-        if treatment_variable not in scenario.variables.values():
-            raise ValueError(
-                "Treatment variables must be a subset of variables."
-                + f" Instead got:\ntreatment_variables={treatment_variable}\nvariables={scenario.variables}"
-            )
-
-        assert len(expected_causal_effect) == 1, "We currently only support tests with one causal outcome"
-
-        self.scenario = scenario
-        self.intervention_constraints = intervention_constraints
-        self.treatment_variable = treatment_variable
-        self.expected_causal_effect = expected_causal_effect
-        self.estimate_type = estimate_type
-        self.effect = effect
-
-        if effect_modifiers is not None:
-            self.effect_modifiers = effect_modifiers
-        else:
-            self.effect_modifiers = {}
-
-    def __str__(self):
-        outcome_string = " and ".join(
-            [f"the effect on {var} should be {str(effect)}" for var, effect in self.expected_causal_effect.items()]
-        )
-        return f"When we apply intervention {self.intervention_constraints}, {outcome_string}"
-
-    def datapath(self) -> str:
-        """Create and return the sanitised data path"""
-
-        def sanitise(string):
-            return "".join([x for x in string if x.isalnum()])
-
-        return (
-            sanitise("-".join([str(c) for c in self.intervention_constraints]))
-            + "_"
-            + "-".join([f"{v.name}_{e}" for v, e in self.expected_causal_effect.items()])
-            + ".csv"
-        )
-
-    def _generate_concrete_tests(
-        # pylint: disable=too-many-locals
-        self,
-        sample_size: int,
-        rct: bool = False,
-        seed: int = 0,
-    ) -> tuple[list[CausalTestCase], pd.DataFrame]:
-        """Generates a list of `num` concrete test cases.
-
-        :param sample_size: The number of strata to use for Latin hypercube sampling. Where no target_ks_score is
-        provided, this corresponds to the number of test cases to generate. Where target_ks_score is provided, the
-        number of test cases will be a multiple of this.
-        :param rct: Whether we're running an RCT, i.e. whether to add the treatment run to the concrete runs.
-        :param seed: Random seed for reproducability.
-        :return: A list of causal test cases and a dataframe representing the required model run configurations.
-        :rtype: ([CausalTestCase], pd.DataFrame)
-        """
-
-        concrete_tests = []
-        runs = []
-        run_columns = sorted([v.name for v in self.scenario.variables.values() if v.distribution])
-
-        # Generate the Latin Hypercube samples and put into a dataframe
-        # lhsmdu.setRandomSeed(seed+i)
-        samples = pd.DataFrame(
-            lhsmdu.sample(len(run_columns), sample_size, randomSeed=seed).T,
-            columns=run_columns,
-        )
-        # Project the samples to the variables' distributions
-        for name in run_columns:
-            var = self.scenario.variables[name]
-            samples[var.name] = lhsmdu.inverseTransformSample(var.distribution, samples[var.name])
-
-        for index, row in samples.iterrows():
-            model = self._optimizer_model(run_columns, row)
-
-            base_test_case = BaseTestCase(
-                treatment_variable=self.treatment_variable,
-                outcome_variable=list(self.expected_causal_effect.keys())[0],
-                effect=self.effect,
-            )
-
-            concrete_test = CausalTestCase(
-                base_test_case=base_test_case,
-                control_value=self.treatment_variable.cast(model[self.treatment_variable.z3]),
-                treatment_value=self.treatment_variable.cast(
-                    model[self.scenario.treatment_variables[self.treatment_variable.name].z3]
-                ),
-                expected_causal_effect=list(self.expected_causal_effect.values())[0],
-                estimate_type=self.estimate_type,
-                effect_modifier_configuration={v: v.cast(model[v.z3]) for v in self.effect_modifiers},
-            )
-
-            for v in self.scenario.inputs():
-                if v.name in row and row[v.name] != v.cast(model[v.z3]):
-                    constraints = "\n  ".join([str(c) for c in self.scenario.constraints if v.name in str(c)])
-                    logger.warning(
-                        f"Unable to set variable {v.name} to {row[v.name]} because of constraints\n"
-                        + f"{constraints}\nUsing value {v.cast(model[v.z3])} instead in test\n{concrete_test}"
-                    )
-
-            if not any((vars(t) == vars(concrete_test) for t in concrete_tests)):
-                concrete_tests.append(concrete_test)
-                # Control run
-                control_run = {
-                    v.name: v.cast(model[v.z3]) for v in self.scenario.variables.values() if v.name in run_columns
-                }
-                control_run["bin"] = index
-                runs.append(control_run)
-                # Treatment run
-                if rct:
-                    treatment_run = control_run.copy()
-                    treatment_run.update({concrete_test.treatment_variable.name: concrete_test.treatment_value})
-                    treatment_run["bin"] = index
-                    runs.append(treatment_run)
-
-        return concrete_tests, pd.DataFrame(runs, columns=run_columns + ["bin"])
-
-    def generate_concrete_tests(
-        # pylint: disable=too-many-arguments, too-many-locals
-        self,
-        sample_size: int,
-        target_ks_score: float = None,
-        rct: bool = False,
-        seed: int = 0,
-        hard_max: int = 1000,
-    ) -> tuple[list[CausalTestCase], pd.DataFrame]:
-        """Generates a list of `num` concrete test cases.
-
-        :param sample_size: The number of strata to use for Latin hypercube sampling. Where no target_ks_score is
-        provided, this corresponds to the number of test cases to generate. Where target_ks_score is provided, the
-        number of test cases will be a multiple of this.
-        :param target_ks_score: The target KS score. A value in range [0, 1] with lower values representing a higher
-        confidence and requireing more tests to achieve. A value of 0.05 is recommended.
-        TODO: Make this more flexible so we're not restricting ourselves just to the KS test.
-        :param rct: Whether we're running an RCT, i.e. whether to add the treatment run to the concrete runs.
-        :param seed: Random seed for reproducability.
-        :param hard_max: Number of iterations to run for before timing out if target_ks_score cannot be reached.
-        :return: A list of causal test cases and a dataframe representing the required model run configurations.
-        :rtype: ([CausalTestCase], pd.DataFrame)
-        """
-
-        if target_ks_score is not None:
-            assert 0 <= target_ks_score <= 1, "target_ks_score must be between 0 and 1."
-        else:
-            hard_max = 1
-
-        concrete_tests = []
-        runs = pd.DataFrame()
-        ks_stats = []
-
-        pre_break = False
-        for i in range(hard_max):
-            concrete_tests_temp, runs_temp = self._generate_concrete_tests(sample_size, rct, seed + i)
-            for test in concrete_tests_temp:
-                if not any((vars(test) == vars(t) for t in concrete_tests)):
-                    concrete_tests.append(test)
-            runs = pd.concat([runs, runs_temp])
-            assert concrete_tests_temp not in concrete_tests, "Duplicate entries unlikely unless something went wrong"
-
-            control_configs = pd.DataFrame([{test.treatment_variable: test.control_value} for test in concrete_tests])
-            ks_stats = {
-                var: stats.kstest(control_configs[var], var.distribution.cdf).statistic
-                for var in control_configs.columns
-            }
-            # Putting treatment and control values in messes it up because the two are not independent...
-            # This is potentially problematic as constraints might mean we don't get good coverage if we use control
-            # values alone
-            # We might then need to carefully craft our _control value_ generating distributions so that we can get
-            # good coverage
-            # without the generated treatment values violating any constraints.
-
-            # treatment_configs = pd.DataFrame([test.treatment_input_configuration for test in concrete_tests])
-            # both_configs = pd.concat([control_configs, treatment_configs])
-            # ks_stats = {var: stats.kstest(both_configs[var], var.distribution.cdf).statistic for var in
-            # both_configs.columns}
-            effect_modifier_configs = pd.DataFrame([test.effect_modifier_configuration for test in concrete_tests])
-            ks_stats.update(
-                {
-                    var: stats.kstest(effect_modifier_configs[var], var.distribution.cdf).statistic
-                    for var in effect_modifier_configs.columns
-                }
-            )
-            control_values = [test.control_value for test in concrete_tests]
-            treatment_values = [test.treatment_value for test in concrete_tests]
-
-            if self.treatment_variable.datatype is bool and {(True, False), (False, True)}.issubset(
-                set(zip(control_values, treatment_values))
-            ):
-                pre_break = True
-                break
-            if issubclass(self.treatment_variable.datatype, Enum) and set(
-                {
-                    (x, y)
-                    for x, y in itertools.product(self.treatment_variable.datatype, self.treatment_variable.datatype)
-                    if x != y
-                }
-            ).issubset(zip(control_values, treatment_values)):
-                pre_break = True
-                break
-            if target_ks_score and all((stat <= target_ks_score for stat in ks_stats.values())):
-                pre_break = True
-                break
-
-        if target_ks_score is not None and not pre_break:
-            logger.error(
-                "Hard max reached but could not achieve target ks_score of %s. Got %s. Generated %s distinct tests",
-                target_ks_score,
-                ks_stats,
-                len(concrete_tests),
-            )
-        return concrete_tests, runs
-
-    def _optimizer_model(self, run_columns: Iterable[str], row: pd.core.series) -> z3.Optimize:
-        """
-        :param run_columns: A sorted list of Variable names from the scenario variables
-        :param row: A pandas Series containing a row from the Samples dataframe
-        :return: z3 optimize model with constraints tracked and soft constraints added
-        :rtype: z3.Optimize
-        """
-        optimizer = z3.Optimize()
-        for c in self.scenario.constraints:
-            optimizer.assert_and_track(c, str(c))
-        for c in self.intervention_constraints:
-            optimizer.assert_and_track(c, str(c))
-
-        for v in run_columns:
-            optimizer.add_soft(
-                self.scenario.variables[v].z3
-                == self.scenario.variables[v].z3_val(self.scenario.variables[v].z3, row[v])
-            )
-
-        if optimizer.check() == z3.unsat:
-            logger.warning(
-                f"Satisfiability of test case was unsat.\n"
-                f"Constraints \n {optimizer} \n Unsat core {optimizer.unsat_core()}",
-            )
-        model = optimizer.model()
-        return model
diff --git a/causal_testing/generation/enum_gen.py b/causal_testing/generation/enum_gen.py
deleted file mode 100644
index 496410bc..00000000
--- a/causal_testing/generation/enum_gen.py
+++ /dev/null
@@ -1,44 +0,0 @@
-"""This module contains the class EnumGen, which allows us to easily create
-generating uniform distributions from enums."""
-
-from enum import Enum
-from scipy.stats import rv_discrete
-import numpy as np
-
-
-class EnumGen(rv_discrete):
-    """This class allows us to easily create generating uniform distributions
-    from enums. This is helpful for generating concrete test inputs from
-    abstract test cases."""
-
-    def __init__(self, datatype: Enum):
-        super().__init__()
-        self.datatype = dict(enumerate(datatype, 1))
-        self.inverse_dt = {v: k for k, v in self.datatype.items()}
-
-    def ppf(self, q):
-        """Percent point function (inverse of `cdf`) at q of the given RV.
-        Parameters
-        ----------
-        q : array_like
-            Lower tail probability.
-        Returns
-        -------
-        k : array_like
-            Quantile corresponding to the lower tail probability, q.
-        """
-        return np.vectorize(self.datatype.get)(np.ceil(len(self.datatype) * q))
-
-    def cdf(self, k):
-        """
-        Cumulative distribution function of the given RV.
-        Parameters
-        ----------
-        k : array_like
-            quantiles
-        Returns
-        -------
-        cdf : ndarray
-            Cumulative distribution function evaluated at `x`
-        """
-        return np.vectorize(self.inverse_dt.get)(k) / len(self.datatype)
diff --git a/causal_testing/json_front/json_class.py b/causal_testing/json_front/json_class.py
index a6731857..d0541c38 100644
--- a/causal_testing/json_front/json_class.py
+++ b/causal_testing/json_front/json_class.py
@@ -87,11 +87,10 @@ def setup(self, scenario: Scenario, ignore_cycles=False):
             )
         self._populate_metas()
 
-    def run_json_tests(self, effects: dict, estimators: dict, f_flag: bool = False, mutates: dict = None):
+    def run_json_tests(self, effects: dict, estimators: dict, f_flag: bool = False):
         """Runs and evaluates each test case specified in the JSON input
 
         :param effects: Dictionary mapping effect class instances to string representations.
-        :param mutates: Dictionary mapping mutation functions to string representations.
         :param estimators: Dictionary mapping estimator classes to string representations.
         :param f_flag: Failure flag that if True the script will stop executing when a test fails.
         """
diff --git a/tests/testing_tests/test_causal_test_adequacy.py b/tests/testing_tests/test_causal_test_adequacy.py
index c3a0227d..0d7c4c93 100644
--- a/tests/testing_tests/test_causal_test_adequacy.py
+++ b/tests/testing_tests/test_causal_test_adequacy.py
@@ -83,15 +83,9 @@ def test_data_adequacy_cateogorical(self):
         }
         self.json_class.test_plan = example_test
         effects = {"NoEffect": NoEffect()}
-        mutates = {
-            "Increase": lambda x: self.json_class.scenario.treatment_variables[x].z3
-            > self.json_class.scenario.variables[x].z3
-        }
         estimators = {"LinearRegressionEstimator": LinearRegressionEstimator}
 
-        test_results = self.json_class.run_json_tests(
-            effects=effects, estimators=estimators, f_flag=False, mutates=mutates
-        )
+        test_results = self.json_class.run_json_tests(effects=effects, estimators=estimators, f_flag=False)
         self.assertEqual(
             test_results[0]["result"].adequacy.to_dict(),
             {"kurtosis": {"test_input_no_dist[T.b]": 0.0}, "bootstrap_size": 100, "passing": 100, "successful": 100},

From e559295527ae365ef54e7193386f13dbf8a827f9 Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Tue, 18 Feb 2025 15:45:11 +0000
Subject: [PATCH 31/44] Removed Z3

---
 causal_testing/specification/scenario.py      |  11 +-
 causal_testing/specification/variable.py      | 163 +-----------------
 .../surrogate/surrogate_search_algorithms.py  |   2 +-
 dafni/main_dafni.py                           |   3 -
 pyproject.toml                                |   1 -
 tests/json_front_tests/test_json_class.py     |  32 +---
 tests/specification_tests/test_variable.py    | 160 +----------------
 .../test_causal_surrogate_assisted.py         |  16 +-
 8 files changed, 23 insertions(+), 365 deletions(-)

diff --git a/causal_testing/specification/scenario.py b/causal_testing/specification/scenario.py
index 7e984abd..e787c238 100644
--- a/causal_testing/specification/scenario.py
+++ b/causal_testing/specification/scenario.py
@@ -3,7 +3,6 @@
 from collections.abc import Iterable, Mapping
 
 from tabulate import tabulate
-from z3 import ExprRef, substitute
 
 from .variable import Input, Meta, Output, Variable
 
@@ -20,15 +19,15 @@ class Scenario:
     accordingly.
 
     :param {Variable} variables: The set of endogenous variables.
-    :param {ExprRef} constraints: The set of constraints relating the endogenous variables.
+    :param {str} constraints: The set of constraints relating the endogenous variables.
     :attr variables:
     :attr constraints:
     """
 
     variables: Mapping[str, Variable]
-    constraints: set[ExprRef]
+    constraints: set[str]
 
-    def __init__(self, variables: Iterable[Variable] = None, constraints: set[ExprRef] = None):
+    def __init__(self, variables: Iterable[Variable] = None, constraints: set[str] = None):
         if variables is not None:
             self.variables = {v.name: v for v in variables}
         else:
@@ -106,10 +105,6 @@ def setup_treatment_variables(self) -> None:
             self.prime[k] = v_prime.name
             self.unprime[v_prime.name] = k
 
-        substitutions = {(self.variables[n].z3, self.treatment_variables[n].z3) for n in self.variables}
-        treatment_constraints = {substitute(c, *substitutions) for c in self.constraints}
-        self.constraints = self.constraints.union(treatment_constraints)
-
     def variables_of_type(self, t: type) -> set[Variable]:
         """Get the set of scenario variables of a particular type, e.g. Inputs.
 
diff --git a/causal_testing/specification/variable.py b/causal_testing/specification/variable.py
index 2bef6250..7e9c7d3f 100644
--- a/causal_testing/specification/variable.py
+++ b/causal_testing/specification/variable.py
@@ -1,53 +1,16 @@
-"""This module contains the Variable abstract class, as well as its concrete extensions: Input, Output and Meta. The
-function z3_types and the private function _coerce are also in this module."""
+"""This module contains the Variable abstract class, as well as its concrete extensions: Input, Output and Meta."""
 
 from __future__ import annotations
 from abc import ABC
 from collections.abc import Callable
-from enum import Enum
 from typing import Any, TypeVar
 
 import lhsmdu
 from pandas import DataFrame
 from scipy.stats._distn_infrastructure import rv_generic
-from z3 import Bool, BoolRef, Const, EnumSort, Int, RatNumRef, Real, String
 
 # Declare type variable
 T = TypeVar("T")
-z3 = TypeVar("Z3")
-
-
-def z3_types(datatype: T) -> z3:
-    """Cast datatype to Z3 datatype
-    :param datatype: python datatype to be cast
-    :return: Type name compatible with Z3 library
-    """
-    types = {int: Int, str: String, float: Real, bool: Bool}
-    if datatype in types:
-        return types[datatype]
-    if issubclass(datatype, Enum):
-        dtype, _ = EnumSort(datatype.__name__, [str(x.value) for x in datatype])
-        return lambda x: Const(x, dtype)
-    if hasattr(datatype, "to_z3"):
-        return datatype.to_z3()
-    raise ValueError(
-        f"Cannot convert type {datatype} to Z3."
-        + " Please use a native type, an Enum, or implement a conversion manually."
-    )
-
-
-def _coerce(val: Any) -> Any:
-    """Coerce Variables to their Z3 equivalents if appropriate to do so,
-    otherwise assume literal constants.
-
-    :param any val: A value, possibly a Variable.
-    :return: Either a Z3 ExprRef representing the variable or the original value.
-    :rtype: Any
-
-    """
-    if isinstance(val, Variable):
-        return val.z3
-    return val
 
 
 class Variable(ABC):
@@ -56,7 +19,6 @@ class Variable(ABC):
     :param str name: The name of the variable.
     :param T datatype: The datatype of the variable.
     :param rv_generic distribution: The expected distribution of the variable values.
-    :attr type z3: The Z3 mirror of the variable.
     :attr name:
     :attr datatype:
     :attr distribution:
@@ -70,125 +32,12 @@ class Variable(ABC):
     def __init__(self, name: str, datatype: T, distribution: rv_generic = None, hidden: bool = False):
         self.name = name
         self.datatype = datatype
-        self.z3 = z3_types(datatype)(name)
         self.distribution = distribution
         self.hidden = hidden
 
     def __repr__(self):
         return f"{self.typestring()}: {self.name}::{self.datatype.__name__}"
 
-    # Thin wrapper for Z1 functions
-
-    def __add__(self, other: Any) -> BoolRef:
-        """Create the Z3 expression `self + other`.
-
-        :param any other: The object to compare against.
-        :return: The Z3 expression `self + other`.
-        :rtype: BoolRef
-        """
-        return self.z3.__add__(_coerce(other))
-
-    def __ge__(self, other: Any) -> BoolRef:
-        """Create the Z3 expression `self >= other`.
-
-        :param any other: The object to compare against.
-        :return: The Z3 expression `self >= other`.
-        :rtype: BoolRef
-        """
-        return self.z3.__ge__(_coerce(other))
-
-    def __gt__(self, other: Any) -> BoolRef:
-        """Create the Z3 expression `self > other`.
-
-        :param any other: The object to compare against.
-        :return: The Z3 expression `self > other`.
-        :rtype: BoolRef
-        """
-        return self.z3.__gt__(_coerce(other))
-
-    def __le__(self, other: Any) -> BoolRef:
-        """Create the Z3 expression `self <= other`.
-
-        :param any other: The object to compare against.
-        :return: The Z3 expression `self <= other`.
-        :rtype: BoolRef
-        """
-        return self.z3.__le__(_coerce(other))
-
-    def __lt__(self, other: Any) -> BoolRef:
-        """Create the Z3 expression `self < other`.
-
-        :param any other: The object to compare against.
-        :return: The Z3 expression `self < other`.
-        :rtype: BoolRef
-        """
-        return self.z3.__lt__(_coerce(other))
-
-    def __mod__(self, other: Any) -> BoolRef:
-        """Create the Z3 expression `self % other`.
-
-        :param any other: The object to compare against.
-        :return: The Z3 expression `self % other`.
-        :rtype: BoolRef
-        """
-        return self.z3.__mod__(_coerce(other))
-
-    def __mul__(self, other: Any) -> BoolRef:
-        """Create the Z3 expression `self * other`.
-
-        :param any other: The object to compare against.
-        :return: The Z3 expression `self * other`.
-        :rtype: BoolRef
-        """
-        return self.z3.__mul__(_coerce(other))
-
-    def __ne__(self, other: Any) -> BoolRef:
-        """Create the Z3 expression `self != other`.
-
-        :param any other: The object to compare against.
-        :return: The Z3 expression `self != other`.
-        :rtype: BoolRef
-        """
-        return self.z3.__ne__(_coerce(other))
-
-    def __neg__(self) -> BoolRef:
-        """Create the Z3 expression `-self`.
-
-        :param any other: The object to compare against.
-        :return: The Z3 expression `-self`.
-        :rtype: BoolRef
-        """
-        return self.z3.__neg__()
-
-    def __pow__(self, other: Any) -> BoolRef:
-        """Create the Z3 expression `self ^ other`.
-
-        :param any other: The object to compare against.
-        :return: The Z3 expression `self ^ other`.
-        :rtype: BoolRef
-        """
-        return self.z3.__pow__(_coerce(other))
-
-    def __sub__(self, other: Any) -> BoolRef:
-        """Create the Z3 expression `self - other`.
-
-        :param any other: The object to compare against.
-        :return: The Z3 expression `self - other`.
-        :rtype: BoolRef
-        """
-        return self.z3.__sub__(_coerce(other))
-
-    def __truediv__(self, other: Any) -> BoolRef:
-        """Create the Z3 expression `self / other`.
-
-        :param any other: The object to compare against.
-        :return: The Z3 expression `self / other`.
-        :rtype: BoolRef
-        """
-        return self.z3.__truediv__(_coerce(other))
-
-    # End thin wrapper
-
     def cast(self, val: Any) -> T:
         """Cast the supplied value to the datatype T of the variable.
 
@@ -209,16 +58,6 @@ def cast(self, val: Any) -> T:
             return self.datatype(val)
         return self.datatype(str(val))
 
-    def z3_val(self, z3_var, val: Any) -> T:
-        """Cast value to Z3 value"""
-        native_val = self.cast(val)
-        if isinstance(native_val, Enum):
-            values = [z3_var.sort().constructor(c)() for c in range(z3_var.sort().num_constructors())]
-            values = [v for v in values if val.__class__(str(v)) == val]
-            assert len(values) == 1, f"Expected {values} to be length 1"
-            return values[0]
-        return native_val
-
     def sample(self, n_samples: int) -> [T]:
         """Generate a Latin Hypercube Sample of size n_samples according to the
         Variable's distribution.
diff --git a/causal_testing/surrogate/surrogate_search_algorithms.py b/causal_testing/surrogate/surrogate_search_algorithms.py
index 54e7bb48..3911d9ec 100644
--- a/causal_testing/surrogate/surrogate_search_algorithms.py
+++ b/causal_testing/surrogate/surrogate_search_algorithms.py
@@ -98,6 +98,7 @@ def create_gene_types(
             var_space[adj] = {}
 
         for relationship in list(specification.scenario.constraints):
+            print(relationship)
             rel_split = str(relationship).split(" ")
 
             if rel_split[0] in var_space:
@@ -109,7 +110,6 @@ def create_gene_types(
                         var_space[rel_split[0]]["high"] = int(rel_split[2]) + 1
                     else:
                         var_space[rel_split[0]]["high"] = datatype(rel_split[2])
-
         gene_space = []
         gene_space.append(var_space[surrogate_model.treatment])
         for adj in surrogate_model.adjustment_set:
diff --git a/dafni/main_dafni.py b/dafni/main_dafni.py
index d2cdb457..ed310cda 100644
--- a/dafni/main_dafni.py
+++ b/dafni/main_dafni.py
@@ -126,9 +126,6 @@ def validate_variables(data_dict: dict) -> tuple:
 
         constraints = set()
 
-        for variable, input_var in zip(variables, inputs):
-            if "constraint" in variable:
-                constraints.add(input_var.z3 == variable["constraint"])
     else:
         raise ValidationError("Cannot find the variables defined by the causal tests.")
 
diff --git a/pyproject.toml b/pyproject.toml
index 8821bd4a..5810ce97 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,7 +15,6 @@ requires-python = ">=3.10"
 license = { text = "MIT" }
 keywords = ["causal inference", "verification"]
 dependencies = [
-    "z3_solver~=4.11.2", # z3_solver does not follow semantic versioning and tying to 4.11 introduces problems
     "fitter~=1.7",
     "lifelines~=0.29.0",
     "lhsmdu~=1.1",
diff --git a/tests/json_front_tests/test_json_class.py b/tests/json_front_tests/test_json_class.py
index 07f06f65..b1738d24 100644
--- a/tests/json_front_tests/test_json_class.py
+++ b/tests/json_front_tests/test_json_class.py
@@ -99,13 +99,9 @@ def test_f_flag(self):
         }
         self.json_class.test_plan = example_test
         effects = {"NoEffect": NoEffect()}
-        mutates = {
-            "Increase": lambda x: self.json_class.scenario.treatment_variables[x].z3
-            > self.json_class.scenario.variables[x].z3
-        }
         estimators = {"LinearRegressionEstimator": LinearRegressionEstimator}
         with self.assertRaises(StatisticsError):
-            self.json_class.run_json_tests(effects, estimators, True, mutates)
+            self.json_class.run_json_tests(effects, estimators, True)
 
     def test_generate_coefficient_tests_from_json(self):
         example_test = {
@@ -149,15 +145,9 @@ def test_run_json_tests_from_json(self):
         }
         self.json_class.test_plan = example_test
         effects = {"NoEffect": NoEffect()}
-        mutates = {
-            "Increase": lambda x: self.json_class.scenario.treatment_variables[x].z3
-            > self.json_class.scenario.variables[x].z3
-        }
         estimators = {"LinearRegressionEstimator": LinearRegressionEstimator}
 
-        test_results = self.json_class.run_json_tests(
-            effects=effects, estimators=estimators, f_flag=False, mutates=mutates
-        )
+        test_results = self.json_class.run_json_tests(effects=effects, estimators=estimators, f_flag=False)
         self.assertTrue(test_results[0]["failed"])
 
     def test_generate_tests_from_json_no_dist(self):
@@ -176,13 +166,9 @@ def test_generate_tests_from_json_no_dist(self):
         }
         self.json_class.test_plan = example_test
         effects = {"NoEffect": NoEffect()}
-        mutates = {
-            "Increase": lambda x: self.json_class.scenario.treatment_variables[x].z3
-            > self.json_class.scenario.variables[x].z3
-        }
         estimators = {"LinearRegressionEstimator": LinearRegressionEstimator}
 
-        self.json_class.run_json_tests(effects=effects, mutates=mutates, estimators=estimators, f_flag=False)
+        self.json_class.run_json_tests(effects=effects, estimators=estimators, f_flag=False)
 
         # Test that the final log message prints that failed tests are printed, which is expected behaviour for this scenario
         with open("temp_out.txt", "r") as reader:
@@ -206,13 +192,9 @@ def test_formula_in_json_test(self):
         }
         self.json_class.test_plan = example_test
         effects = {"Positive": Positive()}
-        mutates = {
-            "Increase": lambda x: self.json_class.scenario.treatment_variables[x].z3
-            > self.json_class.scenario.variables[x].z3
-        }
         estimators = {"LinearRegressionEstimator": LinearRegressionEstimator}
 
-        self.json_class.run_json_tests(effects=effects, mutates=mutates, estimators=estimators, f_flag=False)
+        self.json_class.run_json_tests(effects=effects, estimators=estimators, f_flag=False)
         with open("temp_out.txt", "r") as reader:
             temp_out = reader.readlines()
         self.assertIn("test_output ~ test_input", "".join(temp_out))
@@ -282,13 +264,9 @@ def add_modelling_assumptions(self):
         }
         self.json_class.test_plan = example_test
         effects = {"Positive": Positive()}
-        mutates = {
-            "Increase": lambda x: self.json_class.scenario.treatment_variables[x].z3
-            > self.json_class.scenario.variables[x].z3
-        }
         estimators = {"ExampleEstimator": ExampleEstimator}
         with self.assertRaises(TypeError):
-            self.json_class.run_json_tests(effects=effects, mutates=mutates, estimators=estimators, f_flag=False)
+            self.json_class.run_json_tests(effects=effects, estimators=estimators, f_flag=False)
 
     def tearDown(self) -> None:
         if os.path.exists("temp_out.txt"):
diff --git a/tests/specification_tests/test_variable.py b/tests/specification_tests/test_variable.py
index b35724a2..d7173f2c 100644
--- a/tests/specification_tests/test_variable.py
+++ b/tests/specification_tests/test_variable.py
@@ -1,9 +1,8 @@
 import unittest
 from enum import Enum
-import z3
 from scipy.stats import norm, kstest
 
-from causal_testing.specification.variable import z3_types, Variable, Input
+from causal_testing.specification.variable import Variable, Input
 
 
 class TestVariable(unittest.TestCase):
@@ -14,117 +13,10 @@ class TestVariable(unittest.TestCase):
     def setUp(self) -> None:
         pass
 
-    def test_z3_types_enum(self):
-        class Color(Enum):
-            """
-            Example enum class color.
-            """
-
-            RED = "RED"
-            GREEN = "GREEN"
-            BLUE = "BLUE"
-
-        dtype, _ = z3.EnumSort("color", ("RED", "GREEN", "BLUE"))
-        z3_color = z3.Const("color", dtype)
-        expected = z3_types(Color)("color")
-        # No actual way to assert their equality since they are two different objects
-        expected_values = [expected.sort().constructor(c)() for c in range(expected.sort().num_constructors())]
-        z3_color_values = [z3_color.sort().constructor(c)() for c in range(z3_color.sort().num_constructors())]
-
-        # This isn't very good, but I think it's the best we can do since even
-        # z3_types(Color)("color") != z3_types(Color)("color")
-        self.assertEqual(list(map(str, expected_values)), list(map(str, z3_color_values)))
-
-    def test_cast_z3_bool(self):
-        bip = Input("bip", bool)
-        s = z3.Solver()
-        t = z3.Bool("t")
-        f = z3.Bool("f")
-        s.add(t)
-        s.add(z3.Not(f))
-        s.check()
-        self.assertEqual(bip.cast(s.model()[t]), True)
-        self.assertEqual(bip.cast(s.model()[f]), False)
-
-    def test_cast_z3_string(self):
-        ip = Input("bip", str)
-        s = z3.Solver()
-        t = z3.String("t")
-        s.add(t == "hello")
-        s.check()
-        self.assertEqual(ip.cast(s.model()[t]), "hello")
-
     def test_sample_flakey(self):
         ip = Input("ip", float, norm)
         self.assertGreater(kstest(ip.sample(10), norm.cdf).pvalue, 0.95)
 
-    def test_cast_enum(self):
-        class Color(Enum):
-            """
-            Example enum class color.
-            """
-
-            RED = "RED"
-            GREEN = "GREEN"
-            BLUE = "BLUE"
-
-        color = Input("color", Color)
-
-        dtype, colours = z3.EnumSort("color", ("RED", "GREEN", "BLUE"))
-        self.assertEqual(color.cast(colours[0]), Color.RED)
-
-    def test_z3_value_enum(self):
-        class Color(Enum):
-            """
-            Example enum class color.
-            """
-
-            RED = "RED"
-            GREEN = "GREEN"
-            BLUE = "BLUE"
-
-        dtype, members = z3.EnumSort("color", ("RED", "GREEN", "BLUE"))
-        z3_color = z3.Const("color", dtype)
-        color = Input("color", Color)
-
-        self.assertEqual(color.z3_val(z3_color, "RED"), members[0])
-
-    def test_z3_types_custom(self):
-        class Color:
-            """
-            Example enum class color.
-            """
-
-            RED = 1
-            GREEN = 2
-            BLUE = 3
-
-            @classmethod
-            def to_z3(cls):
-                dtype, _ = z3.EnumSort("Color", ("RED", "GREEN", "BLUE"))
-                return lambda x: z3.Const(x, dtype)
-
-        dtype, _ = z3.EnumSort("color", ("RED", "GREEN", "BLUE"))
-        z3_color = z3.Const("color", dtype)
-        expected = z3_types(Color)("color")
-        # No actual way to assert their equality since they are two different objects
-        expected_values = [expected.sort().constructor(c)() for c in range(expected.sort().num_constructors())]
-        z3_color_values = [z3_color.sort().constructor(c)() for c in range(z3_color.sort().num_constructors())]
-
-        # This isn't very good, but I think it's the best we can do since even
-        # z3_types(Color)("color") != z3_types(Color)("color")
-        self.assertEqual(list(map(str, expected_values)), list(map(str, z3_color_values)))
-
-    def test_z3_types_invalid(self):
-        with self.assertRaises(ValueError):
-
-            class Err:
-                """
-                The simplest class which will elicit the correct error.
-                """
-
-            z3_types(Err)
-
     def test_typestring(self):
         class Var(Variable):
             pass
@@ -139,53 +31,3 @@ def test_copy(self):
         self.assertEqual(ip.copy().datatype, ip.datatype)
         self.assertEqual(ip.copy().distribution, ip.distribution)
         self.assertEqual(repr(ip), repr(ip.copy()))
-
-
-class TestZ3Methods(unittest.TestCase):
-    """
-    Test the Variable class for Z3 methods.
-
-    TODO: These are all pretty hacky, to be honest, but Z3 makes checking this sort of thing really difficult.
-    """
-
-    def setUp(self) -> None:
-        self.i1 = Input("i1", int)
-
-    def test_ge_self(self):
-        self.assertEqual(str(self.i1 >= self.i1), "i1 >= i1")
-
-    def test_add(self):
-        self.assertEqual(str(self.i1 + 1), "i1 + 1")
-
-    def test_ge(self):
-        self.assertEqual(str(self.i1 >= 5), "i1 >= 5")
-
-    def test_mod(self):
-        self.assertEqual(str(self.i1 % 2), "i1%2")
-
-    def test_ne(self):
-        self.assertEqual(str(self.i1 != 5), "i1 != 5")
-
-    def test_neg(self):
-        self.assertEqual(str(-self.i1), "-i1")
-
-    def test_pow(self):
-        self.assertEqual(str(self.i1**5), "i1**5")
-
-    def test_le(self):
-        self.assertEqual(str(self.i1 <= 5), "i1 <= 5")
-
-    def test_mul(self):
-        self.assertEqual(str(self.i1 * 2), "i1*2")
-
-    def test_gt(self):
-        self.assertEqual(str(self.i1 > 5), "i1 > 5")
-
-    def test_truediv(self):
-        self.assertEqual(str(self.i1 / 3), "i1/3")
-
-    def test_sub(self):
-        self.assertEqual(str(self.i1 - 4), "i1 - 4")
-
-    def test_lt(self):
-        self.assertEqual(str(self.i1 < 5), "i1 < 5")
diff --git a/tests/surrogate_tests/test_causal_surrogate_assisted.py b/tests/surrogate_tests/test_causal_surrogate_assisted.py
index 6668836d..cbf6fac0 100644
--- a/tests/surrogate_tests/test_causal_surrogate_assisted.py
+++ b/tests/surrogate_tests/test_causal_surrogate_assisted.py
@@ -84,7 +84,9 @@ def test_causal_surrogate_assisted_execution(self):
         x = Input("X", float)
         m = Input("M", int)
         y = Output("Y", float)
-        scenario = Scenario(variables={z, x, m, y}, constraints={z <= 0, z >= 3, x <= 0, x >= 3, m <= 0, m >= 3})
+        scenario = Scenario(
+            variables={z, x, m, y}, constraints={"Z <= 0", "Z >= 3", "X <= 0", "X >= 3", "M <= 0", "M >= 3"}
+        )
         specification = CausalSpecification(scenario, causal_dag)
 
         search_algorithm = GeneticSearchAlgorithm(
@@ -114,7 +116,9 @@ def test_causal_surrogate_assisted_execution_failure(self):
         x = Input("X", float)
         m = Input("M", int)
         y = Output("Y", float)
-        scenario = Scenario(variables={z, x, m, y}, constraints={z <= 0, z >= 3, x <= 0, x >= 3, m <= 0, m >= 3})
+        scenario = Scenario(
+            variables={z, x, m, y}, constraints={"Z <= 0", "Z >= 3", "X <= 0", "X >= 3", "M <= 0", "M >= 3"}
+        )
         specification = CausalSpecification(scenario, causal_dag)
 
         search_algorithm = GeneticSearchAlgorithm(
@@ -144,7 +148,9 @@ def test_causal_surrogate_assisted_execution_custom_aggregator(self):
         x = Input("X", float)
         m = Input("M", int)
         y = Output("Y", float)
-        scenario = Scenario(variables={z, x, m, y}, constraints={z <= 0, z >= 3, x <= 0, x >= 3, m <= 0, m >= 3})
+        scenario = Scenario(
+            variables={z, x, m, y}, constraints={"Z <= 0", "Z >= 3", "X <= 0", "X >= 3", "M <= 0", "M >= 3"}
+        )
         specification = CausalSpecification(scenario, causal_dag)
 
         search_algorithm = GeneticSearchAlgorithm(
@@ -174,7 +180,9 @@ def test_causal_surrogate_assisted_execution_incorrect_search_config(self):
         x = Input("X", float)
         m = Input("M", int)
         y = Output("Y", float)
-        scenario = Scenario(variables={z, x, m, y}, constraints={z <= 0, z >= 3, x <= 0, x >= 3, m <= 0, m >= 3})
+        scenario = Scenario(
+            variables={z, x, m, y}, constraints={"Z <= 0", "Z >= 3", "X <= 0", "X >= 3", "M <= 0", "M >= 3"}
+        )
         specification = CausalSpecification(scenario, causal_dag)
 
         search_algorithm = GeneticSearchAlgorithm(

From 0c55a82aea2805fef0ff3e213eebdbea76dc0c6e Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Tue, 18 Feb 2025 15:50:58 +0000
Subject: [PATCH 32/44] pylint

---
 causal_testing/specification/variable.py | 20 --------------------
 1 file changed, 20 deletions(-)

diff --git a/causal_testing/specification/variable.py b/causal_testing/specification/variable.py
index 7e9c7d3f..87fec4f9 100644
--- a/causal_testing/specification/variable.py
+++ b/causal_testing/specification/variable.py
@@ -38,26 +38,6 @@ def __init__(self, name: str, datatype: T, distribution: rv_generic = None, hidd
     def __repr__(self):
         return f"{self.typestring()}: {self.name}::{self.datatype.__name__}"
 
-    def cast(self, val: Any) -> T:
-        """Cast the supplied value to the datatype T of the variable.
-
-        :param any val: The value to cast.
-        :return: The supplied value as an instance of T.
-        :rtype: T
-        """
-        assert val is not None, f"Invalid value None for variable {self}"
-        if isinstance(val, self.datatype):
-            return val
-        if isinstance(val, BoolRef) and self.datatype == bool:
-            return str(val) == "True"
-        if isinstance(val, RatNumRef) and self.datatype == float:
-            return float(val.numerator().as_long() / val.denominator().as_long())
-        if hasattr(val, "is_string_value") and val.is_string_value() and self.datatype == str:
-            return val.as_string()
-        if (isinstance(val, (float, int, bool))) and (self.datatype in (float, int, bool)):
-            return self.datatype(val)
-        return self.datatype(str(val))
-
     def sample(self, n_samples: int) -> [T]:
         """Generate a Latin Hypercube Sample of size n_samples according to the
         Variable's distribution.

From aaa18df4ff75fc4ea82bdd9b3c486a1173bce5e4 Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Tue, 18 Feb 2025 15:53:05 +0000
Subject: [PATCH 33/44] pylint

---
 causal_testing/specification/variable.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/causal_testing/specification/variable.py b/causal_testing/specification/variable.py
index 87fec4f9..47345443 100644
--- a/causal_testing/specification/variable.py
+++ b/causal_testing/specification/variable.py
@@ -3,7 +3,7 @@
 from __future__ import annotations
 from abc import ABC
 from collections.abc import Callable
-from typing import Any, TypeVar
+from typing import TypeVar
 
 import lhsmdu
 from pandas import DataFrame

From 67c973a0f1185748a1c51dfe65dca17dd2e223cb Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Tue, 18 Feb 2025 15:57:20 +0000
Subject: [PATCH 34/44] metamorphic relation codecov

---
 .../testing_tests/test_metamorphic_relations.py  | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tests/testing_tests/test_metamorphic_relations.py b/tests/testing_tests/test_metamorphic_relations.py
index 9055c8c4..a3f5495a 100644
--- a/tests/testing_tests/test_metamorphic_relations.py
+++ b/tests/testing_tests/test_metamorphic_relations.py
@@ -55,7 +55,6 @@ def test_should_not_cause_json_stub(self):
                 "estimate_type": "coefficient",
                 "estimator": "LinearRegressionEstimator",
                 "expected_effect": {"Z": "NoEffect"},
-                "formula": "Z ~ X1",
                 "mutations": ["X1"],
                 "name": "X1 _||_ Z",
                 "formula": "Z ~ X1",
@@ -204,44 +203,45 @@ def test_generate_metamorphic_relation_(self):
             ShouldCause(BaseTestCase("X1", "Z"), []),
         )
 
+    def test_shoud_cause_string(self):
+        sc_mr = ShouldCause(BaseTestCase("X", "Y"), ["A", "B", "C"])
+        self.assertEqual(str(sc_mr), "X --> Y | ['A', 'B', 'C']")
+
+    def test_shoud_not_cause_string(self):
+        sc_mr = ShouldNotCause(BaseTestCase("X", "Y"), ["A", "B", "C"])
+        self.assertEqual(str(sc_mr), "X _||_ Y | ['A', 'B', 'C']")
+
     def test_equivalent_metamorphic_relations(self):
-        dag = CausalDAG(self.dag_dot_path)
         sc_mr_a = ShouldCause(BaseTestCase("X", "Y"), ["A", "B", "C"])
         sc_mr_b = ShouldCause(BaseTestCase("X", "Y"), ["A", "B", "C"])
         self.assertEqual(sc_mr_a == sc_mr_b, True)
 
     def test_equivalent_metamorphic_relations_empty_adjustment_set(self):
-        dag = CausalDAG(self.dag_dot_path)
         sc_mr_a = ShouldCause(BaseTestCase("X", "Y"), [])
         sc_mr_b = ShouldCause(BaseTestCase("X", "Y"), [])
         self.assertEqual(sc_mr_a == sc_mr_b, True)
 
     def test_equivalent_metamorphic_relations_different_order_adjustment_set(self):
-        dag = CausalDAG(self.dag_dot_path)
         sc_mr_a = ShouldCause(BaseTestCase("X", "Y"), ["A", "B", "C"])
         sc_mr_b = ShouldCause(BaseTestCase("X", "Y"), ["C", "A", "B"])
         self.assertEqual(sc_mr_a == sc_mr_b, True)
 
     def test_different_metamorphic_relations_empty_adjustment_set_different_outcome(self):
-        dag = CausalDAG(self.dag_dot_path)
         sc_mr_a = ShouldCause(BaseTestCase("X", "Z"), [])
         sc_mr_b = ShouldCause(BaseTestCase("X", "Y"), [])
         self.assertEqual(sc_mr_a == sc_mr_b, False)
 
     def test_different_metamorphic_relations_empty_adjustment_set_different_treatment(self):
-        dag = CausalDAG(self.dag_dot_path)
         sc_mr_a = ShouldCause(BaseTestCase("X", "Y"), [])
         sc_mr_b = ShouldCause(BaseTestCase("Z", "Y"), [])
         self.assertEqual(sc_mr_a == sc_mr_b, False)
 
     def test_different_metamorphic_relations_empty_adjustment_set_adjustment_set(self):
-        dag = CausalDAG(self.dag_dot_path)
         sc_mr_a = ShouldCause(BaseTestCase("X", "Y"), ["A"])
         sc_mr_b = ShouldCause(BaseTestCase("X", "Y"), [])
         self.assertEqual(sc_mr_a == sc_mr_b, False)
 
     def test_different_metamorphic_relations_different_type(self):
-        dag = CausalDAG(self.dag_dot_path)
         sc_mr_a = ShouldCause(BaseTestCase("X", "Y"), [])
         sc_mr_b = ShouldNotCause(BaseTestCase("X", "Y"), [])
         self.assertEqual(sc_mr_a == sc_mr_b, False)

From 77ac1b8b885e0ba99c36b6f523824810abed53ef Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Tue, 18 Feb 2025 16:00:16 +0000
Subject: [PATCH 35/44] causal dag coverage

---
 tests/specification_tests/test_causal_dag.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/specification_tests/test_causal_dag.py b/tests/specification_tests/test_causal_dag.py
index 87ae3963..28551dc3 100644
--- a/tests/specification_tests/test_causal_dag.py
+++ b/tests/specification_tests/test_causal_dag.py
@@ -197,6 +197,12 @@ def test_proper_backdoor_graph(self):
         )
         self.assertTrue(set(proper_backdoor_graph.edges).issubset(edges))
 
+    def test_proper_backdoor_graph_invalid_tratment(self):
+        """Test whether converting a Causal DAG to a proper back-door graph works correctly."""
+        causal_dag = CausalDAG(self.dag_dot_path)
+        with self.assertRaises(IndexError):
+            causal_dag.get_proper_backdoor_graph(["INVALID"], ["Y"])
+
     def test_constructive_backdoor_criterion_should_hold(self):
         """Test whether the constructive criterion holds when it should."""
         causal_dag = CausalDAG(self.dag_dot_path)

From b86c5841c33b0b0a5657c1a7467dd8c91ef0ffa5 Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Thu, 20 Feb 2025 11:52:28 +0000
Subject: [PATCH 36/44] All the tests pass and got rid of JSON front

---
 .../estimation/abstract_estimator.py          |   8 +-
 .../abstract_regression_estimator.py          |  17 +-
 .../estimation/cubic_spline_estimator.py      |  16 +-
 .../estimation/experimental_estimator.py      |  69 +++-
 .../instrumental_variable_estimator.py        |  11 +-
 causal_testing/estimation/ipcw_estimator.py   |  15 +-
 .../estimation/linear_regression_estimator.py |  41 +-
 .../logistic_regression_estimator.py          |   7 +-
 causal_testing/json_front/__init__.py         |   0
 causal_testing/json_front/json_class.py       | 376 ------------------
 .../surrogate/causal_surrogate_assisted.py    |  13 +-
 .../surrogate/surrogate_search_algorithms.py  |  30 +-
 causal_testing/testing/causal_test_result.py  |  20 +-
 .../covasim_/doubling_beta/example_beta.py    |  41 +-
 examples/lr91/example_max_conductances.py     |   4 +-
 .../example_pure_python.py                    |  25 +-
 .../test_cubic_spline_estimator.py            |   6 +-
 .../test_experimental_estimator.py            |   8 +-
 .../test_instrumental_variable_estimator.py   |  25 +-
 .../test_linear_regression_estimator.py       |  42 +-
 .../test_logistic_regression_estimator.py     |  10 +-
 tests/json_front_tests/test_json_class.py     | 277 -------------
 .../test_causal_surrogate_assisted.py         |   8 +-
 .../test_causal_test_adequacy.py              | 118 +++---
 tests/testing_tests/test_causal_test_case.py  |  40 +-
 .../testing_tests/test_causal_test_outcome.py |   6 +-
 26 files changed, 289 insertions(+), 944 deletions(-)
 delete mode 100644 causal_testing/json_front/__init__.py
 delete mode 100644 causal_testing/json_front/json_class.py
 delete mode 100644 tests/json_front_tests/test_json_class.py

diff --git a/causal_testing/estimation/abstract_estimator.py b/causal_testing/estimation/abstract_estimator.py
index 47ab1efe..21c330c8 100644
--- a/causal_testing/estimation/abstract_estimator.py
+++ b/causal_testing/estimation/abstract_estimator.py
@@ -6,6 +6,8 @@
 
 import pandas as pd
 
+from causal_testing.testing.base_test_case import BaseTestCase
+
 logger = logging.getLogger(__name__)
 
 
@@ -30,21 +32,19 @@ class Estimator(ABC):
     def __init__(
         # pylint: disable=too-many-arguments
         self,
-        treatment: str,
+        base_test_case: BaseTestCase,
         treatment_value: float,
         control_value: float,
         adjustment_set: set,
-        outcome: str,
         df: pd.DataFrame = None,
         effect_modifiers: dict[str:Any] = None,
         alpha: float = 0.05,
         query: str = "",
     ):
-        self.treatment = treatment
+        self.base_test_case = base_test_case
         self.treatment_value = treatment_value
         self.control_value = control_value
         self.adjustment_set = adjustment_set
-        self.outcome = outcome
         self.alpha = alpha
         self.df = df.query(query) if query else df
 
diff --git a/causal_testing/estimation/abstract_regression_estimator.py b/causal_testing/estimation/abstract_regression_estimator.py
index c048922b..4f9a1fe4 100644
--- a/causal_testing/estimation/abstract_regression_estimator.py
+++ b/causal_testing/estimation/abstract_regression_estimator.py
@@ -10,6 +10,7 @@
 
 from causal_testing.specification.variable import Variable
 from causal_testing.estimation.abstract_estimator import Estimator
+from causal_testing.testing.base_test_case import BaseTestCase
 
 logger = logging.getLogger(__name__)
 
@@ -22,11 +23,10 @@ class RegressionEstimator(Estimator):
     def __init__(
         # pylint: disable=too-many-arguments
         self,
-        treatment: str,
+        base_test_case: BaseTestCase,
         treatment_value: float,
         control_value: float,
         adjustment_set: set,
-        outcome: str,
         df: pd.DataFrame = None,
         effect_modifiers: dict[Variable:Any] = None,
         formula: str = None,
@@ -34,11 +34,10 @@ def __init__(
         query: str = "",
     ):
         super().__init__(
-            treatment=treatment,
+            base_test_case=base_test_case,
             treatment_value=treatment_value,
             control_value=control_value,
             adjustment_set=adjustment_set,
-            outcome=outcome,
             df=df,
             effect_modifiers=effect_modifiers,
             alpha=alpha,
@@ -53,8 +52,10 @@ def __init__(
         if formula is not None:
             self.formula = formula
         else:
-            terms = [treatment] + sorted(list(adjustment_set)) + sorted(list(effect_modifiers))
-            self.formula = f"{outcome} ~ {'+'.join(terms)}"
+            terms = (
+                [base_test_case.treatment_variable.name] + sorted(list(adjustment_set)) + sorted(list(effect_modifiers))
+            )
+            self.formula = f"{base_test_case.outcome_variable.name} ~ {'+'.join(terms)}"
 
     @property
     @abstractmethod
@@ -104,7 +105,7 @@ def _predict(self, data=None, adjustment_config: dict = None) -> pd.DataFrame:
 
         x = pd.DataFrame(columns=self.df.columns)
         x["Intercept"] = 1  # self.intercept
-        x[self.treatment] = [self.treatment_value, self.control_value]
+        x[self.base_test_case.treatment_variable.name] = [self.treatment_value, self.control_value]
 
         for k, v in adjustment_config.items():
             x[k] = v
@@ -116,5 +117,5 @@ def _predict(self, data=None, adjustment_config: dict = None) -> pd.DataFrame:
                 x = pd.get_dummies(x, columns=[col], drop_first=True)
 
         # This has to be here in case the treatment variable is in an I(...) block in the self.formula
-        x[self.treatment] = [self.treatment_value, self.control_value]
+        x[self.base_test_case.treatment_variable.name] = [self.treatment_value, self.control_value]
         return model.get_prediction(x).summary_frame()
diff --git a/causal_testing/estimation/cubic_spline_estimator.py b/causal_testing/estimation/cubic_spline_estimator.py
index b8ceb2fd..c32fecca 100644
--- a/causal_testing/estimation/cubic_spline_estimator.py
+++ b/causal_testing/estimation/cubic_spline_estimator.py
@@ -8,6 +8,7 @@
 
 from causal_testing.specification.variable import Variable
 from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
+from causal_testing.testing.base_test_case import BaseTestCase
 
 logger = logging.getLogger(__name__)
 
@@ -20,11 +21,10 @@ class CubicSplineRegressionEstimator(LinearRegressionEstimator):
     def __init__(
         # pylint: disable=too-many-arguments
         self,
-        treatment: str,
+        base_test_case: BaseTestCase,
         treatment_value: float,
         control_value: float,
         adjustment_set: set,
-        outcome: str,
         basis: int,
         df: pd.DataFrame = None,
         effect_modifiers: dict[Variable:Any] = None,
@@ -33,7 +33,7 @@ def __init__(
         expected_relationship=None,
     ):
         super().__init__(
-            treatment, treatment_value, control_value, adjustment_set, outcome, df, effect_modifiers, formula, alpha
+            base_test_case, treatment_value, control_value, adjustment_set, df, effect_modifiers, formula, alpha
         )
 
         self.expected_relationship = expected_relationship
@@ -42,8 +42,10 @@ def __init__(
             effect_modifiers = []
 
         if formula is None:
-            terms = [treatment] + sorted(list(adjustment_set)) + sorted(list(effect_modifiers))
-            self.formula = f"{outcome} ~ cr({'+'.join(terms)}, df={basis})"
+            terms = (
+                [base_test_case.treatment_variable.name] + sorted(list(adjustment_set)) + sorted(list(effect_modifiers))
+            )
+            self.formula = f"{base_test_case.outcome_variable.name} ~ cr({'+'.join(terms)}, df={basis})"
 
     def estimate_ate_calculated(self, adjustment_config: dict = None) -> pd.Series:
         """Estimate the ate effect of the treatment on the outcome. That is, the change in outcome caused
@@ -59,7 +61,7 @@ def estimate_ate_calculated(self, adjustment_config: dict = None) -> pd.Series:
         """
         model = self._run_regression()
 
-        x = {"Intercept": 1, self.treatment: self.treatment_value}
+        x = {"Intercept": 1, self.base_test_case.treatment_variable.name: self.treatment_value}
         if adjustment_config is not None:
             for k, v in adjustment_config.items():
                 x[k] = v
@@ -69,7 +71,7 @@ def estimate_ate_calculated(self, adjustment_config: dict = None) -> pd.Series:
 
         treatment = model.predict(x).iloc[0]
 
-        x[self.treatment] = self.control_value
+        x[self.base_test_case.treatment_variable.name] = self.control_value
         control = model.predict(x).iloc[0]
 
         return pd.Series(treatment - control)
diff --git a/causal_testing/estimation/experimental_estimator.py b/causal_testing/estimation/experimental_estimator.py
index 3d2c6ad4..cc91d853 100644
--- a/causal_testing/estimation/experimental_estimator.py
+++ b/causal_testing/estimation/experimental_estimator.py
@@ -5,6 +5,7 @@
 import pandas as pd
 
 from causal_testing.estimation.abstract_estimator import Estimator
+from causal_testing.testing.base_test_case import BaseTestCase
 
 
 class ExperimentalEstimator(Estimator):
@@ -16,22 +17,20 @@ class ExperimentalEstimator(Estimator):
     def __init__(
         # pylint: disable=too-many-arguments
         self,
-        treatment: str,
+        base_test_case: BaseTestCase,
         treatment_value: float,
         control_value: float,
         adjustment_set: dict[str:Any],
-        outcome: str,
         effect_modifiers: dict[str:Any] = None,
         alpha: float = 0.05,
         repeats: int = 200,
     ):
         # pylint: disable=R0801
         super().__init__(
-            treatment=treatment,
+            base_test_case=base_test_case,
             treatment_value=treatment_value,
             control_value=control_value,
             adjustment_set=adjustment_set,
-            outcome=outcome,
             effect_modifiers=effect_modifiers,
             alpha=alpha,
         )
@@ -62,21 +61,40 @@ def estimate_ate(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
 
         :return: The average treatment effect and the bootstrapped confidence intervals.
         """
-        control_configuration = self.adjustment_set | self.effect_modifiers | {self.treatment: self.control_value}
-        treatment_configuration = self.adjustment_set | self.effect_modifiers | {self.treatment: self.treatment_value}
+        control_configuration = (
+            self.adjustment_set
+            | self.effect_modifiers
+            | {self.base_test_case.treatment_variable.name: self.control_value}
+        )
+        treatment_configuration = (
+            self.adjustment_set
+            | self.effect_modifiers
+            | {self.base_test_case.treatment_variable.name: self.treatment_value}
+        )
 
         control_outcomes = pd.DataFrame([self.run_system(control_configuration) for _ in range(self.repeats)])
         treatment_outcomes = pd.DataFrame([self.run_system(treatment_configuration) for _ in range(self.repeats)])
 
-        difference = (treatment_outcomes[self.outcome] - control_outcomes[self.outcome]).sort_values().reset_index()
+        difference = (
+            (
+                treatment_outcomes[self.base_test_case.outcome_variable.name]
+                - control_outcomes[self.base_test_case.outcome_variable.name]
+            )
+            .sort_values()
+            .reset_index()
+        )
 
         ci_low_index = round(self.repeats * (self.alpha / 2))
         ci_low = difference.iloc[ci_low_index]
         ci_high = difference.iloc[self.repeats - ci_low_index]
 
-        return pd.Series({self.treatment: difference.mean()[self.outcome]}), [
-            pd.Series({self.treatment: ci_low[self.outcome]}),
-            pd.Series({self.treatment: ci_high[self.outcome]}),
+        return pd.Series(
+            {self.base_test_case.treatment_variable.name: difference.mean()[self.base_test_case.outcome_variable.name]}
+        ), [
+            pd.Series({self.base_test_case.treatment_variable.name: ci_low[self.base_test_case.outcome_variable.name]}),
+            pd.Series(
+                {self.base_test_case.treatment_variable.name: ci_high[self.base_test_case.outcome_variable.name]}
+            ),
         ]
 
     def estimate_risk_ratio(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
@@ -85,19 +103,38 @@ def estimate_risk_ratio(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
 
         :return: The average treatment effect and the bootstrapped confidence intervals.
         """
-        control_configuration = self.adjustment_set | self.effect_modifiers | {self.treatment: self.control_value}
-        treatment_configuration = self.adjustment_set | self.effect_modifiers | {self.treatment: self.treatment_value}
+        control_configuration = (
+            self.adjustment_set
+            | self.effect_modifiers
+            | {self.base_test_case.treatment_variable.name: self.control_value}
+        )
+        treatment_configuration = (
+            self.adjustment_set
+            | self.effect_modifiers
+            | {self.base_test_case.treatment_variable.name: self.treatment_value}
+        )
 
         control_outcomes = pd.DataFrame([self.run_system(control_configuration) for _ in range(self.repeats)])
         treatment_outcomes = pd.DataFrame([self.run_system(treatment_configuration) for _ in range(self.repeats)])
 
-        difference = (treatment_outcomes[self.outcome] / control_outcomes[self.outcome]).sort_values().reset_index()
+        difference = (
+            (
+                treatment_outcomes[self.base_test_case.outcome_variable.name]
+                / control_outcomes[self.base_test_case.outcome_variable.name]
+            )
+            .sort_values()
+            .reset_index()
+        )
 
         ci_low_index = round(self.repeats * (self.alpha / 2))
         ci_low = difference.iloc[ci_low_index]
         ci_high = difference.iloc[self.repeats - ci_low_index]
 
-        return pd.Series({self.treatment: difference.mean()[self.outcome]}), [
-            pd.Series({self.treatment: ci_low[self.outcome]}),
-            pd.Series({self.treatment: ci_high[self.outcome]}),
+        return pd.Series(
+            {self.base_test_case.treatment_variable.name: difference.mean()[self.base_test_case.outcome_variable.name]}
+        ), [
+            pd.Series({self.base_test_case.treatment_variable.name: ci_low[self.base_test_case.outcome_variable.name]}),
+            pd.Series(
+                {self.base_test_case.treatment_variable.name: ci_high[self.base_test_case.outcome_variable.name]}
+            ),
         ]
diff --git a/causal_testing/estimation/instrumental_variable_estimator.py b/causal_testing/estimation/instrumental_variable_estimator.py
index 38d0fc1b..e322f9a7 100644
--- a/causal_testing/estimation/instrumental_variable_estimator.py
+++ b/causal_testing/estimation/instrumental_variable_estimator.py
@@ -7,6 +7,7 @@
 import statsmodels.api as sm
 
 from causal_testing.estimation.abstract_estimator import Estimator
+from causal_testing.testing.base_test_case import BaseTestCase
 
 logger = logging.getLogger(__name__)
 
@@ -21,22 +22,20 @@ def __init__(
         # pylint: disable=too-many-arguments
         # pylint: disable=duplicate-code
         self,
-        treatment: str,
+        base_test_case: BaseTestCase,
         treatment_value: float,
         control_value: float,
         adjustment_set: set,
-        outcome: str,
         instrument: str,
         df: pd.DataFrame = None,
         alpha: float = 0.05,
         query: str = "",
     ):
         super().__init__(
-            treatment=treatment,
+            base_test_case=base_test_case,
             treatment_value=treatment_value,
             control_value=control_value,
             adjustment_set=adjustment_set,
-            outcome=outcome,
             df=df,
             effect_modifiers=None,
             alpha=alpha,
@@ -68,10 +67,10 @@ def estimate_iv_coefficient(self, df) -> float:
         outcome.
         """
         # Estimate the total effect of instrument I on outcome Y = abI + c1
-        ab = sm.OLS(df[self.outcome], df[[self.instrument]]).fit().params[self.instrument]
+        ab = sm.OLS(df[self.base_test_case.outcome_variable.name], df[[self.instrument]]).fit().params[self.instrument]
 
         # Estimate the direct effect of instrument I on treatment X = aI + c1
-        a = sm.OLS(df[self.treatment], df[[self.instrument]]).fit().params[self.instrument]
+        a = sm.OLS(df[self.base_test_case.treatment_variable.name], df[[self.instrument]]).fit().params[self.instrument]
 
         # Estimate the coefficient of I on X by cancelling
         return ab / a
diff --git a/causal_testing/estimation/ipcw_estimator.py b/causal_testing/estimation/ipcw_estimator.py
index a7ff15cc..49d48196 100644
--- a/causal_testing/estimation/ipcw_estimator.py
+++ b/causal_testing/estimation/ipcw_estimator.py
@@ -11,6 +11,8 @@
 from lifelines import CoxPHFitter
 
 from causal_testing.estimation.abstract_estimator import Estimator
+from causal_testing.testing.base_test_case import BaseTestCase
+from causal_testing.specification.variable import Input, Output
 
 logger = logging.getLogger(__name__)
 
@@ -56,13 +58,12 @@ def __init__(
                             treatment) with the most elements multiplied by `timesteps_per_observation`.
         """
         super().__init__(
-            [var for _, var, _ in treatment_strategy],
-            [val for _, _, val in treatment_strategy],
-            [val for _, _, val in control_strategy],
-            None,
-            outcome,
-            df,
-            None,
+            base_test_case=BaseTestCase(Input("_", float), Output(outcome, float)),
+            treatment_value=[val for _, _, val in treatment_strategy],
+            control_value=[val for _, _, val in control_strategy],
+            adjustment_set=None,
+            df=df,
+            effect_modifiers=None,
             alpha=alpha,
             query="",
         )
diff --git a/causal_testing/estimation/linear_regression_estimator.py b/causal_testing/estimation/linear_regression_estimator.py
index 85a4b178..41c8619d 100644
--- a/causal_testing/estimation/linear_regression_estimator.py
+++ b/causal_testing/estimation/linear_regression_estimator.py
@@ -10,6 +10,7 @@
 from causal_testing.specification.variable import Variable
 from causal_testing.estimation.genetic_programming_regression_fitter import GP
 from causal_testing.estimation.abstract_regression_estimator import RegressionEstimator
+from causal_testing.testing.base_test_case import BaseTestCase
 
 logger = logging.getLogger(__name__)
 
@@ -24,11 +25,10 @@ class LinearRegressionEstimator(RegressionEstimator):
     def __init__(
         # pylint: disable=too-many-arguments
         self,
-        treatment: str,
+        base_test_case: BaseTestCase,
         treatment_value: float,
         control_value: float,
         adjustment_set: set,
-        outcome: str,
         df: pd.DataFrame = None,
         effect_modifiers: dict[Variable:Any] = None,
         formula: str = None,
@@ -37,16 +37,15 @@ def __init__(
     ):
         # pylint: disable=too-many-arguments
         super().__init__(
-            treatment,
-            treatment_value,
-            control_value,
-            adjustment_set,
-            outcome,
-            df,
-            effect_modifiers,
-            formula,
-            alpha,
-            query,
+            base_test_case=base_test_case,
+            treatment_value=treatment_value,
+            control_value=control_value,
+            adjustment_set=adjustment_set,
+            df=df,
+            effect_modifiers=effect_modifiers,
+            alpha=alpha,
+            query=query,
+            formula=formula,
         )
         for term in self.effect_modifiers:
             self.adjustment_set.add(term)
@@ -81,8 +80,8 @@ def gp_formula(
         """
         gp = GP(
             df=self.df,
-            features=sorted(list(self.adjustment_set.union([self.treatment]))),
-            outcome=self.outcome,
+            features=sorted(list(self.adjustment_set.union([self.base_test_case.treatment_variable.name]))),
+            outcome=self.base_test_case.outcome_variable.name,
             extra_operators=extra_operators,
             sympy_conversions=sympy_conversions,
             seed=seed,
@@ -90,7 +89,7 @@ def gp_formula(
         )
         formula = gp.run_gp(ngen=ngen, pop_size=pop_size, num_offspring=num_offspring, seeds=seeds)
         formula = gp.simplify(formula)
-        self.formula = f"{self.outcome} ~ I({formula}) - 1"
+        self.formula = f"{self.base_test_case.outcome_variable.name} ~ I({formula}) - 1"
 
     def estimate_coefficient(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
         """Estimate the unit average treatment effect of the treatment on the outcome. That is, the change in outcome
@@ -100,7 +99,7 @@ def estimate_coefficient(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
         """
         model = self._run_regression()
         newline = "\n"
-        patsy_md = ModelDesc.from_formula(self.treatment)
+        patsy_md = ModelDesc.from_formula(self.base_test_case.treatment_variable.name)
 
         if any(
             (
@@ -111,9 +110,11 @@ def estimate_coefficient(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
             )
         ):
             design_info = dmatrix(self.formula.split("~")[1], self.df).design_info
-            treatment = design_info.column_names[design_info.term_name_slices[self.treatment]]
+            treatment = design_info.column_names[
+                design_info.term_name_slices[self.base_test_case.treatment_variable.name]
+            ]
         else:
-            treatment = [self.treatment]
+            treatment = [self.base_test_case.treatment_variable.name]
         assert set(treatment).issubset(
             model.params.index.tolist()
         ), f"{treatment} not in\n{'  ' + str(model.params.index).replace(newline, newline + '  ')}"
@@ -137,8 +138,8 @@ def estimate_ate(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
 
         # It is ABSOLUTELY CRITICAL that these go last, otherwise we can't index
         # the effect with "ate = t_test_results.effect[0]"
-        individuals.loc["control", [self.treatment]] = self.control_value
-        individuals.loc["treated", [self.treatment]] = self.treatment_value
+        individuals.loc["control", [self.base_test_case.treatment_variable.name]] = self.control_value
+        individuals.loc["treated", [self.base_test_case.treatment_variable.name]] = self.treatment_value
 
         # Perform a t-test to compare the predicted outcome of the control and treated individual (ATE)
         t_test_results = model.t_test(individuals.loc["treated"] - individuals.loc["control"])
diff --git a/causal_testing/estimation/logistic_regression_estimator.py b/causal_testing/estimation/logistic_regression_estimator.py
index 4fb828ba..55f79f25 100644
--- a/causal_testing/estimation/logistic_regression_estimator.py
+++ b/causal_testing/estimation/logistic_regression_estimator.py
@@ -39,5 +39,8 @@ def estimate_unit_odds_ratio(self) -> tuple[pd.Series, list[pd.Series, pd.Series
         :return: The odds ratio. Confidence intervals are not yet supported.
         """
         model = self._run_regression(self.df)
-        ci_low, ci_high = np.exp(model.conf_int(self.alpha).loc[self.treatment])
-        return pd.Series(np.exp(model.params[self.treatment])), [pd.Series(ci_low), pd.Series(ci_high)]
+        ci_low, ci_high = np.exp(model.conf_int(self.alpha).loc[self.base_test_case.treatment_variable.name])
+        return pd.Series(np.exp(model.params[self.base_test_case.treatment_variable.name])), [
+            pd.Series(ci_low),
+            pd.Series(ci_high),
+        ]
diff --git a/causal_testing/json_front/__init__.py b/causal_testing/json_front/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/causal_testing/json_front/json_class.py b/causal_testing/json_front/json_class.py
deleted file mode 100644
index d0541c38..00000000
--- a/causal_testing/json_front/json_class.py
+++ /dev/null
@@ -1,376 +0,0 @@
-"""This module contains the JsonUtility class, details of using this class can be found here:
-https://causal-testing-framework.readthedocs.io/en/latest/json_front_end.html"""
-
-import argparse
-import json
-import logging
-
-from collections.abc import Mapping
-from dataclasses import dataclass
-from pathlib import Path
-from statistics import StatisticsError
-
-import pandas as pd
-
-from causal_testing.specification.causal_dag import CausalDAG
-from causal_testing.specification.causal_specification import CausalSpecification
-from causal_testing.specification.scenario import Scenario
-from causal_testing.specification.variable import Input, Meta, Output
-from causal_testing.testing.causal_test_case import CausalTestCase
-from causal_testing.testing.causal_test_result import CausalTestResult
-from causal_testing.testing.base_test_case import BaseTestCase
-from causal_testing.testing.causal_test_adequacy import DataAdequacy
-
-from causal_testing.estimation.abstract_estimator import Estimator
-from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
-from causal_testing.estimation.logistic_regression_estimator import LogisticRegressionEstimator
-
-logger = logging.getLogger(__name__)
-
-
-class JsonUtility:
-    """
-    The JsonUtility Class provides the functionality to use structured JSON to setup and run causal tests on the
-    CausalTestingFramework.
-
-    :attr {Path} json_path: Path to the JSON input file.
-    :attr {Path} dag_path: Path to the dag.dot file containing the Causal DAG.
-    :attr {Path} data_path: Path to the csv data file.
-    :attr {Input} inputs: Causal variables representing inputs.
-    :attr {Output} outputs: Causal variables representing outputs.
-    :attr {Meta} metas: Causal variables representing metavariables.
-    :attr {pd.DataFrame}: Pandas DataFrame containing runtime data.
-    :attr {dict} test_plan: Dictionary containing the key value pairs from the loaded json test plan.
-    :attr {Scenario} scenario:
-    :attr {CausalSpecification} causal_specification:
-    """
-
-    def __init__(self, output_path: str, output_overwrite: bool = False):
-        self.input_paths = None
-        self.variables = {"inputs": {}, "outputs": {}, "metas": {}}
-        self.test_plan = None
-        self.scenario = None
-        self.causal_specification = None
-        self.output_path = Path(output_path)
-        self.df = None
-        self.check_file_exists(self.output_path, output_overwrite)
-
-    def set_paths(self, json_path: str, dag_path: str, data_paths: list[str] = None):
-        """
-        Takes a path of the directory containing all scenario specific files and creates individual paths for each file
-        :param json_path: string path representation to .json file containing test specifications
-        :param dag_path: string path representation to the .dot file containing the Causal DAG
-        :param data_paths: string path representation to the data files
-        """
-        if data_paths is None:
-            data_paths = []
-        self.input_paths = JsonClassPaths(json_path=json_path, dag_path=dag_path, data_paths=data_paths)
-
-    def setup(self, scenario: Scenario, ignore_cycles=False):
-        """Function to populate all the necessary parts of the json_class needed to execute tests"""
-        self.scenario = scenario
-        self._get_scenario_variables()
-        self.scenario.setup_treatment_variables()
-        self.causal_specification = CausalSpecification(
-            scenario=self.scenario, causal_dag=CausalDAG(self.input_paths.dag_path, ignore_cycles=ignore_cycles)
-        )
-        # Parse the JSON test plan
-        with open(self.input_paths.json_path, encoding="utf-8") as f:
-            self.test_plan = json.load(f)
-        # Populate the data
-        if self.input_paths.data_paths:
-            self.df = pd.concat([pd.read_csv(data_file, header=0) for data_file in self.input_paths.data_paths])
-        if self.df is None or len(self.df) == 0:
-            raise ValueError(
-                "No data found. Please either provide a path to a file containing data or manually populate the .data "
-                "attribute with a dataframe before calling .setup()"
-            )
-        self._populate_metas()
-
-    def run_json_tests(self, effects: dict, estimators: dict, f_flag: bool = False):
-        """Runs and evaluates each test case specified in the JSON input
-
-        :param effects: Dictionary mapping effect class instances to string representations.
-        :param estimators: Dictionary mapping estimator classes to string representations.
-        :param f_flag: Failure flag that if True the script will stop executing when a test fails.
-        """
-        for test in self.test_plan["tests"]:
-            if "skip" in test and test["skip"]:
-                continue
-            test["estimator"] = estimators[test["estimator"]]
-            # If we have specified concrete control and treatment value
-            if "mutations" not in test:
-                failed, msg = self._run_concrete_metamorphic_test(test, f_flag, effects)
-            # If we have a variable to mutate
-            else:
-                if test["estimate_type"] in ["coefficient", "unit_odds_ratio"]:
-                    failed, msg = self._run_coefficient_test(
-                        test=test, f_flag=f_flag, effects=effects, estimate_type=test["estimate_type"]
-                    )
-                else:
-                    raise NotImplementedError("Tried to call deprecated method _run_metamorphic_tests")
-            test["failed"] = failed
-            test["result"] = msg
-        return self.test_plan["tests"]
-
-    def _run_coefficient_test(self, test: dict, f_flag: bool, effects: dict, estimate_type: str = "coefficient"):
-        """Builds structures and runs test case for tests with an estimate_type of 'coefficient'.
-
-        :param test: Single JSON test definition stored in a mapping (dict)
-        :param f_flag: Failure flag that if True the script will stop executing when a test fails.
-        :param effects: Dictionary mapping effect class instances to string representations.
-        :return: String containing the message to be outputted
-        """
-        base_test_case = BaseTestCase(
-            treatment_variable=next(self.scenario.variables[v] for v in test["mutations"]),
-            outcome_variable=next(self.scenario.variables[v] for v in test["expected_effect"]),
-            effect=test.get("effect", "direct"),
-        )
-        assert len(test["expected_effect"]) == 1, "Can only have one expected effect."
-        causal_test_case = CausalTestCase(
-            base_test_case=base_test_case,
-            expected_causal_effect=next(effects[effect] for variable, effect in test["expected_effect"].items()),
-            estimate_type=estimate_type,
-            effect_modifier_configuration={self.scenario.variables[v] for v in test.get("effect_modifiers", [])},
-        )
-        failed, result = self._execute_test_case(causal_test_case=causal_test_case, test=test, f_flag=f_flag)
-
-        msg = (
-            f"Executing test: {test['name']} \n"
-            + f"  {causal_test_case} \n"
-            + "  "
-            + ("\n  ").join(str(result).split("\n"))
-            + "==============\n"
-            + f"  Result: {'FAILED' if failed else 'Passed'}"
-        )
-        self._append_to_file(msg, logging.INFO)
-        return failed, result
-
-    def _run_concrete_metamorphic_test(self, test: dict, f_flag: bool, effects: dict):
-        outcome_variable = next(iter(test["expected_effect"]))  # Take first key from dictionary of expected effect
-        base_test_case = BaseTestCase(
-            treatment_variable=self.variables["inputs"][test["treatment_variable"]],
-            outcome_variable=self.variables["outputs"][outcome_variable],
-        )
-
-        causal_test_case = CausalTestCase(
-            base_test_case=base_test_case,
-            expected_causal_effect=effects[test["expected_effect"][outcome_variable]],
-            estimate_type=test["estimate_type"],
-        )
-        failed, msg = self._execute_test_case(causal_test_case=causal_test_case, test=test, f_flag=f_flag)
-
-        msg = (
-            f"Executing concrete test: {test['name']} \n"
-            + f"treatment variable: {test['treatment_variable']} \n"
-            + f"outcome_variable = {outcome_variable} \n"
-            + f"control value = {test['control_value']}, treatment value = {test['treatment_value']} \n"
-            + f"Result: {'FAILED' if failed else 'Passed'}"
-        )
-        self._append_to_file(msg, logging.INFO)
-        return failed, msg
-
-    def _execute_tests(self, concrete_tests, test, f_flag):
-        failures = 0
-        details = []
-        if "formula" in test:
-            self._append_to_file(f"Estimator formula used for test: {test['formula']}")
-
-        for concrete_test in concrete_tests:
-            failed, result = self._execute_test_case(concrete_test, test, f_flag)
-            details.append(result)
-            if failed:
-                failures += 1
-        return failures, details
-
-    def _populate_metas(self):
-        """
-        Populate data with meta-variable values and add distributions to Causal Testing Framework Variables
-        """
-        for meta in self.scenario.variables_of_type(Meta):
-            meta.populate(self.df)
-
-    def _execute_test_case(
-        self, causal_test_case: CausalTestCase, test: Mapping, f_flag: bool
-    ) -> (bool, CausalTestResult):
-        """Executes a singular test case, prints the results and returns the test case result
-        :param causal_test_case: The concrete test case to be executed
-        :param test: Single JSON test definition stored in a mapping (dict)
-        :param f_flag: Failure flag that if True the script will stop executing when a test fails.
-        :return: A boolean that if True indicates the causal test case passed and if false indicates the test case
-         failed.
-        :rtype: bool
-        """
-        failed = False
-
-        estimation_model = self._setup_test(causal_test_case=causal_test_case, test=test)
-        causal_test_case.estimator = estimation_model
-        causal_test_result = causal_test_case.execute_test()
-        test_passes = causal_test_case.expected_causal_effect.apply(causal_test_result)
-
-        if "coverage" in test and test["coverage"]:
-            adequacy_metric = DataAdequacy(causal_test_case, estimation_model)
-            adequacy_metric.measure_adequacy()
-            causal_test_result.adequacy = adequacy_metric
-
-        if causal_test_result.ci_low() is not None and causal_test_result.ci_high() is not None:
-            result_string = (
-                f"{causal_test_result.ci_low()} < {causal_test_result.test_value.value} <  "
-                f"{causal_test_result.ci_high()}"
-            )
-        else:
-            result_string = f"{causal_test_result.test_value.value} no confidence intervals"
-
-        if not test_passes:
-            if f_flag:
-                raise StatisticsError(
-                    f"{causal_test_case}\n    FAILED - expected {causal_test_case.expected_causal_effect}, "
-                    f"got {result_string}"
-                )
-            failed = True
-        return failed, causal_test_result
-
-    def _setup_test(self, causal_test_case: CausalTestCase, test: Mapping) -> Estimator:
-        """Create the necessary inputs for a single test case
-        :param causal_test_case: The concrete test case to be executed
-        :param test: Single JSON test definition stored in a mapping (dict)
-        :returns:
-                - estimation_model - Estimator instance for the test being run
-        """
-        estimator_kwargs = {}
-        treatment_variable = next(self.scenario.variables[v] for v in test["mutations"])
-        if "formula" in test:
-            if test["estimator"] != (LinearRegressionEstimator or LogisticRegressionEstimator):
-                raise TypeError(
-                    "Currently only LinearRegressionEstimator and LogisticRegressionEstimator supports the use of "
-                    "formulas"
-                )
-            estimator_kwargs["formula"] = test["formula"]
-            estimator_kwargs["adjustment_set"] = None
-        else:
-            minimal_adjustment_set = self.causal_specification.causal_dag.identification(
-                causal_test_case.base_test_case
-            )
-            minimal_adjustment_set = minimal_adjustment_set - {treatment_variable}
-            estimator_kwargs["adjustment_set"] = minimal_adjustment_set
-
-        estimator_kwargs["query"] = test["query"] if "query" in test else ""
-        estimator_kwargs["treatment"] = treatment_variable.name
-        estimator_kwargs["treatment_value"] = test.get("treatment_value")
-        estimator_kwargs["control_value"] = test.get("control_value")
-        estimator_kwargs["outcome"] = next(v for v in test["expected_effect"])
-        estimator_kwargs["effect_modifiers"] = causal_test_case.effect_modifier_configuration
-        estimator_kwargs["df"] = self.df
-        estimator_kwargs["alpha"] = test["alpha"] if "alpha" in test else 0.05
-
-        estimation_model = test["estimator"](**estimator_kwargs)
-        return estimation_model
-
-    def _append_to_file(self, line: str, log_level: int = None):
-        """Appends given line(s) to the current output file. If log_level is specified it also logs that message to the
-        logging level.
-        :param line: The line or lines of text to be appended to the file
-        :param log_level: An integer representing the logging level as specified by pythons inbuilt logging module. It
-        is possible to use the inbuilt logging level variables such as logging.INFO and logging.WARNING
-        """
-        with open(self.output_path, "a", encoding="utf-8") as f:
-            f.write(line + "\n")
-        if log_level:
-            logger.log(level=log_level, msg=line)
-
-    def _get_scenario_variables(self):
-        for input_var in self.scenario.inputs():
-            self.variables["inputs"][input_var.name] = input_var
-        for output_var in self.scenario.outputs():
-            self.variables["outputs"][output_var.name] = output_var
-        for meta_var in self.scenario.metas():
-            self.variables["metas"][meta_var.name] = meta_var
-
-    @staticmethod
-    def check_file_exists(output_path: Path, overwrite: bool):
-        """Method that checks if the given path to an output file already exists. If overwrite is true the check is
-        passed.
-        :param output_path: File path for the output file of the JSON Frontend
-        :param overwrite: bool that if true, the current file can be overwritten
-        """
-        if output_path.is_file():
-            if overwrite:
-                output_path.unlink()
-            else:
-                raise FileExistsError(f"Chosen file output ({output_path}) already exists")
-
-    @staticmethod
-    def get_args(test_args=None) -> argparse.Namespace:
-        """Command-line arguments
-
-        :return: parsed command line arguments
-        """
-        parser = argparse.ArgumentParser(
-            description="A script for parsing json config files for the Causal Testing Framework"
-        )
-        parser.add_argument(
-            "-f",
-            help="if included, the script will stop if a test fails",
-            action="store_true",
-        )
-        parser.add_argument(
-            "-w",
-            help="Specify to overwrite any existing output files. This can lead to the loss of existing outputs if not "
-            "careful",
-            action="store_true",
-        )
-        parser.add_argument(
-            "--log_path",
-            help="Specify a directory to change the location of the log file",
-        )
-        parser.add_argument(
-            "--data_path",
-            help="Specify path to file containing runtime data",
-            nargs="+",
-        )
-        parser.add_argument(
-            "--dag_path",
-            help="Specify path to file containing the DAG, normally a .dot file",
-            required=True,
-        )
-        parser.add_argument(
-            "--json_path",
-            help="Specify path to file containing JSON tests, normally a .json file",
-            required=True,
-        )
-        return parser.parse_args(test_args)
-
-
-@dataclass
-class JsonClassPaths:
-    """
-    A dataclass that converts strings of paths to Path objects for use in the JsonUtility class
-    :param json_path: string path representation to .json file containing test specifications
-    :param dag_path: string path representation to the .dot file containing the Causal DAG
-    :param data_path: string path representation to the data file
-    """
-
-    json_path: Path
-    dag_path: Path
-    data_paths: list[Path]
-
-    def __init__(self, json_path: str, dag_path: str, data_paths: str):
-        self.json_path = Path(json_path)
-        self.dag_path = Path(dag_path)
-        self.data_paths = [Path(path) for path in data_paths]
-
-
-@dataclass
-class CausalVariables:
-    """
-    A dataclass that converts lists of dictionaries into lists of Causal Variables
-    """
-
-    def __init__(self, inputs: list[dict], outputs: list[dict], metas: list[dict]):
-        self.inputs = [Input(**i) for i in inputs]
-        self.outputs = [Output(**o) for o in outputs]
-        self.metas = [Meta(**m) for m in metas] if metas else []
-
-    def __iter__(self):
-        for var in self.inputs + self.outputs + self.metas:
-            yield var
diff --git a/causal_testing/surrogate/causal_surrogate_assisted.py b/causal_testing/surrogate/causal_surrogate_assisted.py
index ffeadbd2..ba58d1bb 100644
--- a/causal_testing/surrogate/causal_surrogate_assisted.py
+++ b/causal_testing/surrogate/causal_surrogate_assisted.py
@@ -86,7 +86,7 @@ def execute(
 
         for i in range(max_executions):
             surrogate_models = self.generate_surrogates(self.specification, df)
-            candidate_test_case, _, surrogate = self.search_algorithm.search(surrogate_models, self.specification)
+            candidate_test_case, _, surrogate_model = self.search_algorithm.search(surrogate_models, self.specification)
 
             self.simulator.startup()
             test_result = self.simulator.run_with_config(candidate_test_case)
@@ -100,11 +100,13 @@ def execute(
                 df = pd.concat([df, test_result_df], ignore_index=True)
             if test_result.fault:
                 print(
-                    f"Fault found between {surrogate.treatment} causing {surrogate.outcome}. Contradiction with "
-                    f"expected {surrogate.expected_relationship}."
+                    f"Fault found between {surrogate_model.base_test_case.treatment_variable.name} causing "
+                    f"{surrogate_model.base_test_case.outcome_variable.name}. Contradiction with "
+                    f"expected {surrogate_model.expected_relationship}."
                 )
                 test_result.relationship = (
-                    f"{surrogate.treatment} -> {surrogate.outcome} expected {surrogate.expected_relationship}"
+                    f"{surrogate_model.base_test_case.treatment_variable.name} -> "
+                    f"{surrogate_model.base_test_case.outcome_variable.name} expected {surrogate_model.expected_relationship}"
                 )
                 return test_result, i + 1, df
 
@@ -131,11 +133,10 @@ def generate_surrogates(
                 minimal_adjustment_set = specification.causal_dag.identification(base_test_case, specification.scenario)
 
                 surrogate = CubicSplineRegressionEstimator(
-                    u,
+                    base_test_case,
                     0,
                     0,
                     minimal_adjustment_set,
-                    v,
                     4,
                     df=df,
                     expected_relationship=edge_metadata["expected"],
diff --git a/causal_testing/surrogate/surrogate_search_algorithms.py b/causal_testing/surrogate/surrogate_search_algorithms.py
index 3911d9ec..14e3254f 100644
--- a/causal_testing/surrogate/surrogate_search_algorithms.py
+++ b/causal_testing/surrogate/surrogate_search_algorithms.py
@@ -31,35 +31,35 @@ def search(
     ) -> list:
         solutions = []
 
-        for surrogate in surrogate_models:
-            contradiction_function = self.contradiction_functions[surrogate.expected_relationship]
+        for surrogate_model in surrogate_models:
+            contradiction_function = self.contradiction_functions[surrogate_model.expected_relationship]
 
             # The GA fitness function after including required variables into the function's scope
             # Unused arguments are required for pygad's fitness function signature
             # pylint: disable=cell-var-from-loop
             def fitness_function(ga, solution, idx):  # pylint: disable=unused-argument
-                surrogate.control_value = solution[0] - self.delta
-                surrogate.treatment_value = solution[0] + self.delta
+                surrogate_model.control_value = solution[0] - self.delta
+                surrogate_model.base_test_case.treatment_variable.name_value = solution[0] + self.delta
 
                 adjustment_dict = {}
-                for i, adjustment in enumerate(surrogate.adjustment_set):
+                for i, adjustment in enumerate(surrogate_model.adjustment_set):
                     adjustment_dict[adjustment] = solution[i + 1]
 
-                ate = surrogate.estimate_ate_calculated(adjustment_dict)
+                ate = surrogate_model.estimate_ate_calculated(adjustment_dict)
                 if len(ate) > 1:
                     raise ValueError(
                         "Multiple ate values provided but currently only single values supported in this method"
                     )
                 return contradiction_function(ate[0])
 
-            gene_types, gene_space = self.create_gene_types(surrogate, specification)
+            gene_types, gene_space = self.create_gene_types(surrogate_model, specification)
 
             ga = GA(
                 num_generations=200,
                 num_parents_mating=4,
                 fitness_func=fitness_function,
                 sol_per_pop=10,
-                num_genes=1 + len(surrogate.adjustment_set),
+                num_genes=1 + len(surrogate_model.adjustment_set),
                 gene_space=gene_space,
                 gene_type=gene_types,
             )
@@ -77,10 +77,10 @@ def fitness_function(ga, solution, idx):  # pylint: disable=unused-argument
             solution, fitness, _ = ga.best_solution()
 
             solution_dict = {}
-            solution_dict[surrogate.treatment] = solution[0]
-            for idx, adj in enumerate(surrogate.adjustment_set):
+            solution_dict[surrogate_model.base_test_case.treatment_variable.name] = solution[0]
+            for idx, adj in enumerate(surrogate_model.adjustment_set):
                 solution_dict[adj] = solution[idx + 1]
-            solutions.append((solution_dict, fitness, surrogate))
+            solutions.append((solution_dict, fitness, surrogate_model))
 
         return max(solutions, key=itemgetter(1))  # This can be done better with fitness normalisation between edges
 
@@ -93,7 +93,7 @@ def create_gene_types(
         :param specification: The Causal Specification (combination of Scenario and Causal Dag)"""
 
         var_space = {}
-        var_space[surrogate_model.treatment] = {}
+        var_space[surrogate_model.base_test_case.treatment_variable.name] = {}
         for adj in surrogate_model.adjustment_set:
             var_space[adj] = {}
 
@@ -111,12 +111,14 @@ def create_gene_types(
                     else:
                         var_space[rel_split[0]]["high"] = datatype(rel_split[2])
         gene_space = []
-        gene_space.append(var_space[surrogate_model.treatment])
+        gene_space.append(var_space[surrogate_model.base_test_case.treatment_variable.name])
         for adj in surrogate_model.adjustment_set:
             gene_space.append(var_space[adj])
 
         gene_types = []
-        gene_types.append(specification.scenario.variables.get(surrogate_model.treatment).datatype)
+        gene_types.append(
+            specification.scenario.variables.get(surrogate_model.base_test_case.treatment_variable.name).datatype
+        )
         for adj in surrogate_model.adjustment_set:
             gene_types.append(specification.scenario.variables.get(adj).datatype)
         return gene_types, gene_space
diff --git a/causal_testing/testing/causal_test_result.py b/causal_testing/testing/causal_test_result.py
index bfcfe826..1d03e842 100644
--- a/causal_testing/testing/causal_test_result.py
+++ b/causal_testing/testing/causal_test_result.py
@@ -53,12 +53,16 @@ def push(s, inc="  "):
         result_str = str(self.test_value.value)
         if "\n" in result_str:
             result_str = "\n" + push(self.test_value.value)
+        if isinstance(self.estimator.base_test_case.treatment_variable, list):
+            treatment = [x.name for x in self.estimator.base_test_case.treatment_variable]
+        else:
+            treatment = self.estimator.base_test_case.treatment_variable.name
         base_str = (
             f"Causal Test Result\n==============\n"
-            f"Treatment: {self.estimator.treatment}\n"
+            f"Treatment: {treatment}\n"
             f"Control value: {self.estimator.control_value}\n"
             f"Treatment value: {self.estimator.treatment_value}\n"
-            f"Outcome: {self.estimator.outcome}\n"
+            f"Outcome: {self.estimator.base_test_case.outcome_variable.name}\n"
             f"Adjustment set: {self.adjustment_set}\n"
         )
         if hasattr(self.estimator, "formula"):
@@ -80,11 +84,15 @@ def to_dict(self, json=False):
         """Return result contents as a dictionary
         :return: Dictionary containing contents of causal_test_result
         """
+        if isinstance(self.estimator.base_test_case.treatment_variable, list):
+            treatment = [x.name for x in self.estimator.base_test_case.treatment_variable]
+        else:
+            treatment = self.estimator.base_test_case.treatment_variable.name
         base_dict = {
-            "treatment": self.estimator.treatment,
+            "treatment": treatment,
             "control_value": self.estimator.control_value,
             "treatment_value": self.estimator.treatment_value,
-            "outcome": self.estimator.outcome,
+            "outcome": self.estimator.base_test_case.outcome_variable.name,
             "adjustment_set": list(self.adjustment_set) if json else self.adjustment_set,
             "effect_measure": self.test_value.type,
             "effect_estimate": (
@@ -122,7 +130,7 @@ def ci_valid(self) -> bool:
     def summary(self):
         """Summarise the causal test result as an intuitive sentence."""
         print(
-            f"The causal effect of changing {self.estimator.treatment} = {self.estimator.control_value} to "
-            f"{self.estimator.treatment}' = {self.estimator.treatment_value} is {self.test_value.value}"
+            f"The causal effect of changing {self.estimator.base_test_case.treatment_variable.name} = {self.estimator.control_value} to "
+            f"{self.estimator.base_test_case.treatment_variable.name}' = {self.estimator.treatment_value} is {self.test_value.value}"
             f"(95% confidence intervals: {self.confidence_intervals})."
         )
diff --git a/examples/covasim_/doubling_beta/example_beta.py b/examples/covasim_/doubling_beta/example_beta.py
index b696c658..1655b352 100644
--- a/examples/covasim_/doubling_beta/example_beta.py
+++ b/examples/covasim_/doubling_beta/example_beta.py
@@ -1,35 +1,20 @@
+import os
+import logging
+
 from pathlib import Path
 import matplotlib.pyplot as plt
 import pandas as pd
 import numpy as np
-from causal_testing.specification.causal_dag import CausalDAG
-from causal_testing.specification.scenario import Scenario
 from causal_testing.specification.variable import Input, Output
-from causal_testing.specification.causal_specification import CausalSpecification
 from causal_testing.testing.causal_test_case import CausalTestCase
 from causal_testing.testing.causal_test_outcome import Positive
 from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
 from causal_testing.testing.base_test_case import BaseTestCase
-from matplotlib.pyplot import rcParams
 
-import os
-import logging
 
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.DEBUG, format="%(message)s")
 
-# Uncommenting the code below will make all graphs publication quality but requires a suitable latex installation
-
-# plt.rcParams["figure.figsize"] = (8, 8)
-# rc_fonts = {
-#     "font.size": 8,
-#     "figure.figsize": (10, 6),
-#     "text.usetex": True,
-#     "font.family": "serif",
-#     "text.latex.preamble": r"\usepackage{libertine}",
-# }
-# rcParams.update(rc_fonts)
-
 ROOT = Path(os.path.realpath(os.path.dirname(__file__)))
 OBSERVATIONAL_DATA_PATH = ROOT / "data" / "10k_observational_data.csv"
 
@@ -53,16 +38,8 @@ def doubling_beta_CATE_on_csv(
     past_execution_df = pd.read_csv(observational_data_path)
 
     # 2. Create variables
-    pop_size = Input("pop_size", int)
-    pop_infected = Input("pop_infected", int)
-    n_days = Input("n_days", int)
     cum_infections = Output("cum_infections", int)
-    cum_deaths = Output("cum_deaths", int)
-    location = Input("location", str)
-    variants = Input("variants", str)
-    avg_age = Input("avg_age", float)
     beta = Input("beta", float)
-    contacts = Input("contacts", float)
 
     # 5. Create a base test case
     base_test_case = BaseTestCase(treatment_variable=beta, outcome_variable=cum_infections)
@@ -72,11 +49,10 @@ def doubling_beta_CATE_on_csv(
         base_test_case=base_test_case,
         expected_causal_effect=Positive,
         estimator=LinearRegressionEstimator(
-            "beta",
+            base_test_case,
             0.032,
             0.016,
             {"avg_age", "contacts"},  # We use custom adjustment set
-            "cum_infections",
             df=past_execution_df,
             formula="cum_infections ~ beta + I(beta ** 2) + avg_age + contacts",
         ),
@@ -90,11 +66,10 @@ def doubling_beta_CATE_on_csv(
         base_test_case=base_test_case,
         expected_causal_effect=Positive,
         estimator=LinearRegressionEstimator(
-            "beta",
-            0.032,
-            0.016,
-            set(),
-            "cum_infections",
+            base_test_case=base_test_case,
+            treatment_value=0.032,
+            control_value=0.016,
+            adjustment_set=set(),
             df=past_execution_df,
             formula="cum_infections ~ beta + I(beta ** 2)",
         ),
diff --git a/examples/lr91/example_max_conductances.py b/examples/lr91/example_max_conductances.py
index 1fbc8779..6bd486c2 100644
--- a/examples/lr91/example_max_conductances.py
+++ b/examples/lr91/example_max_conductances.py
@@ -123,18 +123,16 @@ def effects_on_APD90(observational_data_path, treatment_var, control_val, treatm
     )
 
     # 5. Create a causal specification from the scenario and causal DAG
-    causal_specification = CausalSpecification(scenario, causal_dag)
     base_test_case = BaseTestCase(treatment_var, apd90)
     # 6. Create a causal test case
     causal_test_case = CausalTestCase(
         base_test_case=base_test_case,
         expected_causal_effect=expected_causal_effect,
         estimator=LinearRegressionEstimator(
-            treatment=treatment_var.name,
+            base_test_case=base_test_case,
             treatment_value=treatment_val,
             control_value=control_val,
             adjustment_set=causal_dag.identification(base_test_case),
-            outcome="APD90",
             df=pd.read_csv(observational_data_path),
         ),
     )
diff --git a/examples/poisson-line-process/example_pure_python.py b/examples/poisson-line-process/example_pure_python.py
index 19fedc0c..0788896c 100644
--- a/examples/poisson-line-process/example_pure_python.py
+++ b/examples/poisson-line-process/example_pure_python.py
@@ -30,16 +30,24 @@ def estimate_ate(self) -> float:
         """Estimate the outcomes under control and treatment.
         :return: The empirical average treatment effect.
         """
-        control_results = self.df.where(self.df[self.treatment] == self.control_value)[self.outcome].dropna()
-        treatment_results = self.df.where(self.df[self.treatment] == self.treatment_value)[self.outcome].dropna()
+        control_results = self.df.where(self.df[self.base_test_case.treatment_variable.name] == self.control_value)[
+            self.base_test_case.outcome_variable.name
+        ].dropna()
+        treatment_results = self.df.where(self.df[self.base_test_case.treatment_variable.name] == self.treatment_value)[
+            self.base_test_case.outcome_variable.name
+        ].dropna()
         return treatment_results.mean() - control_results.mean(), None
 
     def estimate_risk_ratio(self) -> float:
         """Estimate the outcomes under control and treatment.
         :return: The empirical average treatment effect.
         """
-        control_results = self.df.where(self.df[self.treatment] == self.control_value)[self.outcome].dropna()
-        treatment_results = self.df.where(self.df[self.treatment] == self.treatment_value)[self.outcome].dropna()
+        control_results = self.df.where(self.df[self.base_test_case.treatment_variable.name] == self.control_value)[
+            self.base_test_case.outcome_variable.name
+        ].dropna()
+        treatment_results = self.df.where(self.df[self.base_test_case.treatment_variable.name] == self.treatment_value)[
+            self.base_test_case.outcome_variable.name
+        ].dropna()
         return treatment_results.mean() / control_results.mean(), None
 
 
@@ -87,11 +95,10 @@ def test_poisson_intensity_num_shapes(save=False):
                 expected_causal_effect=ExactValue(4, atol=0.5),
                 estimate_type="risk_ratio",
                 estimator=EmpiricalMeanEstimator(
-                    treatment=base_test_case.treatment_variable.name,
+                    base_test_case=base_test_case,
                     treatment_value=treatment_value,
                     control_value=control_value,
                     adjustment_set=causal_specification.causal_dag.identification(base_test_case),
-                    outcome=base_test_case.outcome_variable.name,
                     df=pd.read_csv(f"{ROOT}/data/smt_100/data_smt_wh{wh}_100.csv", index_col=0).astype(float),
                     effect_modifiers=None,
                     alpha=0.05,
@@ -103,11 +110,10 @@ def test_poisson_intensity_num_shapes(save=False):
                 expected_causal_effect=ExactValue(4, atol=0.5),
                 estimate_type="risk_ratio",
                 estimator=LinearRegressionEstimator(
-                    treatment=base_test_case.treatment_variable.name,
+                    base_test_case=base_test_case,
                     treatment_value=treatment_value,
                     control_value=control_value,
                     adjustment_set=causal_specification.causal_dag.identification(base_test_case),
-                    outcome=base_test_case.outcome_variable.name,
                     df=observational_df,
                     effect_modifiers=None,
                     formula="num_shapes_unit ~ I(intensity ** 2) + intensity - 1",
@@ -149,11 +155,10 @@ def test_poisson_width_num_shapes(save=False):
             estimate_type="ate_calculated",
             effect_modifier_configuration={"intensity": i},
             estimator=LinearRegressionEstimator(
-                treatment=base_test_case.treatment_variable.name,
+                base_test_case=base_test_case,
                 treatment_value=w + 1.0,
                 control_value=float(w),
                 adjustment_set=causal_specification.causal_dag.identification(base_test_case),
-                outcome=base_test_case.outcome_variable.name,
                 df=df,
                 effect_modifiers={"intensity": i},
                 formula="num_shapes_unit ~ width + I(intensity ** 2)+I(width ** -1)+intensity-1",
diff --git a/tests/estimation_tests/test_cubic_spline_estimator.py b/tests/estimation_tests/test_cubic_spline_estimator.py
index b55b0a5a..2f7ecaef 100644
--- a/tests/estimation_tests/test_cubic_spline_estimator.py
+++ b/tests/estimation_tests/test_cubic_spline_estimator.py
@@ -9,6 +9,8 @@
 from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
 
 from tests.estimation_tests.test_linear_regression_estimator import TestLinearRegressionEstimator
+from causal_testing.testing.base_test_case import BaseTestCase
+from causal_testing.specification.variable import Input, Output
 
 
 class TestCubicSplineRegressionEstimator(TestLinearRegressionEstimator):
@@ -24,7 +26,9 @@ def test_program_11_3_cublic_spline(self):
 
         df = self.chapter_11_df.copy()
 
-        cublic_spline_estimator = CubicSplineRegressionEstimator("treatments", 1, 0, set(), "outcomes", 3, df)
+        base_test_case = BaseTestCase(Input("treatments", float), Output("outcomes", float))
+
+        cublic_spline_estimator = CubicSplineRegressionEstimator(base_test_case, 1, 0, set(), 3, df)
 
         ate_1 = cublic_spline_estimator.estimate_ate_calculated()
 
diff --git a/tests/estimation_tests/test_experimental_estimator.py b/tests/estimation_tests/test_experimental_estimator.py
index 1ed84cd3..cabf13a4 100644
--- a/tests/estimation_tests/test_experimental_estimator.py
+++ b/tests/estimation_tests/test_experimental_estimator.py
@@ -1,5 +1,7 @@
 import unittest
 from causal_testing.estimation.experimental_estimator import ExperimentalEstimator
+from causal_testing.testing.base_test_case import BaseTestCase
+from causal_testing.specification.variable import Input, Output
 
 
 class SystemUnderTest:
@@ -33,11 +35,10 @@ class TestExperimentalEstimator(unittest.TestCase):
 
     def test_estimate_ate(self):
         estimator = ConcreteExperimentalEstimator(
-            treatment="X",
+            base_test_case=BaseTestCase(Input("X", float), Output("Y", float)),
             treatment_value=2,
             control_value=1,
             adjustment_set={},
-            outcome="Y",
             alpha=0.05,
             repeats=200,
         )
@@ -48,11 +49,10 @@ def test_estimate_ate(self):
 
     def test_estimate_risk_ratio(self):
         estimator = ConcreteExperimentalEstimator(
-            treatment="X",
+            base_test_case=BaseTestCase(Input("X", float), Output("Y", float)),
             treatment_value=2,
             control_value=1,
             adjustment_set={},
-            outcome="Y",
             effect_modifiers={},
             alpha=0.05,
             repeats=200,
diff --git a/tests/estimation_tests/test_instrumental_variable_estimator.py b/tests/estimation_tests/test_instrumental_variable_estimator.py
index c166b75e..c22819d7 100644
--- a/tests/estimation_tests/test_instrumental_variable_estimator.py
+++ b/tests/estimation_tests/test_instrumental_variable_estimator.py
@@ -1,11 +1,10 @@
 import unittest
 import pandas as pd
 import numpy as np
-import matplotlib.pyplot as plt
-from causal_testing.specification.variable import Input
-from causal_testing.utils.validation import CausalValidator
 
 from causal_testing.estimation.instrumental_variable_estimator import InstrumentalVariableEstimator
+from causal_testing.testing.base_test_case import BaseTestCase
+from causal_testing.specification.variable import Input, Output
 
 
 class TestInstrumentalVariableEstimator(unittest.TestCase):
@@ -26,27 +25,13 @@ def test_estimate_coefficient(self):
         """
         iv_estimator = InstrumentalVariableEstimator(
             df=self.df,
-            treatment="X",
+            base_test_case=BaseTestCase(Input("X", float), Output("Y", float)),
             treatment_value=None,
             control_value=None,
             adjustment_set=set(),
-            outcome="Y",
-            instrument="Z",
-        )
-        self.assertEqual(iv_estimator.estimate_coefficient(self.df), 2)
-
-    def test_estimate_coefficient(self):
-        """
-        Test we get the correct coefficient.
-        """
-        iv_estimator = InstrumentalVariableEstimator(
-            df=self.df,
-            treatment="X",
-            treatment_value=None,
-            control_value=None,
-            adjustment_set=set(),
-            outcome="Y",
             instrument="Z",
         )
         coefficient, [low, high] = iv_estimator.estimate_coefficient()
         self.assertEqual(coefficient[0], 2)
+        self.assertEqual(low[0], 2)
+        self.assertEqual(high[0], 2)
diff --git a/tests/estimation_tests/test_linear_regression_estimator.py b/tests/estimation_tests/test_linear_regression_estimator.py
index db2c3e48..0aa121ed 100644
--- a/tests/estimation_tests/test_linear_regression_estimator.py
+++ b/tests/estimation_tests/test_linear_regression_estimator.py
@@ -6,7 +6,8 @@
 from causal_testing.utils.validation import CausalValidator
 
 from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
-from causal_testing.estimation.genetic_programming_regression_fitter import reciprocal
+from causal_testing.testing.base_test_case import BaseTestCase
+from causal_testing.specification.variable import Input, Output
 
 
 def load_nhefs_df():
@@ -62,24 +63,27 @@ def setUpClass(cls) -> None:
         cls.nhefs_df = load_nhefs_df()
         cls.chapter_11_df = load_chapter_11_df()
         cls.scarf_df = pd.read_csv("tests/resources/data/scarf_data.csv")
+        cls.base_test_case = BaseTestCase(Input("treatments", float), Output("outcomes", float))
+        cls.program_15_base_test_case = BaseTestCase(Input("qsmk", float), Output("wt82_71", float))
 
     def test_query(self):
         df = self.nhefs_df
         linear_regression_estimator = LinearRegressionEstimator(
-            "treatments", None, None, set(), "outcomes", df, query="sex==1"
+            self.base_test_case, None, None, set(), df, query="sex==1"
         )
         self.assertTrue(linear_regression_estimator.df.sex.all())
 
     def test_linear_regression_categorical_ate(self):
         df = self.scarf_df.copy()
-        logistic_regression_estimator = LinearRegressionEstimator("color", None, None, set(), "completed", df)
+        base_test_case = BaseTestCase(Input("color", float), Output("completed", float))
+        logistic_regression_estimator = LinearRegressionEstimator(base_test_case, None, None, set(), df)
         ate, confidence = logistic_regression_estimator.estimate_coefficient()
         self.assertTrue(all([ci_low < 0 < ci_high for ci_low, ci_high in zip(confidence[0], confidence[1])]))
 
     def test_program_11_2(self):
         """Test whether our linear regression implementation produces the same results as program 11.2 (p. 141)."""
         df = self.chapter_11_df
-        linear_regression_estimator = LinearRegressionEstimator("treatments", None, None, set(), "outcomes", df)
+        linear_regression_estimator = LinearRegressionEstimator(self.base_test_case, None, None, set(), df)
         ate, _ = linear_regression_estimator.estimate_coefficient()
 
         self.assertEqual(
@@ -103,9 +107,10 @@ def test_program_11_3(self):
         """Test whether our linear regression implementation produces the same results as program 11.3 (p. 144)."""
         df = self.chapter_11_df.copy()
         linear_regression_estimator = LinearRegressionEstimator(
-            "treatments", None, None, set(), "outcomes", df, formula="outcomes ~ treatments + I(treatments ** 2)"
+            self.base_test_case, None, None, set(), df, formula="outcomes ~ treatments + I(treatments ** 2)"
         )
         ate, _ = linear_regression_estimator.estimate_coefficient()
+        print(linear_regression_estimator.model.summary())
         self.assertEqual(
             round(
                 linear_regression_estimator.model.params["Intercept"]
@@ -143,11 +148,10 @@ def test_program_15_1A(self):
             "smokeyrs",
         }
         linear_regression_estimator = LinearRegressionEstimator(
-            "qsmk",
+            self.program_15_base_test_case,
             1,
             0,
             covariates,
-            "wt82_71",
             df,
             formula=f"""wt82_71 ~ qsmk +
                              {'+'.join(sorted(list(covariates)))} +
@@ -188,11 +192,10 @@ def test_program_15_no_interaction(self):
             "smokeyrs",
         }
         linear_regression_estimator = LinearRegressionEstimator(
-            "qsmk",
+            self.program_15_base_test_case,
             1,
             0,
             covariates,
-            "wt82_71",
             df,
             formula="wt82_71 ~ qsmk + age + I(age ** 2) + wt71 + I(wt71 ** 2) + smokeintensity + I(smokeintensity ** 2) + smokeyrs + I(smokeyrs ** 2)",
         )
@@ -224,11 +227,10 @@ def test_program_15_no_interaction_ate(self):
             "smokeyrs",
         }
         linear_regression_estimator = LinearRegressionEstimator(
-            "qsmk",
+            self.program_15_base_test_case,
             1,
             0,
             covariates,
-            "wt82_71",
             df,
             formula="wt82_71 ~ qsmk + age + I(age ** 2) + wt71 + I(wt71 ** 2) + smokeintensity + I(smokeintensity ** 2) + smokeyrs + I(smokeyrs ** 2)",
         )
@@ -259,11 +261,10 @@ def test_program_15_no_interaction_ate_calculated(self):
             "smokeyrs",
         }
         linear_regression_estimator = LinearRegressionEstimator(
-            "qsmk",
+            self.program_15_base_test_case,
             1,
             0,
             covariates,
-            "wt82_71",
             df,
             formula="wt82_71 ~ qsmk + age + I(age ** 2) + wt71 + I(wt71 ** 2) + smokeintensity + I(smokeintensity ** 2) + smokeyrs + I(smokeyrs ** 2)",
         )
@@ -279,7 +280,7 @@ def test_program_15_no_interaction_ate_calculated(self):
     def test_program_11_2_with_robustness_validation(self):
         """Test whether our linear regression estimator, as used in test_program_11_2 can correctly estimate robustness."""
         df = self.chapter_11_df.copy()
-        linear_regression_estimator = LinearRegressionEstimator("treatments", 100, 90, set(), "outcomes", df)
+        linear_regression_estimator = LinearRegressionEstimator(self.base_test_case, 100, 90, set(), df)
         linear_regression_estimator.estimate_coefficient()
 
         cv = CausalValidator()
@@ -289,7 +290,8 @@ def test_gp(self):
         df = pd.DataFrame()
         df["X"] = np.arange(10)
         df["Y"] = 1 / (df["X"] + 1)
-        linear_regression_estimator = LinearRegressionEstimator("X", 0, 1, set(), "Y", df.astype(float))
+        base_test_case = BaseTestCase(Input("X", float), Output("Y", float))
+        linear_regression_estimator = LinearRegressionEstimator(base_test_case, 0, 1, set(), df.astype(float))
         linear_regression_estimator.gp_formula(seeds=["reciprocal(add(X, 1))"])
         self.assertEqual(linear_regression_estimator.formula, "Y ~ I(1/(X + 1)) - 1")
         ate, (ci_low, ci_high) = linear_regression_estimator.estimate_ate_calculated()
@@ -299,9 +301,10 @@ def test_gp(self):
 
     def test_gp_power(self):
         df = pd.DataFrame()
+        base_test_case = BaseTestCase(Input("X", float), Output("Y", float))
         df["X"] = np.arange(10)
         df["Y"] = 2 * (df["X"] ** 2)
-        linear_regression_estimator = LinearRegressionEstimator("X", 0, 1, set(), "Y", df.astype(float))
+        linear_regression_estimator = LinearRegressionEstimator(base_test_case, 0, 1, set(), df.astype(float))
         linear_regression_estimator.gp_formula(seed=1, max_order=2, seeds=["mul(2, power_2(X))"])
         self.assertEqual(
             linear_regression_estimator.formula,
@@ -326,20 +329,21 @@ def setUpClass(cls) -> None:
 
     def test_X1_effect(self):
         """When we fix the value of X2 to 0, the effect of X1 on Y should become ~2 (because X2 terms are cancelled)."""
+        base_test_case = BaseTestCase(Input("X1", float), Output("Y", float))
         lr_model = LinearRegressionEstimator(
-            "X1", 1, 0, {"X2"}, "Y", effect_modifiers={"x2": 0}, formula="Y ~ X1 + X2 + (X1 * X2)", df=self.df
+            base_test_case, 1, 0, {"X2"}, effect_modifiers={"x2": 0}, formula="Y ~ X1 + X2 + (X1 * X2)", df=self.df
         )
         test_results = lr_model.estimate_ate()
         ate = test_results[0][0]
         self.assertAlmostEqual(ate, 2.0)
 
     def test_categorical_confidence_intervals(self):
+        base_test_case = BaseTestCase(Input("color", float), Output("length_in", float))
         lr_model = LinearRegressionEstimator(
-            treatment="color",
+            base_test_case=base_test_case,
             control_value=None,
             treatment_value=None,
             adjustment_set={},
-            outcome="length_in",
             df=self.scarf_df,
         )
         coefficients, [ci_low, ci_high] = lr_model.estimate_coefficient()
diff --git a/tests/estimation_tests/test_logistic_regression_estimator.py b/tests/estimation_tests/test_logistic_regression_estimator.py
index 544e58a5..35ec5367 100644
--- a/tests/estimation_tests/test_logistic_regression_estimator.py
+++ b/tests/estimation_tests/test_logistic_regression_estimator.py
@@ -1,10 +1,8 @@
 import unittest
 import pandas as pd
-import numpy as np
-import matplotlib.pyplot as plt
-from causal_testing.specification.variable import Input
-from causal_testing.utils.validation import CausalValidator
 from causal_testing.estimation.logistic_regression_estimator import LogisticRegressionEstimator
+from causal_testing.testing.base_test_case import BaseTestCase
+from causal_testing.specification.variable import Input, Output
 
 
 class TestLogisticRegressionEstimator(unittest.TestCase):
@@ -18,6 +16,8 @@ def setUpClass(cls) -> None:
 
     def test_odds_ratio(self):
         df = self.scarf_df.copy()
-        logistic_regression_estimator = LogisticRegressionEstimator("length_in", 65, 55, set(), "completed", df)
+        logistic_regression_estimator = LogisticRegressionEstimator(
+            BaseTestCase(Input("length_in", float), Output("completed", bool)), 65, 55, set(), df
+        )
         odds, _ = logistic_regression_estimator.estimate_unit_odds_ratio()
         self.assertEqual(round(odds[0], 4), 0.8948)
diff --git a/tests/json_front_tests/test_json_class.py b/tests/json_front_tests/test_json_class.py
deleted file mode 100644
index b1738d24..00000000
--- a/tests/json_front_tests/test_json_class.py
+++ /dev/null
@@ -1,277 +0,0 @@
-import unittest
-import pytest
-from pathlib import Path
-from statistics import StatisticsError
-import scipy
-import os
-
-from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
-from causal_testing.estimation.abstract_estimator import Estimator
-from causal_testing.testing.causal_test_outcome import NoEffect, Positive
-from causal_testing.json_front.json_class import JsonUtility, CausalVariables
-from causal_testing.specification.variable import Input, Output, Meta
-from causal_testing.specification.scenario import Scenario
-from causal_testing.specification.causal_specification import CausalSpecification
-
-
-@pytest.mark.skip(reason="json frontend under reconstruction")
-class TestJsonClass(unittest.TestCase):
-    """Test the JSON frontend for the Causal Testing Framework (CTF)
-
-    The JSON frontend is an alternative interface for the CTF where tests are specified in JSON format and ingested
-    with the frontend. Tests involve testing that this correctly interfaces with the framework with some dummy data
-    """
-
-    def setUp(self) -> None:
-        json_file_name = "tests.json"
-        dag_file_name = "dag.dot"
-        data_file_name = "data_with_meta.csv"
-        test_data_dir_path = Path("tests/resources/data")
-        self.json_path = str(test_data_dir_path / json_file_name)
-        self.dag_path = str(test_data_dir_path / dag_file_name)
-        self.data_path = [str(test_data_dir_path / data_file_name)]
-        self.json_class = JsonUtility("temp_out.txt", True)
-        self.example_distribution = scipy.stats.uniform(1, 10)
-        self.input_dict_list = [
-            {"name": "test_input", "datatype": float, "distribution": self.example_distribution},
-            {"name": "test_input_no_dist", "datatype": float},
-        ]
-        self.output_dict_list = [{"name": "test_output", "datatype": float}]
-        self.meta_dict_list = [{"name": "test_meta", "datatype": float, "populate": populate_example}]
-        variables = CausalVariables(
-            inputs=self.input_dict_list, outputs=self.output_dict_list, metas=self.meta_dict_list
-        )
-        self.scenario = Scenario(variables=variables, constraints=None)
-        self.json_class.set_paths(self.json_path, self.dag_path, self.data_path)
-        self.json_class.setup(self.scenario)
-
-    def test_setting_no_path(self):
-        json_class = JsonUtility("temp_out.txt", True)
-        json_class.set_paths(self.json_path, self.dag_path, None)
-        self.assertEqual(json_class.input_paths.data_paths, [])  # Needs to be list of Paths
-
-    def test_setting_paths(self):
-        self.assertEqual(self.json_class.input_paths.json_path, Path(self.json_path))
-        self.assertEqual(self.json_class.input_paths.dag_path, Path(self.dag_path))
-        self.assertEqual(self.json_class.input_paths.data_paths, [Path(self.data_path[0])])  # Needs to be list of Paths
-
-    def test_set_inputs(self):
-        ctf_input = [Input("test_input", float, self.example_distribution)]
-        self.assertEqual(ctf_input[0].name, self.json_class.scenario.variables["test_input"].name)
-        self.assertEqual(ctf_input[0].datatype, self.json_class.scenario.variables["test_input"].datatype)
-        self.assertEqual(ctf_input[0].distribution, self.json_class.scenario.variables["test_input"].distribution)
-
-    def test_set_outputs(self):
-        ctf_output = [Output("test_output", float)]
-        self.assertEqual(ctf_output[0].name, self.json_class.scenario.variables["test_output"].name)
-        self.assertEqual(ctf_output[0].datatype, self.json_class.scenario.variables["test_output"].datatype)
-
-    def test_set_metas(self):
-        ctf_meta = [Meta("test_meta", float, populate_example)]
-        self.assertEqual(ctf_meta[0].name, self.json_class.scenario.variables["test_meta"].name)
-        self.assertEqual(ctf_meta[0].datatype, self.json_class.scenario.variables["test_meta"].datatype)
-
-    def test_argparse(self):
-        args = self.json_class.get_args(["--data_path=data.csv", "--dag_path=dag.dot", "--json_path=tests.json"])
-        self.assertEqual(args.data_path, ["data.csv"])
-        self.assertEqual(args.dag_path, "dag.dot")
-        self.assertEqual(args.json_path, "tests.json")
-
-    def test_setup_scenario(self):
-        self.assertIsInstance(self.json_class.scenario, Scenario)
-
-    def test_setup_causal_specification(self):
-        self.assertIsInstance(self.json_class.causal_specification, CausalSpecification)
-
-    def test_f_flag(self):
-        example_test = {
-            "tests": [
-                {
-                    "name": "test1",
-                    "mutations": {"test_input": "Increase"},
-                    "estimator": "LinearRegressionEstimator",
-                    "estimate_type": "coefficient",
-                    "effect_modifiers": [],
-                    "expected_effect": {"test_output": "NoEffect"},
-                    "skip": False,
-                }
-            ]
-        }
-        self.json_class.test_plan = example_test
-        effects = {"NoEffect": NoEffect()}
-        estimators = {"LinearRegressionEstimator": LinearRegressionEstimator}
-        with self.assertRaises(StatisticsError):
-            self.json_class.run_json_tests(effects, estimators, True)
-
-    def test_generate_coefficient_tests_from_json(self):
-        example_test = {
-            "tests": [
-                {
-                    "name": "test1",
-                    "mutations": ["test_input"],
-                    "estimator": "LinearRegressionEstimator",
-                    "estimate_type": "coefficient",
-                    "effect_modifiers": [],
-                    "expected_effect": {"test_output": "NoEffect"},
-                    "skip": False,
-                }
-            ]
-        }
-        self.json_class.test_plan = example_test
-        effects = {"NoEffect": NoEffect()}
-        estimators = {"LinearRegressionEstimator": LinearRegressionEstimator}
-
-        self.json_class.run_json_tests(effects=effects, mutates={}, estimators=estimators, f_flag=False)
-
-        # Test that the final log message prints that failed tests are printed, which is expected behaviour for this scenario
-        with open("temp_out.txt", "r") as reader:
-            temp_out = reader.readlines()
-        self.assertIn("FAILED", temp_out[-1])
-
-    def test_run_json_tests_from_json(self):
-        example_test = {
-            "tests": [
-                {
-                    "name": "test1",
-                    "mutations": {"test_input": "Increase"},
-                    "estimator": "LinearRegressionEstimator",
-                    "estimate_type": "coefficient",
-                    "effect_modifiers": [],
-                    "expected_effect": {"test_output": "NoEffect"},
-                    "coverage": True,
-                    "skip": False,
-                }
-            ]
-        }
-        self.json_class.test_plan = example_test
-        effects = {"NoEffect": NoEffect()}
-        estimators = {"LinearRegressionEstimator": LinearRegressionEstimator}
-
-        test_results = self.json_class.run_json_tests(effects=effects, estimators=estimators, f_flag=False)
-        self.assertTrue(test_results[0]["failed"])
-
-    def test_generate_tests_from_json_no_dist(self):
-        example_test = {
-            "tests": [
-                {
-                    "name": "test1",
-                    "mutations": {"test_input_no_dist": "Increase"},
-                    "estimator": "LinearRegressionEstimator",
-                    "estimate_type": "coefficient",
-                    "effect_modifiers": [],
-                    "expected_effect": {"test_output": "NoEffect"},
-                    "skip": False,
-                }
-            ]
-        }
-        self.json_class.test_plan = example_test
-        effects = {"NoEffect": NoEffect()}
-        estimators = {"LinearRegressionEstimator": LinearRegressionEstimator}
-
-        self.json_class.run_json_tests(effects=effects, estimators=estimators, f_flag=False)
-
-        # Test that the final log message prints that failed tests are printed, which is expected behaviour for this scenario
-        with open("temp_out.txt", "r") as reader:
-            temp_out = reader.readlines()
-        self.assertIn("failed", temp_out[-1])
-
-    def test_formula_in_json_test(self):
-        example_test = {
-            "tests": [
-                {
-                    "name": "test1",
-                    "mutations": {"test_input": "Increase"},
-                    "estimator": "LinearRegressionEstimator",
-                    "estimate_type": "coefficient",
-                    "effect_modifiers": [],
-                    "expected_effect": {"test_output": "Positive"},
-                    "skip": False,
-                    "formula": "test_output ~ test_input",
-                }
-            ]
-        }
-        self.json_class.test_plan = example_test
-        effects = {"Positive": Positive()}
-        estimators = {"LinearRegressionEstimator": LinearRegressionEstimator}
-
-        self.json_class.run_json_tests(effects=effects, estimators=estimators, f_flag=False)
-        with open("temp_out.txt", "r") as reader:
-            temp_out = reader.readlines()
-        self.assertIn("test_output ~ test_input", "".join(temp_out))
-
-    def test_run_concrete_json_testcase(self):
-        example_test = {
-            "tests": [
-                {
-                    "name": "test1",
-                    "treatment_variable": "test_input",
-                    "control_value": 0,
-                    "treatment_value": 1,
-                    "estimator": "LinearRegressionEstimator",
-                    "estimate_type": "coefficient",
-                    "expected_effect": {"test_output": "NoEffect"},
-                    "skip": False,
-                }
-            ]
-        }
-        self.json_class.test_plan = example_test
-        effects = {"NoEffect": NoEffect()}
-        estimators = {"LinearRegressionEstimator": LinearRegressionEstimator}
-
-        self.json_class.run_json_tests(effects=effects, estimators=estimators, f_flag=False)
-        with open("temp_out.txt", "r") as reader:
-            temp_out = reader.readlines()
-        self.assertIn("FAILED", temp_out[-1])
-
-    def test_no_data_provided(self):
-        example_test = {
-            "tests": [
-                {
-                    "name": "test1",
-                    "mutations": {"test_input": "Increase"},
-                    "estimator": "LinearRegressionEstimator",
-                    "estimate_type": "coefficient",
-                    "effect_modifiers": [],
-                    "expected_effect": {"test_output": "NoEffect"},
-                    "skip": False,
-                }
-            ]
-        }
-        json_class = JsonUtility("temp_out.txt", True)
-        json_class.set_paths(self.json_path, self.dag_path)
-
-        with self.assertRaises(ValueError):
-            json_class.setup(self.scenario)
-
-    def test_estimator_formula_type_check(self):
-        class ExampleEstimator(Estimator):
-            def add_modelling_assumptions(self):
-                pass
-
-        example_test = {
-            "tests": [
-                {
-                    "name": "test1",
-                    "mutations": {"test_input": "Increase"},
-                    "estimator": "ExampleEstimator",
-                    "estimate_type": "coefficient",
-                    "effect_modifiers": [],
-                    "expected_effect": {"test_output": "Positive"},
-                    "skip": False,
-                    "formula": "test_output ~ test_input",
-                }
-            ]
-        }
-        self.json_class.test_plan = example_test
-        effects = {"Positive": Positive()}
-        estimators = {"ExampleEstimator": ExampleEstimator}
-        with self.assertRaises(TypeError):
-            self.json_class.run_json_tests(effects=effects, estimators=estimators, f_flag=False)
-
-    def tearDown(self) -> None:
-        if os.path.exists("temp_out.txt"):
-            os.remove("temp_out.txt")
-
-
-def populate_example(*args, **kwargs):
-    pass
diff --git a/tests/surrogate_tests/test_causal_surrogate_assisted.py b/tests/surrogate_tests/test_causal_surrogate_assisted.py
index cbf6fac0..b2824b00 100644
--- a/tests/surrogate_tests/test_causal_surrogate_assisted.py
+++ b/tests/surrogate_tests/test_causal_surrogate_assisted.py
@@ -71,10 +71,10 @@ def test_surrogate_model_generation(self):
         surrogate_models = c_s_a_test_case.generate_surrogates(specification, df)
         self.assertEqual(len(surrogate_models), 2)
 
-        for surrogate in surrogate_models:
-            self.assertIsInstance(surrogate, CubicSplineRegressionEstimator)
-            self.assertNotEqual(surrogate.treatment, "Z")
-            self.assertNotEqual(surrogate.outcome, "Z")
+        for surrogate_model in surrogate_models:
+            self.assertIsInstance(surrogate_model, CubicSplineRegressionEstimator)
+            self.assertNotEqual(surrogate_model.base_test_case.treatment_variable.name, "Z")
+            self.assertNotEqual(surrogate_model.base_test_case.outcome_variable.name, "Z")
 
     def test_causal_surrogate_assisted_execution(self):
         df = self.class_df.copy()
diff --git a/tests/testing_tests/test_causal_test_adequacy.py b/tests/testing_tests/test_causal_test_adequacy.py
index 0d7c4c93..5b36dd81 100644
--- a/tests/testing_tests/test_causal_test_adequacy.py
+++ b/tests/testing_tests/test_causal_test_adequacy.py
@@ -10,9 +10,10 @@
 from causal_testing.testing.causal_test_case import CausalTestCase
 from causal_testing.testing.causal_test_adequacy import DAGAdequacy
 from causal_testing.testing.causal_test_outcome import NoEffect, SomeEffect
-from causal_testing.json_front.json_class import JsonUtility, CausalVariables
 from causal_testing.specification.scenario import Scenario
 from causal_testing.testing.causal_test_adequacy import DataAdequacy
+from causal_testing.specification.variable import Input, Output
+from causal_testing.specification.causal_dag import CausalDAG
 
 
 class TestCausalTestAdequacy(unittest.TestCase):
@@ -22,72 +23,53 @@ class TestCausalTestAdequacy(unittest.TestCase):
     """
 
     def setUp(self) -> None:
-        json_file_name = "tests.json"
-        dag_file_name = "dag.dot"
-        data_file_name = "data_with_categorical.csv"
-        test_data_dir_path = Path("tests/resources/data")
-        self.json_path = str(test_data_dir_path / json_file_name)
-        self.dag_path = str(test_data_dir_path / dag_file_name)
-        self.data_path = [str(test_data_dir_path / data_file_name)]
-        self.json_class = JsonUtility("temp_out.txt", True)
+        self.df = pd.read_csv("tests/resources/data/data_with_categorical.csv")
+        self.dag = CausalDAG("tests/resources/data/dag.dot")
         self.example_distribution = scipy.stats.uniform(1, 10)
-        self.input_dict_list = [
-            {"name": "test_input", "datatype": float, "distribution": self.example_distribution},
-            {"name": "test_input_no_dist", "datatype": float},
+        inputs = [
+            Input("test_input", float, self.example_distribution),
+            Input("test_input_no_dist", float, self.example_distribution),
         ]
-        self.output_dict_list = [{"name": "test_output", "datatype": float}]
-        variables = CausalVariables(inputs=self.input_dict_list, outputs=self.output_dict_list, metas=[])
-        self.scenario = Scenario(variables=variables, constraints=None)
-        self.json_class.set_paths(self.json_path, self.dag_path, self.data_path)
-        self.json_class.setup(self.scenario)
+        outputs = [Output("test_output", float)]
+        self.scenario = Scenario(variables=inputs + outputs)
 
     def test_data_adequacy_numeric(self):
-        example_test = {
-            "tests": [
-                {
-                    "name": "test1",
-                    "mutations": {"test_input": "Increase"},
-                    "estimator": "LinearRegressionEstimator",
-                    "estimate_type": "coefficient",
-                    "effect_modifiers": [],
-                    "expected_effect": {"test_output": "NoEffect"},
-                    "coverage": True,
-                    "skip": False,
-                }
-            ]
-        }
-        self.json_class.test_plan = example_test
-        effects = {"NoEffect": NoEffect()}
-        estimators = {"LinearRegressionEstimator": LinearRegressionEstimator}
-
-        test_results = self.json_class.run_json_tests(effects=effects, estimators=estimators, f_flag=False)
+        base_test_case = BaseTestCase(
+            Input("test_input", float, self.example_distribution), Output("test_output", float)
+        )
+        estimator = LinearRegressionEstimator(
+            base_test_case=base_test_case, treatment_value=None, control_value=None, adjustment_set={}, df=self.df
+        )
+        causal_test_case = CausalTestCase(
+            base_test_case=base_test_case,
+            expected_causal_effect=NoEffect(),
+            estimate_type="coefficient",
+            estimator=estimator,
+        )
+        adequacy_metric = DataAdequacy(causal_test_case, estimator)
+        adequacy_metric.measure_adequacy()
         self.assertEqual(
-            test_results[0]["result"].adequacy.to_dict(),
+            adequacy_metric.to_dict(),
             {"kurtosis": {"test_input": 0.0}, "bootstrap_size": 100, "passing": 100, "successful": 100},
         )
 
-    def test_data_adequacy_cateogorical(self):
-        example_test = {
-            "tests": [
-                {
-                    "name": "test1",
-                    "mutations": ["test_input_no_dist"],
-                    "estimator": "LinearRegressionEstimator",
-                    "estimate_type": "coefficient",
-                    "effect_modifiers": [],
-                    "expected_effect": {"test_output": "NoEffect"},
-                    "coverage": True,
-                    "skip": False,
-                }
-            ]
-        }
-        self.json_class.test_plan = example_test
-        effects = {"NoEffect": NoEffect()}
-        estimators = {"LinearRegressionEstimator": LinearRegressionEstimator}
-
-        test_results = self.json_class.run_json_tests(effects=effects, estimators=estimators, f_flag=False)
+    def test_data_adequacy_categorical(self):
+        base_test_case = BaseTestCase(
+            Input("test_input_no_dist", float, self.example_distribution), Output("test_output", float)
+        )
+        estimator = LinearRegressionEstimator(
+            base_test_case=base_test_case, treatment_value=None, control_value=None, adjustment_set={}, df=self.df
+        )
+        causal_test_case = CausalTestCase(
+            base_test_case=base_test_case,
+            expected_causal_effect=NoEffect(),
+            estimate_type="coefficient",
+            estimator=estimator,
+        )
+        adequacy_metric = DataAdequacy(causal_test_case, estimator)
+        adequacy_metric.measure_adequacy()
         self.assertEqual(
-            test_results[0]["result"].adequacy.to_dict(),
+            adequacy_metric.to_dict(),
             {"kurtosis": {"test_input_no_dist[T.b]": 0.0}, "bootstrap_size": 100, "passing": 100, "successful": 100},
         )
 
@@ -95,7 +77,7 @@ def test_data_adequacy_group_by(self):
         timesteps_per_intervention = 1
         control_strategy = [[t, "t", 0] for t in range(1, 4, timesteps_per_intervention)]
         treatment_strategy = [[t, "t", 1] for t in range(1, 4, timesteps_per_intervention)]
-        outcome = "outcome"
+        outcome = Output("outcome", float)
         fit_bl_switch_formula = "xo_t_do ~ time"
         df = pd.read_csv("tests/resources/data/temporal_data.csv")
         df["ok"] = df["outcome"] == 1
@@ -110,11 +92,7 @@ def test_data_adequacy_group_by(self):
             fit_bltd_switch_formula=fit_bl_switch_formula,
             eligibility=None,
         )
-        base_test_case = BaseTestCase(
-            treatment_variable=control_strategy,
-            outcome_variable=outcome,
-            effect="temporal",
-        )
+        base_test_case = estimation_model.base_test_case
 
         causal_test_case = CausalTestCase(
             base_test_case=base_test_case,
@@ -144,12 +122,12 @@ def test_dag_adequacy_dependent(self):
             estimate_type=None,
         )
         test_suite = [causal_test_case]
-        dag_adequacy = DAGAdequacy(self.json_class.causal_specification.causal_dag, test_suite)
+        dag_adequacy = DAGAdequacy(self.dag, test_suite)
         dag_adequacy.measure_adequacy()
         self.assertEqual(
             dag_adequacy.to_dict(),
             {
-                "causal_dag": self.json_class.causal_specification.causal_dag,
+                "causal_dag": self.dag,
                 "test_suite": test_suite,
                 "tested_pairs": {("test_input", "B")},
                 "pairs_to_test": {
@@ -191,12 +169,12 @@ def test_dag_adequacy_independent(self):
             estimate_type=None,
         )
         test_suite = [causal_test_case]
-        dag_adequacy = DAGAdequacy(self.json_class.causal_specification.causal_dag, test_suite)
+        dag_adequacy = DAGAdequacy(self.dag, test_suite)
         dag_adequacy.measure_adequacy()
         self.assertEqual(
             dag_adequacy.to_dict(),
             {
-                "causal_dag": self.json_class.causal_specification.causal_dag,
+                "causal_dag": self.dag,
                 "test_suite": test_suite,
                 "tested_pairs": {("test_input", "C")},
                 "pairs_to_test": {
@@ -238,12 +216,12 @@ def test_dag_adequacy_independent_other_way(self):
             estimate_type=None,
         )
         test_suite = [causal_test_case]
-        dag_adequacy = DAGAdequacy(self.json_class.causal_specification.causal_dag, test_suite)
+        dag_adequacy = DAGAdequacy(self.dag, test_suite)
         dag_adequacy.measure_adequacy()
         self.assertEqual(
             dag_adequacy.to_dict(),
             {
-                "causal_dag": self.json_class.causal_specification.causal_dag,
+                "causal_dag": self.dag,
                 "test_suite": test_suite,
                 "tested_pairs": {("test_input", "C")},
                 "pairs_to_test": {
diff --git a/tests/testing_tests/test_causal_test_case.py b/tests/testing_tests/test_causal_test_case.py
index 0edc3321..17390819 100644
--- a/tests/testing_tests/test_causal_test_case.py
+++ b/tests/testing_tests/test_causal_test_case.py
@@ -34,9 +34,8 @@ def setUp(self) -> None:
             base_test_case=self.base_test_case,
             expected_causal_effect=self.expected_causal_effect,
             estimator=LinearRegressionEstimator(
-                treatment="A",
+                base_test_case=self.base_test_case,
                 adjustment_set=set(),
-                outcome="C",
                 control_value=0,
                 treatment_value=1,
             ),
@@ -65,19 +64,18 @@ def setUp(self) -> None:
         self.causal_dag = CausalDAG(dag_dot_path)
 
         # 2. Create Scenario and Causal Specification
-        A = Input("A", float)
-        self.A = A
-        C = Output("C", float)
-        self.C = C
-        D = Output("D", float)
-        self.scenario = Scenario({A, C, D})
+        self.A = Input("A", float)
+        self.C = Output("C", float)
+        self.D = Output("D", float)
+        self.scenario = Scenario({self.A, self.C, self.D})
         self.causal_specification = CausalSpecification(scenario=self.scenario, causal_dag=self.causal_dag)
 
         # 3. Create a causal test case
         self.expected_causal_effect = ExactValue(4)
-        self.base_test_case = BaseTestCase(A, C)
+        self.base_test_case_A_C = BaseTestCase(self.A, self.C)
+        self.base_test_case_D_A = BaseTestCase(self.D, self.A)
         self.causal_test_case = CausalTestCase(
-            base_test_case=self.base_test_case,
+            base_test_case=self.base_test_case_A_C,
             expected_causal_effect=self.expected_causal_effect,
             # control_value=0,
             # treatment_value=1,
@@ -92,7 +90,7 @@ def setUp(self) -> None:
         # self.df.to_csv(self.observational_data_csv_path, index=False)
 
         # 5. Create minimal adjustment set
-        self.minimal_adjustment_set = self.causal_dag.identification(self.base_test_case)
+        self.minimal_adjustment_set = self.causal_dag.identification(self.base_test_case_A_C)
         # 6. Easier to access treatment and outcome values
         self.treatment_value = 1
         self.control_value = 0
@@ -102,7 +100,7 @@ def tearDown(self) -> None:
 
     def test_check_minimum_adjustment_set(self):
         """Check that the minimum adjustment set is correctly made"""
-        minimal_adjustment_set = self.causal_dag.identification(self.base_test_case)
+        minimal_adjustment_set = self.causal_dag.identification(self.base_test_case_A_C)
         self.assertEqual(minimal_adjustment_set, {"D"})
 
     def test_invalid_causal_effect(self):
@@ -117,11 +115,10 @@ def test_execute_test_observational_linear_regression_estimator(self):
         """Check that executing the causal test case returns the correct results for dummy data using a linear
         regression estimator."""
         estimation_model = LinearRegressionEstimator(
-            "A",
+            self.base_test_case_A_C,
             self.treatment_value,
             self.control_value,
             self.minimal_adjustment_set,
-            "C",
             self.df,
         )
         causal_test_result = self.causal_test_case.execute_test(estimation_model)
@@ -132,11 +129,10 @@ def test_execute_test_observational_linear_regression_estimator_direct_effect(se
         regression estimator."""
         base_test_case = BaseTestCase(treatment_variable=self.A, outcome_variable=self.C, effect="direct")
         estimation_model = LinearRegressionEstimator(
-            "A",
+            self.base_test_case_A_C,
             self.treatment_value,
             self.control_value,
             self.causal_dag.identification(base_test_case),
-            "C",
             self.df,
         )
 
@@ -156,11 +152,10 @@ def test_execute_test_observational_linear_regression_estimator_coefficient(self
         """Check that executing the causal test case returns the correct results for dummy data using a linear
         regression estimator."""
         estimation_model = LinearRegressionEstimator(
-            "D",
+            self.base_test_case_D_A,
             self.treatment_value,
             self.control_value,
             self.minimal_adjustment_set,
-            "A",
             self.df,
         )
         self.causal_test_case.estimate_type = "coefficient"
@@ -171,11 +166,10 @@ def test_execute_test_observational_linear_regression_estimator_risk_ratio(self)
         """Check that executing the causal test case returns the correct results for dummy data using a linear
         regression estimator."""
         estimation_model = LinearRegressionEstimator(
-            "D",
+            self.base_test_case_D_A,
             self.treatment_value,
             self.control_value,
             self.minimal_adjustment_set,
-            "A",
             self.df,
         )
         self.causal_test_case.estimate_type = "risk_ratio"
@@ -186,11 +180,10 @@ def test_invalid_estimate_type(self):
         """Check that executing the causal test case returns the correct results for dummy data using a linear
         regression estimator."""
         estimation_model = LinearRegressionEstimator(
-            "D",
+            self.base_test_case_D_A,
             self.treatment_value,
             self.control_value,
             self.minimal_adjustment_set,
-            "A",
             self.df,
         )
         self.causal_test_case.estimate_type = "invalid"
@@ -201,11 +194,10 @@ def test_execute_test_observational_linear_regression_estimator_squared_term(sel
         """Check that executing the causal test case returns the correct results for dummy data with a squared term
         using a linear regression estimator. C ~ 4*(A+2) + D + D^2"""
         estimation_model = LinearRegressionEstimator(
-            "A",
+            self.base_test_case_A_C,
             self.treatment_value,
             self.control_value,
             self.minimal_adjustment_set,
-            "C",
             self.df,
             formula=f"C ~ A + {'+'.join(self.minimal_adjustment_set)} + (D ** 2)",
         )
diff --git a/tests/testing_tests/test_causal_test_outcome.py b/tests/testing_tests/test_causal_test_outcome.py
index 0bdbe4fa..e1a7c40a 100644
--- a/tests/testing_tests/test_causal_test_outcome.py
+++ b/tests/testing_tests/test_causal_test_outcome.py
@@ -4,15 +4,17 @@
 from causal_testing.testing.causal_test_result import CausalTestResult, TestValue
 from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
 from causal_testing.utils.validation import CausalValidator
+from causal_testing.testing.base_test_case import BaseTestCase
+from causal_testing.specification.variable import Input, Output
 
 
 class TestCausalTestOutcome(unittest.TestCase):
     """Test the TestCausalTestOutcome basic methods."""
 
     def setUp(self) -> None:
+        base_test_case = BaseTestCase(Input("A", float), Output("A", float))
         self.estimator = LinearRegressionEstimator(
-            treatment="A",
-            outcome="A",
+            base_test_case=base_test_case,
             treatment_value=1,
             control_value=0,
             adjustment_set={},

From 4d48785f033ada57126451eeca9f0655fa011f1d Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Thu, 20 Feb 2025 12:47:13 +0000
Subject: [PATCH 37/44] IPCW outcome is now an output

---
 causal_testing/estimation/ipcw_estimator.py   |   7 +-
 causal_testing/testing/causal_test_result.py  |  10 +-
 tests/estimation_tests/test_ipcw_estimator.py | 143 +++++++-----------
 .../test_causal_test_adequacy.py              |   2 +-
 4 files changed, 65 insertions(+), 97 deletions(-)

diff --git a/causal_testing/estimation/ipcw_estimator.py b/causal_testing/estimation/ipcw_estimator.py
index 49d48196..902927f7 100644
--- a/causal_testing/estimation/ipcw_estimator.py
+++ b/causal_testing/estimation/ipcw_estimator.py
@@ -12,7 +12,7 @@
 
 from causal_testing.estimation.abstract_estimator import Estimator
 from causal_testing.testing.base_test_case import BaseTestCase
-from causal_testing.specification.variable import Input, Output
+from causal_testing.specification.variable import Variable
 
 logger = logging.getLogger(__name__)
 
@@ -32,7 +32,7 @@ def __init__(
         timesteps_per_observation: int,
         control_strategy: list[tuple[int, str, Any]],
         treatment_strategy: list[tuple[int, str, Any]],
-        outcome: str,
+        outcome: Variable,
         status_column: str,
         fit_bl_switch_formula: str,
         fit_bltd_switch_formula: str,
@@ -58,7 +58,7 @@ def __init__(
                             treatment) with the most elements multiplied by `timesteps_per_observation`.
         """
         super().__init__(
-            base_test_case=BaseTestCase(Input("_", float), Output(outcome, float)),
+            base_test_case=BaseTestCase(None, outcome),
             treatment_value=[val for _, _, val in treatment_strategy],
             control_value=[val for _, _, val in control_strategy],
             adjustment_set=None,
@@ -70,7 +70,6 @@ def __init__(
         self.timesteps_per_observation = timesteps_per_observation
         self.control_strategy = control_strategy
         self.treatment_strategy = treatment_strategy
-        self.outcome = outcome
         self.status_column = status_column
         self.fit_bl_switch_formula = fit_bl_switch_formula
         self.fit_bltd_switch_formula = fit_bltd_switch_formula
diff --git a/causal_testing/testing/causal_test_result.py b/causal_testing/testing/causal_test_result.py
index 1d03e842..482a2d5c 100644
--- a/causal_testing/testing/causal_test_result.py
+++ b/causal_testing/testing/causal_test_result.py
@@ -84,12 +84,12 @@ def to_dict(self, json=False):
         """Return result contents as a dictionary
         :return: Dictionary containing contents of causal_test_result
         """
-        if isinstance(self.estimator.base_test_case.treatment_variable, list):
-            treatment = [x.name for x in self.estimator.base_test_case.treatment_variable]
-        else:
-            treatment = self.estimator.base_test_case.treatment_variable.name
         base_dict = {
-            "treatment": treatment,
+            "treatment": (
+                self.estimator.base_test_case.treatment_variable.name
+                if self.estimator.base_test_case.treatment_variable is not None
+                else None
+            ),
             "control_value": self.estimator.control_value,
             "treatment_value": self.estimator.treatment_value,
             "outcome": self.estimator.base_test_case.outcome_variable.name,
diff --git a/tests/estimation_tests/test_ipcw_estimator.py b/tests/estimation_tests/test_ipcw_estimator.py
index 1ab37dce..a1f5ff06 100644
--- a/tests/estimation_tests/test_ipcw_estimator.py
+++ b/tests/estimation_tests/test_ipcw_estimator.py
@@ -1,9 +1,6 @@
 import unittest
 import pandas as pd
-import numpy as np
-import matplotlib.pyplot as plt
-from causal_testing.specification.variable import Input
-from causal_testing.utils.validation import CausalValidator
+from causal_testing.specification.variable import Input, Output
 
 from causal_testing.estimation.ipcw_estimator import IPCWEstimator
 
@@ -13,113 +10,85 @@ class TestIPCWEstimator(unittest.TestCase):
     Test the IPCW estimator class
     """
 
+    def setUp(self) -> None:
+        self.outcome = Output("outcome", float)
+        self.status_column = "ok"
+        self.timesteps_per_intervention = 1
+        self.control_strategy = [[t, "t", 0] for t in range(1, 4, self.timesteps_per_intervention)]
+        self.treatment_strategy = [[t, "t", 1] for t in range(1, 4, self.timesteps_per_intervention)]
+        self.fit_bl_switch_formula = "xo_t_do ~ time"
+        self.df = pd.read_csv("tests/resources/data/temporal_data.csv")
+        self.df[self.status_column] = self.df["outcome"] == 1
+
     def test_estimate_hazard_ratio(self):
-        timesteps_per_intervention = 1
-        control_strategy = [[t, "t", 0] for t in range(1, 4, timesteps_per_intervention)]
-        treatment_strategy = [[t, "t", 1] for t in range(1, 4, timesteps_per_intervention)]
-        outcome = "outcome"
-        fit_bl_switch_formula = "xo_t_do ~ time"
-        df = pd.read_csv("tests/resources/data/temporal_data.csv")
-        df["ok"] = df["outcome"] == 1
         estimation_model = IPCWEstimator(
-            df,
-            timesteps_per_intervention,
-            control_strategy,
-            treatment_strategy,
-            outcome,
-            "ok",
-            fit_bl_switch_formula=fit_bl_switch_formula,
-            fit_bltd_switch_formula=fit_bl_switch_formula,
+            self.df,
+            self.timesteps_per_intervention,
+            self.control_strategy,
+            self.treatment_strategy,
+            self.outcome,
+            self.status_column,
+            fit_bl_switch_formula=self.fit_bl_switch_formula,
+            fit_bltd_switch_formula=self.fit_bl_switch_formula,
             eligibility=None,
         )
-        estimate, intervals = estimation_model.estimate_hazard_ratio()
+        estimate, _ = estimation_model.estimate_hazard_ratio()
         self.assertEqual(round(estimate["trtrand"], 3), 1.351)
 
     def test_invalid_treatment_strategies(self):
-        timesteps_per_intervention = 1
-        control_strategy = [[t, "t", 0] for t in range(1, 4, timesteps_per_intervention)]
-        treatment_strategy = [[t, "t", 1] for t in range(1, 4, timesteps_per_intervention)]
-        outcome = "outcome"
-        fit_bl_switch_formula = "xo_t_do ~ time"
-        df = pd.read_csv("tests/resources/data/temporal_data.csv")
-        df["t"] = (["1", "0"] * len(df))[: len(df)]
-        df["ok"] = df["outcome"] == 1
         with self.assertRaises(ValueError):
-            estimation_model = IPCWEstimator(
-                df,
-                timesteps_per_intervention,
-                control_strategy,
-                treatment_strategy,
-                outcome,
-                "ok",
-                fit_bl_switch_formula=fit_bl_switch_formula,
-                fit_bltd_switch_formula=fit_bl_switch_formula,
+            IPCWEstimator(
+                self.df.assign(t=(["1", "0"] * len(self.df))[: len(self.df)]),
+                self.timesteps_per_intervention,
+                self.control_strategy,
+                self.treatment_strategy,
+                self.outcome,
+                self.status_column,
+                fit_bl_switch_formula=self.fit_bl_switch_formula,
+                fit_bltd_switch_formula=self.fit_bl_switch_formula,
                 eligibility=None,
             )
 
     def test_invalid_fault_t_do(self):
-        timesteps_per_intervention = 1
-        control_strategy = [[t, "t", 0] for t in range(1, 4, timesteps_per_intervention)]
-        treatment_strategy = [[t, "t", 1] for t in range(1, 4, timesteps_per_intervention)]
-        outcome = "outcome"
-        fit_bl_switch_formula = "xo_t_do ~ time"
-        df = pd.read_csv("tests/resources/data/temporal_data.csv")
-        df["ok"] = df["outcome"] == 1
         estimation_model = IPCWEstimator(
-            df,
-            timesteps_per_intervention,
-            control_strategy,
-            treatment_strategy,
-            outcome,
-            "ok",
-            fit_bl_switch_formula=fit_bl_switch_formula,
-            fit_bltd_switch_formula=fit_bl_switch_formula,
+            self.df.assign(outcome=1),
+            self.timesteps_per_intervention,
+            self.control_strategy,
+            self.treatment_strategy,
+            self.outcome,
+            self.status_column,
+            fit_bl_switch_formula=self.fit_bl_switch_formula,
+            fit_bltd_switch_formula=self.fit_bl_switch_formula,
             eligibility=None,
         )
         estimation_model.df["fault_t_do"] = 0
         with self.assertRaises(ValueError):
-            estimate, intervals = estimation_model.estimate_hazard_ratio()
+            estimation_model.estimate_hazard_ratio()
 
     def test_no_individual_began_control_strategy(self):
-        timesteps_per_intervention = 1
-        control_strategy = [[t, "t", 0] for t in range(1, 4, timesteps_per_intervention)]
-        treatment_strategy = [[t, "t", 1] for t in range(1, 4, timesteps_per_intervention)]
-        outcome = "outcome"
-        fit_bl_switch_formula = "xo_t_do ~ time"
-        df = pd.read_csv("tests/resources/data/temporal_data.csv")
-        df["t"] = 1
-        df["ok"] = df["outcome"] == 1
         with self.assertRaises(ValueError):
-            estimation_model = IPCWEstimator(
-                df,
-                timesteps_per_intervention,
-                control_strategy,
-                treatment_strategy,
-                outcome,
-                "ok",
-                fit_bl_switch_formula=fit_bl_switch_formula,
-                fit_bltd_switch_formula=fit_bl_switch_formula,
+            IPCWEstimator(
+                self.df.assign(t=1),
+                self.timesteps_per_intervention,
+                self.control_strategy,
+                self.treatment_strategy,
+                self.outcome,
+                self.status_column,
+                fit_bl_switch_formula=self.fit_bl_switch_formula,
+                fit_bltd_switch_formula=self.fit_bl_switch_formula,
                 eligibility=None,
             )
 
     def test_no_individual_began_treatment_strategy(self):
-        timesteps_per_intervention = 1
-        control_strategy = [[t, "t", 0] for t in range(1, 4, timesteps_per_intervention)]
-        treatment_strategy = [[t, "t", 1] for t in range(1, 4, timesteps_per_intervention)]
-        outcome = "outcome"
-        fit_bl_switch_formula = "xo_t_do ~ time"
-        df = pd.read_csv("tests/resources/data/temporal_data.csv")
-        df["t"] = 0
-        df["ok"] = df["outcome"] == 1
         with self.assertRaises(ValueError):
-            estimation_model = IPCWEstimator(
-                df,
-                timesteps_per_intervention,
-                control_strategy,
-                treatment_strategy,
-                outcome,
-                "ok",
-                fit_bl_switch_formula=fit_bl_switch_formula,
-                fit_bltd_switch_formula=fit_bl_switch_formula,
+            IPCWEstimator(
+                self.df.assign(t=0),
+                self.timesteps_per_intervention,
+                self.control_strategy,
+                self.treatment_strategy,
+                self.outcome,
+                self.status_column,
+                fit_bl_switch_formula=self.fit_bl_switch_formula,
+                fit_bltd_switch_formula=self.fit_bl_switch_formula,
                 eligibility=None,
             )
diff --git a/tests/testing_tests/test_causal_test_adequacy.py b/tests/testing_tests/test_causal_test_adequacy.py
index 5b36dd81..56149024 100644
--- a/tests/testing_tests/test_causal_test_adequacy.py
+++ b/tests/testing_tests/test_causal_test_adequacy.py
@@ -92,7 +92,7 @@ def test_data_adequacy_group_by(self):
             fit_bltd_switch_formula=fit_bl_switch_formula,
             eligibility=None,
         )
-        base_test_case = estimation_model.base_test_case
+        base_test_case = BaseTestCase(Input("t", float), Output("outcome", float))
 
         causal_test_case = CausalTestCase(
             base_test_case=base_test_case,

From db5b8b8e30db8dbbdc612741f7196a7f49cee25f Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Thu, 20 Feb 2025 12:50:39 +0000
Subject: [PATCH 38/44] pylint

---
 causal_testing/surrogate/causal_surrogate_assisted.py | 3 ++-
 causal_testing/testing/causal_test_result.py          | 5 +++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/causal_testing/surrogate/causal_surrogate_assisted.py b/causal_testing/surrogate/causal_surrogate_assisted.py
index ba58d1bb..d3dd5083 100644
--- a/causal_testing/surrogate/causal_surrogate_assisted.py
+++ b/causal_testing/surrogate/causal_surrogate_assisted.py
@@ -106,7 +106,8 @@ def execute(
                 )
                 test_result.relationship = (
                     f"{surrogate_model.base_test_case.treatment_variable.name} -> "
-                    f"{surrogate_model.base_test_case.outcome_variable.name} expected {surrogate_model.expected_relationship}"
+                    f"{surrogate_model.base_test_case.outcome_variable.name} expected "
+                    f"{surrogate_model.expected_relationship}"
                 )
                 return test_result, i + 1, df
 
diff --git a/causal_testing/testing/causal_test_result.py b/causal_testing/testing/causal_test_result.py
index 482a2d5c..b662d5f4 100644
--- a/causal_testing/testing/causal_test_result.py
+++ b/causal_testing/testing/causal_test_result.py
@@ -129,8 +129,9 @@ def ci_valid(self) -> bool:
 
     def summary(self):
         """Summarise the causal test result as an intuitive sentence."""
+        treatment_variable = self.estimator.base_test_case.treatment_variable
         print(
-            f"The causal effect of changing {self.estimator.base_test_case.treatment_variable.name} = {self.estimator.control_value} to "
-            f"{self.estimator.base_test_case.treatment_variable.name}' = {self.estimator.treatment_value} is {self.test_value.value}"
+            f"The causal effect of changing {treatment_variable.name} = {self.estimator.control_value} to "
+            f"{treatment_variable.name}' = {self.estimator.treatment_value} is {self.test_value.value}"
             f"(95% confidence intervals: {self.confidence_intervals})."
         )

From 0af5898b912b9f9f247668bd51d003e11165bacd Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Thu, 20 Feb 2025 12:53:55 +0000
Subject: [PATCH 39/44] pylint

---
 causal_testing/estimation/abstract_estimator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/causal_testing/estimation/abstract_estimator.py b/causal_testing/estimation/abstract_estimator.py
index 21c330c8..49e43d79 100644
--- a/causal_testing/estimation/abstract_estimator.py
+++ b/causal_testing/estimation/abstract_estimator.py
@@ -30,7 +30,7 @@ class Estimator(ABC):
     """
 
     def __init__(
-        # pylint: disable=too-many-arguments
+        # pylint: disable=too-many-arguments,R0801
         self,
         base_test_case: BaseTestCase,
         treatment_value: float,

From c86fcec690cecfb937cf67da73e326275f107b1d Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Thu, 20 Feb 2025 12:56:22 +0000
Subject: [PATCH 40/44] pylint

---
 causal_testing/estimation/linear_regression_estimator.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/causal_testing/estimation/linear_regression_estimator.py b/causal_testing/estimation/linear_regression_estimator.py
index 41c8619d..e1b0a774 100644
--- a/causal_testing/estimation/linear_regression_estimator.py
+++ b/causal_testing/estimation/linear_regression_estimator.py
@@ -36,6 +36,7 @@ def __init__(
         query: str = "",
     ):
         # pylint: disable=too-many-arguments
+        # pylint: disable=R0801
         super().__init__(
             base_test_case=base_test_case,
             treatment_value=treatment_value,

From 26f814e8d6904050235035c765b5acb533fe8544 Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Thu, 20 Feb 2025 12:56:40 +0000
Subject: [PATCH 41/44] Forgot to save

---
 causal_testing/estimation/abstract_estimator.py            | 3 ++-
 causal_testing/estimation/abstract_regression_estimator.py | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/causal_testing/estimation/abstract_estimator.py b/causal_testing/estimation/abstract_estimator.py
index 49e43d79..782aeb08 100644
--- a/causal_testing/estimation/abstract_estimator.py
+++ b/causal_testing/estimation/abstract_estimator.py
@@ -30,7 +30,8 @@ class Estimator(ABC):
     """
 
     def __init__(
-        # pylint: disable=too-many-arguments,R0801
+        # pylint: disable=too-many-arguments
+        # pylint: disable=R0801
         self,
         base_test_case: BaseTestCase,
         treatment_value: float,
diff --git a/causal_testing/estimation/abstract_regression_estimator.py b/causal_testing/estimation/abstract_regression_estimator.py
index 4f9a1fe4..4b0fba80 100644
--- a/causal_testing/estimation/abstract_regression_estimator.py
+++ b/causal_testing/estimation/abstract_regression_estimator.py
@@ -33,6 +33,7 @@ def __init__(
         alpha: float = 0.05,
         query: str = "",
     ):
+        # pylint: disable=R0801
         super().__init__(
             base_test_case=base_test_case,
             treatment_value=treatment_value,

From 949c17dc7eaf712a97e547d168deff5e29e8c11e Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Thu, 20 Feb 2025 14:02:11 +0000
Subject: [PATCH 42/44] Removed the docs for the deprecated frontends

---
 docs/source/frontends/json_front_end.rst | 91 ------------------------
 docs/source/frontends/test_suite.rst     | 29 --------
 2 files changed, 120 deletions(-)
 delete mode 100644 docs/source/frontends/json_front_end.rst
 delete mode 100644 docs/source/frontends/test_suite.rst

diff --git a/docs/source/frontends/json_front_end.rst b/docs/source/frontends/json_front_end.rst
deleted file mode 100644
index 33c9de08..00000000
--- a/docs/source/frontends/json_front_end.rst
+++ /dev/null
@@ -1,91 +0,0 @@
-JSON Frontend
-======================================
-The JSON frontend allows causal tests and parameters to be specified in JSON to allow for tests to be quickly written
-whilst retaining the flexibility of the framework.
-
-Basic Workflow
---------------
-The basic workflow of using the JSON frontend is as follows:
-
-#. Specify your test cases in the JSON format (more details below).
-#. Create your DAG in a dot file.
-#. Initialise the JsonUtility class in python with a path of where you want the outputs saved.
-#. Set the paths pointing the Json class to your json file, dag file and optionally your data file (see data section below) using the :func:`causal_testing.json_front.json_class.JsonUtility.set_paths` method.
-#. Run the :func:`causal_testing.json_front.json_class.JsonUtility.setup` method providing your scenario.
-#. Run the :func:`causal_testing.json_front.json_class.JsonUtility.run_json_tests` method, which will execute the test cases provided by the JSON file.
-
-Example Walkthrough
--------------------
-An example is provided in `examples/poisson` which contains a README with more detailed information.
-
-run_causal_tests.py
-*******************
-The `examples/poisson/example_run_causal_tests.py <https://github.com/CITCOM-project/CausalTestingFramework/blob/main/examples/poisson/example_run_causal_tests.py>`_
-contains python code written by the user to implement scenario specific features
-such as:
-
-#. Custom Estimators
-#. Causal Variable specification
-#. Causal test case outcomes
-#. Meta constraint functions
-#. Mapping JSON distributions, effects, and estimators to python objects
-
-Use-case specific information is also declared here such as the paths to the relevant files needed for the tests.
-
-causal_tests.json
-*****************
-The `examples/poisson/causal_tests.json <https://github.com/CITCOM-project/CausalTestingFramework/blob/main/examples/poisson/causal_tests.json>`_ contains Python code written by the user to implement scenario specific features
-is the JSON file that allows for the easy specification of multiple causal tests.
-Tests can be specified two ways; firstly by specifying a mutation lke in the example tests with the following structure:
-
-#. name
-#. mutations
-#. estimator
-#. estimate_type
-#. effect_modifiers
-#. expected_effects
-#. skip: boolean that if set true the test won't be executed and will be skipped
-
-The second method of specifying a test is to specify the test in a concrete form with the following structure:
-
-#. name
-#. treatment_variable
-#. control_value
-#. treatment_value
-#. estimator
-#. estimate_type
-#. expected_effect
-#. skip
-
-
-Alternatively, a ``causal_tests.json`` file can be created from a ``dag.dot`` file using the ``causal_testing/specification/metamorphic_relation.py`` script as follows::
-
-  python causal_testing/testing/metamorphic_relation.py --dag_path dag.dot --output_path causal_tests.json
-
-Run Commands
-************
-This example uses the ``Argparse`` utility built into the JSON frontend, which allows the frontend to be run from a commandline interface as shown here.
-
-To run the JSON frontend example from the root directory of the project, use::
-
-    python examples\poisson\example_run_causal_tests.py --data_path="examples\poisson\data.csv" --dag_path="examples\poisson\dag.dot" --json_path="examples\poisson\causal_tests.json
-
-A failure flag `-f` can be specified to stop the framework running if a test is failed::
-
-    python examples\poisson\example_run_causal_tests.py -f --data_path="examples\poisson\data.csv" --dag_path="examples\poisson\dag.dot" --json_path="examples\poisson\causal_tests.json
-
-There are two main outputs of this frontend, both are controlled by the logging module. Firstly outputs are printed to stdout (terminal).
-Secondly a log file is produced, by default a file called `json_frontend.log` is produced in the directory the script is called from.
-
-The behaviour of where the log file is produced and named can be altered with the --log_path argument::
-
-    python examples\poisson\run_causal_tests.py -f --data_path="examples\poisson\data.csv" --dag_path="examples\poisson\dag.dot" --json_path="examples\poisson\causal_tests.json --log_path="example_directory\logname.log"
-
-
-Runtime Data
--------------
-
-There are currently 2 methods to inputting your runtime data into the JSON frontend:
-
-#. Providing one or more file paths to `.csv` files containing your data
-#. Setting a dataframe to the `.data` attribute of the JSONUtility instance, this must be done before the setup method is called.
\ No newline at end of file
diff --git a/docs/source/frontends/test_suite.rst b/docs/source/frontends/test_suite.rst
deleted file mode 100644
index a5a488bd..00000000
--- a/docs/source/frontends/test_suite.rst
+++ /dev/null
@@ -1,29 +0,0 @@
-.. module:: causal_testing
-Test Suite
-======================================
-The test_suite feature allows for the effective running of multiple causal_test_cases using a logical structure.
-This structure is defined by the parameters in the class: :class:`causal_testing.testing.causal_test_suite`.
-
-A current limitation of the Test Suite is that it requires references to the estimator class, not instances (objects) of
-estimator classes, which prevents the usage of some of the features of an estimator.
-
-The test_suite class is an extension of the Python UserDict_, meaning it simulates a standard Python dictionary where
-any dictionary method can be used. The class also features a setter to make adding new test cases quicker and less
-error prone :meth:`causal_testing.testing.causal_test_suite.CausalTestSuite.add_test_object`.
-
-The suite's dictionary structure is at the top level a :class:`causal_testing.testing.base_test_case` as the key and
-the value is a test object in the format of another dictionary:
-
-.. code-block:: python
-
-    test_object = {"tests": causal_test_case_list, "estimators": estimators_classes, "estimate_type": estimate_type}
-
-Each ``base_test_case`` contains the treatment and outcome variables, and only causal_test_cases testing this relationship
-should be placed in the test object for that ``base_test_case``.
-
-
-Following this, users can similarly execute a suite of causal tests and return the results in a list by executing the
-class's :meth:`causal_testing.testing.causal_test_suite.CausalTestSuite.execute_test_suite` method.
-
-
-

From 28c99b726084910b6c9ff82621e3d3852d9d098f Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Thu, 20 Feb 2025 14:02:44 +0000
Subject: [PATCH 43/44] Removed test csv output

---
 width_num_shapes_results_random_1000.csv | 145 -----------------------
 1 file changed, 145 deletions(-)
 delete mode 100644 width_num_shapes_results_random_1000.csv

diff --git a/width_num_shapes_results_random_1000.csv b/width_num_shapes_results_random_1000.csv
deleted file mode 100644
index 5ad081cf..00000000
--- a/width_num_shapes_results_random_1000.csv
+++ /dev/null
@@ -1,145 +0,0 @@
-,control,treatment,intensity,ate,ci_low,ci_high
-0,1.0,2.0,1,-7.378642492215972,-13.91823865204362,-0.8390463323883246
-1,2.0,3.0,1,-2.7096586545197052,-9.80288285818825,4.383565549148839
-2,3.0,4.0,1,-1.5424126950956385,-11.120887611821669,8.03606222163039
-3,4.0,5.0,1,-1.0755143113260122,-13.708422107725262,11.557393485073238
-4,5.0,6.0,1,-0.842065119441199,-16.741293716403792,15.057163477521392
-5,6.0,7.0,1,-0.7086655812213047,-19.97292732259147,18.555596160148863
-6,7.0,8.0,1,-0.6252908698338722,-23.308418384039193,22.057836644371445
-7,8.0,9.0,1,-0.5697077289089161,-26.70434658441146,25.564931126593628
-8,9.0,10.0,1,-0.5307995302614472,-30.138282420998813,29.076683360475922
-9,1.0,2.0,2,-7.378642492215974,-16.38113570509907,1.6238507206671215
-10,2.0,3.0,2,-2.709658654519707,-11.12501848472749,5.705701175688077
-11,3.0,4.0,2,-1.542412695095635,-10.885940125445726,7.801114735254458
-12,4.0,5.0,2,-1.075514311326014,-12.37290328513123,10.2218746624792
-13,5.0,6.0,2,-0.8420651194411981,-14.717521484023761,13.033391245141367
-14,6.0,7.0,2,-0.7086655812213039,-17.50973658032815,16.092405417885537
-15,7.0,8.0,2,-0.6252908698338722,-20.549295407558176,19.29871366789043
-16,8.0,9.0,2,-0.5697077289089165,-23.735302054174575,22.595886596356742
-17,9.0,10.0,2,-0.5307995302614472,-27.013708674772325,25.95210961424943
-18,1.0,2.0,3,-7.378642492215974,-20.043311732745323,5.286026748313375
-19,2.0,3.0,3,-2.709658654519707,-14.257652928983816,8.838335619944402
-20,3.0,4.0,3,-1.542412695095635,-12.869624916374846,9.784799526183576
-21,4.0,5.0,3,-1.0755143113260104,-13.130648317524162,10.979619694872142
-22,5.0,6.0,3,-0.8420651194412017,-14.473331702582346,12.789201463699946
-23,6.0,7.0,3,-0.7086655812213039,-16.53982963725465,15.122498474812037
-24,7.0,8.0,3,-0.6252908698338757,-19.069141710952177,17.81855997128443
-25,8.0,9.0,3,-0.5697077289089165,-21.891675412501034,20.752259954683193
-26,9.0,10.0,3,-0.5307995302614472,-24.90395032760046,23.842351267077564
-27,1.0,2.0,4,-7.378642492215974,-23.373997163493392,8.616712179061437
-28,2.0,3.0,4,-2.709658654519707,-17.345789724839037,11.92647241579963
-29,3.0,4.0,4,-1.542412695095635,-15.337988269660514,12.253162879469244
-30,4.0,5.0,4,-1.0755143113260104,-14.765501080503213,12.614472457851193
-31,5.0,6.0,4,-0.8420651194412017,-15.241135322080474,13.557005083198078
-32,6.0,7.0,4,-0.7086655812213039,-16.55358588394173,15.136254721499121
-33,7.0,8.0,4,-0.6252908698338757,-18.491752099772434,17.241170360104668
-34,8.0,9.0,4,-0.5697077289089165,-20.870158913539562,19.730743455721743
-35,9.0,10.0,4,-0.5307995302614472,-23.550598680286186,22.488999619763284
-36,1.0,2.0,5,-7.378642492215974,-26.035120136794106,11.277835152362158
-37,2.0,3.0,5,-2.709658654519714,-19.863479936076985,14.444162627037556
-38,3.0,4.0,5,-1.542412695095635,-17.49695503693026,14.41212964673899
-39,4.0,5.0,5,-1.0755143113260175,-16.40311483898148,14.252086216329445
-40,5.0,6.0,5,-0.8420651194411874,-16.261852314806973,14.577722075924598
-41,6.0,7.0,5,-0.708665581221311,-16.960028525631117,15.542697363188495
-42,7.0,8.0,5,-0.6252908698338757,-18.36211115362275,17.111529413954997
-43,8.0,9.0,5,-0.5697077289089094,-20.30936425354075,19.16994879572293
-44,9.0,10.0,5,-0.5307995302614472,-22.65598617725834,21.594387116735447
-45,1.0,2.0,6,-7.378642492215988,-27.948775665159488,13.191490680727526
-46,2.0,3.0,6,-2.7096586545197,-21.671785689375824,16.252468380336424
-47,3.0,4.0,6,-1.542412695095635,-19.070061978940473,15.985236588749174
-48,4.0,5.0,6,-1.0755143113260175,-17.635327009716008,15.484298387063987
-49,5.0,6.0,6,-0.8420651194411874,-17.07200388946137,15.387873650578996
-50,6.0,7.0,6,-0.708665581221311,-17.31952432610808,15.902193163665459
-51,7.0,8.0,6,-0.6252908698338757,-18.300276563559066,17.049694823891315
-52,8.0,9.0,6,-0.5697077289089094,-19.891016782483533,18.751601324665728
-53,9.0,10.0,6,-0.5307995302614472,-21.953616473882548,20.892017413359625
-54,1.0,2.0,7,-7.378642492215988,-29.117265138277418,14.359980153845441
-55,2.0,3.0,7,-2.7096586545197,-22.75075933458973,17.33144202555033
-56,3.0,4.0,7,-1.542412695095635,-19.977859545238175,16.893034155046905
-57,4.0,5.0,7,-1.0755143113260033,-18.30608567018311,16.155057047531102
-58,5.0,6.0,7,-0.8420651194412017,-17.45218176477536,15.768051525892957
-59,6.0,7.0,7,-0.708665581221311,-17.385478541472793,15.968147379030171
-60,7.0,8.0,7,-0.6252908698338615,-18.066675373156556,16.816093633488833
-61,8.0,9.0,7,-0.5697077289089236,-19.40070948795028,18.261294030132433
-62,9.0,10.0,7,-0.5307995302614472,-21.25887106921732,20.197272008694426
-63,1.0,2.0,8,-7.378642492215988,-29.589225422873483,14.831940438441507
-64,2.0,3.0,8,-2.7096586545197,-23.14075432553625,17.72143701649685
-65,3.0,4.0,8,-1.542412695095635,-20.235252891690664,17.150427501499394
-66,4.0,5.0,8,-1.0755143113260033,-18.39286072049805,16.241832097846043
-67,5.0,6.0,8,-0.8420651194412017,-17.341020322637092,15.65689008375469
-68,6.0,7.0,8,-0.708665581221311,-17.06994694543087,15.65261578298825
-69,7.0,8.0,8,-0.6252908698338615,-17.565333348018214,16.31475160835049
-70,8.0,9.0,8,-0.5697077289089236,-18.749087451671272,17.609671993853425
-71,9.0,10.0,8,-0.5307995302614472,-20.495979946318016,19.43438088579512
-72,1.0,2.0,9,-7.378642492215988,-29.462760189589744,14.705475205157768
-73,2.0,3.0,9,-2.7096586545196715,-22.939924975757663,17.52060766671832
-74,3.0,4.0,9,-1.5424126950956634,-19.933724348774604,16.848898958583277
-75,4.0,5.0,9,-1.0755143113260033,-17.974722820937814,15.823694198285807
-76,5.0,6.0,9,-0.8420651194412017,-16.802840553585952,15.118710314703549
-77,6.0,7.0,9,-0.7086655812213394,-16.425883438085293,15.008552275642614
-78,7.0,8.0,9,-0.6252908698338615,-16.844793222974317,15.594211483306594
-79,8.0,9.0,9,-0.5697077289088952,-17.986034056166602,16.84661859834881
-80,9.0,10.0,9,-0.5307995302614472,-19.718307331231983,18.65670827070909
-81,1.0,2.0,10,-7.378642492215988,-28.908492866963968,14.151207882531992
-82,2.0,3.0,10,-2.7096586545196715,-22.329186135144028,16.909868826104685
-83,3.0,4.0,10,-1.5424126950956634,-19.265301523111248,16.18047613291992
-84,4.0,5.0,10,-1.0755143113260033,-17.254314186285058,15.103285563633051
-85,5.0,6.0,10,-0.8420651194412017,-16.049664005016837,14.365533766134433
-86,6.0,7.0,10,-0.7086655812213394,-15.672387298506294,14.255056136063615
-87,7.0,8.0,10,-0.6252908698338615,-16.12682189679697,14.876240157129246
-88,8.0,9.0,10,-0.5697077289088952,-17.329661181871813,16.190245724054023
-89,9.0,10.0,10,-0.5307995302614472,-19.134199065817313,18.07260000529442
-90,1.0,2.0,11,-7.378642492215988,-28.211037397554605,13.45375241312263
-91,2.0,3.0,11,-2.7096586545196715,-21.62080993571135,16.201492626672007
-92,3.0,4.0,11,-1.5424126950956634,-18.578314574480032,15.493489184288705
-93,4.0,5.0,11,-1.0755143113260033,-16.619351549555233,14.468322926903227
-94,5.0,6.0,11,-0.8420651194412017,-15.504259980146628,13.820129741264225
-95,6.0,7.0,11,-0.7086655812213394,-15.252130048314598,13.834798885871919
-96,7.0,8.0,11,-0.6252908698338615,-15.850915487393195,14.600333747725472
-97,8.0,9.0,11,-0.5697077289088952,-17.19471919807603,16.05530374025824
-98,9.0,10.0,11,-0.5307995302614472,-19.121187130685882,18.059588070162988
-99,1.0,2.0,12,-7.378642492215988,-27.816475373158596,13.05919038872662
-100,2.0,3.0,12,-2.7096586545196715,-21.311564093961522,15.892246784922179
-101,3.0,4.0,12,-1.5424126950956634,-18.4316147555852,15.346789365393875
-102,4.0,5.0,12,-1.0755143113260033,-16.68850845259334,14.537479829941333
-103,5.0,6.0,12,-0.8420651194412017,-15.82369961782706,14.139569378944657
-104,6.0,7.0,12,-0.7086655812213394,-15.821709691361946,14.404378528919267
-105,7.0,8.0,12,-0.6252908698338615,-16.633085203954522,15.382503464286799
-106,8.0,9.0,12,-0.5697077289088952,-18.132790226794157,16.993374768976366
-107,9.0,10.0,12,-0.5307995302614472,-20.161775051364373,19.10017599084148
-108,1.0,2.0,13,-7.378642492215931,-28.324569726565073,13.56728474213321
-109,2.0,3.0,13,-2.709658654519785,-22.052605303921496,16.633287994881925
-110,3.0,4.0,13,-1.5424126950956634,-19.518911869390536,16.43408647919921
-111,4.0,5.0,13,-1.0755143113259464,-18.16831866989253,16.017290047240635
-112,5.0,6.0,13,-0.8420651194412585,-17.683477997062027,15.99934775817951
-113,6.0,7.0,13,-0.7086655812212257,-17.988597888701065,16.571266726258614
-114,7.0,8.0,13,-0.6252908698338615,-19.000371980537352,17.74979024086963
-115,8.0,9.0,13,-0.569707728908952,-20.599229033232973,19.45981357541507
-116,9.0,10.0,13,-0.5307995302614472,-22.65487001642896,21.593270955906064
-117,1.0,2.0,14,-7.378642492215931,-30.323636635308162,15.5663516508763
-118,2.0,3.0,14,-2.709658654519785,-24.41936770763448,19.000050398594908
-119,3.0,4.0,14,-1.5424126950956634,-22.356813121236883,19.271987731045556
-120,4.0,5.0,14,-1.0755143113259464,-21.47471564166449,19.323687019012596
-121,5.0,6.0,14,-0.8420651194412585,-21.383064154658655,19.698933915776138
-122,6.0,7.0,14,-0.7086655812212257,-21.9582335783723,20.54090241592985
-123,7.0,8.0,14,-0.6252908698338615,-23.107653715885363,21.85707197621764
-124,8.0,9.0,14,-0.569707728908952,-24.735404733257155,23.59598927543925
-125,9.0,10.0,14,-0.5307995302614472,-26.747463262163137,25.685864201640243
-126,1.0,2.0,15,-7.378642492215931,-34.13087918226529,19.373594197833427
-127,2.0,3.0,15,-2.709658654519785,-28.636980229638084,23.217662920598514
-128,3.0,4.0,15,-1.5424126950956634,-27.04241542640932,23.957590036217994
-129,4.0,5.0,15,-1.0755143113259464,-26.58007596614152,24.42904734348963
-130,5.0,6.0,15,-0.8420651194412585,-26.807796364306228,25.12366612542371
-131,6.0,7.0,15,-0.7086655812212257,-27.580783710990545,26.163452548548094
-132,7.0,8.0,15,-0.6252908698338615,-28.81249696408338,27.561915224415657
-133,8.0,9.0,15,-0.569707728908952,-30.430425062116228,29.291009604298324
-134,9.0,10.0,15,-0.5307995302614472,-32.36915735084904,31.307558290326142
-135,1.0,2.0,16,-7.378642492215931,-39.72917583196772,24.97189084753586
-136,2.0,3.0,16,-2.709658654519785,-34.60204449801245,29.182727188972876
-137,3.0,4.0,16,-1.5424126950956634,-33.39313515126685,30.30830976107552
-138,4.0,5.0,16,-1.0755143113259464,-33.25518652597691,31.104157903325017
-139,5.0,6.0,16,-0.8420651194412585,-33.71724187890618,32.03311164002366
-140,6.0,7.0,16,-0.7086655812212257,-34.628456482555634,33.21112532011318
-141,7.0,8.0,16,-0.6252908698338615,-35.91079604448066,34.66021430481294
-142,8.0,9.0,16,-0.569707728908952,-37.50821676227747,36.368801304459566
-143,9.0,10.0,16,-0.5307995302614472,-39.374044276785185,38.31244521626229

From b462aa1635bb2be9e47e153076db6a0a5ee154ac Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Thu, 27 Feb 2025 09:41:22 +0000
Subject: [PATCH 44/44] Removed all mention of data collection and json front
 end

---
 causal_testing/__init__.py                            | 6 +++---
 causal_testing/surrogate/causal_surrogate_assisted.py | 4 ++--
 docs/source/index.rst                                 | 8 --------
 docs/source/modules/causal_tests.rst                  | 6 +++---
 docs/source/usage.rst                                 | 6 +-----
 examples/covasim_/doubling_beta/README.md             | 4 ++--
 examples/covasim_/vaccinating_elderly/README.md       | 3 +--
 examples/poisson-line-process/README.md               | 1 -
 8 files changed, 12 insertions(+), 26 deletions(-)

diff --git a/causal_testing/__init__.py b/causal_testing/__init__.py
index 19ce7047..4b58692b 100644
--- a/causal_testing/__init__.py
+++ b/causal_testing/__init__.py
@@ -1,11 +1,11 @@
 """
 This is the CausalTestingFramework Module
 It contains 5 subpackages:
-data_collection
-generation
-json_front
+estimation
 specification
+surrogate
 testing
+utils
 """
 
 import logging
diff --git a/causal_testing/surrogate/causal_surrogate_assisted.py b/causal_testing/surrogate/causal_surrogate_assisted.py
index d3dd5083..a7d436a4 100644
--- a/causal_testing/surrogate/causal_surrogate_assisted.py
+++ b/causal_testing/surrogate/causal_surrogate_assisted.py
@@ -78,11 +78,11 @@ def execute(
     ):
         """For this specific test case, a search algorithm is used to find the most contradictory point in the input
         space which is, therefore, most likely to indicate incorrect behaviour. This cadidate test case is run against
-        the simulator, checked for faults and the result returned with collected data
+        the simulator, checked for faults and the result returned.
         :param df: An dataframe which contains data relevant to the specified scenario
         :param max_executions: Maximum number of simulator executions before exiting the search
         :param custom_data_aggregator:
-        :return: tuple containing SimulationResult or str, execution number and collected data"""
+        :return: tuple containing SimulationResult or str, execution number and dataframe"""
 
         for i in range(max_executions):
             surrogate_models = self.generate_surrogates(self.specification, df)
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 765c65b9..38e09664 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -119,14 +119,6 @@ system-under-test that is expected to cause a change to some output(s).
 
    /autoapi/index
 
-.. toctree::
-   :hidden:
-   :maxdepth: 1
-   :caption: Front Ends
-
-   frontends/json_front_end
-   frontends/test_suite
-
 .. toctree::
    :hidden:
    :maxdepth: 1
diff --git a/docs/source/modules/causal_tests.rst b/docs/source/modules/causal_tests.rst
index a8ea33c9..1f7551da 100644
--- a/docs/source/modules/causal_tests.rst
+++ b/docs/source/modules/causal_tests.rst
@@ -2,7 +2,7 @@
 Causal Testing
 ==============
 
-This package contains the main components of the causal testing framework, causal tests and causal oracles, which utilise both the specification and data collection packages.
+This package contains the main components of the causal testing framework, causal tests and causal oracles, which utilise the specification package.
 
 - A causal test case is a triple ``(X, \Delta, Y)`` where ``X`` is an input configuration, ``\Delta`` is an intervention, and ``Y`` is the expected causal effect of applying ``\Delta`` to ``X``. Put simply, a causal test case states the expected change in an outcome that applying an intervention to X should cause. In this context, an intervention is simply a function which manipulates the input configuration of the scenario-under-test in a way that is expected to cause a change to some outcome.
 
@@ -44,12 +44,12 @@ We then define a number of causal test cases to apply to the scenario-under-test
 
 - To run these test cases experimentally, we need to execute both ``X`` and ``\Delta(X)`` - that is, with and without the interventions. Since the only difference between these test cases is the intervention, we can conclude that the observed difference in ``n_infected_t5`` was caused by the interventions. While this is the simplest approach, it can be extremely inefficient at scale, particularly when dealing with complex software such as computational models.
 
-- To run these test cases observationally, we need to collect *valid* observational data for the scenario-under-test. This means we can only use executions with between 20 and 30 people, a square environment of size betwen 20x20 and 40x40, and where a single person was initially infected. In addition, this data must contain executions both with and without the intervention. Next, we need to identify any sources of bias in this data and determine a procedure to counteract them. This is achieved automatically using graphical causal inference techniques that identify a set of variables that can be adjusted to obtain a causal estimate. Finally, for any categorical biasing variables, we need to make sure we have executions corresponding to each category otherwise we have a positivity violation (i.e. missing data). In the worst case, this at least guides the user to an area of the system-under-test that should be executed.
+- To run these test cases observationally, we need *valid* observational data for the scenario-under-test. This means we can only use executions with between 20 and 30 people, a square environment of size betwen 20x20 and 40x40, and where a single person was initially infected. In addition, this data must contain executions both with and without the intervention. Next, we need to identify any sources of bias in this data and determine a procedure to counteract them. This is achieved automatically using graphical causal inference techniques that identify a set of variables that can be adjusted to obtain a causal estimate. Finally, for any categorical biasing variables, we need to make sure we have executions corresponding to each category otherwise we have a positivity violation (i.e. missing data). In the worst case, this at least guides the user to an area of the system-under-test that should be executed.
 
 Causal Inference
 ----------------
 
-- After collecting either observational or experimental data, we now need to apply causal inference. First, as described above, we use our causal graph to identify a set of adjustment variables which mitigate all bias in the data. Next, we use statistical models to adjust for these variables (implementing the statistical procedure necessary to isolate the causal effect) and obtain the desired causal estimate. Depending on the statistical model used, we can also generate 95% confidence intervals (or confidence intervals at any confidence level for that matter).
+- After obtaining suitable test data, we now need to apply causal inference. First, as described above, we use our causal graph to identify a set of adjustment variables which mitigate all bias in the data. Next, we use statistical models to adjust for these variables (implementing the statistical procedure necessary to isolate the causal effect) and obtain the desired causal estimate. Depending on the statistical model used, we can also generate 95% confidence intervals (or confidence intervals at any confidence level for that matter).
 
 - In our example, the causal DAG tell us it is necessary to adjust for ``environment`` in order to obtain the causal effect of ``precaution`` on ``n_infected_t5``. Supposing the relationship is linear, we can employ a linear regression model of the form ``n_infected_t5 ~ p0*precaution + p1*environment`` to carry out this adjustment. If we use experimental data, only a single environment is used by design and therefore the adjustment has no impact. However, if we use observational data, the environment may vary and therefore this adjustment will look at the causal effect within different environments and then provide a weighted average, which turns out to be the partial coefficient ``p0``.
 
diff --git a/docs/source/usage.rst b/docs/source/usage.rst
index 92e03533..adab3719 100644
--- a/docs/source/usage.rst
+++ b/docs/source/usage.rst
@@ -2,11 +2,7 @@
 Usage
 -----
 
-There are currently 3 methods of using the Causal Testing Framework; 1) :doc:`JSON Front End </frontends/json_front_end>`\, 2)
-:doc:`Test Suites </frontends/test_suite>`\, or 3) directly as
-described below.
-
-The causal testing framework is made up of 3 main components: Specification, Testing, and Data Collection. The first
+The causal testing framework is made up of 2 main components: Specification and Testing. The first
 step is to specify the (part of the) system under test as a modelling ``Scenario``. Modelling scenarios specify the
 observable variables and any constraints which exist between them. We currently support 3 types of variable:
 
diff --git a/examples/covasim_/doubling_beta/README.md b/examples/covasim_/doubling_beta/README.md
index bee56573..b51bb7e1 100644
--- a/examples/covasim_/doubling_beta/README.md
+++ b/examples/covasim_/doubling_beta/README.md
@@ -1,6 +1,6 @@
 # Covasim Case Study: Doubling Beta (Infectiousness)
-In this case study, we demonstrate how to use the causal testing framework with observational
-data collected Covasim to conduct Statistical Metamorphic Testing (SMT) a posteriori. Here, we focus on a set of simple
+In this case study, we demonstrate how to use the causal testing framework with observational data from
+Covasim to conduct Statistical Metamorphic Testing (SMT) a posteriori. Here, we focus on a set of simple
 modelling scenarios that investigate how the infectiousness of the virus (encoded as the parameter beta) affects the
 cumulative number of infections over a fixed duration. We also run several causal tests that focus on increasingly
 specific causal questions pertaining to more refined metamorphic properties and enabling us to learn more about the
diff --git a/examples/covasim_/vaccinating_elderly/README.md b/examples/covasim_/vaccinating_elderly/README.md
index 4a8422fa..7f715fd8 100644
--- a/examples/covasim_/vaccinating_elderly/README.md
+++ b/examples/covasim_/vaccinating_elderly/README.md
@@ -15,8 +15,7 @@ Further details are provided in Section 5.3 (Prioritising the elderly for vaccin
 
 >[!NOTE]
 >This version of the CTF uses observational data to separate the software execution and testing.
-Older versions of this framework simulate the data using a custom experimental data collector and the `covasim`
-package (version 3.0.7) as outlined below.
+Older versions of this framework directly run the `covasim` package (version 3.0.7) as outlined below.
 
 ## How to run
 To run this case study:
diff --git a/examples/poisson-line-process/README.md b/examples/poisson-line-process/README.md
index ac6dfec2..de28a98e 100644
--- a/examples/poisson-line-process/README.md
+++ b/examples/poisson-line-process/README.md
@@ -7,6 +7,5 @@ To run this case study:
    (instructions are provided in the project README).
 2. Change directory to `causal_testing/examples/poisson-line-process`.
 3. Run the command `python example_pure_python.py` to demonstrate causal testing using pure python.
-3. Run the command `python example_json_frontend.py` to demonstrate the same causal tests using JSON.
 
 This should print a series of causal test results and produce two CSV files. `intensity_num_shapes_results_random_1000.csv` corresponds to table 1, and `width_num_shapes_results_random_1000.csv` relates to our findings regarding the relationship of width and `P_u`.