CITCOM-project
diff --git a/‎README.md
Lines changed: 4 additions & 4 deletions b/‎README.md
Lines changed: 4 additions & 4 deletions
diff --git a/‎causal_testing/data_collection/data_collector.py
Lines changed: 1 addition & 0 deletions b/‎causal_testing/data_collection/data_collector.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎causal_testing/json_front/json_class.py
Lines changed: 4 additions & 4 deletions b/‎causal_testing/json_front/json_class.py
Lines changed: 4 additions & 4 deletions
diff --git a/‎causal_testing/testing/causal_test_engine.py
Lines changed: 39 additions & 42 deletions b/‎causal_testing/testing/causal_test_engine.py
Lines changed: 39 additions & 42 deletions
diff --git a/‎examples/covasim_/doubling_beta/causal_test_beta.py
Lines changed: 9 additions & 9 deletions b/‎examples/covasim_/doubling_beta/causal_test_beta.py
Lines changed: 9 additions & 9 deletions
@@ -14,7 +14,7 @@ The causal testing framework has three core components:
     1. Using the causal DAG, identify an estimand for the effect of the intervention on the output of interest. That is, a statistical procedure capable of estimating the causal effect of the intervention on the output.
     2. Collect the data to which the statistical procedure will be applied (see Data collection below).
     3. Apply a statistical model (e.g. linear regression or causal forest) to the data to obtain a point estimate for the causal effect. Depending on the estimator used, confidence intervals may also be obtained at a specified confidence level e.g. 0.05 corresponds to 95% confidence intervals (optional).
-    4. Return the casual test result including a point estimate and 95% confidence intervals, usally quantifying the average treatment effect (ATE).
+    4. Return the causal test result including a point estimate and 95% confidence intervals, usually quantifying the average treatment effect (ATE).
     5. Implement and apply a test oracle to the causal test result - that is, a procedure that determines whether the test should pass or fail based on the results. In the simplest case, this takes the form of an assertion which compares the point estimate to the expected causal effect specified in the causal test case.
 
 3. [Data collection](causal_testing/data_collection/README.md): Data for the system-under-test can be collected in two ways: experimentally or observationally. The former involves executing the system-under-test under controlled conditions which, by design, isolate the causal effect of interest (accurate but expensive), while the latter involves collecting suitable previous execution data and utilising our causal knowledge to draw causal inferences (potentially less accurate but efficient). To collect experimental data, the user must implement a single method which runs the system-under-test with a given input configuration. On the other hand, when dealing with observational data, we automatically check whether the data is suitable for the identified estimand in two steps. First, confirm whether the data contains a column for each variable in the causal DAG. Second, we check for [positivity violations](https://www.youtube.com/watch?v=4xc8VkrF98w). If there are positivity violations, we can provide instructions for an execution that will fill the gap (future work).
@@ -84,10 +84,10 @@ data_collector = ObservationalDataCollector(modelling_scenario, data_csv_path)
 The actual running of the tests is done using the `CausalTestEngine` class. This is still a work in progress and may change in the future to improve ease of use, but currently proceeds as follows.
 
 ```{python}
-causal_test_engine = CausalTestEngine(causal_test_case, causal_specification, data_collector)  # Instantiate the causal test engine
-minimal_adjustment_set = causal_test_engine.load_data(data_csv_path, index_col=0)  # Calculate the adjustment set
+causal_test_engine = CausalTestEngine(causal_specification, data_collector, index_col=0)  # Instantiate the causal test engine (this also loads the data via the data collector)
+causal_test_engine.identification(causal_test_case)  # Perform identification and compute the minimal adjustment set
 treatment_vars = list(causal_test_case.treatment_input_configuration)
-minimal_adjustment_set = minimal_adjustment_set - set([v.name for v in treatment_vars])  # Remove the treatment variables from the adjustment set. This is necessary for causal inference to work properly.
+minimal_adjustment_set = causal_test_engine.minimal_adjustment_set - set([v.name for v in treatment_vars])  # Remove the treatment variables from the adjustment set. This is necessary for causal inference to work properly.
 ```
 
 Whether using fresh or pre-existing data, a key aspect of causal inference is estimation. To actually execute a test, we need an estimator. We currently support two estimators: linear regression and causal forest. These can simply be instantiated as per the [documentation](https://causal-testing-framework.readthedocs.io/en/latest/autoapi/causal_testing/testing/estimators/index.html).
 
@@ -119,6 +119,7 @@ def run_system_with_input_configuration(self, input_configuration: dict) -> pd.D
         specified input configuration.
         """
 
+
 class ObservationalDataCollector(DataCollector):
     """A data collector that extracts data that is relevant to the specified scenario from a csv of execution data."""
 
 
@@ -169,7 +169,7 @@ def _execute_test_case(self, causal_test_case: CausalTestCase, estimator: Estima
 
         causal_test_engine, estimation_model = self._setup_test(causal_test_case, estimator)
         causal_test_result = causal_test_engine.execute_test(
-            estimation_model, estimate_type=causal_test_case.estimate_type
+            estimation_model, causal_test_case, estimate_type=causal_test_case.estimate_type
         )
 
         test_passes = causal_test_case.expected_causal_effect.apply(causal_test_result)
@@ -203,10 +203,10 @@ def _setup_test(self, causal_test_case: CausalTestCase, estimator: Estimator) ->
                 - estimation_model - Estimator instance for the test being run
         """
         data_collector = ObservationalDataCollector(self.modelling_scenario, self.data_path)
-        causal_test_engine = CausalTestEngine(causal_test_case, self.causal_specification, data_collector)
-        minimal_adjustment_set = causal_test_engine.load_data(index_col=0)
+        causal_test_engine = CausalTestEngine(self.causal_specification, data_collector, index_col=0)
+        causal_test_engine.identification(causal_test_case)
         treatment_vars = list(causal_test_case.treatment_input_configuration)
-        minimal_adjustment_set = minimal_adjustment_set - {v.name for v in treatment_vars}
+        minimal_adjustment_set = causal_test_engine.minimal_adjustment_set - {v.name for v in treatment_vars}
         estimation_model = estimator(
             (list(treatment_vars)[0].name,),
             [causal_test_case.treatment_input_configuration[v] for v in treatment_vars][0],
 
@@ -29,25 +29,9 @@ class CausalTestEngine:
         provided as an instance of the CausalTestResult class.
     (4) Define a test oracle procedure which uses the causal test results to determine whether the intervention has
         had the anticipated causal effect. This should assign a pass/fail value to the CausalTestResult.
-    """
-
-    def __init__(
-        self, causal_test_case: CausalTestCase, causal_specification: CausalSpecification, data_collector: DataCollector
-    ):
-        self.causal_test_case = causal_test_case
-        self.treatment_variables = list(self.causal_test_case.control_input_configuration)
-        self.casual_dag, self.scenario = (
-            causal_specification.causal_dag,
-            causal_specification.scenario,
-        )
-        self.data_collector = data_collector
-        self.scenario_execution_data_df = pd.DataFrame()
-
-    def load_data(self, **kwargs):
-        """Load execution data corresponding to the causal test case into a pandas dataframe and return the minimal
-        adjustment set.
 
-        Data can be loaded in two ways:
+    Data is loaded as part of the "__init__" function
+    Data can be loaded in two ways:
             (1) Experimentally - the model is executed with the treatment and control input configurations under
                 conditions that guarantee the observed change in outcome must be caused by the change in input
                 (intervention).
@@ -57,30 +41,43 @@ def load_data(self, **kwargs):
 
         After the data is loaded, both are treated in the same way and, provided the identifiability and modelling
         assumptions hold, can be used to estimate the causal effect for the causal test case.
+    """
 
-        :return self: Update the causal test case's execution data dataframe.
+    def __init__(self, causal_specification: CausalSpecification, data_collector: DataCollector, **kwargs):
+        self.casual_dag, self.scenario = (
+            causal_specification.causal_dag,
+            causal_specification.scenario,
+        )
+        self.data_collector = data_collector
+        self.scenario_execution_data_df = self.data_collector.collect_data(**kwargs)
+        self.minimal_adjustment_set = set()
+
+    def identification(self, causal_test_case):
+        """Identify the minimal adjustment set and store it in self.minimal_adjustment_set
+
+        :param causal_test_case: Causal test Case to get the minimum adjustment set from
         :return minimal_adjustment_set: The smallest set of variables which can be adjusted for to obtain a causal
         estimate as opposed to a purely associational estimate.
         """
 
-        self.scenario_execution_data_df = self.data_collector.collect_data(**kwargs)
-
         minimal_adjustment_sets = []
-        if self.causal_test_case.effect == "total":
+        treatment_variables = list(causal_test_case.control_input_configuration)
+        if causal_test_case.effect == "total":
             minimal_adjustment_sets = self.casual_dag.enumerate_minimal_adjustment_sets(
-                [v.name for v in self.treatment_variables], [v.name for v in self.causal_test_case.outcome_variables]
+                [v.name for v in treatment_variables], [v.name for v in causal_test_case.outcome_variables]
             )
-        elif self.causal_test_case.effect == "direct":
+        elif causal_test_case.effect == "direct":
             minimal_adjustment_sets = self.casual_dag.direct_effect_adjustment_sets(
-                [v.name for v in self.treatment_variables], [v.name for v in self.causal_test_case.outcome_variables]
+                [v.name for v in treatment_variables], [v.name for v in causal_test_case.outcome_variables]
             )
         else:
             raise ValueError("Causal effect should be 'total' or 'direct'")
 
-        minimal_adjustment_set = min(minimal_adjustment_sets, key=len)
-        return minimal_adjustment_set
+        self.minimal_adjustment_set = min(minimal_adjustment_sets, key=len)
 
-    def execute_test(self, estimator: Estimator, estimate_type: str = "ate") -> CausalTestResult:
+    def execute_test(
+        self, estimator: Estimator, causal_test_case: CausalTestCase, estimate_type: str = "ate"
+    ) -> CausalTestResult:
         """Execute a causal test case and return the causal test result.
 
         Test case execution proceeds with the following steps:
@@ -94,31 +91,31 @@ def execute_test(self, estimator: Estimator, estimate_type: str = "ate") -> Caus
         (7) Apply test oracle procedure to assign a pass/fail to the CausalTestResult and return.
 
         :param estimator: A reference to an Estimator class.
+        :param causal_test_case: The CausalTestCase object to be tested
         :param estimate_type: A string which denotes the type of estimate to return, ATE or CATE.
         :return causal_test_result: A CausalTestResult for the executed causal test case.
         """
         if self.scenario_execution_data_df.empty:
             raise Exception("No data has been loaded. Please call load_data prior to executing a causal test case.")
         if estimator.df is None:
             estimator.df = self.scenario_execution_data_df
-        treatments = [v.name for v in self.treatment_variables]
-        outcomes = [v.name for v in self.causal_test_case.outcome_variables]
-        minimal_adjustment_sets = self.casual_dag.enumerate_minimal_adjustment_sets(treatments, outcomes)
-        minimal_adjustment_set = min(minimal_adjustment_sets, key=len)
+        treatment_variables = list(causal_test_case.control_input_configuration)
+        treatments = [v.name for v in treatment_variables]
+        outcomes = [v.name for v in causal_test_case.outcome_variables]
 
         logger.info("treatments: %s", treatments)
         logger.info("outcomes: %s", outcomes)
-        logger.info("minimal_adjustment_set: %s", minimal_adjustment_set)
+        logger.info("minimal_adjustment_set: %s", self.minimal_adjustment_set)
 
-        minimal_adjustment_set = minimal_adjustment_set - {
-            v.name for v in self.causal_test_case.control_input_configuration
+        minimal_adjustment_set = self.minimal_adjustment_set - {
+            v.name for v in causal_test_case.control_input_configuration
         }
-        minimal_adjustment_set = minimal_adjustment_set - {v.name for v in self.causal_test_case.outcome_variables}
+        minimal_adjustment_set = minimal_adjustment_set - {v.name for v in causal_test_case.outcome_variables}
         assert all(
-            (v.name not in minimal_adjustment_set for v in self.causal_test_case.control_input_configuration)
+            (v.name not in minimal_adjustment_set for v in causal_test_case.control_input_configuration)
         ), "Treatment vars in adjustment set"
         assert all(
-            (v.name not in minimal_adjustment_set for v in self.causal_test_case.outcome_variables)
+            (v.name not in minimal_adjustment_set for v in causal_test_case.outcome_variables)
         ), "Outcome vars in adjustment set"
 
         variables_for_positivity = list(minimal_adjustment_set) + treatments + outcomes
@@ -142,7 +139,7 @@ def execute_test(self, estimator: Estimator, estimate_type: str = "ate") -> Caus
                     control_value=estimator.control_values,
                     adjustment_set=estimator.adjustment_set,
                     ate=cates_df,
-                    effect_modifier_configuration=self.causal_test_case.effect_modifier_configuration,
+                    effect_modifier_configuration=causal_test_case.effect_modifier_configuration,
                     confidence_intervals=confidence_intervals,
                 )
         elif estimate_type == "risk_ratio":
@@ -155,7 +152,7 @@ def execute_test(self, estimator: Estimator, estimate_type: str = "ate") -> Caus
                 control_value=estimator.control_values,
                 adjustment_set=estimator.adjustment_set,
                 ate=risk_ratio,
-                effect_modifier_configuration=self.causal_test_case.effect_modifier_configuration,
+                effect_modifier_configuration=causal_test_case.effect_modifier_configuration,
                 confidence_intervals=confidence_intervals,
             )
         elif estimate_type == "ate":
@@ -168,7 +165,7 @@ def execute_test(self, estimator: Estimator, estimate_type: str = "ate") -> Caus
                 control_value=estimator.control_values,
                 adjustment_set=estimator.adjustment_set,
                 ate=ate,
-                effect_modifier_configuration=self.causal_test_case.effect_modifier_configuration,
+                effect_modifier_configuration=causal_test_case.effect_modifier_configuration,
                 confidence_intervals=confidence_intervals,
             )
             # causal_test_result = CausalTestResult(minimal_adjustment_set, ate, confidence_intervals)
@@ -183,7 +180,7 @@ def execute_test(self, estimator: Estimator, estimate_type: str = "ate") -> Caus
                 control_value=estimator.control_values,
                 adjustment_set=estimator.adjustment_set,
                 ate=ate,
-                effect_modifier_configuration=self.causal_test_case.effect_modifier_configuration,
+                effect_modifier_configuration=causal_test_case.effect_modifier_configuration,
                 confidence_intervals=confidence_intervals,
             )
             # causal_test_result = CausalTestResult(minimal_adjustment_set, ate, confidence_intervals)
 
@@ -42,9 +42,9 @@ def doubling_beta_CATE_on_csv(observational_data_path: str, simulate_counterfact
     results_dict = {'association': {},
                     'causation': {}}
 
-    # Read in the observational data and perform identification
+    # Read in the observational data, perform identification, and setup the causal_test_engine
     past_execution_df = pd.read_csv(observational_data_path)
-    _, causal_test_engine = identification(observational_data_path)
+    _, causal_test_engine, causal_test_case = engine_setup(observational_data_path)
 
     linear_regression_estimator = LinearRegressionEstimator(('beta',), 0.032, 0.016,
                                                             {'avg_age', 'contacts'},  # We use custom adjustment set
@@ -53,15 +53,15 @@ def doubling_beta_CATE_on_csv(observational_data_path: str, simulate_counterfact
 
     # Add squared terms for beta, since it has a quadratic relationship with cumulative infections
     linear_regression_estimator.add_squared_term_to_df('beta')
-    causal_test_result = causal_test_engine.execute_test(linear_regression_estimator, 'ate')
+    causal_test_result = causal_test_engine.execute_test(linear_regression_estimator, causal_test_case, 'ate')
 
     # Repeat for association estimate (no adjustment)
     no_adjustment_linear_regression_estimator = LinearRegressionEstimator(('beta',), 0.032, 0.016,
                                                                           set(),
                                                                           ('cum_infections',),
                                                                           df=past_execution_df)
     no_adjustment_linear_regression_estimator.add_squared_term_to_df('beta')
-    association_test_result = causal_test_engine.execute_test(no_adjustment_linear_regression_estimator, 'ate')
+    association_test_result = causal_test_engine.execute_test(no_adjustment_linear_regression_estimator, causal_test_case, 'ate')
 
     # Store results for plotting
     results_dict['association'] = {'ate': association_test_result.ate,
@@ -83,7 +83,7 @@ def doubling_beta_CATE_on_csv(observational_data_path: str, simulate_counterfact
                                                                                ('cum_infections',),
                                                                                df=counterfactual_past_execution_df)
         counterfactual_linear_regression_estimator.add_squared_term_to_df('beta')
-        counterfactual_causal_test_result = causal_test_engine.execute_test(linear_regression_estimator, 'ate')
+        counterfactual_causal_test_result = causal_test_engine.execute_test(linear_regression_estimator, causal_test_case, 'ate')
         results_dict['counterfactual'] = {'ate': counterfactual_causal_test_result.ate,
                                           'cis': counterfactual_causal_test_result.confidence_intervals,
                                           'df': counterfactual_past_execution_df}
@@ -179,7 +179,7 @@ def doubling_beta_CATEs(observational_data_path: str, simulate_counterfactual: b
     age_contact_fig.savefig(outpath_base_str + "age_contact_executions.pdf", format="pdf")
 
 
-def identification(observational_data_path):
+def engine_setup(observational_data_path):
     # 1. Read in the Causal DAG
     causal_dag = CausalDAG('dag.dot')
 
@@ -213,12 +213,12 @@ def identification(observational_data_path):
     data_collector = ObservationalDataCollector(scenario, observational_data_path)
 
     # 7. Create an instance of the causal test engine
-    causal_test_engine = CausalTestEngine(causal_test_case, causal_specification, data_collector)
+    causal_test_engine = CausalTestEngine(causal_specification, data_collector)
 
     # 8. Obtain the minimal adjustment set for the causal test case from the causal DAG
-    minimal_adjustment_set = causal_test_engine.load_data(index_col=0)
+    causal_test_engine.identification(causal_test_case)
 
-    return minimal_adjustment_set, causal_test_engine
+    return causal_test_engine.minimal_adjustment_set, causal_test_engine, causal_test_case
 
 
 def plot_doubling_beta_CATEs(results_dict, title, figure=None, axes=None, row=None, col=None):