Refactor execute_test_suite into the CausalTestSuite object

christopher-wild · christopher-wild · commit e509eb306c94 · 2023-07-26T15:01:29.000+01:00
diff --git a/causal_testing/testing/causal_test_suite.py b/causal_testing/testing/causal_test_suite.py
@@ -1,10 +1,17 @@
 """This module contains the CausalTestSuite class, for details on using it:
 https://causal-testing-framework.readthedocs.io/en/latest/test_suite.html"""
+import logging
+
 from collections import UserDict
 from typing import Type, Iterable
 from causal_testing.testing.base_test_case import BaseTestCase
 from causal_testing.testing.causal_test_case import CausalTestCase
 from causal_testing.testing.estimators import Estimator
+from causal_testing.testing.causal_test_result import CausalTestResult, TestValue
+from causal_testing.data_collection.data_collector import ObservationalDataCollector
+from causal_testing.specification.causal_specification import CausalSpecification
+
+logger = logging.getLogger(__name__)
 
 
 class CausalTestSuite(UserDict):
@@ -20,11 +27,11 @@ class CausalTestSuite(UserDict):
     """
 
     def add_test_object(
-        self,
-        base_test_case: BaseTestCase,
-        causal_test_case_list: Iterable[CausalTestCase],
-        estimators_classes: Iterable[Type[Estimator]],
-        estimate_type: str = "ate",
+            self,
+            base_test_case: BaseTestCase,
+            causal_test_case_list: Iterable[CausalTestCase],
+            estimators_classes: Iterable[Type[Estimator]],
+            estimate_type: str = "ate",
     ):
         """
         A setter object to allow for the easy construction of the dictionary test suite structure
@@ -37,3 +44,98 @@ def add_test_object(
         """
         test_object = {"tests": causal_test_case_list, "estimators": estimators_classes, "estimate_type": estimate_type}
         self.data[base_test_case] = test_object
+
+    def execute_test_suite(
+        self, data_collector: ObservationalDataCollector, causal_specification: CausalSpecification
+    ) -> dict[BaseTestCase, dict[str, list[CausalTestResult]]]:
+        """Execute every causal test in this suite and return the results grouped by base test case.
+
+        :param data_collector: Collector whose ``data`` is assigned to each estimator that has no ``df`` of its own.
+        :param causal_specification: Supplies the causal DAG used for identification and the scenario.
+        :return: A dictionary keyed by base test case; each value maps an estimator class name to the list of
+                CausalTestResult objects produced for that base test case's tests.
+        :raises ValueError: If the data collector holds no data or a positivity violation is detected.
+        """
+        if data_collector.data.empty:
+            raise ValueError("No data has been loaded. Please call load_data prior to executing a causal test case.")
+        # NOTE(review): the return value of collect_data() is discarded and data_collector.data is used below;
+        # confirm collect_data() does not return a filtered frame that should be used instead.
+        data_collector.collect_data()
+        test_suite_results = {}
+        for edge, test_object in self.items():
+            treatment_name = edge.treatment_variable.name
+            outcome_name = edge.outcome_variable.name
+            logger.info("treatment: %s", edge.treatment_variable)
+            logger.info("outcome: %s", edge.outcome_variable)
+            minimal_adjustment_set = causal_specification.causal_dag.identification(edge)
+            # Subtract the names as whole elements: set(name) would split the string into single characters.
+            minimal_adjustment_set = minimal_adjustment_set - {treatment_name, outcome_name}
+
+            variables_for_positivity = [*minimal_adjustment_set, treatment_name, outcome_name]
+            if self._check_positivity_violation(
+                variables_for_positivity, causal_specification.scenario, data_collector.data
+            ):
+                raise ValueError("POSITIVITY VIOLATION -- Cannot proceed.")
+            results = {}
+            for estimator_class in test_object["estimators"]:
+                causal_test_results = []
+                for test in test_object["tests"]:
+                    estimator = estimator_class(
+                        test.treatment_variable.name,
+                        test.treatment_value,
+                        test.control_value,
+                        minimal_adjustment_set,
+                        test.outcome_variable.name,
+                    )
+                    if estimator.df is None:
+                        estimator.df = data_collector.data
+                    causal_test_results.append(self._return_causal_test_results(estimator, test))
+                results[estimator_class.__name__] = causal_test_results
+            test_suite_results[edge] = results
+        return test_suite_results
+
+    def _return_causal_test_results(self, estimator, causal_test_case):
+        """Run the estimate method matching the test case's estimate type and wrap its output in a result.
+
+        :param estimator: An Estimator instance exposing an ``estimate_<estimate_type>`` method
+        :param causal_test_case: The concrete test case to be executed
+        :return: a CausalTestResult holding the estimated effect and its confidence intervals
+        :raises AttributeError: if the estimator has no method for the requested estimate type
+        """
+        estimate_type = causal_test_case.estimate_type
+        method_name = f"estimate_{estimate_type}"
+        if not hasattr(estimator, method_name):
+            raise AttributeError(f"{estimator.__class__} has no {estimate_type} method.")
+        effect, confidence_intervals = getattr(estimator, method_name)(**causal_test_case.estimate_params)
+        return CausalTestResult(
+            estimator=estimator,
+            test_value=TestValue(estimate_type, effect),
+            effect_modifier_configuration=causal_test_case.effect_modifier_configuration,
+            confidence_intervals=confidence_intervals,
+        )
+
+    def _check_positivity_violation(self, variables_list, scenario, data):
+        """Check whether the dataframe has a positivity violation relative to the specified variables list.
+
+        A positivity violation occurs when there is a stratum of the dataframe which does not have any data. Here this is
+        detected as a non-hidden variable that has no column in the data. Causal inference is still possible with a
+        properly specified parametric estimator, so this method only logs a warning and lets the caller decide what to do.
+
+        :param variables_list: The list of variable names for which positivity must be satisfied.
+        :param scenario: Scenario whose hidden variables are exempt from the check.
+        :param data: The dataframe whose columns are compared against the variable names.
+        :return: True if positivity is violated, False otherwise.
+        """
+        # Hidden variables are excluded from the check, so they must not be reported as missing either.
+        hidden_variables = {x.name for x in scenario.hidden_variables()}
+        missing_variables = set(variables_list) - hidden_variables - set(data.columns)
+        if not missing_variables:
+            return False
+
+        logger.warning(
+            "Positivity violation: missing data for variables %s.\n"
+            "Causal inference is only valid if a well-specified parametric model is used.\n"
+            "Alternatively, consider restricting analysis to executions without these variables.",
+            missing_variables,
+        )
+        return True
diff --git a/tests/testing_tests/test_causal_test_suite.py b/tests/testing_tests/test_causal_test_suite.py
@@ -2,8 +2,7 @@
 import os
 import numpy as np
 import pandas as pd
-from causal_testing.testing.causal_test_engine import CausalTestEngine
-from causal_testing.testing.causal_test_engine import CausalTestSuite
+from causal_testing.testing.causal_test_suite import CausalTestSuite
 from causal_testing.testing.causal_test_case import CausalTestCase
 from causal_testing.testing.base_test_case import BaseTestCase
 from causal_testing.specification.variable import Input, Output
@@ -61,6 +60,9 @@ def setUp(self) -> None:
         self.test_suite.add_test_object(
             base_test_case=self.base_test_case, causal_test_case_list=test_list, estimators_classes=self.estimators
         )
+        self.causal_specification = CausalSpecification(self.scenario, self.causal_dag)
+
+        self.data_collector = ObservationalDataCollector(self.scenario, self.df)
 
     def test_adding_test_object(self):
         "test an object can be added to the test_suite using the add_test_object function"
@@ -93,9 +95,8 @@ def test_return_single_test_object(self):
 
     def test_execute_test_suite_single_base_test_case(self):
         """Check that the test suite can return the correct results from dummy data for a single base_test-case"""
-        causal_test_engine = self.create_causal_test_engine()
 
-        causal_test_results = causal_test_engine.execute_test_suite(test_suite=self.test_suite)
+        causal_test_results = self.test_suite.execute_test_suite(self.data_collector, self.causal_specification)
         causal_test_case_result = causal_test_results[self.base_test_case]
         self.assertAlmostEqual(causal_test_case_result["LinearRegressionEstimator"][0].test_value.value, 4, delta=1e-10)
 
@@ -109,21 +110,10 @@ def test_execute_test_suite_multiple_estimators(self):
         test_suite_2_estimators.add_test_object(
             base_test_case=self.base_test_case, causal_test_case_list=test_list, estimators_classes=estimators
         )
-        causal_test_engine = self.create_causal_test_engine()
-        causal_test_results = causal_test_engine.execute_test_suite(test_suite=test_suite_2_estimators)
+        causal_test_results = test_suite_2_estimators.execute_test_suite(self.data_collector, self.causal_specification)
         causal_test_case_result = causal_test_results[self.base_test_case]
         linear_regression_result = causal_test_case_result["LinearRegressionEstimator"][0]
         causal_forrest_result = causal_test_case_result["CausalForestEstimator"][0]
         self.assertAlmostEqual(linear_regression_result.test_value.value, 4, delta=1e-1)
         self.assertAlmostEqual(causal_forrest_result.test_value.value, 4, delta=1e-1)
 
-    def create_causal_test_engine(self):
-        """
-        Creating test engine is relatively computationally complex, this function allows for it to
-        easily be made in only the tests that require it.
-        """
-        causal_specification = CausalSpecification(self.scenario, self.causal_dag)
-
-        data_collector = ObservationalDataCollector(self.scenario, self.df)
-        causal_test_engine = CausalTestEngine(causal_specification, data_collector)
-        return causal_test_engine