CITCOM-project · f-allian · May 21, 2025 · May 30, 2025 · May 30, 2025 · Jun 2, 2025
diff --git a/docs/source/_static/images/CITCOM-logo-white.png b/docs/source/_static/images/CITCOM-logo-white.png
diff --git a/docs/source/_static/images/Sheffield-logo.png b/docs/source/_static/images/Sheffield-logo.png
diff --git a/examples/covasim_/doubling_beta/doubling_beta.py.ipynb b/examples/covasim_/doubling_beta/doubling_beta.py.ipynb
@@ -0,0 +1,91 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "id": "5772c02c-eeba-4f48-9425-53abf2439345",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "<div style=\"display: flex; justify-content: left; align-items: center; gap: 20px;\">\n",
+       "    <div>\n",
+       "        <img src=\"../../../docs/source/_static/images/CITCOM-logo.png\" width=\"300\">\n",
+       "    </div>\n",
+       "    <div>\n",
+       "        <img src=\"../../../docs/source/_static/images/Sheffield-logo.png\" width=\"700\">\n",
+       "    </div>\n",
+       "</div>\n"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "from IPython.display import HTML, display\n",
+    "\n",
+    "html = \"\"\"\n",
+    "<div style=\"display: flex; justify-content: left; align-items: center; gap: 20px;\">\n",
+    "    <div>\n",
+    "        <img src=\"../../../docs/source/_static/images/CITCOM-logo.png\" width=\"300\">\n",
+    "    </div>\n",
+    "    <div>\n",
+    "        <img src=\"../../../docs/source/_static/images/Sheffield-logo.png\" width=\"700\">\n",
+    "    </div>\n",
+    "</div>\n",
+    "\"\"\"\n",
+    "\n",
+    "display(HTML(html))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "id": "723a51ed-ff3c-4205-b2d3-4ebc0434dfac",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<style>div.input { display: none; }</style>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "execution_count": 43,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/examples/poisson-line-process/example_pure_python.py b/examples/poisson-line-process/example_pure_python.py
@@ -1,191 +1,100 @@
-import os
-import logging
+"""
+Causal testing framework example script.
 
-import pandas as pd
+This example demonstrates the use of a custom EmpiricalMeanEstimator
+for causal testing and runs causal tests using the framework.
+"""
 
-from causal_testing.specification.causal_dag import CausalDAG
-from causal_testing.specification.scenario import Scenario
-from causal_testing.specification.variable import Input, Output
-from causal_testing.specification.causal_specification import CausalSpecification
-from causal_testing.testing.causal_test_case import CausalTestCase
-from causal_testing.testing.causal_effect import ExactValue, Positive
-from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
-from causal_testing.estimation.abstract_estimator import Estimator
-from causal_testing.testing.base_test_case import BaseTestCase
+import os
+from typing import Tuple, Optional, Any
 
+from causal_testing.main import (
+    CausalTestingFramework,
+    CausalTestingPaths,
+    setup_logging
+)
+from causal_testing.estimation.abstract_estimator import Estimator
 
-logger = logging.getLogger(__name__)
-logging.basicConfig(level=logging.DEBUG, format="%(message)s")
+setup_logging(verbose=True)
 
 
 class EmpiricalMeanEstimator(Estimator):
+    """
+    Estimator that computes treatment effects using empirical means.
+
+    Both the Average Treatment Effect (ATE) and the risk ratio are computed from the
+    mean outcome of rows whose treatment equals the treatment or control value.
+    """
+
     def add_modelling_assumptions(self):
+        """Record this estimator's modelling assumption as a single list entry."""
+        self.modelling_assumptions.append(
+            "The data must contain runs with the exact configuration of interest."
+        )
+
+    def estimate_ate(self) -> Tuple[float, Optional[Any]]:
         """
-        Add modelling assumptions to the estimator. This is a list of strings which list the modelling assumptions that
-        must hold if the resulting causal inference is to be considered valid.
-        """
-        self.modelling_assumptions += "The data must contain runs with the exact configuration of interest."
+        Estimate the ATE as the treatment-group mean minus the control-group mean.
 
-    def estimate_ate(self) -> float:
-        """Estimate the outcomes under control and treatment.
-        :return: The empirical average treatment effect.
+        Returns:
+            Tuple of (ATE estimate, None); no extra data is computed here.
         """
-        control_results = self.df.where(self.df[self.base_test_case.treatment_variable.name] == self.control_value)[
-            self.base_test_case.outcome_variable.name
-        ].dropna()
-        treatment_results = self.df.where(self.df[self.base_test_case.treatment_variable.name] == self.treatment_value)[
-            self.base_test_case.outcome_variable.name
-        ].dropna()
+        control_results = self.df.where(
+            self.df[self.base_test_case.treatment_variable.name] == self.control_value
+        )[self.base_test_case.outcome_variable.name].dropna()
+
+        treatment_results = self.df.where(
+            self.df[self.base_test_case.treatment_variable.name] == self.treatment_value
+        )[self.base_test_case.outcome_variable.name].dropna()
+
         return treatment_results.mean() - control_results.mean(), None
 
-    def estimate_risk_ratio(self) -> float:
-        """Estimate the outcomes under control and treatment.
-        :return: The empirical average treatment effect.
+    def estimate_risk_ratio(self) -> Tuple[float, Optional[Any]]:
         """
-        control_results = self.df.where(self.df[self.base_test_case.treatment_variable.name] == self.control_value)[
-            self.base_test_case.outcome_variable.name
-        ].dropna()
-        treatment_results = self.df.where(self.df[self.base_test_case.treatment_variable.name] == self.treatment_value)[
-            self.base_test_case.outcome_variable.name
-        ].dropna()
+        Estimate the risk ratio as the treatment-group mean over the control-group mean.
+
+        Returns:
+            Tuple of (risk ratio estimate, None); no extra data is computed here.
+        """
+        control_results = self.df.where(
+            self.df[self.base_test_case.treatment_variable.name] == self.control_value
+        )[self.base_test_case.outcome_variable.name].dropna()
+
+        treatment_results = self.df.where(
+            self.df[self.base_test_case.treatment_variable.name] == self.treatment_value
+        )[self.base_test_case.outcome_variable.name].dropna()
+
         return treatment_results.mean() / control_results.mean(), None
 
 
-# 1. Read in the Causal DAG
 ROOT = os.path.realpath(os.path.dirname(__file__))
-causal_dag = CausalDAG(f"{ROOT}/dag.dot")
-
-# 2. Create variables
-width = Input("width", float)
-height = Input("height", float)
-intensity = Input("intensity", float)
-
-num_lines_abs = Output("num_lines_abs", float)
-num_lines_unit = Output("num_lines_unit", float)
-num_shapes_abs = Output("num_shapes_abs", float)
-num_shapes_unit = Output("num_shapes_unit", float)
-
-# 3. Create scenario
-scenario = Scenario(
-    variables={
-        width,
-        height,
-        intensity,
-        num_lines_abs,
-        num_lines_unit,
-        num_shapes_abs,
-        num_shapes_unit,
-    }
-)
 
-# 4. Construct a causal specification from the scenario and causal DAG
-causal_specification = CausalSpecification(scenario, causal_dag)
-
-observational_data_path = f"{ROOT}/data/random/data_random_1000.csv"
-
-
-def test_poisson_intensity_num_shapes(save=False):
-    intensity_num_shapes_results = []
-    base_test_case = BaseTestCase(treatment_variable=intensity, outcome_variable=num_shapes_unit)
-    observational_df = pd.read_csv(observational_data_path, index_col=0).astype(float)
-    causal_test_cases = [
-        (
-            CausalTestCase(
-                base_test_case=base_test_case,
-                expected_causal_effect=ExactValue(4, atol=0.5),
-                estimate_type="risk_ratio",
-                estimator=EmpiricalMeanEstimator(
-                    base_test_case=base_test_case,
-                    treatment_value=treatment_value,
-                    control_value=control_value,
-                    adjustment_set=causal_specification.causal_dag.identification(base_test_case),
-                    df=pd.read_csv(f"{ROOT}/data/smt_100/data_smt_wh{wh}_100.csv", index_col=0).astype(float),
-                    effect_modifiers=None,
-                    alpha=0.05,
-                    query="",
-                ),
-            ),
-            CausalTestCase(
-                base_test_case=base_test_case,
-                expected_causal_effect=ExactValue(4, atol=0.5),
-                estimate_type="risk_ratio",
-                estimator=LinearRegressionEstimator(
-                    base_test_case=base_test_case,
-                    treatment_value=treatment_value,
-                    control_value=control_value,
-                    adjustment_set=causal_specification.causal_dag.identification(base_test_case),
-                    df=observational_df,
-                    effect_modifiers=None,
-                    formula="num_shapes_unit ~ I(intensity ** 2) + intensity - 1",
-                    alpha=0.05,
-                    query="",
-                ),
-            ),
-        )
-        for control_value, treatment_value in [(1, 2), (2, 4), (4, 8), (8, 16)]
-        for wh in range(1, 11)
-    ]
-
-    test_results = [(smt.execute_test(), observational.execute_test()) for smt, observational in causal_test_cases]
-
-    intensity_num_shapes_results += [
-        {
-            "width": obs_causal_test_result.estimator.control_value,
-            "height": obs_causal_test_result.estimator.treatment_value,
-            "control": obs_causal_test_result.estimator.control_value,
-            "treatment": obs_causal_test_result.estimator.treatment_value,
-            "smt_risk_ratio": smt_causal_test_result.test_value.value,
-            "obs_risk_ratio": obs_causal_test_result.test_value.value[0],
-        }
-        for smt_causal_test_result, obs_causal_test_result in test_results
-    ]
-    intensity_num_shapes_results = pd.DataFrame(intensity_num_shapes_results)
-    if save:
-        intensity_num_shapes_results.to_csv("intensity_num_shapes_results_random_1000.csv")
-    logger.info("%s", intensity_num_shapes_results)
-
-
-def test_poisson_width_num_shapes(save=False):
-    base_test_case = BaseTestCase(treatment_variable=width, outcome_variable=num_shapes_unit)
-    df = pd.read_csv(observational_data_path, index_col=0).astype(float)
-    causal_test_cases = [
-        CausalTestCase(
-            base_test_case=base_test_case,
-            expected_causal_effect=Positive(),
-            estimate_type="ate_calculated",
-            effect_modifier_configuration={"intensity": i},
-            estimator=LinearRegressionEstimator(
-                base_test_case=base_test_case,
-                treatment_value=w + 1.0,
-                control_value=float(w),
-                adjustment_set=causal_specification.causal_dag.identification(base_test_case),
-                df=df,
-                effect_modifiers={"intensity": i},
-                formula="num_shapes_unit ~ width + I(intensity ** 2)+I(width ** -1)+intensity-1",
-                alpha=0.05,
-            ),
-        )
-        for i in range(1, 17)
-        for w in range(1, 10)
-    ]
-    test_results = [test.execute_test() for test in causal_test_cases]
-    width_num_shapes_results = [
-        {
-            "control": causal_test_result.estimator.control_value,
-            "treatment": causal_test_result.estimator.treatment_value,
-            "intensity": causal_test_result.effect_modifier_configuration["intensity"],
-            "ate": causal_test_result.test_value.value[0],
-            "ci_low": causal_test_result.confidence_intervals[0][0],
-            "ci_high": causal_test_result.confidence_intervals[1][0],
-        }
-        for causal_test_result in test_results
-    ]
-    width_num_shapes_results = pd.DataFrame(width_num_shapes_results)
-    if save:
-        width_num_shapes_results.to_csv("width_num_shapes_results_random_1000.csv")
-    logger.info("%s", width_num_shapes_results)
+
+def run_causal_tests() -> None:
+    """
+    Execute the causal tests configured for this example.
+
+    Resolves the DAG, data, test-configuration and output files relative
+    to ROOT, builds a CausalTestingFramework from them, then calls
+    setup, load_tests, run_tests and save_results in that order.
+    """
+    # All inputs and the results file are located relative to ROOT.
+    paths = CausalTestingPaths(
+        dag_path=os.path.join(ROOT, "dag.dot"),
+        data_paths=[os.path.join(ROOT, "data/random/data_random_1000.csv")],
+        test_config_path=os.path.join(ROOT, "causal_tests.json"),
+        output_path=os.path.join(ROOT, "causal_test_results.json"),
+    )
+
+    runner = CausalTestingFramework(paths=paths, ignore_cycles=False)
+
+    # Run the framework steps in sequence; run_tests' return value is
+    # handed straight to save_results.
+    runner.setup()
+    runner.load_tests()
+    outcomes = runner.run_tests()
+    runner.save_results(outcomes)
 
 
 if __name__ == "__main__":
-    test_poisson_intensity_num_shapes(save=False)
-    test_poisson_width_num_shapes(save=True)
+    run_causal_tests()