diff --git a/docs/source/_static/images/CITCOM-logo-white.png b/docs/source/_static/images/CITCOM-logo-white.png
new file mode 100644
index 00000000..832b448d
Binary files /dev/null and b/docs/source/_static/images/CITCOM-logo-white.png differ
diff --git a/docs/source/_static/images/Sheffield-logo.png b/docs/source/_static/images/Sheffield-logo.png
new file mode 100644
index 00000000..65252823
Binary files /dev/null and b/docs/source/_static/images/Sheffield-logo.png differ
diff --git a/examples/covasim_/doubling_beta/doubling_beta.py.ipynb b/examples/covasim_/doubling_beta/doubling_beta.py.ipynb
new file mode 100644
index 00000000..86124b27
--- /dev/null
+++ b/examples/covasim_/doubling_beta/doubling_beta.py.ipynb
@@ -0,0 +1,91 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "id": "5772c02c-eeba-4f48-9425-53abf2439345",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ "
\n",
+ "

\n",
+ "
\n",
+ "
\n",
+ "

\n",
+ "
\n",
+ "
\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "from IPython.display import HTML, display\n",
+ "\n",
+ "html = \"\"\"\n",
+ "\n",
+ "
\n",
+ "

\n",
+ "
\n",
+ "
\n",
+ "

\n",
+ "
\n",
+ "
\n",
+ "\"\"\"\n",
+ "\n",
+ "display(HTML(html))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "id": "723a51ed-ff3c-4205-b2d3-4ebc0434dfac",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ ""
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 43,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.11"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/examples/poisson-line-process/example_pure_python.py b/examples/poisson-line-process/example_pure_python.py
index 04f18616..46bfdf28 100644
--- a/examples/poisson-line-process/example_pure_python.py
+++ b/examples/poisson-line-process/example_pure_python.py
@@ -1,191 +1,100 @@
-import os
-import logging
+"""
+Causal testing framework example script.
-import pandas as pd
+This example demonstrates the use of a custom EmpiricalMeanEstimator
+for causal testing and runs causal tests using the framework.
+"""
-from causal_testing.specification.causal_dag import CausalDAG
-from causal_testing.specification.scenario import Scenario
-from causal_testing.specification.variable import Input, Output
-from causal_testing.specification.causal_specification import CausalSpecification
-from causal_testing.testing.causal_test_case import CausalTestCase
-from causal_testing.testing.causal_effect import ExactValue, Positive
-from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
-from causal_testing.estimation.abstract_estimator import Estimator
-from causal_testing.testing.base_test_case import BaseTestCase
+import os
+from typing import Tuple, Optional, Any
+from causal_testing.main import (
+ CausalTestingFramework,
+ CausalTestingPaths,
+ setup_logging
+)
+from causal_testing.estimation.abstract_estimator import Estimator
-logger = logging.getLogger(__name__)
-logging.basicConfig(level=logging.DEBUG, format="%(message)s")
+setup_logging(verbose=True)
class EmpiricalMeanEstimator(Estimator):
+ """
+ Estimator that computes treatment effects using empirical means.
+
+ This estimator calculates the Average Treatment Effect (ATE) and risk ratio
+ by directly comparing the means of treatment and control groups.
+ """
+
def add_modelling_assumptions(self):
+ """Add modeling assumptions for this estimator."""
+ self.modelling_assumptions += (
+ "The data must contain runs with the exact configuration of interest."
+ )
+
+ def estimate_ate(self) -> Tuple[float, Optional[Any]]:
"""
- Add modelling assumptions to the estimator. This is a list of strings which list the modelling assumptions that
- must hold if the resulting causal inference is to be considered valid.
- """
- self.modelling_assumptions += "The data must contain runs with the exact configuration of interest."
+ Estimate the Average Treatment Effect.
- def estimate_ate(self) -> float:
- """Estimate the outcomes under control and treatment.
- :return: The empirical average treatment effect.
+ Returns:
+ Tuple containing the ATE estimate and optional additional data.
"""
- control_results = self.df.where(self.df[self.base_test_case.treatment_variable.name] == self.control_value)[
- self.base_test_case.outcome_variable.name
- ].dropna()
- treatment_results = self.df.where(self.df[self.base_test_case.treatment_variable.name] == self.treatment_value)[
- self.base_test_case.outcome_variable.name
- ].dropna()
+ control_results = self.df.where(
+ self.df[self.base_test_case.treatment_variable.name] == self.control_value
+ )[self.base_test_case.outcome_variable.name].dropna()
+
+ treatment_results = self.df.where(
+ self.df[self.base_test_case.treatment_variable.name] == self.treatment_value
+ )[self.base_test_case.outcome_variable.name].dropna()
+
return treatment_results.mean() - control_results.mean(), None
- def estimate_risk_ratio(self) -> float:
- """Estimate the outcomes under control and treatment.
- :return: The empirical average treatment effect.
+ def estimate_risk_ratio(self) -> Tuple[float, Optional[Any]]:
"""
- control_results = self.df.where(self.df[self.base_test_case.treatment_variable.name] == self.control_value)[
- self.base_test_case.outcome_variable.name
- ].dropna()
- treatment_results = self.df.where(self.df[self.base_test_case.treatment_variable.name] == self.treatment_value)[
- self.base_test_case.outcome_variable.name
- ].dropna()
+ Estimate the risk ratio.
+
+ Returns:
+ Tuple containing the risk ratio estimate and optional additional data.
+ """
+ control_results = self.df.where(
+ self.df[self.base_test_case.treatment_variable.name] == self.control_value
+ )[self.base_test_case.outcome_variable.name].dropna()
+
+ treatment_results = self.df.where(
+ self.df[self.base_test_case.treatment_variable.name] == self.treatment_value
+ )[self.base_test_case.outcome_variable.name].dropna()
+
return treatment_results.mean() / control_results.mean(), None
-# 1. Read in the Causal DAG
ROOT = os.path.realpath(os.path.dirname(__file__))
-causal_dag = CausalDAG(f"{ROOT}/dag.dot")
-
-# 2. Create variables
-width = Input("width", float)
-height = Input("height", float)
-intensity = Input("intensity", float)
-
-num_lines_abs = Output("num_lines_abs", float)
-num_lines_unit = Output("num_lines_unit", float)
-num_shapes_abs = Output("num_shapes_abs", float)
-num_shapes_unit = Output("num_shapes_unit", float)
-
-# 3. Create scenario
-scenario = Scenario(
- variables={
- width,
- height,
- intensity,
- num_lines_abs,
- num_lines_unit,
- num_shapes_abs,
- num_shapes_unit,
- }
-)
-# 4. Construct a causal specification from the scenario and causal DAG
-causal_specification = CausalSpecification(scenario, causal_dag)
-
-observational_data_path = f"{ROOT}/data/random/data_random_1000.csv"
-
-
-def test_poisson_intensity_num_shapes(save=False):
- intensity_num_shapes_results = []
- base_test_case = BaseTestCase(treatment_variable=intensity, outcome_variable=num_shapes_unit)
- observational_df = pd.read_csv(observational_data_path, index_col=0).astype(float)
- causal_test_cases = [
- (
- CausalTestCase(
- base_test_case=base_test_case,
- expected_causal_effect=ExactValue(4, atol=0.5),
- estimate_type="risk_ratio",
- estimator=EmpiricalMeanEstimator(
- base_test_case=base_test_case,
- treatment_value=treatment_value,
- control_value=control_value,
- adjustment_set=causal_specification.causal_dag.identification(base_test_case),
- df=pd.read_csv(f"{ROOT}/data/smt_100/data_smt_wh{wh}_100.csv", index_col=0).astype(float),
- effect_modifiers=None,
- alpha=0.05,
- query="",
- ),
- ),
- CausalTestCase(
- base_test_case=base_test_case,
- expected_causal_effect=ExactValue(4, atol=0.5),
- estimate_type="risk_ratio",
- estimator=LinearRegressionEstimator(
- base_test_case=base_test_case,
- treatment_value=treatment_value,
- control_value=control_value,
- adjustment_set=causal_specification.causal_dag.identification(base_test_case),
- df=observational_df,
- effect_modifiers=None,
- formula="num_shapes_unit ~ I(intensity ** 2) + intensity - 1",
- alpha=0.05,
- query="",
- ),
- ),
- )
- for control_value, treatment_value in [(1, 2), (2, 4), (4, 8), (8, 16)]
- for wh in range(1, 11)
- ]
-
- test_results = [(smt.execute_test(), observational.execute_test()) for smt, observational in causal_test_cases]
-
- intensity_num_shapes_results += [
- {
- "width": obs_causal_test_result.estimator.control_value,
- "height": obs_causal_test_result.estimator.treatment_value,
- "control": obs_causal_test_result.estimator.control_value,
- "treatment": obs_causal_test_result.estimator.treatment_value,
- "smt_risk_ratio": smt_causal_test_result.test_value.value,
- "obs_risk_ratio": obs_causal_test_result.test_value.value[0],
- }
- for smt_causal_test_result, obs_causal_test_result in test_results
- ]
- intensity_num_shapes_results = pd.DataFrame(intensity_num_shapes_results)
- if save:
- intensity_num_shapes_results.to_csv("intensity_num_shapes_results_random_1000.csv")
- logger.info("%s", intensity_num_shapes_results)
-
-
-def test_poisson_width_num_shapes(save=False):
- base_test_case = BaseTestCase(treatment_variable=width, outcome_variable=num_shapes_unit)
- df = pd.read_csv(observational_data_path, index_col=0).astype(float)
- causal_test_cases = [
- CausalTestCase(
- base_test_case=base_test_case,
- expected_causal_effect=Positive(),
- estimate_type="ate_calculated",
- effect_modifier_configuration={"intensity": i},
- estimator=LinearRegressionEstimator(
- base_test_case=base_test_case,
- treatment_value=w + 1.0,
- control_value=float(w),
- adjustment_set=causal_specification.causal_dag.identification(base_test_case),
- df=df,
- effect_modifiers={"intensity": i},
- formula="num_shapes_unit ~ width + I(intensity ** 2)+I(width ** -1)+intensity-1",
- alpha=0.05,
- ),
- )
- for i in range(1, 17)
- for w in range(1, 10)
- ]
- test_results = [test.execute_test() for test in causal_test_cases]
- width_num_shapes_results = [
- {
- "control": causal_test_result.estimator.control_value,
- "treatment": causal_test_result.estimator.treatment_value,
- "intensity": causal_test_result.effect_modifier_configuration["intensity"],
- "ate": causal_test_result.test_value.value[0],
- "ci_low": causal_test_result.confidence_intervals[0][0],
- "ci_high": causal_test_result.confidence_intervals[1][0],
- }
- for causal_test_result in test_results
- ]
- width_num_shapes_results = pd.DataFrame(width_num_shapes_results)
- if save:
- width_num_shapes_results.to_csv("width_num_shapes_results_random_1000.csv")
- logger.info("%s", width_num_shapes_results)
+
+def run_causal_tests() -> None:
+ """
+ Run causal tests using the framework.
+
+ Sets up paths, initialises the framework, loads tests, runs them,
+ and saves the results.
+ """
+ dag_path = os.path.join(ROOT, "dag.dot")
+ data_paths = [os.path.join(ROOT, "data/random/data_random_1000.csv")]
+ test_config_path = os.path.join(ROOT, "causal_tests.json")
+ output_path = os.path.join(ROOT, "causal_test_results.json")
+
+ paths = CausalTestingPaths(
+ dag_path=dag_path,
+ data_paths=data_paths,
+ test_config_path=test_config_path,
+ output_path=output_path
+ )
+
+ framework = CausalTestingFramework(paths=paths, ignore_cycles=False)
+ framework.setup()
+ framework.load_tests()
+ results = framework.run_tests()
+ framework.save_results(results)
if __name__ == "__main__":
- test_poisson_intensity_num_shapes(save=False)
- test_poisson_width_num_shapes(save=True)
+ run_causal_tests()
diff --git a/examples/poisson-line-process/poisson_line_process.ipynb b/examples/poisson-line-process/poisson_line_process.ipynb
new file mode 100644
index 00000000..ac359ec1
--- /dev/null
+++ b/examples/poisson-line-process/poisson_line_process.ipynb
@@ -0,0 +1,865 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "b93e5b58-e86f-4b41-b2df-c5c59f37778f",
+ "metadata": {},
+ "source": [
+ "\n",
+ "

\n",
+ "
\n",
+ "

\n",
+ "
\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "56965fba-b90b-4233-a819-bb747ecd9d81",
+ "metadata": {},
+ "source": [
+ "# Poisson Line Process Tutorial: Statistical Metamorphic Testing"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5adf7cdc-fd96-47a4-a194-f1f060a4c0c5",
+ "metadata": {},
+ "source": [
+ "## Overview"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5b5f1661-9d24-43e3-88ee-add75b744e87",
+ "metadata": {},
+ "source": [
+ "The purpose of this tutorial is to use the Causal Testing Framework's **core Python API** to demonstrate how it can be employed to implement statistical metamorphic testing. More specifically, this example involves running a series of causal test cases that incrementally change the width and height of the sampling window of a Poisson Line Tessellation (PLT) model. Further details on the methodology can be found in Section 5.1 of our [paper](https://dl.acm.org/doi/10.1145/3607184) and additional resources for this tutorial can be found at the end of this notebook."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ea9c0446-61ee-4535-995d-b9d80dfd5735",
+ "metadata": {},
+ "source": [
+ "### Step 1: Defining your Input Configurations\n",
+ "\n",
+ "Before diving into the details, a good first step is to define your file paths, including your input configurations:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "ee177b18-218e-4466-9069-01f659e2df33",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Step 1: Define your file paths\n",
+ "\n",
+ "from pathlib import Path\n",
+ "\n",
+ "base_dir = Path.cwd() # Get the current working directory\n",
+ "\n",
+ "data_dir = base_dir / \"data\" # Define your data directory here\n",
+ "\n",
+ "dag_file = \"dag.dot\" # Define your DAG filename here\n",
+ "\n",
+ "data_file = \"data/random/data_random_1000.csv\" # This is your input runtime data (usually a .csv file)\n",
+ "\n",
+ "output_file = \"causal_test_results.json\" # This is your output file containing the causal test results"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4928e126-dc5e-4cb1-9136-23c3f8292924",
+ "metadata": {},
+ "source": [
+ "Under the hood, the CTF consists of 2 main components, namely, a Causal Specification and a Causal Test Case. Let's break down the Causal Specification first.\n",
+ "\n",
+ "A Causal Specification consists of two sub-components called a Modelling Scenario and a Causal Directed Acyclic Graph (DAG). \n",
+ "\n",
+ "Firstly, the modelling scenario specifies the (or part of the) system under test by defining the observable variables and any constraints that exist between those variables. The CTF currently supports three types of variables:\n",
+ "\n",
+ "- `Input` variables, which are inputs to the system.\n",
+ "\n",
+ "- `Output` variables, which are outputs from the system.\n",
+ "\n",
+ "- `Meta` variables, which are not directly observable but can be related to the system.\n",
+ "\n",
+ "Secondly, the causal DAG encodes information about the expected causal structure of the system through nodes representing variables and directed edges representing causal relationships, which is a model of how the data could have been generated. Together, the Causal DAG and modelling scenario form the `Causal Specification`.\n",
+ "\n",
+ "**Note**: The CTF doesn't support native visualisation tools, but it is possible to use existing frameworks such as NetworkX to visualise your DAG. Alternatively, browser-based environments such as [DAGitty](https://www.dagitty.net/) may also be useful."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "15354565-eeb5-4722-bf6b-0b987eabb8c2",
+ "metadata": {},
+ "source": [
+    "### Step 2: Create a Causal Specification"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "40f85921-40b7-46e5-aede-606900582f4b",
+ "metadata": {},
+ "source": [
+ "At this point, it might be worthwhile to interrogate your data and apply any pre-processing, transforming or cleaning as necessary. However, for the purposes of this tutorial, there won't be any additional processing required. Section 5.13 of our [paper](https://dl.acm.org/doi/10.1145/3607184) explains in detail how this dataset was generated."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "d7d27532-7995-4d76-b40e-e6ae9e7cc645",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " width | \n",
+ " height | \n",
+ " intensity | \n",
+ " num_lines_abs | \n",
+ " num_shapes_abs | \n",
+ " num_lines_unit | \n",
+ " num_shapes_unit | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 4.908266 | \n",
+ " 8.367783 | \n",
+ " 10.684103 | \n",
+ " 278 | \n",
+ " 13991 | \n",
+ " 6.768716 | \n",
+ " 340.651464 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 9.239562 | \n",
+ " 0.632306 | \n",
+ " 12.826541 | \n",
+ " 238 | \n",
+ " 2603 | \n",
+ " 40.737877 | \n",
+ " 445.549133 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 5.144895 | \n",
+ " 9.633530 | \n",
+ " 15.219349 | \n",
+ " 425 | \n",
+ " 33859 | \n",
+ " 8.574859 | \n",
+ " 683.143879 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 3.489453 | \n",
+ " 4.535775 | \n",
+ " 8.362393 | \n",
+ " 126 | \n",
+ " 3305 | \n",
+ " 7.960892 | \n",
+ " 208.815469 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 7.985650 | \n",
+ " 1.090290 | \n",
+ " 1.032276 | \n",
+ " 24 | \n",
+ " 74 | \n",
+ " 2.756506 | \n",
+ " 8.499228 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 8.198583 | \n",
+ " 2.479352 | \n",
+ " 7.394970 | \n",
+ " 152 | \n",
+ " 3378 | \n",
+ " 7.477674 | \n",
+ " 166.181475 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 5.539523 | \n",
+ " 9.453742 | \n",
+ " 12.416852 | \n",
+ " 378 | \n",
+ " 22500 | \n",
+ " 7.217980 | \n",
+ " 429.641692 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 1.182998 | \n",
+ " 4.851435 | \n",
+ " 4.361107 | \n",
+ " 53 | \n",
+ " 534 | \n",
+ " 9.234678 | \n",
+ " 93.043734 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 2.438737 | \n",
+ " 1.708604 | \n",
+ " 1.107249 | \n",
+ " 6 | \n",
+ " 14 | \n",
+ " 1.439942 | \n",
+ " 3.359864 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 4.799560 | \n",
+ " 0.032911 | \n",
+ " 9.314464 | \n",
+ " 94 | \n",
+ " 129 | \n",
+ " 595.097328 | \n",
+ " 816.676121 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1000 rows × 7 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " width height intensity num_lines_abs num_shapes_abs \\\n",
+ "0 4.908266 8.367783 10.684103 278 13991 \n",
+ "1 9.239562 0.632306 12.826541 238 2603 \n",
+ "2 5.144895 9.633530 15.219349 425 33859 \n",
+ "3 3.489453 4.535775 8.362393 126 3305 \n",
+ "4 7.985650 1.090290 1.032276 24 74 \n",
+ ".. ... ... ... ... ... \n",
+ "5 8.198583 2.479352 7.394970 152 3378 \n",
+ "6 5.539523 9.453742 12.416852 378 22500 \n",
+ "7 1.182998 4.851435 4.361107 53 534 \n",
+ "8 2.438737 1.708604 1.107249 6 14 \n",
+ "9 4.799560 0.032911 9.314464 94 129 \n",
+ "\n",
+ " num_lines_unit num_shapes_unit \n",
+ "0 6.768716 340.651464 \n",
+ "1 40.737877 445.549133 \n",
+ "2 8.574859 683.143879 \n",
+ "3 7.960892 208.815469 \n",
+ "4 2.756506 8.499228 \n",
+ ".. ... ... \n",
+ "5 7.477674 166.181475 \n",
+ "6 7.217980 429.641692 \n",
+ "7 9.234678 93.043734 \n",
+ "8 1.439942 3.359864 \n",
+ "9 595.097328 816.676121 \n",
+ "\n",
+ "[1000 rows x 7 columns]"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "df = pd.read_csv(data_file, index_col=0)\n",
+ "\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "19249775-2397-4bcd-b03f-a7f3a69e0cc6",
+ "metadata": {},
+ "source": [
+    "In this case, the PLT model has three positive floating-point input parameters: the width and height of the sampling window, and the intensity of the Poisson process. The model then outputs the total number of lines intersecting the sampling window, and the number of polygons formed by the intersecting lines. Note: in this dataset, the output variables with the suffix `_unit` are normalised with respect to their respective areas (`width*height`)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "ac297d2d-5a2f-4c33-bbdc-967d54e24e3f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Step 2: Create a Causal Specification using the Modelling Scenario and Causal DAG\n",
+ "\n",
+ "from causal_testing.specification.variable import Input, Output\n",
+ "from causal_testing.specification.causal_dag import CausalDAG\n",
+ "from causal_testing.specification.scenario import Scenario\n",
+ "from causal_testing.specification.causal_specification import CausalSpecification\n",
+ "\n",
+ "# Define the input variables \n",
+ "\n",
+ "width = Input(\"width\", float) \n",
+ "\n",
+ "height = Input(\"height\", float)\n",
+ "\n",
+ "intensity = Input(\"intensity\", float)\n",
+ "\n",
+ "# Define the output variables \n",
+ "\n",
+ "num_lines_abs = Output(\"num_lines_abs\", float)\n",
+ "\n",
+ "num_lines_unit = Output(\"num_lines_unit\", float)\n",
+ "\n",
+ "num_shapes_abs = Output(\"num_shapes_abs\", float)\n",
+ "\n",
+ "num_shapes_unit = Output(\"num_shapes_unit\", float)\n",
+ "\n",
+ "# Pass these variables into the Scenario class \n",
+ "scenario = Scenario(\n",
+ " variables={\n",
+ " width,\n",
+ " height,\n",
+ " intensity,\n",
+ " num_lines_abs,\n",
+ " num_lines_unit,\n",
+ " num_shapes_abs,\n",
+ " num_shapes_unit})\n",
+ "\n",
+ "causal_dag = CausalDAG(dag_file) # Secondly, create the Causal DAG \n",
+ "\n",
+ "# Finally, we instantiate the CausalSpecification and pass in the scenario and Causal DAG\n",
+ "\n",
+ "causal_specification = CausalSpecification(scenario, causal_dag) "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "877d413d-ff96-4481-953f-891c19493531",
+ "metadata": {},
+ "source": [
+ "### Step 3: Create Causal Test Cases"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "be854667-44de-4f40-a37d-fb35588f047a",
+ "metadata": {},
+ "source": [
+ "Now that we've created our Causal Specification, we're ready to create our Causal Tests. Causal tests are essentially metamorphic tests that are executed using statistical causal inference. A causal test expresses the change in a given output that we expect to see when we change a particular input in some way. \n",
+ "\n",
+ "Firstly, a `base test case`, which specifies the relationship between the given output and input and the desired effect, is required to build a `causal test case`. Together, the causal test case forms the complete executable test, which is the minimum required to perform identification on the DAG.\n",
+ "\n",
+ "In this tutorial, the two metamorphic relations we would like to investigate are the following:\n",
+ "\n",
+ "1. Doubling the intensity should cause the number of polygons per unit area to increase by a factor of 4.\n",
+ "2. The number of polygons per unit area should be independent of width and height."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c5392fa4-e374-43c3-ae1f-ef475ac7bd84",
+ "metadata": {},
+ "source": [
+ "#### Metamorphic Relation 1: Doubling the intensity should cause the number of polygons per unit area to increase by a factor of 4"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "9b8491ab-0a90-4061-baee-8e1ecef7371d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from causal_testing.testing.base_test_case import BaseTestCase\n",
+ "from causal_testing.testing.causal_test_case import CausalTestCase\n",
+ "from causal_testing.testing.causal_effect import ExactValue, Positive\n",
+ "from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator\n",
+ "\n",
+ "base_test_case = BaseTestCase(treatment_variable=intensity, outcome_variable=num_shapes_unit) # Create the base test case\n",
+ "\n",
+ "# Perform identification on the DAG using the base test case\n",
+ "adjustment_set = causal_specification.causal_dag.identification(base_test_case) # Note: an empty adjustment set means there are no confounding variables that need to be controlled for"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e8026067-4df6-43f4-8927-6ac9415b9232",
+ "metadata": {},
+ "source": [
+    "Following this, we can now create our causal test case. The minimum parameters we'll need to create this are: the expected causal effect as a `CausalEffect` object (e.g. `ExactValue`), the estimate type, which is a `str` specifying the type of estimate to return, and an estimator, which is an `Estimator` object. Since the relation we're investigating is inherently linear, we can use the `LinearRegressionEstimator` class to build our causal test case."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "fa53a888-68e1-4f6f-babf-16d3a206ea49",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator\n",
+ "\n",
+ "control_values, treatment_values = 2 ** np.arange(0, 4), 2 ** np.arange(1, 5) # Initialise the dummy intensity variables\n",
+ "\n",
+ "intensity_results = [] # Initiate an empty list to store the causal test results\n",
+ "\n",
+ "for (control, treatment) in zip(control_values, treatment_values): # Simultaneously loop over control and treatment\n",
+ " \n",
+ " estimator=LinearRegressionEstimator(\n",
+ " df=df, # Pass in the dataframe\n",
+ " base_test_case=base_test_case, # Base test case we created above\n",
+ " treatment_value=treatment, # Doubled intensity values\n",
+ " control_value=control, # Baseline intensity values\n",
+ " adjustment_set=adjustment_set, # Adjustment set (no confounders in this example)\n",
+ " formula=\"num_shapes_unit ~ I(intensity ** 2) + intensity - 1\", # Patsy formula describing a linear regression model\n",
+ " alpha=0.05) # Significance level\n",
+ " \n",
+ " causal_test_case = CausalTestCase(\n",
+ " base_test_case=base_test_case, # Pass in the base test case\n",
+    "        expected_causal_effect=ExactValue(4, atol=0.5), # Include a tolerance of 0.5\n",
+ " estimate_type=\"risk_ratio\", # As described in our paper\n",
+ " estimator = estimator) # Pass in the estimator we created above\n",
+ "\n",
+ "\n",
+ " test_results = causal_test_case.execute_test() # Execute the tests\n",
+ "\n",
+ " # Parse the test result values we need:\n",
+ " intensity_results += [\n",
+ " {\n",
+ " \"width\": test_results.estimator.control_value,\n",
+ " \"height\": test_results.estimator.treatment_value,\n",
+ " \"control\": test_results.estimator.control_value,\n",
+ " \"treatment\": test_results.estimator.treatment_value,\n",
+ " \"risk_ratio\": test_results.test_value.value[0],\n",
+ " }]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d6793dc5-6425-4722-b430-f57aaf3fd181",
+ "metadata": {},
+ "source": [
+ "Finally, we can parse the causal test results as a `pandas` dataframe and optionally export it to a `.csv` file."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "6bc8be40-bc95-4187-8771-4ce096acc7b5",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " width | \n",
+ " height | \n",
+ " control | \n",
+ " treatment | \n",
+ " risk_ratio | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 2.827958 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 4 | \n",
+ " 2 | \n",
+ " 4 | \n",
+ " 3.171104 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 4 | \n",
+ " 8 | \n",
+ " 4 | \n",
+ " 8 | \n",
+ " 3.477219 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 8 | \n",
+ " 16 | \n",
+ " 8 | \n",
+ " 16 | \n",
+ " 3.699311 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " width height control treatment risk_ratio\n",
+ "0 1 2 1 2 2.827958\n",
+ "1 2 4 2 4 3.171104\n",
+ "2 4 8 4 8 3.477219\n",
+ "3 8 16 8 16 3.699311"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "intensity_results_df = pd.DataFrame(intensity_results)\n",
+ "\n",
+ "intensity_results_df\n",
+ "\n",
+ "# intensity_results_df.to_csv(\"intensity_test_results.csv\", index=0) # Uncomment this to save as a csv."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f8ad21d8-6451-4f6c-a69d-cb939ea4f96b",
+ "metadata": {},
+ "source": [
+ "### Summary"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e62216b2-60ed-49b7-a2eb-0ad755bd91fc",
+ "metadata": {},
+ "source": [
+    "From the above causal test results and the risk ratios, we can conclude that doubling the intensity **does not** cause the number of polygons per unit area to increase by a factor of 4 as we expected - but by factors ranging from 2.8 - 3.7, meaning that the metamorphic relation is not satisfied. This is a significant result, since it contradicts our initial hypothesis."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "18c1a6b7-454a-44e1-88d7-0eeb0c86f48a",
+ "metadata": {},
+ "source": [
+ "#### Metamorphic Relation 2: The number of polygons per unit area should be independent of the width and height"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0126b036-19b1-49e4-93f1-454985c36001",
+ "metadata": {},
+ "source": [
+ "In a very similar way to the method above, we can test our second metamorphic relation that the number of polygons per unit area should be independent of sample width and height. Since we are only interested in whether there is some effect, we use the average treatment effect (ATE) instead of the risk ratio from above, which quantifies the additive change in outcome caused by the intervention.\n",
+ "\n",
+    "To investigate whether the width affects the number of polygons per unit area, we need to execute a new set of test cases, but this time fixing the intensity and varying the width. Note: we don't need to redefine the causal specification, nor perform identification again; but we have to redefine our base test case since we're now considering the polygon's width as the treatment variable."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "67bf5061-720f-4b3a-a371-3ff3092e81e1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "control_values, treatment_values = np.arange(1,10), np.arange(1, 17) # Initialise the dummy width variables\n",
+ "\n",
+ "width_results = [] # Empty list for storing test case results \n",
+ "\n",
+ "base_test_case = BaseTestCase(treatment_variable=width, outcome_variable=num_shapes_unit) # Create the base test case\n",
+ "\n",
+ "adjustment_set = causal_specification.causal_dag.identification(base_test_case) # Calculate the adjustment set again (if it exists)\n",
+ "\n",
+ "for intensity in treatment_values:\n",
+ " \n",
+ " for width in control_values:\n",
+ " \n",
+ " estimator = LinearRegressionEstimator(\n",
+ " df=df, # Pass in the dataframe\n",
+ " base_test_case = base_test_case, # Base test case we created above\n",
+ " treatment_value = width + 1.0, # Changing the width\n",
+ " control_value=float(width), # Baseline width values\n",
+    "            adjustment_set=adjustment_set, # Use the same adjustment set as computed above\n",
+ " effect_modifiers={\"intensity\": intensity},\n",
+ " formula=\"num_shapes_unit ~ width + I(intensity ** 2)+I(width ** -1)+intensity-1\", # Patsy formula describing a linear regression model\n",
+ " alpha=0.05) # Significance level\n",
+ " \n",
+ " causal_test_case = CausalTestCase(\n",
+ " base_test_case = base_test_case, # Pass in the base test case\n",
+ " expected_causal_effect = Positive(), # We expect a positive increase\n",
+ " estimate_type = \"ate_calculated\", # Calls the ate_calculated method in the linear regression estimator\n",
+ " effect_modifier_configuration = {\"intensity\": intensity}, # Condition on (hold constant) the intensity value when calculating width\n",
+ " estimator=estimator) # Pass in the estimator we created above\n",
+ " \n",
+ " test_results = causal_test_case.execute_test() # Execute the tests\n",
+ "\n",
+ " # Parse the test result values we need:\n",
+ " width_results += [\n",
+ " {\n",
+ " \"control\": test_results.estimator.control_value,\n",
+ " \"treatment\": test_results.estimator.treatment_value,\n",
+ " \"intensity\": test_results.effect_modifier_configuration[\"intensity\"],\n",
+ " \"ate\": test_results.test_value.value[0],\n",
+ " \"ci_low\": test_results.confidence_intervals[0][0],\n",
+ " \"ci_high\": test_results.confidence_intervals[1][0],\n",
+ " }]\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "6c54392c-4e6b-42b3-b39a-e0d1d0ab25b7",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " control | \n",
+ " treatment | \n",
+ " intensity | \n",
+ " ate | \n",
+ " ci_low | \n",
+ " ci_high | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1.0 | \n",
+ " 2.0 | \n",
+ " 1 | \n",
+ " -7.378642 | \n",
+ " -13.918239 | \n",
+ " -0.839046 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2.0 | \n",
+ " 3.0 | \n",
+ " 1 | \n",
+ " -2.709659 | \n",
+ " -9.802883 | \n",
+ " 4.383566 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3.0 | \n",
+ " 4.0 | \n",
+ " 1 | \n",
+ " -1.542413 | \n",
+ " -11.120888 | \n",
+ " 8.036062 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4.0 | \n",
+ " 5.0 | \n",
+ " 1 | \n",
+ " -1.075514 | \n",
+ " -13.708422 | \n",
+ " 11.557393 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5.0 | \n",
+ " 6.0 | \n",
+ " 1 | \n",
+ " -0.842065 | \n",
+ " -16.741294 | \n",
+ " 15.057163 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 6.0 | \n",
+ " 7.0 | \n",
+ " 1 | \n",
+ " -0.708666 | \n",
+ " -19.972927 | \n",
+ " 18.555596 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 7.0 | \n",
+ " 8.0 | \n",
+ " 1 | \n",
+ " -0.625291 | \n",
+ " -23.308418 | \n",
+ " 22.057837 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 8.0 | \n",
+ " 9.0 | \n",
+ " 1 | \n",
+ " -0.569708 | \n",
+ " -26.704347 | \n",
+ " 25.564931 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 9.0 | \n",
+ " 10.0 | \n",
+ " 1 | \n",
+ " -0.530800 | \n",
+ " -30.138282 | \n",
+ " 29.076683 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 1.0 | \n",
+ " 2.0 | \n",
+ " 2 | \n",
+ " -7.378642 | \n",
+ " -16.381136 | \n",
+ " 1.623851 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " control treatment intensity ate ci_low ci_high\n",
+ "0 1.0 2.0 1 -7.378642 -13.918239 -0.839046\n",
+ "1 2.0 3.0 1 -2.709659 -9.802883 4.383566\n",
+ "2 3.0 4.0 1 -1.542413 -11.120888 8.036062\n",
+ "3 4.0 5.0 1 -1.075514 -13.708422 11.557393\n",
+ "4 5.0 6.0 1 -0.842065 -16.741294 15.057163\n",
+ "5 6.0 7.0 1 -0.708666 -19.972927 18.555596\n",
+ "6 7.0 8.0 1 -0.625291 -23.308418 22.057837\n",
+ "7 8.0 9.0 1 -0.569708 -26.704347 25.564931\n",
+ "8 9.0 10.0 1 -0.530800 -30.138282 29.076683\n",
+ "9 1.0 2.0 2 -7.378642 -16.381136 1.623851"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Convert the results to a dataframe and print the first 10 rows\n",
+ "\n",
+ "width_results_df = pd.DataFrame(width_results) \n",
+ "\n",
+ "width_results_df.head(10)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8c085e53-a56e-4f4a-b273-e6af46beba72",
+ "metadata": {},
+ "source": [
+ "### Summary"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7fcfe837-5d71-4603-b8a5-d3e3f50ceccc",
+ "metadata": {},
+ "source": [
+ "The causal test results in this case demonstrate that the ATE values for width increases from `1 → 2` through `9 → 10`, revealing that while most changes produce non-significant effects (ATEs ranging from `-2.7097` to `-0.5308` with confidence intervals containing zero), the width change from `1 → 2` produces a statistically significant negative effect of `-7.3786` with a confidence interval of `[-13.9182, -0.8390]`. This either indicates there is a problem with either the program, or the metamorphic property itself. A likely interpretation is that, geometrically, lines are less likely to intersect a smaller sample window. As the sample window becomes larger, there is more area to average over. Therefore, the metamorphic relations should ideally specify a minimum window size to which they apply.\n",
+ "\n",
+ "Additionally, in the paper we further demonstrate that these results show that the CTF was able to identify the same discrepancy as conventional statistical metamorphic testing, but using only a fifth of the data. Ultimately, this highlights the potential of causal inference-driven approaches to offer economical alternatives to testing techniques that depend on repeated potentially costly executions of the system under test."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "41545617-60e9-468d-a356-9dc1f433953d",
+ "metadata": {},
+ "source": [
+ "## Additional Resources"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "22c44cf0-2a46-41f3-bd6b-c9b91c55da19",
+ "metadata": {},
+ "source": [
+ "- [GitHub Repository](https://github.com/CITCOM-project/CausalTestingFramework)\n",
+ "- [Documentation](https://causal-testing-framework.readthedocs.io/en/latest/index.html)\n",
+ "- [Paper](https://dl.acm.org/doi/10.1145/3607184)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.11"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}