CITCOM-project
diff --git a/‎.github/workflows/ci-tests-drafts.yaml
Lines changed: 39 additions & 0 deletions b/‎.github/workflows/ci-tests-drafts.yaml
Lines changed: 39 additions & 0 deletions
diff --git a/‎.github/workflows/ci-tests.yaml
Lines changed: 6 additions & 0 deletions b/‎.github/workflows/ci-tests.yaml
Lines changed: 6 additions & 0 deletions
diff --git a/‎.github/workflows/figshare.yaml
Lines changed: 28 additions & 0 deletions b/‎.github/workflows/figshare.yaml
Lines changed: 28 additions & 0 deletions
diff --git a/‎README.md
Lines changed: 12 additions & 4 deletions b/‎README.md
Lines changed: 12 additions & 4 deletions
diff --git a/‎causal_testing/json_front/json_class.py
Lines changed: 2 additions & 3 deletions b/‎causal_testing/json_front/json_class.py
Lines changed: 2 additions & 3 deletions
diff --git a/‎causal_testing/specification/causal_dag.py
Lines changed: 16 additions & 2 deletions b/‎causal_testing/specification/causal_dag.py
Lines changed: 16 additions & 2 deletions
diff --git a/‎causal_testing/surrogate/__init__.py b/‎causal_testing/surrogate/__init__.py
diff --git a/‎causal_testing/surrogate/causal_surrogate_assisted.py
Lines changed: 142 additions & 0 deletions b/‎causal_testing/surrogate/causal_surrogate_assisted.py
Lines changed: 142 additions & 0 deletions
@@ -0,0 +1,39 @@
+name: Continuous Integration Tests Draft PR (pytest)
+# This duplicate CI workflow is required so the badge in the README.md is not affected by draft PRs
+on:
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  build:
+    if: github.event.pull_request.draft == true  # Only run for draft PRs
+    name: Ex1 (${{ matrix.python-version }}, ${{ matrix.os }})
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:  # One job per OS / Python version combination
+        os: ["ubuntu-latest", "windows-latest", "macos-latest"]
+        python-version: ["3.9"]
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python using Miniconda
+        uses: conda-incubator/setup-miniconda@v2
+        with:
+          auto-update-conda: true
+          python-version: ${{ matrix.python-version }}
+      - name: Install package and dependencies
+        run: |
+          python --version
+          pip install -e .
+          pip install -e .[test]
+          pip install pytest pytest-cov
+        shell: bash -l {0}  # Login shell; presumably needed so the Miniconda environment is active - confirm
+      - name: Test with pytest
+        run: |
+          pytest --cov=causal_testing --cov-report=xml
+        shell: bash -l {0}  # Same login shell as the install step
+      - name: "Upload coverage to Codecov"
+        uses: codecov/codecov-action@v2
+        with:
+          fail_ci_if_error: true  # A failed coverage upload fails the job
+          token: ${{ secrets.CODECOV_TOKEN }}
@@ -4,9 +4,15 @@ on:
   pull_request:
     branches:
       - main
+    types:
+      - opened
+      - synchronize
+      - reopened
+      - ready_for_review
 
 jobs:
   build:
+    if: github.event.pull_request.draft == false # Filter out draft PRs
     name: Ex1 (${{ matrix.python-version }}, ${{ matrix.os }})
     runs-on: ${{ matrix.os }}
     strategy:
 
@@ -0,0 +1,28 @@
+name: Release to Figshare
+on:
+  workflow_dispatch:  # NOTE(review): release.* fields are empty on manual runs, so the downloads would fail - confirm
+  release:
+    types: [published]
+jobs:
+  upload:
+    runs-on: ubuntu-latest
+    env:
+      ARCHIVE_NAME: ${{ github.event.repository.name }}-${{ github.event.release.tag_name }}  # <repo>-<tag>
+    steps:
+      - name: prepare-data-folder
+        run: mkdir 'data'
+      - name: download-archive  # curl -f: fail on HTTP errors instead of saving an error page as the archive
+        run: |
+          curl -fsSL "${{ github.event.release.zipball_url }}" -o "$ARCHIVE_NAME".zip
+          curl -fsSL "${{ github.event.release.tarball_url }}" -o "$ARCHIVE_NAME".tar.gz
+      - name: move-archive  # everything placed in data/ is what the upload step is pointed at via DATA_DIR
+        run: |
+          mv "$ARCHIVE_NAME".zip data/
+          mv "$ARCHIVE_NAME".tar.gz data/
+      - name: upload-to-figshare
+        uses: figshare/github-upload-action@v1.1
+        with:
+          FIGSHARE_TOKEN: ${{ secrets.FIGSHARE_TOKEN }}
+          FIGSHARE_ENDPOINT: 'https://api.figshare.com/v2'
+          FIGSHARE_ARTICLE_ID: '24427516'
+          DATA_DIR: 'data'
@@ -1,6 +1,15 @@
-# Causal Testing Framework: A Causal Inference-Driven Software Testing Framework
+# Causal Testing Framework
+### A Causal Inference-Driven Software Testing Framework
 
-![example workflow](https://github.com/CITCOM-project/CausalTestingFramework/actions/workflows/ci-tests.yaml/badge.svg) [![codecov](https://codecov.io/gh/CITCOM-project/CausalTestingFramework/branch/main/graph/badge.svg?token=04ijFVrb4a)](https://codecov.io/gh/CITCOM-project/CausalTestingFramework) [![Documentation Status](https://readthedocs.org/projects/causal-testing-framework/badge/?version=latest)](https://causal-testing-framework.readthedocs.io/en/latest/?badge=latest)
+
+[![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) 
+![example workflow](https://github.com/CITCOM-project/CausalTestingFramework/actions/workflows/ci-tests.yaml/badge.svg) 
+[![codecov](https://codecov.io/gh/CITCOM-project/CausalTestingFramework/branch/main/graph/badge.svg?token=04ijFVrb4a)](https://codecov.io/gh/CITCOM-project/CausalTestingFramework) 
+[![Documentation Status](https://readthedocs.org/projects/causal-testing-framework/badge/?version=latest)](https://causal-testing-framework.readthedocs.io/en/latest/?badge=latest)
+![Dynamic TOML Badge](https://img.shields.io/badge/dynamic/toml?url=https%3A%2F%2Fraw.githubusercontent.com%2FCITCOM-project%2FCausalTestingFramework%2Fmain%2Fpyproject.toml&query=%24.project%5B'requires-python'%5D&label=python)
+![PyPI - Version](https://img.shields.io/pypi/v/causal-testing-framework)
+[![DOI](https://t.ly/FCT1B)](https://orda.shef.ac.uk/articles/software/CITCOM_Software_Release/24427516)
+![GitHub License](https://img.shields.io/github/license/CITCOM-project/CausalTestingFramework)
 
 Causal testing is a causal inference-driven framework for functional black-box testing. This framework utilises
 graphical causal inference (CI) techniques for the specification and functional testing of software from a black-box
@@ -12,10 +21,9 @@ system-under-test that is expected to cause a change to some output(s).
 
 ![Causal Testing Workflow](images/workflow.png)
 
-
 ## Installation
 
-See the readthedocs site for [installation
+See the Read the Docs site for [installation
 instructions](https://causal-testing-framework.readthedocs.io/en/latest/installation.html).
 
 ## Documentation
 
@@ -301,9 +301,6 @@ def _setup_test(self, causal_test_case: CausalTestCase, test: Mapping) -> Estima
         """Create the necessary inputs for a single test case
         :param causal_test_case: The concrete test case to be executed
         :param test: Single JSON test definition stored in a mapping (dict)
-        :param conditions: A list of conditions which should be applied to the
-        data. Conditions should be in the query format detailed at
-        https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.query.html
         :returns:
                 - estimation_model - Estimator instance for the test being run
         """
@@ -323,11 +320,13 @@ def _setup_test(self, causal_test_case: CausalTestCase, test: Mapping) -> Estima
             minimal_adjustment_set = minimal_adjustment_set - {causal_test_case.treatment_variable}
             estimator_kwargs["adjustment_set"] = minimal_adjustment_set
 
+        estimator_kwargs["query"] = test["query"] if "query" in test else ""
         estimator_kwargs["treatment"] = causal_test_case.treatment_variable.name
         estimator_kwargs["treatment_value"] = causal_test_case.treatment_value
         estimator_kwargs["control_value"] = causal_test_case.control_value
         estimator_kwargs["outcome"] = causal_test_case.outcome_variable.name
         estimator_kwargs["effect_modifiers"] = causal_test_case.effect_modifier_configuration
+        estimator_kwargs["df"] = self.data_collector.collect_data()
         estimator_kwargs["alpha"] = test["alpha"] if "alpha" in test else 0.05
 
         estimation_model = test["estimator"](**estimator_kwargs)
 
@@ -125,7 +125,6 @@ def close_separator(
 
 
 class CausalDAG(nx.DiGraph):
-
     """A causal DAG is a directed acyclic graph in which nodes represent random variables and edges represent causality
     between a pair of random variables. We implement a CausalDAG as a networkx DiGraph with an additional check that
     ensures it is acyclic. A CausalDAG must be specified as a dot file.
@@ -500,11 +499,20 @@ def depends_on_outputs(self, node: Node, scenario: Scenario) -> bool:
             return True
         return any((self.depends_on_outputs(n, scenario) for n in self.graph.predecessors(node)))
 
-    def identification(self, base_test_case: BaseTestCase):
+    @staticmethod
+    def remove_hidden_adjustment_sets(minimal_adjustment_sets: list[set[str]], scenario: Scenario):
+        """Drop every adjustment set that contains a hidden variable (sets are removed whole, never trimmed).
+        :param minimal_adjustment_sets: candidate adjustment sets; each is iterated as a collection of variable names
+        :param scenario: modelling scenario; each name is looked up in scenario.variables and its `.hidden` flag read
+        :return: list of the adjustment sets whose variables are all non-hidden, in their original order"""
+        return [adj for adj in minimal_adjustment_sets if all(not scenario.variables.get(x).hidden for x in adj)]
+
+    def identification(self, base_test_case: BaseTestCase, scenario: Scenario = None):
         """Identify and return the minimum adjustment set
 
         :param base_test_case: A base test case instance containing the outcome_variable and the
         treatment_variable required for identification.
+        :param scenario: The modelling scenario relating to the tests
         :return minimal_adjustment_set: The smallest set of variables which can be adjusted for to obtain a causal
         estimate as opposed to a purely associational estimate.
         """
@@ -520,6 +528,12 @@ def identification(self, base_test_case: BaseTestCase):
         else:
             raise ValueError("Causal effect should be 'total' or 'direct'")
 
+        if scenario is not None:
+            minimal_adjustment_sets = self.remove_hidden_adjustment_sets(minimal_adjustment_sets, scenario)
+
+        if len(minimal_adjustment_sets) == 0:
+            return set()
+
         minimal_adjustment_set = min(minimal_adjustment_sets, key=len)
         return minimal_adjustment_set
 
 
@@ -0,0 +1,142 @@
+"""Module containing classes to define and run causal surrogate assisted test cases"""
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import Callable
+
+from causal_testing.data_collection.data_collector import ObservationalDataCollector
+from causal_testing.specification.causal_specification import CausalSpecification
+from causal_testing.testing.base_test_case import BaseTestCase
+from causal_testing.testing.estimators import CubicSplineRegressionEstimator
+
+
+@dataclass
+class SimulationResult:
+    """Data class holding the data and result metadata of a single simulator run"""
+
+    data: dict  # values produced by the run; merged into the collected data by the test case
+    fault: bool  # truthy when the run exhibited a fault, which ends the surrogate-assisted search
+    relationship: str  # overwritten with "<treatment> -> <outcome> expected <relationship>" on a fault
+
+
+class SearchAlgorithm(ABC):  # pylint: disable=too-few-public-methods
+    """Abstract base class for search algorithms; subclasses provide the search routine (and its fitness function)
+    used to explore the input space"""
+
+    @abstractmethod
+    def search(
+        self, surrogate_models: list[CubicSplineRegressionEstimator], specification: CausalSpecification
+    ) -> list:
+        """Search the given surrogate models for the optimal fitness value for the specified scenario
+        :param surrogate_models: The surrogate models to be searched
+        :param specification: The Causal Specification (combination of Scenario and Causal Dag)
+        :return: list that the caller unpacks as (candidate test case, a value it ignores, surrogate model)"""
+
+
+class Simulator(ABC):
+    """Abstract base class wrapping a simulator: subclasses implement how to start it, shut it down and run it with a
+    given configuration"""
+
+    @abstractmethod
+    def startup(self, **kwargs):
+        """Initialise and open the Simulator (the test case calls this with no arguments before each run)"""
+
+    @abstractmethod
+    def shutdown(self, **kwargs):
+        """Safely exit and shut down the Simulator (the test case calls this with no arguments after each run)"""
+
+    @abstractmethod
+    def run_with_config(self, configuration: dict) -> SimulationResult:
+        """Run the simulator with the given configuration and return the results in the structure of a
+        SimulationResult
+        :param configuration: The configuration required to initialise the Simulation
+        :return: Simulation results in the structure of the SimulationResult data class"""
+
+
+class CausalSurrogateAssistedTestCase:
+    """A class representing a single causal surrogate assisted test case."""
+
+    def __init__(
+        self,
+        specification: CausalSpecification,
+        search_algorithm: SearchAlgorithm,
+        simulator: Simulator,
+    ):
+        self.specification = specification
+        self.search_algorithm = search_algorithm
+        self.simulator = simulator
+
+    def execute(
+        self,
+        data_collector: ObservationalDataCollector,
+        max_executions: int = 200,
+        custom_data_aggregator: Callable[[dict, dict], dict] = None,
+    ):
+        """For this specific test case, a search algorithm is used to find the most contradictory point in the input
+        space which is, therefore, most likely to indicate incorrect behaviour. This candidate test case is run against
+        the simulator, checked for faults and the result returned with collected data
+        :param data_collector: An ObservationalDataCollector which gathers data relevant to the specified scenario
+        :param max_executions: Maximum number of simulator executions before exiting the search
+        :param custom_data_aggregator: Optional callable merging (collected data, result data); default appends a row
+        :return: tuple containing SimulationResult or str, execution number and collected data"""
+        import pandas as pd  # pylint: disable=import-outside-toplevel
+
+        data_collector.collect_data()  # NOTE(review): result unused; data_collector.data is what is read below
+        executions = 0  # defined up front so the final return is valid even when max_executions <= 0
+        for executions in range(1, max_executions + 1):
+            surrogate_models = self.generate_surrogates(self.specification, data_collector)
+            candidate_test_case, _, surrogate = self.search_algorithm.search(surrogate_models, self.specification)
+
+            self.simulator.startup()
+            test_result = self.simulator.run_with_config(candidate_test_case)
+            self.simulator.shutdown()
+
+            if custom_data_aggregator is None:
+                # DataFrame.append was removed in pandas 2.0, so concatenate a one-row frame instead
+                new_row = pd.DataFrame([test_result.data])
+                data_collector.data = pd.concat([data_collector.data, new_row], ignore_index=True)
+            elif data_collector.data is not None:  # NOTE(review): result is dropped if no data was collected
+                data_collector.data = custom_data_aggregator(data_collector.data, test_result.data)
+
+            if test_result.fault:
+                print(
+                    f"Fault found between {surrogate.treatment} causing {surrogate.outcome}. Contradiction with "
+                    f"expected {surrogate.expected_relationship}."
+                )
+                test_result.relationship = (
+                    f"{surrogate.treatment} -> {surrogate.outcome} expected {surrogate.expected_relationship}"
+                )
+                return test_result, executions, data_collector.data
+
+        print("No fault found")
+        return "No fault found", executions, data_collector.data
+
+    def generate_surrogates(
+        self, specification: CausalSpecification, data_collector: ObservationalDataCollector
+    ) -> list[CubicSplineRegressionEstimator]:
+        """Generate a surrogate model for each edge of the dag that specifies it is included in the DAG metadata.
+        :param specification: The Causal Specification (combination of Scenario and Causal Dag)
+        :param data_collector: An ObservationalDataCollector which gathers data relevant to the specified scenario
+        :return: A list of surrogate models, one per DAG edge whose metadata contains the "included" key"""
+        surrogate_models = []
+
+        for u, v in specification.causal_dag.graph.edges:
+            edge_metadata = specification.causal_dag.graph.adj[u][v]
+            if "included" in edge_metadata:
+                from_var = specification.scenario.variables.get(u)
+                to_var = specification.scenario.variables.get(v)
+                base_test_case = BaseTestCase(from_var, to_var)
+                minimal_adjustment_set = specification.causal_dag.identification(base_test_case, specification.scenario)
+
+                surrogate = CubicSplineRegressionEstimator(
+                    u,
+                    0,  # NOTE(review): presumably a placeholder treatment value - confirm against the estimator
+                    0,  # NOTE(review): presumably a placeholder control value - confirm against the estimator
+                    minimal_adjustment_set,
+                    v,
+                    4,  # NOTE(review): presumably the spline basis size - confirm against the estimator
+                    df=data_collector.data,
+                    expected_relationship=edge_metadata["expected"],
+                )
+                surrogate_models.append(surrogate)
+
+        return surrogate_models