Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
5495caa
Removed datacollector from surrogate assisted
jmafoster1 Feb 17, 2025
b8ace8e
Removed data collector from causal test suite
jmafoster1 Feb 17, 2025
2ceb654
Removed datacollector from testing
jmafoster1 Feb 17, 2025
bb70338
Removed data collection tests
jmafoster1 Feb 17, 2025
736cf6d
Removed data collector from metamorphic relation
jmafoster1 Feb 17, 2025
b3b5261
removed data collector classes
jmafoster1 Feb 17, 2025
73eb5f1
Removed unnecessary methods and arguments from metamorphic relation
jmafoster1 Feb 17, 2025
127a2f4
Added experimental estimator to keep functionality of experimental da…
jmafoster1 Feb 17, 2025
4b7de4c
pylint
jmafoster1 Feb 17, 2025
a80ccef
pylint
jmafoster1 Feb 17, 2025
9cfff7d
codecov
jmafoster1 Feb 17, 2025
2d85097
Clarified experimental estimator test.
jmafoster1 Feb 17, 2025
b9d2504
fixed pytest error
jmafoster1 Feb 17, 2025
1f1b8ab
Merge branch 'main' of github.com:CITCOM-project/CausalTestingFramewo…
jmafoster1 Feb 17, 2025
5b78c16
pylint
jmafoster1 Feb 17, 2025
ee98a2a
Updated docs
jmafoster1 Feb 17, 2025
7e33f80
fix: schematic diagram on homepage
f-allian Feb 17, 2025
61d1e3d
fix: remove mentions of data collector
f-allian Feb 17, 2025
9dd94f8
fix: misc typos
f-allian Feb 17, 2025
bedf769
add: note about 32-bit systems
f-allian Feb 17, 2025
3310bbe
Removed all mention of the causal test engine
jmafoster1 Feb 18, 2025
2786fdc
Fixed metamorphic relation
jmafoster1 Feb 18, 2025
c38706d
Fixed default behaviour of MR generation
jmafoster1 Feb 18, 2025
9a82172
Pylint
jmafoster1 Feb 18, 2025
1cef1fb
Examples
jmafoster1 Feb 18, 2025
78d1c3d
causal test case
jmafoster1 Feb 18, 2025
6071bd3
Reworked examples
jmafoster1 Feb 18, 2025
a15e15b
Removed causal test suite from poisson
jmafoster1 Feb 18, 2025
4b14212
Tests passing again
jmafoster1 Feb 18, 2025
31a70b1
Removed causal test suite
jmafoster1 Feb 18, 2025
7c0f0f2
Removed abstract causal test case + pylint
jmafoster1 Feb 18, 2025
e559295
Removed Z3
jmafoster1 Feb 18, 2025
0c55a82
pylint
jmafoster1 Feb 18, 2025
aaa18df
pylint
jmafoster1 Feb 18, 2025
67c973a
metamorphic relation codecov
jmafoster1 Feb 18, 2025
77ac1b8
causal dag coverage
jmafoster1 Feb 18, 2025
b86c584
All the tests pass and got rid of JSON front
jmafoster1 Feb 20, 2025
4d48785
IPCW outcome is now an output
jmafoster1 Feb 20, 2025
db5b8b8
pylint
jmafoster1 Feb 20, 2025
0af5898
pylint
jmafoster1 Feb 20, 2025
c86fcec
pylint
jmafoster1 Feb 20, 2025
26f814e
Forgot to save
jmafoster1 Feb 20, 2025
949c17d
Removed the docs for the deprecated frontends
jmafoster1 Feb 20, 2025
28c99b7
Removed test csv output
jmafoster1 Feb 20, 2025
98016cc
Merge branch 'main' into jmafoster1/remove-data-collector
f-allian Feb 26, 2025
b462aa1
Removed all mention of data collection and json front end
jmafoster1 Feb 27, 2025
15fa8ee
Merge branch 'jmafoster1/remove-data-collector' of github.com:CITCOM-…
jmafoster1 Feb 27, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
161 changes: 0 additions & 161 deletions causal_testing/data_collection/data_collector.py

This file was deleted.

103 changes: 103 additions & 0 deletions causal_testing/estimation/experimental_estimator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
"""This module contains the ExperimentalEstimator class for directly interacting with the system under test."""

from typing import Any
from abc import abstractmethod
import pandas as pd

from causal_testing.estimation.abstract_estimator import Estimator


class ExperimentalEstimator(Estimator):
    """An Experimental Estimator estimates causal effects by executing the system under test directly,
    rather than fitting a statistical model to observational data. It repeatedly runs the system with the
    control and treatment configurations and bootstraps confidence intervals from the paired run results.

    Concrete subclasses must implement :meth:`run_system` to perform a single execution of the system.
    """

    def __init__(
        # pylint: disable=too-many-arguments
        self,
        treatment: str,
        treatment_value: float,
        control_value: float,
        adjustment_set: dict[str, Any],
        outcome: str,
        effect_modifiers: dict[str, Any] = None,
        alpha: float = 0.05,
        repeats: int = 200,
    ):
        """
        :param treatment: Name of the treatment input variable.
        :param treatment_value: Value of the treatment variable in the treatment configuration.
        :param control_value: Value of the treatment variable in the control configuration.
        :param adjustment_set: Input variable names mapped to the values they are held at across all runs.
        :param outcome: Name of the outcome variable read from the system's output.
        :param effect_modifiers: Optional input names mapped to fixed values, merged into every
                                 configuration alongside the adjustment set. Defaults to empty.
        :param alpha: Significance level for the bootstrapped confidence intervals.
        :param repeats: Number of times to run the system under each configuration.
        """
        # pylint: disable=R0801
        super().__init__(
            treatment=treatment,
            treatment_value=treatment_value,
            control_value=control_value,
            adjustment_set=adjustment_set,
            outcome=outcome,
            effect_modifiers=effect_modifiers,
            alpha=alpha,
        )
        # Normalise the mutable default: never share a dict between instances.
        if effect_modifiers is None:
            self.effect_modifiers = {}
        self.repeats = repeats

    def add_modelling_assumptions(self):
        """
        Add modelling assumptions to the estimator. This is a list of strings which list the modelling assumptions that
        must hold if the resulting causal inference is to be considered valid.
        """
        self.modelling_assumptions.append(
            "The supplied number of repeats must be sufficient for statistical significance"
        )

    @abstractmethod
    def run_system(self, configuration: dict) -> dict:
        """
        Runs the system under test with the supplied configuration and supplies the outputs as a dict.
        :param configuration: The run configuration arguments.
        :returns: The resulting output as a dict.
        """

    def _bootstrap_effect(self, combine) -> tuple[pd.Series, list[pd.Series]]:
        """Run the system ``self.repeats`` times under the control and treatment configurations and
        bootstrap the effect computed by ``combine``.

        :param combine: Callable taking (treatment_outcomes, control_outcomes) pandas Series and
                        returning the per-run effect Series (e.g. difference or ratio).
        :return: The mean effect and the bootstrapped [low, high] confidence intervals.
        """
        control_configuration = self.adjustment_set | self.effect_modifiers | {self.treatment: self.control_value}
        treatment_configuration = self.adjustment_set | self.effect_modifiers | {self.treatment: self.treatment_value}

        control_outcomes = pd.DataFrame([self.run_system(control_configuration) for _ in range(self.repeats)])
        treatment_outcomes = pd.DataFrame([self.run_system(treatment_configuration) for _ in range(self.repeats)])

        effects = combine(treatment_outcomes[self.outcome], control_outcomes[self.outcome]).sort_values().reset_index()

        ci_low_index = round(self.repeats * (self.alpha / 2))
        ci_low = effects.iloc[ci_low_index]
        # Symmetric upper index; the -1 keeps it in bounds when ci_low_index rounds to 0
        # (the original `self.repeats - ci_low_index` raised IndexError in that case).
        ci_high = effects.iloc[self.repeats - ci_low_index - 1]

        return pd.Series({self.treatment: effects.mean()[self.outcome]}), [
            pd.Series({self.treatment: ci_low[self.outcome]}),
            pd.Series({self.treatment: ci_high[self.outcome]}),
        ]

    def estimate_ate(self) -> tuple[pd.Series, list[pd.Series]]:
        """Estimate the average treatment effect of the treatment on the outcome. That is, the change in outcome caused
        by changing the treatment variable from the control value to the treatment value.

        :return: The average treatment effect and the bootstrapped confidence intervals.
        """
        return self._bootstrap_effect(lambda treated, control: treated - control)

    def estimate_risk_ratio(self) -> tuple[pd.Series, list[pd.Series]]:
        """Estimate the risk ratio of the treatment on the outcome. That is, the change in outcome caused
        by changing the treatment variable from the control value to the treatment value.

        :return: The risk ratio and the bootstrapped confidence intervals.
        """
        return self._bootstrap_effect(lambda treated, control: treated / control)
20 changes: 8 additions & 12 deletions causal_testing/json_front/json_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import scipy
from fitter import Fitter, get_common_distributions

from causal_testing.data_collection.data_collector import ObservationalDataCollector
from causal_testing.generation.abstract_causal_test_case import AbstractCausalTestCase
from causal_testing.specification.causal_dag import CausalDAG
from causal_testing.specification.causal_specification import CausalSpecification
Expand Down Expand Up @@ -56,8 +55,8 @@ def __init__(self, output_path: str, output_overwrite: bool = False):
self.scenario = None
self.causal_specification = None
self.output_path = Path(output_path)
self.df = None
self.check_file_exists(self.output_path, output_overwrite)
self.data_collector = None

def set_paths(self, json_path: str, dag_path: str, data_paths: list[str] = None):
"""
Expand All @@ -70,7 +69,7 @@ def set_paths(self, json_path: str, dag_path: str, data_paths: list[str] = None)
data_paths = []
self.input_paths = JsonClassPaths(json_path=json_path, dag_path=dag_path, data_paths=data_paths)

def setup(self, scenario: Scenario, data=None, ignore_cycles=False):
def setup(self, scenario: Scenario, ignore_cycles=False):
"""Function to populate all the necessary parts of the json_class needed to execute tests"""
self.scenario = scenario
self._get_scenario_variables()
Expand All @@ -83,21 +82,20 @@ def setup(self, scenario: Scenario, data=None, ignore_cycles=False):
self.test_plan = json.load(f)
# Populate the data
if self.input_paths.data_paths:
data = pd.concat([pd.read_csv(data_file, header=0) for data_file in self.input_paths.data_paths])
if data is None or len(data) == 0:
self.df = pd.concat([pd.read_csv(data_file, header=0) for data_file in self.input_paths.data_paths])
if self.df is None or len(self.df) == 0:
raise ValueError(
"No data found. Please either provide a path to a file containing data or manually populate the .data "
"attribute with a dataframe before calling .setup()"
)
self.data_collector = ObservationalDataCollector(self.scenario, data)
self._populate_metas()

def _create_abstract_test_case(self, test, mutates, effects):
assert len(test["mutations"]) == 1
treatment_var = next(self.scenario.variables[v] for v in test["mutations"])

if not treatment_var.distribution:
fitter = Fitter(self.data_collector.data[treatment_var.name], distributions=get_common_distributions())
fitter = Fitter(self.df[treatment_var.name], distributions=get_common_distributions())
fitter.fit()
(dist, params) = list(fitter.get_best(method="sumsquare_error").items())[0]
treatment_var.distribution = getattr(scipy.stats, dist)(**params)
Expand Down Expand Up @@ -257,7 +255,7 @@ def _populate_metas(self):
Populate data with meta-variable values and add distributions to Causal Testing Framework Variables
"""
for meta in self.scenario.variables_of_type(Meta):
meta.populate(self.data_collector.data)
meta.populate(self.df)

def _execute_test_case(
self, causal_test_case: CausalTestCase, test: Mapping, f_flag: bool
Expand All @@ -273,9 +271,7 @@ def _execute_test_case(
failed = False

estimation_model = self._setup_test(causal_test_case=causal_test_case, test=test)
causal_test_result = causal_test_case.execute_test(
estimator=estimation_model, data_collector=self.data_collector
)
causal_test_result = causal_test_case.execute_test(estimator=estimation_model)
test_passes = causal_test_case.expected_causal_effect.apply(causal_test_result)

if "coverage" in test and test["coverage"]:
Expand Down Expand Up @@ -329,7 +325,7 @@ def _setup_test(self, causal_test_case: CausalTestCase, test: Mapping) -> Estima
estimator_kwargs["control_value"] = causal_test_case.control_value
estimator_kwargs["outcome"] = causal_test_case.outcome_variable.name
estimator_kwargs["effect_modifiers"] = causal_test_case.effect_modifier_configuration
estimator_kwargs["df"] = self.data_collector.collect_data()
estimator_kwargs["df"] = self.df
estimator_kwargs["alpha"] = test["alpha"] if "alpha" in test else 0.05

estimation_model = test["estimator"](**estimator_kwargs)
Expand Down
Loading