Skip to content

Commit 532558a

Browse files
committed
Merge branch 'main' of github.com:CITCOM-project/CausalTestingFramework into json-cate
2 parents 0b9433a + b68aee3 commit 532558a

File tree

9 files changed

+153
-106
lines changed

9 files changed

+153
-106
lines changed

.pylintrc

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -75,7 +75,7 @@ limit-inference-results=100
7575

7676
# List of plugins (as comma separated values of python module names) to load,
7777
# usually to register additional checkers.
78-
load-plugins=
78+
load-plugins=pylint.extensions.docparams
7979

8080
# Pickle collected data for later comparisons.
8181
persistent=yes

causal_testing/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,4 +12,3 @@
1212

1313
logger = logging.getLogger(__name__)
1414
logger.setLevel(logging.INFO)
15-
logger.addHandler(logging.StreamHandler())

causal_testing/json_front/json_class.py

Lines changed: 92 additions & 70 deletions
Original file line number | Diff line number | Diff line change
@@ -5,9 +5,9 @@
55
import json
66
import logging
77

8-
from abc import ABC
98
from dataclasses import dataclass
109
from pathlib import Path
10+
from statistics import StatisticsError
1111

1212
import pandas as pd
1313
import scipy
@@ -27,7 +27,7 @@
2727
logger = logging.getLogger(__name__)
2828

2929

30-
class JsonUtility(ABC):
30+
class JsonUtility:
3131
"""
3232
The JsonUtility Class provides the functionality to use structured JSON to setup and run causal tests on the
3333
CausalTestingFramework.
@@ -40,58 +40,58 @@ class JsonUtility(ABC):
4040
:attr {Meta} metas: Causal variables representing metavariables.
4141
:attr {pd.DataFrame}: Pandas DataFrame containing runtime data.
4242
:attr {dict} test_plan: Dictionary containing the key value pairs from the loaded json test plan.
43-
:attr {Scenario} modelling_scenario:
43+
:attr {Scenario} scenario:
4444
:attr {CausalSpecification} causal_specification:
4545
"""
4646

47-
def __init__(self, log_path):
48-
self.paths = None
47+
def __init__(self, output_path: str, output_overwrite: bool = False):
48+
self.input_paths = None
4949
self.variables = None
5050
self.data = []
5151
self.test_plan = None
52-
self.modelling_scenario = None
52+
self.scenario = None
5353
self.causal_specification = None
54-
self.setup_logger(log_path)
54+
self.output_path = Path(output_path)
55+
self.check_file_exists(self.output_path, output_overwrite)
5556

5657
def set_paths(self, json_path: str, dag_path: str, data_paths: str):
5758
"""
5859
Takes a path of the directory containing all scenario specific files and creates individual paths for each file
5960
:param json_path: string path representation to .json file containing test specifications
6061
:param dag_path: string path representation to the .dot file containing the Causal DAG
61-
:param data_path: string path representation to the data file
62+
:param data_paths: string path representation to the data files
6263
"""
63-
self.paths = JsonClassPaths(json_path=json_path, dag_path=dag_path, data_paths=data_paths)
64+
self.input_paths = JsonClassPaths(json_path=json_path, dag_path=dag_path, data_paths=data_paths)
6465

65-
def set_variables(self, inputs: list[dict], outputs: list[dict], metas: list[dict]):
66-
"""Populate the Causal Variables
67-
:param inputs:
68-
:param outputs:
69-
:param metas:
70-
"""
71-
72-
self.variables = CausalVariables(inputs=inputs, outputs=outputs, metas=metas)
73-
74-
def setup(self):
66+
def setup(self, scenario: Scenario):
7567
"""Function to populate all the necessary parts of the json_class needed to execute tests"""
76-
self.modelling_scenario = Scenario(self.variables.inputs + self.variables.outputs + self.variables.metas, None)
77-
self.modelling_scenario.setup_treatment_variables()
68+
self.scenario = scenario
69+
self.scenario.setup_treatment_variables()
7870
self.causal_specification = CausalSpecification(
79-
scenario=self.modelling_scenario, causal_dag=CausalDAG(self.paths.dag_path)
71+
scenario=self.scenario, causal_dag=CausalDAG(self.input_paths.dag_path)
8072
)
8173
self._json_parse()
8274
self._populate_metas()
8375

8476
def _create_abstract_test_case(self, test, mutates, effects):
8577
assert len(test["mutations"]) == 1
78+
# Look up the mutated variable once so the same object can be inspected here and
# handed on as the treatment variable.
treatment_var = next(self.scenario.variables[v] for v in test["mutations"])
if not treatment_var.distribution:
    # The treatment variable has no distribution yet: fit the common candidate
    # distributions to its data column and keep the best by sum-of-squares error.
    # NOTE: this must use `treatment_var`; no name `var` is bound in this function,
    # so referring to it raised NameError as soon as this branch was taken.
    fitter = Fitter(self.data[treatment_var.name], distributions=get_common_distributions())
    fitter.fit()
    (dist, params) = list(fitter.get_best(method="sumsquare_error").items())[0]
    treatment_var.distribution = getattr(scipy.stats, dist)(**params)
    # Record which distribution was chosen, both in the output file and the log.
    self._append_to_file(treatment_var.name + f" {dist}({params})", logging.INFO)
85+
8686
abstract_test = AbstractCausalTestCase(
87-
scenario=self.modelling_scenario,
87+
scenario=self.scenario,
8888
intervention_constraints=[mutates[v](k) for k, v in test["mutations"].items()],
89-
treatment_variable=next(self.modelling_scenario.variables[v] for v in test["mutations"]),
89+
treatment_variable=treatment_var,
9090
expected_causal_effect={
91-
self.modelling_scenario.variables[variable]: effects[effect]
91+
self.scenario.variables[variable]: effects[effect]
9292
for variable, effect in test["expectedEffect"].items()
9393
},
94-
effect_modifiers={self.modelling_scenario.variables[v] for v in test["effect_modifiers"]}
94+
effect_modifiers={self.scenario.variables[v] for v in test["effect_modifiers"]}
9595
if "effect_modifiers" in test
9696
else {},
9797
estimate_type=test["estimate_type"],
@@ -108,14 +108,15 @@ def generate_tests(self, effects: dict, mutates: dict, estimators: dict, f_flag:
108108
:param f_flag: Failure flag that if True the script will stop executing when a test fails.
109109
"""
110110
failures = 0
111+
msg = ""
111112
for test in self.test_plan["tests"]:
112113
if "skip" in test and test["skip"]:
113114
continue
114115

115116
if test["estimate_type"] == "coefficient":
116117
base_test_case = BaseTestCase(
117-
treatment_variable=next(self.modelling_scenario.variables[v] for v in test["mutations"]),
118-
outcome_variable=next(self.modelling_scenario.variables[v] for v in test["expectedEffect"]),
118+
treatment_variable=next(self.scenario.variables[v] for v in test["mutations"]),
119+
outcome_variable=next(self.scenario.variables[v] for v in test["expectedEffect"]),
119120
effect=test["effect"],
120121
)
121122
assert len(test["expectedEffect"]) == 1, "Can only have one expected effect."
@@ -127,20 +128,29 @@ def generate_tests(self, effects: dict, mutates: dict, estimators: dict, f_flag:
127128
),
128129
estimate_type="coefficient",
129130
effect_modifier_configuration={
130-
self.modelling_scenario.variables[v] for v in test.get("effect_modifiers", [])
131+
self.scenario.variables[v] for v in test.get("effect_modifiers", [])
131132
},
132133
)
133134
]
135+
failures = self._execute_tests(concrete_tests, estimators, test, f_flag)
136+
msg = (
137+
f"Executing test: {test['name']} \n"
138+
+ f" {concrete_tests[0]} \n"
139+
+ f" {failures}/{len(concrete_tests)} failed for {test['name']}"
140+
)
134141
else:
135142
abstract_test = self._create_abstract_test_case(test, mutates, effects)
136-
137143
concrete_tests, dummy = abstract_test.generate_concrete_tests(5, 0.05)
138-
logger.info("Executing test: %s", test["name"])
139-
logger.info(abstract_test)
140-
logger.info([abstract_test.treatment_variable.name, abstract_test.treatment_variable.distribution])
141-
logger.info("Number of concrete tests for test case: %s", str(len(concrete_tests)))
142-
failures = self._execute_tests(concrete_tests, estimators, test, f_flag)
143-
logger.info("%s/%s failed for %s\n", failures, len(concrete_tests), test["name"])
144+
failures = self._execute_tests(concrete_tests, estimators, test, f_flag)
145+
msg = (
146+
f"Executing test: {test['name']} \n"
147+
+ " abstract_test \n"
148+
+ f" {abstract_test} \n"
149+
+ f" {abstract_test.treatment_variable.name},{abstract_test.treatment_variable.distribution} \n"
150+
+ f" Number of concrete tests for test case: {str(len(concrete_tests))} \n"
151+
+ f" {failures}/{len(concrete_tests)} failed for {test['name']}"
152+
)
153+
self._append_to_file(msg, logging.INFO)
144154

145155
def _execute_tests(self, concrete_tests, estimators, test, f_flag):
146156
failures = 0
@@ -154,9 +164,9 @@ def _execute_tests(self, concrete_tests, estimators, test, f_flag):
154164

155165
def _json_parse(self):
156166
"""Parse a JSON input file into inputs, outputs, metas and a test plan"""
157-
with open(self.paths.json_path, encoding="utf-8") as f:
167+
with open(self.input_paths.json_path, encoding="utf-8") as f:
158168
self.test_plan = json.load(f)
159-
for data_file in self.paths.data_paths:
169+
for data_file in self.input_paths.data_paths:
160170
df = pd.read_csv(data_file, header=0)
161171
self.data.append(df)
162172
self.data = pd.concat(self.data)
@@ -165,20 +175,9 @@ def _populate_metas(self):
165175
"""
166176
Populate data with meta-variable values and add distributions to Causal Testing Framework Variables
167177
"""
168-
for meta in self.variables.metas:
178+
for meta in self.scenario.variables_of_type(Meta):
169179
meta.populate(self.data)
170180

171-
for var in self.variables.metas + self.variables.outputs:
172-
if not var.distribution:
173-
try:
174-
fitter = Fitter(self.data[var.name], distributions=get_common_distributions())
175-
fitter.fit()
176-
(dist, params) = list(fitter.get_best(method="sumsquare_error").items())[0]
177-
var.distribution = getattr(scipy.stats, dist)(**params)
178-
logger.info(var.name + f" {dist}({params})")
179-
except:
180-
logger.warn(f"Could not fit distriubtion for {var.name}.")
181-
182181
def _execute_test_case(
183182
self, causal_test_case: CausalTestCase, estimator: Estimator, f_flag: bool, conditions: list[str]
184183
) -> bool:
@@ -191,7 +190,6 @@ def _execute_test_case(
191190
"""
192191
failed = False
193192

194-
print(causal_test_case)
195193
causal_test_engine, estimation_model = self._setup_test(causal_test_case, estimator, conditions)
196194
causal_test_result = causal_test_engine.execute_test(
197195
estimation_model, causal_test_case, estimate_type=causal_test_case.estimate_type
@@ -207,12 +205,13 @@ def _execute_test_case(
207205
)
208206
else:
209207
result_string = f"{causal_test_result.test_value.value} no confidence intervals"
210-
if f_flag:
211-
assert test_passes, (
212-
f"{causal_test_case}\n FAILED - expected {causal_test_case.expected_causal_effect}, "
213-
f"got {result_string}"
214-
)
208+
215209
if not test_passes:
210+
if f_flag:
211+
raise StatisticsError(
212+
f"{causal_test_case}\n FAILED - expected {causal_test_case.expected_causal_effect}, "
213+
f"got {result_string}"
214+
)
216215
failed = True
217216
logger.warning(" FAILED- expected %s, got %s", causal_test_case.expected_causal_effect, result_string)
218217
return failed
@@ -228,7 +227,7 @@ def _setup_test(
228227
"""
229228

230229
data_collector = ObservationalDataCollector(
231-
self.modelling_scenario, self.data.query(" & ".join(conditions)) if conditions else self.data
230+
self.scenario, self.data.query(" & ".join(conditions)) if conditions else self.data
232231
)
233232
causal_test_engine = CausalTestEngine(self.causal_specification, data_collector, index_col=0)
234233

@@ -256,15 +255,32 @@ def add_modelling_assumptions(self, estimation_model: Estimator): # pylint: dis
256255
"""
257256
return
258257

258+
def _append_to_file(self, line: str, log_level: int = None):
259+
"""Appends given line(s) to the current output file. If log_level is specified it also logs that message to the
260+
logging level.
261+
:param line: The line or lines of text to be appended to the file
262+
:param log_level: An integer representing the logging level as specified by pythons inbuilt logging module. It
263+
is possible to use the inbuilt logging level variables such as logging.INFO and logging.WARNING
264+
"""
265+
with open(self.output_path, "a", encoding="utf-8") as f:
266+
f.write(
267+
line + "\n",
268+
)
269+
if log_level:
270+
logger.log(level=log_level, msg=line)
271+
259272
@staticmethod
260-
def setup_logger(log_path: str):
261-
"""Setups up logging instance for the module and adds a FileHandler stream so all stdout prints are also
262-
sent to the logfile
263-
:param log_path: Path specifying location and name of the logging file to be used
273+
def check_file_exists(output_path: Path, overwrite: bool):
274+
"""Method that checks if the given path to an output file already exists. If overwrite is true the check is
275+
passed.
276+
:param output_path: File path for the output file of the JSON Frontend
277+
:param overwrite: bool that if true, the current file can be overwritten
264278
"""
265-
setup_log = logging.getLogger(__name__)
266-
file_handler = logging.FileHandler(Path(log_path))
267-
setup_log.addHandler(file_handler)
279+
if output_path.is_file():
280+
if overwrite:
281+
output_path.unlink()
282+
else:
283+
raise FileExistsError(f"Chosen file output ({output_path}) already exists")
268284

269285
@staticmethod
270286
def get_args(test_args=None) -> argparse.Namespace:
@@ -280,6 +296,12 @@ def get_args(test_args=None) -> argparse.Namespace:
280296
help="if included, the script will stop if a test fails",
281297
action="store_true",
282298
)
299+
parser.add_argument(
300+
"-w",
301+
help="Specify to overwrite any existing output files. This can lead to the loss of existing outputs if not "
302+
"careful",
303+
action="store_true",
304+
)
283305
parser.add_argument(
284306
"--log_path",
285307
help="Specify a directory to change the location of the log file",
@@ -323,17 +345,17 @@ def __init__(self, json_path: str, dag_path: str, data_paths: str):
323345
self.data_paths = [Path(path) for path in data_paths]
324346

325347

326-
@dataclass()
348+
@dataclass
327349
class CausalVariables:
328350
"""
329-
A dataclass that converts
351+
A dataclass that converts lists of dictionaries into lists of Causal Variables
330352
"""
331353

332-
inputs: list[Input]
333-
outputs: list[Output]
334-
metas: list[Meta]
335-
336354
def __init__(self, inputs: list[dict], outputs: list[dict], metas: list[dict]):
337355
self.inputs = [Input(**i) for i in inputs]
338356
self.outputs = [Output(**o) for o in outputs]
339357
self.metas = [Meta(**m) for m in metas] if metas else []
358+
359+
def __iter__(self):
360+
for var in self.inputs + self.outputs + self.metas:
361+
yield var

causal_testing/specification/causal_dag.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,8 @@
99

1010
import networkx as nx
1111

12+
from causal_testing.testing.base_test_case import BaseTestCase
13+
1214
from .scenario import Scenario
1315
from .variable import Output
1416

@@ -496,7 +498,7 @@ def depends_on_outputs(self, node: Node, scenario: Scenario) -> bool:
496498
return True
497499
return any((self.depends_on_outputs(n, scenario) for n in self.graph.predecessors(node)))
498500

499-
def identification(self, base_test_case):
501+
def identification(self, base_test_case: BaseTestCase):
500502
"""Identify and return the minimum adjustment set
501503
502504
:param base_test_case: A base test case instance containing the outcome_variable and the

causal_testing/specification/metamorphic_relation.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -219,7 +219,6 @@ def generate_metamorphic_relations(dag: CausalDAG) -> list[MetamorphicRelation]:
219219

220220
# Create a ShouldNotCause relation for each pair of nodes that are not directly connected
221221
if ((u, v) not in dag.graph.edges) and ((v, u) not in dag.graph.edges):
222-
223222
# Case 1: U --> ... --> V
224223
if u in nx.ancestors(dag.graph, v):
225224
adj_set = list(dag.direct_effect_adjustment_sets([u], [v])[0])

causal_testing/testing/estimators.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -150,10 +150,10 @@ def _run_logistic_regression(self, data) -> RegressionResultsWrapper:
150150
model = smf.logit(formula=self.formula, data=data).fit(disp=0)
151151
return model
152152

153-
def estimate(self, data: pd.DataFrame, adjustment_config=None) -> RegressionResultsWrapper:
153+
def estimate(self, data: pd.DataFrame, adjustment_config: dict = None) -> RegressionResultsWrapper:
154154
"""add terms to the dataframe and estimate the outcome from the data
155155
:param data: A pandas dataframe containing execution data from the system-under-test.
156-
156+
:param adjustment_config: Dictionary containing the adjustment configuration of the adjustment set
157157
"""
158158
if adjustment_config is None:
159159
adjustment_config = {}

examples/poisson/example_run_causal_tests.py

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -135,7 +135,7 @@ def populate_num_shapes_unit(data):
135135
modelling_inputs = (
136136
[Input(i["name"], i["datatype"], i["distribution"]) for i in inputs]
137137
+ [Output(i["name"], i["datatype"]) for i in outputs]
138-
+ ([Meta(i["name"], i["datatype"], [i["populate"]]) for i in metas] if metas else list())
138+
+ ([Meta(i["name"], i["datatype"], i["populate"]) for i in metas] if metas else list())
139139
)
140140

141141
# Create modelling scenario to access z3 variable mirrors
@@ -172,8 +172,7 @@ def test_run_causal_tests():
172172
) # Set the path to the data.csv, dag.dot and causal_tests.json file
173173

174174
# Load the Causal Variables into the JsonUtility class ready to be used in the tests
175-
json_utility.set_variables(inputs, outputs, metas)
176-
json_utility.setup() # Sets up all the necessary parts of the json_class needed to execute tests
175+
json_utility.setup(scenario=modelling_scenario) # Sets up all the necessary parts of the json_class needed to execute tests
177176

178177
json_utility.generate_tests(effects, mutates, estimators, False)
179178

@@ -186,7 +185,6 @@ def test_run_causal_tests():
186185
) # Set the path to the data.csv, dag.dot and causal_tests.json file
187186

188187
# Load the Causal Variables into the JsonUtility class ready to be used in the tests
189-
json_utility.set_variables(inputs, outputs, metas)
190-
json_utility.setup() # Sets up all the necessary parts of the json_class needed to execute tests
188+
json_utility.setup(scenario=modelling_scenario) # Sets up all the necessary parts of the json_class needed to execute tests
191189

192190
json_utility.generate_tests(effects, mutates, estimators, args.f)

0 commit comments

Comments
 (0)