CITCOM-project
diff --git a/.pylintrc b/.pylintrc
Lines changed: 3 additions & 0 deletions
diff --git a/causal_testing/data_collection/data_collector.py b/causal_testing/data_collection/data_collector.py
Lines changed: 4 additions & 4 deletions
diff --git a/causal_testing/generation/abstract_causal_test_case.py b/causal_testing/generation/abstract_causal_test_case.py
Lines changed: 45 additions & 31 deletions
diff --git a/causal_testing/json_front/json_class.py b/causal_testing/json_front/json_class.py
Lines changed: 8 additions & 5 deletions
diff --git a/causal_testing/specification/causal_dag.py b/causal_testing/specification/causal_dag.py
Lines changed: 8 additions & 10 deletions
diff --git a/causal_testing/specification/causal_specification.py b/causal_testing/specification/causal_specification.py
Lines changed: 6 additions & 5 deletions
diff --git a/causal_testing/specification/scenario.py b/causal_testing/specification/scenario.py
Lines changed: 9 additions & 4 deletions
@@ -152,6 +152,7 @@ disable=raw-checker-failed,
         useless-suppression,
         deprecated-pragma,
         use-symbolic-message-instead,
+        logging-fstring-interpolation,
 
 # Enable the message, report, category or checker with the given id(s). You can
 # either give multiple identifier separated by comma (,) or put this option
@@ -239,7 +240,9 @@ good-names=i,
            j,
            k,
            ex,
+           df,
            Run,
+           z3,
            _
 
 # Good variable names regexes, separated by a comma. If names match any regex,
 
@@ -1,3 +1,6 @@
+"""This module contains the DataCollector abstract class, as well as its concrete extensions: ExperimentalDataCollector
+and ObservationalDataCollector"""
+
 import logging
 from abc import ABC, abstractmethod
 from enum import Enum
@@ -73,10 +76,7 @@ def filter_valid_data(self, data: pd.DataFrame, check_pos: bool = True) -> pd.Da
         size_diff = len(data) - len(satisfying_data)
         if size_diff > 0:
             logger.warning(
-                "Discarded %s/%s values due to constraint violations.\n" "For example%s",
-                size_diff,
-                len(data),
-                unsat_core,
+                f"Discarded {size_diff}/{len(data)} values due to constraint violations.\n For example {unsat_core}",
             )
         return satisfying_data
 
 
@@ -1,18 +1,21 @@
+"""This module contains the class AbstractCausalTestCase, which generates concrete test cases"""
+import itertools
 import logging
+from enum import Enum
+from typing import Iterable
 
 import lhsmdu
 import pandas as pd
 import z3
 from scipy import stats
-import itertools
+
 
 from causal_testing.specification.scenario import Scenario
 from causal_testing.specification.variable import Variable
 from causal_testing.testing.causal_test_case import CausalTestCase
 from causal_testing.testing.causal_test_outcome import CausalTestOutcome
 from causal_testing.testing.base_test_case import BaseTestCase
 
-from enum import Enum
 
 logger = logging.getLogger(__name__)
 
@@ -60,7 +63,9 @@ def __str__(self):
         )
         return f"When we apply intervention {self.intervention_constraints}, {outcome_string}"
 
-    def datapath(self):
+    def datapath(self) -> str:
+        """Create and return the sanitised data path"""
+
         def sanitise(string):
             return "".join([x for x in string if x.isalnum()])
 
@@ -101,25 +106,7 @@ def _generate_concrete_tests(
             samples[var.name] = lhsmdu.inverseTransformSample(var.distribution, samples[var.name])
 
         for index, row in samples.iterrows():
-            optimizer = z3.Optimize()
-            for c in self.scenario.constraints:
-                optimizer.assert_and_track(c, str(c))
-            for c in self.intervention_constraints:
-                optimizer.assert_and_track(c, str(c))
-
-            for v in run_columns:
-                optimizer.add_soft(
-                    self.scenario.variables[v].z3
-                    == self.scenario.variables[v].z3_val(self.scenario.variables[v].z3, row[v])
-                )
-
-            if optimizer.check() == z3.unsat:
-                logger.warning(
-                    "Satisfiability of test case was unsat.\n" "Constraints \n %s \n Unsat core %s",
-                    optimizer,
-                    optimizer.unsat_core(),
-                )
-            model = optimizer.model()
+            model = self._optimizer_model(run_columns, row)
 
             base_test_case = BaseTestCase(
                 treatment_variable=self.treatment_variable,
@@ -146,7 +133,7 @@ def _generate_concrete_tests(
                         + f"{constraints}\nUsing value {v.cast(model[v.z3])} instead in test\n{concrete_test}"
                     )
 
-            if not any([vars(t) == vars(concrete_test) for t in concrete_tests]):
+            if not any((vars(t) == vars(concrete_test) for t in concrete_tests)):
                 concrete_tests.append(concrete_test)
                 # Control run
                 control_run = {
@@ -197,12 +184,12 @@ def generate_concrete_tests(
 
         pre_break = False
         for i in range(hard_max):
-            concrete_tests_, runs_ = self._generate_concrete_tests(sample_size, rct, seed + i)
-            for t_ in concrete_tests_:
-                if not any([vars(t_) == vars(t) for t in concrete_tests]):
-                    concrete_tests.append(t_)
-            runs = pd.concat([runs, runs_])
-            assert concrete_tests_ not in concrete_tests, "Duplicate entries unlikely unless something went wrong"
+            concrete_tests_temp, runs_temp = self._generate_concrete_tests(sample_size, rct, seed + i)
+            for test in concrete_tests_temp:
+                if not any((vars(test) == vars(t) for t in concrete_tests)):
+                    concrete_tests.append(test)
+            runs = pd.concat([runs, runs_temp])
+            assert concrete_tests_temp not in concrete_tests, "Duplicate entries unlikely unless something went wrong"
 
             control_configs = pd.DataFrame([{test.treatment_variable: test.control_value} for test in concrete_tests])
             ks_stats = {
@@ -230,7 +217,7 @@ def generate_concrete_tests(
             control_values = [test.control_value for test in concrete_tests]
             treatment_values = [test.treatment_value for test in concrete_tests]
 
-            if self.treatment_variable.datatype is bool and set([(True, False), (False, True)]).issubset(
+            if self.treatment_variable.datatype is bool and {(True, False), (False, True)}.issubset(
                 set(zip(control_values, treatment_values))
             ):
                 pre_break = True
@@ -244,7 +231,7 @@ def generate_concrete_tests(
             ).issubset(zip(control_values, treatment_values)):
                 pre_break = True
                 break
-            elif target_ks_score and all((stat <= target_ks_score for stat in ks_stats.values())):
+            if target_ks_score and all((stat <= target_ks_score for stat in ks_stats.values())):
                 pre_break = True
                 break
 
@@ -256,3 +243,30 @@ def generate_concrete_tests(
                 len(concrete_tests),
             )
         return concrete_tests, runs
+
+    def _optimizer_model(self, run_columns: Iterable[str], row: pd.Series) -> z3.ModelRef:
+        """Solve for a z3 model that satisfies the constraints and stays close to a sampled row.
+
+        Scenario and intervention constraints are asserted (and tracked by their string form); each sampled
+        value is added as a soft equality, so it is honoured only where the constraints allow it.
+
+        :param run_columns: Names of the scenario variables to bind to the sampled values
+        :param row: A pandas Series containing a row from the samples dataframe
+        :return: The model produced by the optimizer after checking the constraints
+        :rtype: z3.ModelRef
+        """
+        optimizer = z3.Optimize()
+        for constraint in itertools.chain(self.scenario.constraints, self.intervention_constraints):
+            optimizer.assert_and_track(constraint, str(constraint))
+
+        for name in run_columns:
+            variable = self.scenario.variables[name]
+            optimizer.add_soft(variable.z3 == variable.z3_val(variable.z3, row[name]))
+
+        if optimizer.check() == z3.unsat:
+            logger.warning(
+                f"Satisfiability of test case was unsat.\n"
+                f"Constraints \n {optimizer} \n Unsat core {optimizer.unsat_core()}",
+            )
+        # NOTE(review): model() is still requested after an unsat result, as before — confirm z3 behaviour here.
+        return optimizer.model()
@@ -1,3 +1,5 @@
+"""This module contains the JsonUtility class, details of using this class can be found here:
+https://causal-testing-framework.readthedocs.io/en/latest/json_front_end.html"""
 import argparse
 import json
 import logging
@@ -68,7 +70,6 @@ def set_path(self, json_path: str, dag_path: str, data_path: str):
         self.data_path = Path(data_path)
 
     def set_variables(self, inputs: dict, outputs: dict, metas: dict):
-
         """Populate the Causal Variables
         :param inputs:
         :param outputs:
@@ -137,9 +138,8 @@ def _execute_tests(self, concrete_tests, estimators, test, f_flag):
         return failures
 
     def _json_parse(self):
-
         """Parse a JSON input file into inputs, outputs, metas and a test plan"""
-        with open(self.json_path) as f:
+        with open(self.json_path, encoding="utf-8") as f:
             self.test_plan = json.load(f)
 
         self.data = pd.read_csv(self.data_path)
@@ -179,7 +179,10 @@ def _execute_test_case(self, causal_test_case: CausalTestCase, estimator: Estima
 
         result_string = str()
         if causal_test_result.ci_low() and causal_test_result.ci_high():
-            result_string = f"{causal_test_result.ci_low()} < {causal_test_result.test_value.value} <  {causal_test_result.ci_high()}"
+            result_string = (
+                f"{causal_test_result.ci_low()} < {causal_test_result.test_value.value} <  "
+                f"{causal_test_result.ci_high()}"
+            )
         else:
             result_string = f"{causal_test_result.test_value.value} no confidence intervals"
         if f_flag:
@@ -218,7 +221,7 @@ def _setup_test(self, causal_test_case: CausalTestCase, estimator: Estimator) ->
 
         return causal_test_engine, estimation_model
 
-    def add_modelling_assumptions(self, estimation_model: Estimator):
+    def add_modelling_assumptions(self, estimation_model: Estimator):  # pylint: disable=unused-argument
         """Optional abstract method where user functionality can be written to determine what assumptions are required
         for specific test cases
         :param estimation_model: estimator model instance for the current running test.
 
@@ -1,15 +1,18 @@
+"""This module contains the CausalDAG class, as well as the functions list_all_min_sep and close_separator"""
+
+from __future__ import annotations
+
 import logging
 from itertools import combinations
 from random import sample
-from typing import TypeVar, Union
+from typing import Union
 
 import networkx as nx
 
 from .scenario import Scenario
 from .variable import Output
 
 Node = Union[str, int]  # Node type hint: A node is a string or an int
-CausalDAG = TypeVar("CausalDAG")
 
 logger = logging.getLogger(__name__)
 
@@ -49,7 +52,6 @@ def list_all_min_sep(
 
     # 4. Confirm that the connected component containing the treatment node is disjoint with the outcome node set
     if not treatment_connected_component_node_set.intersection(outcome_node_set):
-
         # 5. Update the treatment node set to the set of nodes in the connected component containing the treatment node
         treatment_node_set = treatment_connected_component_node_set
 
@@ -60,7 +62,6 @@ def list_all_min_sep(
 
         # 7. Check that there exists at least one neighbour of the treatment nodes that is not in the outcome node set
         if treatment_node_set_neighbours.difference(outcome_node_set):
-
             # 7.1. If so, sample a random node from the set of treatment nodes' neighbours not in the outcome node set
             node = set(sample(treatment_node_set_neighbours.difference(outcome_node_set), 1))
 
@@ -82,7 +83,6 @@ def list_all_min_sep(
                 outcome_node_set.union(node),
             )
         else:
-
             # 8. If all neighbours of the treatments nodes are in the outcome node set, return the set of treatment
             # node neighbours
             yield treatment_node_set_neighbours
@@ -352,10 +352,8 @@ def adjustment_set_is_minimal(self, treatments: list[str], outcomes: list[str],
                 proper_backdoor_graph, treatments, outcomes, smaller_adjustment_set
             ):
                 logger.info(
-                    "Z=%s is not minimal because Z'=Z\\{{'%s'}}=" "%s is also a valid adjustment set.",
-                    adjustment_set,
-                    variable,
-                    smaller_adjustment_set,
+                    f"Z={adjustment_set} is not minimal because Z'=Z\\{variable} = {smaller_adjustment_set} is also a"
+                    f"valid adjustment set.",
                 )
                 return False
 
@@ -466,7 +464,7 @@ def depends_on_outputs(self, node: Node, scenario: Scenario) -> bool:
         """
         if isinstance(scenario.variables[node], Output):
             return True
-        return any([self.depends_on_outputs(n, scenario) for n in self.graph.predecessors(node)])
+        return any((self.depends_on_outputs(n, scenario) for n in self.graph.predecessors(node)))
 
     def identification(self, base_test_case):
         """Identify and return the minimum adjustment set
 
@@ -1,22 +1,23 @@
-import logging
+"""This module holds the abstract CausalSpecification data class, which holds a Scenario and CausalDag"""
+
 from abc import ABC
+from dataclasses import dataclass
 from typing import Union
 
 from causal_testing.specification.causal_dag import CausalDAG
 from causal_testing.specification.scenario import Scenario
 
 Node = Union[str, int]  # Node type hint: A node is a string or an int
-logger = logging.getLogger(__name__)
 
 
+@dataclass
 class CausalSpecification(ABC):
     """
     Abstract Class for the Causal Specification (combination of Scenario and Causal Dag)
     """
 
-    def __init__(self, scenario: Scenario, causal_dag: CausalDAG):
-        self.scenario = scenario
-        self.causal_dag = causal_dag
+    scenario: Scenario
+    causal_dag: CausalDAG
 
     def __str__(self):
         return f"Scenario: {self.scenario}\nCausal DAG:\n{self.causal_dag}"
@@ -1,3 +1,4 @@
+"""This module holds the Scenario Class"""
 from collections.abc import Iterable, Mapping
 
 from tabulate import tabulate
@@ -30,12 +31,16 @@ def __init__(self, variables: Iterable[Variable] = None, constraints: set[ExprRe
         if variables is not None:
             self.variables = {v.name: v for v in variables}
         else:
-            self.variables = dict()
+            self.variables = {}
         if constraints is not None:
             self.constraints = set(constraints)
         else:
             self.constraints = set()
 
+        self.prime = {}
+        self.unprime = {}
+        self.treatment_variables = {}
+
     def __str__(self):
         """Returns a printable string of a scenario, e.g.
         Modelling scenario with variables:
@@ -94,9 +99,6 @@ def setup_treatment_variables(self) -> None:
         to the constraint set such that the "primed" variables are constrained in
         the same way as their unprimed counterparts.
         """
-        self.prime = {}
-        self.unprime = {}
-        self.treatment_variables = {}
         for k, v in self.variables.items():
             v_prime = self._fresh(v)
             self.treatment_variables[k] = v_prime
@@ -141,4 +143,7 @@ def metas(self) -> set[Meta]:
         return self.variables_of_type(Meta)
 
     def add_variable(self, v: Variable) -> None:
+        """Add a variable to the scenario's variables mapping, keyed by its name
+        :param v: Variable to be added (replaces any existing entry with the same name)
+        """
-        self.variables[v.name]: v
+        self.variables[v.name] = v