Skip to content

Commit b5d2c0a

Browse files
Merge branch 'main' into update-readme
# Conflicts:
#   causal_testing/json_front/json_class.py
#   causal_testing/testing/estimators.py
2 parents 2b4839d + 6764fb3 commit b5d2c0a

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

41 files changed

+996
-761
lines changed

.github/ISSUE_TEMPLATE/bug_report.md

Lines changed: 38 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,38 @@
1+
---
2+
name: Bug report
3+
about: Create a report to help us improve
4+
title: ''
5+
labels: ''
6+
assignees: ''
7+
8+
---
9+
10+
**Describe the bug**
11+
A clear and concise description of what the bug is.
12+
13+
**To Reproduce**
14+
Steps to reproduce the behavior:
15+
1. Go to '...'
16+
2. Click on '....'
17+
3. Scroll down to '....'
18+
4. See error
19+
20+
**Expected behavior**
21+
A clear and concise description of what you expected to happen.
22+
23+
**Screenshots**
24+
If applicable, add screenshots to help explain your problem.
25+
26+
**Desktop (please complete the following information):**
27+
- OS: [e.g. iOS]
28+
- Browser [e.g. chrome, safari]
29+
- Version [e.g. 22]
30+
31+
**Smartphone (please complete the following information):**
32+
- Device: [e.g. iPhone6]
33+
- OS: [e.g. iOS8.1]
34+
- Browser [e.g. stock browser, safari]
35+
- Version [e.g. 22]
36+
37+
**Additional context**
38+
Add any other context about the problem here.
Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,20 @@
1+
---
2+
name: Feature request
3+
about: Suggest an idea for this project
4+
title: ''
5+
labels: ''
6+
assignees: ''
7+
8+
---
9+
10+
**Is your feature request related to a problem? Please describe.**
11+
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12+
13+
**Describe the solution you'd like**
14+
A clear and concise description of what you want to happen.
15+
16+
**Describe alternatives you've considered**
17+
A clear and concise description of any alternative solutions or features you've considered.
18+
19+
**Additional context**
20+
Add any other context or screenshots about the feature request here.

.github/workflows/publish-to-pypi.yaml

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,10 @@
11
name: Publish python PyPI
22

3+
on:
4+
push:
5+
tags:
6+
- v*
7+
38
jobs:
49
build-release:
510
name: Build and publish PyPI
@@ -17,6 +22,8 @@ jobs:
1722
pip3 install .
1823
pip3 install .[pypi]
1924
pip3 install build
25+
pip3 install setuptools --upgrade
26+
pip3 install setuptools_scm
2027
- name: Build Package
2128
run: |
2229
python -m build --no-isolation

.github/workflows/publish-to-test-pypi.yaml

Lines changed: 0 additions & 32 deletions
This file was deleted.

.pylintrc

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -152,6 +152,8 @@ disable=raw-checker-failed,
152152
useless-suppression,
153153
deprecated-pragma,
154154
use-symbolic-message-instead,
155+
logging-fstring-interpolation,
156+
import-error,
155157

156158
# Enable the message, report, category or checker with the given id(s). You can
157159
# either give multiple identifier separated by comma (,) or put this option
@@ -239,7 +241,9 @@ good-names=i,
239241
j,
240242
k,
241243
ex,
244+
df,
242245
Run,
246+
z3,
243247
_
244248

245249
# Good variable names regexes, separated by a comma. If names match any regex,

causal_testing/__init__.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,13 @@
1+
"""
2+
This is the CausalTestingFramework Module
3+
It contains 5 subpackages:
4+
data_collection
5+
generation
6+
json_front
7+
specification
8+
testing
9+
"""
10+
111
import logging
212

313
logger = logging.getLogger(__name__)

causal_testing/data_collection/data_collector.py

Lines changed: 18 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,6 @@
1+
"""This module contains the DataCollector abstract class, as well as its concrete extensions: ExperimentalDataCollector
2+
and ObservationalDataCollector"""
3+
14
import logging
25
from abc import ABC, abstractmethod
36
from enum import Enum
@@ -35,11 +38,15 @@ def filter_valid_data(self, data: pd.DataFrame, check_pos: bool = True) -> pd.Da
3538
"""
3639

3740
# Check positivity
38-
scenario_variables = set(self.scenario.variables)
41+
scenario_variables = set(self.scenario.variables) - {x.name for x in self.scenario.hidden_variables()}
3942

40-
if check_pos and not scenario_variables.issubset(data.columns):
43+
if check_pos and not (scenario_variables - {x.name for x in self.scenario.hidden_variables()}).issubset(
44+
set(data.columns)
45+
):
4146
missing_variables = scenario_variables - set(data.columns)
42-
raise IndexError(f"Positivity violation: missing data for variables {missing_variables}.")
47+
raise IndexError(
48+
f"Missing columns: missing data for variables {missing_variables}. Should they be marked as hidden?"
49+
)
4350

4451
# For each row, does it satisfy the constraints?
4552
solver = z3.Solver()
@@ -54,6 +61,7 @@ def filter_valid_data(self, data: pd.DataFrame, check_pos: bool = True) -> pd.Da
5461
self.scenario.variables[var].z3
5562
== self.scenario.variables[var].z3_val(self.scenario.variables[var].z3, row[var])
5663
for var in self.scenario.variables
64+
if var in row
5765
]
5866
for c in model:
5967
solver.assert_and_track(c, f"model: {c}")
@@ -73,10 +81,7 @@ def filter_valid_data(self, data: pd.DataFrame, check_pos: bool = True) -> pd.Da
7381
size_diff = len(data) - len(satisfying_data)
7482
if size_diff > 0:
7583
logger.warning(
76-
"Discarded %s/%s values due to constraint violations.\n" "For example%s",
77-
size_diff,
78-
len(data),
79-
unsat_core,
84+
f"Discarded {size_diff}/{len(data)} values due to constraint violations.\n For example {unsat_core}",
8085
)
8186
return satisfying_data
8287

@@ -122,22 +127,23 @@ def run_system_with_input_configuration(self, input_configuration: dict) -> pd.D
122127

123128

124129
class ObservationalDataCollector(DataCollector):
125-
"""A data collector that extracts data that is relevant to the specified scenario from a csv of execution data."""
130+
"""A data collector that extracts data that is relevant to the specified scenario from a dataframe of execution
131+
data."""
126132

127-
def __init__(self, scenario: Scenario, csv_path: str):
133+
def __init__(self, scenario: Scenario, data: pd.DataFrame):
128134
super().__init__(scenario)
129-
self.csv_path = csv_path
135+
self.data = data
130136

131137
def collect_data(self, **kwargs) -> pd.DataFrame:
132-
"""Read a csv containing execution data for the system-under-test into a pandas dataframe and filter to remove
138+
"""Read a pandas dataframe and filter to remove
133139
any data which is invalid for the scenario-under-test.
134140
135141
Data is invalid if it does not meet the constraints outlined in the scenario-under-test (Scenario).
136142
137143
:return: A pandas dataframe containing execution data that is valid for the scenario-under-test.
138144
"""
139145

140-
execution_data_df = pd.read_csv(self.csv_path, **kwargs)
146+
execution_data_df = self.data
141147
for meta in self.scenario.metas():
142148
meta.populate(execution_data_df)
143149
scenario_execution_data_df = self.filter_valid_data(execution_data_df)

causal_testing/generation/abstract_causal_test_case.py

Lines changed: 52 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -1,18 +1,21 @@
1+
"""This module contains the class AbstractCausalTestCase, which generates concrete test cases"""
2+
import itertools
13
import logging
4+
from enum import Enum
5+
from typing import Iterable
26

37
import lhsmdu
48
import pandas as pd
59
import z3
610
from scipy import stats
7-
import itertools
11+
812

913
from causal_testing.specification.scenario import Scenario
1014
from causal_testing.specification.variable import Variable
1115
from causal_testing.testing.causal_test_case import CausalTestCase
1216
from causal_testing.testing.causal_test_outcome import CausalTestOutcome
1317
from causal_testing.testing.base_test_case import BaseTestCase
1418

15-
from enum import Enum
1619

1720
logger = logging.getLogger(__name__)
1821

@@ -25,6 +28,7 @@ class AbstractCausalTestCase:
2528
"""
2629

2730
def __init__(
31+
# pylint: disable=too-many-arguments
2832
self,
2933
scenario: Scenario,
3034
intervention_constraints: set[z3.ExprRef],
@@ -60,7 +64,9 @@ def __str__(self):
6064
)
6165
return f"When we apply intervention {self.intervention_constraints}, {outcome_string}"
6266

63-
def datapath(self):
67+
def datapath(self) -> str:
68+
"""Create and return the sanitised data path"""
69+
6470
def sanitise(string):
6571
return "".join([x for x in string if x.isalnum()])
6672

@@ -72,7 +78,11 @@ def sanitise(string):
7278
)
7379

7480
def _generate_concrete_tests(
75-
self, sample_size: int, rct: bool = False, seed: int = 0
81+
# pylint: disable=too-many-locals
82+
self,
83+
sample_size: int,
84+
rct: bool = False,
85+
seed: int = 0,
7686
) -> tuple[list[CausalTestCase], pd.DataFrame]:
7787
"""Generates a list of `num` concrete test cases.
7888
@@ -101,25 +111,7 @@ def _generate_concrete_tests(
101111
samples[var.name] = lhsmdu.inverseTransformSample(var.distribution, samples[var.name])
102112

103113
for index, row in samples.iterrows():
104-
optimizer = z3.Optimize()
105-
for c in self.scenario.constraints:
106-
optimizer.assert_and_track(c, str(c))
107-
for c in self.intervention_constraints:
108-
optimizer.assert_and_track(c, str(c))
109-
110-
for v in run_columns:
111-
optimizer.add_soft(
112-
self.scenario.variables[v].z3
113-
== self.scenario.variables[v].z3_val(self.scenario.variables[v].z3, row[v])
114-
)
115-
116-
if optimizer.check() == z3.unsat:
117-
logger.warning(
118-
"Satisfiability of test case was unsat.\n" "Constraints \n %s \n Unsat core %s",
119-
optimizer,
120-
optimizer.unsat_core(),
121-
)
122-
model = optimizer.model()
114+
model = self._optimizer_model(run_columns, row)
123115

124116
base_test_case = BaseTestCase(
125117
treatment_variable=self.treatment_variable,
@@ -146,7 +138,7 @@ def _generate_concrete_tests(
146138
+ f"{constraints}\nUsing value {v.cast(model[v.z3])} instead in test\n{concrete_test}"
147139
)
148140

149-
if not any([vars(t) == vars(concrete_test) for t in concrete_tests]):
141+
if not any((vars(t) == vars(concrete_test) for t in concrete_tests)):
150142
concrete_tests.append(concrete_test)
151143
# Control run
152144
control_run = {
@@ -164,6 +156,7 @@ def _generate_concrete_tests(
164156
return concrete_tests, pd.DataFrame(runs, columns=run_columns + ["bin"])
165157

166158
def generate_concrete_tests(
159+
# pylint: disable=too-many-arguments, too-many-locals
167160
self,
168161
sample_size: int,
169162
target_ks_score: float = None,
@@ -197,12 +190,12 @@ def generate_concrete_tests(
197190

198191
pre_break = False
199192
for i in range(hard_max):
200-
concrete_tests_, runs_ = self._generate_concrete_tests(sample_size, rct, seed + i)
201-
for t_ in concrete_tests_:
202-
if not any([vars(t_) == vars(t) for t in concrete_tests]):
203-
concrete_tests.append(t_)
204-
runs = pd.concat([runs, runs_])
205-
assert concrete_tests_ not in concrete_tests, "Duplicate entries unlikely unless something went wrong"
193+
concrete_tests_temp, runs_temp = self._generate_concrete_tests(sample_size, rct, seed + i)
194+
for test in concrete_tests_temp:
195+
if not any((vars(test) == vars(t) for t in concrete_tests)):
196+
concrete_tests.append(test)
197+
runs = pd.concat([runs, runs_temp])
198+
assert concrete_tests_temp not in concrete_tests, "Duplicate entries unlikely unless something went wrong"
206199

207200
control_configs = pd.DataFrame([{test.treatment_variable: test.control_value} for test in concrete_tests])
208201
ks_stats = {
@@ -230,7 +223,7 @@ def generate_concrete_tests(
230223
control_values = [test.control_value for test in concrete_tests]
231224
treatment_values = [test.treatment_value for test in concrete_tests]
232225

233-
if self.treatment_variable.datatype is bool and set([(True, False), (False, True)]).issubset(
226+
if self.treatment_variable.datatype is bool and {(True, False), (False, True)}.issubset(
234227
set(zip(control_values, treatment_values))
235228
):
236229
pre_break = True
@@ -244,7 +237,7 @@ def generate_concrete_tests(
244237
).issubset(zip(control_values, treatment_values)):
245238
pre_break = True
246239
break
247-
elif target_ks_score and all((stat <= target_ks_score for stat in ks_stats.values())):
240+
if target_ks_score and all((stat <= target_ks_score for stat in ks_stats.values())):
248241
pre_break = True
249242
break
250243

@@ -256,3 +249,30 @@ def generate_concrete_tests(
256249
len(concrete_tests),
257250
)
258251
return concrete_tests, runs
252+
253+
def _optimizer_model(self, run_columns: Iterable[str], row: pd.core.series) -> z3.Optimize:
254+
"""
255+
:param run_columns: A sorted list of Variable names from the scenario variables
256+
:param row: A pandas Series containing a row from the Samples dataframe
257+
:return: z3 optimize model with constraints tracked and soft constraints added
258+
:rtype: z3.Optimize
259+
"""
260+
optimizer = z3.Optimize()
261+
for c in self.scenario.constraints:
262+
optimizer.assert_and_track(c, str(c))
263+
for c in self.intervention_constraints:
264+
optimizer.assert_and_track(c, str(c))
265+
266+
for v in run_columns:
267+
optimizer.add_soft(
268+
self.scenario.variables[v].z3
269+
== self.scenario.variables[v].z3_val(self.scenario.variables[v].z3, row[v])
270+
)
271+
272+
if optimizer.check() == z3.unsat:
273+
logger.warning(
274+
f"Satisfiability of test case was unsat.\n"
275+
f"Constraints \n {optimizer} \n Unsat core {optimizer.unsat_core()}",
276+
)
277+
model = optimizer.model()
278+
return model

0 commit comments

Comments
 (0)