linter

jmafoster1 · jmafoster1 · commit 1604d7a8fdc2 · 2023-08-03T12:07:13.000+01:00
diff --git a/causal_testing/json_front/json_class.py b/causal_testing/json_front/json_class.py
@@ -11,7 +11,6 @@
 from statistics import StatisticsError
 
 import pandas as pd
-import numpy as np
 import scipy
 from fitter import Fitter, get_common_distributions
 
@@ -68,7 +67,7 @@ def set_paths(self, json_path: str, dag_path: str, data_paths: list[str] = None)
             data_paths = []
         self.input_paths = JsonClassPaths(json_path=json_path, dag_path=dag_path, data_paths=data_paths)
 
-    def setup(self, scenario: Scenario, data=[]):
+    def setup(self, scenario: Scenario, data=None):
         """Function to populate all the necessary parts of the json_class needed to execute tests"""
         self.scenario = scenario
         self._get_scenario_variables()
@@ -82,7 +81,7 @@ def setup(self, scenario: Scenario, data=[]):
         # Populate the data
         if self.input_paths.data_paths:
             data = pd.concat([pd.read_csv(data_file, header=0) for data_file in self.input_paths.data_paths])
-        if len(data) == 0:
+        if data is None or len(data) == 0:
             raise ValueError(
                 "No data found. Please either provide a path to a file containing data or manually populate the .data "
                 "attribute with a dataframe before calling .setup()"
@@ -131,7 +130,7 @@ def run_json_tests(self, effects: dict, estimators: dict, f_flag: bool = False,
             test["estimator"] = estimators[test["estimator"]]
             # If we have specified concrete control and treatment value
             if "mutations" not in test:
-                failed, msg = self._run_concrete_metamorphic_test(test, f_flag, effects, mutates)
+                failed, msg = self._run_concrete_metamorphic_test(test, f_flag, effects)
             # If we have a variable to mutate
             else:
                 if test["estimate_type"] == "coefficient":
@@ -176,7 +175,7 @@ def _run_coefficient_test(self, test: dict, f_flag: bool, effects: dict):
         self._append_to_file(msg, logging.INFO)
         return failed, result
 
-    def _run_concrete_metamorphic_test(self, test: dict, f_flag: bool, effects: dict, mutates: dict):
+    def _run_concrete_metamorphic_test(self, test: dict, f_flag: bool, effects: dict):
         outcome_variable = next(iter(test["expected_effect"]))  # Take first key from dictionary of expected effect
         base_test_case = BaseTestCase(
             treatment_variable=self.variables["inputs"][test["treatment_variable"]],
diff --git a/causal_testing/testing/causal_test_adequacy.py b/causal_testing/testing/causal_test_adequacy.py
@@ -1,39 +1,66 @@
 """
 This module contains code to measure various aspects of causal test adequacy.
 """
+from itertools import combinations
+from copy import deepcopy
+import pandas as pd
+
 from causal_testing.testing.causal_test_suite import CausalTestSuite
 from causal_testing.data_collection.data_collector import DataCollector
 from causal_testing.specification.causal_specification import CausalSpecification
 from causal_testing.testing.estimators import Estimator
 from causal_testing.testing.causal_test_case import CausalTestCase
-from itertools import combinations
-from copy import deepcopy
-from sklearn.model_selection import KFold
-from sklearn.metrics import mean_squared_error as mse
-import numpy as np
-from sklearn.model_selection import cross_val_score
-import pandas as pd
 
 
 class DAGAdequacy:
+    """
+    Measures the adequacy of a given DAG by how many edges and independences are tested.
+    """
+
     def __init__(
         self,
         causal_specification: CausalSpecification,
         test_suite: CausalTestSuite,
     ):
         self.causal_dag = causal_specification.causal_dag
         self.test_suite = test_suite
+        self.tested_pairs = None
+        self.pairs_to_test = None
+        self.untested_edges = None
+        self.dag_adequacy = None
 
     def measure_adequacy(self):
+        """
+        Calculate the adequacy measurement, and populate the `dag_adequacy` field.
+        """
         self.tested_pairs = {
-            (t.base_test_case.treatment_variable, t.base_test_case.outcome_variable) for t in self.causal_test_suite
+            (t.base_test_case.treatment_variable, t.base_test_case.outcome_variable) for t in self.test_suite
         }
         self.pairs_to_test = set(combinations(self.causal_dag.graph.nodes, 2))
-        self.untested_edges = pairs_to_test.difference(tested_pairs)
-        self.dag_adequacy = len(tested_pairs) / len(pairs_to_test)
+        self.untested_edges = self.pairs_to_test.difference(self.tested_pairs)
+        self.dag_adequacy = len(self.tested_pairs) / len(self.pairs_to_test)
+
+    def to_dict(self):
+        "Returns the adequacy object as a dictionary."
+        return {
+            "causal_dag": self.causal_dag,
+            "test_suite": self.test_suite,
+            "tested_pairs": self.tested_pairs,
+            "pairs_to_test": self.pairs_to_test,
+            "untested_edges": self.untested_edges,
+            "dag_adequacy": self.dag_adequacy,
+        }
 
 
 class DataAdequacy:
+    """
+    Measures the adequacy of a given test according to the Fisher kurtosis of the bootstrapped result.
+    - Positive kurtoses indicate the model doesn't have enough data so is unstable.
+    - Negative kurtoses indicate the model doesn't have enough data, but is too stable, indicating that the spread of
+      inputs is insufficient.
+    - Zero kurtosis is optimal.
+    """
+
     def __init__(
         self, test_case: CausalTestCase, estimator: Estimator, data_collector: DataCollector, bootstrap_size: int = 100
     ):
@@ -45,6 +72,9 @@ def __init__(
         self.bootstrap_size = bootstrap_size
 
     def measure_adequacy(self):
+        """
+        Calculate the adequacy measurement, and populate the data_adequacy field.
+        """
         results = []
         for i in range(self.bootstrap_size):
             estimator = deepcopy(self.estimator)
@@ -75,4 +105,5 @@ def convert_to_df(field):
         self.outcomes = sum(outcomes)
 
     def to_dict(self):
+        "Returns the adequacy object as a dictionary."
         return {"kurtosis": self.kurtosis.to_dict(), "bootstrap_size": self.bootstrap_size, "passing": self.outcomes}
diff --git a/causal_testing/testing/causal_test_case.py b/causal_testing/testing/causal_test_case.py
@@ -18,8 +18,8 @@ class CausalTestCase:
     """
     A CausalTestCase extends the information held in a BaseTestCase. As well as storing the treatment and outcome
     variables, a CausalTestCase stores the values of these variables. Also the outcome variable and value are
-    specified. The goal of a CausalTestCase is to test whether the intervention made to the control via the treatment causes the
-    model-under-test to produce the expected change.
+    specified. The goal of a CausalTestCase is to test whether the intervention made to the control via the treatment
+    causes the model-under-test to produce the expected change.
     """
 
     def __init__(
diff --git a/causal_testing/testing/causal_test_outcome.py b/causal_testing/testing/causal_test_outcome.py
@@ -43,7 +43,8 @@ class NoEffect(CausalTestOutcome):
 
     def __init__(self, atol: float = 1e-10, ctol: float = 0.05):
         """
-        :param atol: Arithmetic tolerance. The test will pass if the absolute value of the causal effect is less than atol.
+        :param atol: Arithmetic tolerance. The test will pass if the absolute value of the causal effect is less than
+                     atol.
         :param ctol: Categorical tolerance. The test will pass if this proportion of categories pass.
         """
         self.atol = atol