Merge branch 'main' into z3_operations

jmafoster1 · web-flow · commit d042d4fa2130 · 2023-03-21T14:29:41.000Z
diff --git a/causal_testing/data_collection/data_collector.py b/causal_testing/data_collection/data_collector.py
@@ -112,8 +112,10 @@ def collect_data(self, **kwargs) -> pd.DataFrame:
         executions.
         """
         control_results_df = self.run_system_with_input_configuration(self.control_input_configuration)
+        control_results_df.rename(lambda x: f"control_{x}", inplace=True)
         treatment_results_df = self.run_system_with_input_configuration(self.treatment_input_configuration)
-        results_df = pd.concat([control_results_df, treatment_results_df], ignore_index=True)
+        treatment_results_df.rename(lambda x: f"treatment_{x}", inplace=True)
+        results_df = pd.concat([control_results_df, treatment_results_df], ignore_index=False)
         return results_df
 
     @abstractmethod
diff --git a/causal_testing/specification/metamorphic_relation.py b/causal_testing/specification/metamorphic_relation.py
@@ -0,0 +1,219 @@
+"""
+This module contains the ShouldCause and ShouldNotCause metamorphic relations as
+defined in our ICST paper [https://eprints.whiterose.ac.uk/195317/].
+"""
+
+from dataclasses import dataclass
+from abc import abstractmethod
+from typing import Iterable
+from itertools import combinations
+import numpy as np
+import pandas as pd
+import networkx as nx
+
+from causal_testing.specification.causal_specification import CausalDAG, Node
+from causal_testing.data_collection.data_collector import ExperimentalDataCollector
+
+
+@dataclass(order=True)
+class MetamorphicRelation:
+    """Base class for a metamorphic relation between a treatment variable and an output variable.
+
+    Subclasses provide ``assertion`` (applied to each individual test run) and ``test_oracle``
+    (applied to the collected results of all runs).
+
+    :param treatment_var: The variable whose value differs between the source and follow-up run.
+    :param output_var: The variable whose value is compared between the two runs.
+    :param adjustment_vars: Variables that receive random values together with the DAG's root nodes.
+    :param dag: The causal DAG the relation belongs to.
+    :param tests: The generated metamorphic tests; populated by ``generate_follow_up``.
+    """
+
+    treatment_var: Node
+    output_var: Node
+    adjustment_vars: Iterable[Node]
+    dag: CausalDAG
+    tests: Iterable = None
+
+    def generate_follow_up(self, n_tests: int, min_val: float, max_val: float, seed: int = 0):
+        """Generate numerical source/follow-up input configurations and store them in ``self.tests``.
+
+        :param n_tests: Number of metamorphic tests to generate.
+        :param min_val: Lower bound of the generated values.
+        :param max_val: Upper bound of the generated values. It is exclusive for the randomly
+                        assigned non-treatment inputs (``np.random.randint``) and inclusive for
+                        the treatment's source/follow-up values.
+        :param seed: Seed applied to NumPy's global random state.
+        :raises ValueError: If ``n_tests`` exceeds the number of distinct source/follow-up pairs.
+        """
+        np.random.seed(seed)
+
+        # Variables to randomise: every node without parents plus the adjustment set,
+        # but never the treatment itself (its values come from the source/follow-up pairs).
+        variables_to_change = {node for node in self.dag.graph.nodes if self.dag.graph.in_degree(node) == 0}
+        if self.adjustment_vars:
+            variables_to_change |= set(self.adjustment_vars)
+        variables_to_change.discard(self.treatment_var)
+
+        # Assign random numerical values to the variables to change
+        test_inputs = pd.DataFrame(
+            np.random.randint(min_val, max_val, size=(n_tests, len(variables_to_change))),
+            columns=sorted(variables_to_change),
+        )
+
+        # Enumerate the possible (source, follow-up) value pairs for the treatment
+        candidate_source_follow_up_pairs = np.array(list(combinations(range(int(min_val), int(max_val + 1)), 2)))
+
+        # Sampling without replacement cannot yield more tests than there are pairs; fail with
+        # an explicit message instead of the generic error raised inside NumPy.
+        n_candidates = candidate_source_follow_up_pairs.shape[0]
+        if n_tests > n_candidates:
+            raise ValueError(
+                f"Cannot sample {n_tests} source/follow-up pairs without replacement: only "
+                f"{n_candidates} distinct pairs exist between {min_val} and {max_val}."
+            )
+        sampled_source_follow_up_indices = np.random.choice(n_candidates, n_tests, replace=False)
+
+        # The follow-up column is the treatment name with a trailing prime, e.g. "X" and "X'"
+        follow_up_input = f"{self.treatment_var}'"
+        source_follow_up_test_inputs = pd.DataFrame(
+            candidate_source_follow_up_pairs[sampled_source_follow_up_indices],
+            columns=sorted([self.treatment_var] + [follow_up_input]),
+        )
+
+        source_records = source_follow_up_test_inputs[[self.treatment_var]].to_dict(orient="records")
+        # Follow-up records are keyed by the plain treatment name so they can be used as inputs
+        follow_up_records = (
+            source_follow_up_test_inputs[[follow_up_input]]
+            .rename(columns={follow_up_input: self.treatment_var})
+            .to_dict(orient="records")
+        )
+        # With nothing else to vary, pair every test with an empty set of other inputs
+        if test_inputs.empty:
+            other_records = [{}] * len(source_follow_up_test_inputs)
+        else:
+            other_records = test_inputs.to_dict(orient="records")
+
+        relation_label = str(self)
+        self.tests = [
+            MetamorphicTest(source_inputs, follow_up_inputs, other_inputs, self.output_var, relation_label)
+            for source_inputs, follow_up_inputs, other_inputs in zip(source_records, follow_up_records, other_records)
+        ]
+
+    def execute_tests(self, data_collector: ExperimentalDataCollector):
+        """Execute the generated list of metamorphic tests, returning a dictionary of tests that pass and fail.
+
+        :param data_collector: An experimental data collector for the system-under-test.
+        :return: A dictionary with the keys ``"pass"`` and ``"fail"``, each mapping to a list of tests.
+        """
+        test_results = {"pass": [], "fail": []}
+        for metamorphic_test in self.tests:
+            # Update the control and treatment configuration to take generated values for source and follow-up tests
+            data_collector.control_input_configuration = metamorphic_test.source_inputs | metamorphic_test.other_inputs
+            data_collector.treatment_input_configuration = (
+                metamorphic_test.follow_up_inputs | metamorphic_test.other_inputs
+            )
+            metamorphic_test_results_df = data_collector.collect_data()
+
+            # Results are looked up by the row labels "control_0" and "treatment_0"
+            control_output = metamorphic_test_results_df.loc["control_0"][metamorphic_test.output]
+            treatment_output = metamorphic_test_results_df.loc["treatment_0"][metamorphic_test.output]
+
+            outcome = "pass" if self.assertion(control_output, treatment_output) else "fail"
+            test_results[outcome].append(metamorphic_test)
+        return test_results
+
+    @abstractmethod
+    def assertion(self, source_output, follow_up_output):
+        """An assertion that should be applied to an individual metamorphic test run."""
+
+    @abstractmethod
+    def test_oracle(self, test_results):
+        """A test oracle that asserts whether the MR holds or not based on ALL test results.
+
+        This method must raise an assertion, not return a bool."""
+
+    def __eq__(self, other):
+        """Two relations are equal when they have the same class, treatment, output and adjustment set.
+
+        The adjustment variables are compared as sets, so order and container type are irrelevant
+        and a missing (``None``) adjustment set is treated as empty. Objects that are not
+        metamorphic relations yield ``NotImplemented`` so Python falls back to its default handling.
+        """
+        if not isinstance(other, MetamorphicRelation):
+            return NotImplemented
+        return (
+            self.__class__ == other.__class__
+            and self.treatment_var == other.treatment_var
+            and self.output_var == other.output_var
+            and set(self.adjustment_vars or ()) == set(other.adjustment_vars or ())
+        )
+
+
+class ShouldCause(MetamorphicRelation):
+    """Metamorphic relation stating that changing the treatment should change the output."""
+
+    def assertion(self, source_output, follow_up_output):
+        """An individual test passes when the source and follow-up outputs differ."""
+        return source_output != follow_up_output
+
+    def test_oracle(self, test_results):
+        """The relation holds as long as at least one test passed, i.e. not every test failed."""
+        n_failed = len(test_results["fail"])
+        n_total = len(self.tests)
+        assert n_failed < n_total, f"{str(self)}: {n_failed}/{n_total} tests failed."
+
+    def __str__(self):
+        """Render as ``treatment --> output``, followed by ``| adjustment set`` when one is given."""
+        relation = f"{self.treatment_var} --> {self.output_var}"
+        if not self.adjustment_vars:
+            return relation
+        return f"{relation} | {self.adjustment_vars}"
+
+
+class ShouldNotCause(MetamorphicRelation):
+    """Metamorphic relation stating that changing the treatment should not change the output."""
+
+    def assertion(self, source_output, follow_up_output):
+        """An individual test passes when the source and follow-up outputs are the same."""
+        return source_output == follow_up_output
+
+    def test_oracle(self, test_results):
+        """The relation holds only if no test failed; a single failing test refutes it."""
+        n_failed = len(test_results["fail"])
+        assert n_failed == 0, f"{str(self)}: {n_failed}/{len(self.tests)} tests failed."
+
+    def __str__(self):
+        """Render as ``treatment _||_ output``, followed by ``| adjustment set`` when one is given."""
+        relation = f"{self.treatment_var} _||_ {self.output_var}"
+        if not self.adjustment_vars:
+            return relation
+        return f"{relation} | {self.adjustment_vars}"
+
+
+@dataclass(order=True)
+class MetamorphicTest:
+    """Class representing a metamorphic test case.
+
+    :param source_inputs: Treatment value(s) used for the source (control) execution.
+    :param follow_up_inputs: Treatment value(s) used for the follow-up (treatment) execution.
+    :param other_inputs: Values of the remaining inputs, shared by both executions.
+    :param output: Name of the output that is compared between the two executions.
+    :param relation: String form of the metamorphic relation the test was generated from.
+    """
+
+    source_inputs: dict
+    follow_up_inputs: dict
+    other_inputs: dict
+    output: str
+    relation: str
+
+    def __str__(self):
+        """Return a multi-line summary with one field per line."""
+        # Joining the fields guarantees a newline between every pair of lines, including
+        # between the output and the relation, which previously ran together.
+        return "\n".join(
+            (
+                f"Source inputs: {self.source_inputs}",
+                f"Follow-up inputs: {self.follow_up_inputs}",
+                f"Other inputs: {self.other_inputs}",
+                f"Output: {self.output}",
+                f"Metamorphic Relation: {self.relation}",
+            )
+        )
+
+
+def generate_metamorphic_relations(dag: CausalDAG) -> list[MetamorphicRelation]:
+    """Derive the metamorphic relations implied by a causal DAG.
+
+    Every unordered pair of nodes yields exactly one relation: a ShouldCause relation when the
+    pair is joined by an edge (oriented along that edge), otherwise a ShouldNotCause relation.
+    Each relation carries the first adjustment set returned by
+    ``dag.direct_effect_adjustment_sets`` for its (cause, effect) ordering.
+
+    :param CausalDAG dag: Causal DAG from which the metamorphic relations will be generated.
+    :return: A list containing ShouldCause and ShouldNotCause metamorphic relations.
+    """
+    graph = dag.graph
+    relations = []
+    for u, v in combinations(graph.nodes, 2):
+        if (u, v) in graph.edges:
+            # Direct edge U --> V
+            relation_type, cause, effect = ShouldCause, u, v
+        elif (v, u) in graph.edges:
+            # Direct edge V --> U
+            relation_type, cause, effect = ShouldCause, v, u
+        elif u not in nx.ancestors(graph, v) and v in nx.ancestors(graph, u):
+            # Only V --> ... --> U exists, so V takes the role of the cause.
+            relation_type, cause, effect = ShouldNotCause, v, u
+        else:
+            # Either U --> ... --> V, or no directed walk joins the pair (a back-door path such
+            # as U <-- Z --> V may still exist). One relation suffices since V _||_ U == U _||_ V.
+            relation_type, cause, effect = ShouldNotCause, u, v
+
+        adjustment_set = list(dag.direct_effect_adjustment_sets([cause], [effect])[0])
+        relations.append(relation_type(cause, effect, adjustment_set, dag))
+
+    return relations
diff --git a/causal_testing/testing/causal_test_outcome.py b/causal_testing/testing/causal_test_outcome.py
@@ -22,7 +22,29 @@ def __str__(self) -> str:
         return type(self).__name__
 
 
-class ExactValue(CausalTestOutcome):
+class SomeEffect(CausalTestOutcome):
+    """An extension of TestOutcome representing that the expected causal effect should not be null."""
+
+    def apply(self, res: CausalTestResult) -> bool:
+        """Check that the confidence interval lies entirely on one side of the null value.
+
+        The null value is 0 for an "ate" test value and 1 for a "risk_ratio" test value.
+
+        :param res: The causal test result to check.
+        :raises ValueError: If the test value type is neither "ate" nor "risk_ratio".
+        """
+        effect_type = res.test_value.type
+        if effect_type == "ate":
+            null_value = 0
+        elif effect_type == "risk_ratio":
+            null_value = 1
+        else:
+            raise ValueError(f"Test Value type {res.test_value.type} is not valid for this TestOutcome")
+
+        entirely_above = null_value < res.ci_low() < res.ci_high()
+        return entirely_above or (res.ci_low() < res.ci_high() < null_value)
+
+
+class NoEffect(CausalTestOutcome):
+    """An extension of TestOutcome representing that the expected causal effect should be null."""
+
+    def apply(self, res: CausalTestResult) -> bool:
+        """Check that the confidence interval spans the null value or the estimate sits on it.
+
+        The null value is 0 for an "ate" test value and 1 for a "risk_ratio" test value.
+
+        :param res: The causal test result to check.
+        :raises ValueError: If the test value type is neither "ate" nor "risk_ratio".
+        """
+        effect_type = res.test_value.type
+        if effect_type == "ate":
+            spans_null = res.ci_low() < 0 < res.ci_high()
+            return spans_null or (abs(res.test_value.value) < 1e-10)
+        if effect_type == "risk_ratio":
+            spans_null = res.ci_low() < 1 < res.ci_high()
+            return spans_null or np.isclose(res.test_value.value, 1.0, atol=1e-10)
+        raise ValueError(f"Test Value type {res.test_value.type} is not valid for this TestOutcome")
+
+
+class ExactValue(SomeEffect):
     """An extension of TestOutcome representing that the expected causal effect should be a specific value."""
 
     def __init__(self, value: float, tolerance: float = None):
@@ -33,6 +55,8 @@ def __init__(self, value: float, tolerance: float = None):
             self.tolerance = tolerance
 
     def apply(self, res: CausalTestResult) -> bool:
+        # With valid confidence intervals, the inherited SomeEffect check must hold in addition
+        # to the estimate being close to the expected value.
+        # NOTE(review): presumably an expected value equal to the null (e.g. an ATE of 0) can
+        # then never pass once intervals are available -- confirm this is intended.
+        if res.ci_valid():
+            return super().apply(res) and np.isclose(res.test_value.value, self.value, atol=self.tolerance)
+        # Without valid intervals only the point estimate is compared against the expected value.
         return np.isclose(res.test_value.value, self.value, atol=self.tolerance)
 
     def __str__(self):
@@ -43,6 +67,8 @@ class Positive(CausalTestOutcome):
     """An extension of TestOutcome representing that the expected causal effect should be positive."""
 
     def apply(self, res: CausalTestResult) -> bool:
+        if res.ci_valid() and not super().apply(res):
+            return False
         if res.test_value.type == "ate":
             return res.test_value.value > 0
         if res.test_value.type == "risk_ratio":
@@ -54,36 +80,10 @@ class Negative(CausalTestOutcome):
     """An extension of TestOutcome representing that the expected causal effect should be negative."""
 
     def apply(self, res: CausalTestResult) -> bool:
+        # When the result has valid confidence intervals, the parent class's check must pass first.
+        # NOTE(review): the hunk header lists this class as Negative(CausalTestOutcome); confirm
+        # that the parent's apply() really implements the interval check intended here.
+        if res.ci_valid() and not super().apply(res):
+            return False
+        # A negative effect is an ATE below 0 or a risk ratio below 1.
         if res.test_value.type == "ate":
             return res.test_value.value < 0
         if res.test_value.type == "risk_ratio":
             return res.test_value.value < 1
         raise ValueError(f"Test Value type {res.test_value.type} is not valid for this TestOutcome")
-
-
-class SomeEffect(CausalTestOutcome):
-    """An extension of TestOutcome representing that the expected causal effect should not be zero."""
-
-    def apply(self, res: CausalTestResult) -> bool:
-        if res.test_value.type == "ate":
-            return (0 < res.ci_low() < res.ci_high()) or (res.ci_low() < res.ci_high() < 0)
-        if res.test_value.type == "risk_ratio":
-            return (1 < res.ci_low() < res.ci_high()) or (res.ci_low() < res.ci_high() < 1)
-        raise ValueError(f"Test Value type {res.test_value.type} is not valid for this TestOutcome")
-
-    def __str__(self):
-        return "Changed"
-
-
-class NoEffect(CausalTestOutcome):
-    """An extension of TestOutcome representing that the expected causal effect should be zero."""
-
-    def apply(self, res: CausalTestResult) -> bool:
-        if res.test_value.type == "ate":
-            return (res.ci_low() < 0 < res.ci_high()) or (abs(res.test_value.value) < 1e-10)
-        if res.test_value.type == "risk_ratio":
-            return (res.ci_low() < 1 < res.ci_high()) or np.isclose(res.test_value.value, 1.0, atol=1e-10)
-        raise ValueError(f"Test Value type {res.test_value.type} is not valid for this TestOutcome")
-
-    def __str__(self):
-        return "Unchanged"
diff --git a/causal_testing/testing/causal_test_result.py b/causal_testing/testing/causal_test_result.py
@@ -85,6 +85,10 @@ def ci_high(self):
             return max(self.confidence_intervals)
         return None
 
+    def ci_valid(self) -> bool:
+        """Return whether the result has both a lower and an upper confidence bound.
+
+        A bound counts as missing only when it is ``None``. A bound that is exactly zero is a
+        legitimate value and must not be mistaken for "no interval", which a plain truthiness
+        test would do. The result is always a real ``bool``.
+        """
+        return self.ci_low() is not None and self.ci_high() is not None
+
     def summary(self):
         """Summarise the causal test result as an intuitive sentence."""
         print(
diff --git a/causal_testing/testing/estimators.py b/causal_testing/testing/estimators.py
@@ -355,6 +355,7 @@ def estimate_ate(self) -> tuple[float, list[float, float], float]:
         :return: The average treatment effect and the 95% Wald confidence intervals.
         """
         model = self._run_linear_regression()
+        self.model = model
 
         # Create an empty individual for the control and treated
         individuals = pd.DataFrame(1, index=["control", "treated"], columns=model.params.index)
diff --git a/causal_testing/testing/validation.py b/causal_testing/testing/validation.py
@@ -0,0 +1,59 @@
+"""This module contains the CausalValidator class for performing Quantitative Bias Analysis techniques"""
+import math
+import numpy as np
+from scipy.stats import t
+from statsmodels.regression.linear_model import RegressionResultsWrapper
+
+
+class CausalValidator:
+    """A suite of validation tools to perform Quantitative Bias Analysis to back up causal claims"""
+
+    @staticmethod
+    def _e_value_from_ratio(ratio: float) -> float:
+        """Apply the E-value formula ``ratio + sqrt(ratio * (ratio - 1))`` to a ratio."""
+        return ratio + math.sqrt(ratio * (ratio - 1))
+
+    def estimate_robustness(self, model: RegressionResultsWrapper, q=1, alpha=1):
+        """Calculate the robustness of a linear regression model.
+
+        This allows the user to identify how large an unidentified confounding variable
+        would need to be to nullify the causal relationship under test.
+
+        :param model: Fitted regression results; ``df_resid`` and ``tvalues`` are read from it.
+        :param q: Scaling factor applied to ``|t| / sqrt(dof)``.
+        :param alpha: Significance level; ``alpha / 2`` is passed to the Student-t quantile
+                      function. With the default of 1 the critical term is zero.
+        :return: The robustness value(s), computed element-wise from ``model.tvalues``.
+        """
+        residual_dof = model.df_resid
+        scaled_f = q * abs(model.tvalues / math.sqrt(residual_dof))
+        critical_f = abs(t.ppf(alpha / 2, residual_dof - 1)) / math.sqrt(residual_dof - 1)
+        # NOTE(review): the difference is not clamped at zero; confirm whether negative values
+        # (possible when alpha < 1) should instead produce a robustness value of 0.
+        adjusted_f = scaled_f - critical_f
+
+        return 0.5 * (np.sqrt(adjusted_f**4 + (4 * adjusted_f**2)) - adjusted_f**2)
+
+    def estimate_e_value(self, risk_ratio: float) -> float:
+        """Calculate the E value from a risk ratio.
+
+        This allows the user to identify how large a risk an unidentified confounding
+        variable would need to be to nullify the causal relationship under test.
+
+        :param risk_ratio: The estimated risk ratio; values below 1 are inverted first.
+        :return: The E value for the risk ratio.
+        """
+        ratio = risk_ratio if risk_ratio >= 1 else 1 / risk_ratio
+        return self._e_value_from_ratio(ratio)
+
+    def estimate_e_value_using_ci(self, risk_ratio: float, confidence_intervals: tuple[float, float]) -> float:
+        """Calculate the E value from a risk ratio and its confidence intervals.
+
+        This allows the user to identify how large a risk an unidentified confounding
+        variable would need to be to nullify the causal relationship under test.
+
+        :param risk_ratio: The estimated risk ratio; it selects which confidence limit is used.
+        :param confidence_intervals: The (lower, upper) confidence limits of the risk ratio.
+        :return: The E value of the limit closest to 1, or 1 when the interval reaches 1.
+        """
+        if risk_ratio >= 1:
+            # For ratios of at least 1 the lower limit is the one nearest the null
+            lower_limit = confidence_intervals[0]
+            return self._e_value_from_ratio(lower_limit) if lower_limit > 1 else 1
+
+        # For ratios below 1 the upper limit is nearest the null and is inverted before use
+        upper_limit = confidence_intervals[1]
+        return self._e_value_from_ratio(1 / upper_limit) if upper_limit < 1 else 1
diff --git a/tests/specification_tests/test_metamorphic_relations.py b/tests/specification_tests/test_metamorphic_relations.py
diff --git a/tests/testing_tests/test_causal_test_outcome.py b/tests/testing_tests/test_causal_test_outcome.py
diff --git a/tests/testing_tests/test_estimators.py b/tests/testing_tests/test_estimators.py