Merge pull request #113 from CITCOM-project/rsomers/robustness

rsomers1998 · web-flow · commit 0d533785fbdb · 2023-03-21T09:13:59.000Z
Robustness Estimators
diff --git a/causal_testing/testing/estimators.py b/causal_testing/testing/estimators.py
@@ -355,6 +355,7 @@ def estimate_ate(self) -> tuple[float, list[float, float], float]:
         :return: The average treatment effect and the 95% Wald confidence intervals.
         """
         model = self._run_linear_regression()
+        self.model = model
 
         # Create an empty individual for the control and treated
         individuals = pd.DataFrame(1, index=["control", "treated"], columns=model.params.index)
diff --git a/causal_testing/testing/validation.py b/causal_testing/testing/validation.py
@@ -0,0 +1,59 @@
+"""This module contains the CausalValidator class for performing Quantitative Bias Analysis techniques"""
+import math
+import numpy as np
+from scipy.stats import t
+from statsmodels.regression.linear_model import RegressionResultsWrapper
+
+
+class CausalValidator:
+    """A suite of validation tools to perform Quantitative Bias Analysis to back up causal claims"""
+
+    def estimate_robustness(self, model: RegressionResultsWrapper, q=1, alpha=1):
+        """Calculate the robustness of a linear regression model. This allows
+        the user to identify how large an unidentified confounding variable
+        would need to be to nullify the causal relationship under test."""
+
+        dof = model.df_resid
+        t_values = model.tvalues
+
+        fq = q * abs(t_values / math.sqrt(dof))
+        f_crit = abs(t.ppf(alpha / 2, dof - 1)) / math.sqrt(dof - 1)
+        fqa = fq - f_crit
+
+        rv = 0.5 * (np.sqrt(fqa**4 + (4 * fqa**2)) - fqa**2)
+
+        return rv
+
+    def estimate_e_value(self, risk_ratio: float) -> float:
+        """Calculate the E value from a risk ratio. This allows
+        the user to identify how large a risk an unidentified confounding
+        variable would need to be to nullify the causal relationship
+        under test."""
+
+        if risk_ratio >= 1:
+            return risk_ratio + math.sqrt(risk_ratio * (risk_ratio - 1))
+
+        risk_ratio_prime = 1 / risk_ratio
+        return risk_ratio_prime + math.sqrt(risk_ratio_prime * (risk_ratio_prime - 1))
+
+    def estimate_e_value_using_ci(self, risk_ratio: float, confidence_intervals: tuple[float, float]) -> float:
+        """Calculate the E value from a risk ratio and its confidence intervals.
+        This allows the user to identify how large a risk an unidentified
+        confounding variable would need to be to nullify the causal relationship
+        under test."""
+
+        if risk_ratio >= 1:
+            lower_limit = confidence_intervals[0]
+            e = 1
+            if lower_limit > 1:
+                e = lower_limit + math.sqrt(lower_limit * (lower_limit - 1))
+
+            return e
+
+        upper_limit = confidence_intervals[1]
+        e = 1
+        if upper_limit < 1:
+            upper_limit_prime = 1 / upper_limit
+            e = upper_limit_prime + math.sqrt(upper_limit_prime * (upper_limit_prime - 1))
+
+        return e
diff --git a/tests/testing_tests/test_causal_test_outcome.py b/tests/testing_tests/test_causal_test_outcome.py
@@ -2,6 +2,7 @@
 from causal_testing.testing.causal_test_outcome import ExactValue, SomeEffect, Positive, Negative
 from causal_testing.testing.causal_test_result import CausalTestResult, TestValue
 from causal_testing.testing.estimators import LinearRegressionEstimator
+from causal_testing.testing.validation import CausalValidator
 
 
 class TestCausalTestOutcome(unittest.TestCase):
@@ -176,3 +177,23 @@ def test_someEffect_fail(self):
                 "ci_high": 0.2,
             },
         )
+
+    def test_positive_risk_ratio_e_value(self):
+        cv = CausalValidator()
+        e_value = cv.estimate_e_value(1.5)
+        self.assertEqual(round(e_value, 4), 2.366)
+
+    def test_positive_risk_ratio_e_value_using_ci(self):
+        cv = CausalValidator()
+        e_value = cv.estimate_e_value_using_ci(1.5, [1.2, 1.8])
+        self.assertEqual(round(e_value, 4), 1.6899)
+
+    def test_negative_risk_ratio_e_value(self):
+        cv = CausalValidator()
+        e_value = cv.estimate_e_value(0.8)
+        self.assertEqual(round(e_value, 4), 1.809)
+
+    def test_negative_risk_ratio_e_value_using_ci(self):
+        cv = CausalValidator()
+        e_value = cv.estimate_e_value_using_ci(0.8, [0.2, 0.9])
+        self.assertEqual(round(e_value, 4), 1.4625)
diff --git a/tests/testing_tests/test_estimators.py b/tests/testing_tests/test_estimators.py
@@ -9,6 +9,7 @@
     InstrumentalVariableEstimator,
 )
 from causal_testing.specification.variable import Input
+from causal_testing.testing.validation import CausalValidator
 
 
 def plot_results_df(df):
@@ -372,6 +373,15 @@ def test_program_15_no_interaction_ate_calculated(self):
         self.assertEqual(round(ate, 1), 3.5)
         self.assertEqual([round(ci_low, 1), round(ci_high, 1)], [1.9, 5])
 
+    def test_program_11_2_with_robustness_validation(self):
+        """Test whether our linear regression estimator, as used in test_program_11_2, can correctly estimate robustness."""
+        df = self.chapter_11_df.copy()
+        linear_regression_estimator = LinearRegressionEstimator("treatments", 100, 90, set(), "outcomes", df)
+        model = linear_regression_estimator._run_linear_regression()
+
+        cv = CausalValidator()
+        self.assertEqual(round(cv.estimate_robustness(model)["treatments"], 4), 0.7353)
+
 
 class TestCausalForestEstimator(unittest.TestCase):
     """Test the linear regression estimator against the programming exercises in Section 2 of Hernán and Robins [1].