Merge branch 'interaction-terms' of https://github.com/CITCOM-project/CausalTestingFramework into interaction-terms

rsomers1998 · rsomers1998 · commit f66f85411aeb · 2024-02-26T08:50:44.000Z
diff --git a/causal_testing/testing/causal_test_outcome.py b/causal_testing/testing/causal_test_outcome.py
@@ -28,14 +28,13 @@ class SomeEffect(CausalTestOutcome):
     """An extension of TestOutcome representing that the expected causal effect should not be zero."""
 
     def apply(self, res: CausalTestResult) -> bool:
-        if res.test_value.type == "ate":
-            return (0 < res.ci_low() < res.ci_high()) or (res.ci_low() < res.ci_high() < 0)
-        if res.test_value.type == "coefficient":
-            ci_low = res.ci_low() if isinstance(res.ci_low(), Iterable) else [res.ci_low()]
-            ci_high = res.ci_high() if isinstance(res.ci_high(), Iterable) else [res.ci_high()]
-            return any(0 < ci_low < ci_high or ci_low < ci_high < 0 for ci_low, ci_high in zip(ci_low, ci_high))
         if res.test_value.type == "risk_ratio":
-            return (1 < res.ci_low() < res.ci_high()) or (res.ci_low() < res.ci_high() < 1)
+            return any(
+                1 < ci_low < ci_high or ci_low < ci_high < 1 for ci_low, ci_high in zip(res.ci_low(), res.ci_high()))
+        if res.test_value.type in ('coefficient', 'ate'):
+            return any(
+                0 < ci_low < ci_high or ci_low < ci_high < 0 for ci_low, ci_high in zip(res.ci_low(), res.ci_high()))
+
         raise ValueError(f"Test Value type {res.test_value.type} is not valid for this TestOutcome")
 
 
@@ -52,23 +51,20 @@ def __init__(self, atol: float = 1e-10, ctol: float = 0.05):
         self.ctol = ctol
 
     def apply(self, res: CausalTestResult) -> bool:
-        if res.test_value.type == "ate":
-            return (res.ci_low() < 0 < res.ci_high()) or (abs(res.test_value.value) < self.atol)[0]
-        if res.test_value.type == "coefficient":
-            ci_low = res.ci_low() if isinstance(res.ci_low(), Iterable) else [res.ci_low()]
-            ci_high = res.ci_high() if isinstance(res.ci_high(), Iterable) else [res.ci_high()]
+        if res.test_value.type == "risk_ratio":
+            return any(ci_low < 1 < ci_high or np.isclose(value, 1.0, atol=self.atol) for ci_low, ci_high, value in
+                       zip(res.ci_low(), res.ci_high(), res.test_value.value))
+        if res.test_value.type in ('coefficient', 'ate'):
             value = res.test_value.value if isinstance(res.ci_high(), Iterable) else [res.test_value.value]
-            value = value[0] if isinstance(value[0], pd.Series) else value
             return (
-                sum(
-                    not ((ci_low < 0 < ci_high) or abs(v) < self.atol)
-                    for ci_low, ci_high, v in zip(ci_low, ci_high, value)
-                )
-                / len(value)
-                < self.ctol
+                    sum(
+                        not ((ci_low < 0 < ci_high) or abs(v) < self.atol)
+                        for ci_low, ci_high, v in zip(res.ci_low(), res.ci_high(), value)
+                    )
+                    / len(value)
+                    < self.ctol
             )
-        if res.test_value.type == "risk_ratio":
-            return (res.ci_low() < 1 < res.ci_high()) or np.isclose(res.test_value.value, 1.0, atol=self.atol)
+
         raise ValueError(f"Test Value type {res.test_value.type} is not valid for this TestOutcome")
 
 
diff --git a/causal_testing/testing/causal_test_result.py b/causal_testing/testing/causal_test_result.py
@@ -27,7 +27,7 @@ def __init__(
         self,
         estimator: Estimator,
         test_value: TestValue,
-        confidence_intervals: [float, float] = None,
+        confidence_intervals: [pd.Series, pd.Series] = None,
         effect_modifier_configuration: {Variable: Any} = None,
         adequacy=None,
     ):
@@ -100,15 +100,15 @@ def ci_low(self):
         """Return the lower bracket of the confidence intervals."""
         if self.confidence_intervals:
             if isinstance(self.confidence_intervals[0], pd.Series):
-                return self.confidence_intervals[0][0]
+                return self.confidence_intervals[0].to_list()
             return self.confidence_intervals[0]
         return None
 
     def ci_high(self):
         """Return the higher bracket of the confidence intervals."""
         if self.confidence_intervals:
             if isinstance(self.confidence_intervals[1], pd.Series):
-                return self.confidence_intervals[1][0]
+                return self.confidence_intervals[1].to_list()
             return self.confidence_intervals[1]
         return None
 
diff --git a/causal_testing/testing/estimators.py b/causal_testing/testing/estimators.py
@@ -453,8 +453,8 @@ def _run_linear_regression(self) -> RegressionResultsWrapper:
     def _get_confidence_intervals(self, model, treatment):
         confidence_intervals = model.conf_int(alpha=self.alpha, cols=None)
         ci_low, ci_high = (
-            confidence_intervals[0].loc[treatment],
-            confidence_intervals[1].loc[treatment],
+            pd.Series(confidence_intervals[0].loc[treatment]),
+            pd.Series(confidence_intervals[1].loc[treatment]),
         )
         return [ci_low, ci_high]
 
diff --git a/tests/data/scarf_data.csv b/tests/data/scarf_data.csv
@@ -0,0 +1,12 @@
+length_in,large_gauge,color,completed
+55,1,orange,1
+55,0,orange,1
+55,0,brown,1
+60,0,brown,1
+60,0,grey,0
+70,0,grey,1
+70,0,orange,0
+82,1,grey,1
+82,0,brown,0
+82,0,orange,0
+82,1,brown,0
diff --git a/tests/testing_tests/test_causal_test_outcome.py b/tests/testing_tests/test_causal_test_outcome.py
@@ -107,7 +107,7 @@ def test_Positive_fail_ci(self):
         ctr = CausalTestResult(
             estimator=self.estimator,
             test_value=test_value,
-            confidence_intervals=[-1, 1],
+            confidence_intervals=[pd.Series(-1), pd.Series(1)],
             effect_modifier_configuration=None,
         )
         ev = Positive()
@@ -151,7 +151,7 @@ def test_Negative_fail_ci(self):
         ctr = CausalTestResult(
             estimator=self.estimator,
             test_value=test_value,
-            confidence_intervals=[-1, 1],
+            confidence_intervals=[pd.Series(-1), pd.Series(1)],
             effect_modifier_configuration=None,
         )
         ev = Negative()
@@ -173,7 +173,7 @@ def test_exactValue_pass_ci(self):
         ctr = CausalTestResult(
             estimator=self.estimator,
             test_value=test_value,
-            confidence_intervals=[4, 6],
+            confidence_intervals=[pd.Series(4), pd.Series(6)],
             effect_modifier_configuration=None,
         )
         ev = ExactValue(5, 0.1)
@@ -199,7 +199,7 @@ def test_invalid(self):
         ctr = CausalTestResult(
             estimator=self.estimator,
             test_value=test_value,
-            confidence_intervals=[4.8, 6.7],
+            confidence_intervals=[pd.Series(4.8), pd.Series(6.7)],
             effect_modifier_configuration=None,
         )
         with self.assertRaises(ValueError):
@@ -216,7 +216,7 @@ def test_someEffect_pass_coefficient(self):
         ctr = CausalTestResult(
             estimator=self.estimator,
             test_value=test_value,
-            confidence_intervals=[4.8, 6.7],
+            confidence_intervals=[pd.Series(4.8), pd.Series(6.7)],
             effect_modifier_configuration=None,
         )
         self.assertTrue(SomeEffect().apply(ctr))
@@ -227,7 +227,7 @@ def test_someEffect_pass_ate(self):
         ctr = CausalTestResult(
             estimator=self.estimator,
             test_value=test_value,
-            confidence_intervals=[4.8, 6.7],
+            confidence_intervals=[pd.Series(4.8), pd.Series(6.7)],
             effect_modifier_configuration=None,
         )
         self.assertTrue(SomeEffect().apply(ctr))
@@ -238,7 +238,7 @@ def test_someEffect_pass_rr(self):
         ctr = CausalTestResult(
             estimator=self.estimator,
             test_value=test_value,
-            confidence_intervals=[4.8, 6.7],
+            confidence_intervals=[pd.Series(4.8), pd.Series(6.7)],
             effect_modifier_configuration=None,
         )
         self.assertTrue(SomeEffect().apply(ctr))
@@ -249,7 +249,7 @@ def test_someEffect_fail(self):
         ctr = CausalTestResult(
             estimator=self.estimator,
             test_value=test_value,
-            confidence_intervals=[-0.1, 0.2],
+            confidence_intervals=[pd.Series(-0.1), pd.Series(0.2)],
             effect_modifier_configuration=None,
         )
         self.assertFalse(SomeEffect().apply(ctr))
@@ -260,7 +260,7 @@ def test_someEffect_str(self):
         ctr = CausalTestResult(
             estimator=self.estimator,
             test_value=test_value,
-            confidence_intervals=[-0.1, 0.2],
+            confidence_intervals=[pd.Series(-0.1), pd.Series(0.2)],
             effect_modifier_configuration=None,
         )
         ev = SomeEffect()
@@ -274,8 +274,8 @@ def test_someEffect_str(self):
                 "adjustment_set": set(),
                 "effect_estimate": 0,
                 "effect_measure": "ate",
-                "ci_low": -0.1,
-                "ci_high": 0.2,
+                "ci_low": [-0.1],
+                "ci_high": [0.2],
             },
         )
 
@@ -284,7 +284,7 @@ def test_someEffect_dict(self):
         ctr = CausalTestResult(
             estimator=self.estimator,
             test_value=test_value,
-            confidence_intervals=[-0.1, 0.2],
+            confidence_intervals=[pd.Series(-0.1), pd.Series(0.2)],
             effect_modifier_configuration=None,
         )
         ev = SomeEffect()
@@ -298,8 +298,8 @@ def test_someEffect_dict(self):
                 "adjustment_set": set(),
                 "effect_estimate": 0,
                 "effect_measure": "ate",
-                "ci_low": -0.1,
-                "ci_high": 0.2,
+                "ci_low": [-0.1],
+                "ci_high": [0.2],
             },
         )
 
diff --git a/tests/testing_tests/test_estimators.py b/tests/testing_tests/test_estimators.py
@@ -7,7 +7,7 @@
     CausalForestEstimator,
     LogisticRegressionEstimator,
     InstrumentalVariableEstimator,
-    CubicSplineRegressionEstimator
+    CubicSplineRegressionEstimator,
 )
 from causal_testing.specification.variable import Input
 from causal_testing.utils.validation import CausalValidator
@@ -78,21 +78,7 @@ class TestLogisticRegressionEstimator(unittest.TestCase):
 
     @classmethod
     def setUpClass(cls) -> None:
-        cls.scarf_df = pd.DataFrame(
-            [
-                {"length_in": 55, "large_gauge": 1, "color": "orange", "completed": 1},
-                {"length_in": 55, "large_gauge": 0, "color": "orange", "completed": 1},
-                {"length_in": 55, "large_gauge": 0, "color": "brown", "completed": 1},
-                {"length_in": 60, "large_gauge": 0, "color": "brown", "completed": 1},
-                {"length_in": 60, "large_gauge": 0, "color": "grey", "completed": 0},
-                {"length_in": 70, "large_gauge": 0, "color": "grey", "completed": 1},
-                {"length_in": 70, "large_gauge": 0, "color": "orange", "completed": 0},
-                {"length_in": 82, "large_gauge": 1, "color": "grey", "completed": 1},
-                {"length_in": 82, "large_gauge": 0, "color": "brown", "completed": 0},
-                {"length_in": 82, "large_gauge": 0, "color": "orange", "completed": 0},
-                {"length_in": 82, "large_gauge": 1, "color": "brown", "completed": 0},
-            ]
-        )
+        cls.scarf_df = pd.read_csv("tests/data/scarf_data.csv")
 
     # Yes, this probably shouldn't be in here, but it uses the scarf data so it makes more sense to put it
     # here than duplicating the scarf data for a single test
@@ -231,7 +217,7 @@ def test_program_11_2(self):
         self.assertEqual(round(model.params["Intercept"] + 90 * model.params["treatments"], 1), 216.9)
 
         # Increasing treatments from 90 to 100 should be the same as 10 times the unit ATE
-        self.assertEqual(round(model.params["treatments"], 1), round(ate[0], 1))
+        self.assertTrue(all(round(model.params["treatments"], 1) == round(ate_single, 1) for ate_single in ate))
 
     def test_program_11_3(self):
         """Test whether our linear regression implementation produces the same results as program 11.3 (p. 144)."""
@@ -251,7 +237,7 @@ def test_program_11_3(self):
             197.1,
         )
         # Increasing treatments from 90 to 100 should be the same as 10 times the unit ATE
-        self.assertEqual(round(model.params["treatments"], 3), round(ate[0], 3))
+        self.assertTrue(all(round(model.params["treatments"], 3) == round(ate_single, 3) for ate_single in ate))
 
     def test_program_15_1A(self):
         """Test whether our linear regression implementation produces the same results as program 15.1 (p. 163, 184)."""
@@ -329,6 +315,7 @@ def test_program_15_no_interaction(self):
         # terms_to_square = ["age", "wt71", "smokeintensity", "smokeyrs"]
         # for term_to_square in terms_to_square:
         ate, [ci_low, ci_high] = linear_regression_estimator.estimate_coefficient()
+
         self.assertEqual(round(ate[0], 1), 3.5)
         self.assertEqual([round(ci_low[0], 1), round(ci_high[0], 1)], [2.6, 4.3])
 
@@ -416,12 +403,11 @@ def test_program_11_2_with_robustness_validation(self):
 
 
 class TestCubicSplineRegressionEstimator(TestLinearRegressionEstimator):
-
     @classmethod
-
     def setUpClass(cls):
 
         super().setUpClass()
+
     def test_program_11_3_cublic_spline(self):
 
         """Test whether the cublic_spline regression implementation produces the same results as program 11.3 (p. 162).
@@ -431,8 +417,7 @@ def test_program_11_3_cublic_spline(self):
 
         df = self.chapter_11_df.copy()
 
-        cublic_spline_estimator = CubicSplineRegressionEstimator(
-            "treatments", 1, 0, set(), "outcomes", 3, df)
+        cublic_spline_estimator = CubicSplineRegressionEstimator("treatments", 1, 0, set(), "outcomes", 3, df)
 
         model = cublic_spline_estimator._run_linear_regression()
 
@@ -453,8 +438,6 @@ def test_program_11_3_cublic_spline(self):
         self.assertAlmostEqual(ate_1[0] * 2, ate_2[0])
 
 
-
-
 class TestCausalForestEstimator(unittest.TestCase):
     """Test the linear regression estimator against the programming exercises in Section 2 of Hernán and Robins [1].
 
@@ -527,15 +510,29 @@ def setUpClass(cls) -> None:
         df = pd.DataFrame({"X1": np.random.uniform(-1000, 1000, 1000), "X2": np.random.uniform(-1000, 1000, 1000)})
         df["Y"] = 2 * df["X1"] - 3 * df["X2"] + 2 * df["X1"] * df["X2"] + 10
         cls.df = df
+        cls.scarf_df = pd.read_csv("tests/data/scarf_data.csv")
 
     def test_X1_effect(self):
         """When we fix the value of X2 to 0, the effect of X1 on Y should become ~2 (because X2 terms are cancelled)."""
-        x2 = Input("X2", float)
         lr_model = LinearRegressionEstimator(
-            "X1", 1, 0, {"X2"}, "Y", effect_modifiers={x2.name: 0}, formula="Y ~ X1 + X2 + (X1 * X2)", df=self.df
+            "X1", 1, 0, {"X2"}, "Y", effect_modifiers={"X2": 0}, formula="Y ~ X1 + X2 + (X1 * X2)", df=self.df
         )
         test_results = lr_model.estimate_ate()
         ate = test_results[0][0]
         self.assertAlmostEqual(ate, 2.0)
 
+    def test_categorical_confidence_intervals(self):
+        lr_model = LinearRegressionEstimator(
+            treatment="color",
+            control_value=None,
+            treatment_value=None,
+            adjustment_set=set(),
+            outcome="length_in",
+            df=self.scarf_df,
+        )
+        coefficients, [ci_low, ci_high] = lr_model.estimate_coefficient()
 
+        # Exact values are incidental: this checks the estimate and both CI bounds are Series with one entry per level.
+        self.assertTrue(coefficients.round(2).equals(pd.Series({"color[T.grey]": 0.92, "color[T.orange]": -4.25})))
+        self.assertTrue(ci_low.round(2).equals(pd.Series({"color[T.grey]": -22.12, "color[T.orange]": -25.58})))
+        self.assertTrue(ci_high.round(2).equals(pd.Series({"color[T.grey]": 23.95, "color[T.orange]": 17.08})))