pytest

jmafoster1 · jmafoster1 · commit 41ea94973511 · 2024-07-29T09:58:05.000+01:00
diff --git a/tests/resources/data/data.csv b/tests/resources/data/data.csv
@@ -1,2 +1,2 @@
 index,test_input,test_input_no_dist,test_output
-0,1,1,2
+0,1.0,1.0,2.0
diff --git a/tests/resources/data/data_with_meta.csv b/tests/resources/data/data_with_meta.csv
@@ -1,2 +1,2 @@
 index,test_input,test_input_no_dist,test_output,test_meta
-0,1,1,2,3
+0,1.0,1.0,2.0,3
diff --git a/tests/resources/data/nhefs.csv b/tests/resources/data/nhefs.csv
diff --git a/tests/resources/data/scarf_data.csv b/tests/resources/data/scarf_data.csv
diff --git a/tests/resources/data/temporal_data.csv b/tests/resources/data/temporal_data.csv
@@ -59,3 +59,8 @@ t,outcome,id,time
 1,0,11,2
 1,0,11,3
 1,0,11,4
+0,1,12,0
+1,1,12,1
+0,1,12,2
+1,0,12,3
+0,0,12,4
diff --git a/tests/testing_tests/test_causal_test_adequacy.py b/tests/testing_tests/test_causal_test_adequacy.py
@@ -3,17 +3,20 @@
 from statistics import StatisticsError
 import scipy
 import os
+import pandas as pd
 
-from causal_testing.testing.estimators import LinearRegressionEstimator
+from causal_testing.testing.estimators import LinearRegressionEstimator, IPCWEstimator
 from causal_testing.testing.base_test_case import BaseTestCase
 from causal_testing.testing.causal_test_case import CausalTestCase
 from causal_testing.testing.causal_test_suite import CausalTestSuite
 from causal_testing.testing.causal_test_adequacy import DAGAdequacy
-from causal_testing.testing.causal_test_outcome import NoEffect, Positive
+from causal_testing.testing.causal_test_outcome import NoEffect, Positive, SomeEffect
 from causal_testing.json_front.json_class import JsonUtility, CausalVariables
 from causal_testing.specification.variable import Input, Output, Meta
 from causal_testing.specification.scenario import Scenario
 from causal_testing.specification.causal_specification import CausalSpecification
+from causal_testing.specification.capabilities import TreatmentSequence
+from causal_testing.testing.causal_test_adequacy import DataAdequacy
 
 
 class TestCausalTestAdequacy(unittest.TestCase):
@@ -106,6 +109,48 @@ def test_data_adequacy_cateogorical(self):
             {"kurtosis": {"test_input_no_dist[T.b]": 0.0}, "bootstrap_size": 100, "passing": 100, "successful": 100},
         )
 
+    def test_data_adequacy_group_by(self):  # DataAdequacy with group_by="id" on an IPCWEstimator hazard-ratio test
+        timesteps_per_intervention = 1
+        control_strategy = TreatmentSequence(timesteps_per_intervention, [("t", 0), ("t", 0), ("t", 0)])
+        treatment_strategy = TreatmentSequence(timesteps_per_intervention, [("t", 1), ("t", 1), ("t", 1)])
+        outcome = "outcome"
+        fit_bl_switch_formula = "xo_t_do ~ time"
+        df = pd.read_csv("tests/resources/data/temporal_data.csv")  # path is relative to the working directory
+        df["ok"] = df["outcome"] == 1  # boolean column, referred to by name ("ok") in the estimator call below
+        estimation_model = IPCWEstimator(
+            df,
+            timesteps_per_intervention,
+            control_strategy,
+            treatment_strategy,
+            outcome,
+            "ok",
+            fit_bl_switch_formula=fit_bl_switch_formula,
+            fit_bltd_switch_formula=fit_bl_switch_formula,  # same formula string reused for both formula arguments
+            eligibility=None,
+        )
+        base_test_case = BaseTestCase(
+            treatment_variable=control_strategy,  # NOTE(review): the control strategy object is passed as the treatment variable - confirm intended
+            outcome_variable=outcome,
+            effect="temporal",
+        )
+
+        causal_test_case = CausalTestCase(
+            base_test_case=base_test_case,
+            expected_causal_effect=SomeEffect(),
+            control_value=control_strategy,
+            treatment_value=treatment_strategy,
+            estimate_type="hazard_ratio",
+        )
+        causal_test_result = causal_test_case.execute_test(estimation_model, None)  # second argument is None; presumably no data collector is needed - TODO confirm
+        adequacy_metric = DataAdequacy(causal_test_case, estimation_model, group_by="id")  # presumably resamples by "id" group rather than by row - TODO confirm
+        adequacy_metric.measure_adequacy()
+        causal_test_result.adequacy = adequacy_metric
+        print(causal_test_result.adequacy.to_dict())  # NOTE(review): looks like a leftover debug print; consider removing
+        self.assertEqual(
+            causal_test_result.adequacy.to_dict(),
+            {"kurtosis": {"trtrand": 0.0}, "bootstrap_size": 100, "passing": 0, "successful": 95},  # NOTE(review): exact counts assume deterministic bootstrap sampling - confirm seeding
+        )
+
     def test_dag_adequacy_dependent(self):
         base_test_case = BaseTestCase(
             treatment_variable="test_input",
diff --git a/tests/testing_tests/test_estimators.py b/tests/testing_tests/test_estimators.py
@@ -35,7 +35,7 @@ def load_nhefs_df():
     """Get the NHEFS data from chapter 12 and put into a dataframe. NHEFS = National Health and Nutrition Examination
     Survey Data I Epidemiological Follow-up Study."""
 
-    nhefs_df = pd.read_csv("tests/data/nhefs.csv")
+    nhefs_df = pd.read_csv("tests/resources/data/nhefs.csv")
     nhefs_df["one"] = 1
     nhefs_df["zero"] = 0
     edu_dummies = pd.get_dummies(nhefs_df.education, prefix="edu")
@@ -79,7 +79,7 @@ class TestLogisticRegressionEstimator(unittest.TestCase):
 
     @classmethod
     def setUpClass(cls) -> None:
-        cls.scarf_df = pd.read_csv("tests/data/scarf_data.csv")
+        cls.scarf_df = pd.read_csv("tests/resources/data/scarf_data.csv")
 
     # Yes, this probably shouldn't be in here, but it uses the scarf data so it makes more sense to put it
     # here than duplicating the scarf data for a single test
@@ -446,7 +446,7 @@ def setUpClass(cls) -> None:
         df = pd.DataFrame({"X1": np.random.uniform(-1000, 1000, 1000), "X2": np.random.uniform(-1000, 1000, 1000)})
         df["Y"] = 2 * df["X1"] - 3 * df["X2"] + 2 * df["X1"] * df["X2"] + 10
         cls.df = df
-        cls.scarf_df = pd.read_csv("tests/data/scarf_data.csv")
+        cls.scarf_df = pd.read_csv("tests/resources/data/scarf_data.csv")
 
     def test_X1_effect(self):
         """When we fix the value of X2 to 0, the effect of X1 on Y should become ~2 (because X2 terms are cancelled)."""
@@ -485,7 +485,7 @@ def test_estimate_hazard_ratio(self):
         treatment_strategy = TreatmentSequence(timesteps_per_intervention, [("t", 1), ("t", 1), ("t", 1)])
         outcome = "outcome"
         fit_bl_switch_formula = "xo_t_do ~ time"
-        df = pd.read_csv("tests/data/temporal_data.csv")
+        df = pd.read_csv("tests/resources/data/temporal_data.csv")
         df["ok"] = df["outcome"] == 1
         estimation_model = IPCWEstimator(
             df,
@@ -500,3 +500,48 @@ def test_estimate_hazard_ratio(self):
         )
         estimate, intervals = estimation_model.estimate_hazard_ratio()
         self.assertEqual(estimate["trtrand"], 1.0)
+
+    def test_invalid_treatment_strategies(self):  # constructing IPCWEstimator must raise ValueError after column "t" is replaced with strings
+        timesteps_per_intervention = 1
+        control_strategy = TreatmentSequence(timesteps_per_intervention, [("t", 0), ("t", 0), ("t", 0)])
+        treatment_strategy = TreatmentSequence(timesteps_per_intervention, [("t", 1), ("t", 1), ("t", 1)])
+        outcome = "outcome"
+        fit_bl_switch_formula = "xo_t_do ~ time"
+        df = pd.read_csv("tests/resources/data/temporal_data.csv")
+        df["t"] = (["1", "0"] * len(df))[: len(df)]  # overwrite "t" with alternating strings "1", "0", truncated to len(df); the strategies above use ints 0 and 1
+        df["ok"] = df["outcome"] == 1
+        with self.assertRaises(ValueError):  # the constructor call itself is what is expected to raise
+            estimation_model = IPCWEstimator(  # NOTE(review): binding is never read; only the call matters
+                df,
+                timesteps_per_intervention,
+                control_strategy,
+                treatment_strategy,
+                outcome,
+                "ok",
+                fit_bl_switch_formula=fit_bl_switch_formula,
+                fit_bltd_switch_formula=fit_bl_switch_formula,
+                eligibility=None,
+            )
+
+    def test_invalid_fault_t_do(self):  # estimate_hazard_ratio must raise ValueError once "fault_t_do" is overwritten with 0
+        timesteps_per_intervention = 1
+        control_strategy = TreatmentSequence(timesteps_per_intervention, [("t", 0), ("t", 0), ("t", 0)])
+        treatment_strategy = TreatmentSequence(timesteps_per_intervention, [("t", 1), ("t", 1), ("t", 1)])
+        outcome = "outcome"
+        fit_bl_switch_formula = "xo_t_do ~ time"
+        df = pd.read_csv("tests/resources/data/temporal_data.csv")
+        df["ok"] = df["outcome"] == 1
+        estimation_model = IPCWEstimator(  # construction uses the unmodified fixture, so it is not wrapped in assertRaises
+            df,
+            timesteps_per_intervention,
+            control_strategy,
+            treatment_strategy,
+            outcome,
+            "ok",
+            fit_bl_switch_formula=fit_bl_switch_formula,
+            fit_bltd_switch_formula=fit_bl_switch_formula,
+            eligibility=None,
+        )
+        estimation_model.df["fault_t_do"] = 0  # overwrite the "fault_t_do" entry of the estimator's df with the scalar 0 after construction
+        with self.assertRaises(ValueError):
+            estimate, intervals = estimation_model.estimate_hazard_ratio()  # NOTE(review): unpacked values are never read; the call is only expected to raise

Original file line number	Diff line number	Diff line change
`@@ -1,2 +1,2 @@`
`1`	`1`	`index,test_input,test_input_no_dist,test_output`
`2`		`-0,1,1,2`
	`2`	`+0,1.0,1.0,2.0`
Original file line number	Diff line number	Diff line change
`@@ -1,2 +1,2 @@`
`1`	`1`	`index,test_input,test_input_no_dist,test_output,test_meta`
`2`		`-0,1,1,2,3`
	`2`	`+0,1.0,1.0,2.0,3`