IPCW outcome is now an output

jmafoster1 · jmafoster1 · commit 4d48785f033a · 2025-02-20T12:47:13.000Z
diff --git a/causal_testing/estimation/ipcw_estimator.py b/causal_testing/estimation/ipcw_estimator.py
@@ -12,7 +12,7 @@
 
 from causal_testing.estimation.abstract_estimator import Estimator
 from causal_testing.testing.base_test_case import BaseTestCase
-from causal_testing.specification.variable import Input, Output
+from causal_testing.specification.variable import Variable
 
 logger = logging.getLogger(__name__)
 
@@ -32,7 +32,7 @@ def __init__(
         timesteps_per_observation: int,
         control_strategy: list[tuple[int, str, Any]],
         treatment_strategy: list[tuple[int, str, Any]],
-        outcome: str,
+        outcome: Variable,
         status_column: str,
         fit_bl_switch_formula: str,
         fit_bltd_switch_formula: str,
@@ -58,7 +58,7 @@ def __init__(
                             treatment) with the most elements multiplied by `timesteps_per_observation`.
         """
         super().__init__(
-            base_test_case=BaseTestCase(Input("_", float), Output(outcome, float)),
+            base_test_case=BaseTestCase(None, outcome),
             treatment_value=[val for _, _, val in treatment_strategy],
             control_value=[val for _, _, val in control_strategy],
             adjustment_set=None,
@@ -70,7 +70,6 @@ def __init__(
         self.timesteps_per_observation = timesteps_per_observation
         self.control_strategy = control_strategy
         self.treatment_strategy = treatment_strategy
-        self.outcome = outcome
         self.status_column = status_column
         self.fit_bl_switch_formula = fit_bl_switch_formula
         self.fit_bltd_switch_formula = fit_bltd_switch_formula
diff --git a/causal_testing/testing/causal_test_result.py b/causal_testing/testing/causal_test_result.py
@@ -84,12 +84,12 @@ def to_dict(self, json=False):
         """Return result contents as a dictionary
         :return: Dictionary containing contents of causal_test_result
         """
-        if isinstance(self.estimator.base_test_case.treatment_variable, list):
-            treatment = [x.name for x in self.estimator.base_test_case.treatment_variable]
-        else:
-            treatment = self.estimator.base_test_case.treatment_variable.name
         base_dict = {
-            "treatment": treatment,
+            "treatment": (
+                self.estimator.base_test_case.treatment_variable.name
+                if self.estimator.base_test_case.treatment_variable is not None
+                else None
+            ),
             "control_value": self.estimator.control_value,
             "treatment_value": self.estimator.treatment_value,
             "outcome": self.estimator.base_test_case.outcome_variable.name,
diff --git a/tests/estimation_tests/test_ipcw_estimator.py b/tests/estimation_tests/test_ipcw_estimator.py
@@ -1,9 +1,6 @@
 import unittest
 import pandas as pd
-import numpy as np
-import matplotlib.pyplot as plt
-from causal_testing.specification.variable import Input
-from causal_testing.utils.validation import CausalValidator
+from causal_testing.specification.variable import Input, Output
 
 from causal_testing.estimation.ipcw_estimator import IPCWEstimator
 
@@ -13,113 +10,85 @@ class TestIPCWEstimator(unittest.TestCase):
     Test the IPCW estimator class
     """
 
+    def setUp(self) -> None:
+        self.outcome = Output("outcome", float)
+        self.status_column = "ok"
+        self.timesteps_per_intervention = 1
+        self.control_strategy = [[t, "t", 0] for t in range(1, 4, self.timesteps_per_intervention)]
+        self.treatment_strategy = [[t, "t", 1] for t in range(1, 4, self.timesteps_per_intervention)]
+        self.fit_bl_switch_formula = "xo_t_do ~ time"
+        self.df = pd.read_csv("tests/resources/data/temporal_data.csv")
+        self.df[self.status_column] = self.df["outcome"] == 1
+
     def test_estimate_hazard_ratio(self):
-        timesteps_per_intervention = 1
-        control_strategy = [[t, "t", 0] for t in range(1, 4, timesteps_per_intervention)]
-        treatment_strategy = [[t, "t", 1] for t in range(1, 4, timesteps_per_intervention)]
-        outcome = "outcome"
-        fit_bl_switch_formula = "xo_t_do ~ time"
-        df = pd.read_csv("tests/resources/data/temporal_data.csv")
-        df["ok"] = df["outcome"] == 1
         estimation_model = IPCWEstimator(
-            df,
-            timesteps_per_intervention,
-            control_strategy,
-            treatment_strategy,
-            outcome,
-            "ok",
-            fit_bl_switch_formula=fit_bl_switch_formula,
-            fit_bltd_switch_formula=fit_bl_switch_formula,
+            self.df,
+            self.timesteps_per_intervention,
+            self.control_strategy,
+            self.treatment_strategy,
+            self.outcome,
+            self.status_column,
+            fit_bl_switch_formula=self.fit_bl_switch_formula,
+            fit_bltd_switch_formula=self.fit_bl_switch_formula,
             eligibility=None,
         )
-        estimate, intervals = estimation_model.estimate_hazard_ratio()
+        estimate, _ = estimation_model.estimate_hazard_ratio()
         self.assertEqual(round(estimate["trtrand"], 3), 1.351)
 
     def test_invalid_treatment_strategies(self):
-        timesteps_per_intervention = 1
-        control_strategy = [[t, "t", 0] for t in range(1, 4, timesteps_per_intervention)]
-        treatment_strategy = [[t, "t", 1] for t in range(1, 4, timesteps_per_intervention)]
-        outcome = "outcome"
-        fit_bl_switch_formula = "xo_t_do ~ time"
-        df = pd.read_csv("tests/resources/data/temporal_data.csv")
-        df["t"] = (["1", "0"] * len(df))[: len(df)]
-        df["ok"] = df["outcome"] == 1
         with self.assertRaises(ValueError):
-            estimation_model = IPCWEstimator(
-                df,
-                timesteps_per_intervention,
-                control_strategy,
-                treatment_strategy,
-                outcome,
-                "ok",
-                fit_bl_switch_formula=fit_bl_switch_formula,
-                fit_bltd_switch_formula=fit_bl_switch_formula,
+            IPCWEstimator(
+                self.df.assign(t=(["1", "0"] * len(self.df))[: len(self.df)]),
+                self.timesteps_per_intervention,
+                self.control_strategy,
+                self.treatment_strategy,
+                self.outcome,
+                self.status_column,
+                fit_bl_switch_formula=self.fit_bl_switch_formula,
+                fit_bltd_switch_formula=self.fit_bl_switch_formula,
                 eligibility=None,
             )
 
     def test_invalid_fault_t_do(self):
-        timesteps_per_intervention = 1
-        control_strategy = [[t, "t", 0] for t in range(1, 4, timesteps_per_intervention)]
-        treatment_strategy = [[t, "t", 1] for t in range(1, 4, timesteps_per_intervention)]
-        outcome = "outcome"
-        fit_bl_switch_formula = "xo_t_do ~ time"
-        df = pd.read_csv("tests/resources/data/temporal_data.csv")
-        df["ok"] = df["outcome"] == 1
         estimation_model = IPCWEstimator(
-            df,
-            timesteps_per_intervention,
-            control_strategy,
-            treatment_strategy,
-            outcome,
-            "ok",
-            fit_bl_switch_formula=fit_bl_switch_formula,
-            fit_bltd_switch_formula=fit_bl_switch_formula,
+            self.df.assign(outcome=1),
+            self.timesteps_per_intervention,
+            self.control_strategy,
+            self.treatment_strategy,
+            self.outcome,
+            self.status_column,
+            fit_bl_switch_formula=self.fit_bl_switch_formula,
+            fit_bltd_switch_formula=self.fit_bl_switch_formula,
             eligibility=None,
         )
         estimation_model.df["fault_t_do"] = 0
         with self.assertRaises(ValueError):
-            estimate, intervals = estimation_model.estimate_hazard_ratio()
+            estimation_model.estimate_hazard_ratio()
 
     def test_no_individual_began_control_strategy(self):
-        timesteps_per_intervention = 1
-        control_strategy = [[t, "t", 0] for t in range(1, 4, timesteps_per_intervention)]
-        treatment_strategy = [[t, "t", 1] for t in range(1, 4, timesteps_per_intervention)]
-        outcome = "outcome"
-        fit_bl_switch_formula = "xo_t_do ~ time"
-        df = pd.read_csv("tests/resources/data/temporal_data.csv")
-        df["t"] = 1
-        df["ok"] = df["outcome"] == 1
         with self.assertRaises(ValueError):
-            estimation_model = IPCWEstimator(
-                df,
-                timesteps_per_intervention,
-                control_strategy,
-                treatment_strategy,
-                outcome,
-                "ok",
-                fit_bl_switch_formula=fit_bl_switch_formula,
-                fit_bltd_switch_formula=fit_bl_switch_formula,
+            IPCWEstimator(
+                self.df.assign(t=1),
+                self.timesteps_per_intervention,
+                self.control_strategy,
+                self.treatment_strategy,
+                self.outcome,
+                self.status_column,
+                fit_bl_switch_formula=self.fit_bl_switch_formula,
+                fit_bltd_switch_formula=self.fit_bl_switch_formula,
                 eligibility=None,
             )
 
     def test_no_individual_began_treatment_strategy(self):
-        timesteps_per_intervention = 1
-        control_strategy = [[t, "t", 0] for t in range(1, 4, timesteps_per_intervention)]
-        treatment_strategy = [[t, "t", 1] for t in range(1, 4, timesteps_per_intervention)]
-        outcome = "outcome"
-        fit_bl_switch_formula = "xo_t_do ~ time"
-        df = pd.read_csv("tests/resources/data/temporal_data.csv")
-        df["t"] = 0
-        df["ok"] = df["outcome"] == 1
         with self.assertRaises(ValueError):
-            estimation_model = IPCWEstimator(
-                df,
-                timesteps_per_intervention,
-                control_strategy,
-                treatment_strategy,
-                outcome,
-                "ok",
-                fit_bl_switch_formula=fit_bl_switch_formula,
-                fit_bltd_switch_formula=fit_bl_switch_formula,
+            IPCWEstimator(
+                self.df.assign(t=0),
+                self.timesteps_per_intervention,
+                self.control_strategy,
+                self.treatment_strategy,
+                self.outcome,
+                self.status_column,
+                fit_bl_switch_formula=self.fit_bl_switch_formula,
+                fit_bltd_switch_formula=self.fit_bl_switch_formula,
                 eligibility=None,
             )
diff --git a/tests/testing_tests/test_causal_test_adequacy.py b/tests/testing_tests/test_causal_test_adequacy.py
@@ -92,7 +92,7 @@ def test_data_adequacy_group_by(self):
             fit_bltd_switch_formula=fit_bl_switch_formula,
             eligibility=None,
         )
-        base_test_case = estimation_model.base_test_case
+        base_test_case = BaseTestCase(Input("t", float), Output("outcome", float))
 
         causal_test_case = CausalTestCase(
             base_test_case=base_test_case,

Original file line number	Diff line number	Diff line change
`@@ -92,7 +92,7 @@ def test_data_adequacy_group_by(self):`
`92`	`92`	`fit_bltd_switch_formula=fit_bl_switch_formula,`
`93`	`93`	`eligibility=None,`
`94`	`94`	`)`
`95`		`- base_test_case = estimation_model.base_test_case`
	`95`	`+ base_test_case = BaseTestCase(Input("t", float), Output("outcome", float))`
`96`	`96`
`97`	`97`	`causal_test_case = CausalTestCase(`
`98`	`98`	`base_test_case=base_test_case,`