tests pass again

jmafoster1 · jmafoster1 · commit e6d46cdb9c6c · 2024-10-11T15:25:42.000+01:00
diff --git a/causal_testing/estimation/ipcw_estimator.py b/causal_testing/estimation/ipcw_estimator.py
@@ -75,6 +75,8 @@ def __init__(
         self.fit_bltd_switch_formula = fit_bltd_switch_formula
         self.eligibility = eligibility
         self.df = df.sort_values(["id", "time"])
+        self.len_control_group = None
+        self.len_treatment_group = None
 
         if total_time is None:
             total_time = (
@@ -249,13 +251,15 @@ def preprocess_data(self):
         treatment_group["id"] = [f"t-{id}" for id in treatment_group["id"]]
         assert not treatment_group["id"].isnull().any(), "Null treatment IDs"
 
+        premature_failures = living_runs.groupby("id", sort=False).filter(lambda gp: gp["time"].max() < trt_time)
         logger.debug(
-            len(control_group.groupby("id")),
-            "control individuals",
-            len(treatment_group.groupby("id")),
-            "treatment individuals",
+            f"{len(control_group.groupby('id'))} control individuals "
+            f"{len(treatment_group.groupby('id'))} treatment individuals "
+            f"{len(premature_failures.groupby('id'))} premature failures"
         )
 
+        self.len_control_group = len(control_group.groupby("id"))
+        self.len_treatment_group = len(treatment_group.groupby("id"))
         individuals = pd.concat([control_group, treatment_group])
         individuals = individuals.loc[
             (
@@ -274,7 +278,7 @@ def preprocess_data(self):
             individuals["time"]
             < np.ceil(individuals["fault_time"] / self.timesteps_per_observation) * self.timesteps_per_observation
         ].reset_index()
-        logger.debug(len(individuals.groupby("id")), "individuals")
+        logger.debug(f"{len(individuals.groupby('id'))} individuals")
 
         if len(self.df.loc[self.df["trtrand"] == 0]) == 0:
             raise ValueError(f"No individuals began the control strategy {self.control_strategy}")
@@ -293,20 +297,39 @@ def estimate_hazard_ratio(self):
 
         # Use logistic regression to predict switching given baseline covariates
         logger.debug("Use logistic regression to predict switching given baseline covariates")
-        fit_bl_switch = smf.logit(self.fit_bl_switch_formula, data=self.df).fit()
+        fit_bl_switch_c = smf.logit(self.fit_bl_switch_formula, data=self.df.loc[self.df.trtrand == 0]).fit(
+            method="bfgs"
+        )
+        fit_bl_switch_t = smf.logit(self.fit_bl_switch_formula, data=self.df.loc[self.df.trtrand == 1]).fit(
+            method="bfgs"
+        )
 
-        preprocessed_data["pxo1"] = fit_bl_switch.predict(preprocessed_data)
+        preprocessed_data.loc[preprocessed_data["trtrand"] == 0, "pxo1"] = fit_bl_switch_c.predict(
+            self.df.loc[self.df.trtrand == 0]
+        )
+        preprocessed_data.loc[preprocessed_data["trtrand"] == 1, "pxo1"] = fit_bl_switch_t.predict(
+            self.df.loc[self.df.trtrand == 1]
+        )
 
         # Use logistic regression to predict switching given baseline and time-updated covariates (model S12)
         logger.debug(
             "Use logistic regression to predict switching given baseline and time-updated covariates (model S12)"
         )
-        fit_bltd_switch = smf.logit(
+        fit_bltd_switch_c = smf.logit(
             self.fit_bltd_switch_formula,
-            data=self.df,
-        ).fit()
+            data=self.df.loc[self.df.trtrand == 0],
+        ).fit(method="bfgs")
+        fit_bltd_switch_t = smf.logit(
+            self.fit_bltd_switch_formula,
+            data=self.df.loc[self.df.trtrand == 1],
+        ).fit(method="bfgs")
 
-        preprocessed_data["pxo2"] = fit_bltd_switch.predict(preprocessed_data)
+        preprocessed_data.loc[preprocessed_data["trtrand"] == 0, "pxo2"] = fit_bltd_switch_c.predict(
+            self.df.loc[self.df.trtrand == 0]
+        )
+        preprocessed_data.loc[preprocessed_data["trtrand"] == 1, "pxo2"] = fit_bltd_switch_t.predict(
+            self.df.loc[self.df.trtrand == 1]
+        )
         if (preprocessed_data["pxo2"] == 1).any():
             raise ValueError(
                 "Probability of switching given baseline and time-varying confounders (pxo2) cannot be one."
diff --git a/tests/estimation_tests/test_ipcw_estimator.py b/tests/estimation_tests/test_ipcw_estimator.py
@@ -33,7 +33,7 @@ def test_estimate_hazard_ratio(self):
             eligibility=None,
         )
         estimate, intervals = estimation_model.estimate_hazard_ratio()
-        self.assertEqual(round(estimate["trtrand"], 3), 1.936)
+        self.assertEqual(round(estimate["trtrand"], 3), 1.351)
 
     def test_invalid_treatment_strategies(self):
         timesteps_per_intervention = 1
diff --git a/tests/testing_tests/test_causal_test_adequacy.py b/tests/testing_tests/test_causal_test_adequacy.py
@@ -1,8 +1,7 @@
+import os
 import unittest
 from pathlib import Path
-from statistics import StatisticsError
 import scipy
-import os
 import pandas as pd
 
 from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
@@ -11,11 +10,9 @@
 from causal_testing.testing.causal_test_case import CausalTestCase
 from causal_testing.testing.causal_test_suite import CausalTestSuite
 from causal_testing.testing.causal_test_adequacy import DAGAdequacy
-from causal_testing.testing.causal_test_outcome import NoEffect, Positive, SomeEffect
+from causal_testing.testing.causal_test_outcome import NoEffect, SomeEffect
 from causal_testing.json_front.json_class import JsonUtility, CausalVariables
-from causal_testing.specification.variable import Input, Output, Meta
 from causal_testing.specification.scenario import Scenario
-from causal_testing.specification.causal_specification import CausalSpecification
 from causal_testing.testing.causal_test_adequacy import DataAdequacy
 
 
@@ -145,11 +142,11 @@ def test_data_adequacy_group_by(self):
         adequacy_metric = DataAdequacy(causal_test_case, estimation_model, group_by="id")
         adequacy_metric.measure_adequacy()
         adequacy_dict = adequacy_metric.to_dict()
-        self.assertEqual(round(adequacy_dict["kurtosis"]["trtrand"], 3), -0.336)
+        self.assertEqual(round(adequacy_dict["kurtosis"]["trtrand"], 3), -0.857)
         adequacy_dict.pop("kurtosis")
         self.assertEqual(
             adequacy_dict,
-            {"bootstrap_size": 100, "passing": 28, "successful": 95},
+            {"bootstrap_size": 100, "passing": 32, "successful": 100},
         )
 
     def test_dag_adequacy_dependent(self):

Original file line number	Diff line number	Diff line change
`@@ -33,7 +33,7 @@ def test_estimate_hazard_ratio(self):`
`33`	`33`	`eligibility=None,`
`34`	`34`	`)`
`35`	`35`	`estimate, intervals = estimation_model.estimate_hazard_ratio()`
`36`		`- self.assertEqual(round(estimate["trtrand"], 3), 1.936)`
	`36`	`+ self.assertEqual(round(estimate["trtrand"], 3), 1.351)`
`37`	`37`
`38`	`38`	`def test_invalid_treatment_strategies(self):`
`39`	`39`	`timesteps_per_intervention = 1`