@@ -622,7 +622,6 @@ def __init__(
622
622
fit_bltd_switch_formula : str ,
623
623
eligibility = None ,
624
624
alpha : float = 0.05 ,
625
- query : str = "" ,
626
625
):
627
626
super ().__init__ (
628
627
[c .variable for c in treatment_strategy .capabilities ],
@@ -633,7 +632,7 @@ def __init__(
633
632
df ,
634
633
None ,
635
634
alpha = alpha ,
636
- query = query ,
635
+ query = "" ,
637
636
)
638
637
self .timesteps_per_intervention = timesteps_per_intervention
639
638
self .control_strategy = control_strategy
@@ -645,6 +644,7 @@ def __init__(
645
644
self .fit_bltd_switch_formula = fit_bltd_switch_formula
646
645
self .eligibility = eligibility
647
646
self .df = df
647
+ self .preprocess_data ()
648
648
649
649
def add_modelling_assumptions (self ):
650
650
self .modelling_assumptions .append ("The variables in the data vary over time." )
@@ -764,27 +764,27 @@ def preprocess_data(self):
764
764
if len (individuals ) == 0 :
765
765
raise ValueError ("No individuals followed either strategy." )
766
766
767
- return pd .concat (individuals )
767
+ self . df = pd .concat (individuals )
768
768
769
769
def estimate_hazard_ratio (self ):
770
770
"""
771
771
Estimate the hazard ratio.
772
772
"""
773
773
774
- preprocessed_data = self .preprocess_data ()
775
-
776
- if preprocessed_data ["fault_t_do" ].sum () == 0 :
774
+ if self .df ["fault_t_do" ].sum () == 0 :
777
775
raise ValueError ("No recorded faults" )
778
776
777
+ preprocessed_data = self .df .loc [self .df ["xo_t_do" ] == 0 ].copy ()
778
+
779
779
# Use logistic regression to predict switching given baseline covariates
780
- fit_bl_switch = smf .logit (self .fit_bl_switch_formula , data = preprocessed_data ).fit ()
780
+ fit_bl_switch = smf .logit (self .fit_bl_switch_formula , data = self . df ).fit ()
781
781
782
782
preprocessed_data ["pxo1" ] = fit_bl_switch .predict (preprocessed_data )
783
783
784
784
# Use logistic regression to predict switching given baseline and time-updated covariates (model S12)
785
785
fit_bltd_switch = smf .logit (
786
786
self .fit_bltd_switch_formula ,
787
- data = preprocessed_data ,
787
+ data = self . df ,
788
788
).fit ()
789
789
790
790
preprocessed_data ["pxo2" ] = fit_bltd_switch .predict (preprocessed_data )
@@ -808,23 +808,21 @@ def estimate_hazard_ratio(self):
808
808
preprocessed_data ["weight" ] = 1 / preprocessed_data ["denom" ]
809
809
preprocessed_data ["sweight" ] = preprocessed_data ["num" ] / preprocessed_data ["denom" ]
810
810
811
- preprocessed_data_km = preprocessed_data .loc [preprocessed_data ["xo_t_do" ] == 0 ].copy ()
812
- preprocessed_data_km ["tin" ] = preprocessed_data_km ["time" ]
813
- preprocessed_data_km ["tout" ] = pd .concat (
814
- [(preprocessed_data_km ["time" ] + self .timesteps_per_intervention ), preprocessed_data_km ["fault_time" ]],
811
+ preprocessed_data ["tin" ] = preprocessed_data ["time" ]
812
+ preprocessed_data ["tout" ] = pd .concat (
813
+ [(preprocessed_data ["time" ] + self .timesteps_per_intervention ), preprocessed_data ["fault_time" ]],
815
814
axis = 1 ,
816
815
).min (axis = 1 )
817
816
818
- assert (preprocessed_data_km ["tin" ] <= preprocessed_data_km ["tout" ]).all (), (
819
- f"Left before joining\n "
820
- f"{ preprocessed_data_km .loc [preprocessed_data_km ['tin' ] >= preprocessed_data_km ['tout' ]]} "
817
+ assert (preprocessed_data ["tin" ] <= preprocessed_data ["tout" ]).all (), (
818
+ f"Left before joining\n " f"{ preprocessed_data .loc [preprocessed_data ['tin' ] >= preprocessed_data ['tout' ]]} "
821
819
)
822
820
823
821
# IPCW step 4: Use these weights in a weighted analysis of the outcome model
824
822
# Estimate the KM graph and IPCW hazard ratio using Cox regression.
825
823
cox_ph = CoxPHFitter ()
826
824
cox_ph .fit (
827
- df = preprocessed_data_km ,
825
+ df = preprocessed_data ,
828
826
duration_col = "tout" ,
829
827
event_col = "fault_t_do" ,
830
828
weights_col = "weight" ,
0 commit comments