@@ -75,6 +75,8 @@ def __init__(
75
75
self .fit_bltd_switch_formula = fit_bltd_switch_formula
76
76
self .eligibility = eligibility
77
77
self .df = df .sort_values (["id" , "time" ])
78
+ self .len_control_group = None
79
+ self .len_treatment_group = None
78
80
79
81
if total_time is None :
80
82
total_time = (
@@ -249,13 +251,15 @@ def preprocess_data(self):
249
251
treatment_group ["id" ] = [f"t-{ id } " for id in treatment_group ["id" ]]
250
252
assert not treatment_group ["id" ].isnull ().any (), "Null treatment IDs"
251
253
254
+ premature_failures = living_runs .groupby ("id" , sort = False ).filter (lambda gp : gp ["time" ].max () < trt_time )
252
255
logger .debug (
253
- len (control_group .groupby ("id" )),
254
- "control individuals" ,
255
- len (treatment_group .groupby ("id" )),
256
- "treatment individuals" ,
256
+ f"{ len (control_group .groupby ('id' ))} control individuals "
257
+ f"{ len (treatment_group .groupby ('id' ))} treatment individuals "
258
+ f"{ len (premature_failures .groupby ('id' ))} premature failures"
257
259
)
258
260
261
+ self .len_control_group = len (control_group .groupby ("id" ))
262
+ self .len_treatment_group = len (treatment_group .groupby ("id" ))
259
263
individuals = pd .concat ([control_group , treatment_group ])
260
264
individuals = individuals .loc [
261
265
(
@@ -274,7 +278,7 @@ def preprocess_data(self):
274
278
individuals ["time" ]
275
279
< np .ceil (individuals ["fault_time" ] / self .timesteps_per_observation ) * self .timesteps_per_observation
276
280
].reset_index ()
277
- logger .debug (len (individuals .groupby ("id" )), " individuals" )
281
+ logger .debug (f" { len (individuals .groupby ('id' )) } individuals" )
278
282
279
283
if len (self .df .loc [self .df ["trtrand" ] == 0 ]) == 0 :
280
284
raise ValueError (f"No individuals began the control strategy { self .control_strategy } " )
@@ -293,20 +297,39 @@ def estimate_hazard_ratio(self):
293
297
294
298
# Use logistic regression to predict switching given baseline covariates
295
299
logger .debug ("Use logistic regression to predict switching given baseline covariates" )
296
- fit_bl_switch = smf .logit (self .fit_bl_switch_formula , data = self .df ).fit ()
300
+ fit_bl_switch_c = smf .logit (self .fit_bl_switch_formula , data = self .df .loc [self .df .trtrand == 0 ]).fit (
301
+ method = "bfgs"
302
+ )
303
+ fit_bl_switch_t = smf .logit (self .fit_bl_switch_formula , data = self .df .loc [self .df .trtrand == 1 ]).fit (
304
+ method = "bfgs"
305
+ )
297
306
298
- preprocessed_data ["pxo1" ] = fit_bl_switch .predict (preprocessed_data )
307
+ preprocessed_data .loc [preprocessed_data ["trtrand" ] == 0 , "pxo1" ] = fit_bl_switch_c .predict (
308
+ self .df .loc [self .df .trtrand == 0 ]
309
+ )
310
+ preprocessed_data .loc [preprocessed_data ["trtrand" ] == 1 , "pxo1" ] = fit_bl_switch_t .predict (
311
+ self .df .loc [self .df .trtrand == 1 ]
312
+ )
299
313
300
314
# Use logistic regression to predict switching given baseline and time-updated covariates (model S12)
301
315
logger .debug (
302
316
"Use logistic regression to predict switching given baseline and time-updated covariates (model S12)"
303
317
)
304
- fit_bltd_switch = smf .logit (
318
+ fit_bltd_switch_c = smf .logit (
305
319
self .fit_bltd_switch_formula ,
306
- data = self .df ,
307
- ).fit ()
320
+ data = self .df .loc [self .df .trtrand == 0 ],
321
+ ).fit (method = "bfgs" )
322
+ fit_bltd_switch_t = smf .logit (
323
+ self .fit_bltd_switch_formula ,
324
+ data = self .df .loc [self .df .trtrand == 1 ],
325
+ ).fit (method = "bfgs" )
308
326
309
- preprocessed_data ["pxo2" ] = fit_bltd_switch .predict (preprocessed_data )
327
+ preprocessed_data .loc [preprocessed_data ["trtrand" ] == 0 , "pxo2" ] = fit_bltd_switch_c .predict (
328
+ self .df .loc [self .df .trtrand == 0 ]
329
+ )
330
+ preprocessed_data .loc [preprocessed_data ["trtrand" ] == 1 , "pxo2" ] = fit_bltd_switch_t .predict (
331
+ self .df .loc [self .df .trtrand == 1 ]
332
+ )
310
333
if (preprocessed_data ["pxo2" ] == 1 ).any ():
311
334
raise ValueError (
312
335
"Probability of switching given baseline and time-varying confounders (pxo2) cannot be one."
0 commit comments