@@ -142,6 +142,9 @@ def _run_logistic_regression(self) -> RegressionResultsWrapper:
142
142
cols += [x for x in self .adjustment_set if x not in cols ]
143
143
treatment_and_adjustments_cols = reduced_df [cols + ["Intercept" ]]
144
144
outcome_col = reduced_df [list (self .outcome )]
145
+ for col in treatment_and_adjustments_cols :
146
+ if str (treatment_and_adjustments_cols .dtypes [col ]) == "object" :
147
+ treatment_and_adjustments_cols = pd .get_dummies (treatment_and_adjustments_cols , columns = [col ], drop_first = True )
145
148
regression = sm .Logit (outcome_col , treatment_and_adjustments_cols )
146
149
model = regression .fit ()
147
150
return model
@@ -166,6 +169,10 @@ def estimate_control_treatment(self) -> tuple[pd.Series, pd.Series]:
166
169
x ["1/" + t ] = 1 / x [t ]
167
170
for a , b in self .product_terms :
168
171
x [f"{ a } *{ b } " ] = x [a ] * x [b ]
172
+
173
+ for col in x :
174
+ if str (x .dtypes [col ]) == "object" :
175
+ x = pd .get_dummies (x , columns = [col ], drop_first = True )
169
176
x = x [model .params .index ]
170
177
171
178
y = model .predict (x )
@@ -360,6 +367,8 @@ def estimate_control_treatment(self) -> tuple[pd.Series, pd.Series]:
360
367
"""
361
368
model = self ._run_linear_regression ()
362
369
self .model = model
370
+ print (model .summary ())
371
+
363
372
364
373
x = pd .DataFrame ()
365
374
x [self .treatment [0 ]] = [self .treatment_values , self .control_values ]
@@ -376,13 +385,14 @@ def estimate_control_treatment(self) -> tuple[pd.Series, pd.Series]:
376
385
print (x )
377
386
for col in x :
378
387
if str (x .dtypes [col ]) == "object" :
379
- x [col ] = [v .value for v in x [ ]]
380
388
x = pd .get_dummies (x , columns = [col ], drop_first = True )
381
389
print ("dummy" )
382
390
print (x )
383
391
x = x [model .params .index ]
384
392
385
393
y = model .get_prediction (x ).summary_frame ()
394
+
395
+ print ("control" , y .iloc [1 ], "treatment" , y .iloc [0 ])
386
396
return y .iloc [1 ], y .iloc [0 ]
387
397
388
398
def estimate_risk_ratio (self ) -> tuple [float , list [float , float ]]:
@@ -406,6 +416,7 @@ def estimate_ate_calculated(self) -> tuple[float, list[float, float]]:
406
416
:return: The average treatment effect and the 95% Wald confidence intervals.
407
417
"""
408
418
control_outcome , treatment_outcome = self .estimate_control_treatment ()
419
+ assert False
409
420
ci_low = treatment_outcome ["mean_ci_lower" ] - control_outcome ["mean_ci_upper" ]
410
421
ci_high = treatment_outcome ["mean_ci_upper" ] - control_outcome ["mean_ci_lower" ]
411
422
@@ -461,8 +472,6 @@ def _run_linear_regression(self) -> RegressionResultsWrapper:
461
472
cols += [x for x in self .adjustment_set if x not in cols ]
462
473
treatment_and_adjustments_cols = reduced_df [cols + ["Intercept" ]]
463
474
outcome_col = reduced_df [list (self .outcome )]
464
- print ("train_data" )
465
- print (treatment_and_adjustments_cols )
466
475
for col in treatment_and_adjustments_cols :
467
476
if str (treatment_and_adjustments_cols .dtypes [col ]) == "object" :
468
477
treatment_and_adjustments_cols = pd .get_dummies (treatment_and_adjustments_cols , columns = [col ], drop_first = True )
0 commit comments