@@ -103,19 +103,24 @@ def __init__(
103
103
outcome : str ,
104
104
df : pd .DataFrame = None ,
105
105
effect_modifiers : dict [Variable :Any ] = None ,
106
- intercept : int = 1 ,
106
+ formula : str = None
107
107
):
108
108
super ().__init__ (treatment , treatment_value , control_value , adjustment_set , outcome , df , effect_modifiers )
109
109
110
+ self .model = None
111
+ if effect_modifiers is None :
112
+ effect_modifiers = []
113
+
114
+ if formula is not None :
115
+ # TODO: validate it
116
+ self .formula = formula
117
+ else :
118
+ terms = [treatment ] + sorted (list (adjustment_set )) + sorted (list (effect_modifiers ))
119
+ self .formula = f"{ outcome } ~ { '+' .join (((terms )))} "
120
+
110
121
for term in self .effect_modifiers :
111
122
self .adjustment_set .add (term )
112
123
113
- self .product_terms = []
114
- self .square_terms = []
115
- self .inverse_terms = []
116
- self .intercept = intercept
117
- self .model = None
118
-
119
124
def add_modelling_assumptions (self ):
120
125
"""
121
126
Add modelling assumptions to the estimator. This is a list of strings which list the modelling assumptions that
@@ -143,7 +148,7 @@ def _run_logistic_regression(self, data) -> RegressionResultsWrapper:
143
148
logger .debug (reduced_df [necessary_cols ])
144
149
145
150
# 2. Add intercept
146
- reduced_df ["Intercept" ] = self .intercept
151
+ reduced_df ["Intercept" ] = 1 # self.intercept
147
152
148
153
# 3. Estimate the unit difference in outcome caused by unit difference in treatment
149
154
cols = [self .treatment ]
@@ -155,35 +160,34 @@ def _run_logistic_regression(self, data) -> RegressionResultsWrapper:
155
160
treatment_and_adjustments_cols = pd .get_dummies (
156
161
treatment_and_adjustments_cols , columns = [col ], drop_first = True
157
162
)
158
- regression = sm .Logit (outcome_col , treatment_and_adjustments_cols )
159
- model = regression .fit ()
163
+ # regression = sm.Logit(outcome_col, treatment_and_adjustments_cols) # This one works
164
+ regression = smf .logit (formula = self .formula , data = self .df ) # This one doesn't work
165
+ model = regression .fit (disp = 0 )
160
166
return model
161
167
162
def estimate(self, data: pd.DataFrame, adjustment_config=None) -> RegressionResultsWrapper:
    """Fit the logistic regression model and predict the outcome for the treatment and control values.

    A two-row frame is built with the same columns as ``self.df``: the first row carries
    ``self.treatment_value`` and the second ``self.control_value``. Values from
    ``adjustment_config`` and ``self.effect_modifiers`` are broadcast to both rows. The frame is
    then expanded with the right-hand side of ``self.formula`` and handed to the fitted model.

    :param data: A pandas dataframe containing execution data from the system-under-test.
    :param adjustment_config: Optional mapping of column name to the value that column should
                              take in both prediction rows. Defaults to an empty mapping.
    :return: Whatever ``model.predict`` returns for the two rows (treatment row first, control
             row second). NOTE(review): the annotation is kept from the original signature, but the
             value returned here is the prediction, not the fitted results object.
    """
    if adjustment_config is None:
        adjustment_config = {}

    model = self._run_logistic_regression(data)
    self.model = model

    # Start from an empty frame that knows every column of the training data so that the formula
    # can reference any of them by name.
    x = pd.DataFrame(columns=self.df.columns)
    # The treatment column must be assigned first: it is what creates the two rows. A scalar
    # assigned while the frame is still empty would end up as NaN once the rows appear.
    x[self.treatment] = [self.treatment_value, self.control_value]
    x["Intercept"] = 1
    for k, v in adjustment_config.items():
        x[k] = v
    for k, v in self.effect_modifiers.items():
        x[k] = v

    # Columns that were not given a value above are still NaN here.
    # NOTE(review): patsy's default NA handling drops rows with missing values in variables used
    # by the formula, so a formula term with no configured value may yield an empty prediction -
    # confirm callers always supply every adjustment variable.
    x = dmatrix(self.formula.split("~")[1], x, return_type="dataframe")

    # The design matrix is purely numeric, so no dummy-encoding of object columns is needed here.
    # NOTE(review): if the model was fitted through the statsmodels formula API, predict() may
    # apply the formula to this already-expanded matrix a second time - confirm whether
    # ``transform=False`` is required.
    return model.predict(x)
188
192
189
193
def estimate_control_treatment (self , bootstrap_size = 100 ) -> tuple [pd .Series , pd .Series ]:
0 commit comments