@@ -113,7 +113,8 @@ def fit(self, X, y=None, feature_names=None):
113113 self .feature_names = np .array (list (self .feature_dict_ .values ()))
114114
115115 extracted_rules = self ._extract_rules (X , y )
116- self .rules_without_feature_names_ , self .coef , self .intercept = self ._score_rules (X , y , extracted_rules )
116+ self .rules_without_feature_names_ , self .coef , self .intercept = self ._score_rules (
117+ X , y , extracted_rules )
117118 self .rules_ = [
118119 replace_feature_name (rule , self .feature_dict_ ) for rule in self .rules_without_feature_names_
119120 ]
@@ -160,7 +161,8 @@ def predict_proba(self, X):
160161 X = X .toarray ()
161162 X = check_array (X )
162163 continuous_output = self ._predict_continuous_output (X )
163- logits = np .vstack ((1 - continuous_output , continuous_output )).transpose ()
164+ logits = np .vstack (
165+ (1 - continuous_output , continuous_output )).transpose ()
164166 return softmax (logits , axis = 1 )
165167
166168 def transform (self , X = None , rules = None ):
@@ -178,9 +180,15 @@ def transform(self, X=None, rules=None):
178180 Transformed data set
179181 """
180182 df = pd .DataFrame (X , columns = self .feature_placeholders )
183+ print ('df' , df .dtypes , df .head ())
181184 X_transformed = np .zeros ((X .shape [0 ], len (rules )))
182185 for i , r in enumerate (rules ):
183186 features_r_uses = [term .split (' ' )[0 ] for term in r .split (' and ' )]
187+ # print('r', r)
188+ # print('feats', df[features_r_uses])
189+ # print('ans', df[features_r_uses].query(r))
190+ # print(
191+ # 'tra', X_transformed[df[features_r_uses].query(r).index.values, i])
184192 X_transformed [df [features_r_uses ].query (r ).index .values , i ] = 1
185193 return X_transformed
186194
@@ -216,21 +224,26 @@ def _get_rules(self, exclude_zero_coef=False, subregion=None):
216224 subregion = np .array (subregion )
217225 importance = sum (abs (coef ) * abs ([x [i ] for x in self .winsorizer .trim (subregion )] - self .mean [i ])) / len (
218226 subregion )
219- output_rules += [(self .feature_names [i ], 'linear' , coef , 1 , importance )]
227+ output_rules += [(self .feature_names [i ],
228+ 'linear' , coef , 1 , importance )]
220229
221230 # Add rules
222231 for i in range (0 , len (self .rules_ )):
223232 rule = rule_ensemble [i ]
224233 coef = self .coef [i + n_features ]
225234
226235 if subregion is None :
227- importance = abs (coef ) * (rule .support * (1 - rule .support )) ** (1 / 2 )
236+ importance = abs (coef ) * (rule .support *
237+ (1 - rule .support )) ** (1 / 2 )
228238 else :
229239 rkx = self .transform (subregion , [rule ])[:, - 1 ]
230- importance = sum (abs (coef ) * abs (rkx - rule .support )) / len (subregion )
240+ importance = sum (
241+ abs (coef ) * abs (rkx - rule .support )) / len (subregion )
231242
232- output_rules += [(self .rules_ [i ].rule , 'rule' , coef , rule .support , importance )]
233- rules = pd .DataFrame (output_rules , columns = ["rule" , "type" , "coef" , "support" , "importance" ])
243+ output_rules += [(self .rules_ [i ].rule , 'rule' ,
244+ coef , rule .support , importance )]
245+ rules = pd .DataFrame (output_rules , columns = [
246+ "rule" , "type" , "coef" , "support" , "importance" ])
234247 if exclude_zero_coef :
235248 rules = rules .ix [rules .coef != 0 ]
236249 return rules
@@ -292,7 +305,8 @@ def _score_rules(self, X, y, rules) -> Tuple[List[Rule], List[float], float]:
292305 # no rules fit and self.include_linear == False
293306 if X_concat .shape [1 ] == 0 :
294307 return [], [], 0
295- prediction_task = 'regression' if isinstance (self , RegressorMixin ) else 'classification'
308+ prediction_task = 'regression' if isinstance (
309+ self , RegressorMixin ) else 'classification'
296310 return score_linear (X_concat , y , rules ,
297311 prediction_task = prediction_task ,
298312 max_rules = self .max_rules ,
0 commit comments