1212from sklearn .externals import six
1313from sklearn .tree import _tree
1414
15- from .rule import Rule
15+ from .rule import Rule , replace_feature_name
1616
1717INTEGER_TYPES = (numbers .Integral , np .integer )
18-
18+ BASE_FEATURE_NAME = "__C__"
1919
2020class SkopeRules (BaseEstimator ):
2121 """ An easy-interpretable classifier optimizing simple logical rules.
@@ -249,11 +249,17 @@ def fit(self, X, y, sample_weight=None):
249249 self .estimators_samples_ = []
250250 self .estimators_features_ = []
251251
252- # default columns names of the form ['c0', 'c1', ...]:
253- feature_names_ = (self .feature_names if self .feature_names is not None
254- else ['c' + x for x in
255- np .arange (X .shape [1 ]).astype (str )])
252+ # default column names of the form ['__C__0', '__C__1', ...]:
253+ feature_names_ = [BASE_FEATURE_NAME + x for x in
254+ np .arange (X .shape [1 ]).astype (str )]
255+ if self .feature_names is not None :
256+ self .feature_dict_ = {BASE_FEATURE_NAME + str (i ): feat
257+ for i , feat in enumerate (self .feature_names )}
258+ else :
259+ self .feature_dict_ = {BASE_FEATURE_NAME + str (i ): feat
260+ for i , feat in enumerate (feature_names_ )}
256261 self .feature_names_ = feature_names_
262+
257263 clfs = []
258264 regs = []
259265
@@ -356,6 +362,10 @@ def fit(self, X, y, sample_weight=None):
356362 for rule in
357363 [Rule (r , args = args ) for r , args in rules_ ]]
358364
365+
366+
367+
368+
359369 # keep only rules meeting the precision_min and recall_min thresholds:
360370 for rule , score in rules_ :
361371 if score [0 ] >= self .precision_min and score [1 ] >= self .recall_min :
@@ -377,7 +387,14 @@ def fit(self, X, y, sample_weight=None):
377387 # Deduplicate the rules using a semantic tree
378388 if self .max_depth_duplication is not None :
379389 self .rules_ = self .deduplicate (self .rules_ )
390+
380391 self .rules_ = sorted (self .rules_ , key = lambda x : - self .f1_score (x ))
392+ self .rules_without_feature_names_ = self .rules_
393+
394+ # Replace generic feature names with the real feature names
394+ # (identity mapping when self.feature_names is None)
395+ self .rules_ = [(replace_feature_name (rule , self .feature_dict_ ), perf )
396+ for rule , perf in self .rules_ ]
397+
381398 return self
382399
383400 def predict (self , X ):
@@ -432,7 +449,7 @@ def decision_function(self, X):
432449 % (X .shape [1 ], self .n_features_ ))
433450
434451 df = pandas .DataFrame (X , columns = self .feature_names_ )
435- selected_rules = self .rules_
452+ selected_rules = self .rules_without_feature_names_
436453
437454 scores = np .zeros (X .shape [0 ])
438455 for (r , w ) in selected_rules :
0 commit comments