1414
1515INTEGER_TYPES = (numbers .Integral , np .integer )
1616
17-
1817class SkopeRules (BaseEstimator ):
1918 """ An easily interpretable classifier optimizing simple logical rules.
2019
@@ -35,11 +34,6 @@ class SkopeRules(BaseEstimator):
3534 The number of base estimators (rules) to use for prediction. More are
3635 built before selection. All are available in the estimators_ attribute.
3736
38- similarity_thres : float, optional (default=0.99)
39- Similarity threshold between rules. Rules too similar
40- (> similarity_thres) are fused. The similarity between two rules is
41- computed according to the formula `# {intersection} / # {union}`.
42-
4337 max_samples : int or float, optional (default=.8)
4438 The number of samples to draw from X to train each decision tree, from
4539 which rules are generated and selected.
@@ -66,7 +60,8 @@ class SkopeRules(BaseEstimator):
6660 than min_samples_split samples.
6761
6862 max_depth_duplication : integer or None, optional (default=None)
69- The maximum depth of the decision tree for rule deduplication, if None then no deduplication occurs.
63+ The maximum depth of the decision tree for rule deduplication,
64+ if None then no deduplication occurs.
7065
7166 max_features : int, float, string or None, optional (default=1.)
7267 The number of features considered (by each decision tree) when looking
@@ -140,13 +135,12 @@ def __init__(self,
140135 precision_min = 0.5 ,
141136 recall_min = 0.01 ,
142137 n_estimators = 10 ,
143- similarity_thres = 0.95 ,
144138 max_samples = .8 ,
145139 max_samples_features = 1. ,
146140 bootstrap = False ,
147141 bootstrap_features = False ,
148142 max_depth = 3 ,
149- max_depth_duplication = 3 ,
143+ max_depth_duplication = None ,
150144 max_features = 1. ,
151145 min_samples_split = 2 ,
152146 n_jobs = 1 ,
@@ -156,7 +150,6 @@ def __init__(self,
156150 self .recall_min = recall_min
157151 self .feature_names = feature_names
158152 self .n_estimators = n_estimators
159- self .similarity_thres = similarity_thres
160153 self .max_samples = max_samples
161154 self .max_samples_features = max_samples_features
162155 self .bootstrap = bootstrap
@@ -214,11 +207,6 @@ def fit(self, X, y, sample_weight=None):
214207 % set (self .classes_ ))
215208 y = (y > 0 )
216209
217- # ensure similarity_thres is in (0., 1.]:
218- if not (0. < self .similarity_thres <= 1. ):
219- raise ValueError ("similarity_thres must be in (0, 1], got %r"
220- % self .similarity_thres )
221-
222210 # ensure that max_samples is in [1, n_samples]:
223211 n_samples = X .shape [0 ]
224212
@@ -640,8 +628,10 @@ def split_with_best_feature(rules, depth, exceptions=[]):
640628 rules_splitted [2 ].append (rule )
641629
642630 # Choose best term
643- return [split_with_best_feature (ruleset , depth - 1 , exceptions = exceptions + [most_represented_term ]) for ruleset in rules_splitted ]
644-
631+ return [split_with_best_feature (ruleset ,
632+ depth - 1 ,
633+ exceptions = exceptions + [most_represented_term ])
634+ for ruleset in rules_splitted ]
645635
646636 def breadth_first_search (rules , leaves = None ):
647637 if len (rules ) == 0 or not isinstance (rules [0 ], list ):
0 commit comments