Skip to content

Commit 928f2e5

Browse files
committed
limit pandas version to 2.1.4 (issues with query function)
1 parent b250a5f commit 928f2e5

File tree

3 files changed

+34
-17
lines changed

3 files changed

+34
-17
lines changed

imodels/rule_set/rule_fit.py

Lines changed: 22 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -113,7 +113,8 @@ def fit(self, X, y=None, feature_names=None):
113113
self.feature_names = np.array(list(self.feature_dict_.values()))
114114

115115
extracted_rules = self._extract_rules(X, y)
116-
self.rules_without_feature_names_, self.coef, self.intercept = self._score_rules(X, y, extracted_rules)
116+
self.rules_without_feature_names_, self.coef, self.intercept = self._score_rules(
117+
X, y, extracted_rules)
117118
self.rules_ = [
118119
replace_feature_name(rule, self.feature_dict_) for rule in self.rules_without_feature_names_
119120
]
@@ -160,7 +161,8 @@ def predict_proba(self, X):
160161
X = X.toarray()
161162
X = check_array(X)
162163
continuous_output = self._predict_continuous_output(X)
163-
logits = np.vstack((1 - continuous_output, continuous_output)).transpose()
164+
logits = np.vstack(
165+
(1 - continuous_output, continuous_output)).transpose()
164166
return softmax(logits, axis=1)
165167

166168
def transform(self, X=None, rules=None):
@@ -178,9 +180,15 @@ def transform(self, X=None, rules=None):
178180
Transformed data set
179181
"""
180182
df = pd.DataFrame(X, columns=self.feature_placeholders)
183+
print('df', df.dtypes, df.head())
181184
X_transformed = np.zeros((X.shape[0], len(rules)))
182185
for i, r in enumerate(rules):
183186
features_r_uses = [term.split(' ')[0] for term in r.split(' and ')]
187+
# print('r', r)
188+
# print('feats', df[features_r_uses])
189+
# print('ans', df[features_r_uses].query(r))
190+
# print(
191+
# 'tra', X_transformed[df[features_r_uses].query(r).index.values, i])
184192
X_transformed[df[features_r_uses].query(r).index.values, i] = 1
185193
return X_transformed
186194

@@ -216,21 +224,26 @@ def _get_rules(self, exclude_zero_coef=False, subregion=None):
216224
subregion = np.array(subregion)
217225
importance = sum(abs(coef) * abs([x[i] for x in self.winsorizer.trim(subregion)] - self.mean[i])) / len(
218226
subregion)
219-
output_rules += [(self.feature_names[i], 'linear', coef, 1, importance)]
227+
output_rules += [(self.feature_names[i],
228+
'linear', coef, 1, importance)]
220229

221230
# Add rules
222231
for i in range(0, len(self.rules_)):
223232
rule = rule_ensemble[i]
224233
coef = self.coef[i + n_features]
225234

226235
if subregion is None:
227-
importance = abs(coef) * (rule.support * (1 - rule.support)) ** (1 / 2)
236+
importance = abs(coef) * (rule.support *
237+
(1 - rule.support)) ** (1 / 2)
228238
else:
229239
rkx = self.transform(subregion, [rule])[:, -1]
230-
importance = sum(abs(coef) * abs(rkx - rule.support)) / len(subregion)
240+
importance = sum(
241+
abs(coef) * abs(rkx - rule.support)) / len(subregion)
231242

232-
output_rules += [(self.rules_[i].rule, 'rule', coef, rule.support, importance)]
233-
rules = pd.DataFrame(output_rules, columns=["rule", "type", "coef", "support", "importance"])
243+
output_rules += [(self.rules_[i].rule, 'rule',
244+
coef, rule.support, importance)]
245+
rules = pd.DataFrame(output_rules, columns=[
246+
"rule", "type", "coef", "support", "importance"])
234247
if exclude_zero_coef:
235248
rules = rules.ix[rules.coef != 0]
236249
return rules
@@ -292,7 +305,8 @@ def _score_rules(self, X, y, rules) -> Tuple[List[Rule], List[float], float]:
292305
# no rules fit and self.include_linear == False
293306
if X_concat.shape[1] == 0:
294307
return [], [], 0
295-
prediction_task = 'regression' if isinstance(self, RegressorMixin) else 'classification'
308+
prediction_task = 'regression' if isinstance(
309+
self, RegressorMixin) else 'classification'
296310
return score_linear(X_concat, y, rules,
297311
prediction_task=prediction_task,
298312
max_rules=self.max_rules,

setup.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -8,12 +8,12 @@
88

99
required_pypi = [
1010
'matplotlib',
11-
'mlxtend>=0.18.0', # some lower version are missing fpgrowth
11+
'mlxtend>=0.18.0', # some lower versions are missing fpgrowth
1212
'numpy',
13-
'pandas',
13+
'pandas<=2.1.4', # pandas 2.2 introduced some issues with the query function
1414
'requests', # used in c4.5
1515
'scipy',
16-
'scikit-learn>=1.2.0', # recently updates this
16+
'scikit-learn>=1.2.0', # recently updated this
1717
'tqdm', # used in BART
1818
]
1919

tests/classification_continuous_inputs_test.py

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -8,18 +8,20 @@ class TestClassClassificationContinuousInputs:
88
'''Tests simple classification for different models. Note: still doesn't test all the models!
99
'''
1010

11-
def setup(self):
11+
def setup_method(self):
1212
np.random.seed(13)
1313
random.seed(13)
1414
self.n = 40
1515
self.p = 2
1616
self.X_classification_binary = np.random.randn(self.n, self.p)
17-
17+
1818
# y = x0 > 0
19-
self.y_classification_binary = (self.X_classification_binary[:, 0] > 0).astype(int)
19+
self.y_classification_binary = (
20+
self.X_classification_binary[:, 0] > 0).astype(int)
2021

2122
# flip labels for last few
22-
self.y_classification_binary[-2:] = 1 - self.y_classification_binary[-2:]
23+
self.y_classification_binary[-2:] = 1 - \
24+
self.y_classification_binary[-2:]
2325

2426
def test_classification_binary(self):
2527
'''Test imodels on basic binary classification task
@@ -58,7 +60,8 @@ def test_classification_binary(self):
5860
preds_proba = m.predict_proba(X)
5961
assert len(preds_proba.shape) == 2, 'preds_proba has 2 columns'
6062
assert preds_proba.shape[1] == 2, 'preds_proba has 2 columns'
61-
assert np.max(preds_proba) < 1.1, 'preds_proba has no values over 1'
63+
assert np.max(
64+
preds_proba) < 1.1, 'preds_proba has no values over 1'
6265
assert (np.argmax(preds_proba, axis=1) == preds).all(), ("predict_proba and "
6366
"predict agree")
6467

@@ -70,5 +73,5 @@ def test_classification_binary(self):
7073

7174
if __name__ == '__main__':
7275
t = TestClassClassificationContinuousInputs()
73-
t.setup()
76+
t.setup_method()
7477
t.test_classification_binary()

0 commit comments

Comments
 (0)