Skip to content

Commit e723256

Browse files
author
[zebinyang]
committed
Change the glmtree classifier's default predict function from predict_proba to decision_function; bump version to 0.2.2
1 parent e6f43f8 commit e723256

File tree

6 files changed

+44
-30
lines changed

6 files changed

+44
-30
lines changed

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from setuptools import setup
22

33
setup(name='simtree',
4-
version='0.2.1',
4+
version='0.2.2',
55
description='Single-index model tree',
66
url='https://github.com/ZebinYang/SIMTree',
77
author='Zebin Yang',

simtree/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,5 @@
88
"SIMTreeRegressor", "SIMTreeClassifier",
99
"CustomMobTreeRegressor", "CustomMobTreeClassifier"]
1010

11-
__version__ = '0.2.1'
11+
__version__ = '0.2.2'
1212
__author__ = 'Zebin Yang'

simtree/customtree.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,6 @@ def build_leaf(self, sample_indice):
9090
cv=5, refit="auc", n_jobs=1, error_score=np.nan)
9191
grid.fit(self.x[sample_indice], self.y[sample_indice].ravel())
9292
best_estimator = grid.best_estimator_
93-
predict_func = lambda x: best_estimator.predict_proba(x)[:, 1]
93+
predict_func = lambda x: best_estimator.decision_function(x)
9494
best_impurity = self.get_loss(self.y[sample_indice], best_estimator.predict_proba(self.x[sample_indice])[:, 1])
9595
return predict_func, best_estimator, best_impurity

simtree/glmtree.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,6 @@ def build_leaf(self, sample_indice):
9393
best_estimator.intercept_ = best_estimator.intercept_ - np.dot(mx, best_estimator.coef_.T)
9494
xmin = np.min(np.dot(self.x[sample_indice], best_estimator.coef_.ravel()))
9595
xmax = np.max(np.dot(self.x[sample_indice], best_estimator.coef_.ravel()))
96-
predict_func = lambda x: 1 / (1 + np.exp(- np.clip(np.dot(x, best_estimator.coef_.ravel()), xmin, xmax) - best_estimator.intercept_))
96+
predict_func = lambda x: np.clip(np.dot(x, best_estimator.coef_.ravel()), xmin, xmax) + best_estimator.intercept_
9797
best_impurity = self.get_loss(self.y[sample_indice], best_estimator.predict_proba(self.x[sample_indice])[:, 1])
9898
return predict_func, best_estimator, best_impurity

simtree/mobtree.py

Lines changed: 39 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from matplotlib import pyplot as plt
55
from abc import ABCMeta, abstractmethod
66

7+
from sklearn.utils.extmath import softmax
78
from sklearn.preprocessing import LabelBinarizer
89
from sklearn.utils import check_X_y, column_or_1d
910
from sklearn.utils.validation import check_is_fitted
@@ -475,41 +476,53 @@ def decision_path_indice(self, x, node_id):
475476

476477
def decision_path(self, x):
477478

479+
check_is_fitted(self, "tree")
480+
478481
n_samples = x.shape[0]
479482
path_all = np.zeros((n_samples, self.node_count))
480-
for idx, row in enumerate(x):
483+
for node_id in self.leaf_idx_list:
481484
path = []
482-
node = self.tree[1]
483-
while not node['is_leaf']:
484-
path.append(node["node_id"] - 1)
485-
if row[node['feature']] <= node['threshold']:
486-
node = self.tree[node['left_child_id']]
485+
idx = node_id
486+
sample_indice = np.ones((x.shape[0], )).astype(np.bool)
487+
while True:
488+
path.append(idx - 1)
489+
current_node = self.tree[idx]
490+
if current_node["parent_id"] is None:
491+
break
487492
else:
488-
node = self.tree[node['right_child_id']]
489-
path.append(node["node_id"] - 1)
490-
path_all[idx][path] = 1
493+
parent_node = self.tree[current_node["parent_id"]]
494+
if current_node["is_left"]:
495+
sample_indice = np.logical_and(sample_indice, x[:, parent_node["feature"]] <= parent_node["threshold"])
496+
else:
497+
sample_indice = np.logical_and(sample_indice, x[:, parent_node["feature"]] > parent_node["threshold"])
498+
idx = current_node["parent_id"]
499+
if sample_indice.sum() > 0:
500+
path_all[np.ix_(np.where(sample_indice)[0], path)] = 1
491501
return path_all
492502

493503
def decision_function(self, x):
494504

495505
check_is_fitted(self, "tree")
496506

497-
leaf_idx = []
498507
x = np.array(x)
499-
for row in x:
500-
node = self.tree[1]
501-
while not node['is_leaf']:
502-
if row[node['feature']] <= node['threshold']:
503-
node = self.tree[node['left_child_id']]
504-
else:
505-
node = self.tree[node['right_child_id']]
506-
leaf_idx.append(node['node_id'])
507-
508508
n_samples = x.shape[0]
509509
pred = np.zeros((n_samples))
510-
for node_id in np.unique(leaf_idx):
511-
sample_indice = np.array(leaf_idx) == node_id
512-
pred[sample_indice] = self.tree[node_id]['predict_func'](x[sample_indice, :]).ravel()
510+
for node_id in self.leaf_idx_list:
511+
idx = node_id
512+
sample_indice = np.ones((x.shape[0], )).astype(np.bool)
513+
while True:
514+
current_node = self.tree[idx]
515+
if current_node["parent_id"] is None:
516+
break
517+
else:
518+
parent_node = self.tree[current_node["parent_id"]]
519+
if current_node["is_left"]:
520+
sample_indice = np.logical_and(sample_indice, x[:, parent_node["feature"]] <= parent_node["threshold"])
521+
else:
522+
sample_indice = np.logical_and(sample_indice, x[:, parent_node["feature"]] > parent_node["threshold"])
523+
idx = current_node["parent_id"]
524+
if sample_indice.sum() > 0:
525+
pred[sample_indice] = self.tree[node_id]['predict_func'](x[sample_indice, :]).ravel()
513526
return pred
514527

515528

@@ -647,9 +660,10 @@ def evaluate_estimator(self, estimator, x, y):
647660
return loss
648661

649662
def predict_proba(self, x):
650-
proba = self.decision_function(x).reshape(-1, 1)
651-
return np.hstack([1 - proba, proba])
663+
pred = self.decision_function(x).reshape(-1, 1)
664+
pred_proba = softmax(np.hstack([-pred, pred]) / 2, copy=False)
665+
return pred_proba
652666

653667
def predict(self, x):
654-
pred_proba = self.decision_function(x)
668+
pred_proba = self.predict_proba(x)
655669
return self._label_binarizer.inverse_transform(pred_proba)

simtree/simtree.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -489,6 +489,6 @@ def build_leaf(self, sample_indice):
489489
cv=5, refit="auc", n_jobs=1, error_score=np.nan)
490490
grid.fit(self.x[sample_indice], self.y[sample_indice].ravel())
491491
best_estimator = grid.best_estimator_
492-
predict_func = lambda x: best_estimator.predict_proba(x)[:, 1]
492+
predict_func = lambda x: best_estimator.decision_function(x)
493493
best_impurity = self.get_loss(self.y[sample_indice], best_estimator.predict_proba(self.x[sample_indice])[:, 1])
494494
return predict_func, best_estimator, best_impurity

0 commit comments

Comments (0)