
Commit 47267ed

Author: ZebinYang (committed)
skip grid search cv in build leaf if param_dict is empty; version 0.2.4
Parent: f1a780e

File tree: 6 files changed (+95 −42 lines)


examples/demo.ipynb

Lines changed: 2 additions & 2 deletions
@@ -147,7 +147,7 @@
    ],
    "metadata": {
     "kernelspec": {
-     "display_name": "py37",
+     "display_name": "Python (py37)",
      "language": "python",
      "name": "py37"
     },
@@ -161,7 +161,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.7"
+   "version": "3.7.9"
   },
   "latex_envs": {
    "LaTeX_envs_menu_present": true,

setup.py

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 from setuptools import setup
 
 setup(name='simtree',
-      version='0.2.3',
+      version='0.2.4',
       description='Single-index model tree',
       url='https://github.com/ZebinYang/SIMTree',
       author='Zebin Yang',

simtree/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -8,5 +8,5 @@
            "SIMTreeRegressor", "SIMTreeClassifier",
            "CustomMobTreeRegressor", "CustomMobTreeClassifier"]
 
-__version__ = '0.2.3'
+__version__ = '0.2.4'
 __author__ = 'Zebin Yang'
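
Both setup.py and simtree/__init__.py carry the version string, so the bump to 0.2.4 is visible at runtime. A quick check after installing this revision (a minimal sketch, assuming the package is importable):

import simtree

# __version__ lives in simtree/__init__.py and was updated by this commit
print(simtree.__version__)  # expected output: 0.2.4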

simtree/customtree.py

Lines changed: 45 additions & 16 deletions
@@ -3,7 +3,7 @@
 from sklearn.utils.validation import check_is_fitted
 from sklearn.model_selection import GridSearchCV
 from sklearn.metrics import make_scorer, roc_auc_score, mean_squared_error
-from sklearn.base import RegressorMixin, ClassifierMixin, is_regressor, is_classifier
+from sklearn.base import RegressorMixin, ClassifierMixin, is_regressor, is_classifier, clone
 
 from .mobtree import MoBTreeRegressor, MoBTreeClassifier
 
@@ -42,11 +42,23 @@ def build_root(self):
 
     def build_leaf(self, sample_indice):
 
-        grid = GridSearchCV(self.base_estimator, param_grid=self.param_dict,
-                      scoring={"mse": make_scorer(mean_squared_error, greater_is_better=False)},
-                      cv=5, refit="mse", n_jobs=1, error_score=np.nan)
-        grid.fit(self.x[sample_indice], self.y[sample_indice].ravel())
-        best_estimator = grid.best_estimator_
+        if len(self.param_dict) == 0:
+            self.base_estimator.fit(self.x[sample_indice], self.y[sample_indice].ravel())
+            best_estimator = self.base_estimator
+        else:
+            param_size = 1
+            for key, item in self.param_dict.items():
+                param_size *= len(item)
+            if param_size == 1:
+                self.base_estimator.set_params(**{key: item[0] for key, item in self.param_dict.items()})
+                self.base_estimator.fit(self.x[sample_indice], self.y[sample_indice].ravel())
+                best_estimator = self.base_estimator
+            else:
+                grid = GridSearchCV(self.base_estimator, param_grid=self.param_dict,
+                              scoring={"mse": make_scorer(mean_squared_error, greater_is_better=False)},
+                              cv=5, refit="mse", n_jobs=1, error_score=np.nan)
+                grid.fit(self.x[sample_indice], self.y[sample_indice].ravel())
+                best_estimator = grid.best_estimator_
         predict_func = lambda x: best_estimator.predict(x)
         best_impurity = self.get_loss(self.y[sample_indice], best_estimator.predict(self.x[sample_indice]))
         return predict_func, best_estimator, best_impurity
@@ -80,16 +92,33 @@ def build_root(self):
 
     def build_leaf(self, sample_indice):
 
-        if (self.y[sample_indice].std() == 0) | (self.y[sample_indice].sum() < 5) | ((1 - self.y[sample_indice]).sum() < 5):
-            best_estimator = None
-            predict_func = lambda x: np.ones(x.shape[0]) * self.y[sample_indice].mean()
-            best_impurity = self.get_loss(self.y[sample_indice], predict_func(self.x[sample_indice]))
-        else:
-            grid = GridSearchCV(self.base_estimator, param_grid=self.param_dict,
-                          scoring={"auc": make_scorer(roc_auc_score, needs_proba=True)},
-                          cv=5, refit="auc", n_jobs=1, error_score=np.nan)
-            grid.fit(self.x[sample_indice], self.y[sample_indice].ravel())
-            best_estimator = grid.best_estimator_
+        if len(self.param_dict) == 0:
+            best_estimator = clone(self.base_estimator)
+            best_estimator.fit(self.x[sample_indice], self.y[sample_indice].ravel())
             predict_func = lambda x: best_estimator.decision_function(x)
             best_impurity = self.get_loss(self.y[sample_indice], best_estimator.predict_proba(self.x[sample_indice])[:, 1])
+        else:
+            param_size = 1
+            for key, item in self.param_dict.items():
+                param_size *= len(item)
+            if param_size == 1:
+                best_estimator = clone(self.base_estimator)
+                best_estimator.set_params(**{key: item[0] for key, item in self.param_dict.items()})
+                best_estimator.fit(self.x[sample_indice], self.y[sample_indice].ravel())
+                predict_func = lambda x: best_estimator.decision_function(x)
+                best_impurity = self.get_loss(self.y[sample_indice], best_estimator.predict_proba(self.x[sample_indice])[:, 1])
+            else:
+                if (self.y[sample_indice].std() == 0) | (self.y[sample_indice].sum() < 5) | ((1 - self.y[sample_indice]).sum() < 5):
+                    best_estimator = None
+                    predict_func = lambda x: np.ones(x.shape[0]) * self.y[sample_indice].mean()
+                    best_impurity = self.get_loss(self.y[sample_indice], predict_func(self.x[sample_indice]))
+                else:
+                    grid = GridSearchCV(self.base_estimator, param_grid=self.param_dict,
+                                  scoring={"auc": make_scorer(roc_auc_score, needs_proba=True)},
+                                  cv=5, refit="auc", n_jobs=1, error_score=np.nan)
+                    grid.fit(self.x[sample_indice], self.y[sample_indice].ravel())
+                    best_estimator = grid.best_estimator_
+
+                    predict_func = lambda x: best_estimator.decision_function(x)
+                    best_impurity = self.get_loss(self.y[sample_indice], best_estimator.predict_proba(self.x[sample_indice])[:, 1])
         return predict_func, best_estimator, best_impurity
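
The rewritten build_leaf short-circuits the grid search twice: once when param_dict is empty, and once when the grid holds only a single parameter combination. A standalone sketch of the same dispatch (fit_leaf, Ridge, and the toy data are illustrative stand-ins, not names from this repository):

import numpy as np
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer, mean_squared_error
from sklearn.base import clone

def fit_leaf(base_estimator, param_dict, x, y):
    # Empty grid: nothing to search over, just fit the base estimator.
    if len(param_dict) == 0:
        best = clone(base_estimator)
        best.fit(x, y)
        return best
    # Count the candidate parameter combinations.
    param_size = 1
    for key, item in param_dict.items():
        param_size *= len(item)
    # A single combination: set it directly and fit once, skipping CV.
    if param_size == 1:
        best = clone(base_estimator)
        best.set_params(**{key: item[0] for key, item in param_dict.items()})
        best.fit(x, y)
        return best
    # Otherwise fall back to the original 5-fold grid search.
    grid = GridSearchCV(base_estimator, param_grid=param_dict,
                        scoring={"mse": make_scorer(mean_squared_error, greater_is_better=False)},
                        cv=5, refit="mse", n_jobs=1, error_score=np.nan)
    grid.fit(x, y)
    return grid.best_estimator_

rng = np.random.RandomState(0)
x, y = rng.randn(100, 3), rng.randn(100)
print(fit_leaf(Ridge(), {}, x, y))                     # plain fit
print(fit_leaf(Ridge(), {"alpha": [0.5]}, x, y))       # one candidate, no CV
print(fit_leaf(Ridge(), {"alpha": [0.1, 1.0]}, x, y))  # real grid search

The set_params call mirrors the committed code: a one-point grid is applied to the estimator directly, so the fivefold cross-validation fit is avoided entirely.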

simtree/glmtree.py

Lines changed: 16 additions & 4 deletions
@@ -42,8 +42,16 @@ def build_leaf(self, sample_indice):
         sx = self.x[sample_indice].std(0) + self.EPSILON
         nx = (self.x[sample_indice] - mx) / sx
 
-        best_estimator = LassoCV(alphas=self.reg_lambda, cv=5, n_jobs=1, random_state=self.random_state)
-        best_estimator.fit(nx, self.y[sample_indice])
+        if len(self.reg_lambda) > 1:
+            best_estimator = LassoCV(alphas=self.reg_lambda, cv=5, n_jobs=self.n_jobs, precompute=False, random_state=self.random_state)
+            best_estimator.fit(nx, self.y[sample_indice], self.sample_weight[sample_indice])
+        else:
+            if self.reg_lambda[0] > 0:
+                best_estimator = Lasso(alpha=self.reg_lambda[0], precompute=False, random_state=self.random_state)
+            else:
+                best_estimator = LinearRegression()
+            best_estimator.fit(nx, self.y[sample_indice])
+
         best_estimator.coef_ = best_estimator.coef_ / sx
         best_estimator.intercept_ = best_estimator.intercept_ - np.dot(mx, best_estimator.coef_.T)
         xmin = np.min(np.dot(self.x[sample_indice], best_estimator.coef_) + best_estimator.intercept_)
@@ -83,8 +91,12 @@ def build_leaf(self, sample_indice):
             predict_func = lambda x: np.ones(x.shape[0]) * self.y[sample_indice].mean()
             best_impurity = self.get_loss(self.y[sample_indice], predict_func(self.x[sample_indice]))
         else:
-            best_estimator = LogisticRegressionCV(Cs=self.reg_lambda, penalty="l1", solver="liblinear", scoring="roc_auc",
-                             cv=5, n_jobs=1, random_state=self.random_state)
+            if len(self.reg_lambda) > 1:
+                best_estimator = LogisticRegressionCV(Cs=self.reg_lambda, penalty="l1", solver="liblinear", scoring="roc_auc",
+                                 cv=5, n_jobs=self.n_jobs, random_state=self.random_state)
+            else:
+                best_estimator = LogisticRegression(alpha=self.reg_lambda[0], precompute=False, random_state=self.random_state)
+
             mx = self.x[sample_indice].mean(0)
             sx = self.x[sample_indice].std(0) + self.EPSILON
             nx = (self.x[sample_indice] - mx) / sx
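
The GLM leaves get the same treatment: the cross-validated estimators are only built when reg_lambda actually offers a choice. A minimal standalone sketch of the regression-side dispatch (fit_glm_leaf and the toy data are illustrative, not names from this repository):

import numpy as np
from sklearn.linear_model import LassoCV, Lasso, LinearRegression

def fit_glm_leaf(reg_lambda, nx, y, random_state=0):
    # Multiple candidate penalties: keep the cross-validated estimator.
    if len(reg_lambda) > 1:
        est = LassoCV(alphas=reg_lambda, cv=5, n_jobs=1,
                      precompute=False, random_state=random_state)
    # One positive penalty: fit a plain Lasso with it, no CV needed.
    elif reg_lambda[0] > 0:
        est = Lasso(alpha=reg_lambda[0], precompute=False,
                    random_state=random_state)
    # Zero penalty: ordinary least squares.
    else:
        est = LinearRegression()
    est.fit(nx, y)
    return est

rng = np.random.RandomState(0)
nx, y = rng.randn(100, 3), rng.randn(100)
print(fit_glm_leaf([0.1, 1.0], nx, y))  # LassoCV
print(fit_glm_leaf([0.5], nx, y))       # Lasso
print(fit_glm_leaf([0.0], nx, y))       # LinearRegression

One caveat on the classification side: sklearn's LogisticRegression takes the inverse-regularization parameter C and has no alpha or precompute arguments, so the single-candidate branch in the hunk above would raise a TypeError as written.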

simtree/simtree.py

Lines changed: 30 additions & 18 deletions
@@ -434,13 +434,17 @@ def build_root(self):
 
     def build_leaf(self, sample_indice):
 
-        base = SimRegressor(reg_gamma=self.reg_gamma, degree=self.degree,
-                            knot_num=self.knot_num, random_state=self.random_state)
-        grid = GridSearchCV(base, param_grid={"reg_lambda": self.reg_lambda},
-                      scoring={"mse": make_scorer(mean_squared_error, greater_is_better=False)},
-                      cv=5, refit="mse", n_jobs=1, error_score=np.nan)
-        grid.fit(self.x[sample_indice], self.y[sample_indice].ravel())
-        best_estimator = grid.best_estimator_
+        param_size = len(self.reg_lambda)
+        if param_size == 1:
+            best_estimator = SimRegressor(reg_lambda=[self.reg_lambda[0]], reg_gamma=self.reg_gamma, degree=self.degree,
+                                knot_num=self.knot_num, random_state=self.random_state)
+            best_estimator.fit(self.x[sample_indice], self.y[sample_indice].ravel())
+        else:
+            grid = GridSearchCV(base, param_grid={"reg_lambda": self.reg_lambda},
+                          scoring={"mse": make_scorer(mean_squared_error, greater_is_better=False)},
+                          cv=5, refit="mse", n_jobs=1, error_score=np.nan)
+            grid.fit(self.x[sample_indice], self.y[sample_indice].ravel())
+            best_estimator = grid.best_estimator_
         predict_func = lambda x: best_estimator.predict(x)
         best_impurity = self.get_loss(self.y[sample_indice], best_estimator.predict(self.x[sample_indice]))
         return predict_func, best_estimator, best_impurity
@@ -477,18 +481,26 @@ def build_root(self):
 
     def build_leaf(self, sample_indice):
 
-        if (self.y[sample_indice].std() == 0) | (self.y[sample_indice].sum() < 5) | ((1 - self.y[sample_indice]).sum() < 5):
-            best_estimator = None
-            predict_func = lambda x: np.ones(x.shape[0]) * self.y[sample_indice].mean()
-            best_impurity = self.get_loss(self.y[sample_indice], predict_func(self.x[sample_indice]))
-        else:
-            base = SimClassifier(reg_gamma=self.reg_gamma, degree=self.degree,
+        param_size = len(self.reg_lambda)
+        if param_size == 1:
+            best_estimator = SimRegressor(reg_lambda=[self.reg_lambda[0]], reg_gamma=self.reg_gamma, degree=self.degree,
                                 knot_num=self.knot_num, random_state=self.random_state)
-            grid = GridSearchCV(base, param_grid={"reg_lambda": self.reg_lambda},
-                          scoring={"auc": make_scorer(roc_auc_score, needs_proba=True)},
-                          cv=5, refit="auc", n_jobs=1, error_score=np.nan)
-            grid.fit(self.x[sample_indice], self.y[sample_indice].ravel())
-            best_estimator = grid.best_estimator_
+            best_estimator.fit(self.x[sample_indice], self.y[sample_indice].ravel())
             predict_func = lambda x: best_estimator.decision_function(x)
             best_impurity = self.get_loss(self.y[sample_indice], best_estimator.predict_proba(self.x[sample_indice])[:, 1])
+        else:
+            if (self.y[sample_indice].std() == 0) | (self.y[sample_indice].sum() < 5) | ((1 - self.y[sample_indice]).sum() < 5):
+                best_estimator = None
+                predict_func = lambda x: np.ones(x.shape[0]) * self.y[sample_indice].mean()
+                best_impurity = self.get_loss(self.y[sample_indice], predict_func(self.x[sample_indice]))
+            else:
+                base = SimClassifier(reg_gamma=self.reg_gamma, degree=self.degree,
+                                    knot_num=self.knot_num, random_state=self.random_state)
+                grid = GridSearchCV(base, param_grid={"reg_lambda": self.reg_lambda},
+                              scoring={"auc": make_scorer(roc_auc_score, needs_proba=True)},
+                              cv=5, refit="auc", n_jobs=1, error_score=np.nan)
+                grid.fit(self.x[sample_indice], self.y[sample_indice].ravel())
+                best_estimator = grid.best_estimator_
+                predict_func = lambda x: best_estimator.decision_function(x)
+                best_impurity = self.get_loss(self.y[sample_indice], best_estimator.predict_proba(self.x[sample_indice])[:, 1])
         return predict_func, best_estimator, best_impurity
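
Two details in this file are worth flagging: the regressor's grid-search branch still passes base to GridSearchCV although the rewritten code no longer defines it, and the classifier's single-candidate branch constructs a SimRegressor rather than a SimClassifier. A sketch of what the regressor dispatch presumably intends, with base rebuilt in the else branch as in the pre-commit code (a hypothetical reconstruction, not the committed code; SimRegressor and the self attributes come from the surrounding module):

import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer, mean_squared_error

def build_leaf(self, sample_indice):
    param_size = len(self.reg_lambda)
    if param_size == 1:
        # Single candidate penalty: fit directly, skipping cross-validation.
        best_estimator = SimRegressor(reg_lambda=[self.reg_lambda[0]], reg_gamma=self.reg_gamma,
                                      degree=self.degree, knot_num=self.knot_num,
                                      random_state=self.random_state)
        best_estimator.fit(self.x[sample_indice], self.y[sample_indice].ravel())
    else:
        # `base` must be constructed here; the committed hunk omits this line.
        base = SimRegressor(reg_gamma=self.reg_gamma, degree=self.degree,
                            knot_num=self.knot_num, random_state=self.random_state)
        grid = GridSearchCV(base, param_grid={"reg_lambda": self.reg_lambda},
                            scoring={"mse": make_scorer(mean_squared_error, greater_is_better=False)},
                            cv=5, refit="mse", n_jobs=1, error_score=np.nan)
        grid.fit(self.x[sample_indice], self.y[sample_indice].ravel())
        best_estimator = grid.best_estimator_
    predict_func = lambda x: best_estimator.predict(x)
    best_impurity = self.get_loss(self.y[sample_indice], best_estimator.predict(self.x[sample_indice]))
    return predict_func, best_estimator, best_impurity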
