6
6
import pickle
7
7
import numpy as np
8
8
import lightgbm as lgb
9
- from sklearn .model_selection import RandomizedSearchCV
9
+ from skopt import BayesSearchCV
10
+ from skopt .space import Real , Categorical , Integer
10
11
from sklearn .model_selection import cross_val_score , StratifiedKFold
11
12
from utils import load_data
12
13
13
- N_JOBS = 4 * 4 * 9
14
+ N_JOBS = 4 * 4
14
15
N_ITER = 25 # search budget: number of hyperparameter settings evaluated (passed as n_iter)
15
16
16
17
17
18
def evaluate_pipeline_helper (X , y , pipeline , param_grid , random_state = 0 ):
18
19
inner_cv = StratifiedKFold (n_splits = 4 , shuffle = True , random_state = random_state )
19
20
outer_cv = StratifiedKFold (n_splits = 4 , shuffle = True , random_state = random_state )
20
- clf = RandomizedSearchCV (
21
+ clf = BayesSearchCV (
21
22
estimator = pipeline ,
22
- param_distributions = param_grid ,
23
+ search_spaces = param_grid ,
23
24
n_iter = N_ITER ,
25
+ n_points = 3 ,
24
26
cv = inner_cv ,
25
27
scoring = "roc_auc_ovr_weighted" ,
26
28
n_jobs = N_JOBS ,
@@ -35,7 +37,7 @@ def define_and_evaluate_lightgbm_pipeline(X, y, random_state=0):
35
37
if len (set (y )) == 2 :
36
38
pipeline = lgb .LGBMClassifier (
37
39
objective = "binary" ,
38
- n_estimators = 500 ,
40
+ n_estimators = 1000 ,
39
41
metric = "auc" ,
40
42
verbose = - 1 ,
41
43
tree_learner = "feature" ,
@@ -45,23 +47,23 @@ def define_and_evaluate_lightgbm_pipeline(X, y, random_state=0):
45
47
else :
46
48
pipeline = lgb .LGBMClassifier (
47
49
objective = "multiclass" ,
48
- n_estimators = 500 ,
50
+ n_estimators = 1000 ,
49
51
metric = "auc_mu" ,
50
52
verbose = - 1 ,
51
53
tree_learner = "feature" ,
52
54
random_state = random_state ,
53
55
silent = True ,
54
56
)
55
57
param_grid = {
56
- "learning_rate" : [1e-7 , 1e-6 , 1e-5 , 1e-4 , 1e-3 , 1e-2 , 1e-1 , 1e0 ],
57
- "num_leaves" : [ 2 , 4 , 8 , 16 , 32 , 64 ],
58
- "colsample_bytree" : [0.5 , 0.6 , 0.7 , 0.8 , 0.9 , 1.0 ],
59
- "subsample" : [0.5 , 0.6 , 0.7 , 0.8 , 0.9 , 1.0 ],
60
- "min_child_samples" : [ 2 , 4 , 8 , 16 , 32 , 64 , 128 , 256 ],
61
- "min_child_weight" : [1e-7 , 1e-6 , 1e-5 , 1e-4 , 1e-3 , 1e-2 , 1e-1 , 1e0 ],
62
- "reg_alpha" : [1e-7 , 1e-6 , 1e-5 , 1e-4 , 1e-3 , 1e-2 , 1e-1 , 1e0 ],
63
- "reg_lambda" : [1e-7 , 1e-6 , 1e-5 , 1e-4 , 1e-3 , 1e-2 , 1e-1 , 1e0 ],
64
- "max_depth" : [1 , 2 , 4 , 8 , 16 , 32 , - 1 ],
58
+ "learning_rate" : Real ( 1e-7 , 1e+0 , prior = 'log-uniform' ), # [1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0],
59
+ "num_leaves" : Categorical ([ 1 , 3 , 15 , 31 , 63 , 127 ]), # 2**k - 1; NOTE(review): LightGBM documents 1 < num_leaves, so the value 1 looks invalid - confirm and drop it
60
+ "colsample_bytree" : Categorical ( [0.5 , 0.6 , 0.7 , 0.8 , 0.9 , 1.0 ]) ,
61
+ "subsample" : Categorical ( [0.5 , 0.6 , 0.7 , 0.8 , 0.9 , 1.0 ]) ,
62
+ "min_child_samples" : Categorical ([ 1 , 2 , 4 , 8 , 16 , 32 , 64 , 128 , 256 ]) ,
63
+ "min_child_weight" : Real ( 1e-7 , 1e+0 , prior = 'log-uniform' ), # [1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0],
64
+ "reg_alpha" : Real ( 1e-7 , 1e+0 , prior = 'log-uniform' ), # [1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0],
65
+ "reg_lambda" : Real ( 1e-7 , 1e+0 , prior = 'log-uniform' ), # [1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0],
66
+ "max_depth" : [1 , 2 , 4 , 8 , 16 , - 1 ],
65
67
}
66
68
nested_scores = evaluate_pipeline_helper (X , y , pipeline , param_grid , random_state = random_state )
67
69
return nested_scores
0 commit comments