6
6
import pickle
7
7
import numpy as np
8
8
import lightgbm as lgb
9
- from skopt import BayesSearchCV
10
- from skopt .space import Real , Categorical , Integer
9
+ from sklearn .model_selection import RandomizedSearchCV
11
10
from sklearn .model_selection import cross_val_score , StratifiedKFold
12
11
from utils import load_data
13
12
14
# Parallelism budget handed to RandomizedSearchCV.
# NOTE(review): 4 * 4 * 9 = 144 concurrent jobs; the meaning of the three
# factors is not shown in this view (presumably workers x CV folds x tasks) —
# confirm against the full file before tuning.
N_JOBS = 4 * 4 * 9

# Number of parameter settings sampled per search.
N_ITER = 25  # budget for hyperparam search
16
15
17
16
18
17
def evaluate_pipeline_helper (X , y , pipeline , param_grid , random_state = 0 ):
19
18
inner_cv = StratifiedKFold (n_splits = 4 , shuffle = True , random_state = random_state )
20
19
outer_cv = StratifiedKFold (n_splits = 4 , shuffle = True , random_state = random_state )
21
- clf = BayesSearchCV (
20
+ clf = RandomizedSearchCV (
22
21
estimator = pipeline ,
23
- search_spaces = param_grid ,
22
+ param_distributions = param_grid ,
24
23
n_iter = N_ITER ,
25
- n_points = 3 ,
26
24
cv = inner_cv ,
27
25
scoring = "roc_auc_ovr_weighted" ,
28
26
n_jobs = N_JOBS ,
def define_and_evaluate_lightgbm_pipeline(X, y, random_state=0):
    """Evaluate a LightGBM classifier on (X, y) with nested cross-validation.

    Picks a binary or multiclass objective from the number of distinct labels
    in *y*, then delegates the randomized hyperparameter search and the outer
    CV scoring to ``evaluate_pipeline_helper``.

    Returns the array of outer-fold scores produced by the helper.
    """
    if len(set(y)) == 2:
        # Binary task: optimise plain AUC.
        # NOTE(review): the trailing constructor kwargs here were elided in
        # the diff this was recovered from; reconstructed by parallelism with
        # the multiclass branch below — confirm against the full file.
        pipeline = lgb.LGBMClassifier(
            objective="binary",
            n_estimators=500,
            metric="auc",
            verbose=-1,
            tree_learner="feature",
            random_state=random_state,
            silent=True,
        )
    else:
        # Multiclass task: optimise AUC-mu.
        pipeline = lgb.LGBMClassifier(
            objective="multiclass",
            n_estimators=500,
            metric="auc_mu",
            verbose=-1,
            tree_learner="feature",
            random_state=random_state,
            silent=True,
        )

    # Discrete candidate lists sampled uniformly by RandomizedSearchCV; the
    # same log-spaced ladder is reused for every multiplicative parameter.
    log_scale = [1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0]
    param_grid = {
        "learning_rate": list(log_scale),
        "num_leaves": [2, 4, 8, 16, 32, 64],
        "colsample_bytree": [0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
        "subsample": [0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
        "min_child_samples": [2, 4, 8, 16, 32, 64, 128, 256],
        "min_child_weight": list(log_scale),
        "reg_alpha": list(log_scale),
        "reg_lambda": list(log_scale),
        "max_depth": [1, 2, 4, 8, 16, 32, -1],
    }

    nested_scores = evaluate_pipeline_helper(
        X, y, pipeline, param_grid, random_state=random_state
    )
    return nested_scores
0 commit comments