Skip to content

Commit 7795f2d

Browse files
author
Sergey Feldman
committed
autogluon and scikit-optimize
1 parent 6d52e3a commit 7795f2d

File tree

5 files changed

+113
-38
lines changed

5 files changed

+113
-38
lines changed

02_lightgbm.py

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6,21 +6,23 @@
66
import pickle
77
import numpy as np
88
import lightgbm as lgb
9-
from sklearn.model_selection import RandomizedSearchCV
9+
from skopt import BayesSearchCV
10+
from skopt.space import Real, Categorical, Integer
1011
from sklearn.model_selection import cross_val_score, StratifiedKFold
1112
from utils import load_data
1213

13-
N_JOBS = 4 * 4 * 9
14+
N_JOBS = 4 * 4
1415
N_ITER = 25 # budget for hyperparam search
1516

1617

1718
def evaluate_pipeline_helper(X, y, pipeline, param_grid, random_state=0):
1819
inner_cv = StratifiedKFold(n_splits=4, shuffle=True, random_state=random_state)
1920
outer_cv = StratifiedKFold(n_splits=4, shuffle=True, random_state=random_state)
20-
clf = RandomizedSearchCV(
21+
clf = BayesSearchCV(
2122
estimator=pipeline,
22-
param_distributions=param_grid,
23+
search_spaces=param_grid,
2324
n_iter=N_ITER,
25+
n_points=3,
2426
cv=inner_cv,
2527
scoring="roc_auc_ovr_weighted",
2628
n_jobs=N_JOBS,
@@ -35,7 +37,7 @@ def define_and_evaluate_lightgbm_pipeline(X, y, random_state=0):
3537
if len(set(y)) == 2:
3638
pipeline = lgb.LGBMClassifier(
3739
objective="binary",
38-
n_estimators=500,
40+
n_estimators=1000,
3941
metric="auc",
4042
verbose=-1,
4143
tree_learner="feature",
@@ -45,23 +47,23 @@ def define_and_evaluate_lightgbm_pipeline(X, y, random_state=0):
4547
else:
4648
pipeline = lgb.LGBMClassifier(
4749
objective="multiclass",
48-
n_estimators=500,
50+
n_estimators=1000,
4951
metric="auc_mu",
5052
verbose=-1,
5153
tree_learner="feature",
5254
random_state=random_state,
5355
silent=True,
5456
)
5557
param_grid = {
56-
"learning_rate": [1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0],
57-
"num_leaves": [2, 4, 8, 16, 32, 64],
58-
"colsample_bytree": [0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
59-
"subsample": [0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
60-
"min_child_samples": [2, 4, 8, 16, 32, 64, 128, 256],
61-
"min_child_weight": [1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0],
62-
"reg_alpha": [1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0],
63-
"reg_lambda": [1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0],
64-
"max_depth": [1, 2, 4, 8, 16, 32, -1],
58+
"learning_rate": Real(1e-7, 1e+0, prior='log-uniform'), #[1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0],
59+
"num_leaves": Categorical([1, 3, 15, 31, 63, 127]), # 2**depth - 1
60+
"colsample_bytree": Categorical([0.5, 0.6, 0.7, 0.8, 0.9, 1.0]),
61+
"subsample": Categorical([0.5, 0.6, 0.7, 0.8, 0.9, 1.0]),
62+
"min_child_samples": Categorical([1, 2, 4, 8, 16, 32, 64, 128, 256]),
63+
"min_child_weight": Real(1e-7, 1e+0, prior='log-uniform'), # [1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0],
64+
"reg_alpha": Real(1e-7, 1e+0, prior='log-uniform'), # [1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0],
65+
"reg_lambda": Real(1e-7, 1e+0, prior='log-uniform'), # [1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0],
66+
"max_depth": [1, 2, 4, 8, 16, -1],
6567
}
6668
nested_scores = evaluate_pipeline_helper(X, y, pipeline, param_grid, random_state=random_state)
6769
return nested_scores

03_autogluon.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,7 @@
99
from utils import load_data
1010

1111

12-
SEC = 60
13-
12+
SEC = 120
1413

1514
def define_and_evaluate_autogluon_pipeline(X, y, random_state=0):
1615
# autogluon dataframes
@@ -23,15 +22,18 @@ def define_and_evaluate_autogluon_pipeline(X, y, random_state=0):
2322
data_df_test = data_df.iloc[test_inds, :]
2423
if len((set(y))) == 2:
2524
eval_metric = 'roc_auc'
25+
problem_type = 'binary'
2626
else:
2727
eval_metric = 'f1_weighted' # no multiclass auroc in autogluon
28+
problem_type = 'multiclass'
2829
predictor = task.fit(
2930
data_df_train,
3031
"y",
3132
time_limits=SEC,
3233
auto_stack=True,
3334
output_directory=".autogluon_temp",
3435
eval_metric=eval_metric,
36+
problem_type=problem_type,
3537
verbosity=0,
3638
)
3739
y_pred = predictor.predict_proba(data_df.iloc[test_inds, :])
@@ -69,5 +71,5 @@ def define_and_evaluate_autogluon_pipeline(X, y, random_state=0):
6971
times = np.array(times)
7072

7173
# save everything to disk so we can make plots elsewhere
72-
with open(f"results/03_autoglun_sec_{SEC}.pickle", "wb") as f:
73-
pickle.dump((results, times), f)
74+
with open(f"results/03_autogluon_NN_sec_{SEC}.pickle", "wb") as f:
75+
pickle.dump((results, times), f)

make_figures.ipynb

Lines changed: 85 additions & 16 deletions
Large diffs are not rendered by default.

requirements.in

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@ numpy
22
scipy
33
pandas
44
seaborn
5-
mxnet_cu102 # insert your own cuda number OR 'mxnet' if don't have a gpu
5+
mxnet_cu102 # insert your own cuda 3 digit version OR just 'mxnet' if don't have a gpu
66
autogluon
7-
scikit-learn>=0.24
8-
lightgbm>=3.1.1
7+
scikit-learn==0.23.2
8+
lightgbm>=3.1.1
9+
optuna
10+
scikit-optimize

results/03_autogluon_sec_60.pickle

5.73 KB
Binary file not shown.

0 commit comments

Comments (0)