
Commit 2eb0ce2

Merge pull request #30 from ternaus/master
Added method points_to_csv that saves known data points to csv file
2 parents 56f3e88 + 0e95b60 commit 2eb0ce2

File tree

2 files changed: +93 −0 lines changed


bayes_opt/bayesian_optimization.py

Lines changed: 14 additions & 0 deletions
@@ -327,3 +327,17 @@ def maximize(self,
        # Print a final report if verbose active.
        if self.verbose:
            self.plog.print_summary()

    def points_to_csv(self, file_name):
        """
        After training, save all points for which the target variable is known
        (both from initialization and from optimization).

        :param file_name: name of the file where the points will be saved, in CSV format

        :return: None
        """
        # One row per evaluated point: the sampled inputs plus the observed target.
        points = np.hstack((self.X, np.expand_dims(self.Y, axis=1)))
        header = ', '.join(self.keys + ['target'])
        # Note: np.savetxt prefixes the header line with '# ' by default.
        np.savetxt(file_name, points, header=header, delimiter=',')
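
For reference, a minimal usage sketch of the new method (the toy objective f, its bounds, and the file name below are illustrative assumptions, not part of this commit):

from bayes_opt import BayesianOptimization

# Hypothetical toy objective; any function of the named parameters works.
def f(x, y):
    return -x ** 2 - (y - 1) ** 2 + 1

bo = BayesianOptimization(f, {'x': (-2, 2), 'y': (-3, 3)})
bo.maximize(init_points=2, n_iter=3)

# Writes one row per evaluated point: x, y, target.
bo.points_to_csv('points.csv')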

examples/xgb_example.py

Lines changed: 79 additions & 0 deletions
@@ -0,0 +1,79 @@
"""
Bayesian hyperparameter optimization [https://github.com/fmfn/BayesianOptimization]
for a Mean Absolute Error objective
on default features for https://www.kaggle.com/c/allstate-claims-severity
"""

__author__ = "Vladimir Iglovikov"

import pandas as pd
import xgboost as xgb
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_absolute_error
from bayes_opt import BayesianOptimization
from tqdm import tqdm


def xgb_evaluate(min_child_weight,
                 colsample_bytree,
                 max_depth,
                 subsample,
                 gamma,
                 alpha):
    # Clip and cast the optimizer's continuous suggestions into valid xgboost ranges.
    params['min_child_weight'] = int(min_child_weight)
    params['colsample_bytree'] = max(min(colsample_bytree, 1), 0)
    params['max_depth'] = int(max_depth)
    params['subsample'] = max(min(subsample, 1), 0)
    params['gamma'] = max(gamma, 0)
    params['alpha'] = max(alpha, 0)

    cv_result = xgb.cv(params, xgtrain, num_boost_round=num_rounds, nfold=5,
                       seed=random_state,
                       callbacks=[xgb.callback.early_stop(50)])

    # BayesianOptimization maximizes, so return the negated MAE.
    return -cv_result['test-mae-mean'].values[-1]


def prepare_data():
    train = pd.read_csv('../input/train.csv')
    categorical_columns = train.select_dtypes(include=['object']).columns

    # Label-encode every categorical column so xgboost can consume the matrix.
    for column in tqdm(categorical_columns):
        le = LabelEncoder()
        train[column] = le.fit_transform(train[column])

    y = train['loss']

    X = train.drop(['loss', 'id'], axis=1)
    xgtrain = xgb.DMatrix(X, label=y)

    return xgtrain


if __name__ == '__main__':
    xgtrain = prepare_data()

    num_rounds = 3000
    random_state = 2016
    num_iter = 25
    init_points = 5
    params = {
        'eta': 0.1,
        'silent': 1,
        'eval_metric': 'mae',
        'verbose_eval': True,
        'seed': random_state
    }

    xgbBO = BayesianOptimization(xgb_evaluate, {'min_child_weight': (1, 20),
                                                'colsample_bytree': (0.5, 1),
                                                'max_depth': (5, 15),
                                                'subsample': (0.5, 1),
                                                'gamma': (0, 10),
                                                'alpha': (0, 10),
                                                })

    xgbBO.maximize(init_points=init_points, n_iter=num_iter)
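
Since points_to_csv lands in the same commit, the example could persist every evaluated hyperparameter point once the search finishes; a one-line sketch that would follow the maximize call above (the file name is illustrative):

    # Save every evaluated point (hyperparameters plus negated MAE) for later analysis.
    xgbBO.points_to_csv('xgb_points.csv')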
