"""
Bayesian hyperparameter optimization [https://github.com/fmfn/BayesianOptimization]
for the Mean Absolute Error objective
on the default features of https://www.kaggle.com/c/allstate-claims-severity
"""
__author__ = "Vladimir Iglovikov"

import pandas as pd
import xgboost as xgb
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_absolute_error
from bayes_opt import BayesianOptimization
from tqdm import tqdm


def xgb_evaluate(min_child_weight,
                 colsample_bytree,
                 max_depth,
                 subsample,
                 gamma,
                 alpha):
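    # bayes_opt proposes unconstrained floats, so cast the integer-valued
    # parameters and clamp the remaining ones into XGBoost's valid ranges.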
    params['min_child_weight'] = int(min_child_weight)
    params['colsample_bytree'] = max(min(colsample_bytree, 1), 0)
    params['max_depth'] = int(max_depth)
    params['subsample'] = max(min(subsample, 1), 0)
    params['gamma'] = max(gamma, 0)
    params['alpha'] = max(alpha, 0)

    cv_result = xgb.cv(params, xgtrain, num_boost_round=num_rounds, nfold=5,
                       seed=random_state,
                       early_stopping_rounds=50)
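    # BayesianOptimization maximizes its objective, so return the negative
    # cross-validated MAE; the last row is the best early-stopped iteration.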
    return -cv_result['test-mae-mean'].values[-1]


def prepare_data():
    train = pd.read_csv('../input/train.csv')
    categorical_columns = train.select_dtypes(include=['object']).columns
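    # Label-encode every categorical (object-typed) column in place so the
    # otherwise string-valued features become numeric.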
    for column in tqdm(categorical_columns):
        le = LabelEncoder()
        train[column] = le.fit_transform(train[column])
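    # 'loss' is the regression target; drop it and the row id from the features.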
    y = train['loss']
    X = train.drop(['loss', 'id'], axis=1)
    xgtrain = xgb.DMatrix(X, label=y)

    return xgtrain


if __name__ == '__main__':
    xgtrain = prepare_data()
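    # Base settings shared by every cross-validation run; xgb_evaluate fills
    # in the tuned parameters on top of this dict via module scope.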
    num_rounds = 3000
    random_state = 2016
    num_iter = 25
    init_points = 5
    params = {
        'eta': 0.1,
        'silent': 1,
        'eval_metric': 'mae',
        'verbose_eval': True,
        'seed': random_state
    }
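    # Search bounds for each hyperparameter explored by the optimizer.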
    xgbBO = BayesianOptimization(xgb_evaluate, {'min_child_weight': (1, 20),
                                                'colsample_bytree': (0.5, 1),
                                                'max_depth': (5, 15),
                                                'subsample': (0.5, 1),
                                                'gamma': (0, 10),
                                                'alpha': (0, 10),
                                                })

    xgbBO.maximize(init_points=init_points, n_iter=num_iter)
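    # A minimal sketch of reading out the best result, assuming a recent
    # bayes_opt release that exposes a `.max` property (older fmfn versions
    # stored it under `.res['max']` instead).
    print(xgbBO.max)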