|
28 | 28 | "metadata": {}, |
29 | 29 | "outputs": [], |
30 | 30 | "source": [ |
| 31 | + "from xgboost import XGBClassifier\n", |
| 32 | + "\n", |
31 | 33 | "%matplotlib inline\n", |
32 | 34 | "\n", |
33 | 35 | "import numpy as np\n", |
|
231 | 233 | "metadata": {}, |
232 | 234 | "outputs": [], |
233 | 235 | "source": [ |
234 | | - "reweighter = reweight.GBReweighter(n_estimators=250, learning_rate=0.1, max_depth=3, min_samples_leaf=1000,\n", |
235 | | - " gb_args={'subsample': 0.4})\n", |
| 236 | + "reweighter = reweight.GBReweighter(n_estimators=50, learning_rate=0.1, max_depth=3, min_samples_leaf=1000,\n", |
| 237 | + " gb_args={'subsample': 0.7})\n", |
236 | 238 | "reweighter.fit(original_train, target_train)\n", |
237 | 239 | "\n", |
238 | 240 | "gb_weights_test = reweighter.predict_weights(original_test)\n", |
|
331 | 333 | "metadata": {}, |
332 | 334 | "outputs": [], |
333 | 335 | "source": [ |
334 | | - "from sklearn.ensemble import GradientBoostingClassifier\n", |
335 | 336 | "from sklearn.metrics import roc_auc_score\n", |
336 | 337 | "from sklearn.model_selection import train_test_split\n", |
| 338 | + "from xgboost import XGBClassifier\n", |
337 | 339 | "\n", |
338 | 340 | "data = np.concatenate([original_test, target_test])\n", |
339 | 341 | "labels = np.array([0] * len(original_test) + [1] * len(target_test))\n", |
|
347 | 349 | "for name, new_weights in weights.items():\n", |
348 | 350 | " W = np.concatenate([new_weights / new_weights.sum() * len(target_test), [1] * len(target_test)])\n", |
349 | 351 | " Xtr, Xts, Ytr, Yts, Wtr, Wts = train_test_split(data, labels, W, random_state=42, train_size=0.51)\n", |
350 | | - " clf = GradientBoostingClassifier(subsample=0.3, n_estimators=50).fit(Xtr, Ytr, sample_weight=Wtr)\n", |
| 352 | + " clf = XGBClassifier(subsample=0.8, n_estimators=50).fit(Xtr, Ytr, sample_weight=Wtr)\n", |
351 | 353 | "\n", |
352 | 354 | " print(name, roc_auc_score(Yts, clf.predict_proba(Xts)[:, 1], sample_weight=Wts))" |
353 | 355 | ] |
|
428 | 430 | "outputs": [], |
429 | 431 | "source": [ |
430 | 432 | "# define base reweighter\n", |
431 | | - "reweighter_base = reweight.GBReweighter(n_estimators=80,\n", |
432 | | - " learning_rate=0.01, max_depth=4, min_samples_leaf=100,\n", |
| 433 | + "reweighter_base = reweight.GBReweighter(n_estimators=40,\n", |
| 434 | + " learning_rate=0.02, max_depth=4, min_samples_leaf=100,\n", |
433 | 435 | " gb_args={'subsample': 0.8})\n", |
434 | 436 | "reweighter = reweight.FoldingReweighter(reweighter_base, n_folds=2)\n", |
435 | 437 | "# there is no need to divide the data into train/test parts; the reweighter can be trained on the whole samples\n", |
|
467 | 469 | "for name, new_weights in weights.items():\n", |
468 | 470 | " W = np.concatenate([new_weights / new_weights.sum() * len(target), [1] * len(target)])\n", |
469 | 471 | " Xtr, Xts, Ytr, Yts, Wtr, Wts = train_test_split(data, labels, W, random_state=42, train_size=0.51)\n", |
470 | | - " clf = GradientBoostingClassifier(subsample=0.6, n_estimators=30).fit(Xtr, Ytr, sample_weight=Wtr)\n", |
| 472 | + " clf = XGBClassifier(subsample=0.6, n_estimators=30).fit(Xtr, Ytr, sample_weight=Wtr)\n", |
471 | 473 | "\n", |
472 | 474 | " print(name, roc_auc_score(Yts, clf.predict_proba(Xts)[:, 1], sample_weight=Wts))" |
473 | 475 | ] |
|
0 commit comments