Skip to content

Commit 2503a54

Browse files
committed
enh: improve bdt notebook time
1 parent 762bf9f commit 2503a54

9 files changed

+52
-23
lines changed

.pre-commit-config.yaml

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,10 @@ repos:
3030
args: [ --extra-keys=metadata.language_info.codemirror_mode.version metadata.kernelspec metadata.language_info.pygments_lexer metadata.language_info.version ]
3131

3232

33-
34-
# needs rust, only activate if needed
33+
# needs rust, only activate if needed
3534
# - repo: https://github.com/shssoichiro/oxipng
36-
# rev: v9.1.2
35+
# rev: v9.1.3
3736
# hooks:
3837
# - id: oxipng
39-
# args: [ --best --strip all --quiet ]
38+
# args: [ "--", "--best", "--strip", "all", "--quiet" ]
4039

advanced-python/11AdvancedPython.ipynb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,8 @@
295295
"outputs": [],
296296
"source": [
297297
"# SOLUTION\n",
298+
"\n",
299+
"\n",
298300
"@contextlib.contextmanager\n",
299301
"def func(x):\n",
300302
" yield x\n",
@@ -479,6 +481,8 @@
479481
"outputs": [],
480482
"source": [
481483
"# SOLUTION\n",
484+
"\n",
485+
"\n",
482486
"def timed_func(func):\n",
483487
" def wrapped_func(*args, **kwargs):\n",
484488
" print(args)\n",

advanced-python/20DataAndPlotting.ipynb

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,11 @@
3232
{
3333
"cell_type": "code",
3434
"execution_count": null,
35-
"metadata": {},
35+
"metadata": {
36+
"jupyter": {
37+
"is_executing": true
38+
}
39+
},
3640
"outputs": [],
3741
"source": [
3842
"import mplhep\n",

advanced-python/32BoostingToUniformity.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@
146146
"classifiers['uBoost'] = uboost.uBoostClassifier(uniform_features=uniform_features, uniform_label=1,\n",
147147
" base_estimator=base_estimator,\n",
148148
" n_estimators=n_estimators, train_features=train_features,\n",
149-
" efficiency_steps=12, n_jobs=4)\n",
149+
" efficiency_steps=12, n_threads=4)\n",
150150
"\n",
151151
"flatnessloss = ugb.KnnFlatnessLossFunction(uniform_features, fl_coefficient=3., power=1.3, uniform_label=1)\n",
152152
"classifiers['uGB+FL'] = ugb.UGradientBoostingClassifier(loss=flatnessloss, max_depth=4,\n",

advanced-python/40Histograms.ipynb

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
"%store -r mc_df\n",
4343
"%store -r data_df\n",
4444
"\n",
45-
"import boost_histogram as bh\n",
45+
"import boost_histogram as bh # usually not needed\n",
4646
"import hist\n",
4747
"import mplhep\n",
4848
"import numpy as np"
@@ -61,8 +61,8 @@
6161
"# Let's get started with a simple example\n",
6262
"\n",
6363
"# Compose axis however you like; this is a 2D histogram\n",
64-
"h = bh.Histogram(bh.axis.Regular(2, 0, 1),\n",
65-
" bh.axis.Regular(4, 0.0, 1.0))\n",
64+
"h = hist.Hist(hist.axis.Regular(2, 0, 1),\n",
65+
" hist.axis.Regular(4, 0.0, 1.0))\n",
6666
"\n",
6767
"# Filling can be done with arrays, one per dimension\n",
6868
"h.fill([.3, .5, .2],\n",
@@ -131,7 +131,7 @@
131131
"outputs": [],
132132
"source": [
133133
"start, stop = data_df['Jpsi_M'].min(), data_df['Jpsi_M'].max()\n",
134-
"axis1 = hist.axis.Regular(bins=50, start=start, stop=stop, name=\"mass\")"
134+
"axis1 = hist.axis.Regular(bins=50, start=start, stop=stop, name=\"mass\", label=r\"$m(J/\\psi)$\")"
135135
]
136136
},
137137
{
@@ -558,5 +558,5 @@
558558
}
559559
},
560560
"nbformat": 4,
561-
"nbformat_minor": 1
561+
"nbformat_minor": 4
562562
}

advanced-python/45DemoReweighting.ipynb

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
"metadata": {},
2929
"outputs": [],
3030
"source": [
31+
"from xgboost import XGBClassifier\n",
32+
"\n",
3133
"%matplotlib inline\n",
3234
"\n",
3335
"import numpy as np\n",
@@ -231,8 +233,8 @@
231233
"metadata": {},
232234
"outputs": [],
233235
"source": [
234-
"reweighter = reweight.GBReweighter(n_estimators=250, learning_rate=0.1, max_depth=3, min_samples_leaf=1000,\n",
235-
" gb_args={'subsample': 0.4})\n",
236+
"reweighter = reweight.GBReweighter(n_estimators=50, learning_rate=0.1, max_depth=3, min_samples_leaf=1000,\n",
237+
" gb_args={'subsample': 0.7})\n",
236238
"reweighter.fit(original_train, target_train)\n",
237239
"\n",
238240
"gb_weights_test = reweighter.predict_weights(original_test)\n",
@@ -331,9 +333,9 @@
331333
"metadata": {},
332334
"outputs": [],
333335
"source": [
334-
"from sklearn.ensemble import GradientBoostingClassifier\n",
335336
"from sklearn.metrics import roc_auc_score\n",
336337
"from sklearn.model_selection import train_test_split\n",
338+
"from xgboost import XGBClassifier\n",
337339
"\n",
338340
"data = np.concatenate([original_test, target_test])\n",
339341
"labels = np.array([0] * len(original_test) + [1] * len(target_test))\n",
@@ -347,7 +349,7 @@
347349
"for name, new_weights in weights.items():\n",
348350
" W = np.concatenate([new_weights / new_weights.sum() * len(target_test), [1] * len(target_test)])\n",
349351
" Xtr, Xts, Ytr, Yts, Wtr, Wts = train_test_split(data, labels, W, random_state=42, train_size=0.51)\n",
350-
" clf = GradientBoostingClassifier(subsample=0.3, n_estimators=50).fit(Xtr, Ytr, sample_weight=Wtr)\n",
352+
" clf = XGBClassifier(subsample=0.8, n_estimators=50).fit(Xtr, Ytr, sample_weight=Wtr)\n",
351353
"\n",
352354
" print(name, roc_auc_score(Yts, clf.predict_proba(Xts)[:, 1], sample_weight=Wts))"
353355
]
@@ -428,8 +430,8 @@
428430
"outputs": [],
429431
"source": [
430432
"# define base reweighter\n",
431-
"reweighter_base = reweight.GBReweighter(n_estimators=80,\n",
432-
" learning_rate=0.01, max_depth=4, min_samples_leaf=100,\n",
433+
"reweighter_base = reweight.GBReweighter(n_estimators=40,\n",
434+
" learning_rate=0.02, max_depth=4, min_samples_leaf=100,\n",
433435
" gb_args={'subsample': 0.8})\n",
434436
"reweighter = reweight.FoldingReweighter(reweighter_base, n_folds=2)\n",
435437
"# there is no need to divide the data into train/test parts; the reweighter can be trained on the whole samples\n",
@@ -467,7 +469,7 @@
467469
"for name, new_weights in weights.items():\n",
468470
" W = np.concatenate([new_weights / new_weights.sum() * len(target), [1] * len(target)])\n",
469471
" Xtr, Xts, Ytr, Yts, Wtr, Wts = train_test_split(data, labels, W, random_state=42, train_size=0.51)\n",
470-
" clf = GradientBoostingClassifier(subsample=0.6, n_estimators=30).fit(Xtr, Ytr, sample_weight=Wtr)\n",
472+
" clf = XGBClassifier(subsample=0.6, n_estimators=30).fit(Xtr, Ytr, sample_weight=Wtr)\n",
471473
"\n",
472474
" print(name, roc_auc_score(Yts, clf.predict_proba(Xts)[:, 1], sample_weight=Wts))"
473475
]

advanced-python/50LikelihoodInference.ipynb

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,24 @@
213213
"plot_fit(model, data) # before the fit"
214214
]
215215
},
216+
{
217+
"cell_type": "markdown",
218+
"metadata": {},
219+
"source": [
220+
"### A quick plot\n",
221+
"\n",
222+
"Sometimes, it's useful to just quickly make a plot of the model without controlling all of the aspects. This can be done using the `plot` attribute of the model."
223+
]
224+
},
225+
{
226+
"cell_type": "code",
227+
"execution_count": null,
228+
"metadata": {},
229+
"outputs": [],
230+
"source": [
231+
"model.plot.plotpdf()"
232+
]
233+
},
216234
{
217235
"cell_type": "markdown",
218236
"metadata": {},

advanced-python/70ScikitHEPUniverse.ipynb

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88
"\n",
99
"The [Scikit-HEP project](https://scikit-hep.org/) is a well maintained collection of HEP packages in Python that are useful for analysis. It contains tools ranging from plotting helpers, PDG lookup, DecayLanguage converters over high-performance histogramming libraries and ROOT I/O up to likelihood fitting and statistics libraries.\n",
1010
"\n",
11-
"This is a minimal overview over the packages that are available and have not yet been used in the other tutorials."
11+
"This is a minimal overview of the packages that are available and have not yet been used in the other tutorials.\n",
12+
"\n",
13+
"**If you are interested in becoming part of the Scikit-HEP community, have a look at the [project website](https://scikit-hep.org/).**"
1214
]
1315
},
1416
{

environment.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,10 @@ channels:
44
- nodefaults
55
dependencies:
66
- python ~=3.11.0
7+
- aiohttp # needed for uproot http access
78
- boost-histogram
89
- hep_ml
10+
- hepstats
911
- hepunits
1012
- hist
1113
- ipython
@@ -19,16 +21,14 @@ dependencies:
1921
- particle
2022
- pandoc
2123
- pip
22-
- uv
24+
- requests # needed for uproot http access
2325
- scikit-learn
2426
- scipy
2527
- uproot >=5.0.0 # 5.0.0 breaks the httpsource argument of open; TODO: upgrade the "get_truth" function accordingly (what's the equivalent?)
26-
- aiohttp # needed for uproot http access
27-
- requests # needed for uproot http access
28+
- uv
2829
- vector
2930
- wget
3031
# - xgboost
31-
- hepstats
3232
- pip:
3333
- zfit >=0.24.0 # to have the newest version, TensorFlow is a bit stuck: https://github.com/conda-forge/tensorflow-feedstock/pull/408
3434
- zfit-physics >=0.7.0

0 commit comments

Comments
 (0)