Skip to content

Commit 0d1cc47

Browse files
author
Luigi Dello Stritto
committed
fixes
1 parent 00dd543 commit 0d1cc47

File tree

4 files changed

+32
-33
lines changed

4 files changed

+32
-33
lines changed

machine_learning_hep/analysis/analyzer_jets.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ def calculate_efficiencies(self):
231231
self._save_canvas(c, f'eff/h_ptjet-pthf_eff_{cat}_ptjet.png')
232232

233233
# Run 3 efficiencies
234-
for cat in cats:
234+
for icat, cat in enumerate(cats):
235235
# gen-level efficiency for feeddown estimation
236236
h_eff_gen = h_genmatch[cat].Clone()
237237
h_eff_gen.Divide(h_gen[cat])
@@ -269,13 +269,24 @@ def calculate_efficiencies(self):
269269

270270
eff = h_det[cat].Clone(f'h_effnew_{cat}')
271271
ensure_sumw2(eff)
272-
eff.Divide(h_out)
272+
eff.Divide(h_out)  # the optional 'efficiency.reweight' correction is applied below, bin by bin over the two axes (ptjet, pthf)
273+
274+
if eff_corr := self.cfg('efficiency.reweight'):
275+
for iptjet in range(get_nbins(eff, 0)):
276+
for ipt in range(get_nbins(eff, 1)):
277+
scale_bin(eff, eff_corr[ipt][icat], iptjet+1, ipt+1)
278+
273279
self._save_hist(eff, f'eff/h_ptjet-pthf_effnew_{cat}.png')
274280
self.h_effnew_ptjet_pthf[cat] = eff
275281

276282
eff_avg = project_hist(h_det[cat], [1], {0: bins_ptjet})
277283
ensure_sumw2(eff_avg)
278284
eff_avg.Divide(project_hist(h_out, [1], {0: bins_ptjet}))
285+
286+
if eff_corr := self.cfg('efficiency.reweight'):
287+
for ipt in range(get_nbins(eff_avg, 0)):
288+
scale_bin(eff_avg, eff_corr[ipt][icat], ipt+1)
289+
279290
self._save_hist(eff_avg, f'eff/h_pthf_effnew_{cat}.png')
280291
self.h_effnew_pthf[cat] = eff_avg
281292

machine_learning_hep/data/data_run3/database_ml_parameters_LcJet_pp.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ LcJet_pp:
2121
# sel_reco_skim: ["mlPromptScore > 0.96", "mlPromptScore > 0.97", "mlPromptScore > 0.9", "mlPromptScore > 0.85", "mlPromptScore > 0.8", "mlPromptScore > 0.6", null] # (sel_skim_binmin bins)
2222
sel_reco_skim: [null, null, null, null, null, null, null, null, null, null, null] # (sel_skim_binmin bins)
2323
sel_gen_skim: [null, null, null, null, null, null, null, null, null, null, null] # (sel_skim_binmin bins)
24-
sel_skim_binmin: [1, 2, 3, 4, 5, 6, 7, 8, 10, 12,] # skimming pt bins (sel_skim_binmin bins)
24+
sel_skim_binmin: [1, 2, 3, 4, 5, 6, 7, 8, 10, 12] # skimming pt bins (sel_skim_binmin bins)
2525
sel_skim_binmax: [2, 3, 4, 5, 6, 7, 8, 10, 12, 24] # skimming pt bins (sel_skim_binmin bins)
2626
var_binning: fPt
2727
dofullevtmerge: false
@@ -582,8 +582,8 @@ LcJet_pp:
582582
xgboost_classifierLcpKpi_dfselection_fPt_12.0_24.0.sav
583583
] # sel_skim_binmin bins
584584
probcutpresel:
585-
data: [[0.05, 0.0, 0.0], [0.05, 0.0, 0.0], [0.08, 0.0, 0.0], [0.1, 0.0, 0.0], [0.2, 0.0, 0.0], [0.2, 0.0, 0.0], [0.2, 0.0, 0.0], [0.25, 0.0, 0.0], [0.35, 0.0, 0.0], [0.4, 0.0, 0.0]] #list of nbins
586-
mc: [[0.05, 0.0, 0.0], [0.05, 0.0, 0.0], [0.08, 0.0, 0.0], [0.1, 0.0, 0.0], [0.2, 0.0, 0.0], [0.2, 0.0, 0.0], [0.2, 0.0, 0.0], [0.25, 0.0, 0.0], [0.35, 0.0, 0.0], [0.4, 0.0, 0.0]] #list of nbins
585+
data: [[0.02, 0.0, 0.0], [0.03, 0.0, 0.0], [0.04, 0.0, 0.0], [0.07, 0.0, 0.0], [0.09, 0.0, 0.0], [0.11, 0.0, 0.0], [0.15, 0.0, 0.0], [0.18, 0.0, 0.0], [0.25, 0.0, 0.0], [0.35, 0.0, 0.0]] #list of nbins
586+
mc: [[0.02, 0.0, 0.0], [0.03, 0.0, 0.0], [0.04, 0.0, 0.0], [0.07, 0.0, 0.0], [0.09, 0.0, 0.0], [0.11, 0.0, 0.0], [0.15, 0.0, 0.0], [0.18, 0.0, 0.0], [0.25, 0.0, 0.0], [0.35, 0.0, 0.0]] #list of nbins
587587
probcutoptimal: [[0.02, 0.0, 0.0], [0.03, 0.0, 0.0], [0.04, 0.0, 0.0], [0.07, 0.0, 0.0], [0.09, 0.0, 0.0], [0.11, 0.0, 0.0], [0.15, 0.0, 0.0], [0.18, 0.0, 0.0], [0.25, 0.0, 0.0], [0.35, 0.0, 0.0]] #list of nbins
588588

589589
#region analysis

machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_newformat_mult_ana.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,6 @@ LcpKpi:
6363
trees:
6464
O2hflcmccollbase: [fPosX, fPosY, fPosZ, fCentFT0M]
6565
O2hflcmcrcollid: [fIndexArrayHFLCCOLLBASES]
66-
rename: {old: fCentFT0M, new: fCentFT0Mmc}
6766

6867
reco:
6968
level: all

machine_learning_hep/processer.py

Lines changed: 16 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -189,33 +189,28 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles, # pylint: disab
189189
# Potentially mask certain values (e.g. nsigma TOF of -999)
190190
self.p_mask_values = datap["ml"].get("mask_values", None)
191191

192-
self.lpt_probcutpre = datap["mlapplication"]["probcutpresel"][self.mcordata]
193-
self.lpt_probcutfin = datap["analysis"][self.typean].get("probcuts", None)
194-
195192
self.bins_skimming = np.array(list(zip(self.lpt_anbinmin, self.lpt_anbinmax)), 'd')
196193
self.bins_analysis = np.array(list(zip(self.lpt_finbinmin, self.lpt_finbinmax)), 'd')
197194
bin_matching = [
198195
[ptrange[0] <= bin[0] and ptrange[1] >= bin[1] for ptrange in self.bins_skimming].index(True)
199196
for bin in self.bins_analysis
200197
]
201198

202-
# Make it backwards-compatible
203-
if not self.lpt_probcutfin:
204-
lpt_probcutfin_tmp = datap["mlapplication"]["probcutoptimal"]
205-
self.lpt_probcutfin = []
206-
for i in range(self.p_nptfinbins):
207-
bin_id = bin_matching[i]
208-
self.lpt_probcutfin.append(lpt_probcutfin_tmp[bin_id])
199+
self.lpt_probcutpre = datap["mlapplication"]["probcutpresel"][self.mcordata]
200+
lpt_probcutfin_tmp = datap["mlapplication"]["probcutoptimal"]
201+
self.lpt_probcutfin = [lpt_probcutfin_tmp[bin_matching[ibin]]
202+
for ibin in range(self.p_nptfinbins)]
209203

210-
if self.mltype == "MultiClassification":
211-
for probcutfin, probcutpre in zip(self.lpt_probcutfin, self.lpt_probcutpre):
204+
for ibin, probcutfin in enumerate(self.lpt_probcutfin):
205+
probcutpre = self.lpt_probcutpre[bin_matching[ibin]]
206+
if self.mltype == "MultiClassification":
212207
if probcutfin[0] > probcutpre[0] or probcutfin[1] < probcutpre[1] or probcutfin[2] < probcutpre[2]:
213208
self.logger.fatal("Probability cut final: %s must be tighter than presel %s!\n" \
214209
"Verify that bkg prob presel > final, and other cuts presel < final",
215210
self.lpt_probcutfin, self.lpt_probcutpre)
216-
elif self.lpt_probcutfin < self.lpt_probcutpre:
217-
self.logger.fatal("Probability cut final: %s must be tighter (smaller values) than presel %s!",
218-
self.lpt_probcutfin, self.lpt_probcutpre)
211+
elif probcutfin < probcutpre:
212+
self.logger.fatal("Probability cut final: %s must be tighter (smaller values) than presel %s!",
213+
self.lpt_probcutfin, self.lpt_probcutpre)
219214

220215
if self.mltype == "MultiClassification":
221216
self.l_selml = []
@@ -418,10 +413,6 @@ def dfuse(df_spec):
418413
dfs[df_name][var] = np.logical_and(dfs[df_name][var] == 1, swapped)
419414
self.logger.debug(' %s -> done', df_name)
420415

421-
if 'rename' in df_spec:
422-
spec = df_spec['rename']
423-
dfs[df_name] = dfs[df_name].rename(columns={spec['old']: spec['new']})
424-
425416

426417
if self.df_merge:
427418
for m_spec in self.df_merge:
@@ -433,18 +424,18 @@ def dfuse(df_spec):
433424
self.logger.info('merging %s with %s on %s into %s', base, ref, on, out)
434425
if not isinstance(on, list) or 'df' not in on:
435426
on = ['df', on]
436-
dfs[out] = dfmerge(dfs[base], dfs[ref], on=on)
427+
dfs[out] = dfmerge(dfs[base], dfs[ref], suffixes=(f'_{base}', None), on=on)
437428
elif (on := m_spec.get('left_on', None)) is not None:
438429
self.logger.info('merging %s with %s on %s into %s', base, ref, on, out)
439430
if not is_numeric_dtype(dfs[base][on]):
440431
self.logger.info('exploding dataframe %s on variable %s', base, on)
441-
dfs[out] = dfmerge(dfs[base].explode(on), dfs[ref], left_on=['df', on], right_index=True)
432+
dfs[out] = dfmerge(dfs[base].explode(on), dfs[ref], left_on=['df', on], suffixes=(f'_{base}', None), right_index=True)
442433
else:
443-
dfs[out] = dfmerge(dfs[base], dfs[ref], left_on=['df', on], right_index=True)
434+
dfs[out] = dfmerge(dfs[base], dfs[ref], left_on=['df', on], suffixes=(f'_{base}', None), right_index=True)
444435
else:
445436
var = self.df_read[ref]['index']
446437
self.logger.info('merging %s with %s on %s (default) into %s', base, ref, var, out)
447-
dfs[out] = dfmerge(dfs[base], dfs[ref], left_on=['df', var], right_index=True)
438+
dfs[out] = dfmerge(dfs[base], dfs[ref], left_on=['df', var], suffixes=(f'_{base}', None), right_index=True)
448439
if 'extra' in m_spec:
449440
self.logger.debug(' %s -> extra', out)
450441
for col_name, col_val in m_spec['extra'].items():
@@ -462,9 +453,7 @@ def dfuse(df_spec):
462453
def skim(self, file_index):
463454
dfreco = read_df(self.l_reco[file_index])
464455
dfgen = read_df(self.l_gen[file_index]) if self.mcordata == 'mc' else None
465-
466-
if self.n_gen_sl:
467-
dfgen_sl = read_df(self.l_gen_sl[file_index]) if self.mcordata == 'mc' else None
456+
dfgen_sl = read_df(self.l_gen_sl[file_index]) if self.n_gen_sl and self.mcordata == 'mc' else None
468457

469458
for ipt in range(self.p_nptbins):
470459
dfrecosk = seldf_singlevar(dfreco, self.v_var_binning,
@@ -478,7 +467,7 @@ def skim(self, file_index):
478467
dfgensk = dfquery(dfgensk, self.s_gen_skim[ipt])
479468
write_df(dfgensk, self.mptfiles_gensk[ipt][file_index])
480469

481-
if self.n_gen_sl:
470+
if dfgen_sl is not None:
482471
dfgensk_sl = seldf_singlevar(dfgen_sl, self.v_var_binning,
483472
self.lpt_anbinmin[ipt], self.lpt_anbinmax[ipt])
484473
dfgensk_sl = dfquery(dfgensk_sl, self.s_gen_skim[ipt])

0 commit comments

Comments
 (0)