Skip to content

Commit f616738

Browse files
author
Luigi Dello Stritto
committed
signal loss in mult processer
1 parent bd5a44f commit f616738

File tree

4 files changed

+196
-34
lines changed

4 files changed

+196
-34
lines changed

machine_learning_hep/analysis/analyzerdhadrons_mult.py

Lines changed: 99 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ def __init__(self, datap, case, typean, period):
5151
self.p_nptbins = len(self.lpt_finbinmin)
5252
self.lpt_probcutfin = datap["mlapplication"]["probcutoptimal"]
5353

54+
self.signal_loss = datap["analysis"][self.typean].get("signal_loss", "")
5455
self.lvar2_binmin = datap["analysis"][self.typean]["sel_binmin2"]
5556
self.lvar2_binmax = datap["analysis"][self.typean]["sel_binmax2"]
5657
self.v_var2_binning = datap["analysis"][self.typean]["var_binning2"]
@@ -421,35 +422,90 @@ def efficiency(self):
421422
cEff.SetWindowSize(500, 500)
422423
cEff.SetLogy()
423424

424-
legeff = TLegend(.5, .25, .7, .45)
425+
legeff = TLegend(.5, .20, .7, .45)
425426
legeff.SetBorderSize(0)
426427
legeff.SetFillColor(0)
427428
legeff.SetFillStyle(0)
428429
legeff.SetTextFont(42)
429430
legeff.SetTextSize(0.035)
430431

432+
if self.signal_loss:
433+
cSl = TCanvas('cSl', 'The Fit Canvas')
434+
cSl.SetCanvasSize(1900, 1500)
435+
cSl.SetWindowSize(500, 500)
436+
legsl = TLegend(.5, .20, .7, .45)
437+
legsl.SetBorderSize(0)
438+
legsl.SetFillColor(0)
439+
legsl.SetFillStyle(0)
440+
legsl.SetTextFont(42)
441+
legsl.SetTextSize(0.035)
442+
431443
for imult in range(self.p_nbin2):
432-
stringbin2 = "_%s_%.2f_%.2f" % (self.v_var2_binning_gen, \
444+
stringbin2 = "_%s_%.2f_%.2f" % (self.v_var2_binning, \
433445
self.lvar2_binmin[imult], \
434446
self.lvar2_binmax[imult])
447+
legeffstring = "%.1f #leq %s < %.1f" % \
448+
(self.lvar2_binmin[imult], self.p_latexbin2var, self.lvar2_binmax[imult])
449+
450+
if self.signal_loss:
451+
h_gen_pr_sl = lfileeff.Get("h_signal_loss_gen_pr" + stringbin2)
452+
h_sel_pr_sl = lfileeff.Get("h_signal_loss_rec_pr" + stringbin2)
453+
h_sel_pr_sl.Divide(h_sel_pr_sl, h_gen_pr_sl, 1.0, 1.0, "B")
454+
h_sel_pr_sl.SetLineColor(imult+1)
455+
h_sel_pr_sl.SetMarkerColor(imult+1)
456+
h_sel_pr_sl.SetMarkerStyle(21)
457+
cSl.cd()
458+
h_sel_pr_sl.Draw("same")
459+
fileouteff.cd()
460+
h_sel_pr_sl.SetName("signal_loss_pr_mult%d" % imult)
461+
h_sel_pr_sl.Write()
462+
463+
legsl.AddEntry(h_sel_pr_sl, legeffstring, "LEP")
464+
h_sel_pr_sl.GetXaxis().SetTitle("#it{p}_{T} (GeV/#it{c})")
465+
h_sel_pr_sl.GetYaxis().SetTitle("Signal loss (prompt) %s" \
466+
% (self.p_latexnhadron))
467+
h_sel_pr_sl.SetMinimum(0.7)
468+
h_sel_pr_sl.SetMaximum(1.0)
469+
435470
h_gen_pr = lfileeff.Get("h_gen_pr" + stringbin2)
436471
h_sel_pr = lfileeff.Get("h_sel_pr" + stringbin2)
437472
h_sel_pr.Divide(h_sel_pr, h_gen_pr, 1.0, 1.0, "B")
473+
474+
if self.signal_loss:
475+
h_sel_pr.Multiply(h_sel_pr_sl)
476+
438477
h_sel_pr.SetLineColor(imult+1)
439478
h_sel_pr.SetMarkerColor(imult+1)
440479
h_sel_pr.SetMarkerStyle(21)
480+
cEff.cd()
441481
h_sel_pr.Draw("same")
442482
fileouteff.cd()
443483
h_sel_pr.SetName("eff_mult%d" % imult)
444484
h_sel_pr.Write()
445-
legeffstring = "%.1f #leq %s < %.1f" % \
446-
(self.lvar2_binmin[imult], self.p_latexbin2var, self.lvar2_binmax[imult])
447485
legeff.AddEntry(h_sel_pr, legeffstring, "LEP")
448486
h_sel_pr.GetXaxis().SetTitle("#it{p}_{T} (GeV/#it{c})")
449-
h_sel_pr.GetYaxis().SetTitle("Acc x efficiency (prompt) %s (1/GeV)" \
487+
h_sel_pr.GetYaxis().SetTitle("Acc x efficiency (prompt) %s" \
450488
% (self.p_latexnhadron))
451489
h_sel_pr.SetMinimum(0.0004)
452490
h_sel_pr.SetMaximum(0.4)
491+
492+
if self.signal_loss:
493+
cSl.cd()
494+
legsl.Draw()
495+
cSl.SaveAs("%s/SignalLoss%s%s.eps" % (self.d_resultsallpmc,
496+
self.case, self.typean))
497+
498+
cSlFD = TCanvas('cSlFD', 'The Fit Canvas')
499+
cSlFD.SetCanvasSize(1900, 1500)
500+
cSlFD.SetWindowSize(500, 500)
501+
legslFD = TLegend(.5, .20, .7, .45)
502+
legslFD.SetBorderSize(0)
503+
legslFD.SetFillColor(0)
504+
legslFD.SetFillStyle(0)
505+
legslFD.SetTextFont(42)
506+
legslFD.SetTextSize(0.035)
507+
508+
cEff.cd()
453509
legeff.Draw()
454510
cEff.SaveAs("%s/Eff%s%s.eps" % (self.d_resultsallpmc,
455511
self.case, self.typean))
@@ -458,38 +514,71 @@ def efficiency(self):
458514
cEffFD.SetCanvasSize(1900, 1500)
459515
cEffFD.SetWindowSize(500, 500)
460516
cEffFD.SetLogy()
461-
legeffFD = TLegend(.5, .25, .7, .45)
517+
legeffFD = TLegend(.5, .20, .7, .45)
462518
legeffFD.SetBorderSize(0)
463519
legeffFD.SetFillColor(0)
464520
legeffFD.SetFillStyle(0)
465521
legeffFD.SetTextFont(42)
466522
legeffFD.SetTextSize(0.035)
467523

468524
for imult in range(self.p_nbin2):
469-
stringbin2 = "_%s_%.2f_%.2f" % (self.v_var2_binning_gen, \
525+
stringbin2 = "_%s_%.2f_%.2f" % (self.v_var2_binning, \
470526
self.lvar2_binmin[imult], \
471527
self.lvar2_binmax[imult])
528+
legeffFDstring = "%.1f #leq %s < %.1f" % \
529+
(self.lvar2_binmin[imult], self.p_latexbin2var, self.lvar2_binmax[imult])
530+
531+
if self.signal_loss:
532+
h_gen_fd_sl = lfileeff.Get("h_signal_loss_gen_fd" + stringbin2)
533+
h_sel_fd_sl = lfileeff.Get("h_signal_loss_rec_fd" + stringbin2)
534+
h_sel_fd_sl.Divide(h_sel_fd_sl, h_gen_fd_sl, 1.0, 1.0, "B")
535+
h_sel_fd_sl.SetLineColor(imult+1)
536+
h_sel_fd_sl.SetMarkerColor(imult+1)
537+
h_sel_fd_sl.SetMarkerStyle(21)
538+
cSlFD.cd()
539+
h_sel_fd_sl.Draw("same")
540+
fileouteff.cd()
541+
h_sel_fd_sl.SetName("signal_loss_fd_mult%d" % imult)
542+
h_sel_fd_sl.Write()
543+
544+
legslFD.AddEntry(h_sel_fd_sl, legeffstring, "LEP")
545+
h_sel_fd_sl.GetXaxis().SetTitle("#it{p}_{T} (GeV/#it{c})")
546+
h_sel_fd_sl.GetYaxis().SetTitle("Signal loss (feeddown) %s" \
547+
% (self.p_latexnhadron))
548+
h_sel_fd_sl.SetMinimum(0.7)
549+
h_sel_fd_sl.SetMaximum(1.0)
550+
472551
h_gen_fd = lfileeff.Get("h_gen_fd" + stringbin2)
473552
h_sel_fd = lfileeff.Get("h_sel_fd" + stringbin2)
474553
h_sel_fd.Divide(h_sel_fd, h_gen_fd, 1.0, 1.0, "B")
554+
555+
if self.signal_loss:
556+
h_sel_fd.Multiply(h_sel_fd_sl)
557+
475558
h_sel_fd.SetLineColor(imult+1)
476559
h_sel_fd.SetMarkerColor(imult+1)
477560
h_sel_fd.SetMarkerStyle(21)
561+
cEffFD.cd()
478562
h_sel_fd.Draw("same")
479563
fileouteff.cd()
480564
h_sel_fd.SetName("eff_fd_mult%d" % imult)
481565
h_sel_fd.Write()
482-
legeffFDstring = "%.1f #leq %s < %.1f" % \
483-
(self.lvar2_binmin[imult], self.p_latexbin2var, self.lvar2_binmax[imult])
484566
legeffFD.AddEntry(h_sel_fd, legeffFDstring, "LEP")
485567
h_sel_fd.GetXaxis().SetTitle("#it{p}_{T} (GeV/#it{c})")
486-
h_sel_fd.GetYaxis().SetTitle("Acc x efficiency feed-down %s (1/GeV)" \
568+
h_sel_fd.GetYaxis().SetTitle("Acc x efficiency feed-down %s" \
487569
% (self.p_latexnhadron))
488570
h_sel_fd.SetMinimum(0.0004)
489571
h_sel_fd.SetMaximum(0.4)
572+
573+
cEffFD.cd()
490574
legeffFD.Draw()
491575
cEffFD.SaveAs("%s/EffFD%s%s.eps" % (self.d_resultsallpmc,
492576
self.case, self.typean))
577+
if self.signal_loss:
578+
cSlFD.cd()
579+
legslFD.Draw()
580+
cSlFD.SaveAs("%s/SignalLossFD%s%s.eps" % (self.d_resultsallpmc,
581+
self.case, self.typean))
493582

494583

495584
def plotter(self):

machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_newformat_mult_ana.yml

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,10 @@ LcpKpi:
6161
level: gen
6262
index: fIndexHFLCMCCOLLBASES
6363
trees:
64-
O2hflcmccollbase: [fPosX, fPosY, fPosZ]
64+
O2hflcmccollbase: [fPosX, fPosY, fPosZ, fCentFT0M]
6565
O2hflcmcrcollid: [fIndexArrayHFLCCOLLBASES]
66-
66+
rename: {old: fCentFT0M, new: fCentFT0Mmc}
67+
6768
reco:
6869
level: all
6970
index: fIndexHfLcBases
@@ -107,7 +108,7 @@ LcpKpi:
107108
merge:
108109
- {base: reco, ref: evtorig , extra: {fMultZeqNTracksPV_sub: fMultZeqNTracksPV - fNProngsContributorsPV}}
109110
- {base: gen, ref: evtoriggen}
110-
- {base: gen, ref: evtorig, left_on: fIndexArrayHFLCCOLLBASES}
111+
- {base: gen, ref: evtorig, left_on: fIndexArrayHFLCCOLLBASES, out: genrec}
111112

112113
write:
113114
evtorig:
@@ -117,10 +118,17 @@ LcpKpi:
117118
level: all
118119
source: evtorig
119120
file: AnalysisResultsEvt.parquet
121+
evtmc:
122+
level: mc
123+
source: evtoriggen
124+
file: AnalysisResultsEvtGen.parquet
120125
reco:
121126
level: all
122127
file: AnalysisResultsReco.parquet
123128
gen:
129+
level: mc
130+
file: AnalysisResultsGenSl.parquet
131+
genrec:
124132
level: mc
125133
file: AnalysisResultsGen.parquet
126134

@@ -398,9 +406,11 @@ LcpKpi:
398406
sel_binmin2: [0, 85, 70, 50, 30, 10, 1, 0] #list of var2 splitting nbins
399407
sel_binmax2: [100, 100, 85, 70, 50, 30, 10, 1]
400408
var_binning2: fCentFT0M
401-
var_binning2_gen: fCentFT0M
409+
var_binning2_gen: fCentFT0Mmc
402410
var_binning2_weights: fMultZeqNTracksPV
403411
mc_cut_on_binning2: false
412+
signal_loss: true
413+
signal_loss_idx: fIndexArrayHFLCCOLLBASES
404414
nbinshisto: 100
405415
minvaluehisto: -0.0005
406416
maxvaluehisto: 100.0005

machine_learning_hep/processer.py

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles, # pylint: disab
115115
self.n_evtorig = datap["files_names"].get("namefile_evtorig")
116116
self.n_evt_count_ml = datap["files_names"].get("namefile_evt_count", "evtcount.yaml")
117117
self.n_gen = datap["files_names"]["namefile_gen"]
118+
self.n_gen_sl = datap["files_names"].get("namefile_gen_sl", "")
118119
self.n_filemass = datap["files_names"]["histofilename"]
119120
self.n_fileeff = datap["files_names"]["efffilename"]
120121
self.n_fileresp = datap["files_names"]["respfilename"]
@@ -170,7 +171,8 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles, # pylint: disab
170171

171172
if self.mcordata == "mc":
172173
self.l_gen = createlist(self.d_pkl, self.l_path, self.n_gen)
173-
174+
if self.n_gen_sl:
175+
self.l_gen_sl = createlist(self.d_pkl, self.l_path, self.n_gen_sl)
174176
self.f_totevt = os.path.join(self.d_pkl, self.n_evt)
175177
self.f_totevtorig = os.path.join(self.d_pkl, self.n_evtorig)
176178

@@ -232,6 +234,7 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles, # pylint: disab
232234
self.d_pkl_dec = d_pkl_dec
233235
self.mptfiles_recosk = []
234236
self.mptfiles_gensk = []
237+
self.mptfiles_gensk_sl = []
235238

236239
self.d_pkl_decmerged = d_pkl_decmerged
237240
self.n_filemass = os.path.join(self.d_results, self.n_filemass)
@@ -249,6 +252,12 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles, # pylint: disab
249252
self.lpt_gen_ml = [os.path.join(self.d_pkl_ml, self.lpt_gensk[ipt]) \
250253
for ipt in range(self.p_nptbins)]
251254
self.f_evt_count_ml = os.path.join(self.d_pkl_ml, self.n_evt_count_ml)
255+
256+
if self.n_gen_sl:
257+
self.lpt_gensk_sl = [self.n_gen_sl.replace(".p", "_%s%d_%d.p" % \
258+
(self.v_var_binning, self.lpt_anbinmin[i], self.lpt_anbinmax[i])) \
259+
for i in range(self.p_nptbins)]
260+
252261
self.lpt_recodec = None
253262
if self.doml is True:
254263
if self.mltype == "MultiClassification":
@@ -277,6 +286,10 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles, # pylint: disab
277286
self.lpt_gensk[ipt]) for ipt in range(self.p_nptbins)]
278287
self.lpt_gendecmerged = [os.path.join(self.d_pkl_decmerged, self.lpt_gensk[ipt])
279288
for ipt in range(self.p_nptbins)]
289+
if self.n_gen_sl:
290+
self.mptfiles_gensk_sl = [createlist(self.d_pklsk, self.l_path, \
291+
self.lpt_gensk_sl[ipt]) for ipt in range(self.p_nptbins)]
292+
280293
# self.triggerbit = datap["analysis"][self.typean]["triggerbit"]
281294
self.runlistrigger = runlisttrigger
282295

@@ -405,6 +418,10 @@ def dfuse(df_spec):
405418
dfs[df_name][var] = np.logical_and(dfs[df_name][var] == 1, swapped)
406419
self.logger.debug(' %s -> done', df_name)
407420

421+
if 'rename' in df_spec:
422+
spec = df_spec['rename']
423+
dfs[df_name] = dfs[df_name].rename(columns={spec['old']: spec['new']})
424+
408425

409426
if self.df_merge:
410427
for m_spec in self.df_merge:
@@ -421,14 +438,9 @@ def dfuse(df_spec):
421438
self.logger.info('merging %s with %s on %s into %s', base, ref, on, out)
422439
if not is_numeric_dtype(dfs[base][on]):
423440
self.logger.info('exploding dataframe %s on variable %s', base, on)
424-
dfs[base] = dfs[base].explode(on)
425-
dfs[out] = dfmerge(dfs[base], dfs[ref], left_on=['df', on], right_index=True)
426-
if not is_numeric_dtype(dfs[base][on]):
427-
dfs[out].index.name = 'MergedIndex'
428-
dfs[out] = dfs[out].reset_index()
429-
sorted_df = dfs[out].sort_values('fMultZeqNTracksPV', ascending=False)
430-
dfs[out] = sorted_df.drop_duplicates('MergedIndex')
431-
dfs[out] = dfs[out].sort_values('MergedIndex', ascending=True)
441+
dfs[out] = dfmerge(dfs[base].explode(on), dfs[ref], left_on=['df', on], right_index=True)
442+
else:
443+
dfs[out] = dfmerge(dfs[base], dfs[ref], left_on=['df', on], right_index=True)
432444
else:
433445
var = self.df_read[ref]['index']
434446
self.logger.info('merging %s with %s on %s (default) into %s', base, ref, var, out)
@@ -451,6 +463,9 @@ def skim(self, file_index):
451463
dfreco = read_df(self.l_reco[file_index])
452464
dfgen = read_df(self.l_gen[file_index]) if self.mcordata == 'mc' else None
453465

466+
if self.n_gen_sl:
467+
dfgen_sl = read_df(self.l_gen_sl[file_index]) if self.mcordata == 'mc' else None
468+
454469
for ipt in range(self.p_nptbins):
455470
dfrecosk = seldf_singlevar(dfreco, self.v_var_binning,
456471
self.lpt_anbinmin[ipt], self.lpt_anbinmax[ipt])
@@ -463,6 +478,12 @@ def skim(self, file_index):
463478
dfgensk = dfquery(dfgensk, self.s_gen_skim[ipt])
464479
write_df(dfgensk, self.mptfiles_gensk[ipt][file_index])
465480

481+
if dfgen_sl is not None:
482+
dfgensk_sl = seldf_singlevar(dfgen_sl, self.v_var_binning,
483+
self.lpt_anbinmin[ipt], self.lpt_anbinmax[ipt])
484+
dfgensk_sl = dfquery(dfgensk_sl, self.s_gen_skim[ipt])
485+
write_df(dfgensk_sl, self.mptfiles_gensk_sl[ipt][file_index])
486+
466487
def applymodel(self, file_index):
467488
for ipt in range(self.p_nptbins):
468489
if os.path.exists(self.mptfiles_recoskmldec[ipt][file_index]):

0 commit comments

Comments
 (0)