Skip to content

Commit d70cb98

Browse files
authored
Validation dev (#673)
* Add pt_cand in validation plots
* Add reco with TOF in efficiency
* Improve verbose mode
* Using analysis binning in TPC-TOF matching eff
* Use itertools
* Using presel DF
* Minor fix
1 parent 9946d91 commit d70cb98

File tree

4 files changed

+93
-97
lines changed

4 files changed

+93
-97
lines changed

machine_learning_hep/analysis/analyzerdhadrons_mult.py

Lines changed: 25 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import os
2020
# pylint: disable=unused-wildcard-import, wildcard-import
2121
from array import array
22+
import itertools
2223
# pylint: disable=import-error, no-name-in-module, unused-import
2324
from root_numpy import hist2array, array2hist
2425
from ROOT import TFile, TH1F, TH2F, TCanvas, TPad, TF1, TH1D
@@ -647,28 +648,30 @@ def do_plot(histo):
647648
def plot_tpc_tof_me(tag):
648649
# Compute TPC-TOF matching efficiency
649650
if tpc_tof_me:
650-
for i in ["Pi", "K", "Pr"]:
651-
for j in ["0", "1"]:
652-
for k in ["p", "pt"]:
653-
hname = [f"{k}_prong{j}",
654-
f"nsigTOF_{i}_{j}", tag]
655-
hnum = get_histo(*hname,
656-
strictly_require=False)
657-
if hnum is None:
658-
continue
659-
hnum = hnum.ProjectionX(
660-
hnum.GetName() + "_num", 2, -1)
661-
hden = get_histo(*hname)
662-
hden = hden.ProjectionX(
663-
hden.GetName() + "_den")
664-
hnum.Divide(hnum, hden, 1, 1, "B")
665-
hnum.SetName(
666-
hnum.GetName().replace(
667-
"_num", "_TPC-TOF_MatchingEfficiency"
668-
)
669-
)
670-
hnum.GetYaxis().SetTitle("TPC-TOF_MatchingEfficiency")
671-
do_plot(hnum)
651+
to_plot = [["Pi", "K", "Pr"],
652+
["0", "1"],
653+
["p_prong0", "pt_prong0", "pt_cand"]
654+
]
655+
for spec, prong, observable in itertools.product(*to_plot):
656+
hname = [f"{observable}",
657+
f"nsigTOF_{spec}_{prong}", tag]
658+
hnum = get_histo(*hname,
659+
strictly_require=False)
660+
if hnum is None:
661+
continue
662+
hnum = hnum.ProjectionX(
663+
hnum.GetName() + "_num", 2, -1)
664+
hden = get_histo(*hname)
665+
hden = hden.ProjectionX(
666+
hden.GetName() + "_den")
667+
hnum.Divide(hnum, hden, 1, 1, "B")
668+
hnum.SetName(
669+
hnum.GetName().replace(
670+
"_num", "_TPC-TOF_MatchingEfficiency"
671+
)
672+
)
673+
hnum.GetYaxis().SetTitle("TPC-TOF_MatchingEfficiency")
674+
do_plot(hnum)
672675

673676
plot_tpc_tof_me(tag="")
674677
# Part dedicated to MC Checks

machine_learning_hep/processerdhadrons_mult.py

Lines changed: 60 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -268,26 +268,44 @@ def get_reweighted_count(self, dfsel):
268268
# pylint: disable=line-too-long
269269
def process_efficiency_single(self, index):
270270
out_file = TFile.Open(self.l_histoeff[index], "recreate")
271+
h_list = []
271272
for ibin2 in range(len(self.lvar2_binmin)):
272-
stringbin2 = "_%s_%.2f_%.2f" % (self.v_var2_binning_gen, \
273-
self.lvar2_binmin[ibin2], \
274-
self.lvar2_binmax[ibin2])
273+
stringbin2 = "_%s_%.2f_%.2f" % (self.v_var2_binning_gen,
274+
self.lvar2_binmin[ibin2],
275+
self.lvar2_binmax[ibin2])
275276
n_bins = len(self.lpt_finbinmin)
276277
analysis_bin_lims_temp = self.lpt_finbinmin.copy()
277278
analysis_bin_lims_temp.append(self.lpt_finbinmax[n_bins-1])
278279
analysis_bin_lims = array.array('f', analysis_bin_lims_temp)
279-
h_gen_pr = TH1F("h_gen_pr" + stringbin2, "Prompt Generated in acceptance |y|<0.5", \
280-
n_bins, analysis_bin_lims)
281-
h_presel_pr = TH1F("h_presel_pr" + stringbin2, "Prompt Reco in acc |#eta|<0.8 and sel", \
282-
n_bins, analysis_bin_lims)
283-
h_sel_pr = TH1F("h_sel_pr" + stringbin2, "Prompt Reco and sel in acc |#eta|<0.8 and sel", \
284-
n_bins, analysis_bin_lims)
285-
h_gen_fd = TH1F("h_gen_fd" + stringbin2, "FD Generated in acceptance |y|<0.5", \
286-
n_bins, analysis_bin_lims)
287-
h_presel_fd = TH1F("h_presel_fd" + stringbin2, "FD Reco in acc |#eta|<0.8 and sel", \
288-
n_bins, analysis_bin_lims)
289-
h_sel_fd = TH1F("h_sel_fd" + stringbin2, "FD Reco and sel in acc |#eta|<0.8 and sel", \
290-
n_bins, analysis_bin_lims)
280+
281+
def make_histo(name, title,
282+
name_extra=stringbin2,
283+
bins=n_bins,
284+
binning=analysis_bin_lims):
285+
histo = TH1F(name + name_extra, title, bins, binning)
286+
h_list.append(histo)
287+
return histo
288+
289+
h_gen_pr = make_histo("h_gen_pr",
290+
"Prompt Generated in acceptance |y|<0.5")
291+
h_presel_pr = make_histo("h_presel_pr",
292+
"Prompt Reco in acc |#eta|<0.8 and sel")
293+
h_presel_pr_wotof = make_histo("h_presel_pr_wotof",
294+
"Prompt Reco in acc woTOF |#eta|<0.8 and pre-sel")
295+
h_presel_pr_wtof = make_histo("h_presel_pr_wtof",
296+
"Prompt Reco in acc wTOF |#eta|<0.8 and pre-sel")
297+
h_sel_pr = make_histo("h_sel_pr",
298+
"Prompt Reco and sel in acc |#eta|<0.8 and sel")
299+
h_sel_pr_wotof = make_histo("h_sel_pr_wotof",
300+
"Prompt Reco and sel woTOF in acc |#eta|<0.8")
301+
h_sel_pr_wtof = make_histo("h_sel_pr_wtof",
302+
"Prompt Reco and sel wTOF in acc |#eta|<0.8")
303+
h_gen_fd = make_histo("h_gen_fd",
304+
"FD Generated in acceptance |y|<0.5")
305+
h_presel_fd = make_histo("h_presel_fd",
306+
"FD Reco in acc |#eta|<0.8 and sel")
307+
h_sel_fd = make_histo("h_sel_fd",
308+
"FD Reco and sel in acc |#eta|<0.8 and sel")
291309

292310
bincounter = 0
293311
for ipt in range(self.p_nptfinbins):
@@ -328,66 +346,38 @@ def process_efficiency_single(self, index):
328346
else:
329347
df_reco_sel_fd = df_reco_presel_fd.copy()
330348

331-
if self.corr_eff_mult[ibin2] is True:
332-
val, err = self.get_reweighted_count(df_gen_sel_pr)
333-
h_gen_pr.SetBinContent(bincounter + 1, val)
334-
h_gen_pr.SetBinError(bincounter + 1, err)
335-
val, err = self.get_reweighted_count(df_reco_presel_pr)
336-
h_presel_pr.SetBinContent(bincounter + 1, val)
337-
h_presel_pr.SetBinError(bincounter + 1, err)
338-
val, err = self.get_reweighted_count(df_reco_sel_pr)
339-
h_sel_pr.SetBinContent(bincounter + 1, val)
340-
h_sel_pr.SetBinError(bincounter + 1, err)
341-
#print("prompt efficiency tot ptbin=", bincounter, ", value = ",
342-
# len(df_reco_sel_pr)/len(df_gen_sel_pr))
343-
344-
val, err = self.get_reweighted_count(df_gen_sel_fd)
345-
h_gen_fd.SetBinContent(bincounter + 1, val)
346-
h_gen_fd.SetBinError(bincounter + 1, err)
347-
val, err = self.get_reweighted_count(df_reco_presel_fd)
348-
h_presel_fd.SetBinContent(bincounter + 1, val)
349-
h_presel_fd.SetBinError(bincounter + 1, err)
350-
val, err = self.get_reweighted_count(df_reco_sel_fd)
351-
h_sel_fd.SetBinContent(bincounter + 1, val)
352-
h_sel_fd.SetBinError(bincounter + 1, err)
353-
#print("fd efficiency tot ptbin=", bincounter, ", value = ",
354-
# len(df_reco_sel_fd)/len(df_gen_sel_fd))
355-
else:
356-
val = len(df_gen_sel_pr)
357-
err = math.sqrt(val)
358-
h_gen_pr.SetBinContent(bincounter + 1, val)
359-
h_gen_pr.SetBinError(bincounter + 1, err)
360-
val = len(df_reco_presel_pr)
361-
err = math.sqrt(val)
362-
h_presel_pr.SetBinContent(bincounter + 1, val)
363-
h_presel_pr.SetBinError(bincounter + 1, err)
364-
val = len(df_reco_sel_pr)
365-
err = math.sqrt(val)
366-
h_sel_pr.SetBinContent(bincounter + 1, val)
367-
h_sel_pr.SetBinError(bincounter + 1, err)
349+
def set_content(df_to_use, histogram,
350+
i_b=ibin2, b_c=bincounter):
351+
if self.corr_eff_mult[i_b] is True:
352+
val, err = self.get_reweighted_count(df_to_use)
353+
histogram.SetBinContent(b_c + 1, val)
354+
histogram.SetBinError(b_c + 1, err)
355+
else:
356+
val = len(df_to_use)
357+
err = math.sqrt(val)
358+
histogram.SetBinContent(b_c + 1, val)
359+
histogram.SetBinError(b_c + 1, err)
368360

369-
val = len(df_gen_sel_fd)
370-
err = math.sqrt(val)
371-
h_gen_fd.SetBinContent(bincounter + 1, val)
372-
h_gen_fd.SetBinError(bincounter + 1, err)
373-
val = len(df_reco_presel_fd)
374-
err = math.sqrt(val)
375-
h_presel_fd.SetBinContent(bincounter + 1, val)
376-
h_presel_fd.SetBinError(bincounter + 1, err)
377-
val = len(df_reco_sel_fd)
378-
err = math.sqrt(val)
379-
h_sel_fd.SetBinContent(bincounter + 1, val)
380-
h_sel_fd.SetBinError(bincounter + 1, err)
361+
set_content(df_gen_sel_pr, h_gen_pr)
362+
if "nsigTOF_Pr_0" in df_reco_presel_pr:
363+
set_content(df_reco_presel_pr[df_reco_presel_pr.nsigTOF_Pr_0 < -998],
364+
h_presel_pr_wotof)
365+
set_content(df_reco_presel_pr[df_reco_presel_pr.nsigTOF_Pr_0 > -998],
366+
h_presel_pr_wtof)
367+
set_content(df_reco_presel_pr, h_presel_pr)
368+
set_content(df_reco_sel_pr, h_sel_pr)
369+
set_content(df_reco_sel_pr[df_reco_sel_pr.nsigTOF_Pr_0 < -998], h_sel_pr_wotof)
370+
set_content(df_reco_sel_pr[df_reco_sel_pr.nsigTOF_Pr_0 > -998], h_sel_pr_wtof)
371+
set_content(df_gen_sel_fd, h_gen_fd)
372+
set_content(df_reco_presel_fd, h_presel_fd)
373+
set_content(df_reco_sel_fd, h_sel_fd)
381374

382375
bincounter = bincounter + 1
383376

384377
out_file.cd()
385-
h_gen_pr.Write()
386-
h_presel_pr.Write()
387-
h_sel_pr.Write()
388-
h_gen_fd.Write()
389-
h_presel_fd.Write()
390-
h_sel_fd.Write()
378+
for h in h_list:
379+
h.Write()
380+
h_list = []
391381

392382
def process_efficiency(self):
393383
print("Doing efficiencies", self.mcordata, self.period)

machine_learning_hep/validation/validation.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,14 @@ def __init__(self, dataframe=None, tag="", verbose=False, strictly_require=False
3131
self.histograms = []
3232
self.verbose = verbose
3333
self.strictly_require = strictly_require
34+
if self.verbose:
35+
get_logger().info("Creating ValidationCollection with tag '%s'", self.collection_tag)
3436

3537
def reset_input(self, dataframe, tag):
3638
self.source_dataframe = dataframe
3739
self.collection_tag = tag
40+
if self.verbose:
41+
get_logger().info("Resetting ValidationCollection with tag '%s'", self.collection_tag)
3842

3943
def make_and_fill(self, binx, namex, biny=None, namey=None):
4044
"""
@@ -51,10 +55,10 @@ def column_exists(col_name, axis_name):
5155
return True
5256

5357
h = None
58+
if not column_exists(namex, "X"):
59+
return
5460
if namey:
5561
# Check that column exists
56-
if not column_exists(namex, "X"):
57-
return
5862
if not column_exists(namey, "Y"):
5963
return
6064
h_name = f"hVal_{namex}_vs_{namey}{self.collection_tag}"
@@ -64,8 +68,6 @@ def column_exists(col_name, axis_name):
6468
h.SetTitle(h_tit)
6569
else:
6670
# Check that column exists
67-
if not column_exists(namex, "X"):
68-
return
6971
h_name = f"hVal_{namex}{self.collection_tag}"
7072
h_tit = f" ; {namex} ; Entries"
7173
h = makefill1dhist(self.source_dataframe,

machine_learning_hep/validation/validation_candidates.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
Script containing validation histograms on the candidate granularity
1717
"""
1818

19-
from machine_learning_hep.utilities_plot import buildbinning
19+
from machine_learning_hep.utilities_plot import buildbinning, buildarray
2020
from machine_learning_hep.validation.validation import ValidationCollection
2121

2222

@@ -30,6 +30,7 @@ def fill_validation_candidates(df_reco, tag=""):
3030
binning_nsigma = buildbinning(1, -1000, -998)
3131
binning_nsigma += buildbinning(2000, -100, 100)
3232
binning_pt = buildbinning(400, 0, 100)
33+
binning_pt = buildarray([1, 2, 4, 6, 8, 12, 24])
3334
binning_eta = buildbinning(100, -1, 1)
3435
binning_phi = buildbinning(100, 0, 7)
3536
binning_inv_mass = buildbinning(100, 2, 2.5)

0 commit comments

Comments
 (0)