Skip to content

Commit 911fd73

Browse files
author
Luigi Dello Stritto
committed
Updated DB and fixed bugs
1 parent 8487d93 commit 911fd73

File tree

7 files changed

+432
-854
lines changed

7 files changed

+432
-854
lines changed

machine_learning_hep/analysis/analyzerdhadrons.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ def __init__(self, datap, case, typean, period):
7979
self.lpt_finbinmax = datap["analysis"][self.typean]["sel_an_binmax"]
8080
self.bin_matching = datap["analysis"][self.typean]["binning_matching"]
8181
self.p_nptbins = len(self.lpt_finbinmin)
82-
self.lpt_probcutfin = datap["mlapplication"]["probcutoptimal"]
82+
self.lpt_probcutfin_tmp = datap["mlapplication"]["probcutoptimal"]
8383
self.triggerbit = datap["analysis"][self.typean].get("triggerbit", "")
8484

8585
dp = datap["analysis"][self.typean]
@@ -131,7 +131,7 @@ def __init__(self, datap, case, typean, period):
131131
self.fit_func_bkg = {}
132132
self.fit_range = {}
133133

134-
self.path_fig = Path(f"fig/{self.case}/{self.typean}")
134+
self.path_fig = Path(f'{os.path.expandvars(self.d_resultsallpdata)}/fig')
135135
for folder in ["qa", "fit", "roofit", "sideband", "signalextr", "fd", "uf"]:
136136
(self.path_fig / folder).mkdir(parents=True, exist_ok=True)
137137

@@ -160,7 +160,7 @@ def __init__(self, datap, case, typean, period):
160160
# region helpers
161161
def _save_canvas(self, canvas, filename):
162162
# folder = self.d_resultsallpmc if mcordata == 'mc' else self.d_resultsallpdata
163-
canvas.SaveAs(f"fig/{self.case}/{self.typean}/{filename}")
163+
canvas.SaveAs(f'{self.path_fig}/{filename}')
164164

165165
def _save_hist(self, hist, filename, option=""):
166166
if not hist:
@@ -290,25 +290,27 @@ def fit(self):
290290
signifhistos = TH1F("hsignifs0", "", len(self.lpt_finbinmin), array("d", self.bins_candpt))
291291
soverbhistos = TH1F("hSoverB0", "", len(self.lpt_finbinmin), array("d", self.bins_candpt))
292292

293+
lpt_probcutfin = [None] * self.nbins
293294
with TFile(rfilename) as rfile:
294295
for ipt in range(len(self.lpt_finbinmin)):
296+
lpt_probcutfin[ipt] = self.lpt_probcutfin_tmp[self.bin_matching[ipt]]
295297
self.logger.debug("fitting %s - %i", level, ipt)
296298
roows = self.roows.get(ipt)
297299
if self.mltype == "MultiClassification":
298300
suffix = "%s%d_%d_%.2f%.2f%.2f" % (
299301
self.v_var_binning,
300302
self.lpt_finbinmin[ipt],
301303
self.lpt_finbinmax[ipt],
302-
self.lpt_probcutfin[ipt][0],
303-
self.lpt_probcutfin[ipt][1],
304-
self.lpt_probcutfin[ipt][2],
304+
lpt_probcutfin[ipt][0],
305+
lpt_probcutfin[ipt][1],
306+
lpt_probcutfin[ipt][2],
305307
)
306308
else:
307309
suffix = "%s%d_%d_%.2f" % (
308310
self.v_var_binning,
309311
self.lpt_finbinmin[ipt],
310312
self.lpt_finbinmax[ipt],
311-
self.lpt_probcutfin[ipt],
313+
lpt_probcutfin[ipt],
312314
)
313315
h_invmass = rfile.Get("hmass" + suffix)
314316
# Rebin

machine_learning_hep/analysis/analyzerdhadrons_mult.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def __init__(self, datap, case, typean, period):
8181
self.lpt_finbinmax = datap["analysis"][self.typean]["sel_an_binmax"]
8282
self.bin_matching = datap["analysis"][self.typean]["binning_matching"]
8383
self.p_nptbins = len(self.lpt_finbinmin)
84-
self.lpt_probcutfin = datap["mlapplication"]["probcutoptimal"]
84+
self.lpt_probcutfin_tmp = datap["mlapplication"]["probcutoptimal"]
8585

8686
self.signal_loss = datap["analysis"][self.typean].get("signal_loss", "")
8787
self.lvar2_binmin = datap["analysis"][self.typean]["sel_binmin2"]
@@ -326,16 +326,18 @@ def fit(self):
326326
"hSoverB%d" % (ibin2), "", len(self.lpt_finbinmin), array("d", self.bins_candpt)
327327
)
328328

329+
lpt_probcutfin = [None] * self.nbins
329330
for ipt in range(len(self.lpt_finbinmin)):
331+
lpt_probcutfin[ipt] = self.lpt_probcutfin_tmp[self.bin_matching[ipt]]
330332
self.logger.debug("fitting %s - %i - %i", level, ipt, ibin2)
331333
roows = self.roows.get(ipt)
332334
if self.mltype == "MultiClassification":
333335
suffix = "%s%d_%d_%.2f%.2f%s_%.2f_%.2f" % (
334336
self.v_var_binning,
335337
self.lpt_finbinmin[ipt],
336338
self.lpt_finbinmax[ipt],
337-
self.lpt_probcutfin[ipt][0],
338-
self.lpt_probcutfin[ipt][1],
339+
lpt_probcutfin[ipt][0],
340+
lpt_probcutfin[ipt][1],
339341
self.v_var2_binning,
340342
self.lvar2_binmin[ibin2],
341343
self.lvar2_binmax[ibin2],
@@ -345,7 +347,7 @@ def fit(self):
345347
self.v_var_binning,
346348
self.lpt_finbinmin[ipt],
347349
self.lpt_finbinmax[ipt],
348-
self.lpt_probcutfin[ipt],
350+
lpt_probcutfin[ipt],
349351
self.v_var2_binning,
350352
self.lvar2_binmin[ibin2],
351353
self.lvar2_binmax[ibin2],

machine_learning_hep/data/data_run3/database_ml_parameters_LcJet_pp.yml

Lines changed: 33 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ LcJet_pp:
2222
sel_reco_skim: [null, null, null, null, null, null, null, null, null, null, null] # (sel_skim_binmin bins)
2323
sel_gen_skim: [null, null, null, null, null, null, null, null, null, null, null] # (sel_skim_binmin bins)
2424
sel_skim_binmin: [1, 2, 3, 4, 5, 6, 7, 8, 10, 12] # skimming pt bins (sel_skim_binmin bins)
25-
sel_skim_binmax: [2, 3, 4, 5, 6, 7, 8, 10, 12, 24] # skimming pt bins (sel_skim_binmin bins)
25+
sel_skim_binmax: [2, 3, 4, 5, 6, 7, 8, 10, 12, 30] # skimming pt bins (sel_skim_binmin bins)
2626
var_binning: fPt
2727
dofullevtmerge: false
2828
var_cand: fCandidateSelFlag
@@ -50,7 +50,7 @@ LcJet_pp:
5050
read:
5151
evtorig:
5252
level: all
53-
index: fIndexHfLcCollBases
53+
index: fIndexHFLCCOLLBASES
5454
trees:
5555
O2hflccollbase: [fNumContrib, fCentFT0M, fMultZeqNTracksPV]
5656
extra:
@@ -73,7 +73,7 @@ LcJet_pp:
7373
index: fIndexLCMCPJETOS
7474
trees:
7575
O2hflcpbase: [fPt, fY, fEta, fPhi, fFlagMcMatchGen, fOriginMcGen]
76-
O2lccmcpjeto: [fIndexLCCMCPJETCOS, fIndexHFLcPBASES_0, fJetPt, fJetPhi, fJetEta, fJetNConstituents, fJetR]
76+
O2lccmcpjeto: [fIndexLCCMCPJETCOS, fIndexHFLCPBASES_0, fJetPt, fJetPhi, fJetEta, fJetNConstituents, fJetR]
7777
O2lccmcpjetmo: [fIndexArrayLCCMCDJETOS_hf, fIndexArrayLCCMCDJETOS_geo, fIndexArrayLCCMCDJETOS_pt]
7878
O2lccmcpjetsso: [fEnergyMother, fPtLeading, fPtSubLeading, fTheta, fNSub2DR, fNSub1, fNSub2]
7979
tags:
@@ -93,13 +93,13 @@ LcJet_pp:
9393
level: det
9494
index: fIndexLCCMCDJETOS
9595
trees:
96-
O2hflcbase: [fIndexHfLcCollBases, fPt, fY, fEta, fPhi, fM]
96+
O2hflcbase: [fIndexHFLCCOLLBASES, fPt, fY, fEta, fPhi, fM]
9797
O2hflcmc: [fFlagMcMatchRec, fOriginMcRec]
9898
# O2hflcpar: [fCpa, fCpaXY, fChi2PCA, fDecayLength, fDecayLengthXY, fDecayLengthNormalised, fDecayLengthXYNormalised, fImpactParameter0, fImpactParameter1, fImpactParameter2, fImpactParameterNormalised0, fImpactParameterNormalised1, fImpactParameterNormalised2, fPtProng0, fPtProng1, fPtProng2]
9999
# O2hflcpare: [fErrorDecayLength, fErrorDecayLengthXY, fErrorImpactParameter0, fErrorImpactParameter1]
100100
O2hflcsel: [fCandidateSelFlag]
101101
O2hflcml: [fMlScores]
102-
O2lccmcdjeto: [fIndexLCCMCDJETCOS, fIndexHFLcBASES_0, fJetPt, fJetPhi, fJetEta, fJetNConstituents, fJetR]
102+
O2lccmcdjeto: [fIndexLCCMCDJETCOS, fIndexHFLCBASES_0, fJetPt, fJetPhi, fJetEta, fJetNConstituents, fJetR]
103103
O2lccmcdjetmo: [fIndexArrayLCCMCPJETOS_hf, fIndexArrayLCCMCPJETOS_geo, fIndexArrayLCCMCPJETOS_pt]
104104
O2lccmcdjetsso: [fEnergyMother, fPtLeading, fPtSubLeading, fTheta, fNSub2DR, fNSub1, fNSub2]
105105
tags:
@@ -123,12 +123,12 @@ LcJet_pp:
123123
level: data
124124
index: fIndexLCCJETOS
125125
trees:
126-
O2hflcbase: [fIndexHfLcCollBases, fPt, fY, fEta, fPhi, fM]
126+
O2hflcbase: [fIndexHFLCCOLLBASES, fPt, fY, fEta, fPhi, fM]
127127
# O2hflcpar: [fCpa, fCpaXY, fChi2PCA, fDecayLength, fDecayLengthXY, fDecayLengthNormalised, fDecayLengthXYNormalised, fImpactParameter0, fImpactParameter1, fImpactParameter2, fImpactParameterNormalised0, fImpactParameterNormalised1, fImpactParameterNormalised2, fPtProng0, fPtProng1, fPtProng2]
128128
# O2hflcpare: [fErrorDecayLength, fErrorDecayLengthXY, fErrorImpactParameter0, fErrorImpactParameter1]
129129
O2hflcsel: [fCandidateSelFlag]
130130
O2hflcml: [fMlScores]
131-
O2lccjeto: [fIndexLCCJETCOS, fIndexHFLcBASES_0, fJetPt, fJetPhi, fJetEta, fJetNConstituents, fJetR]
131+
O2lccjeto: [fIndexLCCJETCOS, fIndexHFLCBASES_0, fJetPt, fJetPhi, fJetEta, fJetNConstituents, fJetR]
132132
O2lccjetsso: [fIndexLCCJETOS, fEnergyMother, fPtLeading, fPtSubLeading, fTheta, fNSub2DR, fNSub1, fNSub2]
133133
extract_component:
134134
#- { var: fMlScores, newvar: mlPromptScore, component: 1 }
@@ -239,7 +239,7 @@ LcJet_pp:
239239
seedmerge: [12] #list of periods
240240
period: [LHC23] #list of periods
241241
select_period: [1]
242-
prefix_dir: /data2/MLhep/real/train_318625/
242+
prefix_dir: /data2/MLhep/real/train_357222/
243243
unmerged_tree_dir: [alice] #list of periods
244244
pkl: ["${USER}/lcjet/pkl"] #list of periods
245245
pkl_skimmed: ["${USER}/lcjet/pklsk"] #list of periods
@@ -256,7 +256,7 @@ LcJet_pp:
256256
seedmerge: [12] #list of periods
257257
period: [LHC24h1] #list of periods
258258
select_period: [1]
259-
prefix_dir: /data2/MLhep/sim/train_316964/
259+
prefix_dir: /data2/MLhep/sim/train_352826/
260260
unmerged_tree_dir: [alice]
261261
pkl: ["${USER}/lcjet/pkl"] #list of periods
262262
pkl_skimmed: ["${USER}/lcjet/pklsk"] #list of periods
@@ -346,10 +346,10 @@ LcJet_pp:
346346

347347
jet_obs: &jet_default
348348
sel_an_binmin: [2, 3, 4, 5, 6, 7, 8, 10, 12, 16] # hadron pt bins (sel_an_binmin bins)
349-
sel_an_binmax: [3, 4, 5, 6, 7, 8, 10, 12, 16, 24] # hadron pt bins (sel_an_binmin bins)
350-
bins_ptjet: [2, 5, 7, 10, 15, 30] # systematics, TODO: split rec and gen binning
349+
sel_an_binmax: [3, 4, 5, 6, 7, 8, 10, 12, 16, 30] # hadron pt bins (sel_an_binmin bins)
350+
bins_ptjet: [5, 7, 15, 30] # systematics, TODO: split rec and gen binning
351351
bins_ptjet_eff: [2, 5, 7, 15, 30, 50] # systematics, TODO: split rec and gen binning
352-
cand_collidx: fIndexHfLcCollBases
352+
cand_collidx: fIndexHFLCCOLLBASES
353353
counter_read_data: fReadCountsWithTVXAndZVertexAndSel8
354354
counter_read_mc: fReadCountsWithTVXAndZVertexAndSelMC
355355
counter_tvx: fReadCountsWithTVX
@@ -411,9 +411,22 @@ LcJet_pp:
411411
mass_roofit:
412412
- level: mc
413413
# per_ptjet: true
414+
ptrange: [1., 5.]
415+
range: [2.10, 2.45]
414416
components:
415417
sig:
416-
fn: "Gaussian::peak(m[1., 5.], mean[2.27,2.29], sigma_g1[.01,.005,.035])"
418+
fn: "Gaussian::peak(m[1., 5.], mean[2.282,2.29], sigma_g1[.007,.006,.015])"
419+
wide:
420+
fn: 'Gaussian::wide(m, mean, expr("n*sigma_g1", n[1.,5.], sigma_g1))'
421+
model:
422+
fn: "SUM::sig(f_peak[0.,1.]*peak, wide)"
423+
- level: mc
424+
# per_ptjet: true
425+
ptrange: [5., 30.]
426+
range: [2.10, 2.45]
427+
components:
428+
sig:
429+
fn: "Gaussian::peak(m[1., 5.], mean[2.282,2.29], sigma_g1[.01,.008,.030])"
417430
wide:
418431
fn: 'Gaussian::wide(m, mean, expr("n*sigma_g1", n[1.,5.], sigma_g1))'
419432
model:
@@ -474,6 +487,11 @@ LcJet_pp:
474487
extra_cols: ["mlBkgScore"]
475488
correction_method: run3
476489

490+
#reweight: [[0.86,0.89], [0.89,0.91], [0.90,0.93], [0.92,0.93], [0.93,0.93], [0.94,0.93], [0.95,0.93], [0.95,0.94], [0.95,0.94], [0.95,0.94]] #70-100
491+
#reweight: [[1.03,1.05], [1.02,1.04], [1.02,1.04], [1.02,1.04], [1.01,1.03], [1.01,1.03], [1.01,1.03], [1.01,1.03], [1.00,1.03], [1.01,1.02]] #20-70
492+
#reweight: [[1.15,1.16], [1.11,1.13], [1.10,1.10], [1.08,1.10], [1.07,1.10], [1.06,1.09], [1.06,1.08], [1.05,1.08], [1.05,1.08], [1.05,1.08]] #0-20
493+
494+
477495
unfolding_iterations: 8 # used, maximum iteration
478496
unfolding_iterations_sel: 5 # used, selected iteration # systematics
479497
unfolding_prior_flatness: 0. # ranges from 0. (no flatness) to 1. (flat)
@@ -581,7 +599,8 @@ LcJet_pp:
581599

582600
# Additional cuts applied before mass histogram is filled
583601
use_cuts: True
584-
cuts: ["mlBkgScore < 0.03", "mlBkgScore < 0.04", "mlBkgScore < 0.07", "mlBkgScore < 0.09", "mlBkgScore < 0.11", "mlBkgScore < 0.15", "mlBkgScore < 0.18", "mlBkgScore < 0.25", "mlBkgScore < 0.35", "mlBkgScore < 0.35"] # (sel_an_binmin bins) systematics FIXME: Update for new model.
602+
cuts: ["mlBkgScore < 0.03", "mlBkgScore < 0.04", "mlBkgScore < 0.07", "mlBkgScore < 0.09", "mlBkgScore < 0.11", "mlBkgScore < 0.15", "mlBkgScore < 0.18", "mlBkgScore < 0.25", "mlBkgScore < 0.35", "mlBkgScore < 0.35"] # (sel_an_binmin bins)
603+
#mult_cuts: ["fCentFT0M >= 20 and fCentFT0M <= 70", "fCentFT0M >= 20 and fCentFT0M <= 70", "fCentFT0M >= 20 and fCentFT0M <= 70", "fCentFT0M >= 20 and fCentFT0M <= 70", "fCentFT0M >= 20 and fCentFT0M <= 70", "fCentFT0M >= 20 and fCentFT0M <= 70", "fCentFT0M >= 20 and fCentFT0M <= 70", "fCentFT0M >= 20 and fCentFT0M <= 70", "fCentFT0M >= 20 and fCentFT0M <= 70", "fCentFT0M >= 20 and fCentFT0M <= 70"] # (sel_an_binmin bins)
585604

586605
systematics: # used in machine_learning_hep/analysis/systematics.py
587606
probvariation:

0 commit comments

Comments
 (0)