Skip to content

Commit d07aad6

Browse files
arthur-galArthurBenedikt Volkel
authored
Trigger correction using a histogram as input (#672)
* Trigger correction using a histogram as input
* Propagate the trigger weighting flag
* analysis/systematics.py needs further attention in any case
* Removed the weighttrig flag from all databases and from the code; usetriggcorrfunc is used instead:
  1. usetriggcorrfunc is None: no weighting at all
  2. usetriggcorrfunc is True: weighting according to the fitted function
  3. usetriggcorrfunc is False: weighting according to the histogram

Co-authored-by: Arthur <[email protected]>
Co-authored-by: Benedikt Volkel <[email protected]>
1 parent d70cb98 commit d07aad6

File tree

9 files changed

+66
-31
lines changed

9 files changed

+66
-31
lines changed

machine_learning_hep/analysis/analyzerdhadrons_mult.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,9 @@ def __init__(self, datap, case, typean, period):
168168
self.p_triggereff = datap["analysis"][self.typean].get("triggereff", [1] * 10)
169169
self.p_triggereffunc = datap["analysis"][self.typean].get("triggereffunc", [0] * 10)
170170

171-
self.apply_weights = datap["analysis"][self.typean]["triggersel"]["weighttrig"]
171+
self.apply_weights = \
172+
datap["analysis"][self.typean]["triggersel"].get("usetriggcorrfunc", None) \
173+
is not None
172174
self.root_objects = []
173175

174176
self.get_crossmb_from_path = datap["analysis"][self.typean].get("get_crossmb_from_path", \

machine_learning_hep/analysis/systematics.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,9 @@ def __init__(self, datap, case, typean, period, run_param):
149149
self.s_presel_gen_eff = datap["analysis"][self.typean]["presel_gen_eff"]
150150
self.s_trigger_mc = datap["analysis"][self.typean]["triggersel"]["mc"]
151151
self.s_trigger_data = datap["analysis"][self.typean]["triggersel"]["data"]
152-
self.apply_weights = datap["analysis"][self.typean]["triggersel"]["weighttrig"]
152+
self.apply_weights = \
153+
datap["analysis"][self.typean]["triggersel"].get("usetriggcorrfunc", None) \
154+
is not None
153155

154156
#Build names for input pickle files (data, mc_reco, mc_gen)
155157
self.n_reco = datap["files_names"]["namefile_reco"]

machine_learning_hep/data/data_prod_20200304/database_ml_parameters_D0pp_0304.yml

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,6 @@ D0pp:
297297
triggersel:
298298
data: "trigger_hasbit_INT7==1"
299299
mc: null
300-
weighttrig: false
301300
data: &data_out_default
302301
runselection: [null, null, null] #FIXME
303302
results: [/data/DerivedResultsJets/D0kAnywithJets/vAN-20200304_ROOT6-1/zg/default/default/pp_2016_data/374_20200304-2028/resultsMBjetvspt,
@@ -482,7 +481,7 @@ D0pp:
482481
triggersel:
483482
data: "trigger_hasbit_HighMultSPD==1"
484483
mc: null
485-
weighttrig: true
484+
usetriggcorrfunc: True
486485

487486
data:
488487
runselection: [null, null, HighMultSPD2018] #FIXME the last will have to be replaced by HighMultSPD2018
@@ -577,7 +576,6 @@ D0pp:
577576
triggersel:
578577
data: "trigger_hasbit_INT7==1"
579578
mc: null
580-
weighttrig: false
581579

582580
data:
583581
runselection: [null, null, null]
@@ -671,7 +669,6 @@ D0pp:
671669
triggersel:
672670
data: "trigger_hasbit_INT7==1"
673671
mc: null
674-
weighttrig: false
675672

676673
data:
677674
runselection: [null, null, null]

machine_learning_hep/data/data_prod_20200304/database_ml_parameters_Dspp.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ Dspp:
294294
triggersel:
295295
data: "trigger_hasbit_INT7==1"
296296
mc: null
297-
weighttrig: false
297+
usetriggcorrfunc: Null
298298

299299
data:
300300
runselection: [null, null, null]
@@ -403,7 +403,7 @@ Dspp:
403403
triggersel:
404404
data: "trigger_hasbit_HighMultSPD==1"
405405
mc: null
406-
weighttrig: true
406+
usetriggcorrfunc: true
407407

408408
data:
409409
runselection: [null, null, HighMultSPD2018]

machine_learning_hep/data/data_prod_20200304/database_ml_parameters_LcpK0spp_0304.yml

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -372,7 +372,6 @@ LcpK0spp:
372372
triggersel:
373373
data: "trigger_hasbit_INT7==1"
374374
mc: null
375-
weighttrig: false
376375
data: &data_out_default
377376
runselection: [null, null, null]
378377
results: [/data/DerivedResultsJets/LckAnywithJets_sub/vAN-20200304_ROOT6-1/ff/default/default/pp_2016_data/374_20200304-2028/resultsMBjetvspt,
@@ -464,7 +463,6 @@ LcpK0spp:
464463
triggersel:
465464
data: "trigger_hasbit_INT7==1"
466465
mc: null
467-
weighttrig: false
468466
data:
469467
runselection: [null, null, null]
470468
results: [/data/DerivedResults/LckAnywithJets_sub/vAN-20200304_ROOT6-1/pp_2016_data/374_20200304-2028/resultsMBvspt_ntrkl,
@@ -559,7 +557,7 @@ LcpK0spp:
559557
triggersel:
560558
data: "trigger_hasbit_HighMultSPD==1"
561559
mc: null
562-
weighttrig: true
560+
usetriggcorrfunc: True
563561
data:
564562
runselection: [null, null, HighMultSPD2018]
565563
results: [/data/DerivedResults/LckAnywithJets_sub/vAN-20200304_ROOT6-1/pp_2016_data/374_20200304-2028/resultsSPDvspt,
@@ -652,7 +650,6 @@ LcpK0spp:
652650
triggersel:
653651
data: "trigger_hasbit_INT7==1"
654652
mc: null
655-
weighttrig: false
656653
data:
657654
runselection: [null, null, null]
658655
results: [/data/DerivedResults/LckAnywithJets_sub/vAN-20200304_ROOT6-1/pp_2016_data/374_20200304-2028/resultsMBvspt_perc_v0m,

machine_learning_hep/data/data_prod_20200417/database_ml_parameters_D0pp_0417.yml

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,6 @@ D0pp:
306306
triggersel:
307307
data: "trigger_hasbit_INT7==1"
308308
mc: null
309-
weighttrig: false
310309
data: &data_out_default
311310
runselection: [null, null, null] #FIXME
312311
results: [/data/DerivedResultsJets/D0kINTHighMultCALOwithJets/vAN-20200417_ROOT6-1/zg/default/default/pp_2016_data/405_20200417-1825/resultsMBjetvspt,
@@ -491,7 +490,7 @@ D0pp:
491490
triggersel:
492491
data: "trigger_hasbit_HighMultSPD==1"
493492
mc: null
494-
weighttrig: true
493+
usetriggcorrfunc: True
495494

496495
data:
497496
runselection: [null, null, HighMultSPD2018] #FIXME the last will have to be replaced by HighMultSPD2018
@@ -586,7 +585,6 @@ D0pp:
586585
triggersel:
587586
data: "trigger_hasbit_INT7==1"
588587
mc: null
589-
weighttrig: false
590588

591589
data:
592590
runselection: [null, null, null]
@@ -680,7 +678,6 @@ D0pp:
680678
triggersel:
681679
data: "trigger_hasbit_INT7==1"
682680
mc: null
683-
weighttrig: false
684681

685682
data:
686683
runselection: [null, null, null]

machine_learning_hep/fitting/helpers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ def __init__(self, database: dict, ana_type: str, file_data_name: str, file_mc_n
117117
self.include_reflections = ana_config.get("include_reflection", False)
118118

119119
# Is this a trigger weighted histogram?
120-
self.apply_weights = ana_config["triggersel"]["weighttrig"]
120+
self.apply_weights = ana_config["triggersel"].get("usetriggcorrfunc", None) is not None
121121

122122
# Systematics
123123
self.syst_pars = ana_config.get("systematics", {})

machine_learning_hep/fitting/simple_fit.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ def do_simple_fit(database, type_ana, period_number=-1, output_dir="simple_fit")
169169
include_reflections = fit_pars.get("include_reflection", False)
170170

171171
# Is this a trigger weighted histogram?
172-
apply_weights = fit_pars["triggersel"]["weighttrig"]
172+
apply_weights = fit_pars["triggersel"].get("usetriggcorrfunc", None) is not None
173173

174174
# 4) Misc
175175
# ML WP is needed to build the suffix for extracting the mass histogram

machine_learning_hep/processerdhadrons_mult.py

Lines changed: 53 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -74,13 +74,17 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles,
7474
self.event_cand_validation = datap["analysis"][self.typean].get("event_cand_validation", "")
7575
if "event_cand_validation" not in datap["analysis"][self.typean]:
7676
self.event_cand_validation = False
77-
self.apply_weights = datap["analysis"][self.typean]["triggersel"]["weighttrig"]
77+
self.usetriggcorrfunc = \
78+
datap["analysis"][self.typean]["triggersel"].get("usetriggcorrfunc", None)
7879
self.weightfunc = None
79-
if self.apply_weights is True and self.mcordata == "data":
80+
self.weighthist = None
81+
if self.usetriggcorrfunc is not None and self.mcordata == "data":
8082
filename = os.path.join(self.d_mcreweights, "trigger%s.root" % self.typean)
8183
if os.path.exists(filename):
8284
weight_file = TFile.Open(filename, "read")
8385
self.weightfunc = weight_file.Get("func%s_norm" % self.typean)
86+
self.weighthist = weight_file.Get("hist%s_norm" % self.typean)
87+
self.weighthist.SetDirectory(0)
8488
weight_file.Close()
8589
else:
8690
print("trigger correction file", filename, "doesnt exist")
@@ -89,9 +93,37 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles,
8993
self.maxvaluehisto = datap["analysis"][self.typean]["maxvaluehisto"]
9094
self.mass = datap["mass"]
9195

92-
def gethistonormforselevt_mult(self, df_evt, dfevtevtsel, label, var, weightfunc=None):
96+
@staticmethod
97+
def make_weights(col, func, hist, use_func):
98+
"""Helper function to extract weights
9399
94-
if weightfunc is not None:
100+
Args:
101+
col: np.array
102+
array to evaluate/run over
103+
func: ROOT.TF1
104+
ROOT function to use for evaluation
105+
hist: TH1
106+
ROOT histogram used for getting weights
107+
use_func: bool
108+
whether or not to use func (otherwise hist)
109+
110+
Returns:
111+
iterable
112+
"""
113+
114+
if use_func:
115+
return evaluate(func, col)
116+
def reg(value):
117+
# warning, the histogram has empty bins at high mult.
118+
# (>125 ntrkl) so a check is needed to avoid a 1/0 division
119+
# when computing the inverse of the weight
120+
return value if value != 0. else 1.
121+
return [reg(hist.GetBinContent(hist.FindBin(iw))) for iw in col]
122+
123+
124+
def gethistonormforselevt_mult(self, df_evt, dfevtevtsel, label, var, useweightfromfunc=None):
125+
126+
if useweightfromfunc is not None:
95127
label = label + "_weight"
96128
hSelMult = TH1F('sel_' + label, 'sel_' + label, self.nbinshisto,
97129
self.minvaluehisto, self.maxvaluehisto)
@@ -105,14 +137,20 @@ def gethistonormforselevt_mult(self, df_evt, dfevtevtsel, label, var, weightfunc
105137
df_no_vtx = df_to_keep[~tag_vtx.values]
106138
# events with reco zvtx > 10 cm after previous selection
107139
df_bit_zvtx_gr10 = filter_bit_df(df_to_keep, 'is_ev_rej', [[3], [1, 2, 7, 12]])
108-
if weightfunc is not None:
109-
weightssel = evaluate(weightfunc, dfevtevtsel[var])
140+
141+
142+
if useweightfromfunc is not None:
143+
weightssel = self.make_weights(dfevtevtsel[var], self.weightfunc, self.weighthist,
144+
useweightfromfunc)
145+
weightsnovtx = self.make_weights(df_no_vtx[var], self.weightfunc, self.weighthist,
146+
useweightfromfunc)
147+
weightsgr10 = self.make_weights(df_bit_zvtx_gr10[var], self.weightfunc,
148+
self.weighthist, useweightfromfunc)
149+
110150
weightsinvsel = [1./weight for weight in weightssel]
111151
fill_hist(hSelMult, dfevtevtsel[var], weights=weightsinvsel)
112-
weightsnovtx = evaluate(weightfunc, df_no_vtx[var])
113152
weightsinvnovtx = [1./weight for weight in weightsnovtx]
114153
fill_hist(hNoVtxMult, df_no_vtx[var], weights=weightsinvnovtx)
115-
weightsgr10 = evaluate(weightfunc, df_bit_zvtx_gr10[var])
116154
weightsinvgr10 = [1./weight for weight in weightsgr10]
117155
fill_hist(hVtxOutMult, df_bit_zvtx_gr10[var], weights=weightsinvgr10)
118156
else:
@@ -161,10 +199,10 @@ def process_histomass_single(self, index):
161199
self.gethistonormforselevt_mult(dfevtorig, dfevtevtsel, \
162200
labeltrigger, self.v_var2_binning_gen)
163201

164-
if self.apply_weights is True and self.mcordata == "data":
202+
if self.usetriggcorrfunc is not None and self.mcordata == "data":
165203
hselweight, hnovtxmultweight, hvtxoutmultweight = \
166204
self.gethistonormforselevt_mult(dfevtorig, dfevtevtsel, \
167-
labeltrigger, self.v_var2_binning_gen, self.weightfunc)
205+
labeltrigger, self.v_var2_binning_gen, self.usetriggcorrfunc)
168206
hselweight.Write()
169207
hnovtxmultweight.Write()
170208
hvtxoutmultweight.Write()
@@ -175,7 +213,7 @@ def process_histomass_single(self, index):
175213

176214
list_df_recodtrig = []
177215

178-
for ipt in range(self.p_nptfinbins):
216+
for ipt in range(self.p_nptfinbins): # pylint: disable=too-many-nested-blocks
179217
bin_id = self.bin_matching[ipt]
180218
df = pickle.load(openfile(self.mptfiles_recoskmldec[bin_id][index], "rb"))
181219
if self.s_evtsel is not None:
@@ -206,8 +244,10 @@ def process_histomass_single(self, index):
206244
df_bin = seldf_singlevar_inclusive(df, self.v_var2_binning, \
207245
self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
208246
fill_hist(h_invmass, df_bin.inv_mass)
209-
if self.apply_weights is True and self.mcordata == "data":
210-
weights = evaluate(self.weightfunc, df_bin[self.v_var2_binning_gen])
247+
if self.usetriggcorrfunc is not None and self.mcordata == "data":
248+
weights = self.make_weights(df_bin[self.v_var2_binning_gen], self.weightfunc,
249+
self.weighthist, self.usetriggcorrfunc)
250+
211251
weightsinv = [1./weight for weight in weights]
212252
fill_hist(h_invmass_weight, df_bin.inv_mass, weights=weightsinv)
213253
myfile.cd()

0 commit comments

Comments
 (0)