Skip to content

Commit 3d8191d

Browse files
committed
Propagate POWHEG weights to luminosity scaling
1 parent edf2773 commit 3d8191d

File tree

4 files changed

+31
-8
lines changed

4 files changed

+31
-8
lines changed

machine_learning_hep/analysis/analyzer_jets.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1126,7 +1126,10 @@ def estimate_feeddown(self):
11261126
project_hist(h3_fd_gen_orig[var], [0, 2], {}),
11271127
f"fd/h_ptjet-{var}_feeddown_genonly_noeffscaling.png",
11281128
)
1129-
powheg_xsection_scale_factor = 0. # FIXME: retrieve cross section
1129+
h_norm = rfile.Get("histonorm")
1130+
powheg_xsection_scale_factor = h_norm.GetBinContent(5)
1131+
self.logger.info("powheg_xsection_scale_factor = %f", powheg_xsection_scale_factor)
1132+
self.logger.info("POWHEG luminosity (mb^{-1}): %g", 1.0 / powheg_xsection_scale_factor)
11301133

11311134
case fd_input:
11321135
self.logger.critical("Invalid feeddown input %s", fd_input)

machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp.yml

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -70,8 +70,9 @@ D0Jet_pp:
7070
O2hfd0mccollbase: [fPosZ]
7171
filter: "abs(fPosZ) < 10."
7272

73-
# powhegcoll:
74-
# O2d0cmcpjetmcco: ...
73+
coll_powheg_fd:
74+
trees:
75+
O2d0cmcpjetmcco: [fEventWeight]
7576
collcnt_mc:
7677
level: mc
7778
trees:
@@ -234,6 +235,9 @@ D0Jet_pp:
234235
source: colldata
235236
file: AnalysisResultsEvt.parquet
236237

238+
coll_powheg_fd:
239+
level: fd
240+
file: AnalysisResultsPowheg.parquet
237241
evtorig_fd:
238242
level: fd
239243
source: collfd
@@ -311,6 +315,7 @@ D0Jet_pp:
311315
namefile_reco_applieddata: AnalysisResultsRecoAppliedData.parquet
312316
namefile_reco_appliedmc: AnalysisResultsRecoAppliedMC.parquet
313317
namefile_mcweights: mcweights.root
318+
namefile_wgt: AnalysisResultsPowheg.parquet
314319
treeoutput: "D0tree"
315320
histofilename: "masshisto.root"
316321
efffilename: "effhisto.root"

machine_learning_hep/processer.py

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -143,6 +143,7 @@ def __init__(
143143
self.n_evt = datap["files_names"]["namefile_evt"]
144144
self.n_collcnt = datap["files_names"].get("namefile_collcnt")
145145
self.n_bccnt = datap["files_names"].get("namefile_bccnt")
146+
self.n_wgt = datap["files_names"].get("namefile_wgt", None)
146147
self.n_evtorig = datap["files_names"].get("namefile_evtorig")
147148
self.n_evt_count_ml = datap["files_names"].get("namefile_evt_count", "evtcount.yaml")
148149
self.n_gen = datap["files_names"]["namefile_gen"]
@@ -195,6 +196,7 @@ def __init__(
195196
self.l_evtorig = createlist(self.d_pkl, self.l_path, self.n_evtorig)
196197
self.l_collcnt = createlist(self.d_pkl, self.l_path, self.n_collcnt) if self.datatype != "fd" else None
197198
self.l_bccnt = createlist(self.d_pkl, self.l_path, self.n_bccnt) if self.datatype != "fd" else None
199+
self.l_wgt = createlist(self.d_pkl, self.l_path, self.n_wgt) if self.datatype == "fd" and self.n_wgt else None
198200
self.l_histomass = createlist(self.d_results, self.l_path, self.n_filemass)
199201
self.l_histoeff = createlist(self.d_results, self.l_path, self.n_fileeff)
200202
# self.l_historesp = createlist(self.d_results, self.l_path, self.n_fileresp)
@@ -384,6 +386,8 @@ def dfread(rdir, trees, cols, idx_name=None):
384386
trees = [trees]
385387
cols = [cols]
386388
# if all(type(var) is str for var in vars): vars = [vars]
389+
if not all((name in rdir for name in trees)):
390+
self.logger.critical("Missing trees: %s", trees)
387391
df = None
388392
for tree, col in zip([rdir[name] for name in trees], cols):
389393
try:
@@ -536,10 +540,13 @@ def dfuse(df_spec):
536540
for df_name, df_spec in self.df_write.items():
537541
if dfuse(df_spec):
538542
src = df_spec.get("source", df_name)
539-
dfo = dfquery(dfs[src], df_spec.get("filter", None))
540-
path = os.path.join(self.d_pkl, self.l_path[file_index], df_spec["file"])
541-
self.logger.info("writing %s to %s with info %s", df_name, path, dfo.info())
542-
write_df(dfo, path)
543+
if src in dfs:
544+
dfo = dfquery(dfs[src], df_spec.get("filter", None))
545+
path = os.path.join(self.d_pkl, self.l_path[file_index], df_spec["file"])
546+
self.logger.info("writing %s to %s with info %s", df_name, path, dfo.info())
547+
write_df(dfo, path)
548+
else:
549+
self.logger.error("could not write tree, missing source %s", src)
543550

544551
def skim(self, file_index):
545552
dfreco = read_df(self.l_reco[file_index]) if self.datatype != "fd" else None

machine_learning_hep/processer_jet.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -241,7 +241,7 @@ def process_histomass_single(self, index):
241241
print(f"Opening file {self.l_histomass[index]}", flush=True)
242242
with TFile.Open(self.l_histomass[index], "recreate") as _:
243243
dfevtorig = read_df(self.l_evtorig[index])
244-
histonorm = TH1F("histonorm", "histonorm", 4, 0, 4)
244+
histonorm = TH1F("histonorm", "histonorm", 5, 0, 5)
245245
histonorm.SetBinContent(1, len(dfquery(dfevtorig, self.s_evtsel)))
246246
if self.l_collcnt:
247247
dfcollcnt = read_df(self.l_collcnt[index])
@@ -257,10 +257,18 @@ def process_histomass_single(self, index):
257257
ser_bccnt = dfbccnt[self.cfg("counter_tvx")]
258258
bccnt_tvx = functools.reduce(lambda x, y: float(x) + float(y), (ar[0] for ar in ser_bccnt))
259259
histonorm.SetBinContent(4, bccnt_tvx)
260+
if self.l_wgt:
261+
self.logger.info("Filling event weights")
262+
dfwgt = read_df(self.l_wgt[index])
263+
print(dfwgt.info())
264+
histonorm.SetBinContent(5, dfwgt["fEventWeight"].sum())
265+
else:
266+
self.logger.warning("No event weights found, empty list: %s", self.l_wgt)
260267
get_axis(histonorm, 0).SetBinLabel(1, "N_{evt}")
261268
get_axis(histonorm, 0).SetBinLabel(2, "N_{coll}")
262269
get_axis(histonorm, 0).SetBinLabel(3, "N_{coll}^{TVX}")
263270
get_axis(histonorm, 0).SetBinLabel(4, "N_{BC}^{TVX}")
271+
get_axis(histonorm, 0).SetBinLabel(5, "w_{POWHEG}")
264272
histonorm.Write()
265273

266274
if self.datatype != "fd":

0 commit comments

Comments
 (0)