Skip to content

Commit e64d0ed

Browse files
committed
up
1 parent ef64f48 commit e64d0ed

File tree

1 file changed

+68
-64
lines changed

1 file changed

+68
-64
lines changed

boostedhiggs/hwwprocessor.py

Lines changed: 68 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -149,19 +149,24 @@ def accumulator(self):
149149
return self._accumulator
150150

151151
def _save_dfs_parquet(self, fname, dfs_dict, ch):
152+
"""Save the given pandas DataFrame dictionary as a Parquet file to the output directory."""
152153
if self._output_location is not None:
153154
table = pa.Table.from_pandas(dfs_dict)
154155
if len(table) != 0: # skip dataframes with empty entries
155156
pq.write_table(table, self._output_location + ch + "/parquet/" + fname + ".parquet")
156157

157158
def _ak_to_pandas(self, output_collection: ak.Array) -> pd.DataFrame:
159+
"""Converts an Awkward Array collection to a flat pandas DataFrame for to write the output."""
158160
output = pd.DataFrame()
159161
for field in ak.fields(output_collection):
160162
output[field] = ak.to_numpy(output_collection[field])
161163
return output
162164

163165
def add_selection(self, name: str, sel: np.ndarray, channel: str = "all"):
164-
"""Adds selection to PackedSelection object and the cutflow dictionary"""
166+
"""
167+
Adds a boolean mask to the PackedSelection object for the given channel(s) and updates the
168+
cutflow dictionary with the sum of selected events or weights.
169+
"""
165170
channels = self._channels if channel == "all" else [channel]
166171

167172
for ch in channels:
@@ -180,6 +185,7 @@ def add_selection(self, name: str, sel: np.ndarray, channel: str = "all"):
180185
self.cutflows[ch][name] = np.sum(selection_ch)
181186

182187
def _compute_lhe_pdf_weights(self, events):
188+
"""Computes and returns summed LHE scale and PDF weights (used for QCD and PDF uncertainty calculations)."""
183189
sumlheweight = {}
184190
if "LHEScaleWeight" in events.fields:
185191
if len(events.LHEScaleWeight[0]) == 9:
@@ -193,7 +199,8 @@ def _compute_lhe_pdf_weights(self, events):
193199

194200
return sumlheweight, sumpdfweight
195201

196-
def _apply_trigger_and_metfilters(self, events):
202+
def _compute_trigger_msk(self, events):
203+
"""Computes boolean mask for trigger."""
197204
nevents = len(events)
198205
trigger = {}
199206
for ch in ["ele", "mu_lowpt", "mu_highpt"]:
@@ -206,16 +213,21 @@ def _apply_trigger_and_metfilters(self, events):
206213
trigger["mu_highpt"] = trigger["mu_highpt"] & (~trigger["ele"])
207214
trigger["mu_lowpt"] = trigger["mu_lowpt"] & (~trigger["ele"])
208215

216+
return trigger
217+
218+
def _compute_metfilters_msk(self, events):
219+
"""Computes boolean mask for METfilters."""
220+
nevents = len(events)
209221
metfilters = np.ones(nevents, dtype="bool")
210222
metfilterkey = "mc" if self.isMC else "data"
211223
for mf in self._metfilters[metfilterkey]:
212224
if mf in events.Flag.fields:
213225
metfilters = metfilters & events.Flag[mf]
214226

215-
return trigger, metfilters
227+
return metfilters
216228

217229
def _build_objects(self, events):
218-
230+
"""Constructs physics objects (e.g. leptons, jets) and applies basic kinematic and quality cuts."""
219231
# MUONS
220232
muons = ak.with_field(events.Muon, 0, "flavor")
221233

@@ -273,6 +285,13 @@ def _build_objects(self, events):
273285
msk_good_muons = msk_tight_muons
274286
msk_good_electrons = msk_tight_electrons
275287

288+
# CANDIDATE LEP
289+
goodleptons = ak.concatenate([muons[msk_good_muons], electrons[msk_good_electrons]], axis=1)
290+
goodleptons = goodleptons[ak.argsort(goodleptons.pt, ascending=False)] # sort by pt
291+
292+
candidatelep = ak.firsts(goodleptons) # pick highest pt
293+
candidatelep_p4 = build_p4(candidatelep) # build p4 for candidate lepton
294+
276295
# AK8 JETS
277296
fatjets = events.FatJet
278297
fatjets["msdcorr"] = fatjets.msoftdrop
@@ -284,6 +303,15 @@ def _build_objects(self, events):
284303
events, good_fatjets, self._year, not self.isMC, self.jecs, fatjets=True
285304
)
286305

306+
FirstFatjet = ak.firsts(good_fatjets[:, 0:1])
307+
SecondFatjet = ak.firsts(good_fatjets[:, 1:2])
308+
309+
# HIGGS CANDIDATE
310+
fj_idx_lep = ak.argmin(good_fatjets.delta_r(candidatelep_p4), axis=1, keepdims=True)
311+
candidatefj = ak.firsts(good_fatjets[fj_idx_lep])
312+
313+
jmsr_shifted_fatjetvars = get_jmsr(good_fatjets[fj_idx_lep], num_jets=1, year=self._year, isData=not self.isMC)
314+
287315
# AK4 JETS
288316
jets, jec_shifted_jetvars = get_jec_jets(events, events.Jet, self._year, not self.isMC, self.jecs, fatjets=False)
289317
met = met_factory.build(events.MET, jets, {}) if self.isMC else events.MET
@@ -299,6 +327,14 @@ def _build_objects(self, events):
299327
jet_veto_map, cut_jetveto = get_JetVetoMap(jets, self._year)
300328
jets = jets[(jets.pt > 30) & jet_veto_map]
301329

330+
# AK4 JETS (OUTSIDE AK8)
331+
msk_ak4_outside_ak8 = jets.delta_r(candidatefj) > 0.8
332+
ak4_outside_ak8 = jets[msk_ak4_outside_ak8]
333+
334+
# VBF VARIABLES
335+
jet1 = ak4_outside_ak8[:, 0:1]
336+
jet2 = ak4_outside_ak8[:, 1:2]
337+
302338
# TAUS
303339
loose_taus_mu = (events.Tau.pt > 20) & (abs(events.Tau.eta) < 2.3) & (events.Tau.idAntiMu >= 1) # loose antiMu ID
304340
loose_taus_ele = (
@@ -309,30 +345,7 @@ def _build_objects(self, events):
309345
loose_taus = (events.Tau.pt > 20) & (abs(events.Tau.eta) < 2.3)
310346
loose_taus = events.Tau[loose_taus]
311347

312-
# CANDIDATE LEP
313-
goodleptons = ak.concatenate([muons[msk_good_muons], electrons[msk_good_electrons]], axis=1)
314-
goodleptons = goodleptons[ak.argsort(goodleptons.pt, ascending=False)] # sort by pt
315-
316-
candidatelep = ak.firsts(goodleptons) # pick highest pt
317-
candidatelep_p4 = build_p4(candidatelep) # build p4 for candidate lepton
318-
319-
# HIGGS CANDIDATE
320-
fj_idx_lep = ak.argmin(good_fatjets.delta_r(candidatelep_p4), axis=1, keepdims=True)
321-
candidatefj = ak.firsts(good_fatjets[fj_idx_lep])
322-
323-
jmsr_shifted_fatjetvars = get_jmsr(good_fatjets[fj_idx_lep], num_jets=1, year=self._year, isData=not self.isMC)
324-
325-
# ak4 jet outside ak8
326-
ak4_outside_ak8_selector = jets.delta_r(candidatefj) > 0.8
327-
ak4_outside_ak8 = jets[ak4_outside_ak8_selector]
328-
329-
# VBF variables
330-
jet1 = ak4_outside_ak8[:, 0:1]
331-
jet2 = ak4_outside_ak8[:, 1:2]
332-
333-
FirstFatjet = ak.firsts(good_fatjets[:, 0:1])
334-
SecondFatjet = ak.firsts(good_fatjets[:, 1:2])
335-
348+
# b-jets (only for jets with abs(eta)<2.5)
336349
bjet_selector = (jets.delta_r(candidatefj) > 0.8) & (abs(jets.eta) < 2.5)
337350

338351
objects = {
@@ -366,20 +379,16 @@ def _build_objects(self, events):
366379
"jmsr_shifted_fatjetvars": jmsr_shifted_fatjetvars,
367380
"jec_shifted_fatjetvars": jec_shifted_fatjetvars,
368381
"jec_shifted_jetvars": jec_shifted_jetvars,
369-
# others
382+
# others (easier to just store them here)
370383
"fj_idx_lep": fj_idx_lep,
371-
"ak4_outside_ak8_selector": ak4_outside_ak8_selector,
372-
"ak4_outside_ak8": ak4_outside_ak8,
384+
"msk_ak4_outside_ak8": msk_ak4_outside_ak8,
373385
"bjet_selector": bjet_selector,
374386
}
375387

376388
return objects
377389

378390
def _derive_variables(self, events, objects):
379-
380-
ak4_outside_ak8 = objects["ak4_outside_ak8"]
381-
bjet_selector = objects["bjet_selector"]
382-
391+
"""Derives physics variables from `objects` (e.g. deltaR(jet,lep))."""
383392
ht = ak.sum(objects["jets"].pt, axis=1)
384393

385394
# VH jet
@@ -388,8 +397,7 @@ def _derive_variables(self, events, objects):
388397
mask_candidatefj = fatJetIndices != minDeltaR
389398

390399
allScores = VScore(objects["good_fatjets"])
391-
masked = allScores[mask_candidatefj]
392-
VH_fj = ak.firsts(objects["good_fatjets"][allScores == ak.max(masked, axis=1)])
400+
VH_fj = ak.firsts(objects["good_fatjets"][allScores == ak.max(allScores[mask_candidatefj], axis=1)])
393401

394402
# nleptons
395403
n_loose_taus_mu = ak.sum(objects["loose_taus_mu"], axis=1)
@@ -413,15 +421,15 @@ def _derive_variables(self, events, objects):
413421
lep_miso = objects["candidatelep"].miniPFRelIso_all # miniso for candidate lepton
414422

415423
# b-jets (only for jets with abs(eta)<2.5)
416-
ak4_bjet_candidate = objects["jets"][bjet_selector]
424+
ak4_bjet_candidate = objects["jets"][objects["bjet_selector"]]
417425

418426
# VBF variables
419427
deta = abs(ak.firsts(objects["jet1"]).eta - ak.firsts(objects["jet2"]).eta)
420428
mjj = (ak.firsts(objects["jet1"]) + ak.firsts(objects["jet2"])).mass
421429

422430
# njets
423431
NumFatjets = ak.num(objects["good_fatjets"])
424-
NumOtherJets = ak.num(ak4_outside_ak8)
432+
NumOtherJets = ak.sum(objects["msk_ak4_outside_ak8"])
425433

426434
# n-bjets
427435
n_bjets_L = ak.sum(ak4_bjet_candidate.btagDeepFlavB > btagWPs["deepJet"][self._year]["L"], axis=1)
@@ -477,7 +485,6 @@ def _derive_variables(self, events, objects):
477485
nCjets = (ak.sum(goodgenjets.hadronFlavour == 4, axis=1)).to_numpy()
478486

479487
derived_vars = {
480-
"fj_idx_lep": objects["fj_idx_lep"],
481488
"ht": ht,
482489
# candidatefj
483490
"fj_pt": objects["candidatefj"].pt,
@@ -556,14 +563,11 @@ def _derive_variables(self, events, objects):
556563
return derived_vars
557564

558565
def _apply_JEC(self, objects, variables):
559-
566+
"""Computes JES/JER and JMSR shift variations and appends them to the set of output variables."""
560567
jec_shifted_fatjetvars = objects["jec_shifted_fatjetvars"]
561568
jmsr_shifted_fatjetvars = objects["jmsr_shifted_fatjetvars"]
562569
jec_shifted_jetvars = objects["jec_shifted_jetvars"]
563570

564-
fj_idx_lep = objects["fj_idx_lep"]
565-
ak4_outside_ak8_selector = objects["ak4_outside_ak8_selector"]
566-
567571
fatjetvars = {
568572
"fj_pt": objects["candidatefj"].pt,
569573
"fj_eta": objects["candidatefj"].eta,
@@ -576,7 +580,7 @@ def _apply_JEC(self, objects, variables):
576580
# JEC vars
577581
for shift, vals in jec_shifted_fatjetvars["pt"].items():
578582
if shift != "":
579-
fatjetvars_sys[f"fj_pt{shift}"] = ak.firsts(vals[fj_idx_lep])
583+
fatjetvars_sys[f"fj_pt{shift}"] = ak.firsts(vals[objects["fj_idx_lep"]])
580584

581585
# JMSR vars
582586
for shift, vals in jmsr_shifted_fatjetvars["msoftdrop"].items():
@@ -593,11 +597,11 @@ def _apply_JEC(self, objects, variables):
593597
pt_1 = objects["jet1"].pt
594598
pt_2 = objects["jet2"].pt
595599
try:
596-
pt_1 = vals[ak4_outside_ak8_selector][:, 0]
600+
pt_1 = vals[objects["msk_ak4_outside_ak8"]][:, 0]
597601
except Exception:
598602
pt_1 = objects["jet1"].pt
599603
try:
600-
pt_2 = vals[ak4_outside_ak8_selector][:, 1]
604+
pt_2 = vals[objects["msk_ak4_outside_ak8"]][:, 1]
601605
except Exception:
602606
pt_2 = objects["jet2"].pt
603607

@@ -661,7 +665,7 @@ def _apply_pileup_cutoff(self, events, year, yearmod, cutoff: float = 4):
661665
self.add_selection(name="PU_cutoff", sel=pw_pass)
662666

663667
def _add_selections(self, events, trigger, metfilters, objects, variables):
664-
668+
"""Adds event pre-selection to PackedSelection for cutflow and filtering."""
665669
if self.isMC:
666670
self._apply_pileup_cutoff(events, self._year, self._yearmod, cutoff=4)
667671

@@ -738,8 +742,7 @@ def _add_selections(self, events, trigger, metfilters, objects, variables):
738742
self.add_selection(name="dummy", sel=(variables["NumFatjets"] > -1))
739743

740744
def _store_genVars(self, dataset, events, objects, variables):
741-
742-
# store gen-level matching variables
745+
"""Matches generator-level particles to reconstructed jets or leptons."""
743746
signal_mask = None
744747
if self.isMC:
745748
if self.isSignal:
@@ -774,12 +777,8 @@ def _store_genVars(self, dataset, events, objects, variables):
774777

775778
return variables
776779

777-
def _store_MCweights(self, dataset, events, objects, variables):
778-
779-
candidatelep = objects["candidatelep"]
780-
jets = objects["jets"]
781-
bjet_selector = objects["bjet_selector"]
782-
780+
def _add_MC_weights(self, dataset, events, objects, variables):
781+
"""Applies event-level weights and corrections for MC, including."""
783782
for ch in self._channels:
784783
if self._year in ("2016", "2017"):
785784
self.weights[ch].add(
@@ -795,12 +794,12 @@ def _store_MCweights(self, dataset, events, objects, variables):
795794
nPU=ak.to_numpy(events.Pileup.nPU),
796795
)
797796

798-
add_pileupid_weights(self.weights[ch], self._year, self._yearmod, jets, events.GenJet, wp="L")
797+
add_pileupid_weights(self.weights[ch], self._year, self._yearmod, objects["jets"], events.GenJet, wp="L")
799798

800799
if ch == "mu":
801-
add_lepton_weight(self.weights[ch], candidatelep, self._year + self._yearmod, "muon")
800+
add_lepton_weight(self.weights[ch], objects["candidatelep"], self._year + self._yearmod, "muon")
802801
elif ch == "ele":
803-
add_lepton_weight(self.weights[ch], candidatelep, self._year + self._yearmod, "electron")
802+
add_lepton_weight(self.weights[ch], objects["candidatelep"], self._year + self._yearmod, "electron")
804803

805804
ewk_corr, qcd_corr, alt_qcd_corr = add_VJets_kFactors(self.weights[ch], events.GenPart, dataset, events)
806805
# add corrections for plotting
@@ -880,8 +879,8 @@ def _store_MCweights(self, dataset, events, objects, variables):
880879
**variables,
881880
**get_btag_weights(
882881
self._year,
883-
jets,
884-
bjet_selector,
882+
objects["jets"],
883+
objects["bjet_selector"],
885884
wp=wp_,
886885
algo="deepJet",
887886
systematics=self._systematics,
@@ -890,6 +889,7 @@ def _store_MCweights(self, dataset, events, objects, variables):
890889
return variables
891890

892891
def _add_lundplane_weights(self, events, dataset, selection_ch, objects):
892+
"""Computes variables used for Lund plane reweighting technique."""
893893
from boostedhiggs.corrections import getLPweights
894894

895895
pf_cands, gen_parts_eta_phi, gen_parts_pt_mass, ak8_jets, bgen_parts_eta_phi, genlep = getLPweights(
@@ -925,6 +925,7 @@ def _add_lundplane_weights(self, events, dataset, selection_ch, objects):
925925
return lpvars
926926

927927
def _run_inference(self, events, selection_ch, objects):
928+
"""Runs ParT tagger inference using Triton."""
928929
model_name = "ak8_MD_vminclv2ParT_manual_fixwrap_all_nodes"
929930
pnet_vars = runInferenceTriton(
930931
self.tagger_resources_path, events[selection_ch], objects["fj_idx_lep"][selection_ch], model_name=model_name
@@ -937,6 +938,7 @@ def _run_inference(self, events, selection_ch, objects):
937938
return {**scores, **reg_mass, **hidNeurons}
938939

939940
def _build_output(self, events, dataset, variables, objects):
941+
"""Stores the events passing the selections in a pandas dataframes."""
940942
output = {}
941943
for ch in self._channels:
942944
selection_ch = self.selections[ch].all(*self.selections[ch].names)
@@ -975,7 +977,7 @@ def _build_output(self, events, dataset, variables, objects):
975977
return output
976978

977979
def process(self, events: ak.Array):
978-
"""Returns skimmed events which pass preselection cuts and with the branches listed in self._skimvars"""
980+
"""Returns skimmed events which pass preselection cuts as pandas dataframe."""
979981

980982
dataset = events.metadata["dataset"]
981983
nevents = len(events)
@@ -994,7 +996,8 @@ def process(self, events: ak.Array):
994996
for ch in self._channels:
995997
self.weights[ch].add("genweight", events.genWeight)
996998

997-
trigger, metfilters = self._apply_trigger_and_metfilters(events)
999+
trigger = self._compute_trigger_msk(events)
1000+
metfilters = self._compute_metfilters_msk(events)
9981001

9991002
objects = self._build_objects(events)
10001003
variables = self._derive_variables(events, objects)
@@ -1010,7 +1013,7 @@ def process(self, events: ak.Array):
10101013
variables = self._store_genVars(dataset, events, objects, variables)
10111014

10121015
if self.isMC:
1013-
variables = self._store_MCweights(dataset, events, objects, variables)
1016+
variables = self._add_MC_weights(dataset, events, objects, variables)
10141017

10151018
output = self._build_output(events, dataset, variables, objects)
10161019

@@ -1038,4 +1041,5 @@ def process(self, events: ak.Array):
10381041
}
10391042

10401043
def postprocess(self, accumulator):
1044+
"""Placeholder for Coffea postprocessing. Currently does nothing."""
10411045
return accumulator

0 commit comments

Comments
 (0)