Skip to content

Commit 2e0637f

Browse files
committed
up
1 parent a949a06 commit 2e0637f

File tree

1 file changed

+68
-62
lines changed

1 file changed

+68
-62
lines changed

boostedhiggs/hwwprocessor.py

Lines changed: 68 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -145,22 +145,28 @@ def __init__(
145145

146146
@property
147147
def accumulator(self):
148+
"""Required Coffea interface property. Not used in this processor, returns a dummy accumulator."""
148149
return self._accumulator
149150

150151
def _save_dfs_parquet(self, fname, dfs_dict, ch):
152+
"""Save the given pandas DataFrame dictionary as a Parquet file to the output directory."""
151153
if self._output_location is not None:
152154
table = pa.Table.from_pandas(dfs_dict)
153155
if len(table) != 0: # skip dataframes with empty entries
154156
pq.write_table(table, self._output_location + ch + "/parquet/" + fname + ".parquet")
155157

156158
def _ak_to_pandas(self, output_collection: ak.Array) -> pd.DataFrame:
159+
"""Converts an Awkward Array collection to a flat pandas DataFrame for to write the output."""
157160
output = pd.DataFrame()
158161
for field in ak.fields(output_collection):
159162
output[field] = ak.to_numpy(output_collection[field])
160163
return output
161164

162165
def add_selection(self, name: str, sel: np.ndarray, channel: str = "all"):
163-
"""Adds selection to PackedSelection object and the cutflow dictionary"""
166+
"""
167+
Adds a boolean mask to the PackedSelection object for the given channel(s) and updates the
168+
cutflow dictionary with the sum of selected events or weights.
169+
"""
164170
channels = self._channels if channel == "all" else [channel]
165171

166172
for ch in channels:
@@ -179,6 +185,7 @@ def add_selection(self, name: str, sel: np.ndarray, channel: str = "all"):
179185
self.cutflows[ch][name] = np.sum(selection_ch)
180186

181187
def _compute_lhe_pdf_weights(self, events):
188+
"""Computes and returns summed LHE scale and PDF weights (used for QCD and PDF uncertainty calculations)."""
182189
sumlheweight = {}
183190
if "LHEScaleWeight" in events.fields:
184191
if len(events.LHEScaleWeight[0]) == 9:
@@ -192,7 +199,8 @@ def _compute_lhe_pdf_weights(self, events):
192199

193200
return sumlheweight, sumpdfweight
194201

195-
def _apply_trigger_and_metfilters(self, events):
202+
def _compute_trigger_msk(self, events):
203+
"""Computes boolean mask for trigger."""
196204
nevents = len(events)
197205
trigger = {}
198206
for ch in ["ele", "mu_lowpt", "mu_highpt"]:
@@ -205,16 +213,21 @@ def _apply_trigger_and_metfilters(self, events):
205213
trigger["mu_highpt"] = trigger["mu_highpt"] & (~trigger["ele"])
206214
trigger["mu_lowpt"] = trigger["mu_lowpt"] & (~trigger["ele"])
207215

216+
return trigger
217+
218+
def _compute_metfilters_msk(self, events):
219+
"""Computes boolean mask for METfilters."""
220+
nevents = len(events)
208221
metfilters = np.ones(nevents, dtype="bool")
209222
metfilterkey = "mc" if self.isMC else "data"
210223
for mf in self._metfilters[metfilterkey]:
211224
if mf in events.Flag.fields:
212225
metfilters = metfilters & events.Flag[mf]
213226

214-
return trigger, metfilters
227+
return metfilters
215228

216229
def _build_objects(self, events):
217-
230+
"""Constructs physics objects (e.g. leptons, jets) and applies basic kinematic and quality cuts."""
218231
# MUONS
219232
muons = ak.with_field(events.Muon, 0, "flavor")
220233

@@ -272,6 +285,13 @@ def _build_objects(self, events):
272285
msk_good_muons = msk_tight_muons
273286
msk_good_electrons = msk_tight_electrons
274287

288+
# CANDIDATE LEP
289+
goodleptons = ak.concatenate([muons[msk_good_muons], electrons[msk_good_electrons]], axis=1)
290+
goodleptons = goodleptons[ak.argsort(goodleptons.pt, ascending=False)] # sort by pt
291+
292+
candidatelep = ak.firsts(goodleptons) # pick highest pt
293+
candidatelep_p4 = build_p4(candidatelep) # build p4 for candidate lepton
294+
275295
# AK8 JETS
276296
fatjets = events.FatJet
277297
fatjets["msdcorr"] = fatjets.msoftdrop
@@ -283,6 +303,15 @@ def _build_objects(self, events):
283303
events, good_fatjets, self._year, not self.isMC, self.jecs, fatjets=True
284304
)
285305

306+
FirstFatjet = ak.firsts(good_fatjets[:, 0:1])
307+
SecondFatjet = ak.firsts(good_fatjets[:, 1:2])
308+
309+
# HIGGS CANDIDATE
310+
fj_idx_lep = ak.argmin(good_fatjets.delta_r(candidatelep_p4), axis=1, keepdims=True)
311+
candidatefj = ak.firsts(good_fatjets[fj_idx_lep])
312+
313+
jmsr_shifted_fatjetvars = get_jmsr(good_fatjets[fj_idx_lep], num_jets=1, year=self._year, isData=not self.isMC)
314+
286315
# AK4 JETS
287316
jets, jec_shifted_jetvars = get_jec_jets(events, events.Jet, self._year, not self.isMC, self.jecs, fatjets=False)
288317
met = met_factory.build(events.MET, jets, {}) if self.isMC else events.MET
@@ -298,6 +327,14 @@ def _build_objects(self, events):
298327
jet_veto_map, cut_jetveto = get_JetVetoMap(jets, self._year)
299328
jets = jets[(jets.pt > 30) & jet_veto_map]
300329

330+
# AK4 JETS (OUTSIDE AK8)
331+
msk_ak4_outside_ak8 = jets.delta_r(candidatefj) > 0.8
332+
ak4_outside_ak8 = jets[msk_ak4_outside_ak8]
333+
334+
# VBF VARIABLES
335+
jet1 = ak4_outside_ak8[:, 0:1]
336+
jet2 = ak4_outside_ak8[:, 1:2]
337+
301338
# TAUS
302339
loose_taus_mu = (events.Tau.pt > 20) & (abs(events.Tau.eta) < 2.3) & (events.Tau.idAntiMu >= 1) # loose antiMu ID
303340
loose_taus_ele = (
@@ -308,30 +345,7 @@ def _build_objects(self, events):
308345
loose_taus = (events.Tau.pt > 20) & (abs(events.Tau.eta) < 2.3)
309346
loose_taus = events.Tau[loose_taus]
310347

311-
# CANDIDATE LEP
312-
goodleptons = ak.concatenate([muons[msk_good_muons], electrons[msk_good_electrons]], axis=1)
313-
goodleptons = goodleptons[ak.argsort(goodleptons.pt, ascending=False)] # sort by pt
314-
315-
candidatelep = ak.firsts(goodleptons) # pick highest pt
316-
candidatelep_p4 = build_p4(candidatelep) # build p4 for candidate lepton
317-
318-
# HIGGS CANDIDATE
319-
fj_idx_lep = ak.argmin(good_fatjets.delta_r(candidatelep_p4), axis=1, keepdims=True)
320-
candidatefj = ak.firsts(good_fatjets[fj_idx_lep])
321-
322-
jmsr_shifted_fatjetvars = get_jmsr(good_fatjets[fj_idx_lep], num_jets=1, year=self._year, isData=not self.isMC)
323-
324-
# ak4 jet outside ak8
325-
ak4_outside_ak8_selector = jets.delta_r(candidatefj) > 0.8
326-
ak4_outside_ak8 = jets[ak4_outside_ak8_selector]
327-
328-
# VBF variables
329-
jet1 = ak4_outside_ak8[:, 0:1]
330-
jet2 = ak4_outside_ak8[:, 1:2]
331-
332-
FirstFatjet = ak.firsts(good_fatjets[:, 0:1])
333-
SecondFatjet = ak.firsts(good_fatjets[:, 1:2])
334-
348+
# b-jets (only for jets with abs(eta)<2.5)
335349
bjet_selector = (jets.delta_r(candidatefj) > 0.8) & (abs(jets.eta) < 2.5)
336350

337351
objects = {
@@ -365,20 +379,16 @@ def _build_objects(self, events):
365379
"jmsr_shifted_fatjetvars": jmsr_shifted_fatjetvars,
366380
"jec_shifted_fatjetvars": jec_shifted_fatjetvars,
367381
"jec_shifted_jetvars": jec_shifted_jetvars,
368-
# others
382+
# others (easier to just store them here)
369383
"fj_idx_lep": fj_idx_lep,
370-
"ak4_outside_ak8_selector": ak4_outside_ak8_selector,
371-
"ak4_outside_ak8": ak4_outside_ak8,
384+
"msk_ak4_outside_ak8": msk_ak4_outside_ak8,
372385
"bjet_selector": bjet_selector,
373386
}
374387

375388
return objects
376389

377390
def _derive_variables(self, events, objects):
378-
379-
ak4_outside_ak8 = objects["ak4_outside_ak8"]
380-
bjet_selector = objects["bjet_selector"]
381-
391+
"""Derives physics variables from `objects` (e.g. deltaR(jet,lep))."""
382392
ht = ak.sum(objects["jets"].pt, axis=1)
383393

384394
# VH jet
@@ -412,15 +422,15 @@ def _derive_variables(self, events, objects):
412422
lep_miso = objects["candidatelep"].miniPFRelIso_all # miniso for candidate lepton
413423

414424
# b-jets (only for jets with abs(eta)<2.5)
415-
ak4_bjet_candidate = objects["jets"][bjet_selector]
425+
ak4_bjet_candidate = objects["jets"][objects["bjet_selector"]]
416426

417427
# VBF variables
418428
deta = abs(ak.firsts(objects["jet1"]).eta - ak.firsts(objects["jet2"]).eta)
419429
mjj = (ak.firsts(objects["jet1"]) + ak.firsts(objects["jet2"])).mass
420430

421431
# njets
422432
NumFatjets = ak.num(objects["good_fatjets"])
423-
NumOtherJets = ak.num(ak4_outside_ak8)
433+
NumOtherJets = ak.sum(objects["msk_ak4_outside_ak8"])
424434

425435
# n-bjets
426436
n_bjets_L = ak.sum(ak4_bjet_candidate.btagDeepFlavB > btagWPs["deepJet"][self._year]["L"], axis=1)
@@ -476,7 +486,6 @@ def _derive_variables(self, events, objects):
476486
nCjets = (ak.sum(goodgenjets.hadronFlavour == 4, axis=1)).to_numpy()
477487

478488
derived_vars = {
479-
"fj_idx_lep": objects["fj_idx_lep"],
480489
"ht": ht,
481490
# candidatefj
482491
"fj_pt": objects["candidatefj"].pt,
@@ -555,14 +564,11 @@ def _derive_variables(self, events, objects):
555564
return derived_vars
556565

557566
def _apply_JEC(self, objects, variables):
558-
567+
"""Computes JES/JER and JMSR shift variations and appends them to the set of output variables."""
559568
jec_shifted_fatjetvars = objects["jec_shifted_fatjetvars"]
560569
jmsr_shifted_fatjetvars = objects["jmsr_shifted_fatjetvars"]
561570
jec_shifted_jetvars = objects["jec_shifted_jetvars"]
562571

563-
fj_idx_lep = objects["fj_idx_lep"]
564-
ak4_outside_ak8_selector = objects["ak4_outside_ak8_selector"]
565-
566572
fatjetvars = {
567573
"fj_pt": objects["candidatefj"].pt,
568574
"fj_eta": objects["candidatefj"].eta,
@@ -575,7 +581,7 @@ def _apply_JEC(self, objects, variables):
575581
# JEC vars
576582
for shift, vals in jec_shifted_fatjetvars["pt"].items():
577583
if shift != "":
578-
fatjetvars_sys[f"fj_pt{shift}"] = ak.firsts(vals[fj_idx_lep])
584+
fatjetvars_sys[f"fj_pt{shift}"] = ak.firsts(vals[objects["fj_idx_lep"]])
579585

580586
# JMSR vars
581587
for shift, vals in jmsr_shifted_fatjetvars["msoftdrop"].items():
@@ -592,11 +598,11 @@ def _apply_JEC(self, objects, variables):
592598
pt_1 = objects["jet1"].pt
593599
pt_2 = objects["jet2"].pt
594600
try:
595-
pt_1 = vals[ak4_outside_ak8_selector][:, 0]
601+
pt_1 = vals[objects["msk_ak4_outside_ak8"]][:, 0]
596602
except Exception:
597603
pt_1 = objects["jet1"].pt
598604
try:
599-
pt_2 = vals[ak4_outside_ak8_selector][:, 1]
605+
pt_2 = vals[objects["msk_ak4_outside_ak8"]][:, 1]
600606
except Exception:
601607
pt_2 = objects["jet2"].pt
602608

@@ -660,7 +666,7 @@ def _apply_pileup_cutoff(self, events, year, yearmod, cutoff: float = 4):
660666
self.add_selection(name="PU_cutoff", sel=pw_pass)
661667

662668
def _add_selections(self, events, trigger, metfilters, objects, variables):
663-
669+
"""Adds event pre-selection to PackedSelection for cutflow and filtering."""
664670
if self.isMC:
665671
self._apply_pileup_cutoff(events, self._year, self._yearmod, cutoff=4)
666672

@@ -737,8 +743,7 @@ def _add_selections(self, events, trigger, metfilters, objects, variables):
737743
self.add_selection(name="dummy", sel=(variables["NumFatjets"] > -1))
738744

739745
def _store_genVars(self, dataset, events, objects, variables):
740-
741-
# store gen-level matching variables
746+
"""Matches generator-level particles to reconstructed jets or leptons."""
742747
signal_mask = None
743748
if self.isMC:
744749
if self.isSignal:
@@ -773,12 +778,8 @@ def _store_genVars(self, dataset, events, objects, variables):
773778

774779
return variables
775780

776-
def _store_MCweights(self, dataset, events, objects, variables):
777-
778-
candidatelep = objects["candidatelep"]
779-
jets = objects["jets"]
780-
bjet_selector = objects["bjet_selector"]
781-
781+
def _add_MC_weights(self, dataset, events, objects, variables):
782+
"""Applies event-level weights and corrections for MC, including."""
782783
for ch in self._channels:
783784
if self._year in ("2016", "2017"):
784785
self.weights[ch].add(
@@ -794,12 +795,12 @@ def _store_MCweights(self, dataset, events, objects, variables):
794795
nPU=ak.to_numpy(events.Pileup.nPU),
795796
)
796797

797-
add_pileupid_weights(self.weights[ch], self._year, self._yearmod, jets, events.GenJet, wp="L")
798+
add_pileupid_weights(self.weights[ch], self._year, self._yearmod, objects["jets"], events.GenJet, wp="L")
798799

799800
if ch == "mu":
800-
add_lepton_weight(self.weights[ch], candidatelep, self._year + self._yearmod, "muon")
801+
add_lepton_weight(self.weights[ch], objects["candidatelep"], self._year + self._yearmod, "muon")
801802
elif ch == "ele":
802-
add_lepton_weight(self.weights[ch], candidatelep, self._year + self._yearmod, "electron")
803+
add_lepton_weight(self.weights[ch], objects["candidatelep"], self._year + self._yearmod, "electron")
803804

804805
ewk_corr, qcd_corr, alt_qcd_corr = add_VJets_kFactors(self.weights[ch], events.GenPart, dataset, events)
805806
# add corrections for plotting
@@ -879,8 +880,8 @@ def _store_MCweights(self, dataset, events, objects, variables):
879880
**variables,
880881
**get_btag_weights(
881882
self._year,
882-
jets,
883-
bjet_selector,
883+
objects["jets"],
884+
objects["bjet_selector"],
884885
wp=wp_,
885886
algo="deepJet",
886887
systematics=self._systematics,
@@ -889,6 +890,7 @@ def _store_MCweights(self, dataset, events, objects, variables):
889890
return variables
890891

891892
def _add_lundplane_weights(self, events, dataset, selection_ch, objects):
893+
"""Computes variables used for Lund plane reweighting technique."""
892894
from boostedhiggs.corrections import getLPweights
893895

894896
pf_cands, gen_parts_eta_phi, gen_parts_pt_mass, ak8_jets, bgen_parts_eta_phi, genlep = getLPweights(
@@ -924,6 +926,7 @@ def _add_lundplane_weights(self, events, dataset, selection_ch, objects):
924926
return lpvars
925927

926928
def _run_inference(self, events, selection_ch, objects):
929+
"""Runs ParT tagger inference using Triton."""
927930
model_name = "ak8_MD_vminclv2ParT_manual_fixwrap_all_nodes"
928931
pnet_vars = runInferenceTriton(
929932
self.tagger_resources_path, events[selection_ch], objects["fj_idx_lep"][selection_ch], model_name=model_name
@@ -936,6 +939,7 @@ def _run_inference(self, events, selection_ch, objects):
936939
return {**scores, **reg_mass, **hidNeurons}
937940

938941
def _build_output(self, events, dataset, variables, objects):
942+
"""Stores the events passing the selections in a pandas dataframes."""
939943
output = {}
940944
for ch in self._channels:
941945
selection_ch = self.selections[ch].all(*self.selections[ch].names)
@@ -974,7 +978,7 @@ def _build_output(self, events, dataset, variables, objects):
974978
return output
975979

976980
def process(self, events: ak.Array):
977-
"""Returns skimmed events which pass preselection cuts and with the branches listed in self._skimvars"""
981+
"""Returns skimmed events which pass preselection cuts as pandas dataframe."""
978982

979983
dataset = events.metadata["dataset"]
980984
nevents = len(events)
@@ -993,7 +997,8 @@ def process(self, events: ak.Array):
993997
for ch in self._channels:
994998
self.weights[ch].add("genweight", events.genWeight)
995999

996-
trigger, metfilters = self._apply_trigger_and_metfilters(events)
1000+
trigger = self._compute_trigger_msk(events)
1001+
metfilters = self._compute_metfilters_msk(events)
9971002

9981003
objects = self._build_objects(events)
9991004
variables = self._derive_variables(events, objects)
@@ -1009,7 +1014,7 @@ def process(self, events: ak.Array):
10091014
variables = self._store_genVars(dataset, events, objects, variables)
10101015

10111016
if self.isMC:
1012-
variables = self._store_MCweights(dataset, events, objects, variables)
1017+
variables = self._add_MC_weights(dataset, events, objects, variables)
10131018

10141019
output = self._build_output(events, dataset, variables, objects)
10151020

@@ -1037,4 +1042,5 @@ def process(self, events: ak.Array):
10371042
}
10381043

10391044
def postprocess(self, accumulator):
1045+
"""Placeholder for Coffea postprocessing. Currently does nothing."""
10401046
return accumulator

0 commit comments

Comments
 (0)