Skip to content

Commit a949a06

Browse files
committed
up
1 parent 7abaa43 commit a949a06

File tree

1 file changed

+27
-36
lines changed

1 file changed

+27
-36
lines changed

boostedhiggs/hwwprocessor.py

Lines changed: 27 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -147,13 +147,13 @@ def __init__(
147147
def accumulator(self):
148148
return self._accumulator
149149

150-
def save_dfs_parquet(self, fname, dfs_dict, ch):
150+
def _save_dfs_parquet(self, fname, dfs_dict, ch):
151151
if self._output_location is not None:
152152
table = pa.Table.from_pandas(dfs_dict)
153153
if len(table) != 0: # skip dataframes with empty entries
154154
pq.write_table(table, self._output_location + ch + "/parquet/" + fname + ".parquet")
155155

156-
def ak_to_pandas(self, output_collection: ak.Array) -> pd.DataFrame:
156+
def _ak_to_pandas(self, output_collection: ak.Array) -> pd.DataFrame:
157157
output = pd.DataFrame()
158158
for field in ak.fields(output_collection):
159159
output[field] = ak.to_numpy(output_collection[field])
@@ -287,8 +287,6 @@ def _build_objects(self, events):
287287
jets, jec_shifted_jetvars = get_jec_jets(events, events.Jet, self._year, not self.isMC, self.jecs, fatjets=False)
288288
met = met_factory.build(events.MET, jets, {}) if self.isMC else events.MET
289289

290-
ht = ak.sum(jets.pt, axis=1)
291-
292290
jet_selector = (
293291
(jets.pt > 15)
294292
& (abs(jets.eta) < 5.0)
@@ -372,33 +370,25 @@ def _build_objects(self, events):
372370
"ak4_outside_ak8_selector": ak4_outside_ak8_selector,
373371
"ak4_outside_ak8": ak4_outside_ak8,
374372
"bjet_selector": bjet_selector,
375-
"ht": ht,
376373
}
377374

378375
return objects
379376

380377
def _derive_variables(self, events, objects):
381378

382-
# unpack
383-
good_fatjets = objects["good_fatjets"]
384-
fj_idx_lep = objects["fj_idx_lep"]
385-
386379
ak4_outside_ak8 = objects["ak4_outside_ak8"]
387-
388-
FirstFatjet = objects["FirstFatjet"]
389-
SecondFatjet = objects["SecondFatjet"]
390-
391380
bjet_selector = objects["bjet_selector"]
392-
ht = objects["ht"]
381+
382+
ht = ak.sum(objects["jets"].pt, axis=1)
393383

394384
# VH jet
395-
minDeltaR = ak.argmin(objects["candidatelep_p4"].delta_r(good_fatjets), axis=1)
396-
fatJetIndices = ak.local_index(good_fatjets, axis=1)
385+
minDeltaR = ak.argmin(objects["candidatelep_p4"].delta_r(objects["good_fatjets"]), axis=1)
386+
fatJetIndices = ak.local_index(objects["good_fatjets"], axis=1)
397387
mask_candidatefj = fatJetIndices != minDeltaR
398388

399-
allScores = VScore(good_fatjets)
389+
allScores = VScore(objects["good_fatjets"])
400390
masked = allScores[mask_candidatefj]
401-
VH_fj = ak.firsts(good_fatjets[allScores == ak.max(masked, axis=1)])
391+
VH_fj = ak.firsts(objects["good_fatjets"][allScores == ak.max(masked, axis=1)])
402392

403393
# nleptons
404394
n_loose_taus_mu = ak.sum(objects["loose_taus_mu"], axis=1)
@@ -429,7 +419,7 @@ def _derive_variables(self, events, objects):
429419
mjj = (ak.firsts(objects["jet1"]) + ak.firsts(objects["jet2"])).mass
430420

431421
# njets
432-
NumFatjets = ak.num(good_fatjets)
422+
NumFatjets = ak.num(objects["good_fatjets"])
433423
NumOtherJets = ak.num(ak4_outside_ak8)
434424

435425
# n-bjets
@@ -486,6 +476,7 @@ def _derive_variables(self, events, objects):
486476
nCjets = (ak.sum(goodgenjets.hadronFlavour == 4, axis=1)).to_numpy()
487477

488478
derived_vars = {
479+
"fj_idx_lep": objects["fj_idx_lep"],
489480
"ht": ht,
490481
# candidatefj
491482
"fj_pt": objects["candidatefj"].pt,
@@ -528,17 +519,17 @@ def _derive_variables(self, events, objects):
528519
"deta": deta,
529520
"mjj": mjj,
530521
# leading fatjet
531-
"FirstFatjet_pt": FirstFatjet.pt,
532-
"FirstFatjet_eta": FirstFatjet.eta,
533-
"FirstFatjet_phi": FirstFatjet.phi,
534-
"FirstFatjet_msd": FirstFatjet.msdcorr,
535-
"FirstFatjet_Vscore": VScore(SecondFatjet),
522+
"FirstFatjet_pt": objects["FirstFatjet"].pt,
523+
"FirstFatjet_eta": objects["FirstFatjet"].eta,
524+
"FirstFatjet_phi": objects["FirstFatjet"].phi,
525+
"FirstFatjet_msd": objects["FirstFatjet"].msdcorr,
526+
"FirstFatjet_Vscore": VScore(objects["SecondFatjet"]),
536527
# second leading fatjet
537-
"SecondFatjet_pt": SecondFatjet.pt,
538-
"SecondFatjet_eta": SecondFatjet.eta,
539-
"SecondFatjet_phi": SecondFatjet.phi,
540-
"SecondFatjet_msd": SecondFatjet.msdcorr,
541-
"SecondFatjet_Vscore": VScore(FirstFatjet),
528+
"SecondFatjet_pt": objects["SecondFatjet"].pt,
529+
"SecondFatjet_eta": objects["SecondFatjet"].eta,
530+
"SecondFatjet_phi": objects["SecondFatjet"].phi,
531+
"SecondFatjet_msd": objects["SecondFatjet"].msdcorr,
532+
"SecondFatjet_Vscore": VScore(objects["FirstFatjet"]),
542533
# second fatjet after candidate jet
543534
"VH_fj_pt": VH_fj.pt,
544535
"VH_fj_eta": VH_fj.eta,
@@ -549,7 +540,7 @@ def _derive_variables(self, events, objects):
549540
).miniPFRelIso_all,
550541
"loose_lep1_pt": ak.firsts(objects["loose_muons1"][ak.argsort(objects["loose_muons1"].pt, ascending=False)]).pt,
551542
"msk_leptonic_taus": msk_leptonic_taus,
552-
"fj_mass_raw": good_fatjets[fj_idx_lep].msdcorr,
543+
"ht": ht,
553544
}
554545

555546
if self.isMC:
@@ -565,11 +556,11 @@ def _derive_variables(self, events, objects):
565556

566557
def _apply_JEC(self, objects, variables):
567558

568-
fj_idx_lep = objects["fj_idx_lep"]
569559
jec_shifted_fatjetvars = objects["jec_shifted_fatjetvars"]
570560
jmsr_shifted_fatjetvars = objects["jmsr_shifted_fatjetvars"]
571561
jec_shifted_jetvars = objects["jec_shifted_jetvars"]
572562

563+
fj_idx_lep = objects["fj_idx_lep"]
573564
ak4_outside_ak8_selector = objects["ak4_outside_ak8_selector"]
574565

575566
fatjetvars = {
@@ -668,7 +659,7 @@ def _apply_pileup_cutoff(self, events, year, yearmod, cutoff: float = 4):
668659

669660
self.add_selection(name="PU_cutoff", sel=pw_pass)
670661

671-
def _apply_selections(self, events, trigger, metfilters, objects, variables):
662+
def _add_selections(self, events, trigger, metfilters, objects, variables):
672663

673664
if self.isMC:
674665
self._apply_pileup_cutoff(events, self._year, self._yearmod, cutoff=4)
@@ -937,7 +928,7 @@ def _run_inference(self, events, selection_ch, objects):
937928
pnet_vars = runInferenceTriton(
938929
self.tagger_resources_path, events[selection_ch], objects["fj_idx_lep"][selection_ch], model_name=model_name
939930
)
940-
pnet_df = self.ak_to_pandas(pnet_vars)
931+
pnet_df = self._ak_to_pandas(pnet_vars)
941932
scores = {"fj_ParT_score": pnet_df[sigs].sum(axis=1).values}
942933
hidNeurons = {k: v for k, v in pnet_vars.items() if "hidNeuron" in k}
943934
reg_mass = {"fj_ParT_mass": pnet_vars["fj_ParT_mass"]}
@@ -967,7 +958,7 @@ def _build_output(self, events, dataset, variables, objects):
967958
output[ch] = {}
968959

969960
if not isinstance(output[ch], pd.DataFrame):
970-
output[ch] = self.ak_to_pandas(output[ch])
961+
output[ch] = self._ak_to_pandas(output[ch])
971962

972963
for var_ in [
973964
"rec_higgs_m",
@@ -1013,7 +1004,7 @@ def process(self, events: ak.Array):
10131004

10141005
variables = self._apply_JEC(objects, variables)
10151006

1016-
self._apply_selections(events, trigger, metfilters, objects, variables)
1007+
self._add_selections(events, trigger, metfilters, objects, variables)
10171008

10181009
variables = self._store_genVars(dataset, events, objects, variables)
10191010

@@ -1030,7 +1021,7 @@ def process(self, events: ak.Array):
10301021
os.makedirs(self._output_location + ch)
10311022
if not os.path.exists(self._output_location + ch + "/parquet"):
10321023
os.makedirs(self._output_location + ch + "/parquet")
1033-
self.save_dfs_parquet(fname, output[ch], ch)
1024+
self._save_dfs_parquet(fname, output[ch], ch)
10341025

10351026
return {
10361027
dataset: {

0 commit comments

Comments
 (0)