@@ -149,19 +149,24 @@ def accumulator(self):
149149 return self ._accumulator
150150
151151 def _save_dfs_parquet (self , fname , dfs_dict , ch ):
152+ """Save the given pandas DataFrame dictionary as a Parquet file to the output directory."""
152153 if self ._output_location is not None :
153154 table = pa .Table .from_pandas (dfs_dict )
154155 if len (table ) != 0 : # skip dataframes with empty entries
155156 pq .write_table (table , self ._output_location + ch + "/parquet/" + fname + ".parquet" )
156157
157158 def _ak_to_pandas (self , output_collection : ak .Array ) -> pd .DataFrame :
159+ """Converts an Awkward Array collection to a flat pandas DataFrame for to write the output."""
158160 output = pd .DataFrame ()
159161 for field in ak .fields (output_collection ):
160162 output [field ] = ak .to_numpy (output_collection [field ])
161163 return output
162164
163165 def add_selection (self , name : str , sel : np .ndarray , channel : str = "all" ):
164- """Adds selection to PackedSelection object and the cutflow dictionary"""
166+ """
167+ Adds a boolean mask to the PackedSelection object for the given channel(s) and updates the
168+ cutflow dictionary with the sum of selected events or weights.
169+ """
165170 channels = self ._channels if channel == "all" else [channel ]
166171
167172 for ch in channels :
@@ -180,6 +185,7 @@ def add_selection(self, name: str, sel: np.ndarray, channel: str = "all"):
180185 self .cutflows [ch ][name ] = np .sum (selection_ch )
181186
182187 def _compute_lhe_pdf_weights (self , events ):
188+ """Computes and returns summed LHE scale and PDF weights (used for QCD and PDF uncertainty calculations)."""
183189 sumlheweight = {}
184190 if "LHEScaleWeight" in events .fields :
185191 if len (events .LHEScaleWeight [0 ]) == 9 :
@@ -193,7 +199,8 @@ def _compute_lhe_pdf_weights(self, events):
193199
194200 return sumlheweight , sumpdfweight
195201
196- def _apply_trigger_and_metfilters (self , events ):
202+ def _compute_trigger_msk (self , events ):
203+ """Computes boolean mask for trigger."""
197204 nevents = len (events )
198205 trigger = {}
199206 for ch in ["ele" , "mu_lowpt" , "mu_highpt" ]:
@@ -206,16 +213,21 @@ def _apply_trigger_and_metfilters(self, events):
206213 trigger ["mu_highpt" ] = trigger ["mu_highpt" ] & (~ trigger ["ele" ])
207214 trigger ["mu_lowpt" ] = trigger ["mu_lowpt" ] & (~ trigger ["ele" ])
208215
216+ return trigger
217+
218+ def _compute_metfilters_msk (self , events ):
219+ """Computes boolean mask for METfilters."""
220+ nevents = len (events )
209221 metfilters = np .ones (nevents , dtype = "bool" )
210222 metfilterkey = "mc" if self .isMC else "data"
211223 for mf in self ._metfilters [metfilterkey ]:
212224 if mf in events .Flag .fields :
213225 metfilters = metfilters & events .Flag [mf ]
214226
215- return trigger , metfilters
227+ return metfilters
216228
217229 def _build_objects (self , events ):
218-
230+ """Constructs physics objects (e.g. leptons, jets) and applies basic kinematic and quality cuts."""
219231 # MUONS
220232 muons = ak .with_field (events .Muon , 0 , "flavor" )
221233
@@ -273,6 +285,13 @@ def _build_objects(self, events):
273285 msk_good_muons = msk_tight_muons
274286 msk_good_electrons = msk_tight_electrons
275287
288+ # CANDIDATE LEP
289+ goodleptons = ak .concatenate ([muons [msk_good_muons ], electrons [msk_good_electrons ]], axis = 1 )
290+ goodleptons = goodleptons [ak .argsort (goodleptons .pt , ascending = False )] # sort by pt
291+
292+ candidatelep = ak .firsts (goodleptons ) # pick highest pt
293+ candidatelep_p4 = build_p4 (candidatelep ) # build p4 for candidate lepton
294+
276295 # AK8 JETS
277296 fatjets = events .FatJet
278297 fatjets ["msdcorr" ] = fatjets .msoftdrop
@@ -284,6 +303,15 @@ def _build_objects(self, events):
284303 events , good_fatjets , self ._year , not self .isMC , self .jecs , fatjets = True
285304 )
286305
306+ FirstFatjet = ak .firsts (good_fatjets [:, 0 :1 ])
307+ SecondFatjet = ak .firsts (good_fatjets [:, 1 :2 ])
308+
309+ # HIGGS CANDIDATE
310+ fj_idx_lep = ak .argmin (good_fatjets .delta_r (candidatelep_p4 ), axis = 1 , keepdims = True )
311+ candidatefj = ak .firsts (good_fatjets [fj_idx_lep ])
312+
313+ jmsr_shifted_fatjetvars = get_jmsr (good_fatjets [fj_idx_lep ], num_jets = 1 , year = self ._year , isData = not self .isMC )
314+
287315 # AK4 JETS
288316 jets , jec_shifted_jetvars = get_jec_jets (events , events .Jet , self ._year , not self .isMC , self .jecs , fatjets = False )
289317 met = met_factory .build (events .MET , jets , {}) if self .isMC else events .MET
@@ -299,6 +327,14 @@ def _build_objects(self, events):
299327 jet_veto_map , cut_jetveto = get_JetVetoMap (jets , self ._year )
300328 jets = jets [(jets .pt > 30 ) & jet_veto_map ]
301329
330+ # AK4 JETS (OUTSIDE AK8)
331+ msk_ak4_outside_ak8 = jets .delta_r (candidatefj ) > 0.8
332+ ak4_outside_ak8 = jets [msk_ak4_outside_ak8 ]
333+
334+ # VBF VARIABLES
335+ jet1 = ak4_outside_ak8 [:, 0 :1 ]
336+ jet2 = ak4_outside_ak8 [:, 1 :2 ]
337+
302338 # TAUS
303339 loose_taus_mu = (events .Tau .pt > 20 ) & (abs (events .Tau .eta ) < 2.3 ) & (events .Tau .idAntiMu >= 1 ) # loose antiMu ID
304340 loose_taus_ele = (
@@ -309,30 +345,7 @@ def _build_objects(self, events):
309345 loose_taus = (events .Tau .pt > 20 ) & (abs (events .Tau .eta ) < 2.3 )
310346 loose_taus = events .Tau [loose_taus ]
311347
312- # CANDIDATE LEP
313- goodleptons = ak .concatenate ([muons [msk_good_muons ], electrons [msk_good_electrons ]], axis = 1 )
314- goodleptons = goodleptons [ak .argsort (goodleptons .pt , ascending = False )] # sort by pt
315-
316- candidatelep = ak .firsts (goodleptons ) # pick highest pt
317- candidatelep_p4 = build_p4 (candidatelep ) # build p4 for candidate lepton
318-
319- # HIGGS CANDIDATE
320- fj_idx_lep = ak .argmin (good_fatjets .delta_r (candidatelep_p4 ), axis = 1 , keepdims = True )
321- candidatefj = ak .firsts (good_fatjets [fj_idx_lep ])
322-
323- jmsr_shifted_fatjetvars = get_jmsr (good_fatjets [fj_idx_lep ], num_jets = 1 , year = self ._year , isData = not self .isMC )
324-
325- # ak4 jet outside ak8
326- ak4_outside_ak8_selector = jets .delta_r (candidatefj ) > 0.8
327- ak4_outside_ak8 = jets [ak4_outside_ak8_selector ]
328-
329- # VBF variables
330- jet1 = ak4_outside_ak8 [:, 0 :1 ]
331- jet2 = ak4_outside_ak8 [:, 1 :2 ]
332-
333- FirstFatjet = ak .firsts (good_fatjets [:, 0 :1 ])
334- SecondFatjet = ak .firsts (good_fatjets [:, 1 :2 ])
335-
348+ # b-jets (only for jets with abs(eta)<2.5)
336349 bjet_selector = (jets .delta_r (candidatefj ) > 0.8 ) & (abs (jets .eta ) < 2.5 )
337350
338351 objects = {
@@ -366,20 +379,16 @@ def _build_objects(self, events):
366379 "jmsr_shifted_fatjetvars" : jmsr_shifted_fatjetvars ,
367380 "jec_shifted_fatjetvars" : jec_shifted_fatjetvars ,
368381 "jec_shifted_jetvars" : jec_shifted_jetvars ,
369- # others
382+ # others (easier to just store them here)
370383 "fj_idx_lep" : fj_idx_lep ,
371- "ak4_outside_ak8_selector" : ak4_outside_ak8_selector ,
372- "ak4_outside_ak8" : ak4_outside_ak8 ,
384+ "msk_ak4_outside_ak8" : msk_ak4_outside_ak8 ,
373385 "bjet_selector" : bjet_selector ,
374386 }
375387
376388 return objects
377389
378390 def _derive_variables (self , events , objects ):
379-
380- ak4_outside_ak8 = objects ["ak4_outside_ak8" ]
381- bjet_selector = objects ["bjet_selector" ]
382-
391+ """Derives physics variables from `objects` (e.g. deltaR(jet,lep))."""
383392 ht = ak .sum (objects ["jets" ].pt , axis = 1 )
384393
385394 # VH jet
@@ -388,8 +397,7 @@ def _derive_variables(self, events, objects):
388397 mask_candidatefj = fatJetIndices != minDeltaR
389398
390399 allScores = VScore (objects ["good_fatjets" ])
391- masked = allScores [mask_candidatefj ]
392- VH_fj = ak .firsts (objects ["good_fatjets" ][allScores == ak .max (masked , axis = 1 )])
400+ VH_fj = ak .firsts (objects ["good_fatjets" ][allScores == ak .max (allScores [mask_candidatefj ], axis = 1 )])
393401
394402 # nleptons
395403 n_loose_taus_mu = ak .sum (objects ["loose_taus_mu" ], axis = 1 )
@@ -413,15 +421,15 @@ def _derive_variables(self, events, objects):
413421 lep_miso = objects ["candidatelep" ].miniPFRelIso_all # miniso for candidate lepton
414422
415423 # b-jets (only for jets with abs(eta)<2.5)
416- ak4_bjet_candidate = objects ["jets" ][bjet_selector ]
424+ ak4_bjet_candidate = objects ["jets" ][objects [ " bjet_selector" ] ]
417425
418426 # VBF variables
419427 deta = abs (ak .firsts (objects ["jet1" ]).eta - ak .firsts (objects ["jet2" ]).eta )
420428 mjj = (ak .firsts (objects ["jet1" ]) + ak .firsts (objects ["jet2" ])).mass
421429
422430 # njets
423431 NumFatjets = ak .num (objects ["good_fatjets" ])
424- NumOtherJets = ak .num ( ak4_outside_ak8 )
432+ NumOtherJets = ak .sum ( objects [ "msk_ak4_outside_ak8" ] )
425433
426434 # n-bjets
427435 n_bjets_L = ak .sum (ak4_bjet_candidate .btagDeepFlavB > btagWPs ["deepJet" ][self ._year ]["L" ], axis = 1 )
@@ -477,7 +485,6 @@ def _derive_variables(self, events, objects):
477485 nCjets = (ak .sum (goodgenjets .hadronFlavour == 4 , axis = 1 )).to_numpy ()
478486
479487 derived_vars = {
480- "fj_idx_lep" : objects ["fj_idx_lep" ],
481488 "ht" : ht ,
482489 # candidatefj
483490 "fj_pt" : objects ["candidatefj" ].pt ,
@@ -556,14 +563,11 @@ def _derive_variables(self, events, objects):
556563 return derived_vars
557564
558565 def _apply_JEC (self , objects , variables ):
559-
566+ """Computes JES/JER and JMSR shift variations and appends them to the set of output variables."""
560567 jec_shifted_fatjetvars = objects ["jec_shifted_fatjetvars" ]
561568 jmsr_shifted_fatjetvars = objects ["jmsr_shifted_fatjetvars" ]
562569 jec_shifted_jetvars = objects ["jec_shifted_jetvars" ]
563570
564- fj_idx_lep = objects ["fj_idx_lep" ]
565- ak4_outside_ak8_selector = objects ["ak4_outside_ak8_selector" ]
566-
567571 fatjetvars = {
568572 "fj_pt" : objects ["candidatefj" ].pt ,
569573 "fj_eta" : objects ["candidatefj" ].eta ,
@@ -576,7 +580,7 @@ def _apply_JEC(self, objects, variables):
576580 # JEC vars
577581 for shift , vals in jec_shifted_fatjetvars ["pt" ].items ():
578582 if shift != "" :
579- fatjetvars_sys [f"fj_pt{ shift } " ] = ak .firsts (vals [fj_idx_lep ])
583+ fatjetvars_sys [f"fj_pt{ shift } " ] = ak .firsts (vals [objects [ " fj_idx_lep" ] ])
580584
581585 # JMSR vars
582586 for shift , vals in jmsr_shifted_fatjetvars ["msoftdrop" ].items ():
@@ -593,11 +597,11 @@ def _apply_JEC(self, objects, variables):
593597 pt_1 = objects ["jet1" ].pt
594598 pt_2 = objects ["jet2" ].pt
595599 try :
596- pt_1 = vals [ak4_outside_ak8_selector ][:, 0 ]
600+ pt_1 = vals [objects [ "msk_ak4_outside_ak8" ] ][:, 0 ]
597601 except Exception :
598602 pt_1 = objects ["jet1" ].pt
599603 try :
600- pt_2 = vals [ak4_outside_ak8_selector ][:, 1 ]
604+ pt_2 = vals [objects [ "msk_ak4_outside_ak8" ] ][:, 1 ]
601605 except Exception :
602606 pt_2 = objects ["jet2" ].pt
603607
@@ -661,7 +665,7 @@ def _apply_pileup_cutoff(self, events, year, yearmod, cutoff: float = 4):
661665 self .add_selection (name = "PU_cutoff" , sel = pw_pass )
662666
663667 def _add_selections (self , events , trigger , metfilters , objects , variables ):
664-
668+ """Adds event pre-selection to PackedSelection for cutflow and filtering."""
665669 if self .isMC :
666670 self ._apply_pileup_cutoff (events , self ._year , self ._yearmod , cutoff = 4 )
667671
@@ -738,8 +742,7 @@ def _add_selections(self, events, trigger, metfilters, objects, variables):
738742 self .add_selection (name = "dummy" , sel = (variables ["NumFatjets" ] > - 1 ))
739743
740744 def _store_genVars (self , dataset , events , objects , variables ):
741-
742- # store gen-level matching variables
745+ """Matches generator-level particles to reconstructed jets or leptons."""
743746 signal_mask = None
744747 if self .isMC :
745748 if self .isSignal :
@@ -774,12 +777,8 @@ def _store_genVars(self, dataset, events, objects, variables):
774777
775778 return variables
776779
777- def _store_MCweights (self , dataset , events , objects , variables ):
778-
779- candidatelep = objects ["candidatelep" ]
780- jets = objects ["jets" ]
781- bjet_selector = objects ["bjet_selector" ]
782-
780+ def _add_MC_weights (self , dataset , events , objects , variables ):
781+ """Applies event-level weights and corrections for MC, including."""
783782 for ch in self ._channels :
784783 if self ._year in ("2016" , "2017" ):
785784 self .weights [ch ].add (
@@ -795,12 +794,12 @@ def _store_MCweights(self, dataset, events, objects, variables):
795794 nPU = ak .to_numpy (events .Pileup .nPU ),
796795 )
797796
798- add_pileupid_weights (self .weights [ch ], self ._year , self ._yearmod , jets , events .GenJet , wp = "L" )
797+ add_pileupid_weights (self .weights [ch ], self ._year , self ._yearmod , objects [ " jets" ] , events .GenJet , wp = "L" )
799798
800799 if ch == "mu" :
801- add_lepton_weight (self .weights [ch ], candidatelep , self ._year + self ._yearmod , "muon" )
800+ add_lepton_weight (self .weights [ch ], objects [ " candidatelep" ] , self ._year + self ._yearmod , "muon" )
802801 elif ch == "ele" :
803- add_lepton_weight (self .weights [ch ], candidatelep , self ._year + self ._yearmod , "electron" )
802+ add_lepton_weight (self .weights [ch ], objects [ " candidatelep" ] , self ._year + self ._yearmod , "electron" )
804803
805804 ewk_corr , qcd_corr , alt_qcd_corr = add_VJets_kFactors (self .weights [ch ], events .GenPart , dataset , events )
806805 # add corrections for plotting
@@ -880,8 +879,8 @@ def _store_MCweights(self, dataset, events, objects, variables):
880879 ** variables ,
881880 ** get_btag_weights (
882881 self ._year ,
883- jets ,
884- bjet_selector ,
882+ objects [ " jets" ] ,
883+ objects [ " bjet_selector" ] ,
885884 wp = wp_ ,
886885 algo = "deepJet" ,
887886 systematics = self ._systematics ,
@@ -890,6 +889,7 @@ def _store_MCweights(self, dataset, events, objects, variables):
890889 return variables
891890
892891 def _add_lundplane_weights (self , events , dataset , selection_ch , objects ):
892+ """Computes variables used for Lund plane reweighting technique."""
893893 from boostedhiggs .corrections import getLPweights
894894
895895 pf_cands , gen_parts_eta_phi , gen_parts_pt_mass , ak8_jets , bgen_parts_eta_phi , genlep = getLPweights (
@@ -925,6 +925,7 @@ def _add_lundplane_weights(self, events, dataset, selection_ch, objects):
925925 return lpvars
926926
927927 def _run_inference (self , events , selection_ch , objects ):
928+ """Runs ParT tagger inference using Triton."""
928929 model_name = "ak8_MD_vminclv2ParT_manual_fixwrap_all_nodes"
929930 pnet_vars = runInferenceTriton (
930931 self .tagger_resources_path , events [selection_ch ], objects ["fj_idx_lep" ][selection_ch ], model_name = model_name
@@ -937,6 +938,7 @@ def _run_inference(self, events, selection_ch, objects):
937938 return {** scores , ** reg_mass , ** hidNeurons }
938939
939940 def _build_output (self , events , dataset , variables , objects ):
941+ """Stores the events passing the selections in a pandas dataframes."""
940942 output = {}
941943 for ch in self ._channels :
942944 selection_ch = self .selections [ch ].all (* self .selections [ch ].names )
@@ -975,7 +977,7 @@ def _build_output(self, events, dataset, variables, objects):
975977 return output
976978
977979 def process (self , events : ak .Array ):
978- """Returns skimmed events which pass preselection cuts and with the branches listed in self._skimvars """
980+ """Returns skimmed events which pass preselection cuts as pandas dataframe. """
979981
980982 dataset = events .metadata ["dataset" ]
981983 nevents = len (events )
@@ -994,7 +996,8 @@ def process(self, events: ak.Array):
994996 for ch in self ._channels :
995997 self .weights [ch ].add ("genweight" , events .genWeight )
996998
997- trigger , metfilters = self ._apply_trigger_and_metfilters (events )
999+ trigger = self ._compute_trigger_msk (events )
1000+ metfilters = self ._compute_metfilters_msk (events )
9981001
9991002 objects = self ._build_objects (events )
10001003 variables = self ._derive_variables (events , objects )
@@ -1010,7 +1013,7 @@ def process(self, events: ak.Array):
10101013 variables = self ._store_genVars (dataset , events , objects , variables )
10111014
10121015 if self .isMC :
1013- variables = self ._store_MCweights (dataset , events , objects , variables )
1016+ variables = self ._add_MC_weights (dataset , events , objects , variables )
10141017
10151018 output = self ._build_output (events , dataset , variables , objects )
10161019
@@ -1038,4 +1041,5 @@ def process(self, events: ak.Array):
10381041 }
10391042
10401043 def postprocess (self , accumulator ):
1044+ """Placeholder for Coffea postprocessing. Currently does nothing."""
10411045 return accumulator
0 commit comments