@@ -145,22 +145,28 @@ def __init__(
145145
146146 @property
147147 def accumulator (self ):
148+ """Required Coffea interface property. Not used in this processor, returns a dummy accumulator."""
148149 return self ._accumulator
149150
150151 def _save_dfs_parquet (self , fname , dfs_dict , ch ):
152+ """Save the given pandas DataFrame dictionary as a Parquet file to the output directory."""
151153 if self ._output_location is not None :
152154 table = pa .Table .from_pandas (dfs_dict )
153155 if len (table ) != 0 : # skip dataframes with empty entries
154156 pq .write_table (table , self ._output_location + ch + "/parquet/" + fname + ".parquet" )
155157
156158 def _ak_to_pandas (self , output_collection : ak .Array ) -> pd .DataFrame :
159+ """Converts an Awkward Array collection to a flat pandas DataFrame for to write the output."""
157160 output = pd .DataFrame ()
158161 for field in ak .fields (output_collection ):
159162 output [field ] = ak .to_numpy (output_collection [field ])
160163 return output
161164
162165 def add_selection (self , name : str , sel : np .ndarray , channel : str = "all" ):
163- """Adds selection to PackedSelection object and the cutflow dictionary"""
166+ """
167+ Adds a boolean mask to the PackedSelection object for the given channel(s) and updates the
168+ cutflow dictionary with the sum of selected events or weights.
169+ """
164170 channels = self ._channels if channel == "all" else [channel ]
165171
166172 for ch in channels :
@@ -179,6 +185,7 @@ def add_selection(self, name: str, sel: np.ndarray, channel: str = "all"):
179185 self .cutflows [ch ][name ] = np .sum (selection_ch )
180186
181187 def _compute_lhe_pdf_weights (self , events ):
188+ """Computes and returns summed LHE scale and PDF weights (used for QCD and PDF uncertainty calculations)."""
182189 sumlheweight = {}
183190 if "LHEScaleWeight" in events .fields :
184191 if len (events .LHEScaleWeight [0 ]) == 9 :
@@ -192,7 +199,8 @@ def _compute_lhe_pdf_weights(self, events):
192199
193200 return sumlheweight , sumpdfweight
194201
195- def _apply_trigger_and_metfilters (self , events ):
202+ def _compute_trigger_msk (self , events ):
203+ """Computes boolean mask for trigger."""
196204 nevents = len (events )
197205 trigger = {}
198206 for ch in ["ele" , "mu_lowpt" , "mu_highpt" ]:
@@ -205,16 +213,21 @@ def _apply_trigger_and_metfilters(self, events):
205213 trigger ["mu_highpt" ] = trigger ["mu_highpt" ] & (~ trigger ["ele" ])
206214 trigger ["mu_lowpt" ] = trigger ["mu_lowpt" ] & (~ trigger ["ele" ])
207215
216+ return trigger
217+
218+ def _compute_metfilters_msk (self , events ):
219+ """Computes boolean mask for METfilters."""
220+ nevents = len (events )
208221 metfilters = np .ones (nevents , dtype = "bool" )
209222 metfilterkey = "mc" if self .isMC else "data"
210223 for mf in self ._metfilters [metfilterkey ]:
211224 if mf in events .Flag .fields :
212225 metfilters = metfilters & events .Flag [mf ]
213226
214- return trigger , metfilters
227+ return metfilters
215228
216229 def _build_objects (self , events ):
217-
230+ """Constructs physics objects (e.g. leptons, jets) and applies basic kinematic and quality cuts."""
218231 # MUONS
219232 muons = ak .with_field (events .Muon , 0 , "flavor" )
220233
@@ -272,6 +285,13 @@ def _build_objects(self, events):
272285 msk_good_muons = msk_tight_muons
273286 msk_good_electrons = msk_tight_electrons
274287
288+ # CANDIDATE LEP
289+ goodleptons = ak .concatenate ([muons [msk_good_muons ], electrons [msk_good_electrons ]], axis = 1 )
290+ goodleptons = goodleptons [ak .argsort (goodleptons .pt , ascending = False )] # sort by pt
291+
292+ candidatelep = ak .firsts (goodleptons ) # pick highest pt
293+ candidatelep_p4 = build_p4 (candidatelep ) # build p4 for candidate lepton
294+
275295 # AK8 JETS
276296 fatjets = events .FatJet
277297 fatjets ["msdcorr" ] = fatjets .msoftdrop
@@ -283,6 +303,15 @@ def _build_objects(self, events):
283303 events , good_fatjets , self ._year , not self .isMC , self .jecs , fatjets = True
284304 )
285305
306+ FirstFatjet = ak .firsts (good_fatjets [:, 0 :1 ])
307+ SecondFatjet = ak .firsts (good_fatjets [:, 1 :2 ])
308+
309+ # HIGGS CANDIDATE
310+ fj_idx_lep = ak .argmin (good_fatjets .delta_r (candidatelep_p4 ), axis = 1 , keepdims = True )
311+ candidatefj = ak .firsts (good_fatjets [fj_idx_lep ])
312+
313+ jmsr_shifted_fatjetvars = get_jmsr (good_fatjets [fj_idx_lep ], num_jets = 1 , year = self ._year , isData = not self .isMC )
314+
286315 # AK4 JETS
287316 jets , jec_shifted_jetvars = get_jec_jets (events , events .Jet , self ._year , not self .isMC , self .jecs , fatjets = False )
288317 met = met_factory .build (events .MET , jets , {}) if self .isMC else events .MET
@@ -298,6 +327,14 @@ def _build_objects(self, events):
298327 jet_veto_map , cut_jetveto = get_JetVetoMap (jets , self ._year )
299328 jets = jets [(jets .pt > 30 ) & jet_veto_map ]
300329
330+ # AK4 JETS (OUTSIDE AK8)
331+ msk_ak4_outside_ak8 = jets .delta_r (candidatefj ) > 0.8
332+ ak4_outside_ak8 = jets [msk_ak4_outside_ak8 ]
333+
334+ # VBF VARIABLES
335+ jet1 = ak4_outside_ak8 [:, 0 :1 ]
336+ jet2 = ak4_outside_ak8 [:, 1 :2 ]
337+
301338 # TAUS
302339 loose_taus_mu = (events .Tau .pt > 20 ) & (abs (events .Tau .eta ) < 2.3 ) & (events .Tau .idAntiMu >= 1 ) # loose antiMu ID
303340 loose_taus_ele = (
@@ -308,30 +345,7 @@ def _build_objects(self, events):
308345 loose_taus = (events .Tau .pt > 20 ) & (abs (events .Tau .eta ) < 2.3 )
309346 loose_taus = events .Tau [loose_taus ]
310347
311- # CANDIDATE LEP
312- goodleptons = ak .concatenate ([muons [msk_good_muons ], electrons [msk_good_electrons ]], axis = 1 )
313- goodleptons = goodleptons [ak .argsort (goodleptons .pt , ascending = False )] # sort by pt
314-
315- candidatelep = ak .firsts (goodleptons ) # pick highest pt
316- candidatelep_p4 = build_p4 (candidatelep ) # build p4 for candidate lepton
317-
318- # HIGGS CANDIDATE
319- fj_idx_lep = ak .argmin (good_fatjets .delta_r (candidatelep_p4 ), axis = 1 , keepdims = True )
320- candidatefj = ak .firsts (good_fatjets [fj_idx_lep ])
321-
322- jmsr_shifted_fatjetvars = get_jmsr (good_fatjets [fj_idx_lep ], num_jets = 1 , year = self ._year , isData = not self .isMC )
323-
324- # ak4 jet outside ak8
325- ak4_outside_ak8_selector = jets .delta_r (candidatefj ) > 0.8
326- ak4_outside_ak8 = jets [ak4_outside_ak8_selector ]
327-
328- # VBF variables
329- jet1 = ak4_outside_ak8 [:, 0 :1 ]
330- jet2 = ak4_outside_ak8 [:, 1 :2 ]
331-
332- FirstFatjet = ak .firsts (good_fatjets [:, 0 :1 ])
333- SecondFatjet = ak .firsts (good_fatjets [:, 1 :2 ])
334-
348+ # b-jets (only for jets with abs(eta)<2.5)
335349 bjet_selector = (jets .delta_r (candidatefj ) > 0.8 ) & (abs (jets .eta ) < 2.5 )
336350
337351 objects = {
@@ -365,20 +379,16 @@ def _build_objects(self, events):
365379 "jmsr_shifted_fatjetvars" : jmsr_shifted_fatjetvars ,
366380 "jec_shifted_fatjetvars" : jec_shifted_fatjetvars ,
367381 "jec_shifted_jetvars" : jec_shifted_jetvars ,
368- # others
382+ # others (easier to just store them here)
369383 "fj_idx_lep" : fj_idx_lep ,
370- "ak4_outside_ak8_selector" : ak4_outside_ak8_selector ,
371- "ak4_outside_ak8" : ak4_outside_ak8 ,
384+ "msk_ak4_outside_ak8" : msk_ak4_outside_ak8 ,
372385 "bjet_selector" : bjet_selector ,
373386 }
374387
375388 return objects
376389
377390 def _derive_variables (self , events , objects ):
378-
379- ak4_outside_ak8 = objects ["ak4_outside_ak8" ]
380- bjet_selector = objects ["bjet_selector" ]
381-
391+ """Derives physics variables from `objects` (e.g. deltaR(jet,lep))."""
382392 ht = ak .sum (objects ["jets" ].pt , axis = 1 )
383393
384394 # VH jet
@@ -412,15 +422,15 @@ def _derive_variables(self, events, objects):
412422 lep_miso = objects ["candidatelep" ].miniPFRelIso_all # miniso for candidate lepton
413423
414424 # b-jets (only for jets with abs(eta)<2.5)
415- ak4_bjet_candidate = objects ["jets" ][bjet_selector ]
425+ ak4_bjet_candidate = objects ["jets" ][objects [ " bjet_selector" ] ]
416426
417427 # VBF variables
418428 deta = abs (ak .firsts (objects ["jet1" ]).eta - ak .firsts (objects ["jet2" ]).eta )
419429 mjj = (ak .firsts (objects ["jet1" ]) + ak .firsts (objects ["jet2" ])).mass
420430
421431 # njets
422432 NumFatjets = ak .num (objects ["good_fatjets" ])
423- NumOtherJets = ak .num ( ak4_outside_ak8 )
433+ NumOtherJets = ak .sum ( objects [ "msk_ak4_outside_ak8" ] )
424434
425435 # n-bjets
426436 n_bjets_L = ak .sum (ak4_bjet_candidate .btagDeepFlavB > btagWPs ["deepJet" ][self ._year ]["L" ], axis = 1 )
@@ -476,7 +486,6 @@ def _derive_variables(self, events, objects):
476486 nCjets = (ak .sum (goodgenjets .hadronFlavour == 4 , axis = 1 )).to_numpy ()
477487
478488 derived_vars = {
479- "fj_idx_lep" : objects ["fj_idx_lep" ],
480489 "ht" : ht ,
481490 # candidatefj
482491 "fj_pt" : objects ["candidatefj" ].pt ,
@@ -555,14 +564,11 @@ def _derive_variables(self, events, objects):
555564 return derived_vars
556565
557566 def _apply_JEC (self , objects , variables ):
558-
567+ """Computes JES/JER and JMSR shift variations and appends them to the set of output variables."""
559568 jec_shifted_fatjetvars = objects ["jec_shifted_fatjetvars" ]
560569 jmsr_shifted_fatjetvars = objects ["jmsr_shifted_fatjetvars" ]
561570 jec_shifted_jetvars = objects ["jec_shifted_jetvars" ]
562571
563- fj_idx_lep = objects ["fj_idx_lep" ]
564- ak4_outside_ak8_selector = objects ["ak4_outside_ak8_selector" ]
565-
566572 fatjetvars = {
567573 "fj_pt" : objects ["candidatefj" ].pt ,
568574 "fj_eta" : objects ["candidatefj" ].eta ,
@@ -575,7 +581,7 @@ def _apply_JEC(self, objects, variables):
575581 # JEC vars
576582 for shift , vals in jec_shifted_fatjetvars ["pt" ].items ():
577583 if shift != "" :
578- fatjetvars_sys [f"fj_pt{ shift } " ] = ak .firsts (vals [fj_idx_lep ])
584+ fatjetvars_sys [f"fj_pt{ shift } " ] = ak .firsts (vals [objects [ " fj_idx_lep" ] ])
579585
580586 # JMSR vars
581587 for shift , vals in jmsr_shifted_fatjetvars ["msoftdrop" ].items ():
@@ -592,11 +598,11 @@ def _apply_JEC(self, objects, variables):
592598 pt_1 = objects ["jet1" ].pt
593599 pt_2 = objects ["jet2" ].pt
594600 try :
595- pt_1 = vals [ak4_outside_ak8_selector ][:, 0 ]
601+ pt_1 = vals [objects [ "msk_ak4_outside_ak8" ] ][:, 0 ]
596602 except Exception :
597603 pt_1 = objects ["jet1" ].pt
598604 try :
599- pt_2 = vals [ak4_outside_ak8_selector ][:, 1 ]
605+ pt_2 = vals [objects [ "msk_ak4_outside_ak8" ] ][:, 1 ]
600606 except Exception :
601607 pt_2 = objects ["jet2" ].pt
602608
@@ -660,7 +666,7 @@ def _apply_pileup_cutoff(self, events, year, yearmod, cutoff: float = 4):
660666 self .add_selection (name = "PU_cutoff" , sel = pw_pass )
661667
662668 def _add_selections (self , events , trigger , metfilters , objects , variables ):
663-
669+ """Adds event pre-selection to PackedSelection for cutflow and filtering."""
664670 if self .isMC :
665671 self ._apply_pileup_cutoff (events , self ._year , self ._yearmod , cutoff = 4 )
666672
@@ -737,8 +743,7 @@ def _add_selections(self, events, trigger, metfilters, objects, variables):
737743 self .add_selection (name = "dummy" , sel = (variables ["NumFatjets" ] > - 1 ))
738744
739745 def _store_genVars (self , dataset , events , objects , variables ):
740-
741- # store gen-level matching variables
746+ """Matches generator-level particles to reconstructed jets or leptons."""
742747 signal_mask = None
743748 if self .isMC :
744749 if self .isSignal :
@@ -773,12 +778,8 @@ def _store_genVars(self, dataset, events, objects, variables):
773778
774779 return variables
775780
776- def _store_MCweights (self , dataset , events , objects , variables ):
777-
778- candidatelep = objects ["candidatelep" ]
779- jets = objects ["jets" ]
780- bjet_selector = objects ["bjet_selector" ]
781-
781+ def _add_MC_weights (self , dataset , events , objects , variables ):
782+ """Applies event-level weights and corrections for MC, including."""
782783 for ch in self ._channels :
783784 if self ._year in ("2016" , "2017" ):
784785 self .weights [ch ].add (
@@ -794,12 +795,12 @@ def _store_MCweights(self, dataset, events, objects, variables):
794795 nPU = ak .to_numpy (events .Pileup .nPU ),
795796 )
796797
797- add_pileupid_weights (self .weights [ch ], self ._year , self ._yearmod , jets , events .GenJet , wp = "L" )
798+ add_pileupid_weights (self .weights [ch ], self ._year , self ._yearmod , objects [ " jets" ] , events .GenJet , wp = "L" )
798799
799800 if ch == "mu" :
800- add_lepton_weight (self .weights [ch ], candidatelep , self ._year + self ._yearmod , "muon" )
801+ add_lepton_weight (self .weights [ch ], objects [ " candidatelep" ] , self ._year + self ._yearmod , "muon" )
801802 elif ch == "ele" :
802- add_lepton_weight (self .weights [ch ], candidatelep , self ._year + self ._yearmod , "electron" )
803+ add_lepton_weight (self .weights [ch ], objects [ " candidatelep" ] , self ._year + self ._yearmod , "electron" )
803804
804805 ewk_corr , qcd_corr , alt_qcd_corr = add_VJets_kFactors (self .weights [ch ], events .GenPart , dataset , events )
805806 # add corrections for plotting
@@ -879,8 +880,8 @@ def _store_MCweights(self, dataset, events, objects, variables):
879880 ** variables ,
880881 ** get_btag_weights (
881882 self ._year ,
882- jets ,
883- bjet_selector ,
883+ objects [ " jets" ] ,
884+ objects [ " bjet_selector" ] ,
884885 wp = wp_ ,
885886 algo = "deepJet" ,
886887 systematics = self ._systematics ,
@@ -889,6 +890,7 @@ def _store_MCweights(self, dataset, events, objects, variables):
889890 return variables
890891
891892 def _add_lundplane_weights (self , events , dataset , selection_ch , objects ):
893+ """Computes variables used for Lund plane reweighting technique."""
892894 from boostedhiggs .corrections import getLPweights
893895
894896 pf_cands , gen_parts_eta_phi , gen_parts_pt_mass , ak8_jets , bgen_parts_eta_phi , genlep = getLPweights (
@@ -924,6 +926,7 @@ def _add_lundplane_weights(self, events, dataset, selection_ch, objects):
924926 return lpvars
925927
926928 def _run_inference (self , events , selection_ch , objects ):
929+ """Runs ParT tagger inference using Triton."""
927930 model_name = "ak8_MD_vminclv2ParT_manual_fixwrap_all_nodes"
928931 pnet_vars = runInferenceTriton (
929932 self .tagger_resources_path , events [selection_ch ], objects ["fj_idx_lep" ][selection_ch ], model_name = model_name
@@ -936,6 +939,7 @@ def _run_inference(self, events, selection_ch, objects):
936939 return {** scores , ** reg_mass , ** hidNeurons }
937940
938941 def _build_output (self , events , dataset , variables , objects ):
942+ """Stores the events passing the selections in a pandas dataframes."""
939943 output = {}
940944 for ch in self ._channels :
941945 selection_ch = self .selections [ch ].all (* self .selections [ch ].names )
@@ -974,7 +978,7 @@ def _build_output(self, events, dataset, variables, objects):
974978 return output
975979
976980 def process (self , events : ak .Array ):
977- """Returns skimmed events which pass preselection cuts and with the branches listed in self._skimvars """
981+ """Returns skimmed events which pass preselection cuts as pandas dataframe. """
978982
979983 dataset = events .metadata ["dataset" ]
980984 nevents = len (events )
@@ -993,7 +997,8 @@ def process(self, events: ak.Array):
993997 for ch in self ._channels :
994998 self .weights [ch ].add ("genweight" , events .genWeight )
995999
996- trigger , metfilters = self ._apply_trigger_and_metfilters (events )
1000+ trigger = self ._compute_trigger_msk (events )
1001+ metfilters = self ._compute_metfilters_msk (events )
9971002
9981003 objects = self ._build_objects (events )
9991004 variables = self ._derive_variables (events , objects )
@@ -1009,7 +1014,7 @@ def process(self, events: ak.Array):
10091014 variables = self ._store_genVars (dataset , events , objects , variables )
10101015
10111016 if self .isMC :
1012- variables = self ._store_MCweights (dataset , events , objects , variables )
1017+ variables = self ._add_MC_weights (dataset , events , objects , variables )
10131018
10141019 output = self ._build_output (events , dataset , variables , objects )
10151020
@@ -1037,4 +1042,5 @@ def process(self, events: ak.Array):
10371042 }
10381043
10391044 def postprocess (self , accumulator ):
1045+ """Placeholder for Coffea postprocessing. Currently does nothing."""
10401046 return accumulator
0 commit comments