@@ -147,13 +147,13 @@ def __init__(
147147 def accumulator (self ):
148148 return self ._accumulator
149149
150- def save_dfs_parquet (self , fname , dfs_dict , ch ):
150+ def _save_dfs_parquet (self , fname , dfs_dict , ch ):
151151 if self ._output_location is not None :
152152 table = pa .Table .from_pandas (dfs_dict )
153153 if len (table ) != 0 : # skip dataframes with empty entries
154154 pq .write_table (table , self ._output_location + ch + "/parquet/" + fname + ".parquet" )
155155
156- def ak_to_pandas (self , output_collection : ak .Array ) -> pd .DataFrame :
156+ def _ak_to_pandas (self , output_collection : ak .Array ) -> pd .DataFrame :
157157 output = pd .DataFrame ()
158158 for field in ak .fields (output_collection ):
159159 output [field ] = ak .to_numpy (output_collection [field ])
@@ -287,8 +287,6 @@ def _build_objects(self, events):
287287 jets , jec_shifted_jetvars = get_jec_jets (events , events .Jet , self ._year , not self .isMC , self .jecs , fatjets = False )
288288 met = met_factory .build (events .MET , jets , {}) if self .isMC else events .MET
289289
290- ht = ak .sum (jets .pt , axis = 1 )
291-
292290 jet_selector = (
293291 (jets .pt > 15 )
294292 & (abs (jets .eta ) < 5.0 )
@@ -372,33 +370,25 @@ def _build_objects(self, events):
372370 "ak4_outside_ak8_selector" : ak4_outside_ak8_selector ,
373371 "ak4_outside_ak8" : ak4_outside_ak8 ,
374372 "bjet_selector" : bjet_selector ,
375- "ht" : ht ,
376373 }
377374
378375 return objects
379376
380377 def _derive_variables (self , events , objects ):
381378
382- # unpack
383- good_fatjets = objects ["good_fatjets" ]
384- fj_idx_lep = objects ["fj_idx_lep" ]
385-
386379 ak4_outside_ak8 = objects ["ak4_outside_ak8" ]
387-
388- FirstFatjet = objects ["FirstFatjet" ]
389- SecondFatjet = objects ["SecondFatjet" ]
390-
391380 bjet_selector = objects ["bjet_selector" ]
392- ht = objects ["ht" ]
381+
382+ ht = ak .sum (objects ["jets" ].pt , axis = 1 )
393383
394384 # VH jet
395- minDeltaR = ak .argmin (objects ["candidatelep_p4" ].delta_r (good_fatjets ), axis = 1 )
396- fatJetIndices = ak .local_index (good_fatjets , axis = 1 )
385+ minDeltaR = ak .argmin (objects ["candidatelep_p4" ].delta_r (objects [ " good_fatjets" ] ), axis = 1 )
386+ fatJetIndices = ak .local_index (objects [ " good_fatjets" ] , axis = 1 )
397387 mask_candidatefj = fatJetIndices != minDeltaR
398388
399- allScores = VScore (good_fatjets )
389+ allScores = VScore (objects [ " good_fatjets" ] )
400390 masked = allScores [mask_candidatefj ]
401- VH_fj = ak .firsts (good_fatjets [allScores == ak .max (masked , axis = 1 )])
391+ VH_fj = ak .firsts (objects [ " good_fatjets" ] [allScores == ak .max (masked , axis = 1 )])
402392
403393 # nleptons
404394 n_loose_taus_mu = ak .sum (objects ["loose_taus_mu" ], axis = 1 )
@@ -429,7 +419,7 @@ def _derive_variables(self, events, objects):
429419 mjj = (ak .firsts (objects ["jet1" ]) + ak .firsts (objects ["jet2" ])).mass
430420
431421 # njets
432- NumFatjets = ak .num (good_fatjets )
422+ NumFatjets = ak .num (objects [ " good_fatjets" ] )
433423 NumOtherJets = ak .num (ak4_outside_ak8 )
434424
435425 # n-bjets
@@ -486,6 +476,7 @@ def _derive_variables(self, events, objects):
486476 nCjets = (ak .sum (goodgenjets .hadronFlavour == 4 , axis = 1 )).to_numpy ()
487477
488478 derived_vars = {
479+ "fj_idx_lep" : objects ["fj_idx_lep" ],
489480 "ht" : ht ,
490481 # candidatefj
491482 "fj_pt" : objects ["candidatefj" ].pt ,
@@ -528,17 +519,17 @@ def _derive_variables(self, events, objects):
528519 "deta" : deta ,
529520 "mjj" : mjj ,
530521 # leading fatjet
531- "FirstFatjet_pt" : FirstFatjet .pt ,
532- "FirstFatjet_eta" : FirstFatjet .eta ,
533- "FirstFatjet_phi" : FirstFatjet .phi ,
534- "FirstFatjet_msd" : FirstFatjet .msdcorr ,
535- "FirstFatjet_Vscore" : VScore (SecondFatjet ),
522+ "FirstFatjet_pt" : objects [ " FirstFatjet" ] .pt ,
523+ "FirstFatjet_eta" : objects [ " FirstFatjet" ] .eta ,
524+ "FirstFatjet_phi" : objects [ " FirstFatjet" ] .phi ,
525+ "FirstFatjet_msd" : objects [ " FirstFatjet" ] .msdcorr ,
526+ "FirstFatjet_Vscore" : VScore (objects [ " SecondFatjet" ] ),
536527 # second leading fatjet
537- "SecondFatjet_pt" : SecondFatjet .pt ,
538- "SecondFatjet_eta" : SecondFatjet .eta ,
539- "SecondFatjet_phi" : SecondFatjet .phi ,
540- "SecondFatjet_msd" : SecondFatjet .msdcorr ,
541- "SecondFatjet_Vscore" : VScore (FirstFatjet ),
528+ "SecondFatjet_pt" : objects [ " SecondFatjet" ] .pt ,
529+ "SecondFatjet_eta" : objects [ " SecondFatjet" ] .eta ,
530+ "SecondFatjet_phi" : objects [ " SecondFatjet" ] .phi ,
531+ "SecondFatjet_msd" : objects [ " SecondFatjet" ] .msdcorr ,
532+ "SecondFatjet_Vscore" : VScore (objects [ " FirstFatjet" ] ),
542533 # second fatjet after candidate jet
543534 "VH_fj_pt" : VH_fj .pt ,
544535 "VH_fj_eta" : VH_fj .eta ,
@@ -549,7 +540,7 @@ def _derive_variables(self, events, objects):
549540 ).miniPFRelIso_all ,
550541 "loose_lep1_pt" : ak .firsts (objects ["loose_muons1" ][ak .argsort (objects ["loose_muons1" ].pt , ascending = False )]).pt ,
551542 "msk_leptonic_taus" : msk_leptonic_taus ,
552- "fj_mass_raw " : good_fatjets [ fj_idx_lep ]. msdcorr ,
543+ "ht " : ht ,
553544 }
554545
555546 if self .isMC :
@@ -565,11 +556,11 @@ def _derive_variables(self, events, objects):
565556
566557 def _apply_JEC (self , objects , variables ):
567558
568- fj_idx_lep = objects ["fj_idx_lep" ]
569559 jec_shifted_fatjetvars = objects ["jec_shifted_fatjetvars" ]
570560 jmsr_shifted_fatjetvars = objects ["jmsr_shifted_fatjetvars" ]
571561 jec_shifted_jetvars = objects ["jec_shifted_jetvars" ]
572562
563+ fj_idx_lep = objects ["fj_idx_lep" ]
573564 ak4_outside_ak8_selector = objects ["ak4_outside_ak8_selector" ]
574565
575566 fatjetvars = {
@@ -668,7 +659,7 @@ def _apply_pileup_cutoff(self, events, year, yearmod, cutoff: float = 4):
668659
669660 self .add_selection (name = "PU_cutoff" , sel = pw_pass )
670661
671- def _apply_selections (self , events , trigger , metfilters , objects , variables ):
662+ def _add_selections (self , events , trigger , metfilters , objects , variables ):
672663
673664 if self .isMC :
674665 self ._apply_pileup_cutoff (events , self ._year , self ._yearmod , cutoff = 4 )
@@ -937,7 +928,7 @@ def _run_inference(self, events, selection_ch, objects):
937928 pnet_vars = runInferenceTriton (
938929 self .tagger_resources_path , events [selection_ch ], objects ["fj_idx_lep" ][selection_ch ], model_name = model_name
939930 )
940- pnet_df = self .ak_to_pandas (pnet_vars )
931+ pnet_df = self ._ak_to_pandas (pnet_vars )
941932 scores = {"fj_ParT_score" : pnet_df [sigs ].sum (axis = 1 ).values }
942933 hidNeurons = {k : v for k , v in pnet_vars .items () if "hidNeuron" in k }
943934 reg_mass = {"fj_ParT_mass" : pnet_vars ["fj_ParT_mass" ]}
@@ -967,7 +958,7 @@ def _build_output(self, events, dataset, variables, objects):
967958 output [ch ] = {}
968959
969960 if not isinstance (output [ch ], pd .DataFrame ):
970- output [ch ] = self .ak_to_pandas (output [ch ])
961+ output [ch ] = self ._ak_to_pandas (output [ch ])
971962
972963 for var_ in [
973964 "rec_higgs_m" ,
@@ -1013,7 +1004,7 @@ def process(self, events: ak.Array):
10131004
10141005 variables = self ._apply_JEC (objects , variables )
10151006
1016- self ._apply_selections (events , trigger , metfilters , objects , variables )
1007+ self ._add_selections (events , trigger , metfilters , objects , variables )
10171008
10181009 variables = self ._store_genVars (dataset , events , objects , variables )
10191010
@@ -1030,7 +1021,7 @@ def process(self, events: ak.Array):
10301021 os .makedirs (self ._output_location + ch )
10311022 if not os .path .exists (self ._output_location + ch + "/parquet" ):
10321023 os .makedirs (self ._output_location + ch + "/parquet" )
1033- self .save_dfs_parquet (fname , output [ch ], ch )
1024+ self ._save_dfs_parquet (fname , output [ch ], ch )
10341025
10351026 return {
10361027 dataset : {
0 commit comments