@@ -189,33 +189,28 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles, # pylint: disab
189189 # Potentially mask certain values (e.g. nsigma TOF of -999)
190190 self .p_mask_values = datap ["ml" ].get ("mask_values" , None )
191191
192- self .lpt_probcutpre = datap ["mlapplication" ]["probcutpresel" ][self .mcordata ]
193- self .lpt_probcutfin = datap ["analysis" ][self .typean ].get ("probcuts" , None )
194-
195192 self .bins_skimming = np .array (list (zip (self .lpt_anbinmin , self .lpt_anbinmax )), 'd' )
196193 self .bins_analysis = np .array (list (zip (self .lpt_finbinmin , self .lpt_finbinmax )), 'd' )
197194 bin_matching = [
198195 [ptrange [0 ] <= bin [0 ] and ptrange [1 ] >= bin [1 ] for ptrange in self .bins_skimming ].index (True )
199196 for bin in self .bins_analysis
200197 ]
201198
202- # Make it backwards-compatible
203- if not self .lpt_probcutfin :
204- lpt_probcutfin_tmp = datap ["mlapplication" ]["probcutoptimal" ]
205- self .lpt_probcutfin = []
206- for i in range (self .p_nptfinbins ):
207- bin_id = bin_matching [i ]
208- self .lpt_probcutfin .append (lpt_probcutfin_tmp [bin_id ])
199+ self .lpt_probcutpre = datap ["mlapplication" ]["probcutpresel" ][self .mcordata ]
200+ lpt_probcutfin_tmp = datap ["mlapplication" ]["probcutoptimal" ]
201+ self .lpt_probcutfin = [lpt_probcutfin_tmp [bin_matching [ibin ]]
202+ for ibin in range (self .p_nptfinbins )]
209203
210- if self .mltype == "MultiClassification" :
211- for probcutfin , probcutpre in zip (self .lpt_probcutfin , self .lpt_probcutpre ):
204+ for ibin , probcutfin in enumerate (self .lpt_probcutfin ):
205+ probcutpre = self .lpt_probcutpre [bin_matching [ibin ]]
206+ if self .mltype == "MultiClassification" :
212207 if probcutfin [0 ] > probcutpre [0 ] or probcutfin [1 ] < probcutpre [1 ] or probcutfin [2 ] < probcutpre [2 ]:
213208 self .logger .fatal ("Probability cut final: %s must be tighter than presel %s!\n " \
214209 "Verify that bkg prob presel > final, and other cuts presel < final" ,
215210 self .lpt_probcutfin , self .lpt_probcutpre )
216- elif self . lpt_probcutfin < self . lpt_probcutpre :
217- self .logger .fatal ("Probability cut final: %s must be tighter (smaller values) than presel %s!" ,
218- self .lpt_probcutfin , self .lpt_probcutpre )
211+ elif probcutfin < probcutpre :
212+ self .logger .fatal ("Probability cut final: %s must be tighter (smaller values) than presel %s!" ,
213+ self .lpt_probcutfin , self .lpt_probcutpre )
219214
220215 if self .mltype == "MultiClassification" :
221216 self .l_selml = []
@@ -418,10 +413,6 @@ def dfuse(df_spec):
418413 dfs [df_name ][var ] = np .logical_and (dfs [df_name ][var ] == 1 , swapped )
419414 self .logger .debug (' %s -> done' , df_name )
420415
421- if 'rename' in df_spec :
422- spec = df_spec ['rename' ]
423- dfs [df_name ] = dfs [df_name ].rename (columns = {spec ['old' ]: spec ['new' ]})
424-
425416
426417 if self .df_merge :
427418 for m_spec in self .df_merge :
@@ -433,18 +424,18 @@ def dfuse(df_spec):
433424 self .logger .info ('merging %s with %s on %s into %s' , base , ref , on , out )
434425 if not isinstance (on , list ) or 'df' not in on :
435426 on = ['df' , on ]
436- dfs [out ] = dfmerge (dfs [base ], dfs [ref ], on = on )
427+ dfs [out ] = dfmerge (dfs [base ], dfs [ref ], suffixes = ( f'_ { base } ' , None ), on = on )
437428 elif (on := m_spec .get ('left_on' , None )) is not None :
438429 self .logger .info ('merging %s with %s on %s into %s' , base , ref , on , out )
439430 if not is_numeric_dtype (dfs [base ][on ]):
440431 self .logger .info ('exploding dataframe %s on variable %s' , base , on )
441- dfs [out ] = dfmerge (dfs [base ].explode (on ), dfs [ref ], left_on = ['df' , on ], right_index = True )
432+ dfs [out ] = dfmerge (dfs [base ].explode (on ), dfs [ref ], left_on = ['df' , on ], suffixes = ( f'_ { base } ' , None ), right_index = True )
442433 else :
443- dfs [out ] = dfmerge (dfs [base ], dfs [ref ], left_on = ['df' , on ], right_index = True )
434+ dfs [out ] = dfmerge (dfs [base ], dfs [ref ], left_on = ['df' , on ], suffixes = ( f'_ { base } ' , None ), right_index = True )
444435 else :
445436 var = self .df_read [ref ]['index' ]
446437 self .logger .info ('merging %s with %s on %s (default) into %s' , base , ref , var , out )
447- dfs [out ] = dfmerge (dfs [base ], dfs [ref ], left_on = ['df' , var ], right_index = True )
438+ dfs [out ] = dfmerge (dfs [base ], dfs [ref ], left_on = ['df' , var ], suffixes = ( f'_ { base } ' , None ), right_index = True )
448439 if 'extra' in m_spec :
449440 self .logger .debug (' %s -> extra' , out )
450441 for col_name , col_val in m_spec ['extra' ].items ():
@@ -462,9 +453,7 @@ def dfuse(df_spec):
462453 def skim (self , file_index ):
463454 dfreco = read_df (self .l_reco [file_index ])
464455 dfgen = read_df (self .l_gen [file_index ]) if self .mcordata == 'mc' else None
465-
466- if self .n_gen_sl :
467- dfgen_sl = read_df (self .l_gen_sl [file_index ]) if self .mcordata == 'mc' else None
456+ dfgen_sl = read_df (self .l_gen_sl [file_index ]) if self .n_gen_sl and self .mcordata == 'mc' else None
468457
469458 for ipt in range (self .p_nptbins ):
470459 dfrecosk = seldf_singlevar (dfreco , self .v_var_binning ,
@@ -478,7 +467,7 @@ def skim(self, file_index):
478467 dfgensk = dfquery (dfgensk , self .s_gen_skim [ipt ])
479468 write_df (dfgensk , self .mptfiles_gensk [ipt ][file_index ])
480469
481- if self . n_gen_sl :
470+ if dfgen_sl is not None :
482471 dfgensk_sl = seldf_singlevar (dfgen_sl , self .v_var_binning ,
483472 self .lpt_anbinmin [ipt ], self .lpt_anbinmax [ipt ])
484473 dfgensk_sl = dfquery (dfgensk_sl , self .s_gen_skim [ipt ])
0 commit comments