# Silence pandas' SettingWithCopyWarning: the trigger-SF application below
# assigns through a boolean mask on a weight Series, which trips the
# chained-assignment detector even though the pattern is intentional.
pd.set_option("mode.chained_assignment", None)

# Electron trigger-efficiency scale factors, binned in (pt, eta).
# NOTE(review): loaded at import time from a hardcoded relative path, so the
# module only imports successfully when run from a directory where
# ../binder exists — consider lazy loading; confirm with the module owner.
with open("../binder/trg_eff_SF_ARC.pkl", "rb") as f:
    TRIGGER_SF = pkl.load(f)

# Flat THWW tagger scale factors, keyed by signal category.
THWW_SF = {
    "ggF": 0.948,
    "VBF": 0.984,
}

# Bin edges used to look up TRIGGER_SF["..."]["nominal"][i, j]:
# pt edges are listed high -> low, eta edges are signed and low -> high.
ptbinning_trgSF = [2000, 200, 170, 150, 130, 110, 90, 70, 50, 30]
etabinning_trgSF = [-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]
40+ def get_nominal_from_df ( df , year , ch , sample_label , region , region_sel , xsecweight , is_data ):
8041
81- df = data .copy ()
82- df = df .query (region_sel )
83-
84- # ------------------- Nominal -------------------
85- nominal = df [f"weight_{ ch } " ] * xsecweight
42+ if is_data :
43+ nominal = np .ones_like (df ["fj_pt" ]) # for data (nominal is 1)
44+ else :
45+ nominal = df [f"weight_{ ch } " ] * xsecweight
8646
87- if "bjets" in region_sel : # if there's a bjet selection, add btag SF to the nominal weight
88- nominal *= df ["weight_btag" ]
47+ if "bjets" in region_sel : # if there's a bjet selection, add btag SF to the nominal weight
48+ nominal *= df ["weight_btag" ]
8949
90- # apply trigger SF
91- if ch == "ele" :
92- ptbinning_trgSF = [2000 , 200 , 170 , 150 , 130 , 110 , 90 , 70 , 50 , 30 ]
93- etabinning_trgSF = [- 2.5 , - 1.5 , - 0.5 , 0.5 , 1.5 , 2.5 ]
50+ # apply trigger SF
51+ if ch == "ele" :
9452
95- for i in range (len (ptbinning_trgSF ) - 1 ):
96- high_pt = ptbinning_trgSF [i ]
97- low_pt = ptbinning_trgSF [i + 1 ]
53+ for i in range (len (ptbinning_trgSF ) - 1 ):
54+ high_pt = ptbinning_trgSF [i ]
55+ low_pt = ptbinning_trgSF [i + 1 ]
9856
99- msk_pt = (df ["lep_pt" ] >= low_pt ) & (df ["lep_pt" ] < high_pt )
57+ msk_pt = (df ["lep_pt" ] >= low_pt ) & (df ["lep_pt" ] < high_pt )
10058
101- for j in range (len (etabinning_trgSF ) - 1 ):
102- low_eta = etabinning_trgSF [j ]
103- high_eta = etabinning_trgSF [j + 1 ]
59+ for j in range (len (etabinning_trgSF ) - 1 ):
60+ low_eta = etabinning_trgSF [j ]
61+ high_eta = etabinning_trgSF [j + 1 ]
10462
105- msk_eta = (abs (df ["lep_eta" ]) >= low_eta ) & (abs (df ["lep_eta" ]) < high_eta )
63+ msk_eta = (abs (df ["lep_eta" ]) >= low_eta ) & (abs (df ["lep_eta" ]) < high_eta )
10664
107- nominal [msk_pt & msk_eta ] *= TRIGGER_SF ["UL" + year [2 :].replace ("APV" , "" )]["nominal" ][i , j ]
65+ nominal [msk_pt & msk_eta ] *= TRIGGER_SF ["UL" + year [2 :].replace ("APV" , "" )]["nominal" ][i , j ]
10866
109- hists .fill (
110- Sample = "WJetsLNu" ,
111- Systematic = "vjetsnlo_up" ,
112- Region = region ,
113- mass_observable = df ["rec_higgs_m" ],
114- weight = nominal ,
115- )
67+ # apply THWW SF
68+ if ("ggF" in sample_label ) or (sample_label in ["ggF" , "VBF" , "WH" , "ZH" , "ttH" ]):
69+ if "ggF" in region :
70+ nominal *= THWW_SF ["ggF" ]
71+ else :
72+ nominal *= THWW_SF ["VBF" ]
11673
117- return hists
74+ return nominal
11875
11976
12077def fill_systematics (
@@ -132,58 +89,14 @@ def fill_systematics(
13289 sumgenweights ,
13390 sumscaleweights ,
13491):
135- # with open("trg_eff_SF.pkl", "rb") as f:
136- # TRIGGER_SF = pkl.load(f)
137-
138- with open ("../binder/trg_eff_SF_ARC.pkl" , "rb" ) as f :
139- TRIGGER_SF = pkl .load (f )
140-
141- THWW_SF = {
142- "ggF" : 0.948 ,
143- "VBF" : 0.984 ,
144- }
145-
14692 SYST_DICT = get_systematic_dict (years )
14793
14894 for region , region_sel in regions_sel .items (): # e.g. pass, fail, top control region, etc.
14995
15096 df = data .copy ()
15197 df = df .query (region_sel )
15298
153- # ------------------- Nominal -------------------
154- if is_data :
155- nominal = np .ones_like (df ["fj_pt" ]) # for data (nominal is 1)
156- else :
157- nominal = df [f"weight_{ ch } " ] * xsecweight
158-
159- if "bjets" in region_sel : # if there's a bjet selection, add btag SF to the nominal weight
160- nominal *= df ["weight_btag" ]
161-
162- # apply trigger SF
163- if ch == "ele" :
164- ptbinning_trgSF = [2000 , 200 , 170 , 150 , 130 , 110 , 90 , 70 , 50 , 30 ]
165- etabinning_trgSF = [- 2.5 , - 1.5 , - 0.5 , 0.5 , 1.5 , 2.5 ]
166-
167- for i in range (len (ptbinning_trgSF ) - 1 ):
168- high_pt = ptbinning_trgSF [i ]
169- low_pt = ptbinning_trgSF [i + 1 ]
170-
171- msk_pt = (df ["lep_pt" ] >= low_pt ) & (df ["lep_pt" ] < high_pt )
172-
173- for j in range (len (etabinning_trgSF ) - 1 ):
174- low_eta = etabinning_trgSF [j ]
175- high_eta = etabinning_trgSF [j + 1 ]
176-
177- msk_eta = (abs (df ["lep_eta" ]) >= low_eta ) & (abs (df ["lep_eta" ]) < high_eta )
178-
179- nominal [msk_pt & msk_eta ] *= TRIGGER_SF ["UL" + year [2 :].replace ("APV" , "" )]["nominal" ][i , j ]
180-
181- # apply THWW SF
182- if ("ggF" in sample_label ) or (sample_label in ["ggF" , "VBF" , "WH" , "ZH" , "ttH" ]):
183- if "ggF" in region :
184- nominal *= THWW_SF ["ggF" ]
185- else :
186- nominal *= THWW_SF ["VBF" ]
99+ nominal = get_nominal_from_df (df , year , ch , sample_label , region , region_sel , xsecweight , is_data )
187100
188101 hists .fill (
189102 Sample = sample_label ,
@@ -254,23 +167,6 @@ def fill_systematics(
254167 weight = down ,
255168 )
256169
257- # ------------------- vjets NLO -------------------
258- # with open("templates/v15/hists_templates_Run2.pkl", "rb") as f:
259- # hists_vjetsnlo = pkl.load(f)
260-
261- # sample_idx = np.argmax(np.array(hists.axes["Sample"]) == "WJetsLNu")
262- # syst_idx = np.argmax(np.array(hists.axes["Systematic"]) == "nominal")
263- # region_idx = np.argmax(np.array(hists.axes["Region"]) == "region")
264- if sample_label == "WJetsLNu" :
265-
266- hists .fill (
267- Sample = sample_label ,
268- Systematic = "vjetsnlo_down" ,
269- Region = region ,
270- mass_observable = df ["rec_higgs_m" ],
271- weight = nominal ,
272- )
273-
274170 # ------------------- PDF acceptance -------------------
275171
276172 """
@@ -281,8 +177,7 @@ def fill_systematics(
281177 e.g. https://github.com/LPC-HH/HHLooper/blob/master/python/prepare_card_SR_final.py#L258
282178 and https://github.com/LPC-HH/HHLooper/blob/master/app/HHLooper.cc#L1488
283179 """
284- if (sample_label in sigs + ["WJetsLNu" , "TTbar" ]) and (sample != "ST_s-channel_4f_hadronicDecays" ):
285- # if (sample_label in sigs + ["TTbar"]) and (sample != "ST_s-channel_4f_hadronicDecays"): # TODO add WJets
180+ if (sample_label in sigs + ["TTbar" ]) and (sample != "ST_s-channel_4f_hadronicDecays" ):
286181
287182 pdfweights = []
288183
@@ -336,10 +231,7 @@ def fill_systematics(
336231 - then, take max/min of h0, h1, h3, h5, h7, h8 w.r.t h4: h_up and h_dn
337232 - the uncertainty is the nominal histogram * h_up / h4
338233 """
339- if (sample_label in sigs + ["WJetsLNu" , "TTbar" , "SingleTop" ]) and (sample != "ST_s-channel_4f_hadronicDecays" ):
340- # if (sample_label in sigs + ["TTbar", "SingleTop"]) and (
341- # sample != "ST_s-channel_4f_hadronicDecays"
342- # ): # TODO add WJets
234+ if (sample_label in sigs + ["TTbar" , "SingleTop" ]) and (sample != "ST_s-channel_4f_hadronicDecays" ):
343235
344236 R_4 = sumscaleweights [4 ] / sumgenweights
345237 scaleweight_4 = df ["weight_scale4" ].values * nominal / R_4
@@ -444,7 +336,7 @@ def fill_systematics(
444336
445337 # ------------------- individual sources of JES -------------------
446338
447- """We apply the jet pt cut on the up/down variations. Must loop over systematics first. """
339+ """We apply the jet pt cut on the up/down variations."""
448340 for syst , (yrs , smpls , var ) in SYST_DICT ["JEC" ].items ():
449341
450342 for variation in ["up" , "down" ]:
@@ -460,14 +352,7 @@ def fill_systematics(
460352 df = data .copy ()
461353 df = df .query (region_sel )
462354
463- # ------------------- Nominal -------------------
464- if is_data :
465- nominal = np .ones_like (df ["fj_pt" ]) # for data (nominal is 1)
466- else :
467- nominal = df [f"weight_{ ch } " ] * xsecweight
468-
469- if "bjets" in region_sel : # if there's a bjet selection, add btag SF to the nominal weight
470- nominal *= df ["weight_btag" ]
355+ nominal = get_nominal_from_df (df , year , ch , sample_label , region , region_sel , xsecweight , is_data )
471356
472357 if (sample_label in smpls ) and (year in yrs ) and (ch in var ):
473358 shape_variation = df ["rec_higgs_m" + var [ch ] + f"_{ variation } " ]
@@ -530,8 +415,6 @@ def get_templates(years, channels, samples, samples_dir, regions_sel, model_path
530415 storage = hist2 .storage .Weight (),
531416 )
532417
533- hists = get_wjetsNLOcorr (hists , presel , years , channels , samples_dir , regions_sel , model_path )
534-
535418 for year in years : # e.g. 2018, 2017, 2016APV, 2016
536419 for ch in channels : # e.g. mu, ele
537420 logging .info (f"Processing year { year } and { ch } channel" )
@@ -593,9 +476,6 @@ def get_templates(years, channels, samples, samples_dir, regions_sel, model_path
593476 pkl_files , year , sample , sample_to_use , is_data , luminosity
594477 )
595478
596- # if sample_to_use == "vjetsNLO":
597- # continue
598-
599479 if sample_to_use == "ggF" :
600480
601481 stxs_list = [
@@ -684,7 +564,6 @@ def get_templates(years, channels, samples, samples_dir, regions_sel, model_path
684564 sumscaleweights ,
685565 )
686566
687-
688567 fake_SF = {
689568 "ele" : 0.75 ,
690569 "mu" : 1.0 ,
0 commit comments