Skip to content

Commit afb398c

Browse files
committed
up
1 parent 304fe50 commit afb398c

File tree

11 files changed

+423
-1623
lines changed

11 files changed

+423
-1623
lines changed

binder/combine.ipynb

Lines changed: 101 additions & 7 deletions
Large diffs are not rendered by default.

binder/hists_plots.ipynb

Lines changed: 275 additions & 83 deletions
Large diffs are not rendered by default.

combine/config_make_templates.yaml

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ samples:
44
- WH
55
- ZH
66
- ttH
7-
# - vjetsNLO
87
- WJetsLNu
98
- TTbar
109
- SingleTop
@@ -23,15 +22,6 @@ regions_sel:
2322
"WJetsCR": (n_bjets_T==0) & (THWW<0.905) & (rec_higgs_pt>250) & (NumFatjets>=2)
2423
"TopCR": (n_bjets_T>0) & (rec_higgs_pt>250) & (THWW>0.90)
2524

26-
regions_binning: # (low_range, high_range, binning)
27-
28-
"VBF": [55, 255, 20]
29-
"ggFpt250to350": [55, 255, 20]
30-
"ggFpt350to500": [55, 255, 20]
31-
"ggFpt500toInf": [55, 255, 20]
32-
"WJetsCR": [55, 255, 20]
33-
"TopCR": [55, 255, 20]
34-
3525
samples_dir:
3626
"2018": ../eos/hww/Dec20_hww_2018
3727
"2017": ../eos/hww/Dec20_hww_2017

combine/datacard_systematics.py

Lines changed: 1 addition & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121

2222
SIG_regions = ["VBF", "ggFpt250to350", "ggFpt350to500", "ggFpt500toInf"]
2323
CONTROL_regions = ["TopCR", "WJetsCR"]
24-
# CONTROL_regions = ["TopCR", "WJetsCR1", "WJetsCR2", "WJetsCR3"]
2524

2625

2726
def systs_not_from_parquets(years: List[str], lep_channels: List[str], do_unfolding: bool):
@@ -372,38 +371,12 @@ def systs_from_parquets(years):
372371
["SingleTop"],
373372
SIG_regions + CONTROL_regions,
374373
),
375-
# systematics applied only on WJets & DYJets
376-
# rl.NuisanceParameter(f"{CMS_PARAMS_LABEL}_d1K_NLO", "lnN"): (
377-
# "weight_d1K_NLO",
378-
# ["WJetsLNu"],
379-
# SIG_regions + CONTROL_regions,
380-
# ),
381-
# rl.NuisanceParameter(f"{CMS_PARAMS_LABEL}_d2K_NLO", "lnN"): (
382-
# "weight_d2K_NLO",
383-
# ["WJetsLNu"],
384-
# SIG_regions + CONTROL_regions,
385-
# ),
386-
# rl.NuisanceParameter(f"{CMS_PARAMS_LABEL}_d3K_NLO", "lnN"): (
387-
# "weight_d3K_NLO",
388-
# ["WJetsLNu"],
389-
# SIG_regions + CONTROL_regions,
390-
# ),
374+
# systematics applied only on DYJets
391375
rl.NuisanceParameter(f"{CMS_PARAMS_LABEL}_W_d1kappa_EW", "lnN"): (
392376
"weight_d1kappa_EW",
393-
# ["WJetsLNu", "DYJets"],
394377
["DYJets"],
395378
SIG_regions + CONTROL_regions,
396379
),
397-
# rl.NuisanceParameter(f"{CMS_PARAMS_LABEL}_W_d2kappa_EW", "lnN"): (
398-
# "weight_W_d2kappa_EW",
399-
# ["WJetsLNu"],
400-
# SIG_regions + CONTROL_regions,
401-
# ),
402-
# rl.NuisanceParameter(f"{CMS_PARAMS_LABEL}_W_d3kappa_EW", "lnN"): (
403-
# "weight_W_d3kappa_EW",
404-
# ["WJetsLNu"],
405-
# SIG_regions + CONTROL_regions,
406-
# ),
407380
rl.NuisanceParameter(f"{CMS_PARAMS_LABEL}_Z_d2kappa_EW", "lnN"): (
408381
"weight_Z_d2kappa_EW",
409382
["DYJets"],

combine/make_templates.py

Lines changed: 38 additions & 159 deletions
Original file line numberDiff line numberDiff line change
@@ -26,95 +26,52 @@
2626
pd.set_option("mode.chained_assignment", None)
2727

2828

29-
def get_wjetsNLOcorr(hists, presel, years, channels, samples_dir, regions_sel, model_path):
29+
with open("../binder/trg_eff_SF_ARC.pkl", "rb") as f:
30+
TRIGGER_SF = pkl.load(f)
3031

31-
for year in years: # e.g. 2018, 2017, 2016APV, 2016
32-
for ch in channels: # e.g. mu, ele
33-
34-
with open("../fileset/luminosity.json") as f:
35-
luminosity = json.load(f)[ch][year]
36-
37-
for sample in os.listdir(samples_dir[year]):
38-
39-
sample_to_use = get_common_sample_name(sample)
40-
41-
if sample_to_use != "vjetsNLO":
42-
continue
43-
44-
out_files = f"{samples_dir[year]}/{sample}/outfiles/"
45-
parquet_files = glob.glob(f"{out_files}/*_{ch}.parquet")
46-
pkl_files = glob.glob(f"{out_files}/*.pkl")
47-
48-
if not parquet_files:
49-
logging.info(f"No parquet file for {sample}")
50-
continue
51-
52-
try:
53-
data = pd.read_parquet(parquet_files)
54-
except pyarrow.lib.ArrowInvalid: # empty parquet because no event passed selection
55-
continue
56-
57-
if len(data) == 0:
58-
continue
59-
60-
# use hidNeurons to get the finetuned scores
61-
data["THWW"] = get_finetuned_score(data, model_path)
62-
63-
# drop hidNeurons which are not needed anymore
64-
data = data[data.columns.drop(list(data.filter(regex="hidNeuron")))]
65-
66-
# apply selection
67-
for selection in presel[ch]:
68-
data = data.query(presel[ch][selection])
69-
70-
# apply genlep recolep matching
71-
data = data[data["dR_genlep_recolep"] < 0.005]
72-
73-
# get the xsecweight
74-
xsecweight, _, _, _ = get_xsecweight(pkl_files, year, sample, sample_to_use, False, luminosity)
32+
THWW_SF = {
33+
"ggF": 0.948,
34+
"VBF": 0.984,
35+
}
36+
ptbinning_trgSF = [2000, 200, 170, 150, 130, 110, 90, 70, 50, 30]
37+
etabinning_trgSF = [-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]
7538

76-
with open("../binder/trg_eff_SF_ARC.pkl", "rb") as f:
77-
TRIGGER_SF = pkl.load(f)
7839

79-
for region, region_sel in regions_sel.items(): # e.g. pass, fail, top control region, etc.
40+
def get_nominal_from_df(df, year, ch, sample_label, region, region_sel, xsecweight, is_data):
8041

81-
df = data.copy()
82-
df = df.query(region_sel)
83-
84-
# ------------------- Nominal -------------------
85-
nominal = df[f"weight_{ch}"] * xsecweight
42+
if is_data:
43+
nominal = np.ones_like(df["fj_pt"]) # for data (nominal is 1)
44+
else:
45+
nominal = df[f"weight_{ch}"] * xsecweight
8646

87-
if "bjets" in region_sel: # if there's a bjet selection, add btag SF to the nominal weight
88-
nominal *= df["weight_btag"]
47+
if "bjets" in region_sel: # if there's a bjet selection, add btag SF to the nominal weight
48+
nominal *= df["weight_btag"]
8949

90-
# apply trigger SF
91-
if ch == "ele":
92-
ptbinning_trgSF = [2000, 200, 170, 150, 130, 110, 90, 70, 50, 30]
93-
etabinning_trgSF = [-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]
50+
# apply trigger SF
51+
if ch == "ele":
9452

95-
for i in range(len(ptbinning_trgSF) - 1):
96-
high_pt = ptbinning_trgSF[i]
97-
low_pt = ptbinning_trgSF[i + 1]
53+
for i in range(len(ptbinning_trgSF) - 1):
54+
high_pt = ptbinning_trgSF[i]
55+
low_pt = ptbinning_trgSF[i + 1]
9856

99-
msk_pt = (df["lep_pt"] >= low_pt) & (df["lep_pt"] < high_pt)
57+
msk_pt = (df["lep_pt"] >= low_pt) & (df["lep_pt"] < high_pt)
10058

101-
for j in range(len(etabinning_trgSF) - 1):
102-
low_eta = etabinning_trgSF[j]
103-
high_eta = etabinning_trgSF[j + 1]
59+
for j in range(len(etabinning_trgSF) - 1):
60+
low_eta = etabinning_trgSF[j]
61+
high_eta = etabinning_trgSF[j + 1]
10462

105-
msk_eta = (abs(df["lep_eta"]) >= low_eta) & (abs(df["lep_eta"]) < high_eta)
63+
msk_eta = (abs(df["lep_eta"]) >= low_eta) & (abs(df["lep_eta"]) < high_eta)
10664

107-
nominal[msk_pt & msk_eta] *= TRIGGER_SF["UL" + year[2:].replace("APV", "")]["nominal"][i, j]
65+
nominal[msk_pt & msk_eta] *= TRIGGER_SF["UL" + year[2:].replace("APV", "")]["nominal"][i, j]
10866

109-
hists.fill(
110-
Sample="WJetsLNu",
111-
Systematic="vjetsnlo_up",
112-
Region=region,
113-
mass_observable=df["rec_higgs_m"],
114-
weight=nominal,
115-
)
67+
# apply THWW SF
68+
if ("ggF" in sample_label) or (sample_label in ["ggF", "VBF", "WH", "ZH", "ttH"]):
69+
if "ggF" in region:
70+
nominal *= THWW_SF["ggF"]
71+
else:
72+
nominal *= THWW_SF["VBF"]
11673

117-
return hists
74+
return nominal
11875

11976

12077
def fill_systematics(
@@ -132,58 +89,14 @@ def fill_systematics(
13289
sumgenweights,
13390
sumscaleweights,
13491
):
135-
# with open("trg_eff_SF.pkl", "rb") as f:
136-
# TRIGGER_SF = pkl.load(f)
137-
138-
with open("../binder/trg_eff_SF_ARC.pkl", "rb") as f:
139-
TRIGGER_SF = pkl.load(f)
140-
141-
THWW_SF = {
142-
"ggF": 0.948,
143-
"VBF": 0.984,
144-
}
145-
14692
SYST_DICT = get_systematic_dict(years)
14793

14894
for region, region_sel in regions_sel.items(): # e.g. pass, fail, top control region, etc.
14995

15096
df = data.copy()
15197
df = df.query(region_sel)
15298

153-
# ------------------- Nominal -------------------
154-
if is_data:
155-
nominal = np.ones_like(df["fj_pt"]) # for data (nominal is 1)
156-
else:
157-
nominal = df[f"weight_{ch}"] * xsecweight
158-
159-
if "bjets" in region_sel: # if there's a bjet selection, add btag SF to the nominal weight
160-
nominal *= df["weight_btag"]
161-
162-
# apply trigger SF
163-
if ch == "ele":
164-
ptbinning_trgSF = [2000, 200, 170, 150, 130, 110, 90, 70, 50, 30]
165-
etabinning_trgSF = [-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]
166-
167-
for i in range(len(ptbinning_trgSF) - 1):
168-
high_pt = ptbinning_trgSF[i]
169-
low_pt = ptbinning_trgSF[i + 1]
170-
171-
msk_pt = (df["lep_pt"] >= low_pt) & (df["lep_pt"] < high_pt)
172-
173-
for j in range(len(etabinning_trgSF) - 1):
174-
low_eta = etabinning_trgSF[j]
175-
high_eta = etabinning_trgSF[j + 1]
176-
177-
msk_eta = (abs(df["lep_eta"]) >= low_eta) & (abs(df["lep_eta"]) < high_eta)
178-
179-
nominal[msk_pt & msk_eta] *= TRIGGER_SF["UL" + year[2:].replace("APV", "")]["nominal"][i, j]
180-
181-
# apply THWW SF
182-
if ("ggF" in sample_label) or (sample_label in ["ggF", "VBF", "WH", "ZH", "ttH"]):
183-
if "ggF" in region:
184-
nominal *= THWW_SF["ggF"]
185-
else:
186-
nominal *= THWW_SF["VBF"]
99+
nominal = get_nominal_from_df(df, year, ch, sample_label, region, region_sel, xsecweight, is_data)
187100

188101
hists.fill(
189102
Sample=sample_label,
@@ -254,23 +167,6 @@ def fill_systematics(
254167
weight=down,
255168
)
256169

257-
# ------------------- vjets NLO -------------------
258-
# with open("templates/v15/hists_templates_Run2.pkl", "rb") as f:
259-
# hists_vjetsnlo = pkl.load(f)
260-
261-
# sample_idx = np.argmax(np.array(hists.axes["Sample"]) == "WJetsLNu")
262-
# syst_idx = np.argmax(np.array(hists.axes["Systematic"]) == "nominal")
263-
# region_idx = np.argmax(np.array(hists.axes["Region"]) == "region")
264-
if sample_label == "WJetsLNu":
265-
266-
hists.fill(
267-
Sample=sample_label,
268-
Systematic="vjetsnlo_down",
269-
Region=region,
270-
mass_observable=df["rec_higgs_m"],
271-
weight=nominal,
272-
)
273-
274170
# ------------------- PDF acceptance -------------------
275171

276172
"""
@@ -281,8 +177,7 @@ def fill_systematics(
281177
e.g. https://github.com/LPC-HH/HHLooper/blob/master/python/prepare_card_SR_final.py#L258
282178
and https://github.com/LPC-HH/HHLooper/blob/master/app/HHLooper.cc#L1488
283179
"""
284-
if (sample_label in sigs + ["WJetsLNu", "TTbar"]) and (sample != "ST_s-channel_4f_hadronicDecays"):
285-
# if (sample_label in sigs + ["TTbar"]) and (sample != "ST_s-channel_4f_hadronicDecays"): # TODO add WJets
180+
if (sample_label in sigs + ["TTbar"]) and (sample != "ST_s-channel_4f_hadronicDecays"):
286181

287182
pdfweights = []
288183

@@ -336,10 +231,7 @@ def fill_systematics(
336231
- then, take max/min of h0, h1, h3, h5, h7, h8 w.r.t h4: h_up and h_dn
337232
- the uncertainty is the nominal histogram * h_up / h4
338233
"""
339-
if (sample_label in sigs + ["WJetsLNu", "TTbar", "SingleTop"]) and (sample != "ST_s-channel_4f_hadronicDecays"):
340-
# if (sample_label in sigs + ["TTbar", "SingleTop"]) and (
341-
# sample != "ST_s-channel_4f_hadronicDecays"
342-
# ): # TODO add WJets
234+
if (sample_label in sigs + ["TTbar", "SingleTop"]) and (sample != "ST_s-channel_4f_hadronicDecays"):
343235

344236
R_4 = sumscaleweights[4] / sumgenweights
345237
scaleweight_4 = df["weight_scale4"].values * nominal / R_4
@@ -444,7 +336,7 @@ def fill_systematics(
444336

445337
# ------------------- individual sources of JES -------------------
446338

447-
"""We apply the jet pt cut on the up/down variations. Must loop over systematics first."""
339+
"""We apply the jet pt cut on the up/down variations."""
448340
for syst, (yrs, smpls, var) in SYST_DICT["JEC"].items():
449341

450342
for variation in ["up", "down"]:
@@ -460,14 +352,7 @@ def fill_systematics(
460352
df = data.copy()
461353
df = df.query(region_sel)
462354

463-
# ------------------- Nominal -------------------
464-
if is_data:
465-
nominal = np.ones_like(df["fj_pt"]) # for data (nominal is 1)
466-
else:
467-
nominal = df[f"weight_{ch}"] * xsecweight
468-
469-
if "bjets" in region_sel: # if there's a bjet selection, add btag SF to the nominal weight
470-
nominal *= df["weight_btag"]
355+
nominal = get_nominal_from_df(df, year, ch, sample_label, region, region_sel, xsecweight, is_data)
471356

472357
if (sample_label in smpls) and (year in yrs) and (ch in var):
473358
shape_variation = df["rec_higgs_m" + var[ch] + f"_{variation}"]
@@ -530,8 +415,6 @@ def get_templates(years, channels, samples, samples_dir, regions_sel, model_path
530415
storage=hist2.storage.Weight(),
531416
)
532417

533-
hists = get_wjetsNLOcorr(hists, presel, years, channels, samples_dir, regions_sel, model_path)
534-
535418
for year in years: # e.g. 2018, 2017, 2016APV, 2016
536419
for ch in channels: # e.g. mu, ele
537420
logging.info(f"Processing year {year} and {ch} channel")
@@ -593,9 +476,6 @@ def get_templates(years, channels, samples, samples_dir, regions_sel, model_path
593476
pkl_files, year, sample, sample_to_use, is_data, luminosity
594477
)
595478

596-
# if sample_to_use == "vjetsNLO":
597-
# continue
598-
599479
if sample_to_use == "ggF":
600480

601481
stxs_list = [
@@ -684,7 +564,6 @@ def get_templates(years, channels, samples, samples_dir, regions_sel, model_path
684564
sumscaleweights,
685565
)
686566

687-
688567
fake_SF = {
689568
"ele": 0.75,
690569
"mu": 1.0,

0 commit comments

Comments
 (0)