Skip to content

Commit 65a3d4b

Browse files
committed
First implementation of the dataset
1 parent 6dd3978 commit 65a3d4b

12 files changed

+46845
-0
lines changed
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
data_central:
2+
- 1.46960501e-05
3+
- 2.39949958e-05
4+
- 3.52416251e-05
5+
- 2.75263349e-06
6+
- 1.22897427e-05
7+
- 1.04523099e-03
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
import numpy as np
2+
import pandas as pd
3+
import yaml
4+
5+
from nnpdf_data.filter_utils.legacy_jets_utils import (
6+
TABLE_TO_RAPIDITY_ATLAS_1JET_7TEV_R06,
7+
VARIANT_MAP,
8+
fill_df_ATLAS_1JET_7TEV_R06,
9+
)
10+
from nnpdf_data.filter_utils.utils import prettify_float
11+
12+
# Register prettify_float as the representer PyYAML uses for Python floats, so
# the yaml.dump calls in this module serialise floats through it.
yaml.add_representer(float, prettify_float)
13+
14+
15+
def filter_ATLAS_1JET_7TEV_data_kinematics():
    """
    Write kinematic values in the kinematics.yaml file.

    The HEPData version and the list of tables are read from
    ``metadata.yaml`` (keys ``hepdata.version`` and ``hepdata.tables``).
    For every table, one entry per transverse-momentum bin is appended to
    the ``bins`` list of ``kinematics.yaml``. Each entry holds the keys
    ``y``, ``pT`` and ``sqrts``, each with ``min``/``mid``/``max`` values.

    Raises
    ------
    FileNotFoundError
        If ``metadata.yaml`` or one of the raw HEPData tables is missing.
    KeyError
        If a table has no entry in ``TABLE_TO_RAPIDITY_ATLAS_1JET_7TEV_R06``.
    """
    with open("metadata.yaml", encoding="utf-8") as file:
        metadata = yaml.safe_load(file)

    version = metadata["hepdata"]["version"]
    tables = metadata["hepdata"]["tables"]

    kin = []
    for table in tables:
        hepdata_table = f"rawdata/HEPData-ins1325553-v{version}_table{table}.yaml"

        with open(hepdata_table, encoding="utf-8") as file:
            hepdata = yaml.safe_load(file)

        # rapidity: the interval is fixed per table and shared by all its bins
        rapidity_interval = TABLE_TO_RAPIDITY_ATLAS_1JET_7TEV_R06[table]
        y_min, y_max = rapidity_interval[0], rapidity_interval[1]
        y_mid = 0.5 * (y_min + y_max)

        # center of mass energy: taken from the fifth qualifier of the first
        # dependent variable.
        # NOTE(review): positional lookup; presumably this is the sqrt(s)
        # qualifier in every table -- confirm against the raw data.
        sqrts = float(hepdata['dependent_variables'][0]['qualifiers'][4]['value'])

        # transverse momentum: one kinematic entry per bin
        for kt in hepdata['independent_variables'][0]['values']:
            # the bin centre is rounded to three decimals via string formatting
            kt_mid = float(f"{0.5 * (kt['low'] + kt['high']):.3f}")

            kin.append(
                {
                    'y': {'min': y_min, 'mid': y_mid, 'max': y_max},
                    'pT': {'min': kt['low'], 'mid': kt_mid, 'max': kt['high']},
                    'sqrts': {'min': None, 'mid': sqrts, 'max': None},
                }
            )

    kinematics_yaml = {"bins": kin}

    with open("kinematics.yaml", "w", encoding="utf-8") as file:
        yaml.dump(kinematics_yaml, file, sort_keys=False)
60+
61+
62+
def filter_ATLAS_1JET_7TEV_data_central(variant='nominal'):
    """
    Collect the central data values of all HEPData tables.

    Nothing is written to disk: the values are only returned.

    Parameters
    ----------
    variant : str
        Key of ``VARIANT_MAP``; the mapped value is used as the index of
        the dependent variable read from each table.

    Returns
    -------
    list
        The ``value`` entries of the selected dependent variable,
        concatenated over the tables in the order given by
        ``metadata.yaml``.

    Raises
    ------
    KeyError
        If ``variant`` is not a key of ``VARIANT_MAP``.
    """
    with open("metadata.yaml", encoding="utf-8") as file:
        metadata = yaml.safe_load(file)

    version = metadata["hepdata"]["version"]
    tables = metadata["hepdata"]["tables"]

    data_central = []
    for table in tables:
        hepdata_table = f"rawdata/HEPData-ins1325553-v{version}_table{table}.yaml"

        with open(hepdata_table, encoding="utf-8") as file:
            hepdata = yaml.safe_load(file)

        values = hepdata['dependent_variables'][VARIANT_MAP[variant]]['values']
        data_central.extend(value['value'] for value in values)

    return data_central
84+
85+
86+
def filter_ALTAS_1JET_7TEV_data_uncertainties(variant='nominal'):
    """
    Write uncertainties in the uncertainties.yaml file.

    The per-table uncertainty dataframes and central values are obtained
    from ``fill_df_ATLAS_1JET_7TEV_R06``. The ``stat`` column is written as
    ``statistical_uncertainty``, the ``sys_lumi`` column as
    ``luminosity_uncertainty``, and every remaining column as a correlated
    systematic named after the column. The central values are also
    written, to ``data.yaml``, for every variant.

    Parameters
    ----------
    variant : str
        Variant forwarded to ``fill_df_ATLAS_1JET_7TEV_R06``. For
        ``'nominal'`` the uncertainties go to ``uncertainties.yaml``,
        otherwise to ``uncertainties_{variant}.yaml``.
    """
    with open("metadata.yaml", encoding="utf-8") as file:
        metadata = yaml.safe_load(file)

    version = metadata["hepdata"]["version"]
    tables = metadata["hepdata"]["tables"]

    # get df of uncertainties (and the matching central values) per table
    dfs = []
    cvs = []
    for table in tables:
        df, cv = fill_df_ATLAS_1JET_7TEV_R06(table, version, variant)
        dfs.append(df)
        cvs.append(cv)

    df_unc = pd.concat(dfs, axis=0)
    # NOTE(review): np.stack needs identically shaped inputs and adds a new
    # leading axis; if each cv is a per-table array, data.yaml will hold a
    # nested list -- confirm this is intended (vs. np.concatenate).
    cvs = np.stack(cvs, axis=0)

    # statistical errors fully uncorrelated
    stat_errors = df_unc["stat"].to_numpy()

    # luminosity errors
    lum_errors = df_unc["sys_lumi"].to_numpy()

    # correlated systematics: everything except stat and luminosity.
    # The reduced frame is built once and reused below.
    df_corr = df_unc.drop(["stat", "sys_lumi"], axis=1)
    corr_names = [f"{col}" for col in df_corr.columns]
    A_corr = df_corr.to_numpy()

    # Error definitions
    error_definition = {
        name: {
            "description": f"correlated systematic {name}",
            "treatment": "MULT",
            "type": "CORR",
        }
        for name in corr_names
    }

    error_definition["luminosity_uncertainty"] = {
        "description": "luminosity uncertainty",
        "treatment": "MULT",
        "type": "ATLASLUMI14",
    }

    error_definition["statistical_uncertainty"] = {
        "description": "statistical uncertainty",
        "treatment": "MULT",
        "type": "UNCORR",
    }

    # store error in dict: one entry per row of the concatenated dataframe
    error = []
    for corr_row, lum, stat in zip(A_corr, lum_errors, stat_errors):
        # plain floats so that the YAML float representer is applied
        error_value = {name: float(val) for name, val in zip(corr_names, corr_row)}
        error_value["luminosity_uncertainty"] = float(lum)
        error_value["statistical_uncertainty"] = float(stat)
        error.append(error_value)

    uncertainties_yaml = {"definitions": error_definition, "bins": error}

    if variant == 'nominal':
        filename = 'uncertainties.yaml'
    else:
        filename = f"uncertainties_{variant}.yaml"

    with open(filename, "w", encoding="utf-8") as file:
        yaml.dump(uncertainties_yaml, file, sort_keys=False)

    data_central_yaml = {"data_central": cvs.tolist()}

    # write central values to yaml file
    with open("data.yaml", "w", encoding="utf-8") as file:
        yaml.dump(data_central_yaml, file, sort_keys=False)


# Correctly spelled alias; the misspelled original name is kept so that
# existing callers keep working.
filter_ATLAS_1JET_7TEV_data_uncertainties = filter_ALTAS_1JET_7TEV_data_uncertainties
166+
167+
168+
if __name__ == "__main__":
    # Running the module as a script regenerates kinematics.yaml first, then
    # the nominal uncertainties.yaml together with data.yaml.
    filter_ATLAS_1JET_7TEV_data_kinematics()
    filter_ALTAS_1JET_7TEV_data_uncertainties()

0 commit comments

Comments
 (0)