Skip to content

Commit 6e3907e

Browse files
committed
update the code to be compatible with pandas==3
1 parent ac6affb commit 6e3907e

File tree

17 files changed

+357
-364
lines changed

17 files changed

+357
-364
lines changed
Lines changed: 56 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,68 +1,69 @@
from dataclasses import dataclass
from os import PathLike
from pathlib import Path

import pandas as pd

from nnpdf_data.filter_utils.hera_utils import commondata  # , covmat_is_close


@dataclass
class hera_commondata(commondata):
    """Commondata implementation for the HERA1+2 combined CC reduced cross sections."""

    def __init__(self, filename: str | PathLike, dataset_name: str, process: str):
        """Read a HERA combined rawdata table and populate the commondata fields.

        Parameters
        ----------
        filename: path to the whitespace-separated rawdata table.
        dataset_name: name under which the dataset is registered.
        process: process label (e.g. "DIS_CC").
        """
        # Read the data.
        file = Path(filename)
        df = pd.read_table(file, sep=r"\s+")

        # Kinematic quantities.
        self.central_values = df["Sigma"].to_numpy()
        self.kinematics = df[["x", "Q2", "y"]].to_numpy()
        self.kinematic_quantities = ["x", "Q2", "y"]

        # Statistical uncertainties: the rawdata quotes them as percentages
        # of the central value; convert to absolute values. The vectorised
        # product allocates a fresh array, so there is no in-place write
        # into the (possibly read-only under pandas>=3 copy-on-write)
        # buffer returned by to_numpy().
        self.statistical_uncertainties = self.central_values * df["stat"].to_numpy() / 100

        # Systematic uncertainties.
        # Remove the column containing the total uncertainty excluding
        # procedural uncertainties, then convert the remaining percentage
        # columns to absolute values (row-wise scaling by the central value).
        df = df.drop(columns=["tot_noproc"])
        sys_uncert_col_names = list(df.columns.values)[5:]
        self.systematic_uncertainties = (
            df[sys_uncert_col_names].to_numpy() * self.central_values[:, None] / 100
        )

        # All uncertainties are treated as multiplicative; "uncor" is the
        # only uncorrelated source, every other column is correlated (HC_*).
        self.systypes = [
            ("MULT", "UNCORR") if name == "uncor" else ("MULT", f"HC_{name}")
            for name in sys_uncert_col_names
        ]
        self.process = process
        self.dataset_name = dataset_name


def main():
    """Convert the HERA1+2 CC e-m and e-p rawdata into new-commondata YAML files."""
    hera_em = hera_commondata("./rawdata/HERA1+2_CCem.dat", "HERACOMBCCEM", "DIS_CC")
    hera_em.write_new_commondata(
        Path("data_EM-SIGMARED.yaml"),
        Path("kinematics_EM-SIGMARED.yaml"),
        Path("uncertainties_EM-SIGMARED.yaml"),
    )
    hera_ep = hera_commondata("./rawdata/HERA1+2_CCep.dat", "HERACOMBCCEP", "DIS_CC")
    hera_ep.write_new_commondata(
        Path("data_EP-SIGMARED.yaml"),
        Path("kinematics_EP-SIGMARED.yaml"),
        Path("uncertainties_EP-SIGMARED.yaml"),
    )


if __name__ == "__main__":
    main()
Lines changed: 54 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,63 +1,63 @@
from dataclasses import dataclass
from os import PathLike
from pathlib import Path

import pandas as pd

from nnpdf_data.filter_utils.hera_utils import commondata  # , covmat_is_close


@dataclass
class hera_commondata(commondata):
    """Commondata implementation for the HERA1+2 combined NC e+p 460 GeV dataset."""

    def __init__(self, filename: str | PathLike, dataset_name: str, process: str):
        """Read a HERA combined rawdata table and populate the commondata fields.

        Parameters
        ----------
        filename: path to the whitespace-separated rawdata table.
        dataset_name: name under which the dataset is registered.
        process: process label (e.g. "DIS_NCE").
        """
        # Read the data.
        file = Path(filename)
        df = pd.read_table(file, sep=r"\s+")

        # Kinematic quantities.
        self.central_values = df["Sigma"].to_numpy()
        self.kinematics = df[["x", "Q2", "y"]].to_numpy()
        self.kinematic_quantities = ["x", "Q2", "y"]

        # Statistical uncertainties: the rawdata quotes them as percentages
        # of the central value; convert to absolute values. The vectorised
        # product allocates a fresh array, so there is no in-place write
        # into the (possibly read-only under pandas>=3 copy-on-write)
        # buffer returned by to_numpy().
        self.statistical_uncertainties = self.central_values * df["stat"].to_numpy() / 100

        # Systematic uncertainties.
        # Remove the column containing the total uncertainty excluding
        # procedural uncertainties, then convert the remaining percentage
        # columns to absolute values (row-wise scaling by the central value).
        df = df.drop(columns=["tot_noproc"])
        sys_uncert_col_names = list(df.columns.values)[5:]
        self.systematic_uncertainties = (
            df[sys_uncert_col_names].to_numpy() * self.central_values[:, None] / 100
        )

        # All uncertainties are treated as multiplicative; "uncor" is the
        # only uncorrelated source, every other column is correlated (HC_*).
        self.systypes = [
            ("MULT", "UNCORR") if name == "uncor" else ("MULT", "HC_" + name)
            for name in sys_uncert_col_names
        ]
        self.process = process
        self.dataset_name = dataset_name


def main():
    """Convert the HERA1+2 NC e+p 460 GeV rawdata into new-commondata YAML files."""
    hera_ep = hera_commondata("./rawdata/HERA1+2_NCep_460.dat", "HERACOMBNCEP460", "DIS_NCE")
    hera_ep.write_new_commondata(
        Path("data_EP-SIGMARED.yaml"),
        Path("kinematics_EP-SIGMARED.yaml"),
        Path("uncertainties_EP-SIGMARED.yaml"),
    )


if __name__ == "__main__":
    main()
Lines changed: 50 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,65 +1,67 @@
from dataclasses import dataclass
from os import PathLike
from pathlib import Path

import pandas as pd

from nnpdf_data.filter_utils.hera_utils import commondata  # , covmat_is_close


@dataclass
class hera_commondata(commondata):
    """Commondata implementation for the HERA1+2 combined NC e+p 575 GeV dataset."""

    def __init__(self, filename: str | PathLike, dataset_name: str, process: str):
        """Read a HERA combined rawdata table and populate the commondata fields.

        Parameters
        ----------
        filename: path to the whitespace-separated rawdata table.
        dataset_name: name under which the dataset is registered.
        process: process label (e.g. "DIS_NCE").
        """
        # Read the data.
        file = Path(filename)
        df = pd.read_table(file, sep=r"\s+")

        # Kinematic quantities.
        self.central_values = df["Sigma"].to_numpy()
        self.kinematics = df[["x", "Q2", "y"]].to_numpy()
        self.kinematic_quantities = ["x", "Q2", "y"]

        # Statistical uncertainties: the rawdata quotes them as percentages
        # of the central value; convert to absolute values. The vectorised
        # product allocates a fresh array, so there is no in-place write
        # into the (possibly read-only under pandas>=3 copy-on-write)
        # buffer returned by to_numpy().
        self.statistical_uncertainties = self.central_values * df["stat"].to_numpy() / 100

        # Systematic uncertainties.
        # Remove the column containing the total uncertainty excluding
        # procedural uncertainties, then convert the remaining percentage
        # columns to absolute values (row-wise scaling by the central value).
        df = df.drop(columns=["tot_noproc"])
        sys_uncert_col_names = list(df.columns.values)[5:]
        self.systematic_uncertainties = (
            df[sys_uncert_col_names].to_numpy() * self.central_values[:, None] / 100
        )

        # All uncertainties are treated as multiplicative; "uncor" is the
        # only uncorrelated source, every other column is correlated (HC_*).
        self.systypes = [
            ("MULT", "UNCORR") if name == "uncor" else ("MULT", "HC_" + name)
            for name in sys_uncert_col_names
        ]
        self.process = process
        self.dataset_name = dataset_name


def main():
    """Convert the HERA1+2 NC e+p 575 GeV rawdata into new-commondata YAML files."""
    hera_ep = hera_commondata("./rawdata/HERA1+2_NCep_575.dat", "HERACOMBNCEP575", "DIS_NCE")
    hera_ep.write_new_commondata(
        Path("data_EP-SIGMARED.yaml"),
        Path("kinematics_EP-SIGMARED.yaml"),
        Path("uncertainties_EP-SIGMARED.yaml"),
    )


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)