Skip to content

Commit cbe183a

Browse files
committed
improved dataspec handling logic
1 parent ccf8b43 commit cbe183a

File tree

5 files changed

+91
-45
lines changed

5 files changed

+91
-45
lines changed

validphys2/src/validphys/comparefittemplates/comparecard.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,10 @@ lumi_report:
9999
meta: Null
100100
template: lumi.md
101101

102+
excluded_datasets:
103+
use_cuts: "fromfit"
104+
from_: excluded
105+
102106
template: report.md
103107

104108
positivity:
@@ -110,10 +114,6 @@ description:
110114
dataset_inputs:
111115
from_: fit
112116

113-
excluded_datasets:
114-
use_cuts: "fromfit"
115-
from_: excluded
116-
117117
dataspecs:
118118
- theoryid:
119119
from_: current

validphys2/src/validphys/comparefittemplates/comparecard_excluded.yaml

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,12 @@ lumi_report:
9999
meta: Null
100100
template: lumi.md
101101

102+
excluded_report:
103+
meta: Null
104+
use_cuts: "fromfit"
105+
from_: dataspecs
106+
template: excluded.md
107+
102108
template: report_with_excluded.md
103109

104110
positivity:
@@ -110,14 +116,6 @@ description:
110116
dataset_inputs:
111117
from_: fit
112118

113-
excluded_datasets:
114-
use_cuts: "fromfit"
115-
from_: dataspecs
116-
117-
excluded_positivity:
118-
use_cuts: "fromfit"
119-
from_: dataspecs
120-
121119
dataspecs:
122120
- theoryid:
123121
from_: current

validphys2/src/validphys/comparefittemplates/report_with_excluded.md

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,12 @@ We are comparing:
1111

1212
{@ summarise_fits @}
1313

14-
15-
1614
Datasets excluded from fit
1715
--------------------------
1816
{@with matched_excluded_datasets_by_name@}
1917
[Plots for {@dataset_name@}]({@plot_fancy@})
2018
{@endwith@}
2119

22-
2320
Code versions
2421
-------------
2522
{@fits_version_table@}

validphys2/src/validphys/config.py

Lines changed: 80 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1000,6 +1000,7 @@ def produce_matched_datasets_from_dataspecs(self, dataspecs):
10001000
res.sort(key=lambda x: (x["process"], x["dataset_name"]))
10011001
return res
10021002

1003+
#### JCM code
10031004

10041005
# def produce_matched_excluded_datasets_by_name(self, dataspecs):
10051006
# import pdb; pdb.set_trace()
@@ -1014,49 +1015,98 @@ def produce_matched_datasets_from_dataspecs(self, dataspecs):
10141015
# }
10151016
# return [{"dataset_input": i, **more_info} for i in dinputs_b[1:3]]
10161017

1018+
#### JELLE code
1019+
1020+
# def produce_matched_excluded_datasets_by_name(self, dataspecs):
1021+
# """Like produce_matched_datasets_from_dataspecs but for all datasets excluded from the fit."""
1022+
# self._check_dataspecs_type(dataspecs)
1023+
# loader = Loader()
1024+
#
1025+
# all_used = []
1026+
#
1027+
# for spec in dataspecs:
1028+
# with self.set_context(ns=self._curr_ns.new_child(spec)):
1029+
# _, data_input = self.parse_from_(None, "data_input", write=False)
1030+
# names = {}
1031+
# for dsin in data_input:
1032+
# cd = self.produce_commondata(dataset_input=dsin)
1033+
# proc = get_info(cd).nnpdf31_process
1034+
# ds = dsin.name
1035+
# names[(proc, ds)] = dsin
1036+
# all_used.append(names)
1037+
#
1038+
# union = set.union(*(set(d) for d in all_used))
1039+
# intersection = set.intersection(*(set(d) for d in all_used))
1040+
# excluded_set = union - intersection
1041+
#
1042+
# excluded_datasets = []
1043+
# for names in all_used:
1044+
# for k in excluded_set:
1045+
# if k in names:
1046+
# excluded_datasets.append(names[k])
1047+
#
1048+
# more_info = {
1049+
# "pdfs": [i["pdf"] for i in dataspecs],
1050+
# "theoryid": dataspecs[0]["theoryid"],
1051+
# "fit": dataspecs[0]["fit"],
1052+
# }
1053+
# return [
1054+
# {
1055+
# "dataset_input": dsin,
1056+
# "dataset_name": dsin.name,
1057+
# **more_info
1058+
# }
1059+
# for dsin in excluded_datasets
1060+
# ]
1061+
1062+
#### attempt to generalize
1063+
10171064
def produce_matched_excluded_datasets_by_name(self, dataspecs):
1018-
"""Like produce_matched_datasets_from_dataspecs but for all datasets excluded from the fit."""
1065+
"""Return excluded datasets, each tagged with the more_info from the dataspecs they came from."""
10191066
self._check_dataspecs_type(dataspecs)
10201067
loader = Loader()
10211068

1022-
all_used = []
1069+
# (proc, ds) -> list of (dsin, spec)
1070+
excluded_sets = {}
10231071

10241072
for spec in dataspecs:
10251073
with self.set_context(ns=self._curr_ns.new_child(spec)):
10261074
_, data_input = self.parse_from_(None, "data_input", write=False)
1027-
names = {}
1075+
10281076
for dsin in data_input:
10291077
cd = self.produce_commondata(dataset_input=dsin)
10301078
proc = get_info(cd).nnpdf31_process
10311079
ds = dsin.name
1032-
names[(proc, ds)] = dsin
1033-
all_used.append(names)
1034-
1035-
union = set.union(*(set(d) for d in all_used))
1036-
intersection = set.intersection(*(set(d) for d in all_used))
1037-
excluded_set = union - intersection
1038-
1039-
excluded_datasets = []
1040-
excluded_dataset_names = []
1041-
for names in all_used:
1042-
for k in excluded_set:
1043-
if k in names:
1044-
excluded_datasets.append(names[k])
1045-
excluded_dataset_names.append(k)
1046-
1047-
more_info = {
1048-
"pdfs": [i["pdf"] for i in dataspecs],
1049-
"theoryid": dataspecs[0]["theoryid"],
1050-
"fit": dataspecs[1]["fit"],
1080+
key = (proc, ds)
1081+
1082+
if key not in excluded_sets:
1083+
excluded_sets[key] = []
1084+
excluded_sets[key].append((dsin, spec))
1085+
1086+
all_keys = set(excluded_sets)
1087+
excluded_keys = {
1088+
k for k, occurences_for_key in excluded_sets.items()
1089+
if len(occurences_for_key) < len(dataspecs)
10511090
}
1052-
return [
1053-
{
1054-
"dataset_input": dsin,
1055-
"dataset_name": dsin.name,
1056-
**more_info
1057-
}
1058-
for dsin in excluded_datasets
1059-
]
1091+
1092+
def build_more_info(spec):
1093+
return {
1094+
"pdfs": [i["pdf"] for i in dataspecs],
1095+
"theoryid": spec["theoryid"],
1096+
"fit": spec["fit"],
1097+
}
1098+
1099+
out = []
1100+
for key in excluded_keys:
1101+
for dsin, spec in excluded_sets[key]:
1102+
out.append({
1103+
"dataset_input": dsin,
1104+
"dataset_name": dsin.name,
1105+
**build_more_info(spec),
1106+
})
1107+
1108+
return out
1109+
10601110

10611111
def produce_matched_excluded_datasets_from_dataspecs(self, dataspecs):
10621112
return self.produce_matched_excluded_datasets_by_name(dataspecs)

validphys2/src/validphys/scripts/vp_comparefits.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,7 @@ def complete_mapping(self):
233233
if are_the_same:
234234
log.info("Using excluded comparecard: identical theory cuts/covmat detected")
235235
autosettings["template"] = "report_with_excluded.md"
236+
args['config_yml'] = comparefittemplates.template_with_excluded_path
236237
return autosettings
237238

238239

0 commit comments

Comments
 (0)