Skip to content

Commit a470562

Browse files
authored
Merge pull request #2387 from NNPDF/nnpdf-vp-excluded-datasets
Excluded dataset page for vp-comparefits
2 parents 29c78fe + a32dc03 commit a470562

File tree

6 files changed

+112
-7
lines changed

6 files changed

+112
-7
lines changed

n3fit/runcards/examples/developing.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ parameters: # This defines the parameter dictionary that is passed to the Model
8181
stopping_patience: 0.1 # percentage of the number of epochs
8282
layer_type: 'dense'
8383
dropout: 0.01
84-
interpolation_points: 7
84+
feature_scaling_points: 7
8585

8686
fitting:
8787
fitbasis: CCBAR_ASYMM

validphys2/src/validphys/comparefittemplates/comparecard.yaml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,26 @@ lumi_report:
9999
meta: Null
100100
template: lumi.md
101101

102+
mismatched_information:
103+
meta: Null
104+
actions_:
105+
- report
106+
107+
# Datasets will go to their own page
108+
mismatched_report:
109+
meta: Null
110+
template: mismatched.md
111+
112+
template_text: |
113+
Mismatched datasets
114+
---------------------
115+
The following plots correspond to datasets which are not available in one of the fits.
116+
117+
{@with mismatched_datasets_by_name@}
118+
[Plots for {@dataset_name@}]({@mismatched_report report@})
119+
{@endwith@}
120+
121+
102122
template: report.md
103123

104124
positivity:
@@ -119,6 +139,8 @@ dataspecs:
119139
from_: current
120140
speclabel:
121141
from_: current
142+
dataset_inputs:
143+
from_: fit
122144

123145
- theoryid:
124146
from_: reference
@@ -128,6 +150,8 @@ dataspecs:
128150
from_: reference
129151
speclabel:
130152
from_: reference
153+
dataset_inputs:
154+
from_: fit
131155

132156
t0_info:
133157
- use_t0: True
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
% Data-theory comparison for the mismatched dataset {@dataset_name@}
2+
# Absolute
3+
{@plot_fancy@}
4+
# Normalized
5+
{@Datanorm plot_fancy@}

validphys2/src/validphys/comparefittemplates/report.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,12 +99,17 @@ $\phi$ by {@processed_metadata_group@}
9999
{@plot_fits_groups_data_phi@}
100100
{@endwith@}
101101

102+
103+
102104
Dataset plots
103105
-------------
104106
{@with matched_datasets_from_dataspecs@}
105107
[Plots for {@dataset_name@}]({@dataset_report report@})
106108
{@endwith@}
107109

110+
[Mismatched datasets]({@mismatched_information report@})
111+
--------------------
112+
108113
Positivity
109114
----------
110115
{@with matched_positivity_from_dataspecs@}

validphys2/src/validphys/config.py

Lines changed: 51 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
default_filter_rules_input,
3737
default_filter_settings_input,
3838
)
39-
from validphys.fitdata import fitted_replica_indexes, num_fitted_replicas
39+
from validphys.fitdata import fitted_replica_indexes, match_datasets_by_name, num_fitted_replicas
4040
from validphys.gridvalues import LUMI_CHANNELS
4141
from validphys.loader import (
4242
DataNotFoundError,
@@ -981,14 +981,12 @@ def produce_matched_datasets_from_dataspecs(self, dataspecs):
981981
for spec in dataspecs:
982982
with self.set_context(ns=self._curr_ns.new_child(spec)):
983983
_, data_input = self.parse_from_(None, "data_input", write=False)
984-
985984
names = {}
986985
for dsin in data_input:
987986
cd = self.produce_commondata(dataset_input=dsin)
988987
proc = get_info(cd).nnpdf31_process
989988
ds = dsin.name
990989
names[(proc, ds)] = dsin
991-
992990
all_names.append(names)
993991
used_set = set.intersection(*(set(d) for d in all_names))
994992
res = []
@@ -997,13 +995,62 @@ def produce_matched_datasets_from_dataspecs(self, dataspecs):
997995
# TODO: Should this have the same name?
998996
inner_spec_list = inres["dataspecs"] = []
999997
for ispec, spec in enumerate(dataspecs):
1000-
# Passing spec by referene
998+
# Passing spec by reference
1001999
d = ChainMap({"dataset_input": all_names[ispec][k]}, spec)
10021000
inner_spec_list.append(d)
10031001
res.append(inres)
10041002
res.sort(key=lambda x: (x["process"], x["dataset_name"]))
10051003
return res
10061004

1005+
def produce_mismatched_datasets_by_name(self, dataspecs):
    """Collect the dataset inputs that are not shared by all dataspecs.

    Like produce_matched_datasets_from_dataspecs, but for mismatched
    datasets from a fit comparison. The output is set up to work with
    plot_fancy.

    A dataset input is considered mismatched when its ``(name, variant)``
    pair is absent from the ``dataset_inputs`` of at least one *other*
    dataspec, i.e. when either the name or the variant is different.

    Parameters
    ----------
    dataspecs:
        Sequence of mappings. Each one is read for the keys
        ``dataset_inputs`` (items exposing ``.name`` and ``.variant``),
        ``theoryid``, ``pdf`` and ``fit``.

    Returns
    -------
    list of dict
        One entry per mismatched dataset input, holding the keys
        ``dataset_input``, ``dataset_name``, ``theoryid``, ``pdfs`` (the
        ``pdf`` of every dataspec) and ``fit``. ``theoryid`` and ``fit``
        are taken from the dataspec that contains the dataset. Each
        ``(name, variant)`` pair is reported at most once.
    """
    self._check_dataspecs_type(dataspecs)

    # Reduce every dataset input to the (name, variant) pair so that only
    # those two fields take part in the comparison. The pairs are kept both
    # as a list (aligned with spec["dataset_inputs"]) and as a set (for
    # constant-time membership tests).
    # NOTE(review): assumes name and variant are hashable -- confirm.
    parsed_data = []
    for spec in dataspecs:
        keys = [(dsin.name, dsin.variant) for dsin in spec["dataset_inputs"]]
        parsed_data.append((spec, keys, set(keys)))

    already_mismatched = set()
    mismatched_dinputs = []
    for ispec, (spec, keys, _) in enumerate(parsed_data):
        for iother, (_, _, other_keys) in enumerate(parsed_data):
            # Skip the comparison of a dataspec with itself. Positions are
            # compared instead of the mappings to avoid a deep equality test.
            if ispec == iother:
                continue
            for dsin, key in zip(spec["dataset_inputs"], keys):
                # Every (name, variant) pair is reported only once
                if key in already_mismatched or key in other_keys:
                    continue
                mismatched_dinputs.append((dsin, spec))
                already_mismatched.add(key)

    # Prepare the output for plot_fancy
    return [
        {
            "dataset_input": dsin,
            "dataset_name": dsin.name,
            "theoryid": spec["theoryid"],
            "pdfs": [i["pdf"] for i in dataspecs],
            "fit": spec["fit"],
        }
        for dsin, spec in mismatched_dinputs
    ]
1053+
10071054
def produce_matched_positivity_from_dataspecs(self, dataspecs):
10081055
"""Like produce_matched_datasets_from_dataspecs but for positivity datasets."""
10091056
self._check_dataspecs_type(dataspecs)
@@ -1014,7 +1061,6 @@ def produce_matched_positivity_from_dataspecs(self, dataspecs):
10141061
names = {(p.name): (p) for p in pos}
10151062
all_names.append(names)
10161063
used_set = set.intersection(*(set(d) for d in all_names))
1017-
10181064
res = []
10191065
for k in used_set:
10201066
inres = {"posdataset_name": k}

validphys2/src/validphys/scripts/vp_comparefits.py

100644100755
Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,6 @@ def add_positional_arguments(self, parser):
6868
help="Use LUX basis (which include the photon) for the report",
6969
action='store_true',
7070
)
71-
7271
parser.set_defaults()
7372

7473
def try_complete_args(self):
@@ -178,6 +177,7 @@ def get_commandline_arguments(self, cmdline=None):
178177
args['config_yml'] = comparefittemplates.template_pol_path
179178
else:
180179
args['config_yml'] = comparefittemplates.template_path
180+
181181
return args
182182

183183
def complete_mapping(self):
@@ -223,8 +223,16 @@ def complete_mapping(self):
223223
'unpolarized_bc': {'from_': 'positivity_bound'},
224224
}
225225
)
226+
are_the_same = self.check_identical_theory_cuts_covmat()
227+
if are_the_same:
228+
log.info("Adding mismatched datasets page: identical theory, data cuts and covmat detected")
229+
else:
230+
autosettings["mismatched_information"] = {
231+
"template_text": "Mismatched datasets cannot be shown due to cuts theory, data cuts and/or covmat not being identical"
232+
}
226233
return autosettings
227234

235+
228236
def get_config(self):
229237
self.try_complete_args()
230238
# No error handling here because this is our internal file
@@ -234,6 +242,23 @@ def get_config(self):
234242
c = yaml_safe.load(f)
235243
c.update(self.complete_mapping())
236244
return self.config_class(c, environment=self.environment)
245+
246+
def check_identical_theory_cuts_covmat(self):
    """
    Check whether the theory ID, data cuts and theory covmat configuration
    are the same in the current and the reference fit.

    Both runcards are obtained through the loader of the environment, using
    the ``current_fit`` and ``reference_fit`` entries of ``self.args``.
    The fields compared are ``theory.theoryid``, ``datacuts`` and
    ``theorycovmatconfig``; a field missing from both runcards counts as
    identical.

    Returns
    -------
    bool
        True only if the three fields are equal in both runcards. In the
        affirmative case, a mismatched datasets page will be added to the
        report.
    """
    loader = self.environment.loader
    current_runcard = loader.check_fit(self.args['current_fit']).as_input()
    reference_runcard = loader.check_fit(self.args['reference_fit']).as_input()

    def theoryid_of(runcard):
        # `or {}` also covers a runcard where `theory` is present but null,
        # for which a plain `.get("theory", {})` would return None.
        return (runcard.get("theory") or {}).get("theoryid")

    same_theoryid = theoryid_of(current_runcard) == theoryid_of(reference_runcard)
    same_datacuts = current_runcard.get("datacuts") == reference_runcard.get("datacuts")
    same_thcovmat = current_runcard.get("theorycovmatconfig") == reference_runcard.get(
        "theorycovmatconfig"
    )
    return same_theoryid and same_datacuts and same_thcovmat
237262

238263

239264
def main():

0 commit comments

Comments
 (0)