@@ -1000,6 +1000,7 @@ def produce_matched_datasets_from_dataspecs(self, dataspecs):
10001000 res .sort (key = lambda x : (x ["process" ], x ["dataset_name" ]))
10011001 return res
10021002
1003+ #### JCM code
10031004
10041005# def produce_matched_excluded_datasets_by_name(self, dataspecs):
10051006# import pdb; pdb.set_trace()
@@ -1014,49 +1015,98 @@ def produce_matched_datasets_from_dataspecs(self, dataspecs):
10141015# }
10151016# return [{"dataset_input": i, **more_info} for i in dinputs_b[1:3]]
10161017
1018+ #### JELLE code
1019+
1020+ # def produce_matched_excluded_datasets_by_name(self, dataspecs):
1021+ # """Like produce_matched_datasets_from_dataspecs but for all datasets excluded from the fit."""
1022+ # self._check_dataspecs_type(dataspecs)
1023+ # loader = Loader()
1024+ #
1025+ # all_used = []
1026+ #
1027+ # for spec in dataspecs:
1028+ # with self.set_context(ns=self._curr_ns.new_child(spec)):
1029+ # _, data_input = self.parse_from_(None, "data_input", write=False)
1030+ # names = {}
1031+ # for dsin in data_input:
1032+ # cd = self.produce_commondata(dataset_input=dsin)
1033+ # proc = get_info(cd).nnpdf31_process
1034+ # ds = dsin.name
1035+ # names[(proc, ds)] = dsin
1036+ # all_used.append(names)
1037+ #
1038+ # union = set.union(*(set(d) for d in all_used))
1039+ # intersection = set.intersection(*(set(d) for d in all_used))
1040+ # excluded_set = union - intersection
1041+ #
1042+ # excluded_datasets = []
1043+ # for names in all_used:
1044+ # for k in excluded_set:
1045+ # if k in names:
1046+ # excluded_datasets.append(names[k])
1047+ #
1048+ # more_info = {
1049+ # "pdfs": [i["pdf"] for i in dataspecs],
1050+ # "theoryid": dataspecs[0]["theoryid"],
1051+ # "fit": dataspecs[0]["fit"],
1052+ # }
1053+ # return [
1054+ # {
1055+ # "dataset_input": dsin,
1056+ # "dataset_name": dsin.name,
1057+ # **more_info
1058+ # }
1059+ # for dsin in excluded_datasets
1060+ # ]
1061+
#### Generalized version: each excluded dataset is tagged with the theoryid and fit of the dataspec it came from
1063+
def produce_matched_excluded_datasets_by_name(self, dataspecs):
    """Return the datasets that are not present in every dataspec.

    Each dataset input found in the ``data_input`` of a dataspec is keyed
    by ``(nnpdf31_process, dataset name)``. A key is considered excluded
    when it is missing from at least one of the dataspecs.

    Parameters
    ----------
    dataspecs
        Sequence of mappings. Each one is used as a child namespace to
        parse ``data_input`` and must provide the ``"pdf"``,
        ``"theoryid"`` and ``"fit"`` keys.

    Returns
    -------
    list of dict
        One entry per (excluded dataset, dataspec that contains it), with
        the keys ``"dataset_input"``, ``"dataset_name"``, ``"pdfs"`` (the
        pdf of every dataspec), ``"theoryid"`` and ``"fit"`` (both taken
        from the dataspec the dataset came from). Entries are ordered by
        first appearance of the dataset, then by dataspec order.

    Raises
    ------
    KeyError
        If a dataspec lacks ``"pdf"``, or a dataspec contributing an
        excluded dataset lacks ``"theoryid"`` or ``"fit"``.
    """
    self._check_dataspecs_type(dataspecs)

    # (process, dataset name) -> [(dataset_input, spec), ...], with at most
    # one pair per dataspec. Plain dicts keep insertion order, which makes
    # the output order reproducible from run to run.
    occurrences = {}

    for spec in dataspecs:
        with self.set_context(ns=self._curr_ns.new_child(spec)):
            _, data_input = self.parse_from_(None, "data_input", write=False)

        # NOTE(review): assumes only the parse above needs the per-spec
        # namespace, since produce_commondata receives its input
        # explicitly -- confirm.
        # De-duplicate within a single dataspec (last one wins) so that a
        # dataset listed twice in one spec is not mistaken for a dataset
        # shared by two specs.
        seen_in_spec = {}
        for dsin in data_input:
            cd = self.produce_commondata(dataset_input=dsin)
            proc = get_info(cd).nnpdf31_process
            seen_in_spec[(proc, dsin.name)] = dsin

        for key, dsin in seen_in_spec.items():
            occurrences.setdefault(key, []).append((dsin, spec))

    n_specs = len(dataspecs)
    pdfs = [i["pdf"] for i in dataspecs]

    out = []
    for entries in occurrences.values():
        if len(entries) >= n_specs:
            # Present in every dataspec: not excluded.
            continue
        for dsin, spec in entries:
            out.append(
                {
                    "dataset_input": dsin,
                    "dataset_name": dsin.name,
                    # Fresh copy per entry so callers can mutate one
                    # result without affecting the others.
                    "pdfs": list(pdfs),
                    "theoryid": spec["theoryid"],
                    "fit": spec["fit"],
                }
            )

    return out
1109+
10601110
def produce_matched_excluded_datasets_from_dataspecs(self, dataspecs):
    """Return the excluded datasets for ``dataspecs``.

    Delegates to ``produce_matched_excluded_datasets_by_name`` and returns
    its result unchanged.
    """
    excluded = self.produce_matched_excluded_datasets_by_name(dataspecs)
    return excluded
0 commit comments