11from dataclasses import dataclass , field
2+ from numbers import Number
23
34import intake_esgf .projects
45import iris .cube
@@ -81,11 +82,21 @@ def find_data(self, **facets: FacetValue) -> list[IntakeESGFDataset]:
8182 :obj:`list` of :obj:`esmvalcore.io.intake_esgf.IntakeESGFDataset`
8283 A list of data elements that have been found.
8384 """
84- # Translate "our" facets to ESGF facets
85+ # Normalize facets so all values are `list[str]`.
86+ facets = {
87+ facet : [str (values )]
88+ if isinstance (values , str | Number | bool )
89+ else values
90+ for facet , values in facets .items ()
91+ }
92+ # Translate "our" facets to ESGF facets and "our" values to ESGF values.
8593 esgf_facets = {
86- self .values .get (k , {}).get (v , v ): facets [k ]
87- for k , v in self .facets .items ()
88- if k in facets and facets [k ] != "*"
94+ their_facet : [
95+ self .values .get (our_facet , {}).get (v , v )
96+ for v in facets [our_facet ]
97+ ]
98+ for our_facet , their_facet in self .facets .items ()
99+ if our_facet in facets
89100 }
90101 # TODO: filter by timerange
91102 try :
@@ -99,32 +110,54 @@ def find_data(self, **facets: FacetValue) -> list[IntakeESGFDataset]:
99110 )
100111 return []
101112
113+ # Return a list of datasets, with one IntakeESGFDataset per dataset_id.
114+ result : list [IntakeESGFDataset ] = []
115+
116+ # These are the keys in the dict[str, xarray.Dataset] returned by
117+ # `intake_esgf.ESGFCatalog.to_dataset_dict`. Taken from:
118+ # https://github.com/esgf2-us/intake-esgf/blob/c34124e54078e70ef271709a6d158edb22bcdb96/intake_esgf/catalog.py#L523-L528
102119 self .catalog .df ["key" ] = self .catalog .df .apply (
103120 lambda row : "." .join (
104121 [row [f ] for f in self .catalog .project .master_id_facets ()],
105122 ),
106123 axis = 1 ,
107124 )
108125 inverse_values = {
109- facet : {v : k }
110- for facet in self .values
111- for k , v in self .values [facet ].items ()
126+ our_facet : {
127+ their_value : our_value
128+ for our_value , their_value in self .values [our_facet ].items ()
129+ }
130+ for our_facet in self .values
112131 }
113- datasets = []
114132 for _ , row in self .catalog .df .iterrows ():
115133 dataset_id = row ["key" ]
116134 # Subset the catalog to a single dataset.
117135 cat = self .catalog .clone ()
118- cat .project = self .catalog .project
119136 cat .df = self .catalog .df [self .catalog .df .key == dataset_id ]
120- facets = {
121- k : inverse_values .get (k , {}).get (row [v ], row [v ])
122- for k , v in self .facets .items ()
137+ # Discard all but the latest version. It is not clear how/if
138+ # `intake_esgf.ESGFCatalog.to_dataset_dict` supports multiple versions.
139+ cat .df = cat .df [cat .df .version == cat .df .version .max ()]
140+ cat .project = self .catalog .project
141+ if "short_name" in facets :
142+ cat .last_search [self .facets ["short_name" ]] = facets [
143+ "short_name"
144+ ]
145+ # Retrieve "our" facets associated with the dataset_id.
146+ dataset_facets = {
147+ our_facet : [
148+ inverse_values .get (our_facet , {}).get (v , v )
149+ for v in row [their_facet ]
150+ ]
151+ for our_facet , their_facet in self .facets .items ()
152+ if their_facet in row
153+ }
154+ dataset_facets = {
155+ f : v [0 ] if len (v ) == 1 else v for f , v in facets .items ()
123156 }
124157 dataset = IntakeESGFDataset (
125158 name = dataset_id ,
126- facets = facets ,
159+ facets = dataset_facets ,
127160 catalog = cat ,
128161 )
129- datasets .append (dataset )
130- return datasets
162+ result .append (dataset )
163+ return result
0 commit comments