Skip to content

Commit e91e383

Browse files
committed
Add support for CMIP5
1 parent d97975e commit e91e383

File tree

4 files changed

+97
-18
lines changed

4 files changed

+97
-18
lines changed
Lines changed: 46 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,46 @@
1+
projects:
2+
CMIP6:
3+
data:
4+
intake-esgf:
5+
type: esmvalcore.io.intake_esgf.IntakeESGFDataSource
6+
facets:
7+
activity: activity_drs
8+
dataset: source_id
9+
ensemble: member_id
10+
exp: experiment_id
11+
institute: institution_id
12+
grid: grid_label
13+
mip: table_id
14+
project: "project"
15+
short_name: variable_id
16+
CMIP5:
17+
data:
18+
intake-esgf:
19+
type: esmvalcore.io.intake_esgf.IntakeESGFDataSource
20+
facets:
21+
dataset: model
22+
ensemble: ensemble
23+
exp: experiment
24+
frequency: time_frequency
25+
institute: institute
26+
mip: cmor_table
27+
product: product
28+
project: "project"
29+
short_name: variable
30+
values:
31+
dataset:
32+
"ACCESS1-0": "ACCESS1.0"
33+
"ACCESS1-3": "ACCESS1.3"
34+
"bcc-csm1-1": "BCC-CSM1.1"
35+
"bcc-csm1-1-m": "BCC-CSM1.1(m)"
36+
"CESM1-BGC": "CESM1(BGC)"
37+
"CESM1-CAM5": "CESM1(CAM5)"
38+
"CESM1-CAM5-1-FV2": "CESM1(CAM5.1,FV2)"
39+
"CESM1-FASTCHEM": "CESM1(FASTCHEM)"
40+
"CESM1-WACCM": "CESM1(WACCM)"
41+
"CSIRO-Mk3-6-0": "CSIRO-Mk3.6.0"
42+
"fio-esm": "FIO-ESM"
43+
"GFDL-CM2p1": "GFDL-CM2.1"
44+
"inmcm4": "INM-CM4"
45+
"MRI-AGCM3-2H": "MRI-AGCM3.2H"
46+
"MRI-AGCM3-2S": "MRI-AGCM3.2S"

esmvalcore/config/data_sources.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ def get_data_sources(session: Session) -> list[DataSource]:
1515
if "data" not in project_settings:
1616
logger.info("Using legacy data sources for project '%s'", project)
1717
# Use legacy data sources from config-user.yml.
18-
legacy_local_sources = esmvalcore.local._get_data_sources(project)
18+
legacy_local_sources = esmvalcore.local._get_data_sources(project) # noqa: SLF001
1919
data_sources.extend(legacy_local_sources)
2020
if (
2121
session["search_esgf"] != "never"

esmvalcore/io/intake_esgf.py

Lines changed: 48 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
from dataclasses import dataclass, field
2+
from numbers import Number
23

34
import intake_esgf.projects
45
import iris.cube
@@ -81,11 +82,21 @@ def find_data(self, **facets: FacetValue) -> list[IntakeESGFDataset]:
8182
:obj:`list` of :obj:`esmvalcore.io.intake_esgf.IntakeESGFDataset`
8283
A list of data elements that have been found.
8384
"""
84-
# Translate "our" facets to ESGF facets
85+
# Normalize facets so all values are `list[str]`.
86+
facets = {
87+
facet: [str(values)]
88+
if isinstance(values, str | Number | bool)
89+
else values
90+
for facet, values in facets.items()
91+
}
92+
# Translate "our" facets to ESGF facets and "our" values to ESGF values.
8593
esgf_facets = {
86-
self.values.get(k, {}).get(v, v): facets[k]
87-
for k, v in self.facets.items()
88-
if k in facets and facets[k] != "*"
94+
their_facet: [
95+
self.values.get(our_facet, {}).get(v, v)
96+
for v in facets[our_facet]
97+
]
98+
for our_facet, their_facet in self.facets.items()
99+
if our_facet in facets
89100
}
90101
# TODO: filter by timerange
91102
try:
@@ -99,32 +110,54 @@ def find_data(self, **facets: FacetValue) -> list[IntakeESGFDataset]:
99110
)
100111
return []
101112

113+
# Return a list of datasets, with one IntakeESGFDataset per dataset_id.
114+
result: list[IntakeESGFDataset] = []
115+
116+
# These are the keys in the dict[str, xarray.Dataset] returned by
117+
# `intake_esgf.ESGFCatalog.to_dataset_dict`. Taken from:
118+
# https://github.com/esgf2-us/intake-esgf/blob/c34124e54078e70ef271709a6d158edb22bcdb96/intake_esgf/catalog.py#L523-L528
102119
self.catalog.df["key"] = self.catalog.df.apply(
103120
lambda row: ".".join(
104121
[row[f] for f in self.catalog.project.master_id_facets()],
105122
),
106123
axis=1,
107124
)
108125
inverse_values = {
109-
facet: {v: k}
110-
for facet in self.values
111-
for k, v in self.values[facet].items()
126+
our_facet: {
127+
their_value: our_value
128+
for our_value, their_value in self.values[our_facet].items()
129+
}
130+
for our_facet in self.values
112131
}
113-
datasets = []
114132
for _, row in self.catalog.df.iterrows():
115133
dataset_id = row["key"]
116134
# Subset the catalog to a single dataset.
117135
cat = self.catalog.clone()
118-
cat.project = self.catalog.project
119136
cat.df = self.catalog.df[self.catalog.df.key == dataset_id]
120-
facets = {
121-
k: inverse_values.get(k, {}).get(row[v], row[v])
122-
for k, v in self.facets.items()
137+
# Discard all but the latest version. It is not clear how/if
138+
# `intake_esgf.ESGFCatalog.to_dataset_dict` supports multiple versions.
139+
cat.df = cat.df[cat.df.version == cat.df.version.max()]
140+
cat.project = self.catalog.project
141+
if "short_name" in facets:
142+
cat.last_search[self.facets["short_name"]] = facets[
143+
"short_name"
144+
]
145+
# Retrieve "our" facets associated with the dataset_id.
146+
dataset_facets = {
147+
our_facet: [
148+
inverse_values.get(our_facet, {}).get(v, v)
149+
for v in row[their_facet]
150+
]
151+
for our_facet, their_facet in self.facets.items()
152+
if their_facet in row
153+
}
154+
dataset_facets = {
155+
f: v[0] if len(v) == 1 else v for f, v in facets.items()
123156
}
124157
dataset = IntakeESGFDataset(
125158
name=dataset_id,
126-
facets=facets,
159+
facets=dataset_facets,
127160
catalog=cat,
128161
)
129-
datasets.append(dataset)
130-
return datasets
162+
result.append(dataset)
163+
return result

esmvalcore/preprocessor/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55
import copy
66
import inspect
77
import logging
8-
from pathlib import Path
98
from pprint import pformat
109
from typing import TYPE_CHECKING, Any
1110

@@ -97,6 +96,7 @@
9796

9897
if TYPE_CHECKING:
9998
from collections.abc import Iterable
99+
from pathlib import Path
100100

101101
from dask.delayed import Delayed
102102

@@ -399,7 +399,7 @@ def _run_preproc_function(function, items, kwargs, input_files=None):
399399
)
400400

401401
# Make sure that the arguments are indexable
402-
if isinstance(items, (PreprocessorFile, Cube, str, Path)):
402+
if isinstance(items, (PreprocessorFile, Cube, DataElement)):
403403
items = [items]
404404
if isinstance(items, set):
405405
items = list(items)

0 commit comments

Comments
 (0)