Skip to content

Commit b98ab5d

Browse files
committed
progress
1 parent 8a7a935 commit b98ab5d

File tree

28 files changed

+319
-159
lines changed

28 files changed

+319
-159
lines changed

esmvalcore/_provenance.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,6 @@
1111
from PIL.PngImagePlugin import PngInfo
1212
from prov.model import ProvDerivation, ProvDocument
1313

14-
from esmvalcore.io.protocol import DataElement
15-
1614
from ._version import __version__
1715

1816
logger = logging.getLogger(__name__)
@@ -111,7 +109,7 @@ class TrackedFile:
111109

112110
def __init__(
113111
self,
114-
filename: Path | DataElement,
112+
filename,
115113
attributes=None,
116114
ancestors=None,
117115
prov_filename=None,
@@ -120,8 +118,8 @@ def __init__(
120118
121119
Arguments
122120
---------
123-
filename:
124-
Path to the file on disk.
121+
filename: :obj:`pathlib.Path` or :obj:`esmvalcore.io.protocol.DataElement`
122+
Path or data element containing the data described by the provenance.
125123
attributes: dict
126124
Dictionary with facets describing the file. If set to None, this
127125
will be read from the file when provenance is initialized.
@@ -139,6 +137,8 @@ def __init__(
139137
self.prov_filename = self._filename
140138
else:
141139
self.prov_filename = prov_filename
140+
# TODO: ensure global attributes are also recorded for input data that
141+
# is not stored in NetCDF files.
142142
self.attributes = copy.deepcopy(attributes)
143143

144144
self.provenance = None
@@ -171,20 +171,21 @@ def copy_provenance(self):
171171
if self.provenance is None:
172172
msg = f"Provenance of {self} not initialized"
173173
raise ValueError(msg)
174-
new = TrackedFile(self.filename, self.attributes)
174+
new = TrackedFile(Path(self.filename), self.attributes)
175175
new.provenance = copy.deepcopy(self.provenance)
176176
new.entity = new.provenance.get_record(self.entity.identifier)[0]
177177
new.activity = new.provenance.get_record(self.activity.identifier)[0]
178178
return new
179179

180180
@property
181-
def filename(self):
182-
"""Filename."""
181+
def filename(self) -> str:
182+
"""Name of data described by this provenance document."""
183183
return self._filename
184184

185185
@property
186186
def provenance_file(self):
187-
"""Filename of provenance."""
187+
"""Filename of provenance file."""
188+
# This may not work well if filename is the instance_id.
188189
return os.path.splitext(self.filename)[0] + "_provenance.xml"
189190

190191
def initialize_provenance(self, activity):

esmvalcore/_recipe/recipe.py

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -443,10 +443,7 @@ def _get_common_attributes(
443443

444444
# Ensure that attributes start_year and end_year are always available if at
445445
# least one of the input datasets defines it
446-
if "timerange" in attributes:
447-
start_year, end_year = _parse_period(attributes["timerange"])
448-
attributes["start_year"] = int(str(start_year[0:4]))
449-
attributes["end_year"] = int(str(end_year[0:4]))
446+
_set_start_end_year(attributes)
450447

451448
return attributes
452449

@@ -710,7 +707,7 @@ def _get_preprocessor_products(
710707
)
711708

712709
for product in products:
713-
_set_start_end_year(product)
710+
_set_start_end_year(product.attributes)
714711
product.check()
715712

716713
return products
@@ -770,18 +767,18 @@ def _configure_multi_product_preprocessor(
770767

771768
for product in multimodel_products | ensemble_products:
772769
product.check()
773-
_set_start_end_year(product)
770+
_set_start_end_year(product.attributes)
774771

775772

776-
def _set_start_end_year(product: PreprocessorFile) -> None:
773+
def _set_start_end_year(attributes: dict[str, Any]) -> None:
777774
"""Set the attributes `start_year` and `end_year`.
778775
779776
These attributes are used by many diagnostic scripts in ESMValTool.
780777
"""
781-
if "timerange" in product.attributes:
782-
start_year, end_year = _parse_period(product.attributes["timerange"])
783-
product.attributes["start_year"] = int(str(start_year[0:4]))
784-
product.attributes["end_year"] = int(str(end_year[0:4]))
778+
if "timerange" in attributes:
779+
start_year, end_year = _parse_period(attributes["timerange"])
780+
attributes["start_year"] = int(str(start_year[0:4]))
781+
attributes["end_year"] = int(str(end_year[0:4]))
785782

786783

787784
def _update_preproc_functions(

esmvalcore/_task.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -351,7 +351,7 @@ def __init__(self, prev_preproc_dir, preproc_dir, name):
351351
for prov_filename, attributes in prev_metadata.items():
352352
# Update the filename in case the output directory was moved
353353
# since the original run
354-
filename = str(prev_preproc_dir / Path(prov_filename).name)
354+
filename = prev_preproc_dir / Path(prov_filename).name
355355
attributes["filename"] = filename
356356
product = TrackedFile(
357357
filename,

esmvalcore/config/_config.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ def warn_if_old_extra_facets_exist() -> None:
9494
)
9595

9696

97-
def load_config_developer(cfg_file):
97+
def load_config_developer(cfg_file) -> dict:
9898
"""Read the developer's configuration file."""
9999
with open(cfg_file, encoding="utf-8") as file:
100100
cfg = yaml.safe_load(file)
@@ -118,6 +118,7 @@ def load_config_developer(cfg_file):
118118
CFG[project] = settings
119119

120120
read_cmor_tables(cfg_file)
121+
return cfg
121122

122123

123124
def get_project_config(project):

esmvalcore/config/_config_object.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import yaml
1414

1515
import esmvalcore
16+
from esmvalcore.config._config import load_config_developer
1617
from esmvalcore.config._config_validators import (
1718
_deprecated_options_defaults,
1819
_deprecators,
@@ -145,6 +146,10 @@ def _load_user_config(
145146

146147
try:
147148
new.update(mapping)
149+
# Add known projects from config-developer file while we still have it.
150+
for project in load_config_developer(new["config_developer_file"]):
151+
if project not in new["projects"]:
152+
new["projects"][project] = {}
148153
new.check_missing()
149154
except InvalidConfigParameter as exc:
150155
msg = (
@@ -364,7 +369,10 @@ def load_from_dirs(self, dirs: Iterable[str | Path]) -> None:
364369
new_config_dict = self._get_config_dict_from_dirs(dirs)
365370
self.clear()
366371
self.update(new_config_dict)
367-
372+
# Add known projects from config-developer file while we still have it.
373+
for project in load_config_developer(self["config_developer_file"]):
374+
if project not in self["projects"]:
375+
self["projects"][project] = {}
368376
self.check_missing()
369377

370378
def reload(self) -> None:

esmvalcore/config/_data_sources.py

Lines changed: 39 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,34 +11,54 @@
1111
logger = logging.getLogger(__name__)
1212

1313

14-
def _get_data_sources(session: Session) -> list[DataSource]:
14+
def _get_data_sources(
15+
session: Session,
16+
project: str | None,
17+
) -> list[DataSource]:
1518
"""Get the list of available data sources.
1619
1720
Arguments
1821
---------
1922
session:
2023
The configuration.
24+
project:
25+
If specified, only data sources for this project are returned.
2126
2227
Returns
2328
-------
2429
:obj:`list` of :obj:`DataSource`:
2530
A list of available data sources.
2631
32+
Raises
33+
------
34+
TypeError:
35+
If a data source in the configuration is not of type `DataSource`.
36+
KeyError:
37+
If the project or its settings are not found in the configuration.
38+
2739
"""
2840
data_sources: list[DataSource] = []
29-
for project, project_settings in session["projects"].items():
41+
if project is not None and project not in session["projects"]:
42+
msg = f"Unknown project '{project}', please configure it under 'projects'."
43+
raise KeyError(msg)
44+
settings = (
45+
session["projects"]
46+
if project is None
47+
else {project: session["projects"][project]}
48+
)
49+
for project_, project_settings in settings.items():
3050
if "data" not in project_settings:
31-
logger.info("Using legacy data sources for project '%s'", project)
51+
logger.info("Using legacy data sources for project '%s'", project_)
3252
# Use legacy data sources from config-user.yml.
33-
legacy_local_sources = esmvalcore.local._get_data_sources(project) # noqa: SLF001
53+
legacy_local_sources = esmvalcore.local._get_data_sources(project_) # noqa: SLF001
3454
data_sources.extend(legacy_local_sources)
3555
if (
3656
session["search_esgf"] != "never"
37-
and project in esmvalcore.esgf.facets.FACETS
57+
and project_ in esmvalcore.esgf.facets.FACETS
3858
):
3959
data_source = esmvalcore.esgf.ESGFDataSource(
40-
name="legacy",
41-
project=project,
60+
name="legacy-esgf",
61+
project=project_,
4262
priority=2,
4363
download_dir=session["download_dir"],
4464
)
@@ -56,16 +76,26 @@ def _get_data_sources(session: Session) -> list[DataSource]:
5676
priority = kwargs.pop("priority", 1)
5777
data_source = cls(
5878
name=name,
59-
project=project,
79+
project=project_,
6080
priority=priority,
6181
**kwargs,
6282
)
6383
if not isinstance(data_source, DataSource):
6484
msg = (
6585
"Expected a data source of type `esmvalcore.io.protocol.DataSource`, "
66-
f"but your configuration for project '{project}' contains "
86+
f"but your configuration for project '{project_}' contains "
6787
f"'{data_source}' of type '{type(data_source)}'."
6888
)
6989
raise TypeError(msg)
7090
data_sources.append(data_source)
91+
92+
if not data_sources:
93+
if project is None:
94+
msg = "No data sources found. Check your configuration under 'projects'"
95+
else:
96+
msg = (
97+
f"No data sources found for project '{project}'. "
98+
f"Check your configuration under 'projects: {project}: data'"
99+
)
100+
raise KeyError(msg)
71101
return data_sources
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
projects:
2+
CMIP6:
3+
data:
4+
archive-dkrz:
5+
type: "esmvalcore.local.DataSource"
6+
rootpath: /work/bd0854/DATA/ESMValTool2/CMIP6_DKRZ
7+
dirname_template: "{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{version}"
8+
filename_template: "{short_name}_{mip}_{dataset}_{exp}_{ensemble}_{grid}*.nc"
9+
CMIP5:
10+
data:
11+
archive-dkrz:
12+
type: "esmvalcore.local.DataSource"
13+
rootpath: /work/bd0854/DATA/ESMValTool2/CMIP5_DKRZ
14+
dirname_template: "{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{mip}/{ensemble}/{version}/{short_name}"
15+
filename_template: "{short_name}_{mip}_{dataset}_{exp}_{ensemble}*.nc"
16+
CMIP3:
17+
data:
18+
archive-dkrz:
19+
type: "esmvalcore.local.DataSource"
20+
rootpath: /work/bd0854/DATA/ESMValTool2/CMIP3
21+
dirname_template: "{exp}/{modeling_realm}/{frequency}/{short_name}/{dataset}/{ensemble}"
22+
filename_template: "{short_name}_*.nc"
23+
CORDEX:
24+
data:
25+
archive-dkrz:
26+
type: "esmvalcore.local.DataSource"
27+
rootpath: /work/ik1017/C3SCORDEX/data/c3s-cordex/output
28+
dirname_template: "{domain}/{institute}/{driver}/{exp}/{ensemble}/{institute}-{dataset}/{rcm_version}/{mip}/{short_name}/{version}"
29+
filename_template: "{short_name}_{domain}_{driver}_{exp}_{ensemble}_{institute}-{dataset}_{rcm_version}_{mip}*.nc"
30+
obs4MIPs:
31+
data:
32+
archive-dkrz:
33+
type: "esmvalcore.local.DataSource"
34+
rootpath: /work/bd0854/DATA/ESMValTool2/OBS
35+
dirname_template: "Tier{tier}/{dataset}"
36+
filename_template: "{short_name}_*.nc"
37+
native6:
38+
data:
39+
archive-dkrz:
40+
type: "esmvalcore.local.DataSource"
41+
rootpath: /work/bd0854/DATA/ESMValTool2/RAWOBS
42+
dirname_template: "Tier{tier}/{dataset}/{version}/{frequency}/{short_name}"
43+
filename_template: "*.nc"
44+
era5-archive-dkrz:
45+
type: "esmvalcore.local.DataSource"
46+
rootpath: /pool/data/ERA5
47+
dirname_template: "{family}/{level}/{type}/{tres}/{grib_id}"
48+
filename_template: "{family}{level}{typeid}_{tres}_*_{grib_id}.grb"
49+
OBS6:
50+
data:
51+
archive-dkrz:
52+
type: "esmvalcore.local.DataSource"
53+
rootpath: /work/bd0854/DATA/ESMValTool2/OBS
54+
dirname_template: "Tier{tier}/{dataset}"
55+
filename_template: "{project}_{dataset}_{type}_{version}_{mip}_{short_name}[_.]*nc"
56+
OBS:
57+
data:
58+
archive-dkrz:
59+
type: "esmvalcore.local.DataSource"
60+
rootpath: /work/bd0854/DATA/ESMValTool2/OBS
61+
dirname_template: "Tier{tier}/{dataset}"
62+
filename_template: "{project}_{dataset}_{type}_{version}_{mip}_{short_name}[_.]*nc"

esmvalcore/dataset.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -756,7 +756,7 @@ def version(file: DataElement) -> str:
756756
self._file_globs = []
757757
files: dict[str, DataElement] = {}
758758
for data_source in sorted(
759-
_get_data_sources(self.session),
759+
_get_data_sources(self.session, self.facets["project"]), # type: ignore[arg-type]
760760
key=lambda ds: ds.priority,
761761
):
762762
if data_source.project == self.facets["project"]:
@@ -985,7 +985,8 @@ def _update_timerange(self) -> None:
985985
dataset.facets.pop("timerange")
986986
dataset.supplementaries = []
987987
check.data_availability(dataset)
988-
intervals = [_get_start_end_date(f) for f in dataset.files]
988+
# TODO: read start and end time from facets
989+
intervals = [_get_start_end_date(f) for f in dataset.files] # type: ignore[arg-type]
989990

990991
min_date = min(interval[0] for interval in intervals)
991992
max_date = max(interval[1] for interval in intervals)

esmvalcore/esgf/_download.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
import yaml
2323
from humanfriendly import format_size, format_timespan
2424

25-
from esmvalcore.config._config import CFG
25+
from esmvalcore.config import CFG
2626
from esmvalcore.io.protocol import DataElement
2727
from esmvalcore.local import LocalFile
2828
from esmvalcore.typing import Facets

esmvalcore/esgf/_search.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import itertools
44
import logging
5-
from dataclasses import dataclass
5+
from dataclasses import dataclass, field
66
from functools import lru_cache
77
from pathlib import Path
88

@@ -398,12 +398,9 @@ class ESGFDataSource(DataSource):
398398
download_dir: Path
399399
"""The destination directory where data will be downloaded."""
400400

401-
debug_info: str = ""
401+
debug_info: str = field(init=False, default="")
402402
"""A string containing debug information when no data is found."""
403403

404-
def __post__init__(self):
405-
self.debug_info = ""
406-
407404
def find_data(self, **facets: FacetValue) -> list[ESGFFile]:
408405
"""Find data.
409406

0 commit comments

Comments
 (0)