Skip to content

Commit 5283ecb

Browse files
committed
Some progress
1 parent 8a7a935 commit 5283ecb

File tree

13 files changed

+73
-42
lines changed

13 files changed

+73
-42
lines changed

esmvalcore/_provenance.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,6 @@
1111
from PIL.PngImagePlugin import PngInfo
1212
from prov.model import ProvDerivation, ProvDocument
1313

14-
from esmvalcore.io.protocol import DataElement
15-
1614
from ._version import __version__
1715

1816
logger = logging.getLogger(__name__)
@@ -111,7 +109,7 @@ class TrackedFile:
111109

112110
def __init__(
113111
self,
114-
filename: Path | DataElement,
112+
filename,
115113
attributes=None,
116114
ancestors=None,
117115
prov_filename=None,
@@ -120,8 +118,8 @@ def __init__(
120118
121119
Arguments
122120
---------
123-
filename:
124-
Path to the file on disk.
121+
filename: :obj:`pathlib.Path` or :obj:`esmvalcore.io.protocol.DataElement`
122+
Path or data element containing the data described by the provenance.
125123
attributes: dict
126124
Dictionary with facets describing the file. If set to None, this
127125
will be read from the file when provenance is initialized.
@@ -133,7 +131,9 @@ def __init__(
133131
processing.
134132
"""
135133
self._filename = (
136-
str(filename) if isinstance(filename, Path) else filename.name
134+
str(filename)
135+
if isinstance(filename, Path | str)
136+
else filename.name
137137
)
138138
if prov_filename is None:
139139
self.prov_filename = self._filename
@@ -178,13 +178,13 @@ def copy_provenance(self):
178178
return new
179179

180180
@property
181-
def filename(self):
182-
"""Filename."""
181+
def filename(self) -> str:
182+
"""Name of data described by this provenance document."""
183183
return self._filename
184184

185185
@property
186186
def provenance_file(self):
187-
"""Filename of provenance."""
187+
"""Filename of provenance file."""
188188
return os.path.splitext(self.filename)[0] + "_provenance.xml"
189189

190190
def initialize_provenance(self, activity):

esmvalcore/_recipe/recipe.py

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -443,10 +443,7 @@ def _get_common_attributes(
443443

444444
# Ensure that attributes start_year and end_year are always available if at
445445
# least one of the input datasets defines it
446-
if "timerange" in attributes:
447-
start_year, end_year = _parse_period(attributes["timerange"])
448-
attributes["start_year"] = int(str(start_year[0:4]))
449-
attributes["end_year"] = int(str(end_year[0:4]))
446+
_set_start_end_year(attributes)
450447

451448
return attributes
452449

@@ -710,7 +707,7 @@ def _get_preprocessor_products(
710707
)
711708

712709
for product in products:
713-
_set_start_end_year(product)
710+
_set_start_end_year(product.attributes)
714711
product.check()
715712

716713
return products
@@ -770,18 +767,18 @@ def _configure_multi_product_preprocessor(
770767

771768
for product in multimodel_products | ensemble_products:
772769
product.check()
773-
_set_start_end_year(product)
770+
_set_start_end_year(product.attributes)
774771

775772

776-
def _set_start_end_year(product: PreprocessorFile) -> None:
773+
def _set_start_end_year(attributes: dict) -> None:
777774
"""Set the attributes `start_year` and `end_year`.
778775
779776
These attributes are used by many diagnostic scripts in ESMValTool.
780777
"""
781-
if "timerange" in product.attributes:
782-
start_year, end_year = _parse_period(product.attributes["timerange"])
783-
product.attributes["start_year"] = int(str(start_year[0:4]))
784-
product.attributes["end_year"] = int(str(end_year[0:4]))
778+
if "timerange" in attributes:
779+
start_year, end_year = _parse_period(attributes["timerange"])
780+
attributes["start_year"] = int(str(start_year[0:4]))
781+
attributes["end_year"] = int(str(end_year[0:4]))
785782

786783

787784
def _update_preproc_functions(

esmvalcore/config/_config.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ def warn_if_old_extra_facets_exist() -> None:
9494
)
9595

9696

97-
def load_config_developer(cfg_file):
97+
def load_config_developer(cfg_file) -> dict:
9898
"""Read the developer's configuration file."""
9999
with open(cfg_file, encoding="utf-8") as file:
100100
cfg = yaml.safe_load(file)
@@ -118,6 +118,7 @@ def load_config_developer(cfg_file):
118118
CFG[project] = settings
119119

120120
read_cmor_tables(cfg_file)
121+
return cfg
121122

122123

123124
def get_project_config(project):

esmvalcore/config/_config_object.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import yaml
1414

1515
import esmvalcore
16+
from esmvalcore.config._config import load_config_developer
1617
from esmvalcore.config._config_validators import (
1718
_deprecated_options_defaults,
1819
_deprecators,
@@ -145,6 +146,10 @@ def _load_user_config(
145146

146147
try:
147148
new.update(mapping)
149+
# Add known projects from config-developer file while we still have it.
150+
for project in load_config_developer(new["config_developer_file"]):
151+
if project not in new["projects"]:
152+
new["projects"][project] = {}
148153
new.check_missing()
149154
except InvalidConfigParameter as exc:
150155
msg = (
@@ -364,7 +369,10 @@ def load_from_dirs(self, dirs: Iterable[str | Path]) -> None:
364369
new_config_dict = self._get_config_dict_from_dirs(dirs)
365370
self.clear()
366371
self.update(new_config_dict)
367-
372+
# Add known projects from config-developer file while we still have it.
373+
for project in load_config_developer(self["config_developer_file"]):
374+
if project not in self["projects"]:
375+
self["projects"][project] = {}
368376
self.check_missing()
369377

370378
def reload(self) -> None:

esmvalcore/config/_data_sources.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def _get_data_sources(session: Session) -> list[DataSource]:
3737
and project in esmvalcore.esgf.facets.FACETS
3838
):
3939
data_source = esmvalcore.esgf.ESGFDataSource(
40-
name="legacy",
40+
name="legacy-esgf",
4141
project=project,
4242
priority=2,
4343
download_dir=session["download_dir"],

esmvalcore/esgf/_download.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
import yaml
2323
from humanfriendly import format_size, format_timespan
2424

25-
from esmvalcore.config._config import CFG
25+
from esmvalcore.config import CFG
2626
from esmvalcore.io.protocol import DataElement
2727
from esmvalcore.local import LocalFile
2828
from esmvalcore.typing import Facets

esmvalcore/esgf/_search.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import itertools
44
import logging
5-
from dataclasses import dataclass
5+
from dataclasses import dataclass, field
66
from functools import lru_cache
77
from pathlib import Path
88

@@ -398,12 +398,9 @@ class ESGFDataSource(DataSource):
398398
download_dir: Path
399399
"""The destination directory where data will be downloaded."""
400400

401-
debug_info: str = ""
401+
debug_info: str = field(init=False, default="")
402402
"""A string containing debug information when no data is found."""
403403

404-
def __post__init__(self):
405-
self.debug_info = ""
406-
407404
def find_data(self, **facets: FacetValue) -> list[ESGFFile]:
408405
"""Find data.
409406

esmvalcore/io/intake_esgf.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,11 @@
1313
import intake_esgf
1414
import intake_esgf.exceptions
1515
import iris.cube
16+
import isodate
1617

1718
from esmvalcore.io.protocol import DataElement, DataSource
1819
from esmvalcore.iris_helpers import dataset_to_iris
20+
from esmvalcore.local import _parse_period
1921
from esmvalcore.typing import Facets, FacetValue
2022

2123
__all__ = [
@@ -51,11 +53,17 @@ def to_iris(self, ignore_warnings=None) -> iris.cube.CubeList:
5153
:
5254
The loaded data.
5355
"""
56+
files = self.catalog.to_path_dict(
57+
minimal_keys=False,
58+
quiet=True,
59+
)[self.name]
5460
dataset = self.catalog.to_dataset_dict(
5561
minimal_keys=False,
5662
add_measures=False,
5763
quiet=True,
5864
)[self.name]
65+
dataset.attrs["source_file"] = ", ".join(str(f) for f in files)
66+
5967
return dataset_to_iris(dataset, ignore_warnings=ignore_warnings)
6068

6169

@@ -121,7 +129,17 @@ def find_data(self, **facets: FacetValue) -> list[IntakeESGFDataset]:
121129
for our_facet, their_facet in self.facets.items()
122130
if our_facet in our_facets
123131
}
124-
# TODO: filter by timerange
132+
if (
133+
"timerange" in facets and "*" not in facets["timerange"] # type: ignore[operator]
134+
):
135+
start, end = _parse_period(facets["timerange"])
136+
esgf_facets["file_start"] = isodate.date_isoformat(
137+
isodate.parse_date(start.split("T")[0]),
138+
)
139+
esgf_facets["file_end"] = isodate.date_isoformat(
140+
isodate.parse_date(end.split("T")[0]),
141+
)
142+
# Search ESGF.
125143
try:
126144
self.catalog.search(**esgf_facets, quiet=True)
127145
except intake_esgf.exceptions.NoSearchResults:
@@ -156,6 +174,8 @@ def find_data(self, **facets: FacetValue) -> list[IntakeESGFDataset]:
156174
dataset_id = row["key"]
157175
# Subset the catalog to a single dataset.
158176
cat = self.catalog.clone()
177+
cat.file_start = self.catalog.file_start
178+
cat.file_end = self.catalog.file_end
159179
cat.df = self.catalog.df[self.catalog.df.key == dataset_id]
160180
# Discard all but the latest version. It is not clear how/if
161181
# `intake_esgf.ESGFCatalog.to_dataset_dict` supports multiple versions.

esmvalcore/local.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -473,17 +473,21 @@ class DataSource(esmvalcore.io.protocol.DataSource):
473473
priority: int
474474
"""The priority of the data source. Lower values have priority."""
475475

476-
debug_info: str = field(init=False)
476+
debug_info: str = field(init=False, default="")
477477
"""A string containing debug information when no data is found."""
478478

479479
rootpath: Path
480+
"""The path where the directories are located."""
481+
480482
dirname_template: str
483+
"""The template for the directory names."""
484+
481485
filename_template: str
486+
"""The template for the file names."""
482487

483488
def __post_init__(self) -> None:
484489
"""Set further attributes."""
485490
self._regex_pattern = self._templates_to_regex()
486-
self.debug_info = ""
487491

488492
@property
489493
def regex_pattern(self) -> str:
@@ -502,6 +506,7 @@ def get_glob_patterns(self, **facets) -> list[Path]:
502506

503507
def find_files(self, **facets) -> list[LocalFile]:
504508
"""Find files."""
509+
# TODO: deprecate this method
505510
return self.find_data(**facets)
506511

507512
def find_data(self, **facets) -> list[LocalFile]:
@@ -656,7 +661,7 @@ def _get_data_sources(project: str) -> list[DataSource]:
656661
file_templates = _select_drs("input_file", project, structure)
657662
sources.extend(
658663
DataSource(
659-
name="legacy",
664+
name="legacy-local",
660665
project=project,
661666
priority=1,
662667
rootpath=path,
@@ -776,6 +781,7 @@ def version(file):
776781
return result
777782

778783

784+
# TODO: Deprecate this?
779785
def find_files(
780786
*,
781787
debug: bool = False,

tests/conftest.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ def _load_default_config():
2727
"ignore",
2828
message="Do not instantiate `Config` objects directly",
2929
category=UserWarning,
30-
module="esmvalcore",
3130
)
3231
cfg = Config()
3332
cfg.load_from_dirs([])

0 commit comments

Comments
 (0)