Changes from all commits
72 commits
4b989d3
Remove all new features, just keep no-op changes
schlunma Jul 13, 2025
b0c44f6
Further no-op changes
schlunma Jul 13, 2025
1dd5671
force_derivation=True without derive=True does not make sense
schlunma Jul 13, 2025
8989549
Add tests
schlunma Jul 13, 2025
1f6dfa3
Add type hints to check.py
schlunma Jul 13, 2025
b6a6651
Added type hints for recipe.py
schlunma Jul 13, 2025
6793e0c
Added type hints for to_datasets.py
schlunma Jul 13, 2025
878e310
Added type hints for dataset.py
schlunma Jul 13, 2025
be6e55d
Add type hints to local.py
schlunma Jul 13, 2025
b1caf65
Add type hints to preprocessor/__init__.py
schlunma Jul 13, 2025
19dbff9
Add type hints to compare_with_refs.py
schlunma Jul 13, 2025
d8ea7d9
Add type hints to _derive/__init__.py
schlunma Jul 13, 2025
367bfe7
Add type hints to some derive functions
schlunma Jul 13, 2025
5bbe6ce
Add type hints to _regrid.py
schlunma Jul 13, 2025
d10de1e
Make new dataset methods private
schlunma Jul 13, 2025
7323866
Small fix
schlunma Jul 13, 2025
3ab2cdf
Fix test
schlunma Jul 13, 2025
099349f
Fix mock
schlunma Jul 13, 2025
86b308b
100% test coverage
schlunma Jul 13, 2025
369a811
Clean doc
schlunma Jul 14, 2025
c2a3d81
100% diff coverage
schlunma Jul 14, 2025
a3dab12
Try to please Codacy
schlunma Jul 14, 2025
001eafa
Make tests work without ESMValTool installation
schlunma Jul 14, 2025
debd589
100% diff coverage for real
schlunma Jul 14, 2025
c3df13e
Added Dataset.input_datasets
schlunma Jul 15, 2025
e794817
Shorter code
schlunma Jul 15, 2025
7c1bfd7
Merge remote-tracking branch 'origin/type_hints_derive' into from_fil…
schlunma Jul 15, 2025
b971d50
Dataset.set_version can handle derived variables now
schlunma Jul 15, 2025
f6b6d22
Dataset._input_datasets is always list[Dataset]
schlunma Jul 15, 2025
1f4de86
Make changes fully backwards-compatible
schlunma Jul 15, 2025
8ee3e04
Merge remote-tracking branch 'origin/type_hints_derive' into from_fil…
schlunma Jul 15, 2025
c6d303b
Make Dataset.from_files work with derived variables (no globs yet)
schlunma Jul 15, 2025
40147fb
Added test for derived variable with glob
schlunma Jul 15, 2025
6ec04fc
Better var name
schlunma Jul 15, 2025
58b12d1
Merge remote-tracking branch 'origin/type_hints_derive' into from_fil…
schlunma Jul 15, 2025
ea3386e
Update esmvalcore/dataset.py
schlunma Jul 15, 2025
dc7e122
Merge remote-tracking branch 'origin/main' into type_hints_derive
schlunma Jul 15, 2025
73ddc24
Merge remote-tracking branch 'origin/type_hints_derive' into from_fil…
schlunma Jul 15, 2025
efa2ac1
Add further tests for Dataset.from_files with globs
schlunma Jul 15, 2025
f9c47a9
Update _dataset_from_files to new Dataset.from_files
schlunma Jul 15, 2025
3de7bc8
Move _fix_cmip5_fx_ensemble to _get_preprocessor_products
schlunma Jul 15, 2025
77fd1e8
Make _derivation_necessary work with timerange globs
schlunma Jul 15, 2025
312fafa
Fix bug for non-derived variables
schlunma Jul 15, 2025
e8c7bf2
Use new Dataset.from_files in code
schlunma Jul 15, 2025
6cdd714
Added test to check differing timeranges
schlunma Jul 15, 2025
9057cf9
Make everything work with glob in timerange
schlunma Jul 15, 2025
ebc82ba
Differing timeranges are caught by _get_all_available_datasets
schlunma Jul 15, 2025
58dd666
Use ABCs and other type hint suggestions from @bouweandela
schlunma Jul 15, 2025
b1c66fd
Ruff fixes
schlunma Jul 15, 2025
6be3169
Use type aliases
schlunma Jul 15, 2025
5744b0d
Do not change minimal facets
schlunma Jul 15, 2025
cbcf37b
Used more type aliases
schlunma Jul 15, 2025
14e8b5e
Fix typo in func name
schlunma Jul 15, 2025
ecbecc6
Make mypy happy
schlunma Jul 15, 2025
d7c73aa
Use type aliases in regrid.py
schlunma Jul 15, 2025
69e0502
Valid return type in docstring
schlunma Jul 15, 2025
6eedca2
Avoid Coord
schlunma Jul 15, 2025
62c1996
Correct type hint
schlunma Jul 15, 2025
8f2f179
Assign new variable for new type
schlunma Jul 15, 2025
7bc1bee
Raise error for invalid type
schlunma Jul 15, 2025
62067fc
Fail if invalid types given
schlunma Jul 15, 2025
b12df84
Restore _pattern_filter
schlunma Jul 15, 2025
22ab6e7
Better _special_name_to_dataset
schlunma Jul 15, 2025
36724ef
Do not cast to str
schlunma Jul 15, 2025
6ad2fef
Use int variables
schlunma Jul 15, 2025
3ce06fc
Merge remote-tracking branch 'origin/type_hints_derive' into from_fil…
schlunma Jul 15, 2025
1116641
Merge remote-tracking branch 'origin/main' into from_files_with_deriv…
schlunma Jul 16, 2025
74983d5
Add doc
schlunma Jul 16, 2025
acaf9fd
Expand notebook
schlunma Jul 16, 2025
f6e531b
Fix doc build
schlunma Jul 16, 2025
30b6f53
Update doc
schlunma Jul 16, 2025
1cdfef2
Better derivation example
schlunma Jul 16, 2025
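Taken together, the commits above make Dataset.from_files() aware of derived variables: when the derived variable itself is not available as files, glob facets are expanded from the files of its required input variables. A minimal usage sketch, assuming a CMIP6-style facet set and that lwp is derived from clwvi and clivi as in the standard derivation scripts; the concrete facet values are illustrative only. Per commits 77fd1e8 and 9057cf9, a glob in timerange is handled as well.

from esmvalcore.dataset import Dataset

# Derived variable with glob facets: the derived variable has no files of
# its own, so the globs have to be expanded from the files of its input
# variables (clwvi and clivi in the case of lwp).
dataset = Dataset(
    short_name="lwp",
    project="CMIP6",
    mip="Amon",
    dataset="*",  # expanded from the available input files
    ensemble="r1i1p1f1",
    exp="historical",
    timerange="2000/2005",
    derive=True,
    force_derivation=True,
)

# One Dataset per facet combination for which all required (non-optional)
# input variables are available.
for expanded in dataset.from_files():
    print(expanded.summary(shorten=True))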
15 changes: 0 additions & 15 deletions esmvalcore/_recipe/check.py
@@ -36,7 +36,6 @@

from esmvalcore._task import TaskSet
from esmvalcore.dataset import Dataset
from esmvalcore.typing import Facets


logger = logging.getLogger(__name__)
@@ -466,20 +465,6 @@ def valid_time_selection(timerange: str) -> None:
_check_timerange_values(date, timerange_list)


def differing_timeranges(
timeranges: set[str],
required_vars: list[Facets],
) -> None:
"""Log error if required variables have differing timeranges."""
if len(timeranges) > 1:
msg = (
f"Differing timeranges with values {timeranges} "
f"found for required variables {required_vars}. "
"Set `timerange` to a common value."
)
raise ValueError(msg)


def _check_literal(
settings: dict,
*,
41 changes: 23 additions & 18 deletions esmvalcore/_recipe/recipe.py
@@ -51,7 +51,6 @@
from . import check
from .from_datasets import datasets_to_recipe
from .to_datasets import (
_derive_needed,
_get_input_datasets,
_representative_datasets,
)
@@ -246,7 +245,7 @@

settings = {}

if _derive_needed(dataset):
if dataset._derivation_necessary(): # noqa: SLF001 (will be replaced soon)

Codacy Static Code Analysis notice (recipe.py#L248): Access to a protected member _derivation_necessary of a client class (protected-access)
settings["derive"] = {
"short_name": facets["short_name"],
"standard_name": facets["standard_name"],
@@ -615,21 +614,26 @@
)


def _set_version(dataset: Dataset, input_datasets: list[Dataset]) -> None:
"""Set the 'version' facet based on derivation input datasets."""
versions = set()
for in_dataset in input_datasets:
in_dataset.set_version()
if version := in_dataset.facets.get("version"):
if isinstance(version, list):
versions.update(version)
else:
versions.add(version)
if versions:
version = versions.pop() if len(versions) == 1 else sorted(versions)
dataset.set_facet("version", version)
for supplementary_ds in dataset.supplementaries:
supplementary_ds.set_version()
def _fix_cmip5_fx_ensemble(dataset: Dataset) -> None:
"""Automatically correct the wrong ensemble for CMIP5 fx variables."""
if (
dataset.facets.get("project") == "CMIP5"
and dataset.facets.get("mip") == "fx"
and dataset.facets.get("ensemble") != "r0i0p0"
and not dataset.files
):
original_ensemble = dataset["ensemble"]
copy = dataset.copy()
copy.facets["ensemble"] = "r0i0p0"
if copy.files:
dataset.facets["ensemble"] = "r0i0p0"
logger.info(
"Corrected wrong 'ensemble' from '%s' to '%s' for %s",
original_ensemble,
dataset["ensemble"],
dataset.summary(shorten=True),
)
dataset.find_files()


def _get_preprocessor_products(
@@ -655,6 +659,7 @@
settings = _get_default_settings(dataset)
_apply_preprocessor_profile(settings, profile)
_update_multi_dataset_settings(dataset.facets, settings)
_fix_cmip5_fx_ensemble(dataset)
_update_preproc_functions(settings, dataset, datasets, missing_vars)
_add_dataset_specific_settings(dataset, settings)
check.preprocessor_supplementaries(dataset, settings)
@@ -666,7 +671,7 @@
else:
missing_vars.update(missing)
continue
_set_version(dataset, input_datasets)
dataset.set_version()
USED_DATASETS.append(dataset)
_schedule_for_download(input_datasets)
_log_input_files(input_datasets)
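The removed _set_version helper above collected the 'version' facet from the derivation input datasets; commit b971d50 indicates this now happens inside Dataset.set_version(), which is simply called at the end of _get_preprocessor_products. The method body is not part of this diff, so the sketch below only restates the version-collection logic of the removed helper, under the assumption that the new method behaves equivalently; _collect_version is a hypothetical name used for illustration only.

def _collect_version(dataset, input_datasets):
    # Hypothetical sketch of the version handling previously done by the
    # removed _set_version helper; per the commit messages this logic now
    # lives inside Dataset.set_version().
    versions: set[str] = set()
    for in_dataset in input_datasets:
        in_dataset.set_version()
        version = in_dataset.facets.get("version")
        if isinstance(version, list):
            versions.update(version)
        elif version:
            versions.add(version)
    if versions:
        # A single version is stored as a scalar, several as a sorted list.
        value = versions.pop() if len(versions) == 1 else sorted(versions)
        dataset.set_facet("version", value)
    for supplementary_ds in dataset.supplementaries:
        supplementary_ds.set_version()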
148 changes: 33 additions & 115 deletions esmvalcore/_recipe/to_datasets.py
@@ -13,7 +13,6 @@
from esmvalcore.esgf.facets import FACETS
from esmvalcore.exceptions import RecipeError
from esmvalcore.local import LocalFile, _replace_years_with_timerange
from esmvalcore.preprocessor._derive import get_required
from esmvalcore.preprocessor._io import DATASET_KEYS
from esmvalcore.preprocessor._supplementary_vars import (
PREPROCESSOR_SUPPLEMENTARIES,
@@ -188,28 +187,6 @@
return list(merged.values())


def _fix_cmip5_fx_ensemble(dataset: Dataset) -> None:
"""Automatically correct the wrong ensemble for CMIP5 fx variables."""
if (
dataset.facets.get("project") == "CMIP5"
and dataset.facets.get("mip") == "fx"
and dataset.facets.get("ensemble") != "r0i0p0"
and not dataset.files
):
original_ensemble = dataset["ensemble"]
copy = dataset.copy()
copy.facets["ensemble"] = "r0i0p0"
if copy.files:
dataset.facets["ensemble"] = "r0i0p0"
logger.info(
"Corrected wrong 'ensemble' from '%s' to '%s' for %s",
original_ensemble,
dataset["ensemble"],
dataset.summary(shorten=True),
)
dataset.find_files()


def _get_supplementary_short_names(
facets: Facets,
step: str,
@@ -428,9 +405,7 @@
return datasets


def _dataset_from_files( # noqa: C901
dataset: Dataset,
) -> list[Dataset]:
def _dataset_from_files(dataset: Dataset) -> list[Dataset]:
"""Replace facet values of '*' based on available files."""
result: list[Dataset] = []
errors: list[str] = []
@@ -441,53 +416,32 @@
dataset.summary(shorten=True),
)

representative_datasets = _representative_datasets(dataset)

# For derived variables, representative_datasets might contain more than
# one element
all_datasets: list[list[tuple[dict, Dataset]]] = []
for representative_dataset in representative_datasets:
all_datasets.append([])
for expanded_ds in representative_dataset.from_files():
updated_facets = {}
unexpanded_globs = {}
for key, value in dataset.facets.items():
if _isglob(value):
if key in expanded_ds.facets and not _isglob(
expanded_ds[key],
):
updated_facets[key] = expanded_ds.facets[key]
else:
unexpanded_globs[key] = value

if unexpanded_globs:
msg = _report_unexpanded_globs(
dataset,
expanded_ds,
unexpanded_globs,
)
errors.append(msg)
continue
for expanded_ds in dataset.from_files():
updated_facets = {}
unexpanded_globs = {}
for key, value in dataset.facets.items():
if _isglob(value):
if key in expanded_ds.facets and not _isglob(
expanded_ds[key],
):
updated_facets[key] = expanded_ds.facets[key]
else:
unexpanded_globs[key] = value

if unexpanded_globs:
msg = _report_unexpanded_globs(
dataset,
expanded_ds,
unexpanded_globs,
)
errors.append(msg)
continue

new_ds = dataset.copy()
new_ds.facets.update(updated_facets)
new_ds.supplementaries = expanded_ds.supplementaries
new_ds = dataset.copy()
new_ds.facets.update(updated_facets)
new_ds.supplementaries = expanded_ds.supplementaries

all_datasets[-1].append((updated_facets, new_ds))

# If globs have been expanded, only consider those datasets that contain
# all necessary input variables if derivation is necessary
for updated_facets, new_ds in all_datasets[0]:
other_facets = [[d[0] for d in ds] for ds in all_datasets[1:]]
if all(updated_facets in facets for facets in other_facets):
result.append(new_ds)
else:
logger.debug(
"Not all necessary input variables to derive '%s' are "
"available for dataset %s",
dataset["short_name"],
updated_facets,
)
result.append(new_ds)

if errors:
raise RecipeError("\n".join(errors))
@@ -538,59 +492,23 @@
return msg


def _derive_needed(dataset: Dataset) -> bool:
"""Check if dataset needs to be derived from other datasets."""
if not dataset.facets.get("derive"):
return False
if dataset.facets.get("force_derivation"):
return True
if _isglob(dataset.facets.get("timerange", "")):
# Our file finding routines are not able to handle globs.
dataset = dataset.copy()
dataset.facets.pop("timerange")

copy = dataset.copy()
copy.supplementaries = []
return not copy.files


def _get_input_datasets(dataset: Dataset) -> list[Dataset]:
"""Determine the input datasets needed for deriving `dataset`."""
facets = dataset.facets
if not _derive_needed(dataset):
_fix_cmip5_fx_ensemble(dataset)
return [dataset]
if not dataset._derivation_necessary(): # noqa: SLF001

Codacy Static Code Analysis notice (to_datasets.py#L497): Access to a protected member _derivation_necessary of a client class (protected-access)
return dataset.input_datasets

# Configure input datasets needed to derive variable
datasets = []
required_vars = get_required(facets["short_name"], facets["project"]) # type: ignore
# idea: add option to specify facets in list of dicts that is value of
# 'derive' in the recipe and use that instead of get_required?
for input_facets in required_vars:
input_dataset = dataset.copy()
keep = {"alias", "recipe_dataset_index", *dataset.minimal_facets}
input_dataset.facets = {
k: v for k, v in input_dataset.facets.items() if k in keep
}
input_dataset.facets.update(input_facets)
input_dataset.augment_facets()
_fix_cmip5_fx_ensemble(input_dataset)
if input_facets.get("optional") and not input_dataset.files:
# Skip optional datasets if no data is available
input_datasets: list[Dataset] = []
for input_dataset in dataset.input_datasets:
if input_dataset.facets.get("optional") and not input_dataset.files:
logger.info(
"Skipping: no data found for %s which is marked as 'optional'",
input_dataset,
)
else:
datasets.append(input_dataset)
input_datasets.append(input_dataset)

# Check timeranges of available input data.
timeranges: set[str] = set()
for input_dataset in datasets:
if "timerange" in input_dataset.facets:
timeranges.add(input_dataset.facets["timerange"]) # type: ignore
check.differing_timeranges(timeranges, required_vars)

return datasets
return input_datasets


def _representative_datasets(dataset: Dataset) -> list[Dataset]:
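The module-level _derive_needed() removed above is replaced by the private Dataset._derivation_necessary() method, whose body is not shown in this diff. Assuming it encapsulates the same check, extended per commit 77fd1e8 to tolerate a timerange glob, a minimal sketch of the expected logic follows; it would be a method of Dataset, and _isglob stands in for the helper already used in esmvalcore.local.

def _derivation_necessary(self) -> bool:
    # Sketch only: derivation is needed when the variable is marked for
    # derivation and either derivation is forced or no files for the
    # derived variable itself can be found.
    if not self.facets.get("derive"):
        return False
    if self.facets.get("force_derivation"):
        return True
    copy = self.copy()
    copy.supplementaries = []
    if _isglob(copy.facets.get("timerange", "")):
        # The file-finding routines cannot handle a timerange glob, so it
        # is dropped for this availability check.
        copy.facets.pop("timerange")
    return not copy.files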