|
5 | 5 | import copy |
6 | 6 | import inspect |
7 | 7 | import logging |
| 8 | +import re |
| 9 | +from collections.abc import Sequence |
| 10 | +from pathlib import Path |
8 | 11 | from pprint import pformat |
9 | 12 | from typing import TYPE_CHECKING, Any |
10 | 13 |
|
|
14 | 17 | from esmvalcore._task import BaseTask |
15 | 18 | from esmvalcore.cmor.check import cmor_check_data, cmor_check_metadata |
16 | 19 | from esmvalcore.cmor.fix import fix_data, fix_file, fix_metadata |
| 20 | +from esmvalcore.exceptions import RecipeError |
| 21 | +from esmvalcore.io.local import _parse_period |
17 | 22 | from esmvalcore.io.protocol import DataElement |
| 23 | +from esmvalcore.local import _get_output_file |
18 | 24 | from esmvalcore.preprocessor._area import ( |
19 | 25 | area_statistics, |
20 | 26 | extract_named_regions, |
|
102 | 108 | from esmvalcore.preprocessor._weighting import weighting_landsea_fraction |
103 | 109 |
|
104 | 110 | if TYPE_CHECKING: |
105 | | - from collections.abc import Callable, Iterable, Sequence |
106 | | - from pathlib import Path |
| 111 | + from collections.abc import Callable, Iterable |
107 | 112 |
|
108 | 113 | import prov.model |
109 | 114 | from dask.delayed import Delayed |
110 | 115 | from iris.cube import CubeList |
111 | 116 |
|
112 | 117 | from esmvalcore.dataset import Dataset |
| 118 | + from esmvalcore.typing import FacetValue |
113 | 119 |
|
114 | 120 | logger = logging.getLogger(__name__) |
115 | 121 |
|
|
254 | 260 | } |
255 | 261 |
|
256 | 262 |
|
| 263 | +def _get_preprocessor_filename(dataset: Dataset) -> Path: |
| 264 | + """Get a filename for storing a preprocessed dataset. |
| 265 | +
|
| 266 | + Parameters |
| 267 | + ---------- |
| 268 | + dataset: |
| 269 | + The dataset that will be preprocessed. |
| 270 | +
|
| 271 | + Returns |
| 272 | + ------- |
| 273 | + : |
| 274 | + A path for storing a preprocessed file. |
| 275 | + """ |
| 276 | + |
| 277 | + def is_facet_value(value: Any) -> bool: # noqa: ANN401 |
| 278 | + """Check if a value is of type `esmvalcore.typing.FacetValue`.""" |
| 279 | + return isinstance(value, str | int) or ( |
| 280 | + isinstance(value, Sequence) |
| 281 | + and all(isinstance(v, str) for v in value) |
| 282 | + ) |
| 283 | + |
| 284 | + default_template = "_".join( |
| 285 | + f"{{{k}}}" |
| 286 | + for k in sorted(dataset.minimal_facets) |
| 287 | + if is_facet_value(dataset.minimal_facets[k]) |
| 288 | + and k |
| 289 | + not in ("timerange", "diagnostic", "variable_group", "preprocessor") |
| 290 | + ) |
| 291 | + template = ( |
| 292 | + dataset.session["projects"] |
| 293 | + .get(dataset.facets["project"], {}) |
| 294 | + .get("preprocessor_filename_template", default_template) |
| 295 | + ) |
| 296 | + if template is default_template: |
| 297 | + try: |
| 298 | + # Use config-developer.yml for backward compatibility, remove in v2.16. |
| 299 | + return _get_output_file( |
| 300 | + dataset.facets, |
| 301 | + dataset.session.preproc_dir, |
| 302 | + ) |
| 303 | + except RecipeError: |
| 304 | + pass |
| 305 | + |
| 306 | + def normalize(value: FacetValue) -> str: |
| 307 | + """Normalize a facet value to a string that can be used in a filename.""" |
| 308 | + if isinstance(value, str | int): |
| 309 | + return re.sub("[^a-zA-Z0-9]+", "-", str(value))[:25] |
| 310 | + return "-".join(normalize(v) for v in value) |
| 311 | + |
| 312 | + normalized_facets = { |
| 313 | + k: normalize(v) for k, v in dataset.facets.items() if is_facet_value(v) |
| 314 | + } |
| 315 | + filename = template.format(**normalized_facets) |
| 316 | + if "timerange" in dataset.facets: |
| 317 | + start_time, end_time = _parse_period(dataset.facets["timerange"]) |
| 318 | + filename += f"_{start_time}-{end_time}" |
| 319 | + filename += ".nc" |
| 320 | + return Path( |
| 321 | + dataset.session.preproc_dir, |
| 322 | + dataset.facets.get("diagnostic", ""), # type: ignore[arg-type] |
| 323 | + dataset.facets.get("variable_group", ""), # type: ignore[arg-type] |
| 324 | + filename, |
| 325 | + ) |
| 326 | + |
| 327 | + |
257 | 328 | def _get_itype(step: str) -> str: |
258 | 329 | """Get the input type of a preprocessor function.""" |
259 | 330 | function = globals()[step] |
@@ -520,7 +591,7 @@ def __init__( |
520 | 591 | filename: Path, |
521 | 592 | attributes: dict[str, Any] | None = None, |
522 | 593 | settings: dict[str, Any] | None = None, |
523 | | - datasets: list | None = None, |
| 594 | + datasets: list[Dataset] | None = None, |
524 | 595 | ) -> None: |
525 | 596 | if datasets is not None: |
526 | 597 | # Load data using a Dataset |
|
0 commit comments