diff --git a/doc/quickstart/configure.rst b/doc/quickstart/configure.rst index 9b1383e7ca..90444492b4 100644 --- a/doc/quickstart/configure.rst +++ b/doc/quickstart/configure.rst @@ -685,22 +685,26 @@ Example: The following project-specific options are available: -+-------------------------------+----------------------------------------+-----------------------------+----------------------------------------+ -| Option | Description | Type | Default value | -+===============================+========================================+=============================+========================================+ -| ``data`` | Data sources are used to find input | :obj:`dict` | {} | -| | data and have to be configured before | | | -| | running the tool. See | | | -| | :ref:`config-data-sources` for | | | -| | details. | | | -+-------------------------------+----------------------------------------+-----------------------------+----------------------------------------+ -| ``extra_facets`` | Extra key-value pairs ("*facets*") | :obj:`dict` | See | -| | added to datasets in addition to the | | :ref:`config-extra-facets-defaults` | -| | facets defined in the recipe. See | | | -| | :ref:`config-extra-facets` for | | | -| | details. | | | -+-------------------------------+----------------------------------------+-----------------------------+----------------------------------------+ +.. list-table:: + :widths: 15 50 15 20 + :header-rows: 1 + * - Option + - Description + - Type + - Default value + * - ``data`` + - Data sources are used to find input data and have to be configured before running the tool. Refer to :ref:`config-data-sources` for details. + - :obj:`dict` + - ``{}`` + * - ``extra_facets`` + - Extra key-value pairs ("*facets*") added to datasets in addition to the facets defined in the recipe. Refer to :ref:`config-extra-facets` for details. + - :obj:`dict` + - Refer to :ref:`config-extra-facets-defaults`. + * - ``preprocessor_filename_template`` + - A template defining the filenames to use for :ref:`preprocessed data ` when running a :ref:`recipe `. Refer to :ref:`config-preprocessor-filename-template` for details. + - :obj:`str` + - Refer to :ref:`config-preprocessor-filename-template`. .. _config-data-sources: @@ -954,6 +958,30 @@ Default extra facets are specified in ``extra_facets_*.yml`` files located in `__ directory. +.. _config-preprocessor-filename-template: + +Preprocessor output filenames +----------------------------- + +The filename to use for saving :ref:`preprocessed data ` +when running a :ref:`recipe ` is configured using ``preprocessor_filename_template``, +similar to the filename template in :class:`esmvalcore.io.local.LocalDataSource`. + +Default values are provided in ``defaults/preprocessor_filename_template.yml``, +for example: + +.. literalinclude:: ../configurations/defaults/preprocessor_filename_template.yml + :language: yaml + :caption: First few lines of ``defaults/preprocessor_filename_template.yml`` + :end-before: # Observational + +The facet names from the template are replaced with the facet values from the +recipe to create a filename. The extension ``.nc`` (and if applicable, a start +and end time) will automatically be appended to the filename. + +If no ``preprocessor_filename_template`` is configured for a project, the facets +describing the dataset in the recipe, as stored in +:attr:`esmvalcore.dataset.Dataset.minimal_facets`, are used. .. _config-esgf: @@ -1113,18 +1141,9 @@ Example of the CMIP6 project configuration: .. code-block:: yaml CMIP6: - output_file: '{project}_{dataset}_{mip}_{exp}_{ensemble}_{short_name}' cmor_type: 'CMIP6' cmor_strict: true -Preprocessor output files -------------------------- - -The filename to use for preprocessed data is configured using ``output_file``, -similar to the filename template in :class:`esmvalcore.io.local.LocalDataSource`. -Note that the extension ``.nc`` (and if applicable, a start and end time) will -automatically be appended to the filename. - .. _cmor_table_configuration: Project CMOR table configuration @@ -1233,13 +1252,11 @@ Example: native6: cmor_strict: false - output_file: '{project}_{dataset}_{type}_{version}_{mip}_{short_name}' cmor_type: 'CMIP6' cmor_default_table_prefix: 'CMIP6_' ICON: cmor_strict: false - output_file: '{project}_{dataset}_{exp}_{var_type}_{mip}_{short_name}' cmor_type: 'CMIP6' cmor_default_table_prefix: 'CMIP6_' diff --git a/doc/quickstart/find_data.rst b/doc/quickstart/find_data.rst index 920e734023..fa1a07705e 100644 --- a/doc/quickstart/find_data.rst +++ b/doc/quickstart/find_data.rst @@ -834,5 +834,5 @@ a corresponding entry in the configuration file could look like: The same replacement mechanism can be employed everywhere where tags can be used, particularly in ``dirname_template`` and ``filename_template`` in -:class:`esmvalcore.io.local.LocalDataSource`, and in ``output_file`` in -:ref:`config-developer.yml `. +:class:`esmvalcore.io.local.LocalDataSource`, and in ``preprocessor_filename_template`` +under :ref:`config-projects`. diff --git a/doc/quickstart/output.rst b/doc/quickstart/output.rst index 71d35a00e5..24543df25b 100644 --- a/doc/quickstart/output.rst +++ b/doc/quickstart/output.rst @@ -8,7 +8,7 @@ The location is determined by the ``output_dir`` :ref:`configuration option `, the recipe name, and the date and time, using the the format: ``YYYYMMDD_HHMMSS``. -For instance, a typical output location would be: +For instance, a typical output location (:attr:`~esmvalcore.config.Session.session_dir`) would be: ``output_directory/recipe_ocean_amoc_20190118_1027/`` This is effectively produced by the combination: diff --git a/esmvalcore/_recipe/recipe.py b/esmvalcore/_recipe/recipe.py index 7476c125b9..38f48fc663 100644 --- a/esmvalcore/_recipe/recipe.py +++ b/esmvalcore/_recipe/recipe.py @@ -27,7 +27,6 @@ GRIB_FORMATS, _dates_to_timerange, _get_multiproduct_filename, - _get_output_file, _parse_period, _truncate_dates, ) @@ -38,6 +37,7 @@ MULTI_MODEL_FUNCTIONS, PreprocessingTask, PreprocessorFile, + _get_preprocessor_filename, ) from esmvalcore.preprocessor._area import _update_shapefile_path from esmvalcore.preprocessor._multimodel import _get_stat_identifier @@ -678,11 +678,7 @@ def _get_preprocessor_products( _schedule_for_download(input_datasets) _log_input_files(input_datasets) logger.info("Found input files for %s", dataset.summary(shorten=True)) - - filename = _get_output_file( - dataset.facets, - dataset.session.preproc_dir, - ) + filename = _get_preprocessor_filename(dataset) product = PreprocessorFile( filename=filename, attributes=dataset.facets, diff --git a/esmvalcore/config/_config_object.py b/esmvalcore/config/_config_object.py index d03c8409b6..221a225d1b 100644 --- a/esmvalcore/config/_config_object.py +++ b/esmvalcore/config/_config_object.py @@ -11,7 +11,6 @@ import dask.config import esmvalcore -from esmvalcore.config._config import load_config_developer from esmvalcore.config._config_validators import ( _deprecated_options_defaults, _deprecators, @@ -129,10 +128,6 @@ def load_from_dirs(self, dirs: Iterable[str | Path]) -> None: new_config_dict = self._get_config_dict_from_dirs(dirs) self.clear() self.update(new_config_dict) - # Add known projects from config-developer file while we still have it. - for project in load_config_developer(self["config_developer_file"]): - if project not in self["projects"]: - self["projects"][project] = {} self.check_missing() def reload(self) -> None: @@ -268,12 +263,26 @@ class Session(ValidatedConfig): _deprecated_defaults = _deprecated_options_defaults relative_preproc_dir = Path("preproc") + """Relative path to the preprocessor output directory, with respect to :attr:`session_dir`.""" + relative_work_dir = Path("work") + """Relative path to diagnostic script output directory, with respect to :attr:`session_dir`.""" + relative_plot_dir = Path("plots") + """Relative path to diagnostic script plot directory, with respect to :attr:`session_dir`.""" + relative_run_dir = Path("run") + """Relative path to the directory with information about the run, with respect to :attr:`session_dir`.""" + relative_main_log = Path("run", "main_log.txt") + """Relative path to the log file, with respect to :attr:`session_dir`.""" + relative_main_log_debug = Path("run", "main_log_debug.txt") + """Relative path to the debug log file, with respect to :attr:`session_dir`.""" + relative_cmor_log = Path("run", "cmor_log.txt") + """Relative path to the log file with CMOR check messages, with respect to :attr:`session_dir`.""" + _relative_fixed_file_dir = Path("preproc", "fixed_files") def __init__(self, config: dict, name: str = "session") -> None: @@ -304,42 +313,45 @@ def set_session_name(self, name: str = "session") -> None: @property def session_dir(self): - """Return session directory.""" + """Session directory. + + This is a uniquely named directory inside the :ref:`output directory `. + """ return self["output_dir"] / self.session_name @property def preproc_dir(self): - """Return preproc directory.""" + """Directory with preprocessor output files.""" return self.session_dir / self.relative_preproc_dir @property def work_dir(self): - """Return work directory.""" + """Directory with diagnostic script output files.""" return self.session_dir / self.relative_work_dir @property def plot_dir(self): - """Return plot directory.""" + """Directory with diagnostic script plot files.""" return self.session_dir / self.relative_plot_dir @property def run_dir(self): - """Return run directory.""" + """Directory containing information about the run.""" return self.session_dir / self.relative_run_dir @property def main_log(self): - """Return main log file.""" + """Path to the log file.""" return self.session_dir / self.relative_main_log @property def main_log_debug(self): - """Return main log debug file.""" + """Path to the debug log file.""" return self.session_dir / self.relative_main_log_debug @property def cmor_log(self): - """Return CMOR log file.""" + """Path to the log file with CMOR check messages.""" return self.session_dir / self.relative_cmor_log @property diff --git a/esmvalcore/config/_config_validators.py b/esmvalcore/config/_config_validators.py index ca5045f976..0f58b4cffb 100644 --- a/esmvalcore/config/_config_validators.py +++ b/esmvalcore/config/_config_validators.py @@ -375,6 +375,7 @@ def validate_projects( options_for_project: dict[str, Callable[[Any], Any]] = { "data": validate_dict, # TODO: try to create data sources here "extra_facets": validate_dict, + "preprocessor_filename_template": validate_string, } for project, project_config in mapping.items(): for option, val in project_config.items(): diff --git a/esmvalcore/config/configurations/defaults/preprocessor_filename_template.yml b/esmvalcore/config/configurations/defaults/preprocessor_filename_template.yml new file mode 100644 index 0000000000..1bae3d73df --- /dev/null +++ b/esmvalcore/config/configurations/defaults/preprocessor_filename_template.yml @@ -0,0 +1,33 @@ +# Templates for the filenames used to write preprocessor output. +projects: + # ESGF projects. + CMIP3: + preprocessor_filename_template: "{project}_{institute}_{dataset}_{mip}_{exp}_{ensemble}_{short_name}" + CMIP5: + preprocessor_filename_template: "{project}_{dataset}_{mip}_{exp}_{ensemble}_{short_name}" + CMIP6: + preprocessor_filename_template: "{project}_{dataset}_{mip}_{exp}_{ensemble}_{short_name}_{grid}" + CORDEX: + preprocessor_filename_template: "{project}_{institute}_{dataset}_{rcm_version}_{driver}_{domain}_{mip}_{exp}_{ensemble}_{short_name}" + obs4MIPs: + preprocessor_filename_template: "{project}_{dataset}_{short_name}" + # Observational and reanalysis data that has been CMORized by ESMValTool according to the CMIP5 standard. + OBS: + preprocessor_filename_template: "{project}_{dataset}_{type}_{version}_{mip}_{short_name}" + # Observational and reanalysis data that has been CMORized by ESMValTool according to the CMIP6 standard. + OBS6: + preprocessor_filename_template: "{project}_{dataset}_{type}_{version}_{mip}_{short_name}" + # Observational and reanalysis data that can be read in its native format by ESMValCore. + native6: + preprocessor_filename_template: "{project}_{dataset}_{type}_{version}_{mip}_{short_name}" + # Data from various climate models in their native output format. + ACCESS: + preprocessor_filename_template: "{project}_{dataset}_{mip}_{exp}_{institute}_{sub_dataset}_{freq_attribute}_{short_name}" + CESM: + preprocessor_filename_template: "{project}_{dataset}_{case}_{gcomp}_{scomp}_{type}_{mip}_{short_name}" + EMAC: + preprocessor_filename_template: "{project}_{dataset}_{exp}_{channel}_{mip}_{short_name}" + ICON: + preprocessor_filename_template: "{project}_{dataset}_{exp}_{var_type}_{mip}_{short_name}" + IPSLCM: + preprocessor_filename_template: "{dataset}_{account}_{model}_{status}_{exp}_{simulation}_{freq}_{short_name}" diff --git a/esmvalcore/dataset.py b/esmvalcore/dataset.py index 1ae3a224d8..6be9687a15 100644 --- a/esmvalcore/dataset.py +++ b/esmvalcore/dataset.py @@ -27,8 +27,8 @@ ) from esmvalcore.config._data_sources import _get_data_sources from esmvalcore.exceptions import InputFilesNotFound, RecipeError -from esmvalcore.io.local import _dates_to_timerange, _get_output_file -from esmvalcore.preprocessor import preprocess +from esmvalcore.io.local import _dates_to_timerange +from esmvalcore.preprocessor import _get_preprocessor_filename, preprocess if TYPE_CHECKING: from collections.abc import Iterable, Iterator, Sequence @@ -815,7 +815,7 @@ def load(self) -> Cube: supplementary_cube = supplementary_dataset._load() # noqa: SLF001 supplementary_cubes.append(supplementary_cube) - output_file = _get_output_file(self.facets, self.session.preproc_dir) + output_file = _get_preprocessor_filename(self) cubes = preprocess( [cube], "add_supplementary_variables", @@ -833,7 +833,7 @@ def _load(self) -> Cube: msg = check.get_no_data_message(self) raise InputFilesNotFound(msg) - output_file = _get_output_file(self.facets, self.session.preproc_dir) + output_file = _get_preprocessor_filename(self) fix_dir_prefix = Path( self.session._fixed_file_dir, # noqa: SLF001 self._get_joined_summary_facets("_", join_lists=True) + "_", diff --git a/esmvalcore/io/local.py b/esmvalcore/io/local.py index c7b2059357..4ab49cca7f 100644 --- a/esmvalcore/io/local.py +++ b/esmvalcore/io/local.py @@ -58,7 +58,6 @@ from netCDF4 import Dataset import esmvalcore.io.protocol -from esmvalcore.config._config import get_project_config from esmvalcore.exceptions import RecipeError from esmvalcore.iris_helpers import ignore_warnings_context @@ -697,27 +696,6 @@ def _templates_to_regex(self) -> str: return pattern -def _get_output_file(variable: dict[str, Any], preproc_dir: Path) -> Path: - """Return the full path to the output (preprocessed) file.""" - cfg = get_project_config(variable["project"]) - - # Join different experiment names - if isinstance(variable.get("exp"), (list, tuple)): - variable = dict(variable) - variable["exp"] = "-".join(variable["exp"]) - outfile = _replace_tags(cfg["output_file"], variable)[0] - if "timerange" in variable: - timerange = variable["timerange"].replace("/", "-") - outfile = Path(f"{outfile}_{timerange}") - outfile = Path(f"{outfile}.nc") - return Path( - preproc_dir, - variable.get("diagnostic", ""), - variable.get("variable_group", ""), - outfile, - ) - - def _get_multiproduct_filename(attributes: dict, preproc_dir: Path) -> Path: """Get ensemble/multi-model filename depending on settings.""" relevant_keys = [ diff --git a/esmvalcore/local.py b/esmvalcore/local.py index e1b36b3572..850d32d0ef 100644 --- a/esmvalcore/local.py +++ b/esmvalcore/local.py @@ -9,9 +9,10 @@ import logging import os.path +import textwrap import warnings from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any from esmvalcore.config import CFG from esmvalcore.config._config import get_ignored_warnings, get_project_config @@ -19,6 +20,7 @@ LocalDataSource, LocalFile, _filter_versions_called_latest, + _replace_tags, _select_latest_version, ) @@ -255,3 +257,39 @@ def find_files( globs.extend(data_source._get_glob_patterns(**facets)) # noqa: SLF001 return files, sorted(globs) return files + + +_GET_OUTPUT_FILE_WARNED: set[str] = set() + + +def _get_output_file(variable: dict[str, Any], preproc_dir: Path) -> Path: + """Return the full path to the output (preprocessed) file.""" + project = variable["project"] + cfg = get_project_config(project) + if project not in _GET_OUTPUT_FILE_WARNED: + _GET_OUTPUT_FILE_WARNED.add(project) + msg = textwrap.dedent( + f""" + Defining 'output_file' in config-developer.yml is deprecated and will be removed in version 2.16.0. Please use the following configuration instead: + projects: + {variable["project"]}: + preprocessor_filename_template: "{cfg["output_file"]}" + """, + ).rstrip() + logger.warning(msg) + + # Join different experiment names + if isinstance(variable.get("exp"), (list, tuple)): + variable = dict(variable) + variable["exp"] = "-".join(variable["exp"]) + outfile = _replace_tags(cfg["output_file"], variable)[0] + if "timerange" in variable: + timerange = variable["timerange"].replace("/", "-") + outfile = Path(f"{outfile}_{timerange}") + outfile = Path(f"{outfile}.nc") + return Path( + preproc_dir, + variable.get("diagnostic", ""), + variable.get("variable_group", ""), + outfile, + ) diff --git a/esmvalcore/preprocessor/__init__.py b/esmvalcore/preprocessor/__init__.py index 2b9053bbe5..e08fd96daa 100644 --- a/esmvalcore/preprocessor/__init__.py +++ b/esmvalcore/preprocessor/__init__.py @@ -5,6 +5,9 @@ import copy import inspect import logging +import re +from collections.abc import Sequence +from pathlib import Path from pprint import pformat from typing import TYPE_CHECKING, Any @@ -14,7 +17,10 @@ from esmvalcore._task import BaseTask from esmvalcore.cmor.check import cmor_check_data, cmor_check_metadata from esmvalcore.cmor.fix import fix_data, fix_file, fix_metadata +from esmvalcore.exceptions import RecipeError +from esmvalcore.io.local import _parse_period from esmvalcore.io.protocol import DataElement +from esmvalcore.local import _get_output_file from esmvalcore.preprocessor._area import ( area_statistics, extract_named_regions, @@ -102,14 +108,14 @@ from esmvalcore.preprocessor._weighting import weighting_landsea_fraction if TYPE_CHECKING: - from collections.abc import Callable, Iterable, Sequence - from pathlib import Path + from collections.abc import Callable, Iterable import prov.model from dask.delayed import Delayed from iris.cube import CubeList from esmvalcore.dataset import Dataset + from esmvalcore.typing import FacetValue logger = logging.getLogger(__name__) @@ -254,6 +260,71 @@ } +def _get_preprocessor_filename(dataset: Dataset) -> Path: + """Get a filename for storing a preprocessed dataset. + + Parameters + ---------- + dataset: + The dataset that will be preprocessed. + + Returns + ------- + : + A path for storing a preprocessed file. + """ + + def is_facet_value(value: Any) -> bool: # noqa: ANN401 + """Check if a value is of type `esmvalcore.typing.FacetValue`.""" + return isinstance(value, str | int) or ( + isinstance(value, Sequence) + and all(isinstance(v, str) for v in value) + ) + + default_template = "_".join( + f"{{{k}}}" + for k in sorted(dataset.minimal_facets) + if is_facet_value(dataset.minimal_facets[k]) + and k + not in ("timerange", "diagnostic", "variable_group", "preprocessor") + ) + template = ( + dataset.session["projects"] + .get(dataset.facets["project"], {}) + .get("preprocessor_filename_template", default_template) + ) + if template is default_template: + try: + # Use config-developer.yml for backward compatibility, remove in v2.16. + return _get_output_file( + dataset.facets, + dataset.session.preproc_dir, + ) + except RecipeError: + pass + + def normalize(value: FacetValue) -> str: + """Normalize a facet value to a string that can be used in a filename.""" + if isinstance(value, str | int): + return re.sub("[^a-zA-Z0-9]+", "-", str(value))[:25] + return "-".join(normalize(v) for v in value) + + normalized_facets = { + k: normalize(v) for k, v in dataset.facets.items() if is_facet_value(v) + } + filename = template.format(**normalized_facets) + if "timerange" in dataset.facets: + start_time, end_time = _parse_period(dataset.facets["timerange"]) + filename += f"_{start_time}-{end_time}" + filename += ".nc" + return Path( + dataset.session.preproc_dir, + dataset.facets.get("diagnostic", ""), # type: ignore[arg-type] + dataset.facets.get("variable_group", ""), # type: ignore[arg-type] + filename, + ) + + def _get_itype(step: str) -> str: """Get the input type of a preprocessor function.""" function = globals()[step] @@ -520,7 +591,7 @@ def __init__( filename: Path, attributes: dict[str, Any] | None = None, settings: dict[str, Any] | None = None, - datasets: list | None = None, + datasets: list[Dataset] | None = None, ) -> None: if datasets is not None: # Load data using a Dataset diff --git a/tests/integration/io/test_local.py b/tests/integration/io/test_local.py index 08e217de4c..81a536f196 100644 --- a/tests/integration/io/test_local.py +++ b/tests/integration/io/test_local.py @@ -13,10 +13,9 @@ from esmvalcore.io.local import ( LocalDataSource, LocalFile, - _get_output_file, _parse_period, ) -from esmvalcore.local import _select_drs, find_files +from esmvalcore.local import _get_output_file, _select_drs, find_files # Load test configuration with open( diff --git a/tests/integration/recipe/test_recipe.py b/tests/integration/recipe/test_recipe.py index 40a5c09205..b953fd7118 100644 --- a/tests/integration/recipe/test_recipe.py +++ b/tests/integration/recipe/test_recipe.py @@ -31,8 +31,11 @@ from esmvalcore.config._diagnostics import TAGS from esmvalcore.dataset import Dataset from esmvalcore.exceptions import RecipeError -from esmvalcore.io.local import _get_output_file -from esmvalcore.preprocessor import DEFAULT_ORDER, PreprocessingTask +from esmvalcore.preprocessor import ( + DEFAULT_ORDER, + PreprocessingTask, + _get_preprocessor_filename, +) from tests.integration.test_provenance import check_provenance if TYPE_CHECKING: @@ -468,8 +471,7 @@ def which(interpreter): dataset = next( d for d in datasets - if _get_output_file(d.facets, session.preproc_dir) - == product.filename + if _get_preprocessor_filename(d) == product.filename ) assert product.datasets == [dataset] attributes = dict(dataset.facets) @@ -788,21 +790,21 @@ def test_empty_variable(tmp_path, patched_datafinder, session): ), ("1990/*", "1990-2019"), ("*/1992", "1990-1992"), - ("1990/P2Y", "1990-P2Y"), - ("19900101/P2Y2M1D", "19900101-P2Y2M1D"), + ("1990/P2Y", "19900101-19920101"), + ("19900101/P2Y2M1D", "19900101-19920302"), ( "19900101T0000/P2Y2M1DT12H00M00S", - "19900101T0000-P2Y2M1DT12H00M00S", + "19900101T000000-19920302T120000", ), - ("P2Y/1992", "P2Y-1992"), - ("P1Y2M1D/19920101", "P1Y2M1D-19920101"), - ("P1Y2M1D/19920101T120000", "P1Y2M1D-19920101T120000"), - ("P2Y/*", "P2Y-2019"), - ("P2Y2M1D/*", "P2Y2M1D-2019"), - ("P2Y21DT12H00M00S/*", "P2Y21DT12H00M00S-2019"), - ("*/P2Y", "1990-P2Y"), - ("*/P2Y2M1D", "1990-P2Y2M1D"), - ("*/P2Y21DT12H00M00S", "1990-P2Y21DT12H00M00S"), + ("P2Y/1992", "19900101-19920101"), + ("P1Y2M1D/19920101", "19901031-19920101"), + ("P1Y2M1D/19920101T120000", "19901031T120000-19920101T120000"), + ("P2Y/*", "20170101-20190101"), + ("P2Y2M1D/*", "20161031-20190101"), + ("P2Y21DT12H00M00S/*", "20161211-20190101"), + ("*/P2Y", "19900101-19920101"), + ("*/P2Y2M1D", "19900101-19920302"), + ("*/P2Y21DT12H00M00S", "19900101-19920122"), ] diff --git a/tests/unit/config/test_config.py b/tests/unit/config/test_config.py index 3bd3fcc566..b9c85131db 100644 --- a/tests/unit/config/test_config.py +++ b/tests/unit/config/test_config.py @@ -167,17 +167,10 @@ def test_load_default_config(cfg_default, monkeypatch): default_dev_file = root_path / "config-developer.yml" config_dir = root_path / "config" / "configurations" / "defaults" default_project_settings = dask.config.collect( - paths=[str(p) for p in config_dir.glob("extra_facets_*.yml")], + paths=[str(p) for p in config_dir.glob("extra_facets_*.yml")] + + [str(config_dir / "preprocessor_filename_template.yml")], env={}, )["projects"] - # Add in projects without extra facets from the config developer file - # until we have transitioned all of its content to the new configuration - # system. - for project in yaml.safe_load( - default_dev_file.read_text(encoding="utf-8"), - ): - if project not in default_project_settings: - default_project_settings[project] = {} session = cfg_default.start_session("recipe_example") @@ -238,6 +231,26 @@ def test_load_default_config(cfg_default, monkeypatch): for key, value in default_cfg.items(): assert session[key] == value + # Check that project settings were loaded + assert set(session["projects"]) == { + # ESGF + "CMIP3", + "CMIP5", + "CMIP6", + "CORDEX", + "obs4MIPs", + # ESMValCore supported projects + "native6", + "ACCESS", + "CESM", + "EMAC", + "ICON", + "IPSLCM", + # ESMValTool CMORizers + "OBS", + "OBS6", + } + # Check output directories assert str(session.session_dir).startswith( str(Path.home() / "esmvaltool_output" / "recipe_example"), diff --git a/tests/unit/preprocessor/test_configuration.py b/tests/unit/preprocessor/test_configuration.py index 984f6c599b..55e4252125 100644 --- a/tests/unit/preprocessor/test_configuration.py +++ b/tests/unit/preprocessor/test_configuration.py @@ -1,13 +1,25 @@ """Tests for the basic configuration of the preprocessor module.""" +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pytest + +from esmvalcore.dataset import Dataset from esmvalcore.preprocessor import ( DEFAULT_ORDER, FINAL_STEPS, INITIAL_STEPS, MULTI_MODEL_FUNCTIONS, TIME_PREPROCESSORS, + _get_preprocessor_filename, ) +if TYPE_CHECKING: + from esmvalcore.config import Session + from esmvalcore.typing import Facets + def test_non_repeated_keys(): """Check that there are not repeated keys in the lists.""" @@ -28,3 +40,99 @@ def test_multimodel_functions_in_default_order(): assert all( time_preproc in DEFAULT_ORDER for time_preproc in MULTI_MODEL_FUNCTIONS ) + + +@pytest.mark.parametrize( + ("facets", "filename"), + [ + ( + { + "project": "CMIP6", + "mip": "Amon", + "short_name": "tas", + "dataset": "GFDL-ESM4", + "ensemble": "r1i1p1f1", + "exp": ["historical", "ssp585"], + "version": "v20191115", + "grid": "gn", + "timerange": "1850/P250Y", + }, + "CMIP6_GFDL-ESM4_Amon_historical-ssp585_r1i1p1f1_tas_gn_18500101-21000101.nc", + ), + ( + { + "project": "CMIP6", + "mip": "fx", + "short_name": "areacella", + "dataset": "GFDL-ESM4", + "ensemble": "r1i1p1f1", + "exp": "historical", + "version": "v20191115", + "grid": "gn", + }, + "CMIP6_GFDL-ESM4_fx_historical_r1i1p1f1_areacella_gn.nc", + ), + ], +) +def test_get_preprocessor_filename( + session: Session, + facets: Facets, + filename: str, +) -> None: + """Test the function `_get_preprocessor_filename`.""" + dataset = Dataset(**facets) + dataset.session = session + result = _get_preprocessor_filename(dataset) + expected = session.preproc_dir / filename + assert result == expected + + +def test_get_preprocessor_filename_default( + session: Session, +) -> None: + """Test the function `_get_preprocessor_filename`.""" + session["projects"]["CMIP6"].pop("preprocessor_filename_template") + dataset = Dataset( + project="TestProject", + mip="Amon", + short_name="tas", + dataset="TestModel", + version="v20191115", + grid="gn", + timerange="1850/2100", + # Add some facets of the wrong type that are not used in the filename. + ignore=[1, 2], # type: ignore[list-item] + ignore_too={"a": 1}, # type: ignore[arg-type] + ) + # Add a facet that is not used in the filename. + dataset.facets["long_name"] = "Surface Air Temperature" + dataset.session = session + result = _get_preprocessor_filename(dataset) + filename = "TestModel_gn_Amon_TestProject_tas_v20191115_1850-2100.nc" + expected = session.preproc_dir / filename + assert result == expected + + +def test_get_preprocessor_filename_falls_back_to_config_developer( + session: Session, +) -> None: + """Test the function `_get_preprocessor_filename`.""" + session["projects"]["CMIP6"].pop("preprocessor_filename_template") + dataset = Dataset( + project="CMIP6", + mip="Amon", + short_name="tas", + dataset="GFDL-ESM4", + ensemble="r1i1p1f1", + exp=["historical", "ssp585"], + version="v20191115", + grid="gn", + timerange="1850/2100", + ) + dataset.session = session + result = _get_preprocessor_filename(dataset) + filename = ( + "CMIP6_GFDL-ESM4_Amon_historical-ssp585_r1i1p1f1_tas_gn_1850-2100.nc" + ) + expected = session.preproc_dir / filename + assert result == expected diff --git a/tests/unit/recipe/test_recipe.py b/tests/unit/recipe/test_recipe.py index be5f98a427..6ed350c34d 100644 --- a/tests/unit/recipe/test_recipe.py +++ b/tests/unit/recipe/test_recipe.py @@ -839,7 +839,7 @@ def test_limit_datasets(): def test_get_default_settings(mocker): mocker.patch.object( _recipe, - "_get_output_file", + "_get_preprocessor_filename", autospec=True, return_value=Path("/path/to/file.nc"), ) diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 07cf485be8..a4fc0b527d 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -1737,9 +1737,9 @@ def test_load(mocker, session): "fixed_files", "chl_Oyr_CMIP5_CanESM2_historical_r1i1p1_", ) - _get_output_file = mocker.patch.object( + _get_preprocessor_filename = mocker.patch.object( esmvalcore.dataset, - "_get_output_file", + "_get_preprocessor_filename", create_autospec=True, return_value=output_file, ) @@ -1846,7 +1846,7 @@ def mock_preprocess( assert args == load_args - _get_output_file.assert_called_with(dataset.facets, session.preproc_dir) + _get_preprocessor_filename.assert_called_with(dataset) items[0].prepare.assert_called_once()