diff --git a/.circleci/config.yml b/.circleci/config.yml index 2950d4feed..e2e4907b69 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -66,7 +66,7 @@ commands: - run: name: Install git+ssh environment: - DEBIAN_FRONTEND: noninteractive # needed to install tzdata + DEBIAN_FRONTEND: noninteractive # needed to install tzdata command: apt update && apt install -y git ssh - checkout - check_changes @@ -141,6 +141,7 @@ jobs: . /opt/conda/etc/profile.d/conda.sh mkdir /logs conda activate esmvaltool + pip install intake-esgf 'globus-sdk<4' # TODO: remove before merging pip install --no-deps .[test] > /logs/install.txt 2>&1 pip check - test_and_report: @@ -155,7 +156,7 @@ jobs: name: Install gpg (required by codecov orb) command: apt update && apt install -y gpg - codecov/upload: - files: 'test-reports/coverage.xml' + files: "test-reports/coverage.xml" disable_search: true test_installation_from_source_test_mode: diff --git a/.gitignore b/.gitignore index 7f17eca52c..ee821184de 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ # Autogenerated files _sidebar.rst.inc +jupyter_execute/ # Distribution / packaging .Python diff --git a/doc/api/esmvalcore.esgf.rst b/doc/api/esmvalcore.esgf.rst index c6fac3553b..0420985f50 100644 --- a/doc/api/esmvalcore.esgf.rst +++ b/doc/api/esmvalcore.esgf.rst @@ -1,18 +1,10 @@ Find and download files from ESGF ================================= -This module provides the function :py:func:`esmvalcore.esgf.find_files` -for searching for files on ESGF using the ESMValTool vocabulary. -It returns :py:class:`esmvalcore.esgf.ESGFFile` objects, which have a convenient -:py:meth:`esmvalcore.esgf.ESGFFile.download` method for downloading the files. - -See :ref:`config-esgf` for instructions on configuring this module. - esmvalcore.esgf --------------- -.. autofunction:: esmvalcore.esgf.find_files -.. autofunction:: esmvalcore.esgf.download -.. autoclass:: esmvalcore.esgf.ESGFFile +.. automodule:: esmvalcore.esgf + :noindex: esmvalcore.esgf.facets ---------------------- diff --git a/doc/api/esmvalcore.io.intake_esgf.rst b/doc/api/esmvalcore.io.intake_esgf.rst new file mode 100644 index 0000000000..4fcb6c0bde --- /dev/null +++ b/doc/api/esmvalcore.io.intake_esgf.rst @@ -0,0 +1,5 @@ +esmvalcore.io.intake_esgf +========================= + +.. automodule:: esmvalcore.io.intake_esgf + :no-inherited-members: diff --git a/doc/api/esmvalcore.io.protocol.rst b/doc/api/esmvalcore.io.protocol.rst new file mode 100644 index 0000000000..f785893af9 --- /dev/null +++ b/doc/api/esmvalcore.io.protocol.rst @@ -0,0 +1,5 @@ +esmvalcore.io.protocol +====================== + +.. automodule:: esmvalcore.io.protocol + :no-inherited-members: diff --git a/doc/api/esmvalcore.io.rst b/doc/api/esmvalcore.io.rst new file mode 100644 index 0000000000..5d41a029c0 --- /dev/null +++ b/doc/api/esmvalcore.io.rst @@ -0,0 +1,18 @@ +Access data from any source +=========================== + +ESMValCore supports a modular system for reading data from various data sources. +In the future, this module may be extended with support for writing output data. + +The interface is defined in the :mod:`esmvalcore.io.protocol` module and +the other modules here provide an implementation for a particular data source. + +.. toctree:: + :maxdepth: 1 + + esmvalcore.io.protocol + esmvalcore.io.intake_esgf + +esmvalcore.io +------------- +.. 
automodule:: esmvalcore.io diff --git a/doc/api/esmvalcore.rst b/doc/api/esmvalcore.rst index d160246243..a2833b821e 100644 --- a/doc/api/esmvalcore.rst +++ b/doc/api/esmvalcore.rst @@ -14,6 +14,7 @@ library. This section documents the public API of ESMValCore. esmvalcore.dataset esmvalcore.esgf esmvalcore.exceptions + esmvalcore.io esmvalcore.iris_helpers esmvalcore.local esmvalcore.preprocessor diff --git a/doc/conf.py b/doc/conf.py index 3d5bf9d9d3..8008b73645 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -461,6 +461,7 @@ 'dask': ('https://docs.dask.org/en/stable/', None), 'distributed': ('https://distributed.dask.org/en/stable/', None), 'iris': ('https://scitools-iris.readthedocs.io/en/stable/', None), + 'intake_esgf': ('https://intake-esgf.readthedocs.io/en/stable/', None), 'esmf_regrid': ('https://iris-esmf-regrid.readthedocs.io/en/stable/', None), 'matplotlib': ('https://matplotlib.org/stable/', None), 'ncdata': ('https://ncdata.readthedocs.io/en/stable/', None), diff --git a/doc/configurations b/doc/configurations new file mode 120000 index 0000000000..17a515d17e --- /dev/null +++ b/doc/configurations @@ -0,0 +1 @@ +../esmvalcore/config/configurations \ No newline at end of file diff --git a/environment.yml b/environment.yml index 00a251a4ba..ff4fd48f70 100644 --- a/environment.yml +++ b/environment.yml @@ -9,7 +9,7 @@ dependencies: - cartopy - cf-units - cftime - - dask >=2025 # github.com/ESMValGroup/ESMValCore/issues/2503 + - dask >=2025 # github.com/ESMValGroup/ESMValCore/issues/2503 - dask-jobqueue - distributed - esgf-pyclient >=0.3.1 @@ -20,13 +20,15 @@ dependencies: - fire - geopy - humanfriendly + - intake-esgf + - globus-sdk <4 # https://github.com/esgf2-us/intake-esgf/issues/150 - intake-esm - - iris >=3.12.2 # https://github.com/SciTools/iris/issues/6417 + - iris >=3.12.2 # https://github.com/SciTools/iris/issues/6417 - iris-esmf-regrid >=0.11.0 - - iris-grib >=0.20.0 # github.com/ESMValGroup/ESMValCore/issues/2535 - - isodate >=0.7.0 # incompatible with very old 0.6.1 + - iris-grib >=0.20.0 # github.com/ESMValGroup/ESMValCore/issues/2535 + - isodate >=0.7.0 # incompatible with very old 0.6.1 - jinja2 - - libnetcdf !=4.9.1 # to avoid hdf5 warnings; only on conda-forge + - libnetcdf !=4.9.1 # to avoid hdf5 warnings; only on conda-forge - nc-time-axis - ncdata - nested-lookup diff --git a/esmvalcore/_main.py b/esmvalcore/_main.py index 7068f5afa2..f2b6f3b03b 100644 --- a/esmvalcore/_main.py +++ b/esmvalcore/_main.py @@ -159,6 +159,97 @@ class Config: files. """ + def __init__(self) -> None: + from rich.console import Console + + self.console = Console() + + def show( + self, + filter: tuple[str] | None = ("extra_facets",), # noqa: A002 + ) -> None: + """Show the current configuration. + + Parameters + ---------- + filter: + Filter this list of keys. By default, the `extra_facets` + key is filtered out, as it can be very large. 
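+
+        Examples
+        --------
+        Hide the (potentially very large) ``extra_facets`` and ``projects``
+        entries when printing the configuration (key names are illustrative;
+        any top-level configuration keys can be filtered)::
+
+            Config().show(filter=("extra_facets", "projects"))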
+ + """ + import yaml + from nested_lookup import nested_delete + from rich.syntax import Syntax + + from esmvalcore.config import CFG + + cfg = dict(CFG) + if filter: + for key in filter: + cfg = nested_delete(cfg, key) + exclude_msg = ( + ", excluding the keys " + ", ".join(f"'{f}'" for f in filter) + if filter + else "" + ) + self.console.print(f"# Current configuration{exclude_msg}:") + self.console.print( + Syntax( + yaml.safe_dump(cfg), + "yaml", + background_color="default", + ), + ) + + def list(self) -> None: + """List all available example configuration files.""" + import importlib.resources + + import esmvalcore.config + + config_dir = ( + importlib.resources.files(esmvalcore.config) / "configurations" + ) + self.console.print("Available configuration files:") + available_files = sorted( + f.name + for f in config_dir.iterdir() + if f.suffix == ".yml" # type: ignore[attr-defined] + ) + self.console.print("\n".join(f"- {f}" for f in available_files)) + + def copy( + self, + source_file: str, + target_file: Path | None = None, + overwrite: bool = False, + ) -> None: + """Copy one of the available example configuration files to the configuration directory.""" + import importlib.resources + + import esmvalcore.config + + target_dir = esmvalcore.config._config_object._get_user_config_dir() # noqa: SLF001 + target_file = target_dir / ( + source_file if target_file is None else target_file + ) + config_dir = ( + importlib.resources.files(esmvalcore.config) / "configurations" + ) + available_files = sorted( + f.name + for f in config_dir.iterdir() + if f.suffix == ".yml" # type: ignore[attr-defined] + ) + if source_file not in available_files: + msg = ( + f"Configuration file {source_file} not found, choose from " + f"{', '.join(available_files)}" + ) + raise FileNotFoundError(msg) + with importlib.resources.as_file(config_dir / source_file) as file: + self._copy_config_file(file, target_file, overwrite=overwrite) + @staticmethod def _copy_config_file( in_file: Path, @@ -184,7 +275,7 @@ def _copy_config_file( logger.info("Creating folder %s", target_folder) target_folder.mkdir(parents=True, exist_ok=True) - logger.info("Copying file %s to path %s.", in_file, out_file) + logger.info("Copying file %s to path %s", in_file, out_file) shutil.copy2(in_file, out_file) logger.info("Copy finished.") diff --git a/esmvalcore/_provenance.py b/esmvalcore/_provenance.py index dc669731e5..a4f3b4c79d 100644 --- a/esmvalcore/_provenance.py +++ b/esmvalcore/_provenance.py @@ -1,33 +1,48 @@ """Provenance module.""" +from __future__ import annotations + import copy import logging import os from functools import total_ordering +from pathlib import Path +from typing import TYPE_CHECKING, Any from netCDF4 import Dataset from PIL import Image from PIL.PngImagePlugin import PngInfo from prov.model import ProvDerivation, ProvDocument -from ._version import __version__ +from esmvalcore._version import __version__ +from esmvalcore.io.protocol import DataElement + +if TYPE_CHECKING: + from collections.abc import Iterable + + import prov.model + + from esmvalcore._task import BaseTask logger = logging.getLogger(__name__) ESMVALTOOL_URI_PREFIX = "https://www.esmvaltool.org/" -def create_namespace(provenance, namespace): +def create_namespace( + provenance: prov.model.ProvBundle, + namespace: str, +) -> None: """Create an esmvaltool namespace.""" provenance.add_namespace(namespace, uri=ESMVALTOOL_URI_PREFIX + namespace) -def get_esmvaltool_provenance(): +def get_esmvaltool_provenance() -> prov.model.ProvActivity: 
"""Create an esmvaltool run activity.""" provenance = ProvDocument() namespace = "software" create_namespace(provenance, namespace) - attributes = {} # TODO: add dependencies with versions here + attributes: dict = {} # TODO: add dependencies with versions here return provenance.activity( namespace + ":esmvaltool==" + __version__, other_attributes=attributes, @@ -37,7 +52,10 @@ def get_esmvaltool_provenance(): ESMVALTOOL_PROVENANCE = get_esmvaltool_provenance() -def attribute_to_authors(entity, authors): +def attribute_to_authors( + entity: prov.model.ProvEntity, + authors: list[dict[str, str]], +) -> None: """Attribute entity to authors.""" namespace = "author" create_namespace(entity.bundle, namespace) @@ -53,7 +71,10 @@ def attribute_to_authors(entity, authors): entity.wasAttributedTo(agent) -def attribute_to_projects(entity, projects): +def attribute_to_projects( + entity: prov.model.ProvEntity, + projects: list[str], +) -> None: """Attribute entity to projects.""" namespace = "project" create_namespace(entity.bundle, namespace) @@ -63,7 +84,10 @@ def attribute_to_projects(entity, projects): entity.wasAttributedTo(agent) -def get_recipe_provenance(documentation, filename): +def get_recipe_provenance( + documentation: dict[str, Any], + filename: Path, +) -> prov.model.ProvEntity: """Create a provenance entity describing a recipe.""" provenance = ProvDocument() @@ -84,7 +108,10 @@ def get_recipe_provenance(documentation, filename): return entity -def get_task_provenance(task, recipe_entity): +def get_task_provenance( + task: BaseTask, + recipe_entity: prov.model.ProvEntity, +) -> prov.model.ProvActivity: """Create a provenance activity describing a task.""" provenance = ProvDocument() create_namespace(provenance, "task") @@ -108,81 +135,102 @@ class TrackedFile: def __init__( self, - filename, - attributes=None, - ancestors=None, - prov_filename=None, + filename: DataElement | Path, + attributes: dict[str, Any] | None = None, + ancestors: Iterable[TrackedFile] | None = None, + prov_filename: str | None = None, ): """Create an instance of a file with provenance tracking. Arguments --------- - filename: str - Path to the file on disk. - attributes: dict + filename: + Path or data element containing the data described by the provenance. + + Attributes + ---------- Dictionary with facets describing the file. If set to None, this will be read from the file when provenance is initialized. - ancestors: :obj:`list` of :obj:`TrackedFile` + ancestors: Ancestor files. - prov_filename: str + prov_filename: The path this file has in the provenance record. This can differ from `filename` if the file was moved before resuming processing. 
""" self._filename = filename if prov_filename is None: - self.prov_filename = filename + self.prov_filename = ( + str(filename) if isinstance(filename, Path) else filename.name + ) else: self.prov_filename = prov_filename + self.attributes = copy.deepcopy(attributes) self.provenance = None self.entity = None self.activity = None - self._ancestors = [] if ancestors is None else ancestors + self._ancestors = [] if ancestors is None else list(ancestors) + + @property + def attributes(self) -> dict[str, Any]: + """Attributes describing the file.""" + if self._attributes is None: + msg = f"Call {self.__class__.__name__}.initialize_provenance before accessing attributes" + raise ValueError(msg) + return self._attributes - def __str__(self): + @attributes.setter + def attributes(self, value: dict[str, Any] | None): + """Set attributes describing the file.""" + self._attributes = value + + def __str__(self) -> str: """Return summary string.""" return f"{self.__class__.__name__}: {self.filename}" - def __repr__(self): + def __repr__(self) -> str: """Return representation string (e.g., used by ``pformat``).""" return f"{self.__class__.__name__}: {self.filename}" - def __eq__(self, other): + def __eq__(self, other) -> bool: """Check if `other` equals `self`.""" return hasattr(other, "filename") and self.filename == other.filename - def __lt__(self, other): + def __lt__(self, other) -> bool: """Check if `other` should be sorted before `self`.""" return hasattr(other, "filename") and self.filename < other.filename - def __hash__(self): + def __hash__(self) -> int: """Return a unique hash for the file.""" return hash(self.filename) - def copy_provenance(self): + def copy_provenance(self) -> TrackedFile: """Create a copy with identical provenance information.""" if self.provenance is None: msg = f"Provenance of {self} not initialized" raise ValueError(msg) - new = TrackedFile(self.filename, self.attributes) + new = TrackedFile(Path(self.filename), self.attributes) new.provenance = copy.deepcopy(self.provenance) new.entity = new.provenance.get_record(self.entity.identifier)[0] new.activity = new.provenance.get_record(self.activity.identifier)[0] return new @property - def filename(self): - """Filename.""" + def filename(self) -> DataElement | Path: + """Name of data described by this provenance document.""" return self._filename @property - def provenance_file(self): - """Filename of provenance.""" - return os.path.splitext(self.filename)[0] + "_provenance.xml" - - def initialize_provenance(self, activity): + def provenance_file(self) -> Path: + """Filename of provenance file.""" + if not isinstance(self.filename, Path): + msg = f"Saving provenance is only supported for pathlib.Path, not {type(self.filename)}" + raise NotImplementedError(msg) + return self.filename.with_name(f"{self.filename.stem}_provenance.xml") + + def initialize_provenance(self, activity: prov.model.ProvActivity) -> None: """Initialize the provenance document. Note: this also copies the ancestor provenance. 
Therefore, changes
@@ -191,30 +239,33 @@ def initialize_provenance(self, activity):
        """
        if self.provenance is not None:
            msg = f"Provenance of {self} already initialized"
-            raise ValueError(
-                msg,
-            )
+            raise ValueError(msg)
        self.provenance = ProvDocument()
        self._initialize_namespaces()
        self._initialize_activity(activity)
        self._initialize_entity()
        self._initialize_ancestors(activity)

-    def _initialize_namespaces(self):
+    def _initialize_namespaces(self) -> None:
        """Initialize the namespaces."""
        for namespace in ("file", "attribute", "preprocessor", "task"):
            create_namespace(self.provenance, namespace)

-    def _initialize_activity(self, activity):
+    def _initialize_activity(self, activity: prov.model.ProvActivity) -> None:
        """Copy the preprocessor task activity."""
        self.activity = activity
-        self.provenance.update(activity.bundle)
+        self.provenance.update(activity.bundle)  # type: ignore[attr-defined]

-    def _initialize_entity(self):
+    def _initialize_entity(self) -> None:
        """Initialize the entity representing the file."""
-        if self.attributes is None:
-            # This happens for ancestor files of preprocessor files as created
-            # in esmvalcore.preprocessor.Processorfile.__init__.
+        if self._attributes is None:
+            if not isinstance(self.filename, DataElement):
+                msg = "Delayed reading of attributes is only supported for `DataElement`s"
+                raise TypeError(msg)
+            # This is used to delay reading the attributes of ancestor files
+            # of preprocessor files as created in
+            # esmvalcore.preprocessor.PreprocessorFile.__init__ until after
+            # the data has been loaded.
            self.attributes = copy.deepcopy(self.filename.attributes)

        attributes = {
@@ -222,38 +273,44 @@ def _initialize_entity(self):
            for k, v in self.attributes.items()
            if k not in ("authors", "projects")
        }
-        self.entity = self.provenance.entity(
-            f"file:{self.filename}",
+        self.entity = self.provenance.entity(  # type: ignore[attr-defined]
+            f"file:{self.prov_filename}",
            attributes,
        )

        attribute_to_authors(self.entity, self.attributes.get("authors", []))
        attribute_to_projects(self.entity, self.attributes.get("projects", []))

-    def _initialize_ancestors(self, activity):
+    def _initialize_ancestors(self, activity: prov.model.ProvActivity) -> None:
        """Register ancestor files for provenance tracking."""
        for ancestor in self._ancestors:
            if ancestor.provenance is None:
-                if os.path.exists(ancestor.provenance_file):
+                if (
+                    isinstance(ancestor.filename, Path)
+                    and ancestor.provenance_file.exists()
+                ):
                    ancestor.restore_provenance()
                else:
                    ancestor.initialize_provenance(activity)
-            self.provenance.update(ancestor.provenance)
+            self.provenance.update(ancestor.provenance)  # type: ignore[attr-defined]
            self.wasderivedfrom(ancestor)

-    def wasderivedfrom(self, other):
+    def wasderivedfrom(
+        self,
+        other: TrackedFile | prov.model.ProvEntity,
+    ) -> None:
        """Let the file know that it was derived from other."""
        if isinstance(other, TrackedFile):
            other_entity = other.entity
        else:
            other_entity = other
-        self.provenance.update(other_entity.bundle)

        if not self.activity:
-            msg = "Activity not initialized."
+ msg = f"Provenance of {self} not initialized" raise ValueError(msg) + self.provenance.update(other_entity.bundle) # type: ignore[attr-defined, union-attr] self.entity.wasDerivedFrom(other_entity, self.activity) - def _select_for_include(self): + def _select_for_include(self) -> dict[str, str]: attributes = { "software": f"Created with ESMValTool v{__version__}", } @@ -262,13 +319,19 @@ def _select_for_include(self): return attributes @staticmethod - def _include_provenance_nc(filename, attributes): + def _include_provenance_nc( + filename: Path, + attributes: dict[str, str], + ) -> None: with Dataset(filename, "a") as dataset: for key, value in attributes.items(): setattr(dataset, key, value) @staticmethod - def _include_provenance_png(filename, attributes): + def _include_provenance_png( + filename: Path, + attributes: dict[str, str], + ) -> None: pnginfo = PngInfo() exif_tags = { "caption": "ImageDescription", @@ -279,8 +342,11 @@ def _include_provenance_png(filename, attributes): with Image.open(filename) as image: image.save(filename, pnginfo=pnginfo) - def _include_provenance(self): + def _include_provenance(self) -> None: """Include provenance information as metadata.""" + if not isinstance(self.filename, Path): + msg = f"Writing attributes is only supported for pathlib.Path, not {type(self.filename)}" + raise NotImplementedError(msg) attributes = self._select_for_include() # Attach provenance to supported file types @@ -289,32 +355,32 @@ def _include_provenance(self): if write: write(self.filename, attributes) - def save_provenance(self): + def save_provenance(self) -> None: """Export provenance information.""" self.provenance = ProvDocument( - records=set(self.provenance.records), - namespaces=self.provenance.namespaces, + records=set(self.provenance.records), # type: ignore[attr-defined] + namespaces=self.provenance.namespaces, # type: ignore[attr-defined] ) self._include_provenance() with open(self.provenance_file, "wb") as file: # Create file with correct permissions before saving. 
- self.provenance.serialize(file, format="xml") + self.provenance.serialize(file, format="xml") # type: ignore[attr-defined] self.activity = None self.entity = None self.provenance = None - def restore_provenance(self): + def restore_provenance(self) -> None: """Import provenance information from a previously saved file.""" self.provenance = ProvDocument.deserialize( self.provenance_file, format="xml", ) entity_uri = f"{ESMVALTOOL_URI_PREFIX}file{self.prov_filename}" - self.entity = self.provenance.get_record(entity_uri)[0] + self.entity = self.provenance.get_record(entity_uri)[0] # type: ignore[attr-defined] # Find the associated activity - for rec in self.provenance.records: + for rec in self.provenance.records: # type: ignore[attr-defined] if isinstance(rec, ProvDerivation): - if rec.args[0] == self.entity.identifier: + if rec.args[0] == self.entity.identifier: # type: ignore[attr-defined] activity_id = rec.args[2] - self.activity = self.provenance.get_record(activity_id)[0] + self.activity = self.provenance.get_record(activity_id)[0] # type: ignore[attr-defined] break diff --git a/esmvalcore/_recipe/check.py b/esmvalcore/_recipe/check.py index 70bc46eeb6..51cf777caa 100644 --- a/esmvalcore/_recipe/check.py +++ b/esmvalcore/_recipe/check.py @@ -16,7 +16,7 @@ import esmvalcore.preprocessor from esmvalcore.exceptions import InputFilesNotFound, RecipeError -from esmvalcore.local import _get_start_end_year, _parse_period +from esmvalcore.local import _parse_period from esmvalcore.preprocessor import TIME_PREPROCESSORS, PreprocessingTask from esmvalcore.preprocessor._multimodel import _get_operator_and_kwargs from esmvalcore.preprocessor._other import _get_var_info @@ -231,7 +231,9 @@ def data_availability(dataset: Dataset, log: bool = True) -> None: msg = f"Missing data for {dataset.summary(True)}" raise InputFilesNotFound(msg) - if "timerange" not in facets: + if "timerange" not in facets or any( + "timerange" not in f.facets for f in input_files + ): return start_date, end_date = _parse_period(facets["timerange"]) @@ -241,8 +243,10 @@ def data_availability(dataset: Dataset, log: bool = True) -> None: available_years: set[int] = set() for file in input_files: - start, end = _get_start_end_year(file) - available_years.update(range(start, end + 1)) + start_date, end_date = file.facets["timerange"].split("/") # type: ignore[union-attr] + start_year = int(start_date[:4]) + end_year = int(end_date[:4]) + available_years.update(range(start_year, end_year + 1)) missing_years = required_years - available_years if missing_years: diff --git a/esmvalcore/_recipe/recipe.py b/esmvalcore/_recipe/recipe.py index d6285c9aed..d9777e2582 100644 --- a/esmvalcore/_recipe/recipe.py +++ b/esmvalcore/_recipe/recipe.py @@ -23,6 +23,7 @@ from esmvalcore.dataset import Dataset from esmvalcore.exceptions import InputFilesNotFound, RecipeError from esmvalcore.local import ( + GRIB_FORMATS, _dates_to_timerange, _get_multiproduct_filename, _get_output_file, @@ -38,7 +39,6 @@ PreprocessorFile, ) from esmvalcore.preprocessor._area import _update_shapefile_path -from esmvalcore.preprocessor._io import GRIB_FORMATS from esmvalcore.preprocessor._multimodel import _get_stat_identifier from esmvalcore.preprocessor._regrid import ( _spec_to_latlonvals, @@ -60,6 +60,7 @@ from collections.abc import Iterable, Sequence from esmvalcore.config import Session + from esmvalcore.io.protocol import DataElement from esmvalcore.typing import Facets logger = logging.getLogger(__name__) @@ -328,20 +329,12 @@ def _update_weighting_settings( 
_exclude_dataset(settings, facets, "weighting_landsea_fraction") -def _add_to_download_list(dataset: Dataset) -> None: - """Add the files of `dataset` to `DOWNLOAD_FILES`.""" - for i, file in enumerate(dataset.files): - if isinstance(file, esgf.ESGFFile): - DOWNLOAD_FILES.add(file) - dataset.files[i] = file.local_file(dataset.session["download_dir"]) - - def _schedule_for_download(datasets: Iterable[Dataset]) -> None: """Schedule files for download.""" for dataset in datasets: - _add_to_download_list(dataset) + DOWNLOAD_FILES.update(dataset.files) for supplementary_ds in dataset.supplementaries: - _add_to_download_list(supplementary_ds) + DOWNLOAD_FILES.update(supplementary_ds.files) def _log_input_files(datasets: Iterable[Dataset]) -> None: @@ -367,12 +360,7 @@ def _log_input_files(datasets: Iterable[Dataset]) -> None: def _get_files_str(dataset: Dataset) -> str: """Get nice string representation of all files of a dataset.""" - return "\n".join( - f" {f}" - if f.exists() # type: ignore - else f" {f} (will be downloaded)" - for f in dataset.files - ) + return "\n".join(f" {f}" for f in dataset.files) def _check_input_files(input_datasets: Iterable[Dataset]) -> set[str]: @@ -455,10 +443,7 @@ def _get_common_attributes( # Ensure that attributes start_year and end_year are always available if at # least one of the input datasets defines it - if "timerange" in attributes: - start_year, end_year = _parse_period(attributes["timerange"]) - attributes["start_year"] = int(str(start_year[0:4])) - attributes["end_year"] = int(str(end_year[0:4])) + _set_start_end_year(attributes) return attributes @@ -722,7 +707,7 @@ def _get_preprocessor_products( ) for product in products: - _set_start_end_year(product) + _set_start_end_year(product.attributes) product.check() return products @@ -782,18 +767,18 @@ def _configure_multi_product_preprocessor( for product in multimodel_products | ensemble_products: product.check() - _set_start_end_year(product) + _set_start_end_year(product.attributes) -def _set_start_end_year(product: PreprocessorFile) -> None: +def _set_start_end_year(attributes: dict[str, Any]) -> None: """Set the attributes `start_year` and `end_year`. These attributes are used by many diagnostic scripts in ESMValTool. 
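+    For example (values are illustrative): a ``timerange`` of ``'2000/2014'``
+    results in ``start_year: 2000`` and ``end_year: 2014``.
+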
""" - if "timerange" in product.attributes: - start_year, end_year = _parse_period(product.attributes["timerange"]) - product.attributes["start_year"] = int(str(start_year[0:4])) - product.attributes["end_year"] = int(str(end_year[0:4])) + if "timerange" in attributes: + start_year, end_year = _parse_period(attributes["timerange"]) + attributes["start_year"] = int(str(start_year[0:4])) + attributes["end_year"] = int(str(end_year[0:4])) def _update_preproc_functions( @@ -916,7 +901,7 @@ def __init__( # Clear the global variable containing the set of files to download DOWNLOAD_FILES.clear() USED_DATASETS.clear() - self._download_files: set[esgf.ESGFFile] = set() + self._download_files: set[DataElement] = set() self.session = session self.session["write_ncl_interface"] = self._need_ncl( raw_recipe["diagnostics"], @@ -973,7 +958,7 @@ def _log_recipe_errors(self, exc: RecipeError) -> None: ) @staticmethod - def _need_ncl(raw_diagnostics: Diagnostic) -> bool: + def _need_ncl(raw_diagnostics: dict[str, Diagnostic]) -> bool: if not raw_diagnostics: return False for diagnostic in raw_diagnostics.values(): @@ -996,8 +981,8 @@ def _initialize_provenance(self, raw_documentation: dict[str, Any]): def _initialize_diagnostics( self, - raw_diagnostics: Diagnostic, - ) -> Diagnostic: + raw_diagnostics: dict[str, Diagnostic], + ) -> dict[str, Diagnostic]: """Define diagnostics in recipe.""" logger.debug("Retrieving diagnostics from recipe") check.diagnostics(raw_diagnostics) @@ -1013,7 +998,7 @@ def _initialize_diagnostics( variable_names = tuple(raw_diagnostic.get("variables", {})) diagnostic["scripts"] = self._initialize_scripts( name, - raw_diagnostic.get("scripts"), + raw_diagnostic.get("scripts", {}), variable_names, ) for key in ("themes", "realms"): @@ -1342,8 +1327,10 @@ def run(self) -> None: filled_recipe = self.write_filled_recipe() # Download required data - if self.session["search_esgf"] != "never": - esgf.download(self._download_files, self.session["download_dir"]) + # Add a special case for ESGF files to enable parallel downloads + esgf.download(self._download_files, self.session["download_dir"]) + for file in self._download_files: + file.prepare() self.tasks.run(max_parallel_tasks=self.session["max_parallel_tasks"]) logger.info( diff --git a/esmvalcore/_recipe/to_datasets.py b/esmvalcore/_recipe/to_datasets.py index 7aab83719b..5855353cc9 100644 --- a/esmvalcore/_recipe/to_datasets.py +++ b/esmvalcore/_recipe/to_datasets.py @@ -5,14 +5,13 @@ import logging from collections.abc import Iterable, Iterator, Sequence from copy import deepcopy -from numbers import Number from typing import TYPE_CHECKING, Any from esmvalcore.cmor.table import _CMOR_KEYS, _update_cmor_facets from esmvalcore.dataset import INHERITED_FACETS, Dataset, _isglob from esmvalcore.esgf.facets import FACETS from esmvalcore.exceptions import RecipeError -from esmvalcore.local import LocalFile, _replace_years_with_timerange +from esmvalcore.local import _replace_years_with_timerange from esmvalcore.preprocessor._derive import get_required from esmvalcore.preprocessor._io import DATASET_KEYS from esmvalcore.preprocessor._supplementary_vars import ( @@ -219,7 +218,7 @@ def _get_supplementary_short_names( var_facets = dict(facets) _update_cmor_facets(var_facets) realms = var_facets.get("modeling_realm", []) - if isinstance(realms, (str, Number, bool)): + if isinstance(realms, (str, int)): realms = [str(realms)] ocean_realms = {"ocean", "seaIce", "ocnBgchem"} is_ocean_variable = any(realm in ocean_realms for realm in realms) @@ 
-511,16 +510,12 @@ def _report_unexpanded_globs( expanded_ds.supplementaries = [] if expanded_ds.files: - if any(isinstance(f, LocalFile) for f in expanded_ds.files): - paths_msg = "paths to the " - else: - paths_msg = "" msg = ( - f"{msg}\nDo the {paths_msg}files:\n" + f"{msg}\nPlease check why the files:\n" + "\n".join( f"{f} with facets: {f.facets}" for f in expanded_ds.files ) - + "\nprovide the missing facet values?" + + "\ndo not provide the missing facet values." ) else: timerange = expanded_ds.facets.get("timerange") diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index d2c0831ed3..fe77472888 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -351,7 +351,7 @@ def __init__(self, prev_preproc_dir, preproc_dir, name): for prov_filename, attributes in prev_metadata.items(): # Update the filename in case the output directory was moved # since the original run - filename = str(prev_preproc_dir / Path(prov_filename).name) + filename = prev_preproc_dir / Path(prov_filename).name attributes["filename"] = filename product = TrackedFile( filename, @@ -676,7 +676,7 @@ def _run(self, input_files): msg, ) - def _collect_provenance(self): + def _collect_provenance(self) -> None: """Process provenance information provided by the diagnostic script.""" provenance_file = ( Path(self.settings["run_dir"]) / "diagnostic_provenance.yml" @@ -766,7 +766,7 @@ def _collect_provenance(self): TAGS.replace_tags_in_dict(attributes) - product = TrackedFile(filename, attributes, ancestors) + product = TrackedFile(Path(filename), attributes, ancestors) product.initialize_provenance(self.activity) _write_citation_files(product.filename, product.provenance) product.save_provenance() diff --git a/esmvalcore/cmor/_fixes/icon/_base_fixes.py b/esmvalcore/cmor/_fixes/icon/_base_fixes.py index c4c12da334..3e268ec29e 100644 --- a/esmvalcore/cmor/_fixes/icon/_base_fixes.py +++ b/esmvalcore/cmor/_fixes/icon/_base_fixes.py @@ -23,9 +23,10 @@ from iris.cube import Cube, CubeList from iris.mesh import Connectivity, MeshXY +import esmvalcore.local from esmvalcore.cmor._fixes.native_datasets import NativeDatasetFix +from esmvalcore.config._data_sources import _get_data_sources from esmvalcore.iris_helpers import add_leading_dim_to_cube, date2num -from esmvalcore.local import _get_data_sources logger = logging.getLogger(__name__) @@ -322,10 +323,11 @@ def _get_grid_from_cube_attr(self, cube: Cube) -> Cube: def _get_grid_from_rootpath(self, grid_name: str) -> CubeList | None: """Try to get grid from the ICON rootpath.""" glob_patterns: list[Path] = [] - for data_source in _get_data_sources("ICON"): - glob_patterns.extend( - data_source.get_glob_patterns(**self.extra_facets), - ) + for data_source in _get_data_sources(self.session, "ICON"): # type: ignore[arg-type] + if isinstance(data_source, esmvalcore.local.LocalDataSource): + glob_patterns.extend( + data_source.get_glob_patterns(**self.extra_facets), + ) possible_grid_paths = [d.parent / grid_name for d in glob_patterns] for grid_path in possible_grid_paths: if grid_path.is_file(): diff --git a/esmvalcore/cmor/check.py b/esmvalcore/cmor/check.py index cfc803a796..e2d40aa05c 100644 --- a/esmvalcore/cmor/check.py +++ b/esmvalcore/cmor/check.py @@ -14,6 +14,7 @@ import iris.exceptions import iris.util import numpy as np +import yaml from esmvalcore.cmor._utils import ( _get_alternative_generic_lev_coord, @@ -50,6 +51,12 @@ class CheckLevels(IntEnum): """Do not fail for any discrepancy with CMOR standards.""" +yaml.representer.SafeRepresenter.add_representer( + 
CheckLevels,
+    lambda dumper, data: dumper.represent_str(data.name.lower()),
+)
+
+
 class CMORCheckError(Exception):
     """Exception raised when a cube does not pass the CMORCheck."""
 
diff --git a/esmvalcore/config/_config.py b/esmvalcore/config/_config.py
index 6a3670a7ca..121ee2b126 100644
--- a/esmvalcore/config/_config.py
+++ b/esmvalcore/config/_config.py
@@ -94,7 +94,7 @@ def warn_if_old_extra_facets_exist() -> None:
         )
 
 
-def load_config_developer(cfg_file):
+def load_config_developer(cfg_file) -> dict:
     """Read the developer's configuration file."""
     with open(cfg_file, encoding="utf-8") as file:
         cfg = yaml.safe_load(file)
@@ -120,6 +120,7 @@ def load_config_developer(cfg_file):
         CFG[project] = settings
 
     read_cmor_tables(cfg_file)
+    return cfg
 
 
 def get_project_config(project):
diff --git a/esmvalcore/config/_config_object.py b/esmvalcore/config/_config_object.py
index 0030b23adb..175c8115a4 100644
--- a/esmvalcore/config/_config_object.py
+++ b/esmvalcore/config/_config_object.py
@@ -13,6 +13,7 @@
 import yaml
 
 import esmvalcore
+from esmvalcore.config._config import load_config_developer
 from esmvalcore.config._config_validators import (
     _deprecated_options_defaults,
     _deprecators,
@@ -50,9 +51,7 @@ def _get_user_config_dir() -> Path:
                 f"ESMVALTOOL_CONFIG_DIR environment variable: "
                 f"{user_config_dir} is not an existing directory"
             )
-            raise NotADirectoryError(
-                msg,
-            )
+            raise NotADirectoryError(msg)
         return user_config_dir
     return Path.home() / ".config" / "esmvaltool"
@@ -85,10 +84,7 @@ class Config(ValidatedConfig):
     _validate = _validators
     _deprecate = _deprecators
     _deprecated_defaults = _deprecated_options_defaults
-    _warn_if_missing = (
-        ("drs", URL),
-        ("rootpath", URL),
-    )
+    _warn_if_missing = (("projects", URL),)
 
     def __init__(self, *args, **kwargs):
         """Initialize class instance."""
@@ -145,6 +141,10 @@ def _load_user_config(
 
         try:
             new.update(mapping)
+            # Add known projects from config-developer file while we still have it.
+            for project in load_config_developer(new["config_developer_file"]):
+                if project not in new["projects"]:
+                    new["projects"][project] = {}
             new.check_missing()
         except InvalidConfigParameter as exc:
             msg = (
@@ -362,7 +362,10 @@ def load_from_dirs(self, dirs: Iterable[str | Path]) -> None:
         new_config_dict = self._get_config_dict_from_dirs(dirs)
         self.clear()
         self.update(new_config_dict)
-
+        # Add known projects from config-developer file while we still have it.
+        for project in load_config_developer(self["config_developer_file"]):
+            if project not in self["projects"]:
+                self["projects"][project] = {}
         self.check_missing()
 
     def reload(self) -> None:
diff --git a/esmvalcore/config/_config_validators.py b/esmvalcore/config/_config_validators.py
index c61e4ea309..df45da3c93 100644
--- a/esmvalcore/config/_config_validators.py
+++ b/esmvalcore/config/_config_validators.py
@@ -347,6 +347,7 @@ def validate_projects(value: Any) -> Any:
     """Validate projects mapping."""
     mapping = validate_dict(value)
     options_for_project: dict[str, Callable[[Any], Any]] = {
+        "data": validate_dict,  # TODO: try to create data sources here
         "extra_facets": validate_dict,
     }
     for project, project_config in mapping.items():
@@ -490,11 +491,67 @@ def deprecate_extra_facets_dir(
     _handle_deprecation(option, deprecated_version, remove_version, more_info)
 
 
+def deprecate_rootpath(
+    validated_config: ValidatedConfig,
+    value: Any,
+    validated_value: Any,
+) -> None:
+    """Deprecate ``rootpath`` option.
+
+    Parameters
+    ----------
+    validated_config:
+        ``ValidatedConfig`` instance which will be modified in place.
+    value:
+        Raw input value for ``rootpath`` option.
+    validated_value:
+        Validated value for ``rootpath`` option.
+
+    """
+    validated_config  # noqa: B018
+    value  # noqa: B018
+    validated_value  # noqa: B018
+    option = "rootpath"
+    deprecated_version = "2.13.0"
+    remove_version = "2.16.0"
+    more_info = " Please define data sources using the option `projects: data:` instead."
+    _handle_deprecation(option, deprecated_version, remove_version, more_info)
+
+
+def deprecate_drs(
+    validated_config: ValidatedConfig,
+    value: Any,
+    validated_value: Any,
+) -> None:
+    """Deprecate ``drs`` option.
+
+    Parameters
+    ----------
+    validated_config:
+        ``ValidatedConfig`` instance which will be modified in place.
+    value:
+        Raw input value for ``drs`` option.
+    validated_value:
+        Validated value for ``drs`` option.
+
+    """
+    validated_config  # noqa: B018
+    value  # noqa: B018
+    validated_value  # noqa: B018
+    option = "drs"
+    deprecated_version = "2.13.0"
+    remove_version = "2.16.0"
+    more_info = " Please define data sources using the option `projects: data:` instead."
+    _handle_deprecation(option, deprecated_version, remove_version, more_info)
+
+
 # Example usage: see removed files in
 # https://github.com/ESMValGroup/ESMValCore/pull/2213
 _deprecators: dict[str, Callable] = {
     "config_file": deprecate_config_file,  # TODO: remove in v2.14.0
     "extra_facets_dir": deprecate_extra_facets_dir,  # TODO: remove in v2.15.0
+    "drs": deprecate_drs,  # TODO: remove in v2.16.0
+    "rootpath": deprecate_rootpath,  # TODO: remove in v2.16.0
 }
diff --git a/esmvalcore/config/_data_sources.py b/esmvalcore/config/_data_sources.py
new file mode 100644
index 0000000000..7da25beb6b
--- /dev/null
+++ b/esmvalcore/config/_data_sources.py
@@ -0,0 +1,71 @@
+"""Module for configuring data sources."""
+
+import logging
+
+import esmvalcore.esgf
+import esmvalcore.local
+from esmvalcore.config import Session
+from esmvalcore.exceptions import RecipeError
+from esmvalcore.io import load_data_sources
+from esmvalcore.io.protocol import DataSource
+
+logger = logging.getLogger(__name__)
+
+
+def _get_data_sources(
+    session: Session,
+    project: str,
+) -> list[DataSource]:
+    """Get the list of available data sources including legacy configuration.
+
+    Arguments
+    ---------
+    session:
+        The configuration.
+    project:
+        Data sources for this project are returned.
+
+    Returns
+    -------
+    :obj:`list` of :obj:`DataSource`:
+        A list of available data sources.
+
+    Raises
+    ------
+    ValueError:
+        If the project or its settings are not found in the configuration.
+
+    """
+    try:
+        return load_data_sources(session, project)
+    except ValueError:
+        pass
+
+    # Use legacy data sources from config-user.yml and config-developer.yml.
+    data_sources: list[DataSource] = []
+    try:
+        legacy_local_data_sources = esmvalcore.local._get_data_sources(project)  # noqa: SLF001
+    except (RecipeError, KeyError):
+        # The project is not configured in config-developer.yml
+        legacy_local_data_sources = []
+    else:
+        if (
+            session["search_esgf"] != "never"
+            and project in esmvalcore.esgf.facets.FACETS
+        ):
+            data_source = esmvalcore.esgf.ESGFDataSource(
+                name="legacy-esgf",
+                project=project,
+                priority=2,
+                download_dir=session["download_dir"],
+            )
+            data_sources.append(data_source)
+        data_sources.extend(legacy_local_data_sources)
+
+    if not data_sources:
+        msg = (
+            f"No data sources found for project '{project}'. 
" + f"Check your configuration under 'projects: {project}: data'" + ) + raise ValueError(msg) + return data_sources diff --git a/esmvalcore/config/_validated_config.py b/esmvalcore/config/_validated_config.py index 0dfca3b521..624068c411 100644 --- a/esmvalcore/config/_validated_config.py +++ b/esmvalcore/config/_validated_config.py @@ -57,7 +57,7 @@ class ValidatedConfig(MutableMapping): """ # validate values on the way in - def __init__(self, *args, **kwargs): + def __init__(self, *args, **kwargs) -> None: super().__init__() self._mapping: dict[str, Any] = {} self.update(*args, **kwargs) diff --git a/esmvalcore/config/config-logging.yml b/esmvalcore/config/config-logging.yml index 6635ca63ec..8d4fb94b17 100644 --- a/esmvalcore/config/config-logging.yml +++ b/esmvalcore/config/config-logging.yml @@ -1,25 +1,28 @@ # Logger configuration --- - version: 1 disable_existing_loggers: false formatters: console: - format: '%(asctime)s UTC [%(process)d] %(levelname)-7s %(message)s' + format: "%(asctime)s UTC [%(process)d] %(levelname)-7s %(message)s" brief: - format: '%(levelname)-7s [%(process)d] %(message)s' + format: "%(levelname)-7s [%(process)d] %(message)s" debug: - format: '%(asctime)s UTC [%(process)d] %(levelname)-7s %(name)s:%(lineno)s %(message)s' + format: "%(asctime)s UTC [%(process)d] %(levelname)-7s %(name)s:%(lineno)s %(message)s" filters: - only_cmor: # only events from CMOR check and generic fixes + only_cmor: # only events from CMOR check and generic fixes (): esmvalcore.config._logging.FilterMultipleNames names: [esmvalcore.cmor.check, esmvalcore.cmor._fixes.fix.genericfix] mode: allow - no_cmor: # no events from CMOR check and generic fixes + no_cmor: # no events from CMOR check and generic fixes (): esmvalcore.config._logging.FilterMultipleNames names: [esmvalcore.cmor.check, esmvalcore.cmor._fixes.fix.genericfix] mode: disallow - no_external_warnings: # no events from external Python warnings + no_intake_esgf: # no events from intake-esgf + (): esmvalcore.config._logging.FilterMultipleNames + names: ["intake-esgf"] + mode: disallow + no_external_warnings: # no events from external Python warnings (): esmvalcore.config._logging.FilterExternalWarnings handlers: console: @@ -27,21 +30,21 @@ handlers: level: INFO formatter: console stream: ext://sys.stdout - filters: [no_cmor, no_external_warnings] + filters: [no_cmor, no_external_warnings, no_intake_esgf] simple_log_file: class: logging.FileHandler level: INFO formatter: brief filename: main_log.txt mode: w - filters: [no_cmor, no_external_warnings] + filters: [no_cmor, no_external_warnings, no_intake_esgf] debug_log_file: class: logging.FileHandler level: DEBUG formatter: debug filename: main_log_debug.txt mode: w - cmor_log: # only contains output from CMOR check and generic fixes + cmor_log: # only contains output from CMOR check and generic fixes class: logging.FileHandler level: INFO formatter: brief diff --git a/esmvalcore/config/configurations/access-data.yml b/esmvalcore/config/configurations/access-data.yml new file mode 100644 index 0000000000..259bc32cd4 --- /dev/null +++ b/esmvalcore/config/configurations/access-data.yml @@ -0,0 +1,13 @@ +projects: + ACCESS: + data: + access-sub-dataset: + type: "esmvalcore.local.LocalDataSource" + rootpath: ~/climate_data + dirname_template: "{dataset}/{sub_dataset}/{exp}/{modeling_realm}/netCDF" + filename_template: "{sub_dataset}.{freq_attribute}-*.nc" + access-ocean: + type: "esmvalcore.local.LocalDataSource" + rootpath: ~/climate_data + dirname_template: 
"{dataset}/{sub_dataset}/{exp}/{modeling_realm}/netCDF" + filename_template: "ocean_{freq_attribute}.nc-*" diff --git a/esmvalcore/config/configurations/badc-data.yml b/esmvalcore/config/configurations/badc-data.yml new file mode 100644 index 0000000000..0b9558335f --- /dev/null +++ b/esmvalcore/config/configurations/badc-data.yml @@ -0,0 +1,50 @@ +projects: + CMIP6: + data: + badc: + type: "esmvalcore.local.LocalDataSource" + rootpath: /badc/cmip6/data + dirname_template: "{project}/{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{version}" + filename_template: "{short_name}_{mip}_{dataset}_{exp}_{ensemble}_{grid}*.nc" + CMIP5: + data: + badc: + type: "esmvalcore.local.LocalDataSource" + rootpath: /badc/cmip5/data + dirname_template: "{project.lower}/{product}/{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{mip}/{ensemble}/{version}" + filename_template: "{short_name}_{mip}_{dataset}_{exp}_{ensemble}*.nc" + CMIP3: + data: + badc: + type: "esmvalcore.local.LocalDataSource" + rootpath: /badc/cmip3_drs/data + dirname_template: "{project.lower}/output/{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{short_name}/{ensemble}/{version}" + filename_template: "{short_name}_*.nc" + CORDEX: + data: + badc: + type: "esmvalcore.local.LocalDataSource" + rootpath: /badc/cordex/data + dirname_template: "{project}/output/{domain}/{institute}/{driver}/{exp}/{ensemble}/{institute}-{dataset}/{rcm_version}/{mip}/{short_name}/{version}" + filename_template: "{short_name}_{domain}_{driver}_{exp}_{ensemble}_{institute}-{dataset}_{rcm_version}_{mip}*.nc" + obs4MIPs: + data: + badc: + type: "esmvalcore.local.LocalDataSource" + rootpath: /gws/nopw/j04/esmeval/obsdata-v2 + dirname_template: "Tier{tier}/{dataset}" + filename_template: "{short_name}_*.nc" + OBS6: + data: + badc: + type: "esmvalcore.local.LocalDataSource" + rootpath: /gws/nopw/j04/esmeval/obsdata-v2 + dirname_template: "Tier{tier}/{dataset}" + filename_template: "{project}_{dataset}_{type}_{version}_{mip}_{short_name}[_.]*nc" + OBS: + data: + badc: + type: "esmvalcore.local.LocalDataSource" + rootpath: /gws/nopw/j04/esmeval/obsdata-v2 + dirname_template: "Tier{tier}/{dataset}" + filename_template: "{project}_{dataset}_{type}_{version}_{mip}_{short_name}[_.]*nc" diff --git a/esmvalcore/config/configurations/cesm-data.yml b/esmvalcore/config/configurations/cesm-data.yml new file mode 100644 index 0000000000..95439d6d8c --- /dev/null +++ b/esmvalcore/config/configurations/cesm-data.yml @@ -0,0 +1,14 @@ +projects: + CESM: + data: + run: &cesm + type: "esmvalcore.local.LocalDataSource" + rootpath: ~/climate_data + dirname_template: "" # run directory + filename_template: "{case}.{scomp}.{type}.{string}*nc" + short-term-archive: + <<: *cesm + dirname_template: "{case}/{gcomp}/hist" # short-term archiving + postprocessed: + <<: *cesm + dirname_template: "{case}/{gcomp}/proc/{tdir}/{tperiod}" # postprocessed data diff --git a/esmvalcore/config/configurations/defaults/config-user.yml b/esmvalcore/config/configurations/defaults/config-user.yml index b2f8950a1c..a13aa6e32d 100644 --- a/esmvalcore/config/configurations/defaults/config-user.yml +++ b/esmvalcore/config/configurations/defaults/config-user.yml @@ -4,17 +4,12 @@ # # Note for users: # -------------- -# Site-specific entries for different HPC centers are given at the bottom of -# this file. Comment out/replace as needed. This default version of the file -# can be used in combination with the command line argument -# ``search_esgf=when_missing``. 
If only certain values are allowed for an -# option, these are listed after ``---``. The option in square brackets is the -# default value, i.e., the one that is used if this option is omitted in the -# file. +# If only certain values are allowed for an option, these are listed after +# ``---``. The option in square brackets is the default value, i.e., the one +# that is used if this option is omitted in the file. # ############################################################################### --- - # Destination directory where all output will be written # Includes log files and performance stats. output_dir: ~/esmvaltool_output @@ -82,225 +77,3 @@ config_developer_file: null # A profiler tells you which functions in your code take most time to run. # Only available for Python diagnostics. profile_diagnostic: false - -# Rootpaths to the data from different projects -# This default setting will work if files have been downloaded by ESMValTool -# via ``search_esgf``. Lists are also possible. For site-specific entries and -# more examples, see below. Comment out these when using a site-specific path. -rootpath: - default: ~/climate_data - -# Directory structure for input data --- [default]/ESGF/BADC/DKRZ/ETHZ/etc. -# This default setting will work if files have been downloaded by ESMValTool -# via ``search_esgf``. See ``config-developer.yml`` for definitions. Comment -# out/replace as per needed. -drs: - CMIP3: ESGF - CMIP5: ESGF - CMIP6: ESGF - CORDEX: ESGF - obs4MIPs: ESGF - -# Example rootpaths and directory structure names for different projects. -# For each project, the entry can be a single path, a list of paths, or a -# mapping from paths to directory structure names. -# For single paths and list of paths, the directory structure names can be -# defined under 'drs'. -# If no path is defined for a project, the tool will look in the 'default' -# path. -# If no directory structure name is given, the name 'default' will be used. -# Directory structures corresponding to the names are defined in the file -# config-developer.yml. -# For site-specific entries, see below. -#rootpath: -# CMIP6: -# /path/to/data: DKRZ -# ~/path/to/more/data: ESGF -# CMIP5: -# - ~/cmip5_inputpath1 -# - ~/cmip5_inputpath2 -# CMIP3: ~/cmip6_inputpath -# OBS: ~/obs_inputpath -# OBS6: ~/obs6_inputpath -# obs4MIPs: ~/obs4mips_inputpath -# ana4mips: ~/ana4mips_inputpath -# native6: ~/native6_inputpath -# RAWOBS: ~/rawobs_inputpath -# default: ~/default_inputpath -#drs: -# CMIP3: ESGF -# CMIP5: ESGF -# CORDEX: ESGF -# obs4MIPs: ESGF - -# Directory tree created by automatically downloading from ESGF -# Uncomment the lines below to locate data that has been automatically -# downloaded from ESGF (using ``search_esgf``). -#rootpath: -# CMIP3: ~/climate_data -# CMIP5: ~/climate_data -# CMIP6: ~/climate_data -# CORDEX: ~/climate_data -# obs4MIPs: ~/climate_data -#drs: -# CMIP3: ESGF -# CMIP5: ESGF -# CMIP6: ESGF -# CORDEX: ESGF -# obs4MIPs: ESGF - -# Site-specific entries: JASMIN -# Uncomment the lines below to locate data on JASMIN. 
-#auxiliary_data_dir: /gws/nopw/j04/esmeval/aux_data/AUX -#rootpath: -# CMIP6: /badc/cmip6/data/CMIP6 -# CMIP5: /badc/cmip5/data/cmip5/output1 -# CMIP3: /badc/cmip3_drs/data/cmip3/output -# OBS: /gws/nopw/j04/esmeval/obsdata-v2 -# OBS6: /gws/nopw/j04/esmeval/obsdata-v2 -# obs4MIPs: /gws/nopw/j04/esmeval/obsdata-v2 -# ana4mips: /gws/nopw/j04/esmeval/obsdata-v2 -# CORDEX: /badc/cordex/data/CORDEX/output -#drs: -# CMIP6: BADC -# CMIP5: BADC -# CMIP3: BADC -# CORDEX: BADC -# OBS: default -# OBS6: default -# obs4MIPs: default -# ana4mips: default - -# Site-specific entries: DKRZ-Levante -# For bd0854 members a shared download directory is available -#search_esgf: when_missing -#download_dir: /work/bd0854/DATA/ESMValTool2/download -# Uncomment the lines below to locate data on Levante at DKRZ. -#auxiliary_data_dir: /work/bd0854/DATA/ESMValTool2/AUX -#rootpath: -# CMIP6: -# /work/bd0854/DATA/ESMValTool2/CMIP6_DKRZ: DKRZ -# /work/bd0854/DATA/ESMValTool2/download: ESGF -# CMIP5: -# /work/bd0854/DATA/ESMValTool2/CMIP5_DKRZ: DKRZ -# /work/bd0854/DATA/ESMValTool2/download: ESGF -# CMIP3: -# /work/bd0854/DATA/ESMValTool2/CMIP3: DKRZ -# /work/bd0854/DATA/ESMValTool2/download: ESGF -# CORDEX: -# /work/ik1017/C3SCORDEX/data/c3s-cordex/output: BADC -# /work/bd0854/DATA/ESMValTool2/download: ESGF -# OBS: /work/bd0854/DATA/ESMValTool2/OBS -# OBS6: /work/bd0854/DATA/ESMValTool2/OBS -# obs4MIPs: -# /work/bd0854/DATA/ESMValTool2/OBS: default -# /work/bd0854/DATA/ESMValTool2/download: ESGF -# ana4mips: /work/bd0854/DATA/ESMValTool2/OBS -# native6: -# /work/bd0854/DATA/ESMValTool2/RAWOBS: default -# /pool/data/ERA5: DKRZ-ERA5-GRIB -# RAWOBS: /work/bd0854/DATA/ESMValTool2/RAWOBS -#drs: -# ana4mips: default -# OBS: default -# OBS6: default -# native6: default - -# Site-specific entries: ETHZ -# Uncomment the lines below to locate data at ETHZ. -#rootpath: -# CMIP6: /net/atmos/data/cmip6 -# CMIP5: /net/atmos/data/cmip5 -# CMIP3: /net/atmos/data/cmip3 -# OBS: /net/exo/landclim/PROJECTS/C3S/datadir/obsdir/ -#drs: -# CMIP6: ETHZ -# CMIP5: ETHZ -# CMIP3: ETHZ - -# Site-specific entries: IPSL -# Uncomment the lines below to locate data on Ciclad at IPSL. -#rootpath: -# IPSLCM: / -# CMIP5: /bdd/CMIP5/output -# CMIP6: /bdd/CMIP6 -# CMIP3: /bdd/CMIP3 -# CORDEX: /bdd/CORDEX/output -# obs4MIPs: /bdd/obs4MIPS/obs-CFMIP/observations -# ana4mips: /not_yet -# OBS: /not_yet -# OBS6: /not_yet -# RAWOBS: /not_yet -#drs: -# CMIP6: DKRZ -# CMIP5: DKRZ -# CMIP3: IPSL -# CORDEX: BADC -# obs4MIPs: IPSL -# ana4mips: default -# OBS: not_yet -# OBS6: not_yet - -# Site-specific entries: Met Office - Old VDI -# Uncomment the lines below to locate data at the Met Office. -#rootpath: -# CMIP5: /project/champ/data/cmip5/output1 -# CMIP6: /project/champ/data/CMIP6 -# CORDEX: /project/champ/data/cordex/output -# OBS: /data/users/esmval/ESMValTool/obs -# OBS6: /data/users/esmval/ESMValTool/obs -# obs4MIPs: /data/users/esmval/ESMValTool/obs -# ana4mips: /project/champ/data/ana4MIPs -# native6: /data/users/esmval/ESMValTool/rawobs -# RAWOBS: /data/users/esmval/ESMValTool/rawobs -#drs: -# CMIP5: BADC -# CMIP6: BADC -# CORDEX: BADC -# OBS: default -# OBS6: default -# obs4MIPs: default -# ana4mips: BADC -# native6: default - -# Site-specific entries: Met Office - New VDI -# Uncomment the lines below to locate data at the Met Office. 
-#rootpath: -# CMIP5: /data/users/managecmip/champ/cmip5/output1 -# CMIP6: /data/users/managecmip/champ/CMIP6 -# CORDEX: /data/users/managecmip/champ/cordex/output -# OBS: /data/users/esmval/ESMValTool/obs -# OBS6: /data/users/esmval/ESMValTool/obs -# obs4MIPs: /data/users/esmval/ESMValTool/obs -# ana4mips: /data/users/managecmip/champ/ana4MIPs -# native6: /data/users/esmval/ESMValTool/rawobs -# RAWOBS: /data/users/esmval/ESMValTool/rawobs -#drs: -# CMIP5: BADC -# CMIP6: BADC -# CORDEX: BADC -# OBS: default -# OBS6: default -# obs4MIPs: default -# ana4mips: BADC -# native6: default - -# Site-specific entries: NCI -# Uncomment the lines below to locate data at NCI. -#rootpath: -# CMIP6: [/g/data/oi10/replicas/CMIP6, /g/data/fs38/publications/CMIP6, /g/data/xp65/public/apps/esmvaltool/replicas/CMIP6] -# CMIP5: [/g/data/r87/DRSv3/CMIP5, /g/data/al33/replicas/CMIP5/combined, /g/data/rr3/publications/CMIP5/output1, /g/data/xp65/public/apps/esmvaltool/replicas/cmip5/output1] -# CMIP3: /g/data/r87/DRSv3/CMIP3 -# OBS: /g/data/ct11/access-nri/replicas/esmvaltool/obsdata-v2 -# OBS6: /g/data/ct11/access-nri/replicas/esmvaltool/obsdata-v2 -# obs4MIPs: /g/data/ct11/access-nri/replicas/esmvaltool/obsdata-v2 -# ana4mips: /g/data/ct11/access-nri/replicas/esmvaltool/obsdata-v2 -# native6: /g/data/xp65/public/apps/esmvaltool/native6 -# -#drs: -# CMIP6: NCI -# CMIP5: NCI -# CMIP3: NCI -# CORDEX: ESGF -# obs4MIPs: default -# ana4mips: default diff --git a/esmvalcore/config/configurations/dkrz-data.yml b/esmvalcore/config/configurations/dkrz-data.yml new file mode 100644 index 0000000000..bbf418eae7 --- /dev/null +++ b/esmvalcore/config/configurations/dkrz-data.yml @@ -0,0 +1,87 @@ +projects: + CMIP6: + data: + dkrz: + type: "esmvalcore.local.LocalDataSource" + rootpath: /work/ik1017/CMIP6/data + dirname_template: "{project}/{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{version}" + filename_template: "{short_name}_{mip}_{dataset}_{exp}_{ensemble}_{grid}*.nc" + esgf-cache: + type: "esmvalcore.local.LocalDataSource" + rootpath: /work/bd0854/DATA/ESMValTool2/download + dirname_template: "{project}/{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{version}" + filename_template: "{short_name}_{mip}_{dataset}_{exp}_{ensemble}_{grid}*.nc" + CMIP5: + data: + dkrz: + type: "esmvalcore.local.LocalDataSource" + rootpath: /work/kd0956/CMIP5/data + dirname_template: "{project.lower}/{product}/{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{mip}/{ensemble}/{version}/{short_name}" + filename_template: "{short_name}_{mip}_{dataset}_{exp}_{ensemble}*.nc" + esgf-cache: + type: "esmvalcore.local.LocalDataSource" + rootpath: /work/bd0854/DATA/ESMValTool2/download + dirname_template: "{project.lower}/{product}/{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{mip}/{ensemble}/{version}" + filename_template: "{short_name}_{mip}_{dataset}_{exp}_{ensemble}*.nc" + CMIP3: + data: + dkrz: + type: "esmvalcore.local.LocalDataSource" + rootpath: /work/bd0854/DATA/ESMValTool2/CMIP3 + dirname_template: "{exp}/{modeling_realm}/{frequency}/{short_name}/{dataset}/{ensemble}" + filename_template: "{short_name}_*.nc" + esgf-cache: + type: "esmvalcore.local.LocalDataSource" + rootpath: /work/bd0854/DATA/ESMValTool2/download + dirname_template: "{project.lower}/{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{ensemble}/{short_name}/{version}" + filename_template: "{short_name}_*.nc" + CORDEX: + data: + dkrz: + type: "esmvalcore.local.LocalDataSource" + rootpath: 
+        dirname_template: "{domain}/{institute}/{driver}/{exp}/{ensemble}/{institute}-{dataset}/{rcm_version}/{mip}/{short_name}/{version}"
+        filename_template: "{short_name}_{domain}_{driver}_{exp}_{ensemble}_{institute}-{dataset}_{rcm_version}_{mip}*.nc"
+      esgf-cache:
+        type: "esmvalcore.local.LocalDataSource"
+        rootpath: /work/bd0854/DATA/ESMValTool2/download
+        dirname_template: "{project.lower}/output/{domain}/{institute}/{driver}/{exp}/{ensemble}/{dataset}/{rcm_version}/{frequency}/{short_name}/{version}"
+        filename_template: "{short_name}_{domain}_{driver}_{exp}_{ensemble}_{institute}-{dataset}_{rcm_version}_{mip}*.nc"
+  obs4MIPs:
+    data:
+      dkrz:
+        type: "esmvalcore.local.LocalDataSource"
+        rootpath: /work/bd0854/DATA/ESMValTool2/OBS
+        dirname_template: "Tier{tier}/{dataset}"
+        filename_template: "{short_name}_*.nc"
+      esgf-cache:
+        type: "esmvalcore.local.LocalDataSource"
+        rootpath: /work/bd0854/DATA/ESMValTool2/download
+        dirname_template: "{project}/{dataset}/{version}"
+        filename_template: "{short_name}_*.nc"
+  native6:
+    data:
+      dkrz:
+        type: "esmvalcore.local.LocalDataSource"
+        rootpath: /work/bd0854/DATA/ESMValTool2/RAWOBS
+        dirname_template: "Tier{tier}/{dataset}/{version}/{frequency}/{short_name}"
+        filename_template: "*.nc"
+      dkrz-era5:
+        type: "esmvalcore.local.LocalDataSource"
+        rootpath: /pool/data/ERA5
+        dirname_template: "{family}/{level}/{type}/{tres}/{grib_id}"
+        filename_template: "{family}{level}{typeid}_{tres}_*_{grib_id}.grb"
+  OBS6:
+    data:
+      dkrz:
+        type: "esmvalcore.local.LocalDataSource"
+        rootpath: /work/bd0854/DATA/ESMValTool2/OBS
+        dirname_template: "Tier{tier}/{dataset}"
+        filename_template: "{project}_{dataset}_{type}_{version}_{mip}_{short_name}[_.]*nc"
+  OBS:
+    data:
+      dkrz:
+        type: "esmvalcore.local.LocalDataSource"
+        rootpath: /work/bd0854/DATA/ESMValTool2/OBS
+        dirname_template: "Tier{tier}/{dataset}"
+        filename_template: "{project}_{dataset}_{type}_{version}_{mip}_{short_name}[_.]*nc"
diff --git a/esmvalcore/config/configurations/emac-data.yml b/esmvalcore/config/configurations/emac-data.yml
new file mode 100644
index 0000000000..5875f68cc8
--- /dev/null
+++ b/esmvalcore/config/configurations/emac-data.yml
@@ -0,0 +1,8 @@
+projects:
+  EMAC:
+    data:
+      emac:
+        type: "esmvalcore.local.LocalDataSource"
+        rootpath: ~/climate_data
+        dirname_template: "{exp}/{channel}"
+        filename_template: "{exp}*{channel}{postproc_flag}.nc"
diff --git a/esmvalcore/config/configurations/esgf-pyclient-data.yml b/esmvalcore/config/configurations/esgf-pyclient-data.yml
new file mode 100644
index 0000000000..96c253f138
--- /dev/null
+++ b/esmvalcore/config/configurations/esgf-pyclient-data.yml
@@ -0,0 +1,17 @@
+# Use a lower priority than for esmvalcore.local.LocalDataSource
+# to avoid searching ESGF with the setting `search_esgf: when_missing`.
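+# Data sources are searched in order of increasing "priority" value, so this
+# source (priority 10) is only queried after the local data sources
+# (priority 1), and with `search_esgf: when_missing` the search stops as soon
+# as a higher-priority source provides all of the requested data.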
+projects:
+  CMIP6: &esgf-pyclient-data
+    data:
+      esgf-pyclient:
+        type: "esmvalcore.esgf.ESGFDataSource"
+        download_dir: ~/climate_data
+        priority: 10
+  CMIP5:
+    <<: *esgf-pyclient-data
+  CMIP3:
+    <<: *esgf-pyclient-data
+  CORDEX:
+    <<: *esgf-pyclient-data
+  obs4MIPs:
+    <<: *esgf-pyclient-data
diff --git a/esmvalcore/config/configurations/ethz-data.yml b/esmvalcore/config/configurations/ethz-data.yml
new file mode 100644
index 0000000000..c2bead9523
--- /dev/null
+++ b/esmvalcore/config/configurations/ethz-data.yml
@@ -0,0 +1,29 @@
+projects:
+  CMIP6:
+    data:
+      ethz:
+        type: "esmvalcore.local.LocalDataSource"
+        rootpath: /net/atmos/data
+        dirname_template: "{project.lower}/{exp}/{mip}/{short_name}/{dataset}/{ensemble}/{grid}/"
+        filename_template: "{short_name}_{mip}_{dataset}_{exp}_{ensemble}_{grid}*.nc"
+  CMIP5:
+    data:
+      ethz:
+        type: "esmvalcore.local.LocalDataSource"
+        rootpath: /net/atmos/data
+        dirname_template: "{project.lower}/{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{mip}/{ensemble}/{version}/{short_name}"
+        filename_template: "{short_name}_{mip}_{dataset}_{exp}_{ensemble}*.nc"
+  CMIP3:
+    data:
+      ethz:
+        type: "esmvalcore.local.LocalDataSource"
+        rootpath: /net/atmos/data
+        dirname_template: "{project.lower}/{exp}/{modeling_realm}/{frequency}/{short_name}/{dataset}/{ensemble}"
+        filename_template: "{short_name}_*.nc"
+  OBS:
+    data:
+      ethz:
+        type: "esmvalcore.local.LocalDataSource"
+        rootpath: /net/exo/landclim/PROJECTS/C3S/datadir/obsdir/
+        dirname_template: "Tier{tier}/{dataset}"
+        filename_template: "{project}_{dataset}_{type}_{version}_{mip}_{short_name}[_.]*nc"
diff --git a/esmvalcore/config/configurations/icon-data.yml b/esmvalcore/config/configurations/icon-data.yml
new file mode 100644
index 0000000000..f4c5c4799a
--- /dev/null
+++ b/esmvalcore/config/configurations/icon-data.yml
@@ -0,0 +1,14 @@
+projects:
+  ICON:
+    data:
+      icon: &icon
+        type: "esmvalcore.local.LocalDataSource"
+        rootpath: ~/climate_data
+        dirname_template: "{exp}"
+        filename_template: "{exp}_{var_type}*.nc"
+      icon-outdata:
+        <<: *icon
+        dirname_template: "{exp}/outdata"
+      icon-output:
+        <<: *icon
+        dirname_template: "{exp}/output"
diff --git a/esmvalcore/config/configurations/intake-esgf-data.yml b/esmvalcore/config/configurations/intake-esgf-data.yml
new file mode 100644
index 0000000000..e4e8f7e045
--- /dev/null
+++ b/esmvalcore/config/configurations/intake-esgf-data.yml
@@ -0,0 +1,76 @@
+projects:
+  CMIP6:
+    data:
+      intake-esgf:
+        type: "esmvalcore.io.intake_esgf.IntakeESGFDataSource"
+        facets:
+          activity: "activity_drs"
+          dataset: "source_id"
+          ensemble: "member_id"
+          exp: "experiment_id"
+          institute: "institution_id"
+          grid: "grid_label"
+          mip: "table_id"
+          project: "project"
+          short_name: "variable_id"
+  CMIP5:
+    data:
+      intake-esgf:
+        type: "esmvalcore.io.intake_esgf.IntakeESGFDataSource"
+        facets:
+          dataset: "model"
+          ensemble: "ensemble"
+          exp: "experiment"
+          frequency: "time_frequency"
+          institute: "institute"
+          mip: "cmor_table"
+          product: "product"
+          project: "project"
+          short_name: "variable"
+        values:
+          dataset:
+            "ACCESS1-0": "ACCESS1.0"
+            "ACCESS1-3": "ACCESS1.3"
+            "bcc-csm1-1": "BCC-CSM1.1"
+            "bcc-csm1-1-m": "BCC-CSM1.1(m)"
+            "CESM1-BGC": "CESM1(BGC)"
+            "CESM1-CAM5": "CESM1(CAM5)"
+            "CESM1-CAM5-1-FV2": "CESM1(CAM5.1,FV2)"
+            "CESM1-FASTCHEM": "CESM1(FASTCHEM)"
+            "CESM1-WACCM": "CESM1(WACCM)"
+            "CSIRO-Mk3-6-0": "CSIRO-Mk3.6.0"
+            "fio-esm": "FIO-ESM"
+            "GFDL-CM2p1": "GFDL-CM2.1"
+            "inmcm4": "INM-CM4"
+            "MRI-AGCM3-2H": "MRI-AGCM3.2H"
+            "MRI-AGCM3-2S": "MRI-AGCM3.2S"
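+        # The "values" mapping above translates ESMValCore facet values
+        # (keys) to the values used by the ESGF search index; matching
+        # search results are translated back to ESMValCore values.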
+  CMIP3:
+    data:
+      intake-esgf:
+        type: "esmvalcore.io.intake_esgf.IntakeESGFDataSource"
+        facets:
+          dataset: "model"
+          ensemble: "ensemble"
+          exp: "experiment"
+          frequency: "time_frequency"
+          project: "project"
+          short_name: "variable"
+  obs4MIPs:
+    data:
+      intake-esgf-v2:
+        type: "esmvalcore.io.intake_esgf.IntakeESGFDataSource"
+        facets:
+          dataset: "source_id"
+          frequency: "frequency"
+          institute: "institution_id"
+          project: "project"
+          short_name: "variable_id"
+      # TODO: Add support for older ODS V1.0 obs4MIPs (CMIP5 style) data to intake-esgf
+      # intake-esgf-v1:
+      #   type: "esmvalcore.io.intake_esgf.IntakeESGFDataSource"
+      #   facets:
+      #     dataset: "source_id"
+      #     frequency: "time_frequency"
+      #     institute: "institute"
+      #     project: "project"
+      #     short_name: "variable"
diff --git a/esmvalcore/config/configurations/ipsl-data.yml b/esmvalcore/config/configurations/ipsl-data.yml
new file mode 100644
index 0000000000..1a84d47606
--- /dev/null
+++ b/esmvalcore/config/configurations/ipsl-data.yml
@@ -0,0 +1,36 @@
+projects:
+  CMIP6:
+    data:
+      ipsl:
+        type: "esmvalcore.local.LocalDataSource"
+        rootpath: /bdd
+        dirname_template: "{project}/{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{version}"
+        filename_template: "{short_name}_{mip}_{dataset}_{exp}_{ensemble}_{grid}*.nc"
+  CMIP5:
+    data:
+      ipsl:
+        type: "esmvalcore.local.LocalDataSource"
+        rootpath: /bdd
+        dirname_template: "{project}/output/{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{mip}/{ensemble}/{version}/{short_name}"
+        filename_template: "{short_name}_{mip}_{dataset}_{exp}_{ensemble}*.nc"
+  CMIP3:
+    data:
+      ipsl:
+        type: "esmvalcore.local.LocalDataSource"
+        rootpath: /bdd
+        dirname_template: "{project}/{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{ensemble}/{short_name}/{version}/{short_name}"
+        filename_template: "{short_name}_*.nc"
+  CORDEX:
+    data:
+      ipsl:
+        type: "esmvalcore.local.LocalDataSource"
+        rootpath: /bdd
+        dirname_template: "{project}/output/{domain}/{institute}/{driver}/{exp}/{ensemble}/{institute}-{dataset}/{rcm_version}/{mip}/{short_name}/{version}"
+        filename_template: "{short_name}_{domain}_{driver}_{exp}_{ensemble}_{institute}-{dataset}_{rcm_version}_{mip}*.nc"
+  obs4MIPs:
+    data:
+      ipsl:
+        type: "esmvalcore.local.LocalDataSource"
+        rootpath: /bdd
+        dirname_template: "{project}/obs-CFMIP/observations/{realm}/{short_name}/{frequency}/{grid}/{institute}/{dataset}/{version}"
+        filename_template: "{short_name}_*.nc"
diff --git a/esmvalcore/config/configurations/ipslcm-data.yml b/esmvalcore/config/configurations/ipslcm-data.yml
new file mode 100644
index 0000000000..e51344915d
--- /dev/null
+++ b/esmvalcore/config/configurations/ipslcm-data.yml
@@ -0,0 +1,13 @@
+projects:
+  IPSLCM:
+    data:
+      ipslcm-varname:
+        type: "esmvalcore.local.LocalDataSource"
+        rootpath: ~/climate_data
+        dirname_template: "{root}/{account}/{model}/{status}/{exp}/{simulation}/{dir}/{out}/{freq}"
+        filename_template: "{simulation}_*_{ipsl_varname}.nc"
+      ipslcm-group:
+        type: "esmvalcore.local.LocalDataSource"
+        rootpath: ~/climate_data
+        dirname_template: "{root}/{account}/{model}/{status}/{exp}/{simulation}/{dir}/{out}/{freq}"
+        filename_template: "{simulation}_*_{group}.nc"
diff --git a/esmvalcore/config/configurations/local-data.yml b/esmvalcore/config/configurations/local-data.yml
new file mode 100644
index 0000000000..81cc931d46
--- /dev/null
+++ b/esmvalcore/config/configurations/local-data.yml
@@ -0,0 +1,57 @@
+projects:
+  CMIP6:
+    data:
+      local:
+        type: "esmvalcore.local.LocalDataSource"
+        rootpath: ~/climate_data
+        dirname_template: "{project}/{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{version}"
+        filename_template: "{short_name}_{mip}_{dataset}_{exp}_{ensemble}_{grid}*.nc"
+  CMIP5:
+    data:
+      local:
+        type: "esmvalcore.local.LocalDataSource"
+        rootpath: ~/climate_data
+        dirname_template: "{project.lower}/{product}/{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{mip}/{ensemble}/{version}"
+        filename_template: "{short_name}_{mip}_{dataset}_{exp}_{ensemble}*.nc"
+  CMIP3:
+    data:
+      local:
+        type: "esmvalcore.local.LocalDataSource"
+        rootpath: ~/climate_data
+        dirname_template: "{project.lower}/{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{ensemble}/{short_name}/{version}"
+        filename_template: "{short_name}_*.nc"
+  CORDEX:
+    data:
+      local:
+        type: "esmvalcore.local.LocalDataSource"
+        rootpath: ~/climate_data
+        dirname_template: "{project.lower}/output/{domain}/{institute}/{driver}/{exp}/{ensemble}/{dataset}/{rcm_version}/{frequency}/{short_name}/{version}"
+        filename_template: "{short_name}_{domain}_{driver}_{exp}_{ensemble}_{institute}-{dataset}_{rcm_version}_{mip}*.nc"
+  obs4MIPs:
+    data:
+      local:
+        type: "esmvalcore.local.LocalDataSource"
+        rootpath: ~/climate_data
+        dirname_template: "{project}/{dataset}/{version}"
+        filename_template: "{short_name}_*.nc"
+  native6:
+    data:
+      local:
+        type: "esmvalcore.local.LocalDataSource"
+        rootpath: ~/climate_data
+        dirname_template: "Tier{tier}/{dataset}/{version}/{frequency}/{short_name}"
+        filename_template: "*.nc"
+  OBS6:
+    data:
+      local:
+        type: "esmvalcore.local.LocalDataSource"
+        rootpath: ~/climate_data
+        dirname_template: "Tier{tier}/{dataset}"
+        filename_template: "{project}_{dataset}_{type}_{version}_{mip}_{short_name}[_.]*nc"
+  OBS:
+    data:
+      local:
+        type: "esmvalcore.local.LocalDataSource"
+        rootpath: ~/climate_data
+        dirname_template: "Tier{tier}/{dataset}"
+        filename_template: "{project}_{dataset}_{type}_{version}_{mip}_{short_name}[_.]*nc"
diff --git a/esmvalcore/config/configurations/mo-data.yml b/esmvalcore/config/configurations/mo-data.yml
new file mode 100644
index 0000000000..a8aa420396
--- /dev/null
+++ b/esmvalcore/config/configurations/mo-data.yml
@@ -0,0 +1,62 @@
+projects:
+  CMIP6:
+    data:
+      mo: &cmip6
+        type: "esmvalcore.local.LocalDataSource"
+        rootpath: /data/users/managecmip/champ
+        dirname_template: "{project}/{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{version}"
+        filename_template: "{short_name}_{mip}_{dataset}_{exp}_{ensemble}_{grid}*.nc"
+      mo-old-vdi:
+        <<: *cmip6
+        priority: 2
+        rootpath: /project/champ/data
+  CMIP5:
+    data:
+      mo: &cmip5
+        type: "esmvalcore.local.LocalDataSource"
+        rootpath: /data/users/managecmip/champ
+        dirname_template: "{project.lower}/{product}/{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{mip}/{ensemble}/{version}/{short_name}"
+        filename_template: "{short_name}_{mip}_{dataset}_{exp}_{ensemble}*.nc"
+      mo-old-vdi:
+        <<: *cmip5
+        priority: 2
+        rootpath: /project/champ/data
+  CORDEX:
+    data:
+      mo: &cordex
+        type: "esmvalcore.local.LocalDataSource"
+        rootpath: /data/users/managecmip/champ
+        dirname_template: "{project.lower}/output/{domain}/{institute}/{driver}/{exp}/{ensemble}/{institute}-{dataset}/{rcm_version}/{mip}/{short_name}/{version}"
+        filename_template: "{short_name}_{domain}_{driver}_{exp}_{ensemble}_{institute}-{dataset}_{rcm_version}_{mip}*.nc"
+      mo-old-vdi:
+        <<: *cordex
+        priority: 2
+        rootpath: /project/champ/data
+  obs4MIPs:
+    data:
+      mo:
+        type: "esmvalcore.local.LocalDataSource"
"esmvalcore.local.LocalDataSource" + rootpath: /data/users/esmval/ESMValTool/obs + dirname_template: "Tier{tier}/{dataset}" + filename_template: "{short_name}_*.nc" + native6: + data: + mo: + type: "esmvalcore.local.LocalDataSource" + rootpath: /data/users/esmval/ESMValTool/rawobs + dirname_template: "Tier{tier}/{dataset}/{version}/{frequency}/{short_name}" + filename_template: "*.nc" + OBS6: + data: + mo: + type: "esmvalcore.local.LocalDataSource" + rootpath: /data/users/esmval/ESMValTool/obs + dirname_template: "Tier{tier}/{dataset}" + filename_template: "{project}_{dataset}_{type}_{version}_{mip}_{short_name}[_.]*nc" + OBS: + data: + mo: + type: "esmvalcore.local.LocalDataSource" + rootpath: /data/users/esmval/ESMValTool/obs + dirname_template: "Tier{tier}/{dataset}" + filename_template: "{project}_{dataset}_{type}_{version}_{mip}_{short_name}[_.]*nc" diff --git a/esmvalcore/config/configurations/nci-data.yml b/esmvalcore/config/configurations/nci-data.yml new file mode 100644 index 0000000000..9179abc1a5 --- /dev/null +++ b/esmvalcore/config/configurations/nci-data.yml @@ -0,0 +1,66 @@ +projects: + CMIP6: + data: + oi10: &cmip6 + type: "esmvalcore.local.LocalDataSource" + rootpath: /g/data/oi10/replicas + dirname_template: "{project}/{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{version}" + filename_template: "{short_name}_{mip}_{dataset}_{exp}_{ensemble}_{grid}*.nc" + fs38: + <<: *cmip6 + rootpath: /g/data/fs38/publications + xp65: + <<: *cmip6 + rootpath: /g/data/xp65/public/apps/esmvaltool/replicas + CMIP5: + data: + r87: &cmip5 + type: "esmvalcore.local.LocalDataSource" + rootpath: /g/data/r87/DRSv3/CMIP5 + dirname_template: "{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{mip}/{ensemble}/{version}/{short_name}" + filename_template: "{short_name}_{mip}_{dataset}_{exp}_{ensemble}*.nc" + al33: + <<: *cmip5 + rootpath: /g/data/al33/replicas/CMIP5/combined + rr3: &cmip5-default + <<: *cmip5 + rootpath: /g/data/rr3/publications + dirname_template: "{project}/{product}/{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{mip}/{ensemble}/{version}/{short_name}" + xp65: + <<: *cmip5-default + rootpath: /g/data/xp65/public/apps/esmvaltool/replicas + CMIP3: + data: + local: + type: "esmvalcore.local.LocalDataSource" + rootpath: /g/data/r87/DRSv3/CMIP3 + dirname_template: "{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{ensemble}/{short_name}/{latestversion}" + filename_template: "{short_name}_*.nc" + obs4MIPs: + data: + local: + type: "esmvalcore.local.LocalDataSource" + rootpath: /g/data/ct11/access-nri/replicas/esmvaltool/obsdata-v2 + dirname_template: "Tier{tier}/{dataset}" + filename_template: "{short_name}_*.nc" + native6: + data: + local: + type: "esmvalcore.local.LocalDataSource" + rootpath: /g/data/xp65/public/apps/esmvaltool/native6 + dirname_template: "Tier{tier}/{dataset}/{version}/{frequency}/{short_name}" + filename_template: "*.nc" + OBS6: + data: + local: + type: "esmvalcore.local.LocalDataSource" + rootpath: /g/data/ct11/access-nri/replicas/esmvaltool/obsdata-v2 + dirname_template: "Tier{tier}/{dataset}" + filename_template: "{project}_{dataset}_{type}_{version}_{mip}_{short_name}[_.]*nc" + OBS: + data: + local: + type: "esmvalcore.local.LocalDataSource" + rootpath: /g/data/ct11/access-nri/replicas/esmvaltool/obsdata-v2 + dirname_template: "Tier{tier}/{dataset}" + filename_template: "{project}_{dataset}_{type}_{version}_{mip}_{short_name}[_.]*nc" diff --git a/esmvalcore/dataset.py b/esmvalcore/dataset.py index 
index 229ba59bd9..5ec8a40b9b 100644
--- a/esmvalcore/dataset.py
+++ b/esmvalcore/dataset.py
@@ -15,7 +15,7 @@
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
 
-from esmvalcore import esgf, local
+from esmvalcore import esgf
 from esmvalcore._recipe import check
 from esmvalcore._recipe.from_datasets import datasets_to_recipe
 from esmvalcore.cmor.table import _get_mips, _update_cmor_facets
@@ -26,11 +26,11 @@
     get_institutes,
     load_extra_facets,
 )
+from esmvalcore.config._data_sources import _get_data_sources
 from esmvalcore.exceptions import InputFilesNotFound, RecipeError
 from esmvalcore.local import (
     _dates_to_timerange,
     _get_output_file,
-    _get_start_end_date,
 )
 from esmvalcore.preprocessor import preprocess
 
@@ -39,6 +39,8 @@
     from iris.cube import Cube
 
+    from esmvalcore.io.protocol import DataElement
+    from esmvalcore.preprocessor import PreprocessorItem
     from esmvalcore.typing import Facets, FacetValue
 
 __all__ = [
@@ -49,8 +51,6 @@
 
 logger = logging.getLogger(__name__)
 
-File = esgf.ESGFFile | local.LocalFile
-
 INHERITED_FACETS: list[str] = [
     "dataset",
     "domain",
@@ -130,8 +130,8 @@ def __init__(self, **facets: FacetValue) -> None:
         self._persist: set[str] = set()
         self._session: Session | None = None
-        self._files: Sequence[File] | None = None
-        self._file_globs: Sequence[Path] | None = None
+        self._files: Sequence[DataElement] | None = None
+        self._file_globs: Sequence[str] = []
 
         for key, value in facets.items():
             self.set_facet(key, deepcopy(value), persist=True)
@@ -192,7 +192,7 @@
     def _file_to_dataset(
         self,
-        file: esgf.ESGFFile | local.LocalFile,
+        file: DataElement,
     ) -> Dataset:
         """Create a dataset from a file with a `facets` attribute."""
         facets = dict(file.facets)
@@ -243,6 +243,12 @@
         expanded = False
         for file in dataset_template.files:
             dataset = self._file_to_dataset(file)
+            # Do not use the timerange facet from the file because there may be multiple
+            # files per dataset.
+            dataset.facets.pop("timerange", None)
+            # Restore the original timerange facet if it was specified.
+            if "timerange" in self.facets:
+                dataset.facets["timerange"] = self.facets["timerange"]
 
             # Filter out identical datasets
             facetset = frozenset(
@@ -267,10 +273,8 @@
         for dataset, file in partially_defined:
             msg = (
                 f"{dataset} with unexpanded wildcards, created from file "
-                f"{file} with facets {file.facets}. Are the missing facets "
-                "in the path to the file?"
-                if isinstance(file, local.LocalFile)
-                else "available on ESGF?"
+                f"{file} with facets {file.facets}. Please check why "
+                "the missing facets are not available for the file."
             )
             if expanded:
                 logger.info("Ignoring %s", msg)
@@ -287,7 +291,6 @@ def from_files(self) -> Iterator[Dataset]:
 
         The facet values for local files are retrieved from the directory
         tree where the directories represent the facets values.
-        Reading facet values from file names is not yet supported.
 
         See :ref:`CMOR-DRS` for more information on this kind of file
         organization.
@@ -750,56 +753,43 @@ def find_files(self) -> None:
             supplementary.find_files()
 
     def _find_files(self) -> None:
-        self.files, self._file_globs = local.find_files(
-            debug=True,
-            **self.facets,
-        )
-
-        # If project does not support automatic downloads from ESGF, stop here
-        if self.facets["project"] not in esgf.facets.FACETS:
-            return
-
-        # 'never' mode: never download files from ESGF and stop here
-        if self.session["search_esgf"] == "never":
-            return
-
-        # 'when_missing' mode: if files are available locally, do not check
-        # ESGF
-        if self.session["search_esgf"] == "when_missing":
-            try:
-                check.data_availability(self, log=False)
-            except InputFilesNotFound:
-                pass  # search ESGF for files
-            else:
-                return  # use local files
-
-        # Local files are not available in 'when_missing' mode or 'always' mode
-        # is used: check ESGF
-        local_files = {f.name: f for f in self.files}
-        search_result = esgf.find_files(**self.facets)
-        for file in search_result:
-            if file.name not in local_files:
-                # Use ESGF files that are not available locally.
-                self.files.append(file)
-            else:
-                # Use ESGF files that are newer than the locally available
-                # files.
-                local_file = local_files[file.name]
-                if "version" in local_file.facets:
-                    if file.facets["version"] > local_file.facets["version"]:
-                        idx = self.files.index(local_file)
-                        self.files[idx] = file
+        def version(file: DataElement) -> str:
+            return str(file.facets.get("version", ""))
+
+        self._file_globs = []
+        files: dict[str, DataElement] = {}
+        for data_source in sorted(
+            _get_data_sources(self.session, self.facets["project"]),  # type: ignore[arg-type]
+            key=lambda ds: ds.priority,
+        ):
+            result = data_source.find_data(**self.facets)
+            for file in result:
+                if file.name not in files:
+                    files[file.name] = file
+                if version(files[file.name]) < version(file):
+                    files[file.name] = file
+            self.files = list(files.values())
+            self._file_globs.append(data_source.debug_info)
+            # 'when_missing' mode: if files are available from a higher
+            # priority source, do not search lower priority sources.
+            if self.session["search_esgf"] == "when_missing":
+                try:
+                    check.data_availability(self, log=False)
+                except InputFilesNotFound:
+                    pass  # continue search for data
+                else:
+                    return  # use what has been found so far
 
     @property
-    def files(self) -> list[File]:
+    def files(self) -> list[DataElement]:
         """The files associated with this dataset."""
         if self._files is None:
             self.find_files()
         return self._files  # type: ignore
 
     @files.setter
-    def files(self, value: Sequence[File]) -> None:
-        self._files = value
+    def files(self, value: Sequence[DataElement]) -> None:
+        self._files = list(value)
 
     def load(self) -> Cube:
         """Load dataset.
@@ -897,12 +887,7 @@ def _load(self) -> Cube:
             "short_name": self.facets["short_name"],
         }
 
-        result = [
-            file.local_file(self.session["download_dir"])
-            if isinstance(file, esgf.ESGFFile)
-            else file
-            for file in self.files
-        ]
+        result: Sequence[PreprocessorItem] = self.files
         for step, kwargs in settings.items():
             result = preprocess(
                 result,
@@ -993,25 +978,37 @@ def _update_timerange(self) -> None:
         check.valid_time_selection(timerange)
 
         if "*" in timerange:
+            # Replace wildcards in timerange with "timerange" from DataElements,
+            # but only if all DataElements have the "timerange" facet.
             dataset = self.copy()
             dataset.facets.pop("timerange")
             dataset.supplementaries = []
             check.data_availability(dataset)
-            intervals = [_get_start_end_date(f) for f in dataset.files]
-
-            min_date = min(interval[0] for interval in intervals)
-            max_date = max(interval[1] for interval in intervals)
+            if all("timerange" in f.facets for f in dataset.files):
+                # "timerange" can only be reliably computed when all DataElements
+                # provide it.
+                intervals = [
+                    f.facets["timerange"].split("/")  # type: ignore[union-attr]
+                    for f in dataset.files
+                ]
 
-            if timerange == "*":
-                timerange = f"{min_date}/{max_date}"
-            if "*" in timerange.split("/")[0]:
-                timerange = timerange.replace("*", min_date)
-            if "*" in timerange.split("/")[1]:
-                timerange = timerange.replace("*", max_date)
+                min_date = min(interval[0] for interval in intervals)
+                max_date = max(interval[1] for interval in intervals)
 
-        # Make sure that years are in format YYYY
-        start_date, end_date = timerange.split("/")
-        timerange = _dates_to_timerange(start_date, end_date)
-        check.valid_time_selection(timerange)
+                if timerange == "*":
+                    timerange = f"{min_date}/{max_date}"
+                if "*" in timerange.split("/")[0]:
+                    timerange = timerange.replace("*", min_date)
+                if "*" in timerange.split("/")[1]:
+                    timerange = timerange.replace("*", max_date)
 
-        self.set_facet("timerange", timerange)
+        if "*" in timerange:
+            # Drop the timerange facet if it still contains wildcards.
+            self.facets.pop("timerange")
+        else:
+            # Make sure that years are in format YYYY
+            start_date, end_date = timerange.split("/")
+            timerange = _dates_to_timerange(start_date, end_date)
+            # Update the timerange
+            check.valid_time_selection(timerange)
+            self.set_facet("timerange", timerange)
diff --git a/esmvalcore/esgf/__init__.py b/esmvalcore/esgf/__init__.py
index ca8607f964..2e03b90013 100644
--- a/esmvalcore/esgf/__init__.py
+++ b/esmvalcore/esgf/__init__.py
@@ -1,10 +1,41 @@
-"""Find files on the ESGF and download them."""
+"""Find files on the ESGF and download them.
 
-from ._download import ESGFFile, download
-from ._search import find_files
+This module uses `esgf-pyclient <https://esgf-pyclient.readthedocs.io>`_
+to search for and download files from the Earth System Grid Federation (ESGF).
+esgf-pyclient uses a deprecated API that is scheduled to be taken offline and
+replaced by new APIs based on STAC (ESGF East) and Globus (ESGF West).
+An ESGF node mimicking the deprecated API but built on top of Globus will be
+kept online for some time at https://esgf-node.ornl.gov/esgf-1-5-bridge,
+but users are encouraged to migrate to the new APIs as soon as possible by
+using the :mod:`esmvalcore.io.intake_esgf` module instead.
+
+This module provides the function :py:func:`esmvalcore.esgf.find_files`
+for searching for files on ESGF using the ESMValTool vocabulary.
+It returns :class:`esmvalcore.esgf.ESGFFile` objects, which have a convenient
+:meth:`esmvalcore.esgf.ESGFFile.download` method for downloading the file.
+A :func:`esmvalcore.esgf.download` function for downloading multiple files in
+parallel is also available.
+
+It also provides an :class:`esmvalcore.esgf.ESGFDataSource` that can be
+used to find files on ESGF from the :class:`~esmvalcore.dataset.Dataset`
+or the :ref:`recipe`. To use it, create a file with the following
+:ref:`configuration` in ``~/.config/esmvaltool``:
+
+.. literalinclude:: ../configurations/esgf-pyclient-data.yml
+   :language: yaml
+
+See :ref:`config-esgf` for instructions on additional configuration
+options of this module.
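+
+For example, a minimal usage sketch (the facet values below are only an
+illustration, not a recommendation)::
+
+    from pathlib import Path
+
+    from esmvalcore.esgf import download, find_files
+
+    files = find_files(
+        project="CMIP6",
+        mip="Amon",
+        short_name="tas",
+        dataset="EC-Earth3",
+        exp="historical",
+        ensemble="r1i1p1f1",
+    )
+    download(files, dest_folder=Path.home() / "climate_data", n_jobs=4)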
+""" + +from esmvalcore.esgf._download import ESGFFile, download +from esmvalcore.esgf._search import ESGFDataSource, find_files __all__ = [ "ESGFFile", + "ESGFDataSource", "download", "find_files", ] diff --git a/esmvalcore/esgf/_download.py b/esmvalcore/esgf/_download.py index 9a1ff04fcb..9a0b04c24f 100644 --- a/esmvalcore/esgf/_download.py +++ b/esmvalcore/esgf/_download.py @@ -11,16 +11,26 @@ import random import re import shutil +from collections.abc import Iterable from pathlib import Path from statistics import median from tempfile import NamedTemporaryFile +from typing import Any from urllib.parse import urlparse +import iris.cube import requests import yaml from humanfriendly import format_size, format_timespan - -from esmvalcore.local import LocalFile +from pyesgf.search.results import FileResult + +from esmvalcore.config import CFG +from esmvalcore.io.protocol import DataElement +from esmvalcore.local import ( + LocalFile, + _dates_to_timerange, + _get_start_end_date_from_filename, +) from esmvalcore.typing import Facets from .facets import DATASET_MAP, FACETS @@ -166,7 +176,7 @@ def sort_hosts(urls): @functools.total_ordering -class ESGFFile: +class ESGFFile(DataElement): """File on the ESGF. This is the object returned by :func:`esmvalcore.esgf.find_files`. @@ -185,7 +195,11 @@ class ESGFFile: The URLs where the file can be downloaded. """ - def __init__(self, results): + def __init__( + self, + results: Iterable[FileResult], + dest_folder: Path | None = None, + ) -> None: results = list(results) self.name = str(Path(results[0].filename).with_suffix(".nc")) self.size = results[0].size @@ -196,6 +210,39 @@ def __init__(self, results): for result in results: self.urls.append(result.download_url) self._checksums.append((result.checksum_type, result.checksum)) + self.dest_folder = ( + CFG["download_dir"] if dest_folder is None else dest_folder + ) + self._attributes: dict[str, Any] | None = None + + def prepare(self) -> None: + """Prepare the data for access.""" + self.download(self.dest_folder) + + @property + def attributes(self) -> dict[str, Any]: + """Attributes are key-value pairs describing the data.""" + if self._attributes is None: + msg = ( + "Attributes have not been read yet. Call the `to_iris` method " + "first to read the attributes from the file." 
+            )
+            raise ValueError(msg)
+        return self._attributes
+
+    @attributes.setter
+    def attributes(self, value: dict[str, Any]) -> None:
+        self._attributes = value
+
+    def to_iris(
+        self,
+        ignore_warnings: list[dict[str, Any]] | None = None,
+    ) -> iris.cube.CubeList:
+        """Load the data as Iris cubes."""
+        self.prepare()
+        local_file = self.local_file(self.dest_folder)
+        cubes = local_file.to_iris(ignore_warnings=ignore_warnings)
+        self.attributes = local_file.attributes
+        return cubes
 
     @classmethod
     def _from_results(cls, results, facets):
@@ -275,6 +322,9 @@
                     self.name,
                 )
                 facets[facet] = value
+        start_date, end_date = _get_start_end_date_from_filename(self.name)
+        if start_date and end_date:
+            facets["timerange"] = _dates_to_timerange(start_date, end_date)
         return facets
 
     @staticmethod
@@ -383,16 +433,16 @@
         """Compare `self` to `other`."""
         return (self.dataset, self.name) < (other.dataset, other.name)
 
-    def __hash__(self):
-        """Compute a unique hash value."""
+    def __hash__(self) -> int:
+        """Return a number uniquely representing the data element."""
        return hash((self.dataset, self.name))
 
-    def local_file(self, dest_folder):
+    def local_file(self, dest_folder: Path | None) -> LocalFile:
        """Return the path to the local file after download.
 
         Arguments
         ---------
-        dest_folder: Path
+        dest_folder:
             The destination folder.
 
         Returns
@@ -400,16 +450,17 @@
         LocalFile
             The path where the file will be located after download.
         """
+        dest_folder = self.dest_folder if dest_folder is None else dest_folder
         file = LocalFile(dest_folder, self._get_relative_path())
         file.facets = self.facets
         return file
 
-    def download(self, dest_folder):
+    def download(self, dest_folder: Path | None) -> LocalFile:
         """Download the file.
 
         Arguments
         ---------
-        dest_folder: Path
+        dest_folder:
             The destination folder.
 
         Raises
@@ -424,7 +475,6 @@
         """
         local_file = self.local_file(dest_folder)
         if local_file.exists():
-            logger.debug("Skipping download of existing file %s", local_file)
             return local_file
 
         os.makedirs(local_file.parent, exist_ok=True)
@@ -552,9 +602,6 @@
         and not file.local_file(dest_folder).exists()
     ]
     if not files:
-        logger.debug(
-            "All required data is available locally, not downloading anything.",
-        )
         return
 
     files = sorted(files)
diff --git a/esmvalcore/esgf/_search.py b/esmvalcore/esgf/_search.py
index 911e44cacb..841cef5529 100644
--- a/esmvalcore/esgf/_search.py
+++ b/esmvalcore/esgf/_search.py
@@ -2,18 +2,22 @@
 
 import itertools
 import logging
+import os.path
+from dataclasses import dataclass, field
 from functools import lru_cache
+from pathlib import Path
 
 import pyesgf.search
 import requests.exceptions
 
 from esmvalcore.config._esgf_pyclient import get_esgf_config
+from esmvalcore.io.protocol import DataSource
 from esmvalcore.local import (
-    _get_start_end_date,
     _parse_period,
     _replace_years_with_timerange,
     _truncate_dates,
 )
+from esmvalcore.typing import FacetValue
 
 from ._download import ESGFFile
 from .facets import DATASET_MAP, FACETS
@@ -177,17 +181,16 @@
 
     for file in files:
         start_date, end_date = _parse_period(timerange)
-        try:
-            start, end = _get_start_end_date(file)
-        except ValueError:
-            # If start and end year cannot be read from the filename
-            # just select everything.
-            selection.append(file)
-        else:
+        if "timerange" in file.facets:
+            start, end = file.facets["timerange"].split("/")
             start_date, end = _truncate_dates(start_date, end)
             end_date, start = _truncate_dates(end_date, start)
             if start <= end_date and end >= start_date:
                 selection.append(file)
+        else:
+            # If start and end year cannot be read from the filename just select
+            # everything.
+            selection.append(file)
 
     return selection
@@ -378,3 +381,44 @@
         logger.debug("Selected files:\n%s", "\n".join(str(f) for f in files))
 
     return files
+
+
+@dataclass
+class ESGFDataSource(DataSource):
+    """Data source that uses esgf-pyclient to find files on ESGF."""
+
+    name: str
+    """A name identifying the data source."""
+
+    project: str
+    """The project that the data source provides data for."""
+
+    priority: int
+    """The priority of the data source. Lower values have priority."""
+
+    download_dir: Path
+    """The destination directory where data will be downloaded."""
+
+    debug_info: str = field(init=False, default="")
+    """A string containing debug information when no data is found."""
+
+    def __post_init__(self) -> None:
+        self.download_dir = Path(
+            os.path.expandvars(self.download_dir),
+        ).expanduser()
+
+    def find_data(self, **facets: FacetValue) -> list[ESGFFile]:
+        """Find data.
+
+        Parameters
+        ----------
+        **facets :
+            Find data matching these facets.
+
+        Returns
+        -------
+        :obj:`list` of :obj:`esmvalcore.esgf.ESGFFile`
+            A list of files that have been found on ESGF.
+        """
+        files = find_files(**facets)
+        for file in files:
+            file.dest_folder = self.download_dir
+        return files
diff --git a/esmvalcore/io/__init__.py b/esmvalcore/io/__init__.py
new file mode 100644
index 0000000000..97cd78653b
--- /dev/null
+++ b/esmvalcore/io/__init__.py
@@ -0,0 +1,76 @@
+"""A modular system for reading input data from various sources."""
+
+import importlib
+import logging
+
+from esmvalcore.config import Session
+from esmvalcore.io.protocol import DataSource
+
+logger = logging.getLogger(__name__)
+
+
+def load_data_sources(
+    session: Session,
+    project: str | None = None,
+) -> list[DataSource]:
+    """Get the list of available data sources.
+
+    Arguments
+    ---------
+    session:
+        The configuration.
+    project:
+        If specified, only data sources for this project are returned.
+
+    Returns
+    -------
+    :obj:`list` of :obj:`DataSource`:
+        A list of available data sources.
+
+    Raises
+    ------
+    ValueError:
+        If the project or its settings are not found in the configuration.
+
+    """
+    data_sources: list[DataSource] = []
+    if project is not None and project not in session["projects"]:
+        msg = f"Unknown project '{project}', please configure it under 'projects'."
+        raise ValueError(msg)
+    settings = (
+        session["projects"]
+        if project is None
+        else {project: session["projects"][project]}
+    )
+    for project_, project_settings in settings.items():
+        for name, orig_kwargs in project_settings.get("data", {}).items():
+            kwargs = orig_kwargs.copy()
+            module_name, cls_name = kwargs.pop("type").rsplit(".", 1)
+            module = importlib.import_module(module_name)
+            cls = getattr(module, cls_name)
+            priority = kwargs.pop("priority", 1)
+            data_source = cls(
+                name=name,
+                project=project_,
+                priority=priority,
+                **kwargs,
+            )
+            if not isinstance(data_source, DataSource):
+                msg = (
+                    "Expected a data source of type `esmvalcore.io.protocol.DataSource`, "
+                    f"but your configuration for project '{project_}' contains "
+                    f"'{data_source}' of type '{type(data_source)}'."
+                )
+                raise TypeError(msg)
+            data_sources.append(data_source)
+
+    if not data_sources:
+        if project is None:
+            msg = "No data sources found. Check your configuration under 'projects'"
+        else:
+            msg = (
+                f"No data sources found for project '{project}'. "
+                f"Check your configuration under 'projects: {project}: data'"
+            )
+        raise ValueError(msg)
+    return data_sources
diff --git a/esmvalcore/io/intake_esgf.py b/esmvalcore/io/intake_esgf.py
new file mode 100644
index 0000000000..106dcdf08f
--- /dev/null
+++ b/esmvalcore/io/intake_esgf.py
@@ -0,0 +1,241 @@
+"""Access data using `intake-esgf <https://intake-esgf.readthedocs.io>`_.
+
+This module replaces the :mod:`esmvalcore.esgf` module. Please use this
+module instead of :mod:`esmvalcore.esgf` to access data on ESGF. If you
+encounter any issues using this module, please report them at
+https://github.com/ESMValGroup/ESMValCore/issues.
+
+Run the command ``esmvalcore config copy intake-esgf-data.yml`` to update
+your :ref:`configuration` to use this module. This will
+create a file with the following content in ``~/.config/esmvaltool`` or
+the path specified by the ``ESMVALTOOL_CONFIG_DIR`` environment variable:
+
+.. literalinclude:: ../configurations/intake-esgf-data.yml
+   :language: yaml
+
+"""
+
+import copy
+from dataclasses import dataclass, field
+from typing import Any
+
+import intake_esgf
+import intake_esgf.exceptions
+import iris.cube
+import isodate
+
+from esmvalcore.io.protocol import DataElement, DataSource
+from esmvalcore.iris_helpers import dataset_to_iris
+from esmvalcore.local import _parse_period
+from esmvalcore.typing import Facets, FacetValue
+
+__all__ = [
+    "IntakeESGFDataSource",
+    "IntakeESGFDataset",
+]
+
+
+@dataclass
+class IntakeESGFDataset(DataElement):
+    """A dataset that can be used to load data found using intake-esgf_."""
+
+    name: str
+    """A unique name identifying the data."""
+
+    facets: Facets
+    """Facets are key-value pairs that were used to find this data."""
+
+    catalog: intake_esgf.ESGFCatalog
+    """The intake-esgf catalog describing this data."""
+
+    _attributes: dict[str, Any] | None = field(init=False, default=None)
+
+    def __hash__(self) -> int:
+        """Return a number uniquely representing the data element."""
+        return hash(self.name)
+
+    def prepare(self) -> None:
+        """Prepare the data for access."""
+        self.catalog.to_path_dict()
+
+    @property
+    def attributes(self) -> dict[str, Any]:
+        """Attributes are key-value pairs describing the data."""
+        if self._attributes is None:
+            msg = (
+                "Attributes have not been read yet. Call the `to_iris` method "
+                "first to read the attributes from the file."
+            )
+            raise ValueError(msg)
+        return self._attributes
+
+    @attributes.setter
+    def attributes(self, value: dict[str, Any]) -> None:
+        self._attributes = value
+
+    def to_iris(self, ignore_warnings=None) -> iris.cube.CubeList:
+        """Load the data as Iris cubes.
+
+        Returns
+        -------
+        :
+            The loaded data.
+        """
+        files = self.catalog.to_path_dict(
+            minimal_keys=False,
+            quiet=True,
+        )[self.name]
+        dataset = self.catalog.to_dataset_dict(
+            minimal_keys=False,
+            add_measures=False,
+            quiet=True,
+        )[self.name]
+        # Store the local paths in the attributes for easier debugging.
+        dataset.attrs["source_file"] = ", ".join(str(f) for f in files)
+        # Cache the attributes.
+        self.attributes = copy.deepcopy(dataset.attrs)
+        return dataset_to_iris(dataset, ignore_warnings=ignore_warnings)
+
+
+@dataclass
+class IntakeESGFDataSource(DataSource):
+    """Data source that can be used to find data using intake-esgf."""
+
+    name: str
+    """A name identifying the data source."""
+
+    project: str
+    """The project that the data source provides data for."""
+
+    priority: int
+    """The priority of the data source. Lower values have priority."""
+
+    facets: dict[str, str]
+    """Mapping between the ESMValCore and ESGF facet names."""
+
+    values: dict[str, dict[str, str]] = field(default_factory=dict)
+    """Mapping between the ESMValCore and ESGF facet values."""
+
+    debug_info: str = field(init=False, default="")
+    """A string containing debug information when no data is found."""
+
+    catalog: intake_esgf.ESGFCatalog = field(
+        init=False,
+        default_factory=intake_esgf.ESGFCatalog,
+    )
+    """The intake-esgf catalog used to find data."""
+
+    def __post_init__(self):
+        self.catalog.project = intake_esgf.projects.projects[
+            self.project.lower()
+        ]
+
+    def find_data(self, **facets: FacetValue) -> list[IntakeESGFDataset]:
+        """Find data.
+
+        Parameters
+        ----------
+        **facets :
+            Find data matching these facets.
+
+        Returns
+        -------
+        :
+            A list of data elements that have been found.
+        """
+        # Normalize facets so all values are `list[str]`.
+        our_facets = {
+            facet: [str(values)] if isinstance(values, str | int) else values
+            for facet, values in facets.items()
+        }
+        # Translate "our" facets to ESGF facets and "our" values to ESGF values.
+        esgf_facets = {
+            their_facet: [
+                self.values.get(our_facet, {}).get(v, v)
+                for v in our_facets[our_facet]
+            ]
+            for our_facet, their_facet in self.facets.items()
+            if our_facet in our_facets
+        }
+        if (
+            "timerange" in facets and "*" not in facets["timerange"]  # type: ignore[operator]
+        ):
+            start, end = _parse_period(facets["timerange"])
+            esgf_facets["file_start"] = isodate.date_isoformat(
+                isodate.parse_date(start.split("T")[0]),
+            )
+            esgf_facets["file_end"] = isodate.date_isoformat(
+                isodate.parse_date(end.split("T")[0]),
+            )
+        # Search ESGF.
+        try:
+            self.catalog.search(**esgf_facets, quiet=True)
+        except intake_esgf.exceptions.NoSearchResults:
+            self.debug_info = (
+                "intake_esgf.ESGFCatalog.search("
+                + ", ".join(
+                    [
+                        f"{k}={v if isinstance(v, list) else [v]}"
+                        for k, v in self.catalog.last_search.items()
+                    ],
+                )
+                + ") did not return any results."
+            )
+            return []
+
+        # Return a list of datasets, with one IntakeESGFDataset per dataset_id.
+        result: list[IntakeESGFDataset] = []
+
+        # These are the keys in the dict[str, xarray.Dataset] returned by
+        # `intake_esgf.ESGFCatalog.to_dataset_dict`. Taken from:
+        # https://github.com/esgf2-us/intake-esgf/blob/c34124e54078e70ef271709a6d158edb22bcdb96/intake_esgf/catalog.py#L523-L528
+        self.catalog.df["key"] = self.catalog.df.apply(
+            lambda row: ".".join(
+                [row[f] for f in self.catalog.project.master_id_facets()],
+            ),
+            axis=1,
+        )
+        inverse_values = {
+            our_facet: {
+                their_value: our_value
+                for our_value, their_value in self.values[our_facet].items()
+            }
+            for our_facet in self.values
+        }
+        for _, row in self.catalog.df.iterrows():
+            dataset_id = row["key"]
+            # Subset the catalog to a single dataset.
+            cat = self.catalog.clone()
+            cat.file_start = self.catalog.file_start
+            cat.file_end = self.catalog.file_end
+            cat.df = self.catalog.df[self.catalog.df.key == dataset_id]
+            # Discard all but the latest version. It is unclear whether or how
+            # `intake_esgf.ESGFCatalog.to_dataset_dict` supports multiple versions.
+            cat.df = cat.df[cat.df.version == cat.df.version.max()]
+            cat.project = self.catalog.project
+            if "short_name" in our_facets:
+                cat.last_search[self.facets["short_name"]] = [
+                    self.values.get("short_name", {}).get(v, v)
+                    for v in our_facets["short_name"]
+                ]
+            # Retrieve "our" facets associated with the dataset_id.
+            dataset_facets = {}
+            for our_facet, esgf_facet in self.facets.items():
+                if esgf_facet in row:
+                    esgf_values = row[esgf_facet]
+                    if isinstance(esgf_values, str):
+                        esgf_values = [esgf_values]
+                    our_values = [
+                        inverse_values.get(our_facet, {}).get(v, v)
+                        for v in esgf_values
+                    ]
+                    if len(our_values) == 1:
+                        our_values = our_values[0]
+                    dataset_facets[our_facet] = our_values
+            dataset = IntakeESGFDataset(
+                name=dataset_id,
+                facets=dataset_facets,  # type: ignore[arg-type]
+                catalog=cat,
+            )
+            result.append(dataset)
+        return result
diff --git a/esmvalcore/io/protocol.py b/esmvalcore/io/protocol.py
new file mode 100644
index 0000000000..a2799f7947
--- /dev/null
+++ b/esmvalcore/io/protocol.py
@@ -0,0 +1,120 @@
+"""Protocols for accessing data.
+
+An input data source can be defined in the configuration by using
+:obj:`esmvalcore.config.CFG`
+
+.. code-block:: python
+
+    >>> from esmvalcore.config import CFG
+    >>> CFG["projects"]["example-project"]["data"]["example-source-name"] = {
+    ...     "type": "example_module.ExampleDataSource",
+    ...     "argument1": "value1",
+    ...     "argument2": "value2",
+    ... }
+
+or as a :ref:`YAML configuration file`
+
+.. code-block:: yaml
+
+    projects:
+      example-project:
+        data:
+          example-source-name:
+            type: example_module.ExampleDataSource
+            argument1: value1
+            argument2: value2
+
+where ``example-project`` is a project, e.g. ``CMIP6``, and
+``example-source-name`` is a unique name describing the data source. The data
+source type, called ``example_module.ExampleDataSource`` in the example above,
+needs to implement the :class:`esmvalcore.io.protocol.DataSource` protocol.
+Any remaining key-value pairs in the configuration, ``argument1: value1`` and
+``argument2: value2``, are passed as keyword arguments to the data source when
+it is created.
+
+Deduplication of search results happens based on the
+:attr:`esmvalcore.io.protocol.DataElement.name` attribute and the ``"version"``
+facet in :attr:`esmvalcore.io.protocol.DataElement.facets` of the data elements
+provided by the data sources. If there is a tie, the data element provided by
+the data source with the lowest value of
+:attr:`esmvalcore.io.protocol.DataSource.priority` is chosen.
+"""
+
+from collections.abc import Iterable
+from typing import Any, Protocol, runtime_checkable
+
+import iris.cube
+
+from esmvalcore.typing import FacetValue
+
+
+@runtime_checkable
+class DataElement(Protocol):
+    """A data element represents some data that can be loaded.
+
+    A file is an example of a data element.
+    """
+
+    name: str
+    """A unique name identifying the data."""
+
+    facets: dict[str, FacetValue]
+    """Facets are key-value pairs that can be used for searching the data."""
+
+    attributes: dict[str, Any]
+    """Attributes are key-value pairs describing the data."""
+
+    def __hash__(self) -> int:
+        """Return a number uniquely representing the data element."""
+
+    def prepare(self) -> None:
+        """Prepare the data for access."""
+
+    def to_iris(
+        self,
+        ignore_warnings: list[dict[str, Any]] | None = None,
+    ) -> iris.cube.CubeList:
+        """Load the data as Iris cubes.
+
+        Parameters
+        ----------
+        ignore_warnings:
+            Keyword arguments passed to :func:`warnings.filterwarnings` used to
+            ignore warnings issued by :func:`iris.load_raw`. Each list element
+            corresponds to one call to :func:`warnings.filterwarnings`.
+
+        Returns
+        -------
+        iris.cube.CubeList
+            The loaded data.
+        """
+
+
+@runtime_checkable
+class DataSource(Protocol):
+    """A data source can be used to find data."""
+
+    name: str
+    """A name identifying the data source."""
+
+    project: str
+    """The project that the data source provides data for."""
+
+    priority: int
+    """The priority of the data source. Lower values have priority."""
+
+    debug_info: str
+    """A string containing debug information when no data is found."""
+
+    def find_data(self, **facets: FacetValue) -> Iterable[DataElement]:
+        """Find data.
+
+        Parameters
+        ----------
+        **facets :
+            Find data matching these facets.
+
+        Returns
+        -------
+        :obj:`typing.Iterable` of :obj:`esmvalcore.io.protocol.DataElement`
+            The data elements that have been found.
+        """
diff --git a/esmvalcore/local.py b/esmvalcore/local.py
index 70f30adee2..208c4267cb 100644
--- a/esmvalcore/local.py
+++ b/esmvalcore/local.py
@@ -1,4 +1,48 @@
-"""Find files on the local filesystem."""
+"""Find files on the local filesystem.
+
+Example configuration to find CMIP6 data on a personal computer:
+
+.. code-block:: yaml
+
+    projects:
+      CMIP6:
+        data:
+          local-data:
+            type: "esmvalcore.local.LocalDataSource"
+            rootpath: ~/climate_data
+            dirname_template: "{project}/{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{version}"
+            filename_template: "{short_name}_{mip}_{dataset}_{exp}_{ensemble}_{grid}*.nc"
+
+The module will find files matching the :func:`glob.glob` pattern formed by
+``rootpath/dirname_template/filename_template``, where the facets defined
+inside the curly braces of the templates are replaced by their values from
+the :class:`~esmvalcore.dataset.Dataset` or the :ref:`recipe`. Note that the
+name of the data source, ``local-data`` in the example above, must be unique
+within each project but can otherwise be chosen freely.
+
+To start using this module, download the complete file for personal computers
+:download:`here <../configurations/local-data.yml>`, copy it to the
+directory ``~/.config/esmvaltool/``, and tailor it for your own system
+if needed.
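+
+The equivalent data source can also be created and queried directly from
+Python; a minimal sketch (the facet values are only an example):
+
+.. code-block:: python
+
+    from esmvalcore.local import LocalDataSource
+
+    data_source = LocalDataSource(
+        name="local-data",
+        project="CMIP6",
+        priority=1,
+        rootpath="~/climate_data",
+        dirname_template="{project}/{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{version}",
+        filename_template="{short_name}_{mip}_{dataset}_{exp}_{ensemble}_{grid}*.nc",
+    )
+    files = data_source.find_data(
+        project="CMIP6",
+        activity="CMIP",
+        institute="*",
+        dataset="EC-Earth3",
+        exp="historical",
+        ensemble="r1i1p1f1",
+        mip="Amon",
+        short_name="tas",
+        grid="gr",
+        version="*",
+    )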
+
+Example configuration files for popular HPC systems are also available:
+
+  - :download:`JASMIN at CEDA <../configurations/badc-data.yml>`
+  - :download:`Levante at DKRZ <../configurations/dkrz-data.yml>`
+  - :download:`UK Met Office <../configurations/mo-data.yml>`
+  - :download:`NCI Australia <../configurations/nci-data.yml>`
+  - :download:`IPSL <../configurations/ipsl-data.yml>`
+  - :download:`ETHZ <../configurations/ethz-data.yml>`
+
+Example configuration files for
+:ref:`supported climate models` are also available:
+
+  - :download:`ACCESS <../configurations/access-data.yml>`
+  - :download:`ICON <../configurations/icon-data.yml>`
+  - :download:`IPSLCM <../configurations/ipslcm-data.yml>`
+  - :download:`EMAC <../configurations/emac-data.yml>`
+
+"""
 
 from __future__ import annotations
@@ -6,27 +50,29 @@
 import itertools
 import logging
 import os
+import os.path
 import re
-from dataclasses import dataclass
+import warnings
+from dataclasses import dataclass, field
 from glob import glob
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
 
+import iris.cube
+import iris.fileformats.cf
 import isodate
 from cf_units import Unit
 from netCDF4 import Dataset, Variable
 
+import esmvalcore.io.protocol
 from esmvalcore.config import CFG
 from esmvalcore.config._config import get_project_config
 from esmvalcore.exceptions import RecipeError
-from esmvalcore.preprocessor._io import _load_from_file
+from esmvalcore.iris_helpers import ignore_warnings_context
 
 if TYPE_CHECKING:
     from collections.abc import Iterable
 
-    import iris.cube
-
-    from esmvalcore.esgf import ESGFFile
     from esmvalcore.typing import Facets, FacetValue
 
 logger = logging.getLogger(__name__)
@@ -86,9 +132,9 @@
     return str(variable.name)
 
 
-def _get_start_end_date(
-    file: str | Path | LocalFile | ESGFFile,
-) -> tuple[str, str]:
+def _get_start_end_date_from_filename(
+    file: str | Path,
+) -> tuple[str | None, str | None]:
     """Get the start and end dates as a string from a file name.
 
     Examples of allowed dates: 1980, 198001, 1980-01, 19801231, 1980-12-31,
@@ -117,13 +163,6 @@
     ValueError
         Start or end date cannot be determined.
     """
-    if hasattr(file, "name"):  # noqa: SIM108
-        # Path, LocalFile, ESGFFile
-        stem = Path(file.name).stem
-    else:
-        # str
-        stem = Path(file).stem
-
     start_date = end_date = None
 
     # Build regex
@@ -151,9 +190,34 @@
     start_date, end_date = _get_from_pattern(
         datetime_pattern,
         date_range_pattern,
-        stem,
+        Path(file).stem,
         "datetime",
     )
+    return start_date, end_date
+
+
+def _get_start_end_date(file: str | Path) -> tuple[str, str]:
+    """Get the start and end dates as a string from a file.
+
+    This function first tries to find the dates from the filename and if
+    that fails it will try to read them from the file.
+
+    Parameters
+    ----------
+    file:
+        The file to read the start and end dates from.
+
+    Returns
+    -------
+    tuple[str, str]
+        The start and end date.
+
+    Raises
+    ------
+    ValueError
+        Start or end date cannot be determined.
+    """
+    start_date, end_date = _get_start_end_date_from_filename(file)
 
     # As final resort, try to get the dates from the file contents
     if (
@@ -199,17 +263,6 @@
     return start_date, end_date
 
 
-def _get_start_end_year(
-    file: str | Path | LocalFile | ESGFFile,
-) -> tuple[int, int]:
-    """Get the start and end year as int from a file name.
-
-    See :func:`_get_start_end_date`.
- """ - (start_date, end_date) = _get_start_end_date(file) - return (int(start_date[:4]), int(end_date[:4])) - - def _dates_to_timerange(start_date: int | str, end_date: int | str) -> str: """Convert ``start_date`` and ``end_date`` to ``timerange``. @@ -467,15 +520,33 @@ def _select_drs(input_type: str, project: str, structure: str) -> list[str]: @dataclass(order=True) -class DataSource: - """Class for storing a data source and finding the associated files.""" +class LocalDataSource(esmvalcore.io.protocol.DataSource): + """Data source for finding files on a local filesystem.""" + + name: str + """A name identifying the data source.""" + + project: str + """The project that the data source provides data for.""" + + priority: int + """The priority of the data source. Lower values have priority.""" + + debug_info: str = field(init=False, default="") + """A string containing debug information when no data is found.""" rootpath: Path + """The path where the directories are located.""" + dirname_template: str + """The template for the directory names.""" + filename_template: str + """The template for the file names.""" def __post_init__(self) -> None: """Set further attributes.""" + self.rootpath = Path(os.path.expandvars(self.rootpath)).expanduser() self._regex_pattern = self._templates_to_regex() @property @@ -495,30 +566,59 @@ def get_glob_patterns(self, **facets) -> list[Path]: def find_files(self, **facets) -> list[LocalFile]: """Find files.""" + # TODO: deprecate this method + return self.find_data(**facets) + + def find_data(self, **facets) -> list[LocalFile]: + """Find data locally.""" + facets = dict(facets) + if "original_short_name" in facets: + facets["short_name"] = facets["original_short_name"] + globs = self.get_glob_patterns(**facets) + self.debug_info = "\n".join(str(g) for g in globs) logger.debug("Looking for files matching %s", globs) files: list[LocalFile] = [] for glob_ in globs: for filename in glob(str(glob_)): file = LocalFile(filename) - file.facets.update(self.path2facets(file)) + file.facets.update( + self.path2facets( + file, + add_timerange="timerange" in facets, + ), + ) files.append(file) + + files = _filter_versions_called_latest(files) + + if "version" not in facets: + files = _select_latest_version(files) + files.sort() # sorting makes it easier to see what was found if "timerange" in facets: files = _select_files(files, facets["timerange"]) return files - def path2facets(self, path: Path) -> dict[str, str]: + def path2facets(self, path: Path, add_timerange: bool) -> dict[str, str]: """Extract facets from path.""" facets: dict[str, str] = {} - match = re.search(self.regex_pattern, str(path)) - if match is None: - return facets - for facet, value in match.groupdict().items(): - if value: - facets[facet] = value + + if (match := re.search(self.regex_pattern, str(path))) is not None: + for facet, value in match.groupdict().items(): + if value: + facets[facet] = value + + if add_timerange: + try: + start_date, end_date = _get_start_end_date(path) + except ValueError: + pass + else: + facets["timerange"] = _dates_to_timerange(start_date, end_date) + return facets def _templates_to_regex(self) -> str: @@ -607,11 +707,40 @@ def _templates_to_regex(self) -> str: return pattern +class DataSource(LocalDataSource): + """Data source for finding files on a local filesystem. + + .. deprecated:: 2.13.0 + This class is deprecated and will be removed in version 2.16.0. + Please use 'esmvalcore.local.LocalDataSource' instead. 
+ """ + + def __init__(self, *args, **kwargs) -> None: + msg = ( + "The 'esmvalcore.local.LocalDataSource' class is deprecated and will be " + "removed in version 2.16.0. Please use 'esmvalcore.local.LocalDataSource'" + ) + warnings.warn(msg, DeprecationWarning, stacklevel=2) + super().__init__(*args, **kwargs) + + _ROOTPATH_WARNED: set[tuple[str, tuple[str]]] = set() +_LEGACY_DATA_SOURCES_WARNED: set[str] = set() -def _get_data_sources(project: str) -> list[DataSource]: + +def _get_data_sources(project: str) -> list[LocalDataSource]: """Get a list of data sources.""" + if project not in _LEGACY_DATA_SOURCES_WARNED: + logger.warning( + ( + "Using legacy data sources for project '%s' using 'rootpath' " + "and 'drs' settings and the path templates from '%s'" + ), + project, + CFG["config_developer_file"], + ) + _LEGACY_DATA_SOURCES_WARNED.add(project) rootpaths = CFG["rootpath"] for key in (project, "default"): if key in rootpaths: @@ -627,12 +756,19 @@ def _get_data_sources(project: str) -> list[DataSource]: if isinstance(paths, list): structure = CFG["drs"].get(project, "default") paths = dict.fromkeys(paths, structure) - sources: list[DataSource] = [] + sources: list[LocalDataSource] = [] for path, structure in paths.items(): dir_templates = _select_drs("input_dir", project, structure) file_templates = _select_drs("input_file", project, structure) sources.extend( - DataSource(Path(path), d, f) + LocalDataSource( + name="legacy-local", + project=project, + priority=1, + rootpath=Path(path), + dirname_template=d, + filename_template=f, + ) for d in dir_templates for f in file_templates ) @@ -746,6 +882,7 @@ def version(file): return result +# TODO: Deprecate this? def find_files( *, debug: bool = False, @@ -839,18 +976,48 @@ def find_files( return files -class LocalFile(type(Path())): # type: ignore +GRIB_FORMATS = (".grib2", ".grib", ".grb2", ".grb", ".gb2", ".gb") +"""GRIB file extensions.""" + + +def _get_attr_from_field_coord( + ncfield: iris.fileformats.cf.CFVariable, + coord_name: str | None, + attr: str, +) -> Any: + """Get attribute from netCDF field coordinate.""" + if coord_name is not None: + attrs = ncfield.cf_group[coord_name].cf_attrs() + attr_val = [value for (key, value) in attrs if key == attr] + if attr_val: + return attr_val[0] + return None + + +def _restore_lat_lon_units( + cube: iris.cube.Cube, + field: iris.fileformats.cf.CFVariable, + filename: str, # noqa: ARG001 +) -> None: # pylint: disable=unused-argument + """Use this callback to restore the original lat/lon units.""" + # Iris chooses to change longitude and latitude units to degrees + # regardless of value in file, so reinstating file value + for coord in cube.coords(): + if coord.standard_name in ["longitude", "latitude"]: + units = _get_attr_from_field_coord(field, coord.var_name, "units") + if units is not None: + coord.units = units + + +class LocalFile(type(Path()), esmvalcore.io.protocol.DataElement): # type: ignore """File on the local filesystem.""" + def prepare(self) -> None: + """Prepare the data for access.""" + @property def facets(self) -> Facets: - """Facets describing the file. - - Note - ---- - When using :func:`find_files`, facets are read from the directory - structure. Facets stored in filenames are not yet supported. 
- """ + """Facets are key-value pairs that were used to find this data.""" if not hasattr(self, "_facets"): self._facets: Facets = {} return self._facets @@ -861,7 +1028,7 @@ def facets(self, value: Facets) -> None: @property def attributes(self) -> dict[str, Any]: - """Attributes read from the file.""" + """Attributes are key-value pairs describing the data.""" if not hasattr(self, "_attributes"): msg = ( "Attributes have not been read yet. Call the `to_iris` method " @@ -885,7 +1052,22 @@ def to_iris( iris.cube.CubeList The loaded data. """ - cubes = _load_from_file(self, ignore_warnings=ignore_warnings) + file = Path(self) + logger.debug("Loading:\n%s", file) + + with ignore_warnings_context(ignore_warnings): + # GRIB files need to be loaded with iris.load, otherwise we will + # get separate (lat, lon) slices for each time step, pressure + # level, etc. + if file.suffix in GRIB_FORMATS: + cubes = iris.load(file, callback=_restore_lat_lon_units) + else: + cubes = iris.load_raw(file, callback=_restore_lat_lon_units) + logger.debug("Done with loading %s", file) + + for cube in cubes: + cube.attributes.globals["source_file"] = str(file) + # Cache the attributes. self.attributes = copy.deepcopy(dict(cubes[0].attributes.globals)) return cubes diff --git a/esmvalcore/preprocessor/__init__.py b/esmvalcore/preprocessor/__init__.py index ff6f560cac..de4337f948 100644 --- a/esmvalcore/preprocessor/__init__.py +++ b/esmvalcore/preprocessor/__init__.py @@ -5,7 +5,6 @@ import copy import inspect import logging -from pathlib import Path from pprint import pformat from typing import TYPE_CHECKING, Any, TypeAlias @@ -15,6 +14,7 @@ from esmvalcore._task import BaseTask from esmvalcore.cmor.check import cmor_check_data, cmor_check_metadata from esmvalcore.cmor.fix import fix_data, fix_file, fix_metadata +from esmvalcore.io.protocol import DataElement from esmvalcore.preprocessor._area import ( area_statistics, extract_named_regions, @@ -103,11 +103,12 @@ if TYPE_CHECKING: from collections.abc import Callable, Iterable, Sequence + from pathlib import Path import prov.model from dask.delayed import Delayed - from esmvalcore.dataset import Dataset, File + from esmvalcore.dataset import Dataset logger = logging.getLogger(__name__) @@ -374,7 +375,7 @@ def _run_preproc_function( function: Callable, items: PreprocessorItem | Sequence[PreprocessorItem], kwargs: Any, - input_files: Sequence[File] | None = None, + input_files: Sequence[DataElement] | None = None, ) -> PreprocessorItem | Sequence[PreprocessorItem]: """Run preprocessor function.""" kwargs_str = ",\n".join( @@ -410,7 +411,7 @@ def _run_preproc_function( ) # Make sure that the arguments are indexable - if isinstance(items, (PreprocessorFile, Cube, str, Path)): + if isinstance(items, (PreprocessorFile, Cube, DataElement)): items = [items] if isinstance(items, set): items = list(items) @@ -438,7 +439,7 @@ def _run_preproc_function( def preprocess( items: Sequence[PreprocessorItem], step: str, - input_files: list[File] | None = None, + input_files: list[DataElement] | None = None, output_file: Path | None = None, debug: bool = False, **settings: Any, @@ -478,7 +479,7 @@ def preprocess( items = [] for item in result: - if isinstance(item, (PreprocessorFile, Cube, str, Path)): + if isinstance(item, (PreprocessorFile, Cube, DataElement)): items.append(item) else: items.extend(item) @@ -573,7 +574,7 @@ def apply(self, step: str, debug: bool = False) -> None: self.cubes, step, input_files=self._input_files, - output_file=self.filename, + 
output_file=self.filename, # type: ignore[arg-type] debug=debug, **self.settings[step], ) @@ -646,7 +647,7 @@ def _initialize_entity(self) -> None: settings = { "preprocessor:" + k: str(v) for k, v in self.settings.items() } - self.entity.add_attributes(settings) + self.entity.add_attributes(settings) # type: ignore[attr-defined] def group(self, keys: list) -> str: """Generate group keyword. @@ -671,7 +672,7 @@ def group(self, keys: list) -> str: return "_".join(identifier) -PreprocessorItem: TypeAlias = PreprocessorFile | Cube | str | Path +PreprocessorItem: TypeAlias = PreprocessorFile | Cube | DataElement def _apply_multimodel( diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index f050c4cfa7..bd0926f31c 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -20,16 +20,14 @@ from esmvalcore._task import write_ncl_settings from esmvalcore.exceptions import ESMValCoreLoadWarning -from esmvalcore.iris_helpers import ( - dataset_to_iris, - ignore_warnings_context, -) +from esmvalcore.io.protocol import DataElement +from esmvalcore.iris_helpers import dataset_to_iris +from esmvalcore.local import LocalFile if TYPE_CHECKING: from collections.abc import Sequence from dask.delayed import Delayed - from iris.fileformats.cf import CFVariable logger = logging.getLogger(__name__) @@ -42,40 +40,16 @@ "reference_dataset", "alternative_dataset", } -GRIB_FORMATS = (".grib2", ".grib", ".grb2", ".grb", ".gb2", ".gb") - - -def _get_attr_from_field_coord( - ncfield: CFVariable, - coord_name: str | None, - attr: str, -) -> Any: - """Get attribute from netCDF field coordinate.""" - if coord_name is not None: - attrs = ncfield.cf_group[coord_name].cf_attrs() - attr_val = [value for (key, value) in attrs if key == attr] - if attr_val: - return attr_val[0] - return None - - -def _restore_lat_lon_units( - cube: Cube, - field: CFVariable, - filename: str, # noqa: ARG001 -) -> None: # pylint: disable=unused-argument - """Use this callback to restore the original lat/lon units.""" - # Iris chooses to change longitude and latitude units to degrees - # regardless of value in file, so reinstating file value - for coord in cube.coords(): - if coord.standard_name in ["longitude", "latitude"]: - units = _get_attr_from_field_coord(field, coord.var_name, "units") - if units is not None: - coord.units = units def load( - file: str | Path | Cube | CubeList | xr.Dataset | ncdata.NcData, + file: str + | Path + | DataElement + | Cube + | CubeList + | xr.Dataset + | ncdata.NcData, ignore_warnings: list[dict[str, Any]] | None = None, backend_kwargs: dict[str, Any] | None = None, ) -> CubeList: @@ -113,7 +87,7 @@ def load( Invalid type for ``file``. 
""" - if hasattr(file, "to_iris"): + if isinstance(file, DataElement): cubes = file.to_iris(ignore_warnings=ignore_warnings) elif isinstance(file, (str, Path)): extension = ( @@ -122,7 +96,7 @@ def load( else os.path.splitext(file)[1] ) if "zarr" not in extension: - cubes = _load_from_file(file, ignore_warnings=ignore_warnings) + cubes = LocalFile(file).to_iris(ignore_warnings=ignore_warnings) else: cubes = _load_zarr( file, @@ -161,7 +135,7 @@ def load( def _load_zarr( - file: str | Path | Cube | CubeList | xr.Dataset | ncdata.NcData, + file: str | Path, ignore_warnings: list[dict[str, Any]] | None = None, backend_kwargs: dict[str, Any] | None = None, ) -> CubeList: @@ -222,30 +196,6 @@ def _load_zarr( return dataset_to_iris(zarr_xr, ignore_warnings=ignore_warnings) -def _load_from_file( - file: str | Path, - ignore_warnings: list[dict[str, Any]] | None = None, -) -> CubeList: - """Load data from file.""" - file = Path(file) - logger.debug("Loading:\n%s", file) - - with ignore_warnings_context(ignore_warnings): - # GRIB files need to be loaded with iris.load, otherwise we will - # get separate (lat, lon) slices for each time step, pressure - # level, etc. - if file.suffix in GRIB_FORMATS: - cubes = iris.load(file, callback=_restore_lat_lon_units) - else: - cubes = iris.load_raw(file, callback=_restore_lat_lon_units) - logger.debug("Done with loading %s", file) - - for cube in cubes: - cube.attributes.globals["source_file"] = str(file) - - return cubes - - def save( # noqa: C901 cubes: Sequence[Cube], filename: Path | str, diff --git a/esmvalcore/typing.py b/esmvalcore/typing.py index 7880bdac1b..1e3735d4f2 100644 --- a/esmvalcore/typing.py +++ b/esmvalcore/typing.py @@ -3,19 +3,18 @@ from __future__ import annotations from collections.abc import Iterable, Sequence -from numbers import Number import dask.array as da import numpy as np from iris.cube import Cube -FacetValue = str | Sequence[str] | Number | bool +FacetValue = str | Sequence[str] | int """Type describing a single facet.""" Facets = dict[str, FacetValue] """Type describing a collection of facets.""" -NetCDFAttr = str | Number | Iterable +NetCDFAttr = str | int | float | Iterable """Type describing netCDF attributes. 
`NetCDF attributes diff --git a/pyproject.toml b/pyproject.toml index bd853ba50d..0857deda01 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,6 +45,7 @@ dependencies = [ "fire", "geopy", "humanfriendly", + "intake-esgf", "intake-esm", "iris-grib>=0.20.0", # github.com/ESMValGroup/ESMValCore/issues/2535 "isodate>=0.7.0", @@ -143,6 +144,7 @@ minversion = "6" markers = [ "installation: Test requires installation of dependencies", "use_sample_data: Run functional tests using real data", + "online: Run tests that require internet access", ] testpaths = ["tests"] xfail_strict = true @@ -220,6 +222,7 @@ ignore = [ "D102", # Missing docstring in public method "D103", # Missing docstring in public function "D104", # Missing docstring in public package + "PT013", # Allow importing fixtures from pytest to avoid repeating 'pytest' many times ] "doc/gensidebar.py" = [ "INP001", # File is part of an implicit namespace package diff --git a/tests/conftest.py b/tests/conftest.py index fc6a39c7b2..ea2dceaeb6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -27,7 +27,6 @@ def _load_default_config(): "ignore", message="Do not instantiate `Config` objects directly", category=UserWarning, - module="esmvalcore", ) cfg = Config() cfg.load_from_dirs([]) @@ -50,7 +49,6 @@ def ignore_existing_user_config(monkeypatch, cfg_default): @pytest.fixture def session(tmp_path: Path, ignore_existing_user_config, monkeypatch): """Session object with default settings.""" - monkeypatch.setitem(CFG, "rootpath", {"default": {tmp_path: "default"}}) monkeypatch.setitem(CFG, "output_dir", tmp_path / "esmvaltool_output") return CFG.start_session("recipe_test") diff --git a/tests/integration/cmor/_fixes/icon/conftest.py b/tests/integration/cmor/_fixes/icon/conftest.py new file mode 100644 index 0000000000..ee9d6beac7 --- /dev/null +++ b/tests/integration/cmor/_fixes/icon/conftest.py @@ -0,0 +1,36 @@ +"""Fixtures for ICON fixes tests.""" + +import importlib.resources +from pathlib import Path + +import pytest +import yaml + +import esmvalcore.config +from esmvalcore.cmor._fixes.icon._base_fixes import IconFix + + +@pytest.fixture(autouse=True) +def tmp_cache_dir(monkeypatch, tmp_path): + """Use temporary path as cache directory for all tests in this module.""" + monkeypatch.setattr(IconFix, "CACHE_DIR", tmp_path) + + +@pytest.fixture +def session( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + session: esmvalcore.config.Session, +) -> esmvalcore.config.Session: + """Configure ICON data source for all tests in this module.""" + with importlib.resources.as_file( + importlib.resources.files(esmvalcore.config) + / "configurations" + / "icon-data.yml", + ) as config_file: + cfg = yaml.safe_load(config_file.read_text(encoding="utf-8")) + for data_source in cfg["projects"]["ICON"]["data"]: + cfg["projects"]["ICON"]["data"][data_source]["rootpath"] = tmp_path + session["projects"]["ICON"]["data"] = cfg["projects"]["ICON"]["data"] + session["auxiliary_data_dir"] = tmp_path + return session diff --git a/tests/integration/cmor/_fixes/icon/test_icon.py b/tests/integration/cmor/_fixes/icon/test_icon.py index ce7cd6317a..de6e205f52 100644 --- a/tests/integration/cmor/_fixes/icon/test_icon.py +++ b/tests/integration/cmor/_fixes/icon/test_icon.py @@ -1,6 +1,5 @@ """Test the ICON on-the-fly CMORizer.""" -from copy import deepcopy from datetime import datetime from pathlib import Path from unittest import mock @@ -26,7 +25,6 @@ ) from esmvalcore.cmor.fix import Fix from esmvalcore.cmor.table import CoordinateInfo, get_var_info 
-from esmvalcore.config import CFG from esmvalcore.dataset import Dataset TEST_GRID_FILE_URI = ( @@ -36,12 +34,6 @@ TEST_GRID_FILE_NAME = "icon_grid.nc" -@pytest.fixture(autouse=True) -def tmp_cache_dir(monkeypatch, tmp_path): - """Use temporary path as cache directory for all tests in this module.""" - monkeypatch.setattr(IconFix, "CACHE_DIR", tmp_path) - - @pytest.fixture def cubes_atm_2d(test_data_path): """2D sample cubes.""" @@ -572,9 +564,10 @@ def test_get_areacella_fix(): assert fix == [AllVars(None), GenericFix(None)] -def test_areacella_fix(cubes_grid): +@pytest.mark.online +def test_areacella_fix(cubes_grid, session): """Test fix.""" - fix = get_allvars_fix("fx", "areacella") + fix = get_allvars_fix("fx", "areacella", session=session) fix.extra_facets["var_type"] = "fx" fixed_cubes = fix.fix_metadata(cubes_grid) @@ -595,9 +588,10 @@ def test_get_areacello_fix(): assert fix == [AllVars(None), GenericFix(None)] -def test_areacello_fix(cubes_grid): +@pytest.mark.online +def test_areacello_fix(cubes_grid, session): """Test fix.""" - fix = get_allvars_fix("Ofx", "areacello") + fix = get_allvars_fix("Ofx", "areacello", session=session) fix.extra_facets["var_type"] = "fx" fixed_cubes = fix.fix_metadata(cubes_grid) @@ -655,9 +649,10 @@ def test_get_lwp_fix(): assert fix == [AllVars(None), GenericFix(None)] -def test_lwp_fix(cubes_atm_2d): +@pytest.mark.online +def test_lwp_fix(cubes_atm_2d, session): """Test fix.""" - fix = get_allvars_fix("AERmon", "lwp") + fix = get_allvars_fix("AERmon", "lwp", session) fixed_cubes = fix.fix_metadata(cubes_atm_2d) assert len(fixed_cubes) == 1 @@ -683,9 +678,10 @@ def test_get_rsdt_fix(): assert fix == [AllVars(None), GenericFix(None)] -def test_rsdt_fix(cubes_atm_2d): +@pytest.mark.online +def test_rsdt_fix(cubes_atm_2d, session): """Test fix.""" - fix = get_allvars_fix("Amon", "rsdt") + fix = get_allvars_fix("Amon", "rsdt", session=session) fixed_cubes = fix.fix_metadata(cubes_atm_2d) assert len(fixed_cubes) == 1 @@ -706,9 +702,10 @@ def test_get_rsut_fix(): assert fix == [AllVars(None), GenericFix(None)] -def test_rsut_fix(cubes_atm_2d): +@pytest.mark.online +def test_rsut_fix(cubes_atm_2d, session): """Test fix.""" - fix = get_allvars_fix("Amon", "rsut") + fix = get_allvars_fix("Amon", "rsut", session=session) fixed_cubes = fix.fix_metadata(cubes_atm_2d) assert len(fixed_cubes) == 1 @@ -732,9 +729,10 @@ def test_get_siconc_fix(): assert fix == [AllVars(None), GenericFix(None)] -def test_siconc_fix(cubes_atm_2d): +@pytest.mark.online +def test_siconc_fix(cubes_atm_2d, session): """Test fix.""" - fix = get_allvars_fix("SImon", "siconc") + fix = get_allvars_fix("SImon", "siconc", session=session) fixed_cubes = fix.fix_metadata(cubes_atm_2d) cube = check_siconc_metadata( @@ -758,9 +756,10 @@ def test_get_siconca_fix(): assert fix == [AllVars(None), GenericFix(None)] -def test_siconca_fix(cubes_atm_2d): +@pytest.mark.online +def test_siconca_fix(cubes_atm_2d, session): """Test fix.""" - fix = get_allvars_fix("SImon", "siconca") + fix = get_allvars_fix("SImon", "siconca", session=session) fixed_cubes = fix.fix_metadata(cubes_atm_2d) cube = check_siconc_metadata( @@ -787,9 +786,10 @@ def test_get_ta_fix(): assert fix == [AllVars(None), GenericFix(None)] -def test_ta_fix(cubes_atm_3d): +@pytest.mark.online +def test_ta_fix(cubes_atm_3d, session): """Test fix.""" - fix = get_allvars_fix("Amon", "ta") + fix = get_allvars_fix("Amon", "ta", session=session) fixed_cubes = fix.fix_metadata(cubes_atm_3d) cube = check_ta_metadata(fixed_cubes) @@ -798,9 +798,10 @@ 
def test_ta_fix(cubes_atm_3d): check_lat_lon(cube) -def test_ta_fix_no_plev_bounds(cubes_atm_3d): +@pytest.mark.online +def test_ta_fix_no_plev_bounds(cubes_atm_3d, session): """Test fix.""" - fix = get_allvars_fix("Amon", "ta") + fix = get_allvars_fix("Amon", "ta", session=session) cubes = CubeList( [ cubes_atm_3d.extract_cube(NameConstraint(var_name="ta")), @@ -824,9 +825,10 @@ def test_get_tas_fix(): assert fix == [AllVars(None), GenericFix(None)] -def test_tas_fix(cubes_atm_2d): +@pytest.mark.online +def test_tas_fix(cubes_atm_2d, session): """Test fix.""" - fix = get_allvars_fix("Amon", "tas") + fix = get_allvars_fix("Amon", "tas", session=session) fixed_cubes = fix.fix_metadata(cubes_atm_2d) cube = check_tas_metadata(fixed_cubes) @@ -835,9 +837,10 @@ def test_tas_fix(cubes_atm_2d): check_heightxm(cube, 2.0) -def test_tas_spatial_index_coord_already_present(cubes_atm_2d): +@pytest.mark.online +def test_tas_spatial_index_coord_already_present(cubes_atm_2d, session): """Test fix.""" - fix = get_allvars_fix("Amon", "tas") + fix = get_allvars_fix("Amon", "tas", session=session) index_coord = DimCoord(np.arange(8), var_name="ncells") cube = cubes_atm_2d.extract_cube(NameConstraint(var_name="tas")) @@ -849,9 +852,10 @@ def test_tas_spatial_index_coord_already_present(cubes_atm_2d): check_lat_lon(cube) -def test_tas_scalar_height2m_already_present(cubes_atm_2d): +@pytest.mark.online +def test_tas_scalar_height2m_already_present(cubes_atm_2d, session): """Test fix.""" - fix = get_allvars_fix("Amon", "tas") + fix = get_allvars_fix("Amon", "tas", session=session) # Scalar height (with wrong metadata) already present height_coord = AuxCoord(2.0, var_name="h", standard_name="height") @@ -901,9 +905,10 @@ def test_tas_no_mesh(cubes_atm_2d): assert cube.coord_dims(lat) == cube.coord_dims(i_coord) -def test_tas_dim_height2m_already_present(cubes_atm_2d): +@pytest.mark.online +def test_tas_dim_height2m_already_present(cubes_atm_2d, session): """Test fix.""" - fix = get_allvars_fix("Amon", "tas") + fix = get_allvars_fix("Amon", "tas", session=session) # Dimensional coordinate height (with wrong metadata) already present height_coord = AuxCoord(2.0, var_name="h", standard_name="height") @@ -920,9 +925,10 @@ def test_tas_dim_height2m_already_present(cubes_atm_2d): check_heightxm(cube, 2.0) -def test_tas_no_shift_time(cubes_atm_2d): +@pytest.mark.online +def test_tas_no_shift_time(cubes_atm_2d, session): """Test fix.""" - fix = get_allvars_fix("Amon", "tas") + fix = get_allvars_fix("Amon", "tas", session=session) fix.extra_facets["shift_time"] = False fixed_cubes = fix.fix_metadata(cubes_atm_2d) @@ -944,9 +950,10 @@ def test_tas_no_shift_time(cubes_atm_2d): assert time.attributes == {} -def test_fix_does_not_change_cached_grid(cubes_atm_2d): +@pytest.mark.online +def test_fix_does_not_change_cached_grid(cubes_atm_2d, session): """Test fix.""" - fix = get_allvars_fix("Amon", "tas") + fix = get_allvars_fix("Amon", "tas", session=session) assert not fix._horizontal_grids assert not fix._meshes @@ -975,9 +982,10 @@ def test_get_uas_fix(): assert fix == [AllVars(None), GenericFix(None)] -def test_uas_fix(cubes_atm_2d): +@pytest.mark.online +def test_uas_fix(cubes_atm_2d, session): """Test fix.""" - fix = get_allvars_fix("Amon", "uas") + fix = get_allvars_fix("Amon", "uas", session=session) fixed_cubes = fix.fix_metadata(cubes_atm_2d) assert len(fixed_cubes) == 1 @@ -1001,9 +1009,10 @@ def test_uas_fix(cubes_atm_2d): assert height.bounds is None -def test_uas_scalar_height10m_already_present(cubes_atm_2d): 
+@pytest.mark.online +def test_uas_scalar_height10m_already_present(cubes_atm_2d, session): """Test fix.""" - fix = get_allvars_fix("Amon", "uas") + fix = get_allvars_fix("Amon", "uas", session=session) # Scalar height (with wrong metadata) already present height_coord = AuxCoord(10.0, var_name="h", standard_name="height") @@ -1017,9 +1026,10 @@ def test_uas_scalar_height10m_already_present(cubes_atm_2d): check_heightxm(cube, 10.0) -def test_uas_dim_height10m_already_present(cubes_atm_2d): +@pytest.mark.online +def test_uas_dim_height10m_already_present(cubes_atm_2d, session): """Test fix.""" - fix = get_allvars_fix("Amon", "uas") + fix = get_allvars_fix("Amon", "uas", session=session) # Dimensional coordinate height (with wrong metadata) already present height_coord = AuxCoord(10.0, var_name="h", standard_name="height") @@ -1108,9 +1118,10 @@ def test_ch4clim_fix(cubes_regular_grid): # Test fix with empty standard_name -def test_empty_standard_name_fix(cubes_atm_2d, monkeypatch): +@pytest.mark.online +def test_empty_standard_name_fix(cubes_atm_2d, monkeypatch, session): """Test fix.""" - fix = get_allvars_fix("Amon", "tas") + fix = get_allvars_fix("Amon", "tas", session=session) # We know that tas has a standard name, but this being native model output # there may be variables with no standard name. The code is designed to # handle this gracefully and here we test it with an artificial, but @@ -1130,7 +1141,8 @@ def test_empty_standard_name_fix(cubes_atm_2d, monkeypatch): # Test automatic addition of missing coordinates -def test_add_time(cubes_atm_2d): +@pytest.mark.online +def test_add_time(cubes_atm_2d, session): """Test fix.""" # Remove time from tas cube to test automatic addition tas_cube = cubes_atm_2d.extract_cube(NameConstraint(var_name="tas")) @@ -1139,7 +1151,7 @@ def test_add_time(cubes_atm_2d): tas_cube.remove_coord("time") cubes = CubeList([tas_cube, uas_cube]) - fix = get_allvars_fix("Amon", "tas") + fix = get_allvars_fix("Amon", "tas", session=session) fixed_cubes = fix.fix_metadata(cubes) cube = check_tas_metadata(fixed_cubes) @@ -1162,13 +1174,14 @@ def test_add_time_fail(): fix._add_time(cube, cubes) -def test_add_latitude(cubes_atm_2d): +@pytest.mark.online +def test_add_latitude(cubes_atm_2d, session): """Test fix.""" # Remove latitude from tas cube to test automatic addition tas_cube = cubes_atm_2d.extract_cube(NameConstraint(var_name="tas")) tas_cube.remove_coord("latitude") cubes = CubeList([tas_cube]) - fix = get_allvars_fix("Amon", "tas") + fix = get_allvars_fix("Amon", "tas", session=session) assert len(fix._horizontal_grids) == 0 fixed_cubes = fix.fix_metadata(cubes) @@ -1180,13 +1193,14 @@ def test_add_latitude(cubes_atm_2d): assert TEST_GRID_FILE_NAME in fix._horizontal_grids -def test_add_longitude(cubes_atm_2d): +@pytest.mark.online +def test_add_longitude(cubes_atm_2d, session): """Test fix.""" # Remove longitude from tas cube to test automatic addition tas_cube = cubes_atm_2d.extract_cube(NameConstraint(var_name="tas")) tas_cube.remove_coord("longitude") cubes = CubeList([tas_cube]) - fix = get_allvars_fix("Amon", "tas") + fix = get_allvars_fix("Amon", "tas", session=session) assert len(fix._horizontal_grids) == 0 fixed_cubes = fix.fix_metadata(cubes) @@ -1198,14 +1212,15 @@ def test_add_longitude(cubes_atm_2d): assert TEST_GRID_FILE_NAME in fix._horizontal_grids -def test_add_latitude_longitude(cubes_atm_2d): +@pytest.mark.online +def test_add_latitude_longitude(cubes_atm_2d, session): """Test fix.""" # Remove latitude and longitude from tas cube to test 
automatic addition tas_cube = cubes_atm_2d.extract_cube(NameConstraint(var_name="tas")) tas_cube.remove_coord("latitude") tas_cube.remove_coord("longitude") cubes = CubeList([tas_cube]) - fix = get_allvars_fix("Amon", "tas") + fix = get_allvars_fix("Amon", "tas", session=session) assert len(fix._horizontal_grids) == 0 fixed_cubes = fix.fix_metadata(cubes) @@ -1259,14 +1274,15 @@ def test_add_coord_from_grid_file_fail_no_url(): fix._add_coord_from_grid_file(Cube(0), "clat") -def test_add_coord_from_grid_fail_no_unnamed_dim(cubes_atm_2d): +@pytest.mark.online +def test_add_coord_from_grid_fail_no_unnamed_dim(cubes_atm_2d, session): """Test fix.""" # Remove latitude from tas cube to test automatic addition tas_cube = cubes_atm_2d.extract_cube(NameConstraint(var_name="tas")) tas_cube.remove_coord("latitude") index_coord = DimCoord(np.arange(8), var_name="ncells") tas_cube.add_dim_coord(index_coord, 1) - fix = get_allvars_fix("Amon", "tas") + fix = get_allvars_fix("Amon", "tas", session=session) msg = ( "Cannot determine coordinate dimension for coordinate 'clat', " @@ -1276,13 +1292,14 @@ def test_add_coord_from_grid_fail_no_unnamed_dim(cubes_atm_2d): fix._add_coord_from_grid_file(tas_cube, "clat") -def test_add_coord_from_grid_fail_two_unnamed_dims(cubes_atm_2d): +@pytest.mark.online +def test_add_coord_from_grid_fail_two_unnamed_dims(cubes_atm_2d, session): """Test fix.""" # Remove latitude from tas cube to test automatic addition tas_cube = cubes_atm_2d.extract_cube(NameConstraint(var_name="tas")) tas_cube.remove_coord("latitude") tas_cube = iris.util.new_axis(tas_cube) - fix = get_allvars_fix("Amon", "tas") + fix = get_allvars_fix("Amon", "tas", session=session) msg = ( "Cannot determine coordinate dimension for coordinate 'clat', " @@ -1321,19 +1338,16 @@ def test_get_horizontal_grid_from_attr_cached_in_dict( @mock.patch.object(IconFix, "_get_grid_from_facet", autospec=True) def test_get_horizontal_grid_from_attr_rootpath( mock_get_grid_from_facet, - monkeypatch, tmp_path, + session, ): """Test fix.""" - rootpath = deepcopy(CFG["rootpath"]) - rootpath["ICON"] = str(tmp_path) - monkeypatch.setitem(CFG, "rootpath", rootpath) cube = Cube(0, attributes={"grid_file_uri": "grid.nc"}) grid_cube = Cube(0, var_name="test_grid_cube") (tmp_path / "amip").mkdir(parents=True, exist_ok=True) iris.save(grid_cube, tmp_path / "amip" / "grid.nc") - fix = get_allvars_fix("Amon", "tas") + fix = get_allvars_fix("Amon", "tas", session=session) fix._horizontal_grids["grid_from_facet.nc"] = mock.sentinel.wrong_grid grid = fix.get_horizontal_grid(cube) @@ -1353,6 +1367,7 @@ def test_get_horizontal_grid_from_attr_cached_in_file( mock_requests, mock_get_grid_from_facet, tmp_path, + session, ): """Test fix.""" cube = Cube( @@ -1361,7 +1376,7 @@ def test_get_horizontal_grid_from_attr_cached_in_file( "grid_file_uri": "https://temporary.url/this/is/the/grid_file.nc", }, ) - fix = get_allvars_fix("Amon", "tas") + fix = get_allvars_fix("Amon", "tas", session=session) assert len(fix._horizontal_grids) == 0 # Save temporary grid file @@ -1380,15 +1395,17 @@ def test_get_horizontal_grid_from_attr_cached_in_file( mock_get_grid_from_facet.assert_not_called() +@pytest.mark.online @mock.patch.object(IconFix, "_get_grid_from_facet", autospec=True) def test_get_horizontal_grid_from_attr_cache_file_too_old( mock_get_grid_from_facet, tmp_path, monkeypatch, + session, ): """Test fix.""" cube = Cube(0, attributes={"grid_file_uri": TEST_GRID_FILE_URI}) - fix = get_allvars_fix("Amon", "tas") + fix = get_allvars_fix("Amon", "tas", 
session=session) assert len(fix._horizontal_grids) == 0 # Save temporary grid file @@ -1417,11 +1434,9 @@ def test_get_horizontal_grid_from_attr_cache_file_too_old( def test_get_horizontal_grid_from_facet_cached_in_dict( mock_get_grid_from_cube_attr, tmp_path, + session, ): """Test fix.""" - session = CFG.start_session("my session") - session["auxiliary_data_dir"] = tmp_path - # Save temporary grid file (this will not be used; however, it is necessary # to not raise a FileNotFoundError) grid_path = "grid.nc" @@ -1451,11 +1466,9 @@ def test_get_horizontal_grid_from_facet( mock_get_grid_from_cube_attr, grid_path, tmp_path, + session, ): """Test fix.""" - session = CFG.start_session("my session") - session["auxiliary_data_dir"] = tmp_path - # Make sure that grid specified by cube attribute is NOT used cube = Cube(0, attributes={"grid_file_uri": "cached_grid_url.nc"}) @@ -1479,11 +1492,8 @@ def test_get_horizontal_grid_from_facet( mock_get_grid_from_cube_attr.assert_not_called() -def test_get_horizontal_grid_from_facet_fail(tmp_path): +def test_get_horizontal_grid_from_facet_fail(session): """Test fix.""" - session = CFG.start_session("my session") - session["auxiliary_data_dir"] = tmp_path - cube = Cube(0) fix = get_allvars_fix("Amon", "tas", session=session) fix.extra_facets["horizontal_grid"] = "/this/does/not/exist.nc" @@ -1742,9 +1752,10 @@ def test_invalid_time_units(cubes_atm_2d): # Test fix with (sub-)hourly data -def test_hourly_data(cubes_atm_2d): +@pytest.mark.online +def test_hourly_data(cubes_atm_2d, session): """Test fix.""" - fix = get_allvars_fix("Amon", "tas") + fix = get_allvars_fix("Amon", "tas", session=session) fix.extra_facets["frequency"] = "1hr" for cube in cubes_atm_2d: cube.coord("time").points = [20041104.5833333] @@ -2093,8 +2104,9 @@ def test_get_previous_timestep(frequency, datetime_in, datetime_out): # Test mesh creation raises warning because bounds do not match vertices +@pytest.mark.online @mock.patch("esmvalcore.cmor._fixes.icon._base_fixes.logger", autospec=True) -def test_get_mesh_fail_invalid_clat_bounds(mock_logger, cubes_atm_2d): +def test_get_mesh_fail_invalid_clat_bounds(mock_logger, cubes_atm_2d, session): """Test fix.""" # Slightly modify latitude bounds from tas cube to make mesh creation fail tas_cube = cubes_atm_2d.extract_cube(NameConstraint(var_name="tas")) @@ -2102,7 +2114,7 @@ def test_get_mesh_fail_invalid_clat_bounds(mock_logger, cubes_atm_2d): lat_bnds[0, 0] = 40.0 tas_cube.coord("latitude").bounds = lat_bnds cubes = CubeList([tas_cube]) - fix = get_allvars_fix("Amon", "tas") + fix = get_allvars_fix("Amon", "tas", session=session) fixed_cubes = fix.fix_metadata(cubes) cube = check_tas_metadata(fixed_cubes) @@ -2117,8 +2129,9 @@ def test_get_mesh_fail_invalid_clat_bounds(mock_logger, cubes_atm_2d): ) +@pytest.mark.online @mock.patch("esmvalcore.cmor._fixes.icon._base_fixes.logger", autospec=True) -def test_get_mesh_fail_invalid_clon_bounds(mock_logger, cubes_atm_2d): +def test_get_mesh_fail_invalid_clon_bounds(mock_logger, cubes_atm_2d, session): """Test fix.""" # Slightly modify longitude bounds from tas cube to make mesh creation fail tas_cube = cubes_atm_2d.extract_cube(NameConstraint(var_name="tas")) @@ -2126,7 +2139,7 @@ def test_get_mesh_fail_invalid_clon_bounds(mock_logger, cubes_atm_2d): lon_bnds[0, 1] = 40.0 tas_cube.coord("longitude").bounds = lon_bnds cubes = CubeList([tas_cube]) - fix = get_allvars_fix("Amon", "tas") + fix = get_allvars_fix("Amon", "tas", session=session) fixed_cubes = fix.fix_metadata(cubes) cube = 
check_tas_metadata(fixed_cubes) @@ -2189,11 +2202,8 @@ def test_get_mesh_not_cached_from_attr(monkeypatch): fix._create_mesh.assert_called_once_with(cube) -def test_get_mesh_cached_from_facet(monkeypatch, tmp_path): +def test_get_mesh_cached_from_facet(monkeypatch, tmp_path, session): """Test fix.""" - session = CFG.start_session("my session") - session["auxiliary_data_dir"] = tmp_path - # Save temporary grid file (this will not be used; however, it is necessary # to not raise a FileNotFoundError) grid_path = "grid.nc" @@ -2213,11 +2223,8 @@ def test_get_mesh_cached_from_facet(monkeypatch, tmp_path): fix._create_mesh.assert_not_called() -def test_get_mesh_not_cached_from_facet(monkeypatch, tmp_path): +def test_get_mesh_not_cached_from_facet(monkeypatch, tmp_path, session): """Test fix.""" - session = CFG.start_session("my session") - session["auxiliary_data_dir"] = tmp_path - # Save temporary grid file (this will not be used; however, it is necessary # to not raise a FileNotFoundError) grid_path = "grid.nc" @@ -2245,10 +2252,8 @@ def test_get_mesh_not_cached_from_facet(monkeypatch, tmp_path): ("b.nc", "Grid file", "{tmp_path}/b.nc"), ], ) -def test_get_path_from_facet(path, description, output, tmp_path): +def test_get_path_from_facet(path, description, output, tmp_path, session): """Test fix.""" - session = CFG.start_session("my session") - session["auxiliary_data_dir"] = tmp_path path = path.format(tmp_path=tmp_path) fix = get_allvars_fix("Amon", "tas", session=session) fix.extra_facets["test_path"] = path @@ -2271,10 +2276,8 @@ def test_get_path_from_facet(path, description, output, tmp_path): ("b.nc", "Grid file"), ], ) -def test_get_path_from_facet_fail(path, description, tmp_path): +def test_get_path_from_facet_fail(path, description, tmp_path, session): """Test fix.""" - session = CFG.start_session("my session") - session["auxiliary_data_dir"] = tmp_path path = path.format(tmp_path=tmp_path) fix = get_allvars_fix("Amon", "tas", session=session) fix.extra_facets["test_path"] = path @@ -2288,10 +2291,8 @@ def test_get_path_from_facet_fail(path, description, tmp_path): @pytest.mark.parametrize("facet", ["zg_file", "zghalf_file"]) @pytest.mark.parametrize("path", ["{tmp_path}/a.nc", "a.nc"]) -def test_add_additional_cubes(path, facet, tmp_path): +def test_add_additional_cubes(path, facet, tmp_path, session): """Test fix.""" - session = CFG.start_session("my session") - session["auxiliary_data_dir"] = tmp_path path = path.format(tmp_path=tmp_path) fix = get_allvars_fix("Amon", "tas", session=session) fix.extra_facets[facet] = path @@ -2310,10 +2311,8 @@ def test_add_additional_cubes(path, facet, tmp_path): @pytest.mark.parametrize("facet", ["zg_file", "zghalf_file"]) @pytest.mark.parametrize("path", ["{tmp_path}/a.nc", "a.nc"]) -def test_add_additional_cubes_fail(path, facet, tmp_path): +def test_add_additional_cubes_fail(path, facet, tmp_path, session): """Test fix.""" - session = CFG.start_session("my session") - session["auxiliary_data_dir"] = tmp_path path = path.format(tmp_path=tmp_path) fix = get_allvars_fix("Amon", "tas", session=session) fix.extra_facets[facet] = path diff --git a/tests/integration/cmor/_fixes/icon/test_icon_xpp.py b/tests/integration/cmor/_fixes/icon/test_icon_xpp.py index 42d711dd43..a089eba095 100644 --- a/tests/integration/cmor/_fixes/icon/test_icon_xpp.py +++ b/tests/integration/cmor/_fixes/icon/test_icon_xpp.py @@ -10,7 +10,7 @@ import esmvalcore.cmor._fixes.icon.icon_xpp from esmvalcore.cmor._fixes.fix import GenericFix -from 
esmvalcore.cmor._fixes.icon._base_fixes import AllVarsBase, IconFix +from esmvalcore.cmor._fixes.icon._base_fixes import AllVarsBase from esmvalcore.cmor._fixes.icon.icon_xpp import ( AllVars, Clwvi, @@ -30,12 +30,6 @@ from esmvalcore.dataset import Dataset -@pytest.fixture(autouse=True) -def tmp_cache_dir(monkeypatch, tmp_path): - """Use temporary path as cache directory for all tests in this module.""" - monkeypatch.setattr(IconFix, "CACHE_DIR", tmp_path) - - @pytest.fixture def cubes_atm_2d(test_data_path): """2D sample cubes.""" @@ -732,7 +726,8 @@ def test_get_rlutcs_fix(): assert fix == [Rlutcs(None), AllVars(None), GenericFix(None)] -def test_rlutcs_fix(cubes_atm_3d): +@pytest.mark.online +def test_rlutcs_fix(cubes_atm_3d, session): """Test fix.""" cube = cubes_atm_3d.extract_cube(NameConstraint(var_name="temp")) cube.var_name = "lwflx_up_clr" @@ -740,7 +735,7 @@ def test_rlutcs_fix(cubes_atm_3d): cube.data = np.arange(1 * 47 * 8, dtype=np.float32).reshape(1, 47, 8) cubes = CubeList([cube]) - fixed_cubes = fix_metadata(cubes, "Amon", "rlutcs") + fixed_cubes = fix_metadata(cubes, "Amon", "rlutcs", session=session) assert len(fixed_cubes) == 1 cube = fixed_cubes[0] @@ -770,9 +765,10 @@ def test_get_rsdt_fix(): assert fix == [AllVars(None), GenericFix(None)] -def test_rsdt_fix(cubes_atm_2d): +@pytest.mark.online +def test_rsdt_fix(cubes_atm_2d, session): """Test fix.""" - fix = get_allvars_fix("Amon", "rsdt") + fix = get_allvars_fix("Amon", "rsdt", session=session) fixed_cubes = fix.fix_metadata(cubes_atm_2d) assert len(fixed_cubes) == 1 @@ -793,9 +789,10 @@ def test_get_rsut_fix(): assert fix == [AllVars(None), GenericFix(None)] -def test_rsut_fix(cubes_atm_2d): +@pytest.mark.online +def test_rsut_fix(cubes_atm_2d, session): """Test fix.""" - fix = get_allvars_fix("Amon", "rsut") + fix = get_allvars_fix("Amon", "rsut", session=session) fixed_cubes = fix.fix_metadata(cubes_atm_2d) assert len(fixed_cubes) == 1 @@ -819,7 +816,8 @@ def test_get_rsutcs_fix(): assert fix == [Rsutcs(None), AllVars(None), GenericFix(None)] -def test_rsutcs_fix(cubes_atm_3d): +@pytest.mark.online +def test_rsutcs_fix(cubes_atm_3d, session): """Test fix.""" cube = cubes_atm_3d.extract_cube(NameConstraint(var_name="temp")) cube.var_name = "swflx_up_clr" @@ -827,7 +825,7 @@ def test_rsutcs_fix(cubes_atm_3d): cube.data = np.arange(1 * 47 * 8, dtype=np.float32).reshape(1, 47, 8) cubes = CubeList([cube]) - fixed_cubes = fix_metadata(cubes, "Amon", "rsutcs") + fixed_cubes = fix_metadata(cubes, "Amon", "rsutcs", session=session) assert len(fixed_cubes) == 1 cube = fixed_cubes[0] @@ -923,7 +921,8 @@ def test_get_siconc_fix(): assert fix == [AllVars(None), GenericFix(None)] -def test_siconc_fix(cubes_ocean_3d): +@pytest.mark.online +def test_siconc_fix(cubes_ocean_3d, session): """Test fix.""" cubes = CubeList( [cubes_ocean_3d.extract_cube(NameConstraint(var_name="to")).copy()], @@ -936,7 +935,7 @@ def test_siconc_fix(cubes_ocean_3d): cubes[0].remove_coord("depth") cubes[0].add_dim_coord(DimCoord(0.0, var_name="lev"), 1) - fix = get_allvars_fix("SImon", "siconc") + fix = get_allvars_fix("SImon", "siconc", session=session) fixed_cubes = fix.fix_metadata(cubes) cube = check_siconc_metadata( @@ -978,9 +977,10 @@ def test_get_siconca_fix(): assert fix == [AllVars(None), GenericFix(None)] -def test_siconca_fix(cubes_atm_2d): +@pytest.mark.online +def test_siconca_fix(cubes_atm_2d, session): """Test fix.""" - fix = get_allvars_fix("SImon", "siconca") + fix = get_allvars_fix("SImon", "siconca", session=session) fixed_cubes = 
fix.fix_metadata(cubes_atm_2d) cube = check_siconc_metadata( @@ -1007,9 +1007,10 @@ def test_get_ta_fix(): assert fix == [AllVars(None), GenericFix(None)] -def test_ta_fix(cubes_atm_3d): +@pytest.mark.online +def test_ta_fix(cubes_atm_3d, session): """Test fix.""" - fix = get_allvars_fix("Amon", "ta") + fix = get_allvars_fix("Amon", "ta", session=session) fixed_cubes = fix.fix_metadata(cubes_atm_3d) cube = check_ta_metadata(fixed_cubes) @@ -1030,9 +1031,10 @@ def test_get_tas_fix(): assert fix == [AllVars(None), GenericFix(None)] -def test_tas_fix(cubes_atm_2d): +@pytest.mark.online +def test_tas_fix(cubes_atm_2d, session): """Test fix.""" - fix = get_allvars_fix("Amon", "tas") + fix = get_allvars_fix("Amon", "tas", session=session) fixed_cubes = fix.fix_metadata(cubes_atm_2d) cube = check_tas_metadata(fixed_cubes) @@ -1068,9 +1070,10 @@ def test_get_thetao_fix(): assert fix == [AllVars(None), GenericFix(None)] -def test_thetao_fix(cubes_ocean_3d): +@pytest.mark.online +def test_thetao_fix(cubes_ocean_3d, session): """Test fix.""" - fix = get_allvars_fix("Omon", "thetao") + fix = get_allvars_fix("Omon", "thetao", session=session) fixed_cubes = fix.fix_metadata(cubes_ocean_3d) @@ -1089,7 +1092,8 @@ def test_thetao_fix(cubes_ocean_3d): assert cube.shape == (1, 47, 8) -def test_thetao_fix_already_bounds(cubes_ocean_3d): +@pytest.mark.online +def test_thetao_fix_already_bounds(cubes_ocean_3d, session): """Test fix.""" cube = cubes_ocean_3d.extract_cube(NameConstraint(var_name="to")) cube.coord("depth").guess_bounds() @@ -1098,7 +1102,7 @@ def test_thetao_fix_already_bounds(cubes_ocean_3d): cube.coord("depth").bounds = bounds cubes = CubeList([cube]) - fix = get_allvars_fix("Omon", "thetao") + fix = get_allvars_fix("Omon", "thetao", session=session) fixed_cubes = fix.fix_metadata(cubes) @@ -1118,12 +1122,13 @@ def test_thetao_fix_already_bounds(cubes_ocean_3d): assert cube.shape == (1, 47, 8) -def test_thetao_fix_no_bounds(cubes_ocean_3d): +@pytest.mark.online +def test_thetao_fix_no_bounds(cubes_ocean_3d, session): """Test fix.""" cube = cubes_ocean_3d.extract_cube(NameConstraint(var_name="to")) cubes = CubeList([cube]) - fix = get_allvars_fix("Omon", "thetao") + fix = get_allvars_fix("Omon", "thetao", session=session) fixed_cubes = fix.fix_metadata(cubes) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 391d2ab258..85b1505866 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,4 +1,5 @@ import os +from collections.abc import Iterator from pathlib import Path import iris @@ -11,6 +12,7 @@ _select_drs, _select_files, ) +from esmvalcore.typing import Facets def create_test_file(filename, tracking_id=None): @@ -27,7 +29,12 @@ def create_test_file(filename, tracking_id=None): iris.save(cube, filename) -def _get_files(root_path, facets, tracking_id): +def _get_files( # noqa: C901,PLR0912 + root_path: Path, + facets: Facets, + tracking_id: Iterator[int], + suffix: str = "nc", +) -> tuple[list[LocalFile], list[Path]]: """Return dummy files. 
Wildcards are only supported for `dataset` and `institute`; in this case @@ -43,8 +50,8 @@ def _get_files(root_path, facets, tracking_id): all_facets = [facets] # Globs without expanded facets - dir_template = _select_drs("input_dir", facets["project"], "default") - file_template = _select_drs("input_file", facets["project"], "default") + dir_template = _select_drs("input_dir", facets["project"], "default") # type: ignore[arg-type] + file_template = _select_drs("input_file", facets["project"], "default") # type: ignore[arg-type] dir_globs = _replace_tags(dir_template, facets) file_globs = _replace_tags(file_template, facets) globs = sorted( @@ -56,49 +63,57 @@ def _get_files(root_path, facets, tracking_id): filenames = [] dir_template = _select_drs( "input_dir", - expanded_facets["project"], + expanded_facets["project"], # type: ignore[arg-type] "default", ) file_template = _select_drs( "input_file", - expanded_facets["project"], + expanded_facets["project"], # type: ignore[arg-type] "default", ) + dir_globs = _replace_tags(dir_template, expanded_facets) file_globs = _replace_tags(file_template, expanded_facets) filename = str( root_path / "input" / dir_globs[0] / Path(file_globs[0]).name, ) + if filename.endswith("nc"): + filename = f"{filename[:-2]}{suffix}" + + if filename.endswith(f"[_.]*{suffix}"): + filename = filename.replace(f"[_.]*{suffix}", f"_*.{suffix}") - if filename.endswith("[_.]*nc"): - filename = filename.replace("[_.]*nc", "_*.nc") - - if filename.endswith("*.nc"): - filename = filename[: -len("*.nc")] + "_" - if facets["frequency"] == "fx": - intervals = [""] - else: - intervals = [ - "1990_1999", - "2000_2009", - "2010_2019", - ] + if facets["frequency"] == "fx": + intervals = [""] + else: + intervals = [ + "1990-1999", + "2000-2009", + "2010-2019", + ] + if filename.endswith(f"*.{suffix}"): + filename = filename[: -len(f"*.{suffix}")] for interval in intervals: - filenames.append(filename + interval + ".nc") + filenames.append(f"{filename}_{interval}.{suffix}") else: filenames.append(filename) - if "timerange" in facets: - filenames = _select_files(filenames, facets["timerange"]) - - for filename in filenames: - create_test_file(filename, next(tracking_id)) + if suffix == "nc": + for filename in filenames: + create_test_file(filename, next(tracking_id)) for filename in filenames: file = LocalFile(filename) - file.facets = expanded_facets + file.facets = dict(expanded_facets) + if facets["frequency"] != "fx": + for interval in intervals: + if interval in filename: + file.facets["timerange"] = interval.replace("-", "/") files.append(file) + if "timerange" in facets: + files = _select_files(files, facets["timerange"]) + return files, globs @@ -108,13 +123,11 @@ def _tracking_ids(i=0): i += 1 -def _get_find_files_func(path: Path, suffix: str = ".nc"): +def _get_find_files_func(path: Path, suffix: str = "nc"): tracking_id = _tracking_ids() - def find_files(*, debug: bool = False, **facets): - files, file_globs = _get_files(path, facets, tracking_id) - files = [f.with_suffix(suffix) for f in files] - file_globs = [g.with_suffix(suffix) for g in file_globs] + def find_files(self, *, debug: bool = False, **facets): + files, file_globs = _get_files(path, facets, tracking_id, suffix) if debug: return files, file_globs return files @@ -125,13 +138,21 @@ def find_files(*, debug: bool = False, **facets): @pytest.fixture def patched_datafinder(tmp_path, monkeypatch): find_files = _get_find_files_func(tmp_path) - monkeypatch.setattr(esmvalcore.local, "find_files", find_files) + 
monkeypatch.setattr( + esmvalcore.local.LocalDataSource, + "find_data", + find_files, + ) @pytest.fixture def patched_datafinder_grib(tmp_path, monkeypatch): - find_files = _get_find_files_func(tmp_path, suffix=".grib") - monkeypatch.setattr(esmvalcore.local, "find_files", find_files) + find_files = _get_find_files_func(tmp_path, suffix="grib") + monkeypatch.setattr( + esmvalcore.local.LocalDataSource, + "find_data", + find_files, + ) @pytest.fixture @@ -147,7 +168,7 @@ def patched_failing_datafinder(tmp_path, monkeypatch): """ tracking_id = _tracking_ids() - def find_files(*, debug: bool = False, **facets): + def find_files(self, *, debug: bool = False, **facets): files, file_globs = _get_files(tmp_path, facets, tracking_id) if facets["frequency"] == "fx": files = [] @@ -159,4 +180,8 @@ def find_files(*, debug: bool = False, **facets): return returned_files, file_globs return returned_files - monkeypatch.setattr(esmvalcore.local, "find_files", find_files) + monkeypatch.setattr( + esmvalcore.local.LocalDataSource, + "find_data", + find_files, + ) diff --git a/tests/integration/esgf/search_results/expected.yml b/tests/integration/esgf/search_results/expected.yml index 24f02b9181..11f3f423e8 100644 --- a/tests/integration/esgf/search_results/expected.yml +++ b/tests/integration/esgf/search_results/expected.yml @@ -20,6 +20,7 @@ Amon_r1i1p1_historical,rcp85_INM-CM4_CMIP5_tas.json: project: CMIP5 modeling_realm: atmos short_name: tas + timerange: "185001/200512" version: v20130207 local_file: cmip5/output1/INM/inmcm4/historical/mon/atmos/Amon/r1i1p1/v20130207/tas_Amon_inmcm4_historical_r1i1p1_185001-200512.nc name: tas_Amon_inmcm4_historical_r1i1p1_185001-200512.nc @@ -50,6 +51,7 @@ Amon_r1i1p1_historical,rcp85_INM-CM4_CMIP5_tas.json: project: CMIP5 modeling_realm: atmos short_name: tas + timerange: "200601/210012" version: v20130207 local_file: cmip5/output1/INM/inmcm4/rcp85/mon/atmos/Amon/r1i1p1/v20130207/tas_Amon_inmcm4_rcp85_r1i1p1_200601-210012.nc name: tas_Amon_inmcm4_rcp85_r1i1p1_200601-210012.nc @@ -81,6 +83,7 @@ Amon_r1i1p1_historical_FIO-ESM_CMIP5_tas.json: project: CMIP5 modeling_realm: atmos short_name: tas + timerange: "185001/200512" version: v20121010 local_file: cmip5/output1/FIO/FIO-ESM/historical/mon/atmos/Amon/r1i1p1/v20121010/tas_Amon_FIO-ESM_historical_r1i1p1_185001-200512.nc name: tas_Amon_FIO-ESM_historical_r1i1p1_185001-200512.nc @@ -108,6 +111,7 @@ Amon_r1i1p1_rcp85_HadGEM2-CC_CMIP5_tas.json: project: CMIP5 modeling_realm: atmos short_name: tas + timerange: "205512/208011" version: v20120531 local_file: cmip5/output1/MOHC/HadGEM2-CC/rcp85/mon/atmos/Amon/r1i1p1/v20120531/tas_Amon_HadGEM2-CC_rcp85_r1i1p1_205512-208011.nc name: tas_Amon_HadGEM2-CC_rcp85_r1i1p1_205512-208011.nc @@ -132,6 +136,7 @@ Amon_r1i1p1_rcp85_HadGEM2-CC_CMIP5_tas.json: project: CMIP5 modeling_realm: atmos short_name: tas + timerange: "208012/209912" version: v20120531 local_file: cmip5/output1/MOHC/HadGEM2-CC/rcp85/mon/atmos/Amon/r1i1p1/v20120531/tas_Amon_HadGEM2-CC_rcp85_r1i1p1_208012-209912.nc name: tas_Amon_HadGEM2-CC_rcp85_r1i1p1_208012-209912.nc @@ -156,6 +161,7 @@ Amon_r1i1p1_rcp85_HadGEM2-CC_CMIP5_tas.json: project: CMIP5 modeling_realm: atmos short_name: tas + timerange: "210001/210012" version: v20120531 local_file: cmip5/output1/MOHC/HadGEM2-CC/rcp85/mon/atmos/Amon/r1i1p1/v20120531/tas_Amon_HadGEM2-CC_rcp85_r1i1p1_210001-210012.nc name: tas_Amon_HadGEM2-CC_rcp85_r1i1p1_210001-210012.nc @@ -180,6 +186,7 @@ EUR-11_MOHC-HadGEM2-ES_r1i1p1_historical_CORDEX_RACMO22E_mon_tas.json: project: CORDEX 
rcm_version: v2 short_name: tas + timerange: "195001/195012" version: v20160620 local_file: cordex/output/EUR-11/KNMI/MOHC-HadGEM2-ES/historical/r1i1p1/RACMO22E/v2/mon/tas/v20160620/tas_EUR-11_MOHC-HadGEM2-ES_historical_r1i1p1_KNMI-RACMO22E_v2_mon_195001-195012.nc name: tas_EUR-11_MOHC-HadGEM2-ES_historical_r1i1p1_KNMI-RACMO22E_v2_mon_195001-195012.nc @@ -202,6 +209,7 @@ EUR-11_MOHC-HadGEM2-ES_r1i1p1_historical_CORDEX_RACMO22E_mon_tas.json: project: CORDEX rcm_version: v2 short_name: tas + timerange: "195101/196012" version: v20160620 local_file: cordex/output/EUR-11/KNMI/MOHC-HadGEM2-ES/historical/r1i1p1/RACMO22E/v2/mon/tas/v20160620/tas_EUR-11_MOHC-HadGEM2-ES_historical_r1i1p1_KNMI-RACMO22E_v2_mon_195101-196012.nc name: tas_EUR-11_MOHC-HadGEM2-ES_historical_r1i1p1_KNMI-RACMO22E_v2_mon_195101-196012.nc @@ -233,6 +241,7 @@ historical_gn_r4i1p1f1_CMIP6_CESM2_Amon_tas.json: mip: Amon project: CMIP6 short_name: tas + timerange: "185001/201412" version: v20190308 local_file: CMIP6/CMIP/NCAR/CESM2/historical/r4i1p1f1/Amon/tas/gn/v20190308/tas_Amon_CESM2_historical_r4i1p1f1_gn_185001-201412.nc name: tas_Amon_CESM2_historical_r4i1p1f1_gn_185001-201412.nc @@ -256,6 +265,7 @@ obs4MIPs_CERES-EBAF_mon_rsutcs.json: project: obs4MIPs modeling_realm: atmos short_name: rsutcs + timerange: "200003/201404" version: v20160610 local_file: obs4MIPs/CERES-EBAF/v20160610/rsutcs_CERES-EBAF_L3B_Ed2-8_200003-201404.nc name: rsutcs_CERES-EBAF_L3B_Ed2-8_200003-201404.nc @@ -273,6 +283,7 @@ obs4MIPs_GPCP-V2.3_pr.json: institute: NASA-GSFC project: obs4MIPs short_name: pr + timerange: "197901/201710" version: v20180519 local_file: obs4MIPs/GPCP-V2.3/v20180519/pr_GPCP-SG_L3_v2.3_197901-201710.nc name: pr_GPCP-SG_L3_v2.3_197901-201710.nc @@ -293,6 +304,7 @@ run1_historical_cccma_cgcm3_1_CMIP3_mon_tas.json: project: CMIP3 modeling_realm: atmos short_name: tas + timerange: "1850/2000" version: v1 local_file: cmip3/CCCma/cccma_cgcm3_1/historical/mon/atmos/run1/tas/v1/tas_a1_20c3m_1_cgcm3.1_t47_1850_2000.nc name: tas_a1_20c3m_1_cgcm3.1_t47_1850_2000.nc diff --git a/tests/integration/esgf/test_search_download.py b/tests/integration/esgf/test_search_download.py index 33680a42b3..685e55c937 100644 --- a/tests/integration/esgf/test_search_download.py +++ b/tests/integration/esgf/test_search_download.py @@ -183,6 +183,7 @@ def test_mock_search(variable, mocker): ] +@pytest.mark.online def test_real_search(): """Test a real search for a single file.""" variable = { diff --git a/tests/integration/preprocessor/_io/test_load.py b/tests/integration/preprocessor/_io/test_load.py index 59fbe09d78..1a9e747f4a 100644 --- a/tests/integration/preprocessor/_io/test_load.py +++ b/tests/integration/preprocessor/_io/test_load.py @@ -13,7 +13,7 @@ from iris.cube import Cube, CubeList from esmvalcore.exceptions import ESMValCoreLoadWarning -from esmvalcore.preprocessor._io import _get_attr_from_field_coord, load +from esmvalcore.preprocessor._io import load from tests import assert_array_equal @@ -141,15 +141,13 @@ def test_callback_fix_lat_units(tmp_path, sample_cube): assert str(sample_cube.coord("latitude").units) == "degrees_north" -def test_get_attr_from_field_coord_none(mocker): - """Test ``_get_attr_from_field_coord``.""" - attr = _get_attr_from_field_coord(mocker.sentinel.ncfield, None, "attr") - assert attr is None - - def test_fail_empty_cubes(mocker): """Test that ValueError is raised when cubes are empty.""" - mocker.patch("iris.load_raw", autospec=True, return_value=CubeList([])) + mocker.patch( + 
"esmvalcore.preprocessor._io.LocalFile.to_iris", + autospec=True, + return_value=CubeList([]), + ) msg = "myfilename does not contain any data" with pytest.raises(ValueError, match=msg): load("myfilename") diff --git a/tests/integration/preprocessor/_io/test_zarr.py b/tests/integration/preprocessor/_io/test_zarr.py index fc5684c967..7899a107a9 100644 --- a/tests/integration/preprocessor/_io/test_zarr.py +++ b/tests/integration/preprocessor/_io/test_zarr.py @@ -48,6 +48,7 @@ def test_load_zarr2_local(input_type): assert "latitude" in coord_names +@pytest.mark.online def test_load_zarr2_remote(): """Test loading a Zarr2 store from a https Object Store.""" zarr_path = ( @@ -88,6 +89,7 @@ def test_load_zarr2_remote(): assert "latitude" in coord_names +@pytest.mark.online def test_load_zarr3_remote(): """Test loading a Zarr3 store from a https Object Store.""" zarr_path = ( @@ -114,6 +116,7 @@ def test_load_zarr3_remote(): assert "latitude" in coord_names +@pytest.mark.online def test_load_zarr3_cmip6_metadata(): """ Test loading a Zarr3 store from a https Object Store. diff --git a/tests/integration/recipe/test_check.py b/tests/integration/recipe/test_check.py index 8c6c7009ce..ab00d9a9f4 100644 --- a/tests/integration/recipe/test_check.py +++ b/tests/integration/recipe/test_check.py @@ -14,6 +14,7 @@ from esmvalcore._recipe import check from esmvalcore.dataset import Dataset from esmvalcore.exceptions import RecipeError +from esmvalcore.local import LocalFile from esmvalcore.preprocessor import PreprocessorFile @@ -142,7 +143,12 @@ def test_ncl_version_broken(mocker): def test_data_availability_data(mock_logger, input_files, var, error): """Test check for data when data is present.""" dataset = Dataset(**var) - dataset.files = [Path(f) for f in input_files] + files = [] + for filename in input_files: + file = LocalFile(filename) + file.facets["timerange"] = filename.split("_")[-1].replace("-", "/") + files.append(file) + dataset.files = files if error is None: check.data_availability(dataset) mock_logger.error.assert_not_called() @@ -324,9 +330,9 @@ def test_data_availability_nonexistent(tmp_path): def test_reference_for_bias_preproc_empty(): """Test ``reference_for_bias_preproc``.""" products = { - PreprocessorFile(filename=10), - PreprocessorFile(filename=20), - PreprocessorFile(filename=30), + PreprocessorFile(filename=Path("10")), + PreprocessorFile(filename=Path("20")), + PreprocessorFile(filename=Path("30")), } check.reference_for_bias_preproc(products) @@ -334,11 +340,11 @@ def test_reference_for_bias_preproc_empty(): def test_reference_for_bias_preproc_one_ref(): """Test ``reference_for_bias_preproc`` with one reference.""" products = { - PreprocessorFile(filename=90), - PreprocessorFile(filename=10, settings={"bias": {}}), - PreprocessorFile(filename=20, settings={"bias": {}}), + PreprocessorFile(filename=Path("90")), + PreprocessorFile(filename=Path("10"), settings={"bias": {}}), + PreprocessorFile(filename=Path("20"), settings={"bias": {}}), PreprocessorFile( - filename=30, + filename=Path("30"), settings={"bias": {}}, attributes={"reference_for_bias": True}, ), @@ -349,10 +355,10 @@ def test_reference_for_bias_preproc_one_ref(): def test_reference_for_bias_preproc_no_ref(): """Test ``reference_for_bias_preproc`` with no reference.""" products = { - PreprocessorFile(filename=90), - PreprocessorFile(filename=10, settings={"bias": {}}), - PreprocessorFile(filename=20, settings={"bias": {}}), - PreprocessorFile(filename=30, settings={"bias": {}}), + 
PreprocessorFile(filename=Path("90")), + PreprocessorFile(filename=Path("10"), settings={"bias": {}}), + PreprocessorFile(filename=Path("20"), settings={"bias": {}}), + PreprocessorFile(filename=Path("30"), settings={"bias": {}}), } with pytest.raises(RecipeError) as rec_err: check.reference_for_bias_preproc(products) @@ -376,15 +382,15 @@ def test_reference_for_bias_preproc_no_ref(): def test_reference_for_bias_preproc_two_refs(): """Test ``reference_for_bias_preproc`` with two references.""" products = { - PreprocessorFile(filename=90), - PreprocessorFile(filename=10, settings={"bias": {}}), + PreprocessorFile(filename=Path("90")), + PreprocessorFile(filename=Path("10"), settings={"bias": {}}), PreprocessorFile( - filename=20, + filename=Path("20"), attributes={"reference_for_bias": True}, settings={"bias": {}}, ), PreprocessorFile( - filename=30, + filename=Path("30"), attributes={"reference_for_bias": True}, settings={"bias": {}}, ), diff --git a/tests/integration/recipe/test_recipe.py b/tests/integration/recipe/test_recipe.py index adeb030ea3..21a9c37bcd 100644 --- a/tests/integration/recipe/test_recipe.py +++ b/tests/integration/recipe/test_recipe.py @@ -1,10 +1,13 @@ +import importlib.resources import inspect import os import re from collections import defaultdict +from functools import lru_cache from pathlib import Path from pprint import pformat from textwrap import dedent +from typing import TYPE_CHECKING from unittest.mock import create_autospec import iris @@ -30,6 +33,48 @@ from esmvalcore.preprocessor import DEFAULT_ORDER, PreprocessingTask from tests.integration.test_provenance import check_provenance +if TYPE_CHECKING: + from esmvalcore.typing import Facets + + +@lru_cache +def _load_data_sources( + filename, +) -> dict[ + str, + dict[str, dict[str, dict[str, dict[str, str]]]], +]: + """Load data source configurations.""" + with importlib.resources.as_file( + importlib.resources.files(esmvalcore.config) + / "configurations" + / filename, + ) as config_file: + return yaml.safe_load(config_file.read_text(encoding="utf-8")) + + +def update_data_sources( + session: Session, + filename: str, + rootpath: Path, +) -> None: + """Update the data sources in `session` using config file `filename`.""" + cfg = _load_data_sources(filename) + projects = cfg["projects"] + for project in projects: + data_sources = projects[project]["data"] + for data_source in data_sources.values(): + data_source["rootpath"] = str(rootpath) + session["projects"][project]["data"] = data_sources + + +@pytest.fixture +def session(tmp_path: Path, session: Session) -> Session: + """Session fixture with default data sources.""" + update_data_sources(session, "local-data.yml", tmp_path) + return session + + TAGS_FOR_TESTING = { "authors": { "andela_bouwe": { @@ -692,7 +737,7 @@ def test_default_fx_preprocessor(tmp_path, patched_datafinder, session): "remove_supplementary_variables": {}, "save": { "compress": False, - "filename": product.filename, + "filename": Path(product.filename), "compute": False, }, } @@ -1539,7 +1584,7 @@ def test_diagnostic_task_provenance( # Test that provenance was saved to xml and info embedded in netcdf product = next( iter( - p for p in diagnostic_task.products if p.filename.endswith(".nc") + p for p in diagnostic_task.products if p.filename.suffix == ".nc" ), ) cube = iris.load_cube(product.filename) @@ -2464,10 +2509,15 @@ def test_recipe_run(tmp_path, patched_datafinder, session, mocker): session["search_esgf"] = "when_missing" mocker.patch.object( - esmvalcore._recipe.recipe.esgf, 
+ esmvalcore.esgf, "download", create_autospec=True, ) + mocker.patch.object( + esmvalcore.local.LocalFile, + "prepare", + create_autospec=True, + ) recipe = get_recipe(tmp_path, content, session) @@ -2476,10 +2526,8 @@ def test_recipe_run(tmp_path, patched_datafinder, session, mocker): recipe.write_html_summary = mocker.Mock() recipe.run() - esmvalcore._recipe.recipe.esgf.download.assert_called_once_with( - set(), - session["download_dir"], - ) + esmvalcore.esgf.download.assert_called() + esmvalcore.local.LocalFile.prepare.assert_called() recipe.tasks.run.assert_called_once_with( max_parallel_tasks=session["max_parallel_tasks"], ) @@ -2487,8 +2535,14 @@ def test_recipe_run(tmp_path, patched_datafinder, session, mocker): recipe.write_html_summary.assert_called_once() -def test_representative_dataset_regular_var(patched_datafinder, session): +def test_representative_dataset_regular_var( + tmp_path: Path, + patched_datafinder: None, + session: Session, +): """Test ``_representative_dataset`` with regular variable.""" + update_data_sources(session, "icon-data.yml", tmp_path) + variable = { "dataset": "ICON", "exp": "atm_amip-rad_R2B4_r1i1p1f1", @@ -2505,18 +2559,20 @@ def test_representative_dataset_regular_var(patched_datafinder, session): datasets = _representative_datasets(dataset) assert len(datasets) == 1 filename = datasets[0].files[0] - path = Path(filename) - assert path.name == "atm_amip-rad_R2B4_r1i1p1f1_atm_2d_ml_1990_1999.nc" + assert filename.name == "atm_amip-rad_R2B4_r1i1p1f1_atm_2d_ml_1990-1999.nc" @pytest.mark.parametrize("force_derivation", [True, False]) def test_representative_dataset_derived_var( - patched_datafinder, - session, - force_derivation, + tmp_path: Path, + patched_datafinder: None, + session: Session, + force_derivation: bool, ): """Test ``_representative_dataset`` with derived variable.""" - variable = { + update_data_sources(session, "icon-data.yml", tmp_path) + + variable: Facets = { "dataset": "ICON", "derive": True, "exp": "atm_amip-rad_R2B4_r1i1p1f1", @@ -2533,7 +2589,7 @@ def test_representative_dataset_derived_var( dataset.session = session representative_datasets = _representative_datasets(dataset) - expected_facets = { + expected_facets: Facets = { # Already present in variable "dataset": "ICON", "derive": True, diff --git a/tests/unit/config/test_config.py b/tests/unit/config/test_config.py index 7795189b3d..61e1597725 100644 --- a/tests/unit/config/test_config.py +++ b/tests/unit/config/test_config.py @@ -138,6 +138,14 @@ def test_load_default_config(cfg_default, monkeypatch): paths=[str(p) for p in config_dir.glob("extra_facets_*.yml")], env={}, )["projects"] + # Add in projects without extra facets from the config developer file + # until we have transitioned all of its content to the new configuration + # system. 
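+    # Each such project gets an empty entry so that it shows up in the expected "projects" value below.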
+ for project in yaml.safe_load( + default_dev_file.read_text(encoding="utf-8"), + ): + if project not in default_project_settings: + default_project_settings[project] = {} session = cfg_default.start_session("recipe_example") @@ -164,13 +172,6 @@ def test_load_default_config(cfg_default, monkeypatch): }, "diagnostics": None, "download_dir": Path.home() / "climate_data", - "drs": { - "CMIP3": "ESGF", - "CMIP5": "ESGF", - "CMIP6": "ESGF", - "CORDEX": "ESGF", - "obs4MIPs": "ESGF", - }, "exit_on_warning": False, "log_level": "info", "logging": {"log_progress_interval": 0.0}, @@ -183,7 +184,6 @@ def test_load_default_config(cfg_default, monkeypatch): "projects": default_project_settings, "remove_preproc_dir": True, "resume_from": [], - "rootpath": {"default": [Path.home() / "climate_data"]}, "run_diagnostic": True, "search_esgf": "never", "skip_nonexistent": False, diff --git a/tests/unit/config/test_config_object.py b/tests/unit/config/test_config_object.py index e51fa40707..0b0ceae8ae 100644 --- a/tests/unit/config/test_config_object.py +++ b/tests/unit/config/test_config_object.py @@ -525,7 +525,9 @@ def test_load_from_dirs(dirs, output_file_type, rootpath, tmp_path): cfg.load_from_dirs(config_dirs) assert cfg["output_file_type"] == output_file_type - assert cfg["rootpath"] == rootpath + if any(Path(d).exists() for d in config_dirs): + # Legacy setting "rootpath" is not available in default config. + assert cfg["rootpath"] == rootpath assert cfg["search_esgf"] == "never" diff --git a/tests/unit/config/test_data_sources.py b/tests/unit/config/test_data_sources.py new file mode 100644 index 0000000000..50b0e6fd5c --- /dev/null +++ b/tests/unit/config/test_data_sources.py @@ -0,0 +1,18 @@ +import pytest + +import esmvalcore.config._data_sources +from esmvalcore.config import Session + + +def test_load_data_sources_no_project_data_sources_configured( + session: Session, +) -> None: + """Test that loading data sources when no data sources are configured raises.""" + with pytest.raises( + ValueError, + match=r"No data sources found for project 'test'.*", + ): + esmvalcore.config._data_sources._get_data_sources( + session, + project="test", + ) diff --git a/tests/unit/esgf/test_download.py b/tests/unit/esgf/test_download.py index 85b5cbae3e..9b0a2df2e0 100644 --- a/tests/unit/esgf/test_download.py +++ b/tests/unit/esgf/test_download.py @@ -11,6 +11,7 @@ import requests import yaml from pyesgf.search.results import FileResult +from pytest_mock import MockerFixture import esmvalcore.esgf from esmvalcore.esgf import _download @@ -241,6 +242,7 @@ def test_init(): "dataset": "ABC", "project": "CMIP6", "short_name": "tas", + "timerange": "2000/2001", "version": "v1", } txt = f"ESGFFile:CMIP6/ABC/v1/{filename} on hosts ['something.org']" @@ -248,6 +250,62 @@ def test_init(): assert hash(file) == hash(("CMIP6.ABC.v1", filename)) +@pytest.fixture +def esgf_file() -> _download.ESGFFile: + """ESGFFile fixture.""" + json = { + "dataset_id": "CMIP6.dataset.v1|something.org", + "dataset_id_template_": ["%(mip_era)s.%(source_id)s"], + "project": ["CMIP6"], + "size": 12, + "title": "test.nc", + } + return _download.ESGFFile( + [FileResult(json=json, context=None)], + dest_folder=Path("/path/to/climate_data"), + ) + + +def test_prepare(mocker: MockerFixture, esgf_file: _download.ESGFFile) -> None: + """Test `ESGFFile.prepare`.""" + download = mocker.patch.object(_download.ESGFFile, "download") + esgf_file.prepare() + download.assert_called_once_with(esgf_file.dest_folder) + + +def test_attribute_not_set(esgf_file: 
_download.ESGFFile) -> None:
+    """Test accessing `ESGFFile.attributes` before calling to_iris."""
+    with pytest.raises(
+        ValueError,
+        match=r"Attributes have not been read yet. Call the `to_iris` method .*",
+    ):
+        _ = esgf_file.attributes
+
+
+def test_to_iris(mocker: MockerFixture, esgf_file: _download.ESGFFile) -> None:
+    """Test `ESGFFile.to_iris`."""
+    prepare = mocker.patch.object(_download.ESGFFile, "prepare")
+    local_file_to_iris = mocker.patch.object(
+        esmvalcore.esgf._download.LocalFile,
+        "to_iris",
+        return_value=mocker.sentinel.iris_cubes,
+    )
+    mocker.patch.object(
+        esmvalcore.esgf._download.LocalFile,
+        "attributes",
+        new_callable=mocker.PropertyMock,
+        return_value={"attribute": "value"},
+    )
+    cubes = esgf_file.to_iris(mocker.sentinel.ignore_warnings)
+
+    assert cubes == mocker.sentinel.iris_cubes
+    assert esgf_file.attributes == {"attribute": "value"}
+    prepare.assert_called_once()
+    local_file_to_iris.assert_called_once_with(
+        ignore_warnings=mocker.sentinel.ignore_warnings,
+    )
+
+
 def test_from_results():
     """Test ESGFFile._from_results()."""
     facets = {
@@ -478,7 +536,7 @@ def test_single_download(mocker, tmp_path, checksum):
     response.iter_content.assert_called_with(chunk_size=2**20)
 
 
-def test_download_skip_existing(tmp_path, caplog):
+def test_download_skip_existing(tmp_path: Path, mocker: MockerFixture) -> None:
     filename = "test.nc"
     dataset = "dataset"
     dest_folder = tmp_path
@@ -496,12 +554,9 @@ def test_download_skip_existing(tmp_path, caplog):
     local_file = file.local_file(dest_folder)
     local_file.parent.mkdir(parents=True)
     local_file.touch()
-
-    caplog.set_level(logging.DEBUG)
-
+    mock_download = mocker.patch.object(_download.ESGFFile, "_download")
    local_file = file.download(dest_folder)
-
-    assert f"Skipping download of existing file {local_file}" in caplog.text
+    mock_download.assert_not_called()
 
 
 def test_single_download_fail(mocker, tmp_path):
@@ -632,10 +687,8 @@ def test_download_fail(mocker, tmp_path, caplog):
     file.download.assert_called_with(dest_folder)
 
 
-def test_download_noop(caplog):
+def test_download_noop(mocker: MockerFixture) -> None:
     """Test downloading no files."""
-    caplog.set_level("DEBUG")
+    mock_download = mocker.patch.object(_download.ESGFFile, "_download")
     esmvalcore.esgf.download([], dest_folder="/does/not/exist")
-
-    msg = "All required data is available locally, not downloading anything."
- assert msg in caplog.text + mock_download.assert_not_called() diff --git a/tests/unit/esgf/test_search.py b/tests/unit/esgf/test_search.py index 11b582fffb..5949cc5792 100644 --- a/tests/unit/esgf/test_search.py +++ b/tests/unit/esgf/test_search.py @@ -2,13 +2,16 @@ import copy import textwrap +from pathlib import Path import pyesgf.search import pytest import requests.exceptions from pyesgf.search.results import FileResult +from pytest_mock import MockerFixture -from esmvalcore.esgf import ESGFFile, _search, find_files +import esmvalcore.io.protocol +from esmvalcore.esgf import ESGFDataSource, ESGFFile, _search, find_files OUR_FACETS = ( { @@ -433,3 +436,39 @@ def test_search_unknown_project(): ) with pytest.raises(ValueError, match=msg): find_files(project=project, dataset="", short_name="") + + +class TestESGFDataSource: + """Test `esmvalcore.esgf.ESGFDataSource`.""" + + def test_init(self) -> None: + """Test initialization.""" + data_source = ESGFDataSource( + name="esgf-cmip6", + project="CMIP6", + priority=1, + download_dir=Path("/path/to/climate_data"), + ) + assert isinstance(data_source, esmvalcore.io.protocol.DataSource) + + def test_find_data(self, mocker: MockerFixture) -> None: + """Test find_data method.""" + data_source = ESGFDataSource( + name="esgf-cmip6", + project="CMIP6", + priority=1, + download_dir=Path("/path/to/climate_data"), + ) + + mock_result = [mocker.create_autospec(ESGFFile, instance=True)] + mock_find_files = mocker.patch( + "esmvalcore.esgf._search.find_files", + return_value=mock_result, + ) + + facets = {"short_name": "tas", "dataset": "A", "project": "CMIP6"} + result = data_source.find_data(**facets) + + mock_find_files.assert_called_once_with(**facets) + assert result is mock_result + assert result[0].dest_folder == Path("/path/to/climate_data") diff --git a/tests/unit/io/__init__.py b/tests/unit/io/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/unit/io/test_intake_esgf.py b/tests/unit/io/test_intake_esgf.py new file mode 100644 index 0000000000..34effd97f1 --- /dev/null +++ b/tests/unit/io/test_intake_esgf.py @@ -0,0 +1,315 @@ +"""Unit tests for esmvalcore.io.intake_esgf.""" + +import importlib.resources + +import intake_esgf +import iris.cube +import pandas as pd +import pytest +import xarray as xr +import yaml +from pytest import MonkeyPatch +from pytest_mock import MockerFixture + +import esmvalcore.io.intake_esgf +from esmvalcore.config import Session +from esmvalcore.io.intake_esgf import IntakeESGFDataset, IntakeESGFDataSource + + +def test_prepare(mocker: MockerFixture) -> None: + """IntakeESGFDataset.prepare should call the catalog.to_path_dict method.""" + cat = intake_esgf.ESGFCatalog() + to_path_mock = mocker.patch.object(cat, "to_path_dict", autospec=True) + dataset = IntakeESGFDataset(name="id", facets={}, catalog=cat) + + dataset.prepare() + to_path_mock.assert_called_once_with() + + +def test_attributes_raises_before_to_iris() -> None: + """Accessing attributes before to_iris should raise ValueError.""" + cat = intake_esgf.ESGFCatalog() + dataset = IntakeESGFDataset(name="id", facets={}, catalog=cat) + with pytest.raises(ValueError, match="Attributes have not been read yet"): + _ = dataset.attributes + + +def test_to_iris(mocker: MockerFixture) -> None: + """`to_iris` should load the data and cache attributes.""" + cat = intake_esgf.ESGFCatalog() + key = "my.dataset.1" + mocker.patch.object( + cat, + "to_path_dict", + return_value={key: ["/path/to/file.nc"]}, + ) + ds = xr.Dataset(attrs={"attr": 
"value"}) + mocker.patch.object(cat, "to_dataset_dict", return_value={key: ds}) + + cubes = mocker.sentinel.cubes + mocker.patch.object( + esmvalcore.io.intake_esgf, + "dataset_to_iris", + return_value=cubes, + ) + + dataset = IntakeESGFDataset(name=key, facets={}, catalog=cat) + result = dataset.to_iris(ignore_warnings=[{"message": "ignore"}]) + assert result is cubes + + assert dataset.attributes == { + "attr": "value", + "source_file": "/path/to/file.nc", + } + + +@pytest.mark.online +def test_to_iris_online(): + """`to_iris` should load data from a real ESGF catalog.""" + data_source = IntakeESGFDataSource( + name="src", + project="CMIP6", + priority=1, + facets={ + "activity": "activity_drs", + "dataset": "source_id", + "ensemble": "member_id", + "exp": "experiment_id", + "grid": "grid_label", + "institute": "institution_id", + "mip": "table_id", + "project": "project", + "short_name": "variable_id", + }, + values={}, + ) + results = data_source.find_data( + dataset="CanESM5", + ensemble="r1i1p1f1", + exp="historical", + grid="gn", + mip="fx", + project="CMIP6", + short_name="areacella", + ) + assert len(results) == 1 + dataset = results[0] + assert isinstance(dataset, IntakeESGFDataset) + cubes = dataset.to_iris() + assert len(cubes) == 1 + assert isinstance(cubes[0], iris.cube.Cube) + # Check that the "source_file" attributes is present for debugging. + assert "source_file" in dataset.attributes + assert dataset.attributes["source_file"].endswith(".nc") + + +def test_find_data_no_results_sets_debug_info(mocker: MockerFixture) -> None: + """When catalog.search raises NoSearchResults, find_data should return empty list and set debug_info.""" + data_source = IntakeESGFDataSource( + name="src", + project="CMIP6", + priority=1, + facets={"short_name": "variable_id"}, + ) + + cat = intake_esgf.ESGFCatalog() + # Ensure last_search is present so debug_info can be constructed + cat.last_search = {"variable_id": "tas"} + mocker.patch.object( + cat, + "search", + side_effect=intake_esgf.exceptions.NoSearchResults("no results"), + ) + data_source.catalog = cat + + result = data_source.find_data(short_name="tas") + assert result == [] + expected_debug_info = "intake_esgf.ESGFCatalog.search(variable_id=['tas']) did not return any results." 
+    assert data_source.debug_info == expected_debug_info
+
+
+def test_find_data(mocker: MockerFixture, monkeypatch: MonkeyPatch) -> None:
+    """find_data should convert catalog.df rows into IntakeESGFDataset instances."""
+    cat = intake_esgf.ESGFCatalog()
+    cat.project = intake_esgf.projects.projects["cmip6"]
+    cat.df = pd.DataFrame.from_dict(
+        {
+            "project": ["CMIP6", "CMIP6"],
+            "mip_era": ["CMIP6", "CMIP6"],
+            "activity_drs": ["CMIP", "ScenarioMIP"],
+            "institution_id": ["CCCma", "CCCma"],
+            "source_id": ["CanESM5", "CanESM5"],
+            "experiment_id": ["historical", "ssp585"],
+            "member_id": ["r1i1p1f1", "r1i1p1f1"],
+            "table_id": ["Amon", "Amon"],
+            "variable_id": ["tas", "tas"],
+            "grid_label": ["gn", "gn"],
+            "version": ["20190429", "20190429"],
+            "id": [
+                [
+                    "CMIP6.CMIP.CCCma.CanESM5.historical.r1i1p1f1.Amon.tas.gn.v20190429|crd-esgf-drc.ec.gc.ca",
+                    "CMIP6.CMIP.CCCma.CanESM5.historical.r1i1p1f1.Amon.tas.gn.v20190429|eagle.alcf.anl.gov",
+                    "CMIP6.CMIP.CCCma.CanESM5.historical.r1i1p1f1.Amon.tas.gn.v20190429|esgf-data04.diasjp.net",
+                    "CMIP6.CMIP.CCCma.CanESM5.historical.r1i1p1f1.Amon.tas.gn.v20190429|esgf-node.ornl.gov",
+                ],
+                [
+                    "CMIP6.ScenarioMIP.CCCma.CanESM5.ssp585.r1i1p1f1.Amon.tas.gn.v20190429|crd-esgf-drc.ec.gc.ca",
+                    "CMIP6.ScenarioMIP.CCCma.CanESM5.ssp585.r1i1p1f1.Amon.tas.gn.v20190429|eagle.alcf.anl.gov",
+                    "CMIP6.ScenarioMIP.CCCma.CanESM5.ssp585.r1i1p1f1.Amon.tas.gn.v20190429|esgf-data04.diasjp.net",
+                    "CMIP6.ScenarioMIP.CCCma.CanESM5.ssp585.r1i1p1f1.Amon.tas.gn.v20190429|esgf-node.ornl.gov",
+                ],
+            ],
+        },
+    )
+
+    # Patch search to just record last_search.
+    def fake_search(**kwargs):
+        cat.last_search = kwargs
+
+    mocker.patch.object(cat, "search", side_effect=fake_search)
+
+    data_source = IntakeESGFDataSource(
+        name="src",
+        project="CMIP6",
+        priority=1,
+        facets={
+            "activity": "activity_drs",
+            "dataset": "source_id",
+            "ensemble": "member_id",
+            "exp": "experiment_id",
+            "institute": "institution_id",
+            "grid": "grid_label",
+            "mip": "table_id",
+            "project": "project",
+            "short_name": "variable_id",
+        },
+        values={},
+    )
+    data_source.catalog = cat
+
+    # Call find_data - it should use the df we set and return two datasets.
+    results = data_source.find_data(short_name="tas")
+    assert isinstance(results, list)
+    assert len(results) == 2
+
+    dataset = results[0]
+    assert isinstance(dataset, IntakeESGFDataset)
+    assert (
+        dataset.name
+        == "CMIP6.CMIP.CCCma.CanESM5.historical.r1i1p1f1.Amon.tas.gn"
+    )
+
+    assert dataset.facets == {
+        "activity": "CMIP",
+        "dataset": "CanESM5",
+        "ensemble": "r1i1p1f1",
+        "exp": "historical",
+        "grid": "gn",
+        "institute": "CCCma",
+        "mip": "Amon",
+        "project": "CMIP6",
+        "short_name": "tas",
+    }
+    dataset = results[1]
+    assert (
+        dataset.name
+        == "CMIP6.ScenarioMIP.CCCma.CanESM5.ssp585.r1i1p1f1.Amon.tas.gn"
+    )
+    assert dataset.facets == {
+        "activity": "ScenarioMIP",
+        "dataset": "CanESM5",
+        "ensemble": "r1i1p1f1",
+        "exp": "ssp585",
+        "grid": "gn",
+        "institute": "CCCma",
+        "mip": "Amon",
+        "project": "CMIP6",
+        "short_name": "tas",
+    }
+
+
+@pytest.fixture
+def data_sources(session: Session) -> list[esmvalcore.io.protocol.DataSource]:
+    """Fixture providing the default list of IntakeESGFDataSource data sources."""
+    with importlib.resources.as_file(
+        importlib.resources.files(esmvalcore.config)
+        / "configurations"
+        / "intake-esgf-data.yml",
+    ) as config_file:
+        cfg = yaml.safe_load(config_file.read_text(encoding="utf-8"))
+    session["projects"] = cfg["projects"]
+    return esmvalcore.io.load_data_sources(session)
+
+
+@pytest.mark.online
+@pytest.mark.parametrize( + ("facets", "expected_names"), + [ + ( + { + "dataset": "CanESM5", + "ensemble": "r1i1p1f1", + "exp": ["historical", "ssp585"], + "grid": "gn", + "mip": "Amon", + "project": "CMIP6", + "short_name": "tas", + "timerange": "1850/2100", + }, + { + "CMIP6.CMIP.CCCma.CanESM5.historical.r1i1p1f1.Amon.tas.gn", + "CMIP6.ScenarioMIP.CCCma.CanESM5.ssp585.r1i1p1f1.Amon.tas.gn", + }, + ), + ( + { + "dataset": "ACCESS1-0", + "ensemble": "r1i1p1", + "exp": ["historical", "rcp85"], + "mip": "Amon", + "project": "CMIP5", + "short_name": "tas", + }, + { + "CSIRO-BOM.ACCESS1.0.historical.mon.atmos.Amon.r1i1p1.tas", + "CSIRO-BOM.ACCESS1.0.rcp85.mon.atmos.Amon.r1i1p1.tas", + }, + ), + ( + { + "dataset": "cccma_cgcm3_1", + "ensemble": "run1", + "exp": "historical", + "mip": "A1", + "project": "CMIP3", + "short_name": "tas", + }, + { + "CMIP3.CCCMA.cccma_cgcm3_1.historical.day.atmos.run1.tas", + "CMIP3.CCCMA.cccma_cgcm3_1.historical.mon.atmos.run1.tas", + }, + ), + ( + { + "dataset": "ERA-5", + "project": "obs4MIPs", + "short_name": "tas", + }, + { + "obs4MIPs.ECMWF.ERA-5.mon.tas.gn", + }, + ), + ], +) +def test_find_data_online( + data_sources: list[IntakeESGFDataSource], + facets: dict[str, str | list[str]], + expected_names: list[str], +) -> None: + """Test finding data from a real ESGF catalog.""" + data_source = next( + ds for ds in data_sources if ds.project == facets["project"] + ) + result = data_source.find_data(**facets) + result_names = {ds.name for ds in result} + assert result_names == expected_names diff --git a/tests/unit/io/test_load_data_sources.py b/tests/unit/io/test_load_data_sources.py new file mode 100644 index 0000000000..de1f7bad23 --- /dev/null +++ b/tests/unit/io/test_load_data_sources.py @@ -0,0 +1,82 @@ +"""Tests for :func:`esmvalcore.io.load_data_sources`.""" + +import importlib.resources +from dataclasses import dataclass + +import pytest + +import esmvalcore.config +import esmvalcore.io + + +def test_configurations_valid(cfg_default: esmvalcore.config.Config) -> None: + """Test that the data sources configuration in esmvalcore/config/configurations are valid.""" + configurations = ( + importlib.resources.files(esmvalcore.config) / "configurations" + ) + with importlib.resources.as_file(configurations) as config_dir: + cfg_default.load_from_dirs([config_dir]) + session = cfg_default.start_session("test") + data_sources = esmvalcore.io.load_data_sources(session) + for data_source in data_sources: + assert isinstance(data_source, esmvalcore.io.DataSource) + + +def test_load_data_sources_unknown_project( + session: esmvalcore.config.Session, +) -> None: + """Test that loading data sources for an unknown project raises.""" + with pytest.raises(ValueError, match=r"Unknown project 'unknown'.*"): + esmvalcore.io.load_data_sources(session, project="unknown") + + +def test_load_data_sources_no_data_sources_configured( + session: esmvalcore.config.Session, +) -> None: + """Test that loading data sources when no data sources are configured raises.""" + session["projects"].clear() + with pytest.raises( + ValueError, + match=r"No data sources found. 
Check your configuration under 'projects'",
+    ):
+        esmvalcore.io.load_data_sources(session)
+
+
+def test_load_data_sources_no_project_data_sources_configured(
+    session: esmvalcore.config.Session,
+) -> None:
+    """Test that loading data sources raises when no data sources are configured for a project."""
+    session["projects"]["test"] = {}
+    with pytest.raises(
+        ValueError,
+        match=r"No data sources found for project 'test'.*",
+    ):
+        esmvalcore.io.load_data_sources(session, project="test")
+
+
+@dataclass
+class IncompleteDataSource:
+    """An incomplete data source class for testing."""
+
+    name: str
+    project: str
+    priority: int
+    # Note the missing implementation of DataSource methods.
+
+
+def test_load_data_sources_invalid_data_source_type(
+    session: esmvalcore.config.Session,
+) -> None:
+    """Test that loading data sources with an invalid data source type raises."""
+    session["projects"]["test"] = {
+        "data": {
+            "invalid_source": {
+                "type": "tests.unit.io.test_load_data_sources.IncompleteDataSource",
+            },
+        },
+    }
+    with pytest.raises(
+        TypeError,
+        match=r"Expected a data source of type `esmvalcore.io.protocol.DataSource`.*",
+    ):
+        esmvalcore.io.load_data_sources(session, project="test")
diff --git a/tests/unit/local/test_facets.py b/tests/unit/local/test_facets.py
index 1373b961c6..8aa5123f5f 100644
--- a/tests/unit/local/test_facets.py
+++ b/tests/unit/local/test_facets.py
@@ -2,7 +2,7 @@
 
 import pytest
 
-from esmvalcore.local import DataSource, LocalFile
+from esmvalcore.local import LocalDataSource, LocalFile
 
 
 @pytest.mark.parametrize(
@@ -25,6 +25,28 @@
             "facet2": "filename",
         },
     ),
+    (
+        "/climate_data/value1/filename_2000-2001.nc",
+        "/climate_data",
+        "{facet1}",
+        "{facet2}[_.]*nc",
+        {
+            "facet1": "value1",
+            "facet2": "filename",
+            "timerange": "2000/2001",
+        },
+    ),
+    (
+        "/climate_data/value1/filename_20001201-20011231.nc",
+        "/climate_data",
+        "{facet1}",
+        "{facet2}[_.]*nc",
+        {
+            "facet1": "value1",
+            "facet2": "filename",
+            "timerange": "20001201/20011231",
+        },
+    ),
     (
         "/climate_data/value1/xyz/filename.nc",
         "/climate_data",
@@ -125,6 +147,7 @@
         {
             "tier": "3",
             "dataset": "ds",
+            "timerange": "1993/1993",
         },
     ),
     (
@@ -136,6 +159,7 @@
             "tier": "3",
             "dataset": "ds",
             "short_name": "tas",
+            "timerange": "1993/1993",
         },
     ),
     (
@@ -145,6 +169,7 @@
         "{short_name}_*",
         {
             "short_name": "tas",
+            "timerange": "1993/1993",
         },
     ),
     (
@@ -165,6 +190,7 @@
         {
             "short_name": "tas",
             "dataset": "ds",
+            "timerange": "1993/1993",
         },
     ),
     (
@@ -258,14 +284,42 @@ def test_path2facets(
     filename_template,
     facets,
 ):
-    """Test `DataSource.path2facets."""
+    """Test `LocalDataSource.path2facets`."""
     path = Path(path)
     rootpath = Path(rootpath)
-    data_source = DataSource(rootpath, dirname_template, filename_template)
-    result = data_source.path2facets(path)
+    data_source = LocalDataSource(
+        name="test-source",
+        project="test-project",
+        priority=1,
+        rootpath=rootpath,
+        dirname_template=dirname_template,
+        filename_template=filename_template,
+    )
+    add_timerange = "timerange" in facets
+    result = data_source.path2facets(path, add_timerange=add_timerange)
     assert result == facets
 
 
+def test_path2facets_no_timerange():
+    """Test that `LocalDataSource.path2facets` does not add "timerange"
+    if it cannot determine the timerange.
+    """
+ path = Path("/climate_data/value1/filename.nc") + rootpath = Path("/climate_data") + data_source = LocalDataSource( + name="test-source", + project="test-project", + priority=1, + rootpath=rootpath, + dirname_template="{facet1}", + filename_template="{facet2}[_.]*nc", + ) + result = data_source.path2facets(path, add_timerange=True) + assert result == { + "facet1": "value1", + "facet2": "filename", + } + + def test_localfile(): file = LocalFile("/a/b.nc") file.facets = {"a": "A"} diff --git a/tests/unit/local/test_get_data_sources.py b/tests/unit/local/test_get_data_sources.py index cef6d49891..6494f4be66 100644 --- a/tests/unit/local/test_get_data_sources.py +++ b/tests/unit/local/test_get_data_sources.py @@ -4,7 +4,7 @@ from esmvalcore.config import CFG from esmvalcore.config._config_validators import validate_config_developer -from esmvalcore.local import DataSource, _get_data_sources +from esmvalcore.local import LocalDataSource, _get_data_sources @pytest.mark.parametrize( @@ -33,7 +33,7 @@ def test_get_data_sources(monkeypatch, rootpath_drs): monkeypatch.setitem(CFG, "drs", drs) sources = _get_data_sources("CMIP6") source = sources[0] - assert isinstance(source, DataSource) + assert isinstance(source, LocalDataSource) assert source.rootpath == Path("/climate_data") assert "{project}" in source.dirname_template assert "{short_name}" in source.filename_template diff --git a/tests/unit/local/test_time.py b/tests/unit/local/test_time.py index 30d5d1ea97..5548dfe254 100644 --- a/tests/unit/local/test_time.py +++ b/tests/unit/local/test_time.py @@ -12,7 +12,6 @@ LocalFile, _dates_to_timerange, _get_start_end_date, - _get_start_end_year, _replace_years_with_timerange, _truncate_dates, ) @@ -33,104 +32,46 @@ def _get_esgf_file(path): return ESGFFile([result]) -FILENAME_CASES = [ - ["var_whatever_1980-1981", 1980, 1981], - ["var_whatever_1980.nc", 1980, 1980], - ["a.b.x_yz_185001-200512.nc", 1850, 2005], - ["var_whatever_19800101-19811231.nc1", 1980, 1981], - ["var_whatever_19800101.nc", 1980, 1980], - ["1980-1981_var_whatever.nc", 1980, 1981], - ["1980_var_whatever.nc", 1980, 1980], - ["var_control-1980_whatever.nc", 1980, 1980], - ["19800101-19811231_var_whatever.nc", 1980, 1981], - ["19800101_var_whatever.nc", 1980, 1980], - ["var_control-19800101_whatever.nc", 1980, 1980], - ["19800101_var_control-1950_whatever.nc", 1980, 1980], - ["var_control-1950_whatever_19800101.nc", 1980, 1980], - ["CM61-LR-hist-03.1950_18500101_19491231_1M_concbc.nc", 1850, 1949], - [ - "icon-2.6.1_atm_amip_R2B5_r1v1i1p1l1f1_phy_3d_ml_20150101T000000Z.nc", - 2015, - 2015, - ], - ["pr_A1.186101-200012.nc", 1861, 2000], - ["tas_A1.20C3M_1.CCSM.atmm.1990-01_cat_1999-12.nc", 1990, 1999], - ["E5sf00_1M_1940_032.grb", 1940, 1940], - ["E5sf00_1D_1998-04_167.grb", 1998, 1998], - ["E5sf00_1H_1986-04-11_167.grb", 1986, 1986], - ["E5sf00_1M_1940-1941_032.grb", 1940, 1941], - ["E5sf00_1D_1998-01_1999-12_167.grb", 1998, 1999], - ["E5sf00_1H_2000-01-01_2001-12-31_167.grb", 2000, 2001], -] - -FILENAME_DATE_CASES = [ - ["var_whatever_1980-1981", "1980", "1981"], - ["var_whatever_1980.nc", "1980", "1980"], - ["a.b.x_yz_185001-200512.nc", "185001", "200512"], - ["var_whatever_19800101-19811231.nc1", "19800101", "19811231"], - ["var_whatever_19800101.nc", "19800101", "19800101"], - ["1980-1981_var_whatever.nc", "1980", "1981"], - ["1980_var_whatever.nc", "1980", "1980"], - ["var_control-1980_whatever.nc", "1980", "1980"], - ["19800101-19811231_var_whatever.nc", "19800101", "19811231"], - ["19800101_var_whatever.nc", "19800101", 
"19800101"], - ["var_control-19800101_whatever.nc", "19800101", "19800101"], - ["19800101_var_control-1950_whatever.nc", "19800101", "19800101"], - ["var_control-1950_whatever_19800101.nc", "19800101", "19800101"], - [ - "CM61-LR-hist-03.1950_18500101_19491231_1M_concbc.nc", - "18500101", - "19491231", - ], +@pytest.mark.parametrize( + "case", [ - "icon-2.6.1_atm_amip_R2B5_r1v1i1p1l1f1_phy_3d_ml_20150101T000000Z.nc", - "20150101T000000Z", - "20150101T000000Z", + ["var_whatever_1980-1981", "1980", "1981"], + ["var_whatever_1980.nc", "1980", "1980"], + ["a.b.x_yz_185001-200512.nc", "185001", "200512"], + ["var_whatever_19800101-19811231.nc1", "19800101", "19811231"], + ["var_whatever_19800101.nc", "19800101", "19800101"], + ["1980-1981_var_whatever.nc", "1980", "1981"], + ["1980_var_whatever.nc", "1980", "1980"], + ["var_control-1980_whatever.nc", "1980", "1980"], + ["19800101-19811231_var_whatever.nc", "19800101", "19811231"], + ["19800101_var_whatever.nc", "19800101", "19800101"], + ["var_control-19800101_whatever.nc", "19800101", "19800101"], + ["19800101_var_control-1950_whatever.nc", "19800101", "19800101"], + ["var_control-1950_whatever_19800101.nc", "19800101", "19800101"], + [ + "CM61-LR-hist-03.1950_18500101_19491231_1M_concbc.nc", + "18500101", + "19491231", + ], + [ + "icon-2.6.1_atm_amip_R2B5_r1v1i1p1l1f1_phy_3d_ml_20150101T000000Z.nc", + "20150101T000000Z", + "20150101T000000Z", + ], + ["pr_A1.186101-200012.nc", "186101", "200012"], + [ + "tas_A1.20C3M_1.CCSM.atmm.1990-01_cat_1999-12.nc", + "199001", + "199912", + ], + ["E5sf00_1M_1940_032.grb", "1940", "1940"], + ["E5sf00_1D_1998-04_167.grb", "199804", "199804"], + ["E5sf00_1H_1986-04-11_167.grb", "19860411", "19860411"], + ["E5sf00_1M_1940-1941_032.grb", "1940", "1941"], + ["E5sf00_1D_1998-01_1999-12_167.grb", "199801", "199912"], + ["E5sf00_1H_2000-01-01_2001-12-31_167.grb", "20000101", "20011231"], ], - ["pr_A1.186101-200012.nc", "186101", "200012"], - ["tas_A1.20C3M_1.CCSM.atmm.1990-01_cat_1999-12.nc", "199001", "199912"], - ["E5sf00_1M_1940_032.grb", "1940", "1940"], - ["E5sf00_1D_1998-04_167.grb", "199804", "199804"], - ["E5sf00_1H_1986-04-11_167.grb", "19860411", "19860411"], - ["E5sf00_1M_1940-1941_032.grb", "1940", "1941"], - ["E5sf00_1D_1998-01_1999-12_167.grb", "199801", "199912"], - ["E5sf00_1H_2000-01-01_2001-12-31_167.grb", "20000101", "20011231"], -] - - -@pytest.mark.parametrize("case", FILENAME_CASES) -def test_get_start_end_year(case): - """Tests for _get_start_end_year function.""" - filename, case_start, case_end = case - - # If the filename is inconclusive or too difficult we resort to reading the - # file, which fails here because the file is not there. 
- if case_start is None and case_end is None: - with pytest.raises(ValueError): - _get_start_end_year(filename) - with pytest.raises(ValueError): - _get_start_end_year(Path(filename)) - with pytest.raises(ValueError): - _get_start_end_year(LocalFile(filename)) - with pytest.raises(ValueError): - _get_start_end_year(_get_esgf_file(filename)) - - else: - start, end = _get_start_end_year(filename) - assert case_start == start - assert case_end == end - start, end = _get_start_end_year(Path(filename)) - assert case_start == start - assert case_end == end - start, end = _get_start_end_year(LocalFile(filename)) - assert case_start == start - assert case_end == end - start, end = _get_start_end_year(_get_esgf_file(filename)) - assert case_start == start - assert case_end == end - - -@pytest.mark.parametrize("case", FILENAME_DATE_CASES) +) def test_get_start_end_date(case): """Tests for _get_start_end_date function.""" filename, case_start, case_end = case @@ -145,7 +86,7 @@ def test_get_start_end_date(case): with pytest.raises(ValueError): _get_start_end_date(LocalFile(filename)) with pytest.raises(ValueError): - _get_start_end_date(_get_esgf_file(filename)) + _get_start_end_date(_get_esgf_file(filename).name) else: start, end = _get_start_end_date(filename) @@ -157,7 +98,7 @@ def test_get_start_end_date(case): start, end = _get_start_end_date(LocalFile(filename)) assert case_start == start assert case_end == end - start, end = _get_start_end_date(_get_esgf_file(filename)) + start, end = _get_start_end_date(_get_esgf_file(filename).name) assert case_start == start assert case_end == end @@ -173,9 +114,9 @@ def test_read_years_from_cube(tmp_path): ) cube.add_dim_coord(time, 0) iris.save(cube, temp_file) - start, end = _get_start_end_year(temp_file) - assert start == 1990 - assert end == 1991 + start, end = _get_start_end_date(temp_file) + assert int(start[:4]) == 1990 + assert int(end[:4]) == 1991 def test_read_datetime_from_cube(tmp_path): @@ -210,8 +151,6 @@ def test_raises_if_unable_to_deduce_no_time(tmp_path): iris.save(cube, temp_file) with pytest.raises(ValueError): _get_start_end_date(temp_file) - with pytest.raises(ValueError): - _get_start_end_year(temp_file) def test_raises_if_unable_to_deduce_no_time_units(tmp_path): @@ -223,16 +162,12 @@ def test_raises_if_unable_to_deduce_no_time_units(tmp_path): iris.save(cube, temp_file) with pytest.raises(ValueError): _get_start_end_date(temp_file) - with pytest.raises(ValueError): - _get_start_end_year(temp_file) def test_fails_if_no_date_present(): """Test raises if no date is present.""" with pytest.raises(ValueError): _get_start_end_date("var_whatever") - with pytest.raises(ValueError): - _get_start_end_year("var_whatever") def test_get_timerange_from_years(): diff --git a/tests/unit/local/test_to_iris.py b/tests/unit/local/test_to_iris.py index 15a50729ac..44a6a881d3 100644 --- a/tests/unit/local/test_to_iris.py +++ b/tests/unit/local/test_to_iris.py @@ -1,11 +1,14 @@ +from pathlib import Path + import iris.cube import pytest +from pytest_mock import MockerFixture -from esmvalcore.local import LocalFile +from esmvalcore.local import LocalFile, _get_attr_from_field_coord @pytest.fixture -def local_file(tmp_path): +def local_file(tmp_path: Path) -> LocalFile: cube = iris.cube.Cube([0]) cube.attributes.globals["attribute"] = "value" file = tmp_path / "test.nc" @@ -13,21 +16,27 @@ def local_file(tmp_path): return LocalFile(file) -def test_to_iris(local_file): +def test_to_iris(local_file: LocalFile) -> None: cubes = local_file.to_iris() assert 
len(cubes) == 1 -def test_attributes(local_file): +def test_attributes(local_file: LocalFile) -> None: local_file.to_iris() # Load the file to populate attributes attrs = local_file.attributes assert attrs["attribute"] == "value" -def test_attributes_without_loading(local_file): +def test_attributes_without_loading(local_file: LocalFile) -> None: """Test that accessing attributes without loading the file first raises.""" with pytest.raises( ValueError, match=r"Attributes have not been read yet.*", ): local_file.attributes # noqa: B018 + + +def test_get_attr_from_field_coord_none(mocker: MockerFixture) -> None: + """Test ``_get_attr_from_field_coord``.""" + attr = _get_attr_from_field_coord(mocker.sentinel.ncfield, None, "attr") + assert attr is None diff --git a/tests/unit/preprocessor/test_shared.py b/tests/unit/preprocessor/test_shared.py index 773f380794..a1860ecde2 100644 --- a/tests/unit/preprocessor/test_shared.py +++ b/tests/unit/preprocessor/test_shared.py @@ -2,6 +2,7 @@ import inspect import warnings +from pathlib import Path import dask.array as da import iris.analysis @@ -384,17 +385,17 @@ def test_compute_area_weights(lazy): ) -def test_group_products_string_list(): +def test_group_products_string_list() -> None: products = [ PreprocessorFile( - filename="A_B.nc", + filename=Path("A_B.nc"), attributes={ "project": "A", "dataset": "B", }, ), PreprocessorFile( - filename="A_C.nc", + filename=Path("A_C.nc"), attributes={ "project": "A", "dataset": "C", diff --git a/tests/unit/provenance/test_trackedfile.py b/tests/unit/provenance/test_trackedfile.py index 9e22ca461b..5caebc173b 100644 --- a/tests/unit/provenance/test_trackedfile.py +++ b/tests/unit/provenance/test_trackedfile.py @@ -1,21 +1,48 @@ +from dataclasses import dataclass from pathlib import Path +from typing import Any +import iris.cube +import prov.model import pytest from prov.model import ProvDocument from esmvalcore._provenance import ESMVALTOOL_URI_PREFIX, TrackedFile +from esmvalcore.io.protocol import DataElement from esmvalcore.local import LocalFile +def test_set() -> None: + assert { + TrackedFile(Path("file1.nc"), attributes={}), + TrackedFile(Path("file1.nc"), attributes={}), + TrackedFile(Path("file2.nc"), attributes={}), + } == { + TrackedFile(Path("file1.nc"), attributes={}), + TrackedFile(Path("file2.nc"), attributes={}), + } + + +def test_sort() -> None: + file1 = TrackedFile(Path("file1.nc"), attributes={}) + file2 = TrackedFile(Path("file2.nc"), attributes={}) + assert sorted([file2, file1]) == [file1, file2] + + +def test_equals() -> None: + file = TrackedFile(Path("file.nc"), attributes={}) + assert file == TrackedFile(Path("file.nc"), attributes={}) + + @pytest.fixture -def tracked_input_file_nc(): +def tracked_input_file_nc() -> TrackedFile: input_file_nc = LocalFile("/path/to/file.nc") input_file_nc.attributes = {"a": "A"} return TrackedFile(filename=input_file_nc) @pytest.fixture -def tracked_output_file_nc(): +def tracked_output_file_nc() -> TrackedFile: return TrackedFile( filename=Path("/path/to/file.nc"), attributes={"a": "A"}, @@ -23,41 +50,56 @@ def tracked_output_file_nc(): @pytest.fixture -def tracked_input_file_grb(): +def tracked_input_file_grb() -> TrackedFile: input_file_grb = LocalFile("/path/to/file.grb") input_file_grb.attributes = {"a": "A"} return TrackedFile(filename=input_file_grb) -def test_init_input_nc(tracked_input_file_nc): +def test_init_input_nc(tracked_input_file_nc: TrackedFile) -> None: """Test `esmvalcore._provenance.TrackedFile.__init__`.""" assert 
tracked_input_file_nc.filename == LocalFile("/path/to/file.nc") - assert tracked_input_file_nc.attributes is None + with pytest.raises( + ValueError, + match=r"Call TrackedFile.initialize_provenance before accessing attributes", + ): + tracked_input_file_nc.attributes # noqa: B018 -def test_init_output_nc(tracked_output_file_nc): +def test_init_output_nc(tracked_output_file_nc: TrackedFile) -> None: """Test `esmvalcore._provenance.TrackedFile.__init__`.""" assert tracked_output_file_nc.filename == Path("/path/to/file.nc") assert tracked_output_file_nc.attributes == {"a": "A"} -def test_init_grb(tracked_input_file_grb): +def test_init_grb(tracked_input_file_grb: TrackedFile) -> None: """Test `esmvalcore._provenance.TrackedFile.__init__`.""" assert tracked_input_file_grb.filename == LocalFile("/path/to/file.grb") - assert tracked_input_file_grb.attributes is None + with pytest.raises( + ValueError, + match=r"Call TrackedFile.initialize_provenance before accessing attributes", + ): + tracked_input_file_grb.attributes # noqa: B018 + + +@pytest.fixture +def activity() -> prov.model.ProvActivity: + provenance = ProvDocument() + provenance.add_namespace("task", uri=ESMVALTOOL_URI_PREFIX + "task") + return provenance.activity("task:test-task-name") @pytest.mark.parametrize( "fixture_name", ["tracked_input_file_nc", "tracked_output_file_nc"], ) -def test_initialize_provenance_nc(fixture_name, request): +def test_initialize_provenance_nc( + fixture_name: str, + request: pytest.FixtureRequest, + activity: prov.model.ProvActivity, +) -> None: """Test `esmvalcore._provenance.TrackedFile.initialize_provenance`.""" tracked_file_nc = request.getfixturevalue(fixture_name) - provenance = ProvDocument() - provenance.add_namespace("task", uri=ESMVALTOOL_URI_PREFIX + "task") - activity = provenance.activity("task:test-task-name") - tracked_file_nc.initialize_provenance(activity) assert isinstance(tracked_file_nc.provenance, ProvDocument) assert tracked_file_nc.activity == activity @@ -65,33 +107,59 @@ def test_initialize_provenance_nc(fixture_name, request): assert tracked_file_nc.attributes == {"a": "A"} -def test_initialize_provenance_grb(tracked_input_file_grb): +def test_initialize_provenance_grb( + tracked_input_file_grb: TrackedFile, + activity: prov.model.ProvActivity, +) -> None: """Test `esmvalcore._provenance.TrackedFile.initialize_provenance`.""" - provenance = ProvDocument() - provenance.add_namespace("task", uri=ESMVALTOOL_URI_PREFIX + "task") - activity = provenance.activity("task:test-task-name") - tracked_input_file_grb.initialize_provenance(activity) assert isinstance(tracked_input_file_grb.provenance, ProvDocument) assert tracked_input_file_grb.activity == activity assert ( - str(tracked_input_file_grb.entity.identifier) + str(tracked_input_file_grb.entity.identifier) # type: ignore[attr-defined] == "file:/path/to/file.grb" ) assert tracked_input_file_grb.attributes == {"a": "A"} +def test_initialize_provenance_twice_raises( + tracked_output_file_nc: TrackedFile, + activity: prov.model.ProvActivity, +) -> None: + """Test `esmvalcore._provenance.TrackedFile.initialize_provenance` raises if called twice.""" + tracked_output_file_nc.initialize_provenance(activity) + + with pytest.raises( + ValueError, + match=r"Provenance of TrackedFile: /path/to/file.nc already initialized", + ): + tracked_output_file_nc.initialize_provenance(activity) + + +def test_initialize_provenance_no_attributes_raises( + activity: prov.model.ProvActivity, +) -> None: + """Test 
`esmvalcore._provenance.TrackedFile.initialize_provenance` with no attributes."""
+    tracked_file = TrackedFile(filename=Path("/path/to/file.nc"))
+
+    with pytest.raises(
+        TypeError,
+        match=r"Delayed reading of attributes is only supported for `DataElement`s",
+    ):
+        tracked_file.initialize_provenance(activity)
+
+
 @pytest.mark.parametrize(
     "fixture_name",
     ["tracked_input_file_nc", "tracked_output_file_nc"],
 )
-def test_copy_provenance(fixture_name, request):
+def test_copy_provenance(
+    fixture_name: str,
+    request: pytest.FixtureRequest,
+    activity: prov.model.ProvActivity,
+) -> None:
     """Test `esmvalcore._provenance.TrackedFile.copy_provenance`."""
     tracked_file_nc = request.getfixturevalue(fixture_name)
 
-    provenance = ProvDocument()
-    provenance.add_namespace("task", uri=ESMVALTOOL_URI_PREFIX + "task")
-    activity = provenance.activity("task:test-task-name")
-
     tracked_file_nc.initialize_provenance(activity)
 
     copied_file = tracked_file_nc.copy_provenance()
@@ -99,3 +167,83 @@
     assert copied_file.entity == tracked_file_nc.entity
     assert copied_file.provenance == tracked_file_nc.provenance
     assert copied_file.provenance is not tracked_file_nc.provenance
+
+
+def test_copy_provenance_not_initialized() -> None:
+    """Test `esmvalcore._provenance.TrackedFile.copy_provenance` raises if provenance not initialized."""
+    tracked_file = TrackedFile(filename=Path("/path/to/file.nc"))
+
+    with pytest.raises(
+        ValueError,
+        match=r"Provenance of TrackedFile: /path/to/file.nc not initialized",
+    ):
+        tracked_file.copy_provenance()
+
+
+def test_wasderivedfrom_not_initialized() -> None:
+    """Test `esmvalcore._provenance.TrackedFile.wasderivedfrom` raises if provenance not initialized."""
+    tracked_file = TrackedFile(filename=Path("/path/to/file.nc"))
+    other_tracked_file = TrackedFile(filename=Path("/path/to/other_file.nc"))
+
+    with pytest.raises(
+        ValueError,
+        match=r"Provenance of TrackedFile: /path/to/file.nc not initialized",
+    ):
+        tracked_file.wasderivedfrom(other_tracked_file)
+
+
+@dataclass
+class MockDataElement(DataElement):
+    """Mock DataElement for testing purposes."""
+
+    name: str
+    facets: dict[str, Any]
+    attributes: dict[str, Any]
+
+    def prepare(self) -> None:
+        pass
+
+    def __hash__(self) -> int:
+        return hash(self.name)
+
+    def to_iris(
+        self,
+        ignore_warnings: list[dict[str, Any]] | None = None,
+    ) -> iris.cube.CubeList:
+        return iris.cube.CubeList()
+
+
+def test_provenance_file_nonpath_notimplemented() -> None:
+    """Test `esmvalcore._provenance.TrackedFile.provenance_file` with a DataElement."""
+    input_file = MockDataElement(
+        name="/path/to/input_file.nc",
+        facets={},
+        attributes={},
+    )
+    tracked_file = TrackedFile(filename=input_file)
+
+    assert tracked_file.filename == input_file
+    with pytest.raises(
+        NotImplementedError,
+        match=r"Saving provenance is only supported for pathlib.Path.*",
+    ):
+        _ = tracked_file.provenance_file
+
+
+def test_save_provenance_notimplemented(
+    activity: prov.model.ProvActivity,
+) -> None:
+    """Test `esmvalcore._provenance.TrackedFile.save_provenance` with a DataElement."""
+    input_file = MockDataElement(
+        name="/path/to/input_file.nc",
+        facets={},
+        attributes={},
+    )
+    tracked_file = TrackedFile(filename=input_file)
+    tracked_file.initialize_provenance(activity)
+
+    with pytest.raises(
+        NotImplementedError,
+        match=r"Writing attributes is only supported for pathlib.Path.*",
+    ):
+        tracked_file.save_provenance()
diff --git a/tests/unit/recipe/test_recipe.py b/tests/unit/recipe/test_recipe.py index
367eff6e17..55771a0b23 100644 --- a/tests/unit/recipe/test_recipe.py +++ b/tests/unit/recipe/test_recipe.py @@ -148,6 +148,7 @@ def create_esgf_search_results(): return [file0, file1] +@pytest.mark.skip(reason="May not be needed anymore.") @pytest.mark.parametrize("local_availability", ["all", "partial", "none"]) def test_schedule_for_download(monkeypatch, tmp_path, local_availability): """Test that `_schedule_for_download` updates DOWNLOAD_FILES.""" diff --git a/tests/unit/task/test_diagnostic_task.py b/tests/unit/task/test_diagnostic_task.py index 15517187bb..cb6047bc11 100644 --- a/tests/unit/task/test_diagnostic_task.py +++ b/tests/unit/task/test_diagnostic_task.py @@ -228,7 +228,7 @@ def test_collect_provenance(mocker, diagnostic_task): diagnostic_task._collect_provenance() tracked_file_class.assert_called_once_with( - "test.png", + Path("test.png"), { "caption": "Some figure", "plot_type": ("tag_value",), diff --git a/tests/unit/task/test_print.py b/tests/unit/task/test_print.py index 0ed1352f68..53aad046d3 100644 --- a/tests/unit/task/test_print.py +++ b/tests/unit/task/test_print.py @@ -2,20 +2,22 @@ import copy import textwrap +from pathlib import Path import pytest from esmvalcore._task import DiagnosticTask from esmvalcore.dataset import Dataset +from esmvalcore.local import LocalFile from esmvalcore.preprocessor import PreprocessingTask, PreprocessorFile @pytest.fixture def preproc_file(): dataset = Dataset(short_name="tas") - dataset.files = ["/path/to/input_file.nc"] + dataset.files = [LocalFile("/path/to/input_file.nc")] return PreprocessorFile( - filename="/output/preproc/file.nc", + filename=Path("/output/preproc/file.nc"), attributes={"short_name": "tas"}, settings={ "extract_levels": {"scheme": "linear", "levels": [95000]}, @@ -52,9 +54,9 @@ def test_repr_preproc_task(preproc_task): PreprocessingTask: diag_1/tas order: ['extract_levels', 'save'] PreprocessorFile: /output/preproc/file.nc - input files: ['/path/to/input_file.nc'] + input files: [LocalFile('/path/to/input_file.nc')] settings: {'extract_levels': {'levels': [95000], 'scheme': 'linear'}, - 'save': {'filename': '/output/preproc/file.nc'}} + 'save': {'filename': PosixPath('/output/preproc/file.nc')}} ancestors: None """) @@ -97,9 +99,9 @@ def test_repr_simple_tree(preproc_task, diagnostic_task): PreprocessingTask: diag_1/tas order: ['extract_levels', 'save'] PreprocessorFile: /output/preproc/file.nc - input files: ['/path/to/input_file.nc'] + input files: [LocalFile('/path/to/input_file.nc')] settings: {'extract_levels': {'levels': [95000], 'scheme': 'linear'}, - 'save': {'filename': '/output/preproc/file.nc'}} + 'save': {'filename': PosixPath('/output/preproc/file.nc')}} ancestors: None """) @@ -141,25 +143,25 @@ def test_repr_full_tree(preproc_task, diagnostic_task): PreprocessingTask: diag_1/tas order: ['extract_levels', 'save'] PreprocessorFile: /output/preproc/file.nc - input files: ['/path/to/input_file.nc'] + input files: [LocalFile('/path/to/input_file.nc')] settings: {'extract_levels': {'levels': [95000], 'scheme': 'linear'}, - 'save': {'filename': '/output/preproc/file.nc'}} + 'save': {'filename': PosixPath('/output/preproc/file.nc')}} ancestors: PreprocessingTask: diag_1/tas_derive_input_1 order: ['extract_levels', 'save'] PreprocessorFile: /output/preproc/file.nc - input files: ['/path/to/input_file.nc'] + input files: [LocalFile('/path/to/input_file.nc')] settings: {'extract_levels': {'levels': [95000], 'scheme': 'linear'}, - 'save': {'filename': '/output/preproc/file.nc'}} + 'save': {'filename': 
PosixPath('/output/preproc/file.nc')}} ancestors: None PreprocessingTask: diag_1/tas_derive_input_2 order: ['extract_levels', 'save'] PreprocessorFile: /output/preproc/file.nc - input files: ['/path/to/input_file.nc'] + input files: [LocalFile('/path/to/input_file.nc')] settings: {'extract_levels': {'levels': [95000], 'scheme': 'linear'}, - 'save': {'filename': '/output/preproc/file.nc'}} + 'save': {'filename': PosixPath('/output/preproc/file.nc')}} ancestors: None """) diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 68e8ceed05..e4f6548e71 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -1,10 +1,12 @@ +import importlib.resources import textwrap from collections import defaultdict +from functools import lru_cache from pathlib import Path -from unittest import mock import pyesgf import pytest +import yaml import esmvalcore.dataset import esmvalcore.local @@ -13,6 +15,33 @@ from esmvalcore.dataset import Dataset from esmvalcore.esgf import ESGFFile from esmvalcore.exceptions import InputFilesNotFound, RecipeError +from esmvalcore.typing import Facets + + +@lru_cache +def _load_default_data_sources() -> dict[ + str, + dict[str, dict[str, dict[str, dict[str, str]]]], +]: + """Load default data sources for local users.""" + with importlib.resources.as_file( + importlib.resources.files(esmvalcore.config) + / "configurations" + / "local-data.yml", + ) as config_file: + return yaml.safe_load(config_file.read_text(encoding="utf-8")) + + +@pytest.fixture +def session(tmp_path: Path, session: Session) -> Session: + """Session fixture with default local data sources.""" + projects = _load_default_data_sources()["projects"] + for project in projects: + data_sources = projects[project]["data"] + for data_source in data_sources.values(): + data_source["rootpath"] = str(tmp_path) + session["projects"][project]["data"] = data_sources + return session def test_repr(): @@ -873,6 +902,7 @@ def test_from_files_with_globs(monkeypatch, session): "mip": "Amon", "project": "CMIP6", "short_name": "tas", + "timerange": "185001/201412", "version": "v20181126", } file2 = esmvalcore.local.LocalFile( @@ -984,6 +1014,7 @@ def test_from_files_with_globs_and_missing_facets(monkeypatch, session): "mip": "Amon", "project": "CMIP6", "short_name": "tas", + "timerange": "185001/201412", "version": "v20181126", } file2 = esmvalcore.local.LocalFile( @@ -1030,7 +1061,6 @@ def test_from_files_with_globs_and_missing_facets(monkeypatch, session): mip="Amon", project="CMIP6", short_name="tas", - timerange="185001/201412", ) expected.session = session @@ -1065,6 +1095,7 @@ def test_from_files_with_globs_and_automatic_missing(monkeypatch, session): "mip": "Amon", "project": "CMIP6", "short_name": "tas", + "timerange": "185001/201412", "version": "v20181126", } @@ -1250,7 +1281,7 @@ def test_concatenating_historical_and_future_exps(mocker): assert dataset.supplementaries[0].facets["exp"] == "historical" -def test_from_recipe_with_glob(tmp_path, session, mocker): +def test_from_recipe_with_glob(tmp_path: Path, session: Session) -> None: recipe_txt = textwrap.dedent(""" diagnostics: @@ -1267,8 +1298,6 @@ def test_from_recipe_with_glob(tmp_path, session, mocker): recipe = tmp_path / "recipe_test.yml" recipe.write_text(recipe_txt, encoding="utf-8") - session["drs"]["CMIP5"] = "ESGF" - CFG["rootpath"]["CMIP5"] = [tmp_path] filenames = [ "cmip5/output1/CSIRO-QCCCE/CSIRO-Mk3-6-0/rcp85/mon/atmos/Amon/r1i1p1/" "v20120323/tas_Amon_CSIRO-Mk3-6-0_rcp85_r1i1p1_200601-210012.nc", @@ -1280,7 +1309,7 
@@ def test_from_recipe_with_glob(tmp_path, session, mocker): path.parent.mkdir(parents=True, exist_ok=True) path.write_text("") - definitions = [ + definitions: list[Facets] = [ { "diagnostic": "diagnostic1", "variable_group": "tas", @@ -1420,18 +1449,32 @@ def dataset(): mip="Amon", frequency="mon", short_name="tas", - dataset="EC.-Earth3", + dataset="EC-Earth3", exp="historical", ensemble="r1i1p1f1", grid="gr", timerange="1850/1851", - alias="CMIP6_EC-Eeath3_tas", + alias="CMIP6_EC-Earth3_tas", ) dataset.session = { "search_esgf": "when_missing", "download_dir": Path("/download_dir"), - "rootpath": None, - "drs": {}, + "projects": { + "CMIP6": { + "data": { + "local": { + "type": "esmvalcore.local.LocalDataSource", + "rootpath": Path("/local_dir"), + "dirname_template": "{project}/{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{version}", + "filename_template": "{short_name}_{mip}_{dataset}_{exp}_{ensemble}_{grid}*.nc", + }, + "esgf": { + "type": "esmvalcore.esgf.ESGFDataSource", + "download_dir": Path("/download_dir"), + }, + }, + }, + }, } return dataset @@ -1461,14 +1504,14 @@ def test_find_files(mocker, dataset, local_availability): ) mocker.patch.object( - esmvalcore.dataset.local, - "find_files", + esmvalcore.local.LocalDataSource, + "find_data", autospec=True, - return_value=(list(local_files), []), + return_value=list(local_files), ) mocker.patch.object( - esmvalcore.dataset.esgf, - "find_files", + esmvalcore.esgf.ESGFDataSource, + "find_data", autospec=True, return_value=list(esgf_files), ) @@ -1498,14 +1541,14 @@ def test_find_files_wildcard_timerange(mocker, dataset): ) mocker.patch.object( - esmvalcore.dataset.local, - "find_files", + esmvalcore.local.LocalDataSource, + "find_data", autospec=True, - return_value=(local_files, []), + return_value=list(local_files), ) mocker.patch.object( - esmvalcore.dataset.esgf, - "find_files", + esmvalcore.esgf.ESGFDataSource, + "find_data", autospec=True, return_value=list(esgf_files), ) @@ -1535,14 +1578,14 @@ def test_find_files_outdated_local(mocker, dataset): ) mocker.patch.object( - esmvalcore.dataset.local, - "find_files", + esmvalcore.local.LocalDataSource, + "find_data", autospec=True, - return_value=(local_files, []), + return_value=list(local_files), ) mocker.patch.object( - esmvalcore.dataset.esgf, - "find_files", + esmvalcore.esgf.ESGFDataSource, + "find_data", autospec=True, return_value=list(esgf_files), ) @@ -1552,20 +1595,39 @@ def test_find_files_outdated_local(mocker, dataset): @pytest.mark.parametrize( "project", - ["CESM", "EMAC", "ICON", "IPSLCM", "OBS", "OBS6", "ana4mips", "native6"], + ["CESM", "EMAC", "ICON", "IPSLCM", "OBS", "OBS6", "native6"], ) -def test_find_files_non_esgf_projects(mocker, project, monkeypatch): +def test_find_files_non_esgf_projects(mocker, monkeypatch, session, project): """Test that find_files does never download files for non-ESGF projects.""" monkeypatch.setitem(CFG, "search_esgf", "always") + # Add "model" projects that are not part of the default local configuration. 
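+    # Their data sources come from the bundled "<project>-data.yml" file, if one exists.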
+ with importlib.resources.as_file( + importlib.resources.files(esmvalcore.config) + / "configurations" + / f"{project.lower()}-data.yml", + ) as config_file: + if config_file.exists(): + cfg = yaml.safe_load(config_file.read_text(encoding="utf-8")) + session["projects"][project]["data"] = cfg["projects"][project][ + "data" + ] + + files = [ + mocker.create_autospec( + esmvalcore.local.LocalFile, + spec_set=True, + instance=True, + ), + ] mock_local_find_files = mocker.patch.object( - esmvalcore.dataset.local, - "find_files", + esmvalcore.local.LocalDataSource, + "find_data", autospec=True, - return_value=(mock.sentinel.files, mock.sentinel.file_globs), + return_value=files, ) mock_esgf_find_files = mocker.patch.object( - esmvalcore.dataset.esgf, - "find_files", + esmvalcore.esgf.ESGFDataSource, + "find_data", autospec=True, ) @@ -1599,14 +1661,14 @@ def test_find_files_non_esgf_projects(mocker, project, monkeypatch): var_type="var_type", version=1, ) + tas.session = session tas.augment_facets() tas.find_files() - mock_local_find_files.assert_called_once() + mock_local_find_files.assert_called() mock_esgf_find_files.assert_not_called() - assert tas.files == mock.sentinel.files - assert tas._file_globs == mock.sentinel.file_globs + assert tas.files == files def test_set_version(): @@ -2137,7 +2199,7 @@ def test_get_extra_facets_native6(): } -OBS6_SAT_FACETS = { +OBS6_SAT_FACETS: Facets = { "project": "OBS6", "dataset": "SAT", "mip": "Amon", @@ -2191,8 +2253,11 @@ def test_derivation_necessary_no_derivation(): assert dataset._derivation_necessary() is False -def test_derivation_necessary_no_force_derivation_no_files(): +def test_derivation_necessary_no_force_derivation_no_files( + session: Session, +) -> None: dataset = Dataset(**OBS6_SAT_FACETS, short_name="lwcre", derive=True) + dataset.session = session assert dataset._derivation_necessary() is True diff --git a/tests/unit/test_provenance.py b/tests/unit/test_provenance.py deleted file mode 100644 index b6c20dbc2e..0000000000 --- a/tests/unit/test_provenance.py +++ /dev/null @@ -1,25 +0,0 @@ -"""Test `esmvalcore._provenance`.""" - -from esmvalcore._provenance import TrackedFile - - -def test_set(): - assert { - TrackedFile("file1.nc", attributes={}), - TrackedFile("file1.nc", attributes={}), - TrackedFile("file2.nc", attributes={}), - } == { - TrackedFile("file1.nc", attributes={}), - TrackedFile("file2.nc", attributes={}), - } - - -def test_sort(): - file1 = TrackedFile("file1.nc", attributes={}) - file2 = TrackedFile("file2.nc", attributes={}) - assert sorted([file2, file1]) == [file1, file2] - - -def test_equals(): - file = TrackedFile("file.nc", attributes={}) - assert file == TrackedFile("file.nc", attributes={})