diff --git a/esmvalcore/_recipe/check.py b/esmvalcore/_recipe/check.py index a33868da74..aafd4a0e3a 100644 --- a/esmvalcore/_recipe/check.py +++ b/esmvalcore/_recipe/check.py @@ -36,7 +36,6 @@ from esmvalcore._task import TaskSet from esmvalcore.dataset import Dataset - from esmvalcore.typing import Facets logger = logging.getLogger(__name__) @@ -466,20 +465,6 @@ def valid_time_selection(timerange: str) -> None: _check_timerange_values(date, timerange_list) -def differing_timeranges( - timeranges: set[str], - required_vars: list[Facets], -) -> None: - """Log error if required variables have differing timeranges.""" - if len(timeranges) > 1: - msg = ( - f"Differing timeranges with values {timeranges} " - f"found for required variables {required_vars}. " - "Set `timerange` to a common value." - ) - raise ValueError(msg) - - def _check_literal( settings: dict, *, diff --git a/esmvalcore/_recipe/recipe.py b/esmvalcore/_recipe/recipe.py index c749f4aff1..ad9274240e 100644 --- a/esmvalcore/_recipe/recipe.py +++ b/esmvalcore/_recipe/recipe.py @@ -51,7 +51,6 @@ from . import check from .from_datasets import datasets_to_recipe from .to_datasets import ( - _derive_needed, _get_input_datasets, _representative_datasets, ) @@ -246,7 +245,7 @@ def _get_default_settings(dataset: Dataset) -> PreprocessorSettings: settings = {} - if _derive_needed(dataset): + if dataset._derivation_necessary(): # noqa: SLF001 (will be replaced soon) settings["derive"] = { "short_name": facets["short_name"], "standard_name": facets["standard_name"], @@ -615,21 +614,26 @@ def _allow_skipping(dataset: Dataset) -> bool: ) -def _set_version(dataset: Dataset, input_datasets: list[Dataset]) -> None: - """Set the 'version' facet based on derivation input datasets.""" - versions = set() - for in_dataset in input_datasets: - in_dataset.set_version() - if version := in_dataset.facets.get("version"): - if isinstance(version, list): - versions.update(version) - else: - versions.add(version) - if versions: - version = versions.pop() if len(versions) == 1 else sorted(versions) - dataset.set_facet("version", version) - for supplementary_ds in dataset.supplementaries: - supplementary_ds.set_version() +def _fix_cmip5_fx_ensemble(dataset: Dataset) -> None: + """Automatically correct the wrong ensemble for CMIP5 fx variables.""" + if ( + dataset.facets.get("project") == "CMIP5" + and dataset.facets.get("mip") == "fx" + and dataset.facets.get("ensemble") != "r0i0p0" + and not dataset.files + ): + original_ensemble = dataset["ensemble"] + copy = dataset.copy() + copy.facets["ensemble"] = "r0i0p0" + if copy.files: + dataset.facets["ensemble"] = "r0i0p0" + logger.info( + "Corrected wrong 'ensemble' from '%s' to '%s' for %s", + original_ensemble, + dataset["ensemble"], + dataset.summary(shorten=True), + ) + dataset.find_files() def _get_preprocessor_products( @@ -655,6 +659,7 @@ def _get_preprocessor_products( settings = _get_default_settings(dataset) _apply_preprocessor_profile(settings, profile) _update_multi_dataset_settings(dataset.facets, settings) + _fix_cmip5_fx_ensemble(dataset) _update_preproc_functions(settings, dataset, datasets, missing_vars) _add_dataset_specific_settings(dataset, settings) check.preprocessor_supplementaries(dataset, settings) @@ -666,7 +671,7 @@ def _get_preprocessor_products( else: missing_vars.update(missing) continue - _set_version(dataset, input_datasets) + dataset.set_version() USED_DATASETS.append(dataset) _schedule_for_download(input_datasets) _log_input_files(input_datasets) diff --git 
a/esmvalcore/_recipe/to_datasets.py b/esmvalcore/_recipe/to_datasets.py index 7aab83719b..6f6bd46f23 100644 --- a/esmvalcore/_recipe/to_datasets.py +++ b/esmvalcore/_recipe/to_datasets.py @@ -13,7 +13,6 @@ from esmvalcore.esgf.facets import FACETS from esmvalcore.exceptions import RecipeError from esmvalcore.local import LocalFile, _replace_years_with_timerange -from esmvalcore.preprocessor._derive import get_required from esmvalcore.preprocessor._io import DATASET_KEYS from esmvalcore.preprocessor._supplementary_vars import ( PREPROCESSOR_SUPPLEMENTARIES, @@ -188,28 +187,6 @@ def _merge_supplementary_dicts( return list(merged.values()) -def _fix_cmip5_fx_ensemble(dataset: Dataset) -> None: - """Automatically correct the wrong ensemble for CMIP5 fx variables.""" - if ( - dataset.facets.get("project") == "CMIP5" - and dataset.facets.get("mip") == "fx" - and dataset.facets.get("ensemble") != "r0i0p0" - and not dataset.files - ): - original_ensemble = dataset["ensemble"] - copy = dataset.copy() - copy.facets["ensemble"] = "r0i0p0" - if copy.files: - dataset.facets["ensemble"] = "r0i0p0" - logger.info( - "Corrected wrong 'ensemble' from '%s' to '%s' for %s", - original_ensemble, - dataset["ensemble"], - dataset.summary(shorten=True), - ) - dataset.find_files() - - def _get_supplementary_short_names( facets: Facets, step: str, @@ -428,9 +405,7 @@ def datasets_from_recipe( return datasets -def _dataset_from_files( # noqa: C901 - dataset: Dataset, -) -> list[Dataset]: +def _dataset_from_files(dataset: Dataset) -> list[Dataset]: """Replace facet values of '*' based on available files.""" result: list[Dataset] = [] errors: list[str] = [] @@ -441,53 +416,32 @@ def _dataset_from_files( # noqa: C901 dataset.summary(shorten=True), ) - representative_datasets = _representative_datasets(dataset) - - # For derived variables, representative_datasets might contain more than - # one element - all_datasets: list[list[tuple[dict, Dataset]]] = [] - for representative_dataset in representative_datasets: - all_datasets.append([]) - for expanded_ds in representative_dataset.from_files(): - updated_facets = {} - unexpanded_globs = {} - for key, value in dataset.facets.items(): - if _isglob(value): - if key in expanded_ds.facets and not _isglob( - expanded_ds[key], - ): - updated_facets[key] = expanded_ds.facets[key] - else: - unexpanded_globs[key] = value - - if unexpanded_globs: - msg = _report_unexpanded_globs( - dataset, - expanded_ds, - unexpanded_globs, - ) - errors.append(msg) - continue + for expanded_ds in dataset.from_files(): + updated_facets = {} + unexpanded_globs = {} + for key, value in dataset.facets.items(): + if _isglob(value): + if key in expanded_ds.facets and not _isglob( + expanded_ds[key], + ): + updated_facets[key] = expanded_ds.facets[key] + else: + unexpanded_globs[key] = value + + if unexpanded_globs: + msg = _report_unexpanded_globs( + dataset, + expanded_ds, + unexpanded_globs, + ) + errors.append(msg) + continue - new_ds = dataset.copy() - new_ds.facets.update(updated_facets) - new_ds.supplementaries = expanded_ds.supplementaries + new_ds = dataset.copy() + new_ds.facets.update(updated_facets) + new_ds.supplementaries = expanded_ds.supplementaries - all_datasets[-1].append((updated_facets, new_ds)) - - # If globs have been expanded, only consider those datasets that contain - # all necessary input variables if derivation is necessary - for updated_facets, new_ds in all_datasets[0]: - other_facets = [[d[0] for d in ds] for ds in all_datasets[1:]] - if all(updated_facets in facets for 
facets in other_facets): - result.append(new_ds) - else: - logger.debug( - "Not all necessary input variables to derive '%s' are " - "available for dataset %s", - dataset["short_name"], - updated_facets, - ) + result.append(new_ds) if errors: raise RecipeError("\n".join(errors)) @@ -538,59 +492,23 @@ def _report_unexpanded_globs( return msg -def _derive_needed(dataset: Dataset) -> bool: - """Check if dataset needs to be derived from other datasets.""" - if not dataset.facets.get("derive"): - return False - if dataset.facets.get("force_derivation"): - return True - if _isglob(dataset.facets.get("timerange", "")): - # Our file finding routines are not able to handle globs. - dataset = dataset.copy() - dataset.facets.pop("timerange") - - copy = dataset.copy() - copy.supplementaries = [] - return not copy.files - - def _get_input_datasets(dataset: Dataset) -> list[Dataset]: """Determine the input datasets needed for deriving `dataset`.""" - facets = dataset.facets - if not _derive_needed(dataset): - _fix_cmip5_fx_ensemble(dataset) - return [dataset] + if not dataset._derivation_necessary(): # noqa: SLF001 + return dataset.input_datasets - # Configure input datasets needed to derive variable - datasets = [] - required_vars = get_required(facets["short_name"], facets["project"]) # type: ignore - # idea: add option to specify facets in list of dicts that is value of - # 'derive' in the recipe and use that instead of get_required? - for input_facets in required_vars: - input_dataset = dataset.copy() - keep = {"alias", "recipe_dataset_index", *dataset.minimal_facets} - input_dataset.facets = { - k: v for k, v in input_dataset.facets.items() if k in keep - } - input_dataset.facets.update(input_facets) - input_dataset.augment_facets() - _fix_cmip5_fx_ensemble(input_dataset) - if input_facets.get("optional") and not input_dataset.files: + # Skip optional datasets if no data is available + input_datasets: list[Dataset] = [] + for input_dataset in dataset.input_datasets: + if input_dataset.facets.get("optional") and not input_dataset.files: logger.info( "Skipping: no data found for %s which is marked as 'optional'", input_dataset, ) else: - datasets.append(input_dataset) + input_datasets.append(input_dataset) - # Check timeranges of available input data. - timeranges: set[str] = set() - for input_dataset in datasets: - if "timerange" in input_dataset.facets: - timeranges.add(input_dataset.facets["timerange"]) # type: ignore - check.differing_timeranges(timeranges, required_vars) - - return datasets + return input_datasets def _representative_datasets(dataset: Dataset) -> list[Dataset]: diff --git a/esmvalcore/dataset.py b/esmvalcore/dataset.py index bc5998d58a..5fc24adb53 100644 --- a/esmvalcore/dataset.py +++ b/esmvalcore/dataset.py @@ -33,6 +33,7 @@ _get_start_end_date, ) from esmvalcore.preprocessor import preprocess +from esmvalcore.preprocessor._derive import get_required if TYPE_CHECKING: from collections.abc import Iterable, Iterator, Sequence @@ -102,7 +103,7 @@ class Dataset: Attributes ---------- - supplementaries : list[Dataset] + supplementaries: list[Dataset] List of supplementary datasets. facets: :obj:`esmvalcore.typing.Facets` Facets describing the dataset. 
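As a usage sketch of the refactor above: the recipe layer now only drops optional inputs without data, while `Dataset.input_datasets` decides whether derivation inputs are needed at all. A minimal sketch under these assumptions (facet values are illustrative and mirror the `siextent` test added below; a configured session with local OBS6 data is assumed):

from esmvalcore.dataset import Dataset

# Derived variable whose derivation script marks some inputs as optional
# (facet values below are illustrative only).
dataset = Dataset(
    project="OBS6",
    dataset="SAT",
    mip="SImon",
    short_name="siextent",
    tier=2,
    type="sat",
    timerange="1980/2000",
    derive=True,
)

# `input_datasets` returns the datasets required for derivation, or the
# dataset itself in a list if derivation is not necessary. The recipe
# layer then skips optional inputs for which no files were found.
input_datasets = [
    ds
    for ds in dataset.input_datasets
    if not (ds.facets.get("optional") and not ds.files)
]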
@@ -132,6 +133,7 @@ def __init__(self, **facets: FacetValue) -> None: self._session: Session | None = None self._files: Sequence[File] | None = None self._file_globs: Sequence[Path] | None = None + self._input_datasets: list[Dataset] = [] for key, value in facets.items(): self.set_facet(key, deepcopy(value), persist=True) @@ -188,50 +190,170 @@ def _derivation_necessary(self) -> bool: # are found ds_copy = self.copy() ds_copy.supplementaries = [] + + # Avoid potential errors from missing data during timerange glob + # expansion + if _isglob(ds_copy.facets.get("timerange", "")): + ds_copy.facets.pop("timerange", None) + return not ds_copy.files + def _get_input_datasets(self) -> list[Dataset]: + """Get input datasets.""" + input_datasets: list[Dataset] = [] + required_vars_facets = get_required( + self.facets["short_name"], # type: ignore + self.facets["project"], # type: ignore + ) + + for required_facets in required_vars_facets: + input_dataset = self._copy(derive=False, force_derivation=False) + keep = {"alias", "recipe_dataset_index", *self.minimal_facets} + input_dataset.facets = { + k: v for k, v in input_dataset.facets.items() if k in keep + } + input_dataset.facets.update(required_facets) + input_dataset.augment_facets() + input_datasets.append(input_dataset) + + return input_datasets + + @property + def input_datasets(self) -> list[Dataset]: + """Get input datasets. + + For non-derived variables (i.e., those with facet ``derive=False``), + this will simply return the dataset itself in a list. + + For derived variables (i.e., those with facet ``derive=True``), this + will return the datasets required for derivation if derivation is + necessary, and the dataset itself if derivation is not necessary. + Derivation is necessary if the facet ``force_derivation=True`` is set + or no files for the dataset itself are available. + + See also :func:`esmvalcore.preprocessor.derive` for an example usage. + + """ + if self._input_datasets: + return self._input_datasets + + if not self._derivation_necessary(): + input_datasets = [self] + else: + input_datasets = self._get_input_datasets() + + self._input_datasets = input_datasets + return input_datasets + + @staticmethod def _file_to_dataset( - self, + dataset: Dataset, file: esgf.ESGFFile | local.LocalFile, ) -> Dataset: """Create a dataset from a file with a `facets` attribute.""" facets = dict(file.facets) - if "version" not in self.facets: + if "version" not in dataset.facets: # Remove version facet if no specific version requested facets.pop("version", None) updated_facets = { f: v for f, v in facets.items() - if f in self.facets - and _isglob(self.facets[f]) - and _ismatch(v, self.facets[f]) + if f in dataset.facets + and _isglob(dataset.facets[f]) + and _ismatch(v, dataset.facets[f]) } - dataset = self.copy() - dataset.facets.update(updated_facets) + new_dataset = dataset.copy() + new_dataset.facets.update(updated_facets) # If possible, remove unexpanded facets that can be automatically # populated. 
- unexpanded = {f for f, v in dataset.facets.items() if _isglob(v)} + unexpanded = {f for f, v in new_dataset.facets.items() if _isglob(v)} required_for_augment = {"project", "mip", "short_name", "dataset"} if unexpanded and not unexpanded & required_for_augment: - copy = dataset.copy() + copy = new_dataset.copy() copy.supplementaries = [] for facet in unexpanded: copy.facets.pop(facet) copy.augment_facets() for facet in unexpanded: if facet in copy.facets: - dataset.facets.pop(facet) + new_dataset.facets.pop(facet) + + return new_dataset + + def _get_all_available_datasets(self) -> Iterator[Dataset]: # noqa: C901 + """Yield datasets based on the available files. + + This function requires that self.facets['mip'] is not a glob pattern. - return dataset + Does take variable derivation into account, i.e., datasets available + through variable derivation are returned. - def _get_available_datasets(self) -> Iterator[Dataset]: + """ + datasets_found = False + + # If no forced derivation is requested, search for datasets based on + # files from self + if not self._is_force_derived(): + for dataset in self._get_available_datasets(self): + datasets_found = True + yield dataset + + # For variables that cannot be derived, we are done here + if not self._is_derived(): + return + + # If forced derivation is requested or no datasets based on files from + # self have been found, search for datasets based on files from input + # datasets + if self._is_force_derived() or not datasets_found: + all_datasets: list[list[tuple[dict, Dataset]]] = [] + for input_dataset in self._get_input_datasets(): + all_datasets.append([]) + for expanded_ds in self._get_available_datasets( + input_dataset, + ): + updated_facets = {} + for key, value in self.facets.items(): + if _isglob(value): + if key in expanded_ds.facets and not _isglob( + expanded_ds[key], + ): + updated_facets[key] = expanded_ds.facets[key] + new_ds = self.copy() + new_ds.facets.update(updated_facets) + new_ds.supplementaries = self.supplementaries + + all_datasets[-1].append((updated_facets, new_ds)) + + # Only consider those datasets that contain all input variables + # necessary for derivation + for updated_facets, new_ds in all_datasets[0]: + other_facets = [[d[0] for d in ds] for ds in all_datasets[1:]] + if all(updated_facets in facets for facets in other_facets): + yield new_ds + else: + logger.debug( + "Not all necessary input variables to derive '%s' are " + "available for %s with facets %s", + self["short_name"], + new_ds.summary(shorten=True), + updated_facets, + ) + + def _get_available_datasets(self, dataset: Dataset) -> Iterator[Dataset]: """Yield datasets based on the available files. This function requires that self.facets['mip'] is not a glob pattern. + + Does not take variable derivation into account, i.e., datasets + potentially available through variable derivation are ignored. To + consider derived variables properly, use the function + :func:`_get_all_available_datasets`. 
+ """ - dataset_template = self.copy() + dataset_template = dataset.copy() dataset_template.supplementaries = [] if _isglob(dataset_template.facets.get("timerange")): # Remove wildcard `timerange` facet, because data finding cannot @@ -242,31 +364,30 @@ def _get_available_datasets(self) -> Iterator[Dataset]: partially_defined = [] expanded = False for file in dataset_template.files: - dataset = self._file_to_dataset(file) + new_dataset = self._file_to_dataset(dataset, file) # Filter out identical datasets facetset = frozenset( (f, frozenset(v) if isinstance(v, list) else v) - for f, v in dataset.facets.items() + for f, v in new_dataset.facets.items() ) if facetset not in seen: seen.add(facetset) if any( _isglob(v) - for f, v in dataset.facets.items() + for f, v in new_dataset.facets.items() if f != "timerange" ): - partially_defined.append((dataset, file)) + partially_defined.append((new_dataset, file)) else: - dataset._update_timerange() # noqa: SLF001 - dataset._supplementaries_from_files() # noqa: SLF001 + new_dataset._update_timerange() # noqa: SLF001 expanded = True - yield dataset + yield new_dataset # Only yield datasets with globs if there is no better alternative - for dataset, file in partially_defined: + for new_dataset, file in partially_defined: msg = ( - f"{dataset} with unexpanded wildcards, created from file " + f"{new_dataset} with unexpanded wildcards, created from file " f"{file} with facets {file.facets}. Are the missing facets " "in the path to the file?" if isinstance(file, local.LocalFile) @@ -280,14 +401,13 @@ def _get_available_datasets(self) -> Iterator[Dataset]: "because it still contains wildcards.", msg, ) - yield dataset + yield new_dataset def from_files(self) -> Iterator[Dataset]: """Create datasets based on the available files. The facet values for local files are retrieved from the directory tree where the directories represent the facets values. - Reading facet values from file names is not yet supported. See :ref:`CMOR-DRS` for more information on this kind of file organization. @@ -305,6 +425,10 @@ def from_files(self) -> Iterator[Dataset]: Supplementary datasets will in inherit the facet values from the main dataset for those facets listed in :obj:`INHERITED_FACETS`. + This also works for :ref:`derived variables `. The + input datasets that are necessary for derivation can be accessed via + :attr:`Dataset.input_datasets`. + Examples -------- See :ref:`/notebooks/discovering-data.ipynb` for example use cases. 
@@ -331,7 +455,8 @@ def from_files(self) -> Iterator[Dataset]: for mip in mips: dataset_template = self.copy(mip=mip) - for dataset in dataset_template._get_available_datasets(): # noqa: SLF001 + for dataset in dataset_template._get_all_available_datasets(): # noqa: SLF001 + dataset._supplementaries_from_files() # noqa: SLF001 expanded = True yield dataset @@ -606,15 +731,29 @@ def minimal_facets(self) -> Facets: """Return a dictionary with the persistent facets.""" return {k: v for k, v in self.facets.items() if k in self._persist} + @staticmethod + def _get_version(dataset: Dataset) -> str | list[str]: + """Get available version(s) of dataset.""" + versions: set[str] = set() + for file in dataset.files: + if "version" in file.facets: + versions.add(str(file.facets["version"])) + return versions.pop() if len(versions) == 1 else sorted(versions) + def set_version(self) -> None: """Set the ``'version'`` facet based on the available data.""" versions: set[str] = set() - for file in self.files: - if "version" in file.facets: - versions.add(file.facets["version"]) # type: ignore + for input_dataset in self.input_datasets: + version = self._get_version(input_dataset) + if version: + if isinstance(version, list): + versions.update(version) + else: + versions.add(version) version = versions.pop() if len(versions) == 1 else sorted(versions) if version: self.set_facet("version", version) + for supplementary_ds in self.supplementaries: supplementary_ds.set_version() diff --git a/esmvalcore/preprocessor/_derive/__init__.py b/esmvalcore/preprocessor/_derive/__init__.py index cbf138e2d7..5c14367dd6 100644 --- a/esmvalcore/preprocessor/_derive/__init__.py +++ b/esmvalcore/preprocessor/_derive/__init__.py @@ -2,6 +2,7 @@ import importlib import logging +from collections.abc import Sequence from copy import deepcopy from pathlib import Path @@ -70,7 +71,7 @@ def get_required(short_name: str, project: str) -> list[Facets]: def derive( - cubes: CubeList, + cubes: Sequence[Cube], short_name: str, long_name: str, units: str | Unit, @@ -81,8 +82,7 @@ def derive( Parameters ---------- cubes: - Includes all the needed variables for derivation defined in - :func:`get_required`. + Includes all the needed variables for derivation. short_name: short_name long_name: @@ -96,6 +96,38 @@ def derive( ------- iris.cube.Cube The new derived variable. + + Examples + -------- + Input variables for derivation can be obtained via + :attr:`esmvalcore.dataset.Dataset.input_datasets`. + + For example, to derive the longwave cloud radiative effect (LWCRE) for the + model CESM2, you can use: + + >>> from esmvalcore.dataset import Dataset + >>> from esmvalcore.preprocessor import derive + >>> dataset = Dataset( + ... project="CMIP6", + ... dataset="CESM2", + ... exp="historical", + ... ensemble="r1i1p1f1", + ... grid="gn", + ... timerange="2000/2014", + ... short_name="lwcre", + ... mip="Amon", + ... derive=True, + ... ) + >>> cubes = [d.load() for d in dataset.input_datasets] + >>> cube = derive( + ... cubes, + ... short_name="lwcre", + ... long_name="TOA Longwave Cloud Radiative Effect", + ... units="W m-2", + ... 
) + >>> print(cube.var_name)  # doctest: +SKIP + lwcre + """ if short_name == cubes[0].var_name: return cubes[0] diff --git a/notebooks/discovering-data.ipynb b/notebooks/discovering-data.ipynb index d6c9001ef2..581e8ca249 100644 --- a/notebooks/discovering-data.ipynb +++ b/notebooks/discovering-data.ipynb @@ -13,7 +13,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "f0ccfe7f-c535-4606-99ce-be24960aece1", "metadata": {}, "outputs": [], @@ -89,7 +89,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Found 778 datasets, showing the first 10:\n" + "Found 727 datasets, showing the first 10:\n" ] }, { @@ -168,20 +168,20 @@ " 'grid': 'gn',\n", " 'institute': 'AWI'},\n", " Dataset:\n", - " {'dataset': 'BCC-CSM2-MR',\n", + " {'dataset': 'AWI-ESM-1-REcoM',\n", " 'project': 'CMIP6',\n", " 'mip': 'Amon',\n", " 'short_name': 'tas',\n", " 'ensemble': 'r1i1p1f1',\n", " 'exp': 'historical',\n", " 'grid': 'gn',\n", - " 'institute': 'BCC'},\n", + " 'institute': 'AWI'},\n", " Dataset:\n", " {'dataset': 'BCC-CSM2-MR',\n", " 'project': 'CMIP6',\n", " 'mip': 'Amon',\n", " 'short_name': 'tas',\n", - " 'ensemble': 'r2i1p1f1',\n", + " 'ensemble': 'r1i1p1f1',\n", " 'exp': 'historical',\n", " 'grid': 'gn',\n", " 'institute': 'BCC'}]" @@ -253,7 +253,7 @@ { "data": { "text/plain": [ - "[ESGFFile:CMIP6/CMIP/AS-RCEC/TaiESM1/historical/r1i1p1f1/Amon/tas/gn/v20200623/tas_Amon_TaiESM1_historical_r1i1p1f1_gn_185001-201412.nc on hosts ['esgf-data1.llnl.gov', 'esgf.ceda.ac.uk', 'esgf.rcec.sinica.edu.tw', 'esgf3.dkrz.de', 'esgf-data04.diasjp.net', 'esgf.nci.org.au', 'esgf3.dkrz.de']]" + "[ESGFFile:CMIP6/CMIP/AS-RCEC/TaiESM1/historical/r1i1p1f1/Amon/tas/gn/v20200623/tas_Amon_TaiESM1_historical_r1i1p1f1_gn_185001-201412.nc on hosts ['esgf.ceda.ac.uk', 'esgf.rcec.sinica.edu.tw', 'esgf3.dkrz.de', 'esgf3.dkrz.de']]" ] }, "execution_count": 6, @@ -282,7 +282,7 @@ { "data": { "text/plain": [ - "LocalFile('~/climate_data/CMIP6/CMIP/AS-RCEC/TaiESM1/historical/r1i1p1f1/Amon/tas/gn/v20200623/tas_Amon_TaiESM1_historical_r1i1p1f1_gn_185001-201412.nc')" + "LocalFile('/home/manuel/climate_data/CMIP6/CMIP/AS-RCEC/TaiESM1/historical/r1i1p1f1/Amon/tas/gn/v20200623/tas_Amon_TaiESM1_historical_r1i1p1f1_gn_185001-201412.nc')" ] }, "execution_count": 7, @@ -312,6 +312,235 @@ "source": [ "download(dataset.files, CFG[\"download_dir\"])" ] + }, + { + "cell_type": "markdown", + "id": "d3006d90", + "metadata": {}, + "source": [ + "`Dataset.from_files` can also handle derived variables properly:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "b75314e3", + "metadata": {}, + "outputs": [], + "source": [ + "dataset_template = Dataset(\n", + " short_name=\"lwcre\",\n", + " mip=\"Amon\",\n", + " project=\"CMIP6\",\n", + " exp=\"historical\",\n", + " dataset=\"*\",\n", + " institute=\"*\",\n", + " ensemble=\"r1i1p1f1\",\n", + " grid=\"gn\",\n", + " derive=True,\n", + " force_derivation=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "b87c247f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found 36 datasets, showing the first 10:\n" + ] + }, + { + "data": { + "text/plain": [ + "[Dataset:\n", + " {'dataset': 'TaiESM1',\n", + " 'project': 'CMIP6',\n", + " 'mip': 'Amon',\n", + " 'short_name': 'lwcre',\n", + " 'derive': True,\n", + " 'ensemble': 'r1i1p1f1',\n", + " 'exp': 'historical',\n", + " 'force_derivation': True,\n", + " 'grid': 'gn',\n", + " 'institute': 'AS-RCEC'},\n", + " Dataset:\n", + 
{'dataset': 'AWI-CM-1-1-MR',\n", + " 'project': 'CMIP6',\n", + " 'mip': 'Amon',\n", + " 'short_name': 'lwcre',\n", + " 'derive': True,\n", + " 'ensemble': 'r1i1p1f1',\n", + " 'exp': 'historical',\n", + " 'force_derivation': True,\n", + " 'grid': 'gn',\n", + " 'institute': 'AWI'},\n", + " Dataset:\n", + " {'dataset': 'AWI-ESM-1-1-LR',\n", + " 'project': 'CMIP6',\n", + " 'mip': 'Amon',\n", + " 'short_name': 'lwcre',\n", + " 'derive': True,\n", + " 'ensemble': 'r1i1p1f1',\n", + " 'exp': 'historical',\n", + " 'force_derivation': True,\n", + " 'grid': 'gn',\n", + " 'institute': 'AWI'},\n", + " Dataset:\n", + " {'dataset': 'AWI-ESM-1-REcoM',\n", + " 'project': 'CMIP6',\n", + " 'mip': 'Amon',\n", + " 'short_name': 'lwcre',\n", + " 'derive': True,\n", + " 'ensemble': 'r1i1p1f1',\n", + " 'exp': 'historical',\n", + " 'force_derivation': True,\n", + " 'grid': 'gn',\n", + " 'institute': 'AWI'},\n", + " Dataset:\n", + " {'dataset': 'BCC-CSM2-MR',\n", + " 'project': 'CMIP6',\n", + " 'mip': 'Amon',\n", + " 'short_name': 'lwcre',\n", + " 'derive': True,\n", + " 'ensemble': 'r1i1p1f1',\n", + " 'exp': 'historical',\n", + " 'force_derivation': True,\n", + " 'grid': 'gn',\n", + " 'institute': 'BCC'},\n", + " Dataset:\n", + " {'dataset': 'BCC-ESM1',\n", + " 'project': 'CMIP6',\n", + " 'mip': 'Amon',\n", + " 'short_name': 'lwcre',\n", + " 'derive': True,\n", + " 'ensemble': 'r1i1p1f1',\n", + " 'exp': 'historical',\n", + " 'force_derivation': True,\n", + " 'grid': 'gn',\n", + " 'institute': 'BCC'},\n", + " Dataset:\n", + " {'dataset': 'CAMS-CSM1-0',\n", + " 'project': 'CMIP6',\n", + " 'mip': 'Amon',\n", + " 'short_name': 'lwcre',\n", + " 'derive': True,\n", + " 'ensemble': 'r1i1p1f1',\n", + " 'exp': 'historical',\n", + " 'force_derivation': True,\n", + " 'grid': 'gn',\n", + " 'institute': 'CAMS'},\n", + " Dataset:\n", + " {'dataset': 'CAS-ESM2-0',\n", + " 'project': 'CMIP6',\n", + " 'mip': 'Amon',\n", + " 'short_name': 'lwcre',\n", + " 'derive': True,\n", + " 'ensemble': 'r1i1p1f1',\n", + " 'exp': 'historical',\n", + " 'force_derivation': True,\n", + " 'grid': 'gn',\n", + " 'institute': 'CAS'},\n", + " Dataset:\n", + " {'dataset': 'FGOALS-g3',\n", + " 'project': 'CMIP6',\n", + " 'mip': 'Amon',\n", + " 'short_name': 'lwcre',\n", + " 'derive': True,\n", + " 'ensemble': 'r1i1p1f1',\n", + " 'exp': 'historical',\n", + " 'force_derivation': True,\n", + " 'grid': 'gn',\n", + " 'institute': 'CAS'},\n", + " Dataset:\n", + " {'dataset': 'IITM-ESM',\n", + " 'project': 'CMIP6',\n", + " 'mip': 'Amon',\n", + " 'short_name': 'lwcre',\n", + " 'derive': True,\n", + " 'ensemble': 'r1i1p1f1',\n", + " 'exp': 'historical',\n", + " 'force_derivation': True,\n", + " 'grid': 'gn',\n", + " 'institute': 'CCCR-IITM'}]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "datasets = list(dataset_template.from_files())\n", + "print(f\"Found {len(datasets)} datasets, showing the first 10:\")\n", + "datasets[:10]" + ] + }, + { + "cell_type": "markdown", + "id": "18e3a0b7", + "metadata": {}, + "source": [ + "The facet `force_derivation=True` ensures variable derivation. If omitted and files that provide the variable `lwcre` without derivation are present, only those are returned." + ] + }, + { + "cell_type": "markdown", + "id": "f00a886f", + "metadata": {}, + "source": [ + "If variable derivation is necessary (this will always be the case if `force_derivation=True` is used), the `files` attribute of the datasets may be empty. 
In this case, the input files of the input variables necessary for derivation can be accessed via the `Dataset.input_datasets` attribute:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "c5edfa65", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset = datasets[0]\n", + "dataset.files" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "97cdf12d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "rlut\n", + "[ESGFFile:CMIP6/CMIP/AS-RCEC/TaiESM1/historical/r1i1p1f1/Amon/rlut/gn/v20200623/rlut_Amon_TaiESM1_historical_r1i1p1f1_gn_185001-201412.nc on hosts ['esgf.ceda.ac.uk', 'esgf.rcec.sinica.edu.tw', 'esgf3.dkrz.de', 'esgf3.dkrz.de']]\n", + "rlutcs\n", + "[ESGFFile:CMIP6/CMIP/AS-RCEC/TaiESM1/historical/r1i1p1f1/Amon/rlutcs/gn/v20200623/rlutcs_Amon_TaiESM1_historical_r1i1p1f1_gn_185001-201412.nc on hosts ['esgf.ceda.ac.uk', 'esgf.rcec.sinica.edu.tw', 'esgf3.dkrz.de']]\n" + ] + } + ], + "source": [ + "for d in dataset.input_datasets:\n", + " print(d[\"short_name\"])\n", + " print(d.files)" + ] } ], "metadata": { diff --git a/tests/integration/recipe/test_check.py b/tests/integration/recipe/test_check.py index 551603a446..6aec456f80 100644 --- a/tests/integration/recipe/test_check.py +++ b/tests/integration/recipe/test_check.py @@ -274,27 +274,6 @@ def test_valid_time_selection_rejections(timerange, message): assert str(rec_err.value) == message -def test_differing_timeranges(caplog): - timeranges = set() - timeranges.add("1950/1951") - timeranges.add("1950/1952") - required_variables = [ - {"short_name": "rsdscs", "timerange": "1950/1951"}, - {"short_name": "rsuscs", "timerange": "1950/1952"}, - ] - with pytest.raises(ValueError) as exc: - check.differing_timeranges(timeranges, required_variables) - expected_log = ( - f"Differing timeranges with values {timeranges} " - "found for required variables " - "[{'short_name': 'rsdscs', 'timerange': '1950/1951'}, " - "{'short_name': 'rsuscs', 'timerange': '1950/1952'}]. " - "Set `timerange` to a common value." 
- ) - - assert expected_log in str(exc.value) - - def test_data_availability_nonexistent(tmp_path): var = { "dataset": "ABC", diff --git a/tests/integration/recipe/test_recipe.py b/tests/integration/recipe/test_recipe.py index 5bd6ad47dc..8be287d7d6 100644 --- a/tests/integration/recipe/test_recipe.py +++ b/tests/integration/recipe/test_recipe.py @@ -127,7 +127,7 @@ def get_required(short_name, _): ] monkeypatch.setattr( - esmvalcore._recipe.to_datasets, + esmvalcore.dataset, "get_required", get_required, ) @@ -2538,9 +2538,7 @@ def test_representative_dataset_derived_var( expected_facets = { # Already present in variable "dataset": "ICON", - "derive": True, "exp": "atm_amip-rad_R2B4_r1i1p1f1", - "force_derivation": force_derivation, "frequency": "mon", "mip": "Amon", "project": "ICON", @@ -2550,6 +2548,9 @@ def test_representative_dataset_derived_var( "units": "W m-2", # Added by _add_extra_facets "var_type": "atm_2d_ml", + # Added/changed by Dataset._get_input_datasets() + "derive": False, + "force_derivation": False, } if force_derivation: expected_datasets = [ @@ -2604,9 +2605,7 @@ def test_get_derive_input_variables(patched_datafinder, session): "short_name": "rsdscs", # Already present in variables "dataset": "ICON", - "derive": True, "exp": "atm_amip-rad_R2B4_r1i1p1f1", - "force_derivation": True, "frequency": "mon", "mip": "Amon", "project": "ICON", @@ -2619,6 +2618,9 @@ def test_get_derive_input_variables(patched_datafinder, session): "units": "W m-2", # Added by _add_extra_facets "var_type": "atm_2d_ml", + # Added/changed by Dataset._get_input_datasets() + "derive": False, + "force_derivation": False, } rsdscs = Dataset(**rsdscs_facets) rsdscs.session = session @@ -2628,9 +2630,7 @@ def test_get_derive_input_variables(patched_datafinder, session): "short_name": "rsuscs", # Already present in variables "dataset": "ICON", - "derive": True, "exp": "atm_amip-rad_R2B4_r1i1p1f1", - "force_derivation": True, "frequency": "mon", "mip": "Amon", "project": "ICON", @@ -2643,6 +2643,9 @@ def test_get_derive_input_variables(patched_datafinder, session): "units": "W m-2", # Added by _add_extra_facets "var_type": "atm_2d_ml", + # Added/changed by Dataset._get_input_datasets() + "derive": False, + "force_derivation": False, } rsuscs = Dataset(**rsuscs_facets) rsuscs.session = session diff --git a/tests/unit/recipe/test_recipe.py b/tests/unit/recipe/test_recipe.py index a3678924e8..4cf7bfc7b4 100644 --- a/tests/unit/recipe/test_recipe.py +++ b/tests/unit/recipe/test_recipe.py @@ -912,28 +912,6 @@ def test_get_default_settings(mocker): } -def test_set_version(mocker): - dataset = Dataset(short_name="tas") - supplementary = Dataset(short_name="areacella") - dataset.supplementaries = [supplementary] - - input_dataset = Dataset(short_name="tas") - file1 = mocker.Mock() - file1.facets = {"version": "v1"} - file2 = mocker.Mock() - file2.facets = {"version": "v2"} - input_dataset.files = [file1, file2] - - file3 = mocker.Mock() - file3.facets = {"version": "v3"} - supplementary.files = [file3] - - _recipe._set_version(dataset, [input_dataset]) - print(dataset) - assert dataset.facets["version"] == ["v1", "v2"] - assert dataset.supplementaries[0].facets["version"] == "v3" - - def test_extract_preprocessor_order(): profile = { "custom_order": True, @@ -1003,3 +981,23 @@ def test_special_name_to_dataset_invalid_special_name_type(): ) with pytest.raises(RecipeError, match=msg): _recipe._special_name_to_dataset(facets, "reference_dataset") + + +def test_fix_cmip5_fx_ensemble(monkeypatch): + def 
find_files(self): + if self.facets["ensemble"] == "r0i0p0": + self._files = ["file1.nc"] + + monkeypatch.setattr(Dataset, "find_files", find_files) + + dataset = Dataset( + dataset="dataset1", + short_name="orog", + mip="fx", + project="CMIP5", + ensemble="r1i1p1", + ) + + _recipe._fix_cmip5_fx_ensemble(dataset) + + assert dataset["ensemble"] == "r0i0p0" diff --git a/tests/unit/recipe/test_to_datasets.py b/tests/unit/recipe/test_to_datasets.py index 20439a1d07..2e560765f1 100644 --- a/tests/unit/recipe/test_to_datasets.py +++ b/tests/unit/recipe/test_to_datasets.py @@ -1,3 +1,4 @@ +import logging import textwrap from pathlib import Path @@ -302,6 +303,57 @@ def test_get_input_datasets_derive(session): assert rlns["frequency"] == "1hr" +def test_get_input_datasets_derive_optional(caplog, tmp_path, session): + facets = { + "project": "OBS6", + "dataset": "SAT", + "mip": "SImon", + "short_name": "siextent", + "tier": 2, + "type": "sat", + "timerange": "1980/2000", + "derive": True, + } + + input_dir = tmp_path / "Tier2" / "SAT" + input_dir.mkdir(parents=True, exist_ok=True) + sic_file = LocalFile( + input_dir / "OBS6_SAT_sat_1_SImon_siconca_1980-2000.nc", + ) + sic_file.touch() + + dataset = Dataset(**facets) + dataset.files = [] + dataset.session = session + + with caplog.at_level(logging.INFO): + datasets = to_datasets._get_input_datasets(dataset) + + expected = Dataset( + dataset="SAT", + project="OBS6", + mip="SImon", + short_name="siconca", + derive=False, + frequency="mon", + long_name="Sea-Ice Area Percentage (Atmospheric Grid)", + modeling_realm=["seaIce"], + optional="true", + original_short_name="siconca", + standard_name="sea_ice_area_fraction", + tier=2, + timerange="1980/2000", + type="sat", + units="%", + ) + expected.session = session + + assert datasets == [expected] + + logger_infos = [r.message for r in caplog.records if r.levelname == "INFO"] + assert "which is marked as 'optional'" in logger_infos[-1] + + def test_max_years(session): recipe_txt = textwrap.dedent(""" diagnostics: @@ -347,26 +399,6 @@ def from_files(_): to_datasets._dataset_from_files(dataset) -def test_fix_cmip5_fx_ensemble(monkeypatch): - def find_files(self): - if self.facets["ensemble"] == "r0i0p0": - self._files = ["file1.nc"] - - monkeypatch.setattr(Dataset, "find_files", find_files) - - dataset = Dataset( - dataset="dataset1", - short_name="orog", - mip="fx", - project="CMIP5", - ensemble="r1i1p1", - ) - - to_datasets._fix_cmip5_fx_ensemble(dataset) - - assert dataset["ensemble"] == "r0i0p0" - - def test_get_supplementary_short_names(monkeypatch): def _update_cmor_facets(facets): facets["modeling_realm"] = "atmos" diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 68e8ceed05..2aa1b91317 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -1,3 +1,4 @@ +import logging import textwrap from collections import defaultdict from pathlib import Path @@ -1181,6 +1182,642 @@ def test_from_files_with_globs_and_only_missing_facets(monkeypatch, session): assert datasets == [expected] +OBS6_SAT_FACETS = { + "project": "OBS6", + "dataset": "SAT", + "mip": "Amon", + "tier": 2, + "type": "sat", + "timerange": "1980/2000", +} + + +def test_from_files_no_files_glob(session): + dataset = Dataset(**{**OBS6_SAT_FACETS, "type": "*"}, short_name="tas") + datasets = list(dataset.from_files()) + assert datasets == [dataset] + + +@pytest.mark.parametrize("timerange", ["1980/2000", "*"]) +def test_from_files_with_derived_no_files_glob(timerange, session): + dataset = Dataset( + 
**{**OBS6_SAT_FACETS, "type": "*", "timerange": timerange}, + short_name="lwcre", + derive=True, + ) + datasets = list(dataset.from_files()) + assert datasets == [dataset] + + +@pytest.fixture +def lwcre_file(tmp_path): + input_dir = tmp_path / "Tier2" / "SAT" + input_dir.mkdir(parents=True, exist_ok=True) + lwcre = esmvalcore.local.LocalFile( + input_dir, + "OBS6_SAT_sat_1_Amon_lwcre_1980-2000.nc", + ) + lwcre.touch() + return lwcre + + +@pytest.fixture +def lwcre_file_ground(tmp_path): + input_dir = tmp_path / "Tier2" / "SAT" + input_dir.mkdir(parents=True, exist_ok=True) + lwcre = esmvalcore.local.LocalFile( + input_dir, + "OBS6_SAT_ground_1_Amon_lwcre_1980-2000.nc", + ) + lwcre.touch() + return lwcre + + +@pytest.fixture +def rlut_file(tmp_path): + input_dir = tmp_path / "Tier2" / "SAT" + input_dir.mkdir(parents=True, exist_ok=True) + rlut = esmvalcore.local.LocalFile( + input_dir, + "OBS6_SAT_sat_1_Amon_rlut_1980-2000.nc", + ) + rlut.touch() + return rlut + + +@pytest.fixture +def rlut_file_ground(tmp_path): + input_dir = tmp_path / "Tier2" / "SAT" + input_dir.mkdir(parents=True, exist_ok=True) + rlut = esmvalcore.local.LocalFile( + input_dir, + "OBS6_SAT_ground_1_Amon_rlut_1980-2000.nc", + ) + rlut.touch() + return rlut + + +@pytest.fixture +def rlutcs_file(tmp_path): + input_dir = tmp_path / "Tier2" / "SAT" + input_dir.mkdir(parents=True, exist_ok=True) + rlutcs = esmvalcore.local.LocalFile( + input_dir, + "OBS6_SAT_sat_1_Amon_rlutcs_1980-2000.nc", + ) + rlutcs.touch() + return rlutcs + + +@pytest.fixture +def pr_file(tmp_path): + input_dir = tmp_path / "Tier2" / "SAT" + input_dir.mkdir(parents=True, exist_ok=True) + pr = esmvalcore.local.LocalFile( + input_dir, + "OBS6_SAT_sat_1_Amon_pr_1980-2000.nc", + ) + pr.touch() + return pr + + +def test_from_files_with_derived_no_derivation(lwcre_file, session): + """Test `from_files` with derived variable and supplementary.""" + dataset = Dataset(**OBS6_SAT_FACETS, short_name="lwcre", derive=True) + dataset.add_supplementary(short_name="pr") + dataset.session = session + + datasets = list(dataset.from_files()) + + expected = Dataset(**OBS6_SAT_FACETS, short_name="lwcre", derive=True) + expected.add_supplementary(short_name="pr") + expected.session = session + + assert datasets == [expected] + assert datasets[0].files == [lwcre_file] + assert datasets[0].supplementaries[0].files == [] + + expected_input_dataset = Dataset( + **OBS6_SAT_FACETS, + short_name="lwcre", + derive=True, + frequency="mon", + long_name="TOA Longwave Cloud Radiative Effect", + modeling_realm=["atmos"], + original_short_name="lwcre", + standard_name="", + units="W m-2", + ) + expected_input_dataset.supplementaries = [ + Dataset( + **OBS6_SAT_FACETS, + short_name="pr", + derive=False, + frequency="mon", + long_name="Precipitation", + modeling_realm=["atmos"], + original_short_name="pr", + standard_name="precipitation_flux", + units="kg m-2 s-1", + ), + ] + expected_input_dataset.session = session + + assert datasets[0].input_datasets == [expected_input_dataset] + assert expected_input_dataset.files == [lwcre_file] + + +@pytest.mark.parametrize("timerange", ["1980/2000", "*"]) +def test_from_files_with_derived_no_derivation_glob( + timerange, + lwcre_file, + lwcre_file_ground, + pr_file, + session, +): + """Test `from_files` with derived variable and supplementary.""" + dataset = Dataset( + **{**OBS6_SAT_FACETS, "type": "*", "timerange": timerange}, + short_name="lwcre", + derive=True, + ) + dataset.add_supplementary(short_name="pr") + dataset.session = session + + 
datasets = list(dataset.from_files()) + + expected_datasets = [ + Dataset( + **{**OBS6_SAT_FACETS, "type": "ground"}, + short_name="lwcre", + derive=True, + ), + Dataset(**OBS6_SAT_FACETS, short_name="lwcre", derive=True), + ] + for expected_ds in expected_datasets: + expected_ds.add_supplementary(short_name="pr", type="sat") + expected_ds.session = session + + assert datasets == expected_datasets + assert datasets[0].files == [lwcre_file_ground] + assert datasets[0].supplementaries[0].files == [pr_file] + assert datasets[1].files == [lwcre_file] + assert datasets[1].supplementaries[0].files == [pr_file] + + expected_input_datasets = [ + Dataset( + **{**OBS6_SAT_FACETS, "type": "ground"}, + short_name="lwcre", + derive=True, + frequency="mon", + long_name="TOA Longwave Cloud Radiative Effect", + modeling_realm=["atmos"], + original_short_name="lwcre", + standard_name="", + units="W m-2", + ), + Dataset( + **OBS6_SAT_FACETS, + short_name="lwcre", + derive=True, + frequency="mon", + long_name="TOA Longwave Cloud Radiative Effect", + modeling_realm=["atmos"], + original_short_name="lwcre", + standard_name="", + units="W m-2", + ), + ] + for expected_ds in expected_input_datasets: + expected_ds.supplementaries = [ + Dataset( + **OBS6_SAT_FACETS, + short_name="pr", + derive=False, + frequency="mon", + long_name="Precipitation", + modeling_realm=["atmos"], + original_short_name="pr", + standard_name="precipitation_flux", + units="kg m-2 s-1", + ), + ] + expected_ds.session = session + + for dataset, expected in zip( + datasets, + expected_input_datasets, + strict=True, + ): + assert dataset.input_datasets == [expected] + assert expected_input_datasets[0].files == [lwcre_file_ground] + assert expected_input_datasets[1].files == [lwcre_file] + + +def test_from_files_with_derived(rlut_file, rlutcs_file, session): + """Test `from_files` with derived variable and supplementary.""" + dataset = Dataset(**OBS6_SAT_FACETS, short_name="lwcre", derive=True) + dataset.add_supplementary(short_name="pr") + dataset.session = session + + datasets = list(dataset.from_files()) + + expected = Dataset(**OBS6_SAT_FACETS, short_name="lwcre", derive=True) + expected.add_supplementary(short_name="pr") + expected.session = session + + assert datasets == [expected] + assert datasets[0].files == [] + assert datasets[0].supplementaries[0].files == [] + + expected_input_datasets = [ + Dataset( + **OBS6_SAT_FACETS, + short_name="rlut", + derive=False, + frequency="mon", + long_name="TOA Outgoing Longwave Radiation", + modeling_realm=["atmos"], + original_short_name="rlut", + standard_name="toa_outgoing_longwave_flux", + units="W m-2", + ), + Dataset( + **OBS6_SAT_FACETS, + short_name="rlutcs", + derive=False, + frequency="mon", + long_name="TOA Outgoing Clear-Sky Longwave Radiation", + modeling_realm=["atmos"], + original_short_name="rlutcs", + standard_name="toa_outgoing_longwave_flux_assuming_clear_sky", + units="W m-2", + ), + ] + for expected_ds in expected_input_datasets: + expected_ds.session = session + + assert datasets[0].input_datasets == expected_input_datasets + assert expected_input_datasets[0].files == [rlut_file] + assert expected_input_datasets[1].files == [rlutcs_file] + + +@pytest.mark.parametrize("timerange", ["1980/2000", "*"]) +def test_from_files_with_derived_glob( + timerange, + rlut_file, + rlut_file_ground, + rlutcs_file, + pr_file, + session, + caplog, +): + """Test `from_files` with derived variable and supplementary.""" + dataset = Dataset( + **{**OBS6_SAT_FACETS, "type": "*", "timerange": 
timerange}, + short_name="lwcre", + derive=True, + ) + dataset.add_supplementary(short_name="pr") + dataset.session = session + + with caplog.at_level(logging.DEBUG): + datasets = list(dataset.from_files()) + + expected = Dataset(**OBS6_SAT_FACETS, short_name="lwcre", derive=True) + expected.add_supplementary(short_name="pr") + expected.session = session + + assert datasets == [expected] + assert datasets[0].files == [] + assert datasets[0].supplementaries[0].files == [pr_file] + + expected_input_datasets = [ + Dataset( + **OBS6_SAT_FACETS, + short_name="rlut", + derive=False, + frequency="mon", + long_name="TOA Outgoing Longwave Radiation", + modeling_realm=["atmos"], + original_short_name="rlut", + standard_name="toa_outgoing_longwave_flux", + units="W m-2", + ), + Dataset( + **OBS6_SAT_FACETS, + short_name="rlutcs", + derive=False, + frequency="mon", + long_name="TOA Outgoing Clear-Sky Longwave Radiation", + modeling_realm=["atmos"], + original_short_name="rlutcs", + standard_name="toa_outgoing_longwave_flux_assuming_clear_sky", + units="W m-2", + ), + ] + for expected_ds in expected_input_datasets: + expected_ds.session = session + + assert datasets[0].input_datasets == expected_input_datasets + assert expected_input_datasets[0].files == [rlut_file] + assert expected_input_datasets[1].files == [rlutcs_file] + + log_debugs = [r.message for r in caplog.records if r.levelname == "DEBUG"] + msg = "Not all necessary input variables to derive 'lwcre' are available" + for log_debug in log_debugs: + if msg in log_debug: + break + else: + pytest.fail(f"No debug message '{msg}'") + + +def test_from_files_with_derived_no_force_derivation( + lwcre_file, + rlut_file, + rlutcs_file, + session, +): + """Test `from_files` with derived variable and supplementary.""" + dataset = Dataset(**OBS6_SAT_FACETS, short_name="lwcre", derive=True) + dataset.add_supplementary(short_name="pr") + dataset.session = session + + datasets = list(dataset.from_files()) + + expected = Dataset(**OBS6_SAT_FACETS, short_name="lwcre", derive=True) + expected.add_supplementary(short_name="pr") + expected.session = session + + assert datasets == [expected] + assert datasets[0].files == [lwcre_file] + assert datasets[0].supplementaries[0].files == [] + + expected_input_dataset = Dataset( + **OBS6_SAT_FACETS, + short_name="lwcre", + derive=True, + frequency="mon", + long_name="TOA Longwave Cloud Radiative Effect", + modeling_realm=["atmos"], + original_short_name="lwcre", + standard_name="", + units="W m-2", + ) + expected_input_dataset.supplementaries = [ + Dataset( + **OBS6_SAT_FACETS, + short_name="pr", + derive=False, + frequency="mon", + long_name="Precipitation", + modeling_realm=["atmos"], + original_short_name="pr", + standard_name="precipitation_flux", + units="kg m-2 s-1", + ), + ] + expected_input_dataset.session = session + + assert datasets[0].input_datasets == [expected_input_dataset] + assert expected_input_dataset.files == [lwcre_file] + + +@pytest.mark.parametrize("timerange", ["1980/2000", "*"]) +def test_from_files_with_derived_no_force_derivation_glob( # noqa: PLR0913 + timerange, + lwcre_file, + lwcre_file_ground, + rlut_file, + rlut_file_ground, + rlutcs_file, + pr_file, + session, +): + """Test `from_files` with derived variable and supplementary.""" + dataset = Dataset( + **{**OBS6_SAT_FACETS, "type": "*", "timerange": timerange}, + short_name="lwcre", + derive=True, + ) + dataset.add_supplementary(short_name="pr") + dataset.session = session + + datasets = list(dataset.from_files()) + + expected_datasets = 
[ + Dataset( + **{**OBS6_SAT_FACETS, "type": "ground"}, + short_name="lwcre", + derive=True, + ), + Dataset(**OBS6_SAT_FACETS, short_name="lwcre", derive=True), + ] + for expected_ds in expected_datasets: + expected_ds.add_supplementary(short_name="pr", type="sat") + expected_ds.session = session + + assert datasets == expected_datasets + assert datasets[0].files == [lwcre_file_ground] + assert datasets[0].supplementaries[0].files == [pr_file] + assert datasets[1].files == [lwcre_file] + assert datasets[1].supplementaries[0].files == [pr_file] + + expected_input_datasets = [ + Dataset( + **{**OBS6_SAT_FACETS, "type": "ground"}, + short_name="lwcre", + derive=True, + frequency="mon", + long_name="TOA Longwave Cloud Radiative Effect", + modeling_realm=["atmos"], + original_short_name="lwcre", + standard_name="", + units="W m-2", + ), + Dataset( + **OBS6_SAT_FACETS, + short_name="lwcre", + derive=True, + frequency="mon", + long_name="TOA Longwave Cloud Radiative Effect", + modeling_realm=["atmos"], + original_short_name="lwcre", + standard_name="", + units="W m-2", + ), + ] + for expected_ds in expected_input_datasets: + expected_ds.supplementaries = [ + Dataset( + **OBS6_SAT_FACETS, + short_name="pr", + derive=False, + frequency="mon", + long_name="Precipitation", + modeling_realm=["atmos"], + original_short_name="pr", + standard_name="precipitation_flux", + units="kg m-2 s-1", + ), + ] + expected_ds.session = session + + for dataset, expected in zip( + datasets, + expected_input_datasets, + strict=True, + ): + assert dataset.input_datasets == [expected] + assert expected_input_datasets[0].files == [lwcre_file_ground] + assert expected_input_datasets[1].files == [lwcre_file] + + +def test_from_files_with_derived_force_derivation( + lwcre_file, + rlut_file, + rlutcs_file, + session, +): + """Test `from_files` with derived variable and supplementary.""" + dataset = Dataset( + **OBS6_SAT_FACETS, + short_name="lwcre", + derive=True, + force_derivation=True, + ) + dataset.add_supplementary(short_name="pr") + dataset.session = session + + datasets = list(dataset.from_files()) + + expected = Dataset( + **OBS6_SAT_FACETS, + short_name="lwcre", + derive=True, + force_derivation=True, + ) + expected.add_supplementary(short_name="pr") + expected.session = session + + assert datasets == [expected] + assert datasets[0].files == [lwcre_file] + assert datasets[0].supplementaries[0].files == [] + + expected_input_datasets = [ + Dataset( + **OBS6_SAT_FACETS, + short_name="rlut", + derive=False, + force_derivation=False, + frequency="mon", + long_name="TOA Outgoing Longwave Radiation", + modeling_realm=["atmos"], + original_short_name="rlut", + standard_name="toa_outgoing_longwave_flux", + units="W m-2", + ), + Dataset( + **OBS6_SAT_FACETS, + short_name="rlutcs", + derive=False, + force_derivation=False, + frequency="mon", + long_name="TOA Outgoing Clear-Sky Longwave Radiation", + modeling_realm=["atmos"], + original_short_name="rlutcs", + standard_name="toa_outgoing_longwave_flux_assuming_clear_sky", + units="W m-2", + ), + ] + for expected_ds in expected_input_datasets: + expected_ds.session = session + + assert datasets[0].input_datasets == expected_input_datasets + assert expected_input_datasets[0].files == [rlut_file] + assert expected_input_datasets[1].files == [rlutcs_file] + + +@pytest.mark.parametrize("timerange", ["1980/2000", "*"]) +def test_from_files_with_derived_force_derivation_glob( # noqa: PLR0913 + timerange, + lwcre_file, + lwcre_file_ground, + rlut_file, + rlut_file_ground, + rlutcs_file, 
+ pr_file, + session, + caplog, +): + """Test `from_files` with derived variable and supplementary.""" + dataset = Dataset( + **{**OBS6_SAT_FACETS, "type": "*", "timerange": timerange}, + short_name="lwcre", + derive=True, + force_derivation=True, + ) + dataset.add_supplementary(short_name="pr") + dataset.session = session + + with caplog.at_level(logging.DEBUG): + datasets = list(dataset.from_files()) + + expected = Dataset( + **OBS6_SAT_FACETS, + short_name="lwcre", + derive=True, + force_derivation=True, + ) + expected.add_supplementary(short_name="pr") + expected.session = session + + assert datasets == [expected] + assert datasets[0].files == [lwcre_file] + assert datasets[0].supplementaries[0].files == [pr_file] + + expected_input_datasets = [ + Dataset( + **OBS6_SAT_FACETS, + short_name="rlut", + derive=False, + force_derivation=False, + frequency="mon", + long_name="TOA Outgoing Longwave Radiation", + modeling_realm=["atmos"], + original_short_name="rlut", + standard_name="toa_outgoing_longwave_flux", + units="W m-2", + ), + Dataset( + **OBS6_SAT_FACETS, + short_name="rlutcs", + derive=False, + force_derivation=False, + frequency="mon", + long_name="TOA Outgoing Clear-Sky Longwave Radiation", + modeling_realm=["atmos"], + original_short_name="rlutcs", + standard_name="toa_outgoing_longwave_flux_assuming_clear_sky", + units="W m-2", + ), + ] + for expected_ds in expected_input_datasets: + expected_ds.session = session + + assert datasets[0].input_datasets == expected_input_datasets + assert expected_input_datasets[0].files == [rlut_file] + assert expected_input_datasets[1].files == [rlutcs_file] + + log_debugs = [r.message for r in caplog.records if r.levelname == "DEBUG"] + msg = "Not all necessary input variables to derive 'lwcre' are available" + for log_debug in log_debugs: + if msg in log_debug: + break + else: + pytest.fail(f"No debug message '{msg}'") + + def test_match(): dataset1 = Dataset( short_name="areacella", @@ -1609,7 +2246,7 @@ def test_find_files_non_esgf_projects(mocker, project, monkeypatch): assert tas._file_globs == mock.sentinel.file_globs -def test_set_version(): +def test_set_version_non_derived_var(): dataset = Dataset(short_name="tas") dataset.add_supplementary(short_name="areacella") file_v1 = esmvalcore.local.LocalFile("/path/to/v1/tas.nc") @@ -1625,6 +2262,43 @@ def test_set_version(): assert dataset.supplementaries[0].facets["version"] == "v3" +def test_set_version_derive_var(monkeypatch): + dataset = Dataset(**OBS6_SAT_FACETS, short_name="lwcre", derive=True) + dataset.add_supplementary(short_name="areacella") + dataset.files = [] + areacella_file = esmvalcore.local.LocalFile("/path/to/areacella.nc") + areacella_file.facets["version"] = "v4" + dataset.supplementaries[0].files = [areacella_file] + + def _get_input_datasets(): + rlut_file = esmvalcore.local.LocalFile("/path/to/rlut.nc") + rlut_file.facets["version"] = "v1" + rlut_dataset = Dataset( + **OBS6_SAT_FACETS, + short_name="rlut", + derive=False, + ) + rlut_dataset.files = [rlut_file] + rlutcs_file_1 = esmvalcore.local.LocalFile("/path/to/rlutcs_1.nc") + rlutcs_file_2 = esmvalcore.local.LocalFile("/path/to/rlutcs_2.nc") + rlutcs_file_1.facets["version"] = "v2" + rlutcs_file_2.facets["version"] = "v3" + rlutcs_dataset = Dataset( + **OBS6_SAT_FACETS, + short_name="rlutcs", + derive=False, + ) + rlutcs_dataset.files = [rlutcs_file_1, rlutcs_file_2] + return [rlut_dataset, rlutcs_dataset] + + monkeypatch.setattr(dataset, "_get_input_datasets", _get_input_datasets) + + dataset.set_version() + + 
assert dataset.facets["version"] == ["v1", "v2", "v3"] + assert dataset.supplementaries[0].facets["version"] == "v4" + + @pytest.mark.parametrize("timerange", ["*", "185001/*", "*/185112"]) def test_update_timerange_from_esgf(mocker, timerange): esgf_files = [ @@ -2137,16 +2811,6 @@ def test_get_extra_facets_native6(): } -OBS6_SAT_FACETS = { - "project": "OBS6", - "dataset": "SAT", - "mip": "Amon", - "tier": 2, - "type": "sat", - "timerange": "1980/2000", -} - - def test_is_derived_no_derivation(): dataset = Dataset(**OBS6_SAT_FACETS, short_name="tas") assert dataset._is_derived() is False @@ -2196,6 +2860,15 @@ def test_derivation_necessary_no_force_derivation_no_files(): assert dataset._derivation_necessary() is True +def test_derivation_necessary_no_force_derivation_no_files_glob(): + dataset = Dataset( + **{**OBS6_SAT_FACETS, "timerange": "*"}, + short_name="lwcre", + derive=True, + ) + assert dataset._derivation_necessary() is True + + def test_derivation_necessary_no_force_derivation(tmp_path, session): dataset = Dataset(**OBS6_SAT_FACETS, short_name="lwcre", derive=True) dataset.session = session @@ -2269,3 +2942,67 @@ def test_add_derived_supplementary_to_derived(): force_derivation=True, ) assert dataset.supplementaries[0] == expected_supplementary + + +def test_input_datasets_derivation(): + dataset = Dataset(**OBS6_SAT_FACETS, short_name="lwcre", derive=True) + dataset.add_supplementary(short_name="pr") + + expected_datasets = [ + Dataset( + **OBS6_SAT_FACETS, + short_name="rlut", + derive=False, + frequency="mon", + long_name="TOA Outgoing Longwave Radiation", + modeling_realm=["atmos"], + original_short_name="rlut", + standard_name="toa_outgoing_longwave_flux", + units="W m-2", + ), + Dataset( + **OBS6_SAT_FACETS, + short_name="rlutcs", + derive=False, + frequency="mon", + long_name="TOA Outgoing Clear-Sky Longwave Radiation", + modeling_realm=["atmos"], + original_short_name="rlutcs", + standard_name="toa_outgoing_longwave_flux_assuming_clear_sky", + units="W m-2", + ), + ] + for expected_dataset in expected_datasets: + expected_dataset.session = dataset.session + + assert dataset.input_datasets == expected_datasets + + +def test_input_datasets_no_derivation(): + dataset = Dataset(**OBS6_SAT_FACETS, short_name="tas") + dataset.add_supplementary(short_name="pr") + + assert dataset.input_datasets == [dataset] + + +def test_input_datasets_no_force_derivation(tmp_path, session): + dataset = Dataset(**OBS6_SAT_FACETS, short_name="lwcre", derive=True) + dataset.add_supplementary(short_name="pr") + dataset.session = session + + input_dir = tmp_path / "Tier2" / "SAT" + input_dir.mkdir(parents=True, exist_ok=True) + lwcre_file = esmvalcore.local.LocalFile( + input_dir / "OBS6_SAT_sat_1_Amon_lwcre_1980-2000.nc", + ) + lwcre_file.touch() + + assert dataset.input_datasets == [dataset] + + +def test_input_datasets_no_derivation_available(): + dataset = Dataset(**OBS6_SAT_FACETS, short_name="tas", derive=True) + + msg = r"Cannot derive variable 'tas': no derivation script available" + with pytest.raises(NotImplementedError, match=msg): + dataset.input_datasets # noqa: B018
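For reference, the `set_version` semantics pinned down by `test_set_version_derive_var` above, as a compact sketch (facets, file paths, and versions are hypothetical):

from esmvalcore.dataset import Dataset

dataset = Dataset(
    project="OBS6",
    dataset="SAT",
    mip="Amon",
    short_name="lwcre",
    tier=2,
    type="sat",
    timerange="1980/2000",
    derive=True,
)

# set_version() collects the 'version' facet from the files of all input
# datasets (rlut and rlutcs here if lwcre itself must be derived). A single
# common version is stored as a string, several versions as a sorted list,
# e.g. ["v1", "v2", "v3"]; supplementary datasets get their own version.
dataset.set_version()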