diff --git a/esmvaltool/cmorizers/data/utilities.py b/esmvaltool/cmorizers/data/utilities.py
index 4a504cb8c7..1a48650542 100644
--- a/esmvaltool/cmorizers/data/utilities.py
+++ b/esmvaltool/cmorizers/data/utilities.py
@@ -2,21 +2,31 @@
 
 import datetime
 import gzip
+import json
 import logging
 import os
 import re
 import shutil
+import uuid
+from abc import abstractmethod
+from collections.abc import Mapping
 from contextlib import contextmanager
+from dataclasses import dataclass
+from functools import lru_cache
 from pathlib import Path
 
+import esmvalcore.cmor
 import iris
 import numpy as np
 import yaml
 from cf_units import Unit
 from dask import array as da
+from esmvalcore.cmor.check import CheckLevels, CMORCheckError, cmor_check
 from esmvalcore.cmor.table import CMOR_TABLES
+from esmvalcore.config import CFG
 from iris.cube import Cube
 
+import esmvaltool
 from esmvaltool import __file__ as esmvaltool_file
 from esmvaltool import __version__ as version
 
@@ -305,51 +315,603 @@ def flip_dim_coord(cube, coord_name):
     cube.data = da.flip(cube.core_data(), axis=coord_idx)
 
 
-def read_cmor_config(dataset):
+def read_cmor_config(dataset: str) -> dict:
     """Read the associated dataset-specific config file."""
     reg_path = os.path.join(
         os.path.dirname(__file__), "cmor_config", dataset + ".yml"
     )
     with open(reg_path, encoding="utf-8") as file:
         cfg = yaml.safe_load(file)
-    cfg["cmor_table"] = CMOR_TABLES[cfg["attributes"]["project_id"]]
-    if "comment" not in cfg["attributes"]:
-        cfg["attributes"]["comment"] = ""
+    attributes = cfg["attributes"]
+    if attributes.get("activity_id", "") == "obs4MIPs":
+        # Fill in various attributes automatically.
+        timestamp = datetime.datetime.now(datetime.timezone.utc)
+        timestamp_format = "%Y-%m-%dT%H:%M:%SZ"
+        now_time = timestamp.strftime(timestamp_format)
+        attributes["project_id"] = "obs4MIPs"
+        attributes["tier"] = "1"
+        attributes["source_id"] = dataset
+        source_id_info = load_obs4mips_source_id_info()[dataset]
+        for key in ["institution_id", "source_label"]:
+            attributes[key] = re.sub(
+                "[^a-zA-Z0-9]+", "-", source_id_info[key]
+            ).strip("-")
+        vocabulary = load_controlled_vocabulary("obs4MIPs")
+        for key, value in vocabulary["source_id"][dataset].items():
+            attributes[key] = value
+        attributes["institution"] = vocabulary["institution_id"][
+            attributes["institution_id"]
+        ]
+        if "references" not in attributes:
+            attributes["references"] = attributes["doi"]
+        if "creation_date" not in attributes:
+            attributes["creation_date"] = now_time
+        attributes["data_specs_version"] = "2.5"
+        attributes["processing_code_location"] = (
+            _get_processing_code_location()
+        )
+        if "version" not in attributes:
+            attributes["version"] = timestamp.strftime("v%Y%m%d")
+    elif "comment" not in attributes:
+        attributes["comment"] = ""
+
+    cfg["cmor_table"] = CMOR_TABLES[attributes["project_id"]]
+
     return cfg
 
 
-def save_variable(cube, var, outdir, attrs, **kwargs):
+# See https://zenodo.org/records/11500474 for the obs4MIPs specification
+# See https://github.com/PCMDI/obs4MIPs-cmor-tables for the obs4MIPs CMOR tables
+
+
+def find_cmor_tables_path(project: str) -> Path:
+    """Find the path to the CMOR tables."""
+    # Code copied from
+    # https://github.com/ESMValGroup/ESMValCore/blob/main/esmvalcore/cmor/table.py
+    project_config = yaml.safe_load(
+        CFG["config_developer_file"].read_text(encoding="utf-8")
+    )[project]
+    install_dir = os.path.dirname(os.path.realpath(esmvalcore.cmor.__file__))
+    cmor_type = project_config.get("cmor_type", "CMIP5")
+    default_path = os.path.join(install_dir, "tables", cmor_type.lower())
+    tables_path = project_config.get("cmor_path", default_path)
+    tables_path = os.path.expandvars(os.path.expanduser(tables_path))
+    if not os.path.exists(tables_path):
+        tables_path = os.path.join(install_dir, "tables", tables_path)
+    return Path(tables_path)
+
+
+@lru_cache
+def load_controlled_vocabulary(project: str) -> dict:
+    """Load the controlled vocabulary."""
+    tables_path = find_cmor_tables_path(project)
+    cv_paths = list((tables_path / "Tables").glob("*_CV.json"))
+    if not cv_paths:
+        return {}
+    cv_path = cv_paths[0]
+    vocabulary = json.loads(cv_path.read_text(encoding="utf-8"))
+    return vocabulary["CV"]
+
+
+@lru_cache
+def load_obs4mips_source_id_info() -> dict[str, dict]:
+    """Load additional information from the obs4MIPs source_id table."""
+    table_path = find_cmor_tables_path("obs4MIPs") / "obs4MIPs_source_id.json"
+    table = json.loads(table_path.read_text(encoding="utf-8"))
+    return table["source_id"]
+
+
+class AttributeValidationError(Exception):
+    """There was an error in a global NetCDF attribute."""
+
+
+@dataclass
+class BaseAttributeValidator:
+    """Validator for global attributes."""
+
+    name: str
+    """The name of the attribute."""
+    required: bool
+    """Whether the attribute is required or not."""
+
+    def validate(self, attributes: Mapping[str, str]) -> None:
+        """Validate attributes."""
+        if self.name in attributes:
+            self.validate_values(attributes)
+        elif self.required:
+            msg = f"Required attribute '{self.name}' missing."
+            raise AttributeValidationError(msg)
+
+    @abstractmethod
+    def validate_values(self, attributes: Mapping[str, str]) -> None:
+        """Validate attribute values."""
+
+
+@dataclass
+class CVAttributeValidator(BaseAttributeValidator):
+    """Validator for attributes defined by the controlled vocabulary."""
+
+    values: set[str]
+
+    def validate_values(self, attributes: Mapping[str, str]) -> None:
+        """Validate attribute values."""
+        value = attributes[self.name]
+        if value not in self.values:
+            msg = (
+                f"Encountered an invalid value '{value}' for attribute "
+                f"'{self.name}'. Choose from: {','.join(sorted(self.values))}"
+            )
+            raise AttributeValidationError(msg)
+
+
+@dataclass
+class CVRelatedAttributeValidator(BaseAttributeValidator):
+    """Validator for attributes defined by the controlled vocabulary."""
+
+    source_name: str
+    values: dict[str, str]
+
+    def validate_values(self, attributes: Mapping[str, str]) -> None:
+        """Validate attribute values."""
+        source_value = attributes[self.source_name]
+        value = attributes[self.name]
+        if value != self.values[source_value]:
+            msg = (
+                f"Encountered an invalid value '{value}' for attribute "
+                f"{self.name}. It should be: {self.values[source_value]}"
+            )
+            raise AttributeValidationError(msg)
+
+
+def load_cv_validators(project: str) -> list[BaseAttributeValidator]:
+    """Load validators representing the controlled vocabulary."""
+    if project in ("OBS", "OBS6"):
+        # There is no controlled vocabulary for ESMValTool internal projects OBS6 and OBS.
+        return []
+
+    if project != "obs4MIPs":
+        msg = f"Reading the controlled vocabulary for project {project} is not (yet) supported."
+        raise NotImplementedError(msg)
+
+    vocabulary = load_controlled_vocabulary(project)
+    validators: list[BaseAttributeValidator] = []
+    required_attributes = {
+        v.name for v in GLOBAL_ATTRIBUTE_VALIDATORS[project] if v.required
+    }
+    ignore = {"required_global_attributes", "license"}
+    for key, values in vocabulary.items():
+        if key in ignore:
+            continue
+        if key in vocabulary[key]:
+            # Some entries are nested.
+            values = vocabulary[key][key]
+        validators.append(
+            CVAttributeValidator(
+                key,
+                values={values} if isinstance(values, str) else set(values),
+                required=key in required_attributes,
+            )
+        )
+
+    validators.append(
+        CVRelatedAttributeValidator(
+            "institution",
+            required=True,
+            source_name="institution_id",
+            values=vocabulary["institution_id"],
+        )
+    )
+
+    # Create validators for attributes determined by the "source_id".
+    related_values: dict[str, dict[str, str]] = {}
+    for source_id, source_values in vocabulary["source_id"].items():
+        for name, value in source_values.items():
+            if name not in related_values:
+                related_values[name] = {}
+            related_values[name][source_id] = value
+    for name, values in related_values.items():
+        validators.append(
+            CVRelatedAttributeValidator(
+                name,
+                required=True,
+                source_name="source_id",
+                values=values,
+            )
+        )
+
+    return validators
+
+
+@dataclass
+class DateTimeAttributeValidator(BaseAttributeValidator):
+    """Validator for datetime attributes."""
+
+    def validate_values(self, attributes: Mapping[str, str]) -> None:
+        """Validate attribute values."""
+        value = attributes[self.name]
+        datetime_format = "%Y-%m-%dT%H:%M:%SZ"  # Enforce ISO 8601 with UTC.
+        try:
+            datetime.datetime.strptime(value, datetime_format)
+        except ValueError as exc:
+            msg = f"Invalid datetime encountered for attribute '{self.name}', message: {exc}"
+            raise AttributeValidationError(msg) from None
+
+
+@dataclass
+class RegexAttributeValidator(BaseAttributeValidator):
+    """Validator for attributes based on regular expressions."""
+
+    pattern: str
+
+    def validate_values(self, attributes: Mapping[str, str]) -> None:
+        """Validate attribute values."""
+        pattern = self.pattern.format(**attributes)
+        value = attributes[self.name]
+        if not re.match(pattern, value):
+            msg = (
+                f"Invalid attribute value '{value}' encountered for attribute "
+                f"'{self.name}'. It should match '{pattern}'"
+            )
+            raise AttributeValidationError(msg)
+
+
+PATH_ATTRIBUTE = "^[a-zA-Z0-9-]+$"  # Used in file or directory names.
+PATH_ATTRIBUTE_WITH_SPACES = (
+    "^[a-zA-Z0-9- ]+$"  # Used in file or directory names after space removal.
+)
+DRS_ATTRIBUTE = "^[a-zA-Z0-9-_]+$"  # Data Reference Syntax (DRS) components.
+FREE_FORM_ATTRIBUTE = ".+"
+
+
+GLOBAL_ATTRIBUTE_VALIDATORS: dict[str, list[BaseAttributeValidator]] = {
+    "obs4MIPs": [
+        # Required attributes
+        RegexAttributeValidator(
+            "activity_id",
+            required=True,
+            pattern="^obs4MIPs$",
+        ),
+        RegexAttributeValidator(
+            "contact",
+            required=True,
+            pattern=FREE_FORM_ATTRIBUTE,
+        ),
+        DateTimeAttributeValidator(
+            "creation_date",
+            required=True,
+        ),
+        RegexAttributeValidator(
+            "dataset_contributor",
+            required=True,
+            pattern=FREE_FORM_ATTRIBUTE,
+        ),
+        RegexAttributeValidator(
+            "data_specs_version",
+            required=True,
+            pattern=r"^2\.5$",
+        ),
+        # "doi" is not a required attribute according to the obs4MIPs spec,
+        # but it is for CMIP7 data so we add it for consistency.
+        RegexAttributeValidator("doi", required=True, pattern=r"^10\.[0-9]+"),
+        RegexAttributeValidator(
+            "frequency",
+            required=True,
+            pattern=PATH_ATTRIBUTE,
+        ),
+        RegexAttributeValidator(
+            "grid",
+            required=True,
+            pattern=FREE_FORM_ATTRIBUTE,
+        ),
+        RegexAttributeValidator(
+            "grid_label",
+            required=True,
+            pattern=PATH_ATTRIBUTE,
+        ),
+        RegexAttributeValidator(
+            "institution",
+            required=True,
+            pattern=FREE_FORM_ATTRIBUTE,
+        ),
+        RegexAttributeValidator(
+            "institution_id", required=True, pattern=PATH_ATTRIBUTE
+        ),
+        RegexAttributeValidator(
+            "license", required=True, pattern=FREE_FORM_ATTRIBUTE
+        ),
+        RegexAttributeValidator(
+            "nominal_resolution",
+            required=True,
+            pattern=PATH_ATTRIBUTE_WITH_SPACES,
+        ),
+        RegexAttributeValidator(
+            "processing_code_location",
+            required=True,
+            pattern=FREE_FORM_ATTRIBUTE,
+        ),
+        RegexAttributeValidator(
+            "product", required=True, pattern=DRS_ATTRIBUTE
+        ),
+        RegexAttributeValidator("realm", required=True, pattern=DRS_ATTRIBUTE),
+        RegexAttributeValidator(
+            "references",
+            required=True,
+            pattern=FREE_FORM_ATTRIBUTE,
+        ),
+        RegexAttributeValidator(
+            "region",
+            required=True,
+            pattern=DRS_ATTRIBUTE,
+        ),
+        RegexAttributeValidator(
+            "source",
+            required=True,
+            pattern=FREE_FORM_ATTRIBUTE,
+        ),
+        RegexAttributeValidator(
+            "source_id",
+            required=True,
+            pattern=PATH_ATTRIBUTE,
+        ),
+        RegexAttributeValidator(
+            "source_id",
+            required=True,
+            pattern="^{source_label}-.+$",
+        ),
+        RegexAttributeValidator(
+            "source_label",
+            required=True,
+            pattern=DRS_ATTRIBUTE,
+        ),
+        RegexAttributeValidator(
+            "source_type",
+            required=True,
+            pattern=FREE_FORM_ATTRIBUTE,
+        ),
+        RegexAttributeValidator(
+            "source_version_number",
+            required=True,
+            pattern=FREE_FORM_ATTRIBUTE,
+        ),
+        RegexAttributeValidator(
+            "tracking_id",
+            required=True,
+            pattern="^hdl:21.14102/[0-9a-f]{{8}}(-[0-9a-f]{{4}}){{3}}-[0-9a-f]{{12}}$",
+        ),
+        RegexAttributeValidator(
+            "variable_id",
+            required=True,
+            pattern=PATH_ATTRIBUTE,
+        ),
+        RegexAttributeValidator(
+            "variant_label",
+            required=True,
+            pattern=PATH_ATTRIBUTE,
+        ),
+        RegexAttributeValidator(
+            "variant_label",
+            required=True,
+            pattern="^{institution_id}(-.+)?$",
+        ),
+        # Optional attributes
+        RegexAttributeValidator(
+            "comment",
+            required=False,
+            pattern=FREE_FORM_ATTRIBUTE,
+        ),
+        RegexAttributeValidator(
+            "external_variables",
+            required=False,
+            pattern=FREE_FORM_ATTRIBUTE,
+        ),
+        RegexAttributeValidator(
+            "history",
+            required=False,
+            pattern=FREE_FORM_ATTRIBUTE,
+        ),
+        RegexAttributeValidator(
+            "source_data_notes",
+            required=False,
+            pattern=FREE_FORM_ATTRIBUTE,
+        ),
+        # TODO: Maybe we can add the two attributes below based on info from
+        # the automatic download.
+        DateTimeAttributeValidator(
+            "source_data_retrieval_date",
+            required=False,
+        ),
+        RegexAttributeValidator(
+            "source_data_url",
+            required=False,
+            pattern=FREE_FORM_ATTRIBUTE,
+        ),
+        RegexAttributeValidator(
+            "title",
+            required=False,
+            pattern=FREE_FORM_ATTRIBUTE,
+        ),
+        RegexAttributeValidator(
+            "variant_info",
+            required=False,
+            pattern=FREE_FORM_ATTRIBUTE,
+        ),
+    ],
+}
+
+
+def validate_global_attributes(
+    project: str,
+    attributes: dict[str, str],
+) -> bool:
+    """Validate the global NetCDF attributes."""
+    validators = GLOBAL_ATTRIBUTE_VALIDATORS.get(
+        project, []
+    ) + load_cv_validators(project)
+    messages = set()
+    for validator in validators:
+        try:
+            validator.validate(attributes)
+        except AttributeValidationError as exc:
+            messages.add(str(exc))
+    if messages:
+        logger.error("%s", "\n".join(sorted(messages)))
+    return not messages
+
+
+# Code of the two functions below copied from
+# https://github.com/ESMValGroup/ESMValCore/blob/0a1292b0e3b181bb913242da7dc2798b50e7a892/esmvalcore/preprocessor/_io.py#L45-L66
+
+
+def _get_attr_from_field_coord(ncfield, coord_name, attr):
+    if coord_name is not None:
+        attrs = ncfield.cf_group[coord_name].cf_attrs()
+        attr_val = [value for (key, value) in attrs if key == attr]
+        if attr_val:
+            return attr_val[0]
+    return None
+
+
+def _load_callback(raw_cube, field, _):
+    """Use this callback to fix anything Iris tries to break."""
+    for coord in raw_cube.coords():
+        # Iris chooses to change longitude and latitude units to degrees
+        # regardless of value in file, so reinstating file value
+        if coord.standard_name in ["longitude", "latitude"]:
+            units = _get_attr_from_field_coord(field, coord.var_name, "units")
+            if units is not None:
+                coord.units = units
+
+
+def _check_formatting(filename: Path, attributes: dict[str, str]) -> None:
+    """Run final cmorization checks."""
+    project = attributes["project_id"]
+    logger.info("Checking compliance with '%s' project standards", project)
+    cube = iris.load_cube(filename, callback=_load_callback)
+
+    attribute_success = validate_global_attributes(
+        project, cube.attributes.globals
+    )
+
+    if project in ("OBS", "OBS6"):
+        # Use the configured check_level for older CMORizers to avoid breaking
+        # them.
+        check_level = CFG["check_level"]
+    else:
+        # Use strict checks for obs4MIPs
+        check_level = CheckLevels.STRICT
+    try:
+        cmor_check(
+            cube=cube,
+            cmor_table=project,
+            mip=attributes["mip"],
+            short_name=cube.var_name,
+            frequency=cube.attributes.globals.get("frequency"),
+            check_level=check_level,
+        )
+    except CMORCheckError as exc:
+        logger.error("%s", exc)
+        cmor_check_success = False
+    else:
+        cmor_check_success = True
+
+    success = attribute_success and cmor_check_success
+    msg = (
+        f"Data in file {filename} is {'' if success else 'not '}"
+        f"compliant with '{project}' project standards"
+    )
+    if success:
+        logger.info(msg)
+    else:
+        raise ValueError(msg)
+    # TODO: add concatenate test
+    # TODO: add time coverage test
+
+
+FILENAME_TEMPLATE = {
+    "obs4MIPs": "{variable_id}_{frequency}_{source_id}_{variant_label}_{grid_label}",
+    "OBS6": "{project_id}_{dataset_id}_{modeling_realm}_{version}_{mip}_{variable_id}",
+    "OBS": "{project_id}_{dataset_id}_{modeling_realm}_{version}_{mip}_{variable_id}",
+}
+
+DIRECTORY_TEMPLATE = {
+    "obs4MIPs": "{activity_id}/{institution_id}/{source_id}/{frequency}/{variable_id}/{nominal_resolution}/{version}",
+}
+
+
+def get_output_filename(
+    outdir: str,
+    attrs: dict[str, str],
+    time_range: str | None,
+) -> Path:
+    """Get the output filename."""
+    project = attrs["project_id"]
+    if project in DIRECTORY_TEMPLATE:
+        dirname = DIRECTORY_TEMPLATE[project].format(
+            **{k: v.replace(" ", "") for k, v in attrs.items()}
+        )
+        # Ignore the TierX/dataset subdirectory set in the cmorizer.py script
+        # if the project defines its own directory structure.
+        out_path = Path(outdir).parent.parent / dirname
+    else:
+        out_path = Path(outdir)
+    filename = FILENAME_TEMPLATE[project].format(**attrs)
+    if time_range is not None:
+        filename = f"{filename}_{time_range}"
+    filename = f"{filename}.nc"
+    return out_path / filename
+
+
+def save_variable(
+    cube: Cube,
+    var: str,
+    outdir: str,
+    attrs: dict[str, str],
+    **kwargs,
+) -> None:
     """Saver function.
 
     Saves iris cubes (data variables) in CMOR-standard named files.
 
     Parameters
     ----------
-    cube: iris.cube.Cube
+    cube:
         data cube to be saved.
 
-    var: str
+    var:
         Variable short_name e.g. ts or tas.
 
-    outdir: str
+    outdir:
         root directory where the file will be saved.
 
-    attrs: dict
+    attrs:
         dictionary holding cube metadata attributes like
         project_id, version etc.
 
     **kwargs: kwargs
         Keyword arguments to be passed to `iris.save`
     """
+    if var != cube.var_name:
+        msg = (
+            f"Attempted to save cube with var_name '{cube.var_name}' as "
+            f"variable '{var}'"
+        )
+        raise ValueError(msg)
+
+    # Set global attributes.
+    set_global_atts(cube, attrs)
+
+    # Ensure correct dtypes.
     fix_dtype(cube)
-    # CMOR standard
+
+    # Determine the output filename.
     try:
         time = cube.coord("time")
     except iris.exceptions.CoordinateNotFoundError:
         time_suffix = None
     else:
         if (
-            len(time.points) == 1 and "mon" not in cube.attributes.get("mip")
+            len(time.points) == 1
+            and "mon" not in cube.attributes.get("mip", "")
         ) or cube.attributes.get("frequency") == "yr":
             year = str(time.cell(0).point.year)
             time_suffix = "-".join([year + "01", year + "12"])
@@ -362,22 +924,18 @@ def save_variable(cube, var, outdir, attrs, **kwargs):
             )
             time_suffix = "-".join([date1, date2])
 
-    name_elements = [
-        attrs["project_id"],
-        attrs["dataset_id"],
-        attrs["modeling_realm"],
-        attrs["version"],
-        attrs["mip"],
-        var,
-    ]
-    if time_suffix:
-        name_elements.append(time_suffix)
-    file_name = "_".join(name_elements) + ".nc"
-    file_path = os.path.join(outdir, file_name)
+    attrs["variable_id"] = cube.var_name
+    file_path = get_output_filename(outdir, attrs, time_suffix)
     logger.info("Saving: %s", file_path)
+    file_path.parent.mkdir(parents=True, exist_ok=True)
+
+    # Save the cube.
     status = "lazy" if cube.has_lazy_data() else "realized"
     logger.info("Cube has %s data [lazy is preferred]", status)
-    iris.save(cube, file_path, fill_value=1e20, **kwargs)
+    iris.save(cube, file_path, fill_value=1e20, compute=False, **kwargs)
+
+    # Check that the cube complies with the CMOR tables for the project.
+    _check_formatting(file_path, attrs)
 
 
 def extract_doi_value(tags):
@@ -409,43 +967,80 @@ def extract_doi_value(tags):
     return ", ".join(reference_doi)
 
 
-def set_global_atts(cube, attrs):
+def _get_processing_code_location() -> str:
+    """Get a link to code used to CMORize the data."""
+    # Ideas for improvement:
+    # - make sure current code dir is not dirty
+    # - replace version by commit that is available online (though this
+    #   guarantees nothing as it may still get garbage collected if it
+    #   becomes disconnected from existing branches/tags).
+    code_version = ".".join(esmvaltool.__version__.split(".", 3)[:3])
+    return f"https://github.com/ESMValGroup/ESMValTool/tree/{code_version}"
+
+
+def set_global_atts(cube: Cube, attrs: dict[str, str]) -> None:
     """Complete the cmorized file with global metadata."""
     logger.debug("Setting global metadata...")
     attrs = dict(attrs)
     cube.attributes.clear()
-    timestamp = datetime.datetime.utcnow()
-    timestamp_format = "%Y-%m-%d %H:%M:%S"
+    timestamp = datetime.datetime.now(datetime.timezone.utc)
+    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"
     now_time = timestamp.strftime(timestamp_format)
 
     # Necessary attributes
-    try:
+    if attrs["project_id"] == "obs4MIPs":
         glob_dict = {
-            "title": (
-                f"{attrs.pop('dataset_id')} data reformatted for "
-                f"ESMValTool v{version}"
-            ),
-            "version": attrs.pop("version"),
-            "tier": str(attrs.pop("tier")),
-            "source": attrs.pop("source"),
-            "reference": extract_doi_value(attrs.pop("reference")),
-            "comment": attrs.pop("comment"),
-            "user": os.environ.get("USER", "unknown user"),
-            "host": os.environ.get("HOSTNAME", "unknown host"),
-            "history": f"Created on {now_time}",
-            "project_id": attrs.pop("project_id"),
+            "tracking_id": f"hdl:21.14102/{uuid.uuid4()}",
+            "variable_id": cube.var_name,
         }
-    except KeyError as original_error:
-        msg = (
-            "All CMORized datasets need the global attributes "
-            "'dataset_id', 'version', 'tier', 'source', 'reference', "
-            "'comment' and 'project_id' "
-            "specified in the configuration file"
-        )
-        raise KeyError(msg) from original_error
+        required_keys = {
+            v.name
+            for v in GLOBAL_ATTRIBUTE_VALIDATORS["obs4MIPs"]
+            if v.required
+        }
+        optional_keys = {
+            v.name
+            for v in GLOBAL_ATTRIBUTE_VALIDATORS["obs4MIPs"]
+            if not v.required
+        }
+        for key in required_keys | optional_keys:
+            if key in attrs:
+                glob_dict[key] = attrs[key]
+        missing = required_keys - set(glob_dict)
+        if missing:
+            msg = (
+                "The following required keys are missing from the "
+                f"configuration file: {', '.join(sorted(missing))}"
+            )
+            raise KeyError(msg)
+    else:
+        try:
+            glob_dict = {
+                "title": (
+                    f"{attrs.pop('dataset_id')} data reformatted for "
+                    f"ESMValTool v{version}"
+                ),
+                "version": attrs.pop("version"),
+                "tier": str(attrs.pop("tier")),
+                "source": attrs.pop("source"),
+                "reference": extract_doi_value(attrs.pop("reference")),
+                "comment": attrs.pop("comment"),
+                "user": os.environ.get("USER", "unknown user"),
+                "host": os.environ.get("HOSTNAME", "unknown host"),
+                "history": f"Created on {now_time}",
+                "project_id": attrs.pop("project_id"),
+            }
+        except KeyError as original_error:
+            msg = (
+                "All CMORized datasets need the global attributes "
+                "'dataset_id', 'version', 'tier', 'source', 'reference', "
+                "'comment' and 'project_id' "
+                "specified in the configuration file"
+            )
+            raise KeyError(msg) from original_error
+        # Additional attributes
+        glob_dict.update(attrs)
 
-    # Additional attributes
-    glob_dict.update(attrs)
     cube.attributes.globals = glob_dict
 
 
diff --git a/tests/unit/cmorizers/obs/test_merra2.py b/tests/unit/cmorizers/obs/test_merra2.py
index 3288df5b11..6de319c00b 100644
--- a/tests/unit/cmorizers/obs/test_merra2.py
+++ b/tests/unit/cmorizers/obs/test_merra2.py
@@ -5,6 +5,8 @@
 import numpy as np
 import pytest
 from cf_units import Unit
+from esmvalcore.cmor.check import CheckLevels
+from esmvalcore.config import CFG
 
 from esmvaltool.cmorizers.data.formatters.datasets.merra2 import (
     _extract_variable,
@@ -205,9 +207,14 @@ def test_load_cube_pairwise_vars_wrong_oper(tmp_path):
     print(exc)
 
 
-def test_extract_variable(tmp_path):
+def test_extract_variable(tmp_path, monkeypatch):
     """Test variable extraction."""
     # call is _extract_variable(in_files, var, cfg, out_dir)
+
+    # It looks like CMORization is not done to a good enough quality to pass
+    # the CMOR checks, so relax them until this is fixed.
+    monkeypatch.setitem(CFG, "check_level", CheckLevels.IGNORE)
+
     path_cubes = tmp_path / "cubes.nc"
     cube_1 = _create_sample_cube()
     cube_1.var_name = "SWTDN"
@@ -236,8 +243,12 @@ def test_extract_variable(tmp_path):
     assert cmorized_cube.attributes["raw"] == "SWTDN"
 
 
-def test_extract_variable_pairs(tmp_path):
+def test_extract_variable_pairs(tmp_path, monkeypatch):
     """Test variable extraction."""
+    # It looks like CMORization is not done to a good enough quality to pass
+    # the CMOR checks, so relax them until this is fixed.
+    monkeypatch.setitem(CFG, "check_level", CheckLevels.IGNORE)
+
     path_cubes = tmp_path / "cubes.nc"
     cube_1 = _create_sample_cube()
     cube_1.var_name = "SWTDN"
@@ -282,8 +293,12 @@ def test_extract_variable_pairs(tmp_path):
         assert attr in cmorized_cube.attributes
 
 
-def test_vertical_levels(tmp_path):
+def test_vertical_levels(tmp_path, monkeypatch):
     """Test cases for cmorization with vertical levels."""
+    # It looks like CMORization is not done to a good enough quality to pass
+    # the CMOR checks, so relax them until this is fixed.
+    monkeypatch.setitem(CFG, "check_level", CheckLevels.IGNORE)
+
     path_cubes = tmp_path / "cubes.nc"
     cube_1 = _create_sample_cube()
     cube_1.var_name = "V"