diff --git a/dandischema/consts.py b/dandischema/consts.py index cf02329d..8ee5d97a 100644 --- a/dandischema/consts.py +++ b/dandischema/consts.py @@ -1,6 +1,6 @@ from packaging.version import Version as _Version -DANDI_SCHEMA_VERSION = "0.7.0" +DANDI_SCHEMA_VERSION = "0.7.1" ALLOWED_INPUT_SCHEMAS = [ "0.4.4", "0.5.1", @@ -16,13 +16,13 @@ "0.6.8", "0.6.9", "0.6.10", + "0.7.0", DANDI_SCHEMA_VERSION, ] -# ATM we allow only for a single target version which is current -# migrate has a guard now for this since it cannot migrate to anything but current -# version -ALLOWED_TARGET_SCHEMAS = [DANDI_SCHEMA_VERSION] +# We establish migrations (back) to only a few recent versions. +# When adding changes, please consider whether a migration path should be added. +ALLOWED_TARGET_SCHEMAS = ["0.6.10", "0.7.0", DANDI_SCHEMA_VERSION] # This allows multiple schemas for validation, whereas target schemas focus on # migration. diff --git a/dandischema/metadata.py b/dandischema/metadata.py index 19064262..96613e13 100644 --- a/dandischema/metadata.py +++ b/dandischema/metadata.py @@ -358,24 +358,18 @@ def migrate( schema version of the provided instance """ - # ATM, we only support the latest schema version as a target. See definition of - # `ALLOWED_TARGET_SCHEMAS` for details - if len(ALLOWED_TARGET_SCHEMAS) > 1: - msg = f"Only migration to current version, {DANDI_SCHEMA_VERSION}, is supported" - raise NotImplementedError(msg) - # -------------------------------------------------------------- # Validate DANDI schema version provided in the metadata instance # -------------------------------------------------------------- # DANDI schema version of the provided instance - obj_ver = obj.get("schemaVersion") - if obj_ver is None: + obj_version = obj.get("schemaVersion") + if obj_version is None: msg = ( - "The provided Dandiset metadata instance does not have a " + "The provided metadata instance does not have a " "`schemaVersion` field for specifying the DANDI schema version." 
) raise ValueError(msg) - if not isinstance(obj_ver, str): + if not isinstance(obj_version, str): msg = ( "The provided Dandiset metadata instance has a non-string " "`schemaVersion` field for specifying the DANDI schema version." @@ -384,17 +378,17 @@ def migrate( # Check if `obj_ver` is a valid DANDI schema version try: # DANDI schema version of the provided instance in tuple form - obj_ver_tuple = version2tuple(obj_ver) + obj_version_tuple = version2tuple(obj_version) except ValueError as e: msg = ( "The provided Dandiset metadata instance has an invalid " "`schemaVersion` field for specifying the DANDI schema version." ) raise ValueError(msg) from e - if obj_ver not in ALLOWED_INPUT_SCHEMAS: + if obj_version not in ALLOWED_INPUT_SCHEMAS: msg = ( f"The DANDI schema version of the provided Dandiset metadata instance, " - f"{obj_ver!r}, is not one of the supported versions for input " + f"{obj_version!r}, is not one of the supported versions for input " f"Dandiset metadata instances. The supported versions are " f"{ALLOWED_INPUT_SCHEMAS}." 
) @@ -407,7 +401,7 @@ def migrate( # Check if `to_version` is a valid DANDI schema version try: # The target DANDI schema version in tuple form - target_ver_tuple = version2tuple(to_version) + to_version_tuple = version2tuple(to_version) except ValueError as e: msg = ( "The provided target version, {to_version!r}, is not a valid DANDI schema " @@ -424,22 +418,17 @@ def migrate( raise ValueError(msg) # ---------------------------------------------------------------- - # Ensure the target DANDI schema version is at least the DANDI schema version - # of the provided instance - if obj_ver_tuple > target_ver_tuple: - raise ValueError(f"Cannot migrate from {obj_ver} to lower {to_version}.") - # Optionally validate the instance against the DANDI schema it specifies # before migration if not skip_validation: - _validate_obj_json(obj, _get_jsonschema_validator(obj_ver, "Dandiset")) + _validate_obj_json(obj, _get_jsonschema_validator(obj_version, "Dandiset")) obj_migrated = deepcopy(obj) - if obj_ver_tuple == target_ver_tuple: + if obj_version_tuple == to_version_tuple: return obj_migrated - if obj_ver_tuple < version2tuple("0.6.0") <= target_ver_tuple: + if obj_version_tuple < version2tuple("0.6.0") <= to_version_tuple: for val in obj_migrated.get("about", []): if "schemaKey" not in val: if "identifier" in val and "UBERON" in val["identifier"]: @@ -459,6 +448,34 @@ def migrate( if "schemaKey" not in obj_migrated: obj_migrated["schemaKey"] = "Dandiset" + # Downgrades + + # Simple downgrades that just require removing fields, which is totally fine + # if they are empty, as they are None or empty containers (list, tuple, etc) + # or empty strings. + # List only those for which such notion of "empty" applies. 
+ SIMPLE_DOWNGRADES = [ + # version added, fields to remove + ("0.7.0", ["releaseNotes"]), + ("0.7.1", ["sameAs"]), + ] + for ver_added, fields in SIMPLE_DOWNGRADES: + # additional guards are via ALLOWED_TARGET_SCHEMAS + if to_version_tuple < version2tuple(ver_added) <= obj_version_tuple: + for field in fields: + if field in obj_migrated: + value = obj_migrated.get(field) + # Explicit check for "empty" value per above description. + if value is None or ( + not value and isinstance(value, (list, tuple, dict, set, str)) + ): + del obj_migrated[field] + else: + raise ValueError( + f"Cannot downgrade to {to_version} from " + f"{obj_version} with {field}={value!r} present" + ) + # Always update schemaVersion when migrating obj_migrated["schemaVersion"] = to_version return obj_migrated diff --git a/dandischema/tests/test_metadata.py b/dandischema/tests/test_metadata.py index 6da7fff3..da68c399 100644 --- a/dandischema/tests/test_metadata.py +++ b/dandischema/tests/test_metadata.py @@ -424,20 +424,6 @@ def test_migrate_value_errors(obj: Dict[str, Any], target: Any, msg: str) -> Non migrate(obj, to_version=target, skip_validation=True) -def test_migrate_value_errors_lesser_target(monkeypatch: pytest.MonkeyPatch) -> None: - """ - Test cases when `migrate()` is expected to raise a `ValueError` exception - when the target schema version is lesser than the schema version of the metadata - instance - """ - from dandischema import metadata - - monkeypatch.setattr(metadata, "ALLOWED_TARGET_SCHEMAS", ["0.6.0"]) - - with pytest.raises(ValueError, match="Cannot migrate from .* to lower"): - migrate({"schemaVersion": "0.6.7"}, to_version="0.6.0", skip_validation=True) - - @skipif_no_network @skipif_no_test_dandiset_metadata_dir # Skip for instance name not being DANDI because JSON schema version at `0.4.4`, the @@ -509,6 +495,65 @@ def test_migrate_schemaversion_update() -> None: ) + +@pytest.mark.ai_generated +def test_migrate_downgrade() -> None: + """Test downgrade from 0.7.1 to 
0.6.10 handling releaseNotes and sameAs fields""" + + # Minimal metadata at current (0.7.1) version + meta_dict: dict = { + "schemaKey": "Dandiset", + "schemaVersion": DANDI_SCHEMA_VERSION, + "identifier": "DANDI:000000", + } + + # Test 1: Downgrade without new fields (should succeed) + downgraded = migrate(meta_dict, to_version="0.6.10", skip_validation=True) + assert downgraded["schemaVersion"] == "0.6.10" + assert "releaseNotes" not in downgraded + assert "sameAs" not in downgraded + + # Test 2: Downgrade with empty releaseNotes (should succeed) + meta_dict["releaseNotes"] = "" + downgraded = migrate(meta_dict, to_version="0.6.10", skip_validation=True) + assert downgraded["schemaVersion"] == "0.6.10" + assert "releaseNotes" not in downgraded + + # Test 3: Downgrade with None releaseNotes (should succeed) + meta_dict["releaseNotes"] = None + downgraded = migrate(meta_dict, to_version="0.6.10", skip_validation=True) + assert downgraded["schemaVersion"] == "0.6.10" + assert "releaseNotes" not in downgraded + + # Test 4: Downgrade with empty sameAs list (should succeed) + meta_dict.pop("releaseNotes") + meta_dict["sameAs"] = [] + downgraded = migrate(meta_dict, to_version="0.6.10", skip_validation=True) + assert downgraded["schemaVersion"] == "0.6.10" + assert "sameAs" not in downgraded + + # Test 5: Downgrade with non-empty releaseNotes (should fail) + meta_dict.pop("sameAs") + meta_dict["releaseNotes"] = "Releasing during testing" + with pytest.raises(ValueError, match="Cannot downgrade to 0.6.10 from"): + migrate(meta_dict, to_version="0.6.10", skip_validation=True) + + # Test 6: Downgrade with non-empty sameAs (should fail) + meta_dict.pop("releaseNotes") + meta_dict["sameAs"] = ["dandi://DANDI-SANDBOX/123456"] + with pytest.raises(ValueError, match="Cannot downgrade to 0.6.10 from"): + migrate(meta_dict, to_version="0.6.10", skip_validation=True) + + # Test 7: No-op migration (already at target version) + meta_dict_0610 = { + "schemaKey": "Dandiset", + 
"schemaVersion": "0.6.10", + "identifier": "DANDI:000000", + } + migrated = migrate(meta_dict_0610, to_version="0.6.10", skip_validation=True) + assert migrated == meta_dict_0610 + assert migrated is not meta_dict_0610 # but we do create a copy + + @pytest.mark.parametrize( "files, summary", [ diff --git a/pyproject.toml b/pyproject.toml index 54a26d7c..8df389f6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,3 +26,8 @@ profile = "black" force_sort_within_sections = true reverse_relative = true known_first_party = ["dandischema"] + +[tool.pytest.ini_options] +markers = [ + "ai_generated: marks tests as generated by AI assistants (deselect with '-m \"not ai_generated\"')", +]