Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions dandischema/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,9 @@
DANDI_SCHEMA_VERSION,
]

# ATM we allow only for a single target version which is current
# migrate has a guard now for this since it cannot migrate to anything but current
# version
ALLOWED_TARGET_SCHEMAS = [DANDI_SCHEMA_VERSION]
# We establish migrations (back) to only a few recent versions.
# When adding changes, please consider whether a migration path should be added.
ALLOWED_TARGET_SCHEMAS = ["0.6.10", DANDI_SCHEMA_VERSION]

# This allows multiple schemas for validation, whereas target schemas focus on
# migration.
Expand Down
60 changes: 38 additions & 22 deletions dandischema/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,24 +358,18 @@ def migrate(
schema version of the provided instance
"""

# ATM, we only support the latest schema version as a target. See definition of
# `ALLOWED_TARGET_SCHEMAS` for details
if len(ALLOWED_TARGET_SCHEMAS) > 1:
msg = f"Only migration to current version, {DANDI_SCHEMA_VERSION}, is supported"
raise NotImplementedError(msg)

# --------------------------------------------------------------
# Validate DANDI schema version provided in the metadata instance
# --------------------------------------------------------------
# DANDI schema version of the provided instance
obj_ver = obj.get("schemaVersion")
if obj_ver is None:
obj_version = obj.get("schemaVersion")
if obj_version is None:
msg = (
"The provided Dandiset metadata instance does not have a "
"The provided metadata instance does not have a "
"`schemaVersion` field for specifying the DANDI schema version."
)
raise ValueError(msg)
if not isinstance(obj_ver, str):
if not isinstance(obj_version, str):
msg = (
"The provided Dandiset metadata instance has a non-string "
"`schemaVersion` field for specifying the DANDI schema version."
Expand All @@ -384,17 +378,17 @@ def migrate(
# Check if `obj_version` is a valid DANDI schema version
try:
# DANDI schema version of the provided instance in tuple form
obj_ver_tuple = version2tuple(obj_ver)
obj_version_tuple = version2tuple(obj_version)
except ValueError as e:
msg = (
"The provided Dandiset metadata instance has an invalid "
"`schemaVersion` field for specifying the DANDI schema version."
)
raise ValueError(msg) from e
if obj_ver not in ALLOWED_INPUT_SCHEMAS:
if obj_version not in ALLOWED_INPUT_SCHEMAS:
msg = (
f"The DANDI schema version of the provided Dandiset metadata instance, "
f"{obj_ver!r}, is not one of the supported versions for input "
f"{obj_version!r}, is not one of the supported versions for input "
f"Dandiset metadata instances. The supported versions are "
f"{ALLOWED_INPUT_SCHEMAS}."
)
Expand All @@ -407,7 +401,7 @@ def migrate(
# Check if `to_version` is a valid DANDI schema version
try:
# The target DANDI schema version in tuple form
target_ver_tuple = version2tuple(to_version)
to_version_tuple = version2tuple(to_version)
except ValueError as e:
msg = (
"The provided target version, {to_version!r}, is not a valid DANDI schema "
Expand All @@ -424,22 +418,17 @@ def migrate(
raise ValueError(msg)
# ----------------------------------------------------------------

# Ensure the target DANDI schema version is at least the DANDI schema version
# of the provided instance
if obj_ver_tuple > target_ver_tuple:
raise ValueError(f"Cannot migrate from {obj_ver} to lower {to_version}.")

# Optionally validate the instance against the DANDI schema it specifies
# before migration
if not skip_validation:
_validate_obj_json(obj, _get_jsonschema_validator(obj_ver, "Dandiset"))
_validate_obj_json(obj, _get_jsonschema_validator(obj_version, "Dandiset"))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This migration function seems to expect the metadata instance to be a Dandiset instance.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Validating a PublishedDandiset instance may create a problem if #266 is put in. However, I guess we can have it specifically validate against the PublishedDandiset schema if a PublishedDandiset instance is provided.


obj_migrated = deepcopy(obj)

if obj_ver_tuple == target_ver_tuple:
if obj_version_tuple == to_version_tuple:
return obj_migrated

if obj_ver_tuple < version2tuple("0.6.0") <= target_ver_tuple:
if obj_version_tuple < version2tuple("0.6.0") <= to_version_tuple:
for val in obj_migrated.get("about", []):
if "schemaKey" not in val:
if "identifier" in val and "UBERON" in val["identifier"]:
Expand All @@ -459,6 +448,33 @@ def migrate(
if "schemaKey" not in obj_migrated:
obj_migrated["schemaKey"] = "Dandiset"

# Downgrades

# Simple downgrades that only require removing fields. Removing a field is
# safe only when its value is "empty": None, an empty container (list, tuple,
# etc.), or an empty string.
# List only those fields for which such a notion of "empty" applies.
SIMPLE_DOWNGRADES = [
# version added, fields to remove
("0.7.0", ["sameAs", "releaseNotes"]),
]
for ver_added, fields in SIMPLE_DOWNGRADES:
# additional guards are via ALLOWED_TARGET_SCHEMAS
if to_version_tuple < version2tuple(ver_added) <= obj_version_tuple:
for field in fields:
if field in obj_migrated:
value = obj_migrated.get(field)
# Explicit check for "empty" value per above description.
if value is None or (
not value and isinstance(value, (list, tuple, dict, set, str))
):
del obj_migrated[field]
else:
raise ValueError(
f"Cannot downgrade to {to_version} from "
f"{obj_version} with {field}={value!r} present"
)

# Always update schemaVersion when migrating
obj_migrated["schemaVersion"] = to_version
return obj_migrated
Expand Down
73 changes: 59 additions & 14 deletions dandischema/tests/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,20 +424,6 @@ def test_migrate_value_errors(obj: Dict[str, Any], target: Any, msg: str) -> Non
migrate(obj, to_version=target, skip_validation=True)


def test_migrate_value_errors_lesser_target(monkeypatch: pytest.MonkeyPatch) -> None:
    """
    Check that `migrate()` raises `ValueError` when asked to migrate a metadata
    instance to a schema version lower than the one the instance declares
    """
    from dandischema import metadata

    lower_target = "0.6.0"
    instance = {"schemaVersion": "0.6.7"}

    # Make the lower version an allowed target so that the request reaches the
    # ordering check instead of being rejected as an unsupported target
    monkeypatch.setattr(metadata, "ALLOWED_TARGET_SCHEMAS", [lower_target])

    with pytest.raises(ValueError, match="Cannot migrate from .* to lower"):
        migrate(instance, to_version=lower_target, skip_validation=True)


@skipif_no_network
@skipif_no_test_dandiset_metadata_dir
# Skip for instance name not being DANDI because JSON schema version at `0.4.4`, the
Expand Down Expand Up @@ -509,6 +495,65 @@ def test_migrate_schemaversion_update() -> None:
)


@pytest.mark.ai_generated
def test_migrate_downgrade() -> None:
    """Test downgrade from 0.7.0 to 0.6.10 handling releaseNotes and sameAs fields

    Covers, in order: a downgrade with neither field present, downgrades where a
    field holds an "empty" value (expected to be dropped), downgrades where a
    field holds a non-empty value (expected to raise `ValueError`), and a no-op
    migration of an instance that is already at the target version.
    """
    target = "0.6.10"

    # Minimal metadata at current (0.7.0) version
    base: dict = {
        "schemaKey": "Dandiset",
        "schemaVersion": DANDI_SCHEMA_VERSION,
        "identifier": "DANDI:000000",
    }

    # Neither of the new fields is present (should succeed)
    downgraded = migrate(base, to_version=target, skip_validation=True)
    assert downgraded["schemaVersion"] == target
    assert "releaseNotes" not in downgraded
    assert "sameAs" not in downgraded

    # A new field is present but "empty" (should succeed, field removed)
    droppable = (
        ("releaseNotes", ""),
        ("releaseNotes", None),
        ("sameAs", []),
    )
    for field, empty_value in droppable:
        downgraded = migrate(
            {**base, field: empty_value},
            to_version=target,
            skip_validation=True,
        )
        assert downgraded["schemaVersion"] == target
        assert field not in downgraded

    # A new field carries actual content (should fail)
    blocking = (
        ("releaseNotes", "Releasing during testing"),
        ("sameAs", ["dandi://DANDI-SANDBOX/123456"]),
    )
    for field, real_value in blocking:
        with pytest.raises(ValueError, match="Cannot downgrade to 0.6.10 from"):
            migrate(
                {**base, field: real_value},
                to_version=target,
                skip_validation=True,
            )

    # No-op migration (already at target version)
    already_at_target = {
        "schemaKey": "Dandiset",
        "schemaVersion": "0.6.10",
        "identifier": "DANDI:000000",
    }
    migrated = migrate(already_at_target, to_version=target, skip_validation=True)
    assert migrated == already_at_target
    assert migrated is not already_at_target  # but we do create a copy


@pytest.mark.parametrize(
"files, summary",
[
Expand Down
5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,8 @@ profile = "black"
force_sort_within_sections = true
reverse_relative = true
known_first_party = ["dandischema"]

[tool.pytest.ini_options]
markers = [
"ai_generated: marks tests as generated by AI assistants (deselect with '-m \"not ai_generated\"')",
]
Loading