Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions dandischema/consts.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from packaging.version import Version as _Version

DANDI_SCHEMA_VERSION = "0.7.0"
DANDI_SCHEMA_VERSION = "0.7.1"
ALLOWED_INPUT_SCHEMAS = [
"0.4.4",
"0.5.1",
Expand All @@ -16,13 +16,13 @@
"0.6.8",
"0.6.9",
"0.6.10",
"0.7.0",
DANDI_SCHEMA_VERSION,
]

# ATM we allow only for a single target version which is current
# migrate has a guard now for this since it cannot migrate to anything but current
# version
ALLOWED_TARGET_SCHEMAS = [DANDI_SCHEMA_VERSION]
# We establish migrations (back) to only a few recent versions.
# When adding changes, please consider whether a migration path should be added.
ALLOWED_TARGET_SCHEMAS = ["0.6.10", "0.7.0", DANDI_SCHEMA_VERSION]

# This allows multiple schemas for validation, whereas target schemas focus on
# migration.
Expand Down
61 changes: 39 additions & 22 deletions dandischema/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,24 +358,18 @@ def migrate(
schema version of the provided instance
"""

# ATM, we only support the latest schema version as a target. See definition of
# `ALLOWED_TARGET_SCHEMAS` for details
if len(ALLOWED_TARGET_SCHEMAS) > 1:
msg = f"Only migration to current version, {DANDI_SCHEMA_VERSION}, is supported"
raise NotImplementedError(msg)

# --------------------------------------------------------------
# Validate DANDI schema version provided in the metadata instance
# --------------------------------------------------------------
# DANDI schema version of the provided instance
obj_ver = obj.get("schemaVersion")
if obj_ver is None:
obj_version = obj.get("schemaVersion")
if obj_version is None:
msg = (
"The provided Dandiset metadata instance does not have a "
"The provided metadata instance does not have a "
"`schemaVersion` field for specifying the DANDI schema version."
)
raise ValueError(msg)
if not isinstance(obj_ver, str):
if not isinstance(obj_version, str):
msg = (
"The provided Dandiset metadata instance has a non-string "
"`schemaVersion` field for specifying the DANDI schema version."
Expand All @@ -384,17 +378,17 @@ def migrate(
# Check if `obj_ver` is a valid DANDI schema version
try:
# DANDI schema version of the provided instance in tuple form
obj_ver_tuple = version2tuple(obj_ver)
obj_version_tuple = version2tuple(obj_version)
except ValueError as e:
msg = (
"The provided Dandiset metadata instance has an invalid "
"`schemaVersion` field for specifying the DANDI schema version."
)
raise ValueError(msg) from e
if obj_ver not in ALLOWED_INPUT_SCHEMAS:
if obj_version not in ALLOWED_INPUT_SCHEMAS:
msg = (
f"The DANDI schema version of the provided Dandiset metadata instance, "
f"{obj_ver!r}, is not one of the supported versions for input "
f"{obj_version!r}, is not one of the supported versions for input "
f"Dandiset metadata instances. The supported versions are "
f"{ALLOWED_INPUT_SCHEMAS}."
)
Expand All @@ -407,7 +401,7 @@ def migrate(
# Check if `to_version` is a valid DANDI schema version
try:
# The target DANDI schema version in tuple form
target_ver_tuple = version2tuple(to_version)
to_version_tuple = version2tuple(to_version)
except ValueError as e:
msg = (
"The provided target version, {to_version!r}, is not a valid DANDI schema "
Expand All @@ -424,22 +418,17 @@ def migrate(
raise ValueError(msg)
# ----------------------------------------------------------------

# Ensure the target DANDI schema version is at least the DANDI schema version
# of the provided instance
if obj_ver_tuple > target_ver_tuple:
raise ValueError(f"Cannot migrate from {obj_ver} to lower {to_version}.")

# Optionally validate the instance against the DANDI schema it specifies
# before migration
if not skip_validation:
_validate_obj_json(obj, _get_jsonschema_validator(obj_ver, "Dandiset"))
_validate_obj_json(obj, _get_jsonschema_validator(obj_version, "Dandiset"))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This migration function seems to expect the metadata instance to be a Dandiset instance.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Validating PublishedDandiset instance may create a problem if #266 is put in. However, I guess we can have it specifically validate against the PublishDandiset schema if a PublishedDandiset instance is provided.


obj_migrated = deepcopy(obj)

if obj_ver_tuple == target_ver_tuple:
if obj_version_tuple == to_version_tuple:
return obj_migrated

if obj_ver_tuple < version2tuple("0.6.0") <= target_ver_tuple:
if obj_version_tuple < version2tuple("0.6.0") <= to_version_tuple:
for val in obj_migrated.get("about", []):
if "schemaKey" not in val:
if "identifier" in val and "UBERON" in val["identifier"]:
Expand All @@ -459,6 +448,34 @@ def migrate(
if "schemaKey" not in obj_migrated:
obj_migrated["schemaKey"] = "Dandiset"

# Downgrades

# Simple downgrades that just require removing fields, which is totally fine
# if they are empty, as they are None or empty containers (list, tuple, etc)
# or empty strings.
# List only those for which such notion of "empty" applies.
SIMPLE_DOWNGRADES = [
# version added, fields to remove
("0.7.0", ["releaseNotes"]),
("0.7.1", ["sameAs"]),
]
for ver_added, fields in SIMPLE_DOWNGRADES:
# additional guards are via ALLOWED_TARGET_SCHEMAS
if to_version_tuple < version2tuple(ver_added) <= obj_version_tuple:
for field in fields:
if field in obj_migrated:
value = obj_migrated.get(field)
# Explicit check for "empty" value per above description.
if value is None or (
not value and isinstance(value, (list, tuple, dict, set, str))
):
del obj_migrated[field]
else:
raise ValueError(
f"Cannot downgrade to {to_version} from "
f"{obj_version} with {field}={value!r} present"
)

# Always update schemaVersion when migrating
obj_migrated["schemaVersion"] = to_version
return obj_migrated
Expand Down
73 changes: 59 additions & 14 deletions dandischema/tests/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,20 +424,6 @@ def test_migrate_value_errors(obj: Dict[str, Any], target: Any, msg: str) -> Non
migrate(obj, to_version=target, skip_validation=True)


def test_migrate_value_errors_lesser_target(monkeypatch: pytest.MonkeyPatch) -> None:
"""
Test cases when `migrate()` is expected to raise a `ValueError` exception
when the target schema version is lesser than the schema version of the metadata
instance
"""
from dandischema import metadata

monkeypatch.setattr(metadata, "ALLOWED_TARGET_SCHEMAS", ["0.6.0"])

with pytest.raises(ValueError, match="Cannot migrate from .* to lower"):
migrate({"schemaVersion": "0.6.7"}, to_version="0.6.0", skip_validation=True)


@skipif_no_network
@skipif_no_test_dandiset_metadata_dir
# Skip for instance name not being DANDI because JSON schema version at `0.4.4`, the
Expand Down Expand Up @@ -509,6 +495,65 @@ def test_migrate_schemaversion_update() -> None:
)


@pytest.mark.ai_generated
def test_migrate_downgrade() -> None:
    """
    Test downgrading from the current schema version (``DANDI_SCHEMA_VERSION``)
    to 0.6.10.

    Exercises the "simple downgrade" handling in ``migrate()`` for the
    ``releaseNotes`` (introduced in 0.7.0) and ``sameAs`` (introduced in 0.7.1)
    fields: empty values (``None``, ``""``, ``[]``) are silently dropped, while
    non-empty values cause the downgrade to fail with ``ValueError``.
    """

    # Minimal metadata at the current (DANDI_SCHEMA_VERSION) schema version
    meta_dict: dict = {
        "schemaKey": "Dandiset",
        "schemaVersion": DANDI_SCHEMA_VERSION,
        "identifier": "DANDI:000000",
    }

    # Test 1: Downgrade with neither new field present (should succeed)
    downgraded = migrate(meta_dict, to_version="0.6.10", skip_validation=True)
    assert downgraded["schemaVersion"] == "0.6.10"
    assert "releaseNotes" not in downgraded
    assert "sameAs" not in downgraded

    # Test 2: Downgrade with empty-string releaseNotes (should succeed; field dropped)
    meta_dict["releaseNotes"] = ""
    downgraded = migrate(meta_dict, to_version="0.6.10", skip_validation=True)
    assert downgraded["schemaVersion"] == "0.6.10"
    assert "releaseNotes" not in downgraded

    # Test 3: Downgrade with None releaseNotes (should succeed; field dropped)
    meta_dict["releaseNotes"] = None
    downgraded = migrate(meta_dict, to_version="0.6.10", skip_validation=True)
    assert downgraded["schemaVersion"] == "0.6.10"
    assert "releaseNotes" not in downgraded

    # Test 4: Downgrade with empty sameAs list (should succeed; field dropped)
    meta_dict.pop("releaseNotes")
    meta_dict["sameAs"] = []
    downgraded = migrate(meta_dict, to_version="0.6.10", skip_validation=True)
    assert downgraded["schemaVersion"] == "0.6.10"
    assert "sameAs" not in downgraded

    # Test 5: Downgrade with non-empty releaseNotes (should fail with ValueError)
    meta_dict.pop("sameAs")
    meta_dict["releaseNotes"] = "Releasing during testing"
    with pytest.raises(ValueError, match="Cannot downgrade to 0.6.10 from"):
        migrate(meta_dict, to_version="0.6.10", skip_validation=True)

    # Test 6: Downgrade with non-empty sameAs (should fail with ValueError)
    meta_dict.pop("releaseNotes")
    meta_dict["sameAs"] = ["dandi://DANDI-SANDBOX/123456"]
    with pytest.raises(ValueError, match="Cannot downgrade to 0.6.10 from"):
        migrate(meta_dict, to_version="0.6.10", skip_validation=True)

    # Test 7: No-op migration (already at target version) returns an equal copy
    meta_dict_0610 = {
        "schemaKey": "Dandiset",
        "schemaVersion": "0.6.10",
        "identifier": "DANDI:000000",
    }
    migrated = migrate(meta_dict_0610, to_version="0.6.10", skip_validation=True)
    assert migrated == meta_dict_0610
    assert migrated is not meta_dict_0610  # but we do create a copy


@pytest.mark.parametrize(
"files, summary",
[
Expand Down
5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,8 @@ profile = "black"
force_sort_within_sections = true
reverse_relative = true
known_first_party = ["dandischema"]

[tool.pytest.ini_options]
markers = [
"ai_generated: marks tests as generated by AI assistants (deselect with '-m \"not ai_generated\"')",
]
Loading