From 7063e443ac95170b65249b8dd6a0bfc93cf0af75 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 13 Nov 2025 18:09:40 -0500 Subject: [PATCH 1/7] Register ai_generated pytest marker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add pytest marker registration for 'ai_generated' to avoid PytestUnknownMarkWarning when running tests marked with @pytest.mark.ai_generated. This marker is used to identify tests generated by AI assistants, allowing them to be filtered or selected separately if needed. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- pyproject.toml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 54a26d7c..8df389f6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,3 +26,8 @@ profile = "black" force_sort_within_sections = true reverse_relative = true known_first_party = ["dandischema"] + +[tool.pytest.ini_options] +markers = [ + "ai_generated: marks tests as generated by AI assistants (deselect with '-m \"not ai_generated\"')", +] From 8cf1b08188a217cdc4a3b8135507ff6882e27bd4 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 13 Nov 2025 18:07:32 -0500 Subject: [PATCH 2/7] Add support for schema downgrade migrations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change introduces the ability to downgrade metadata schemas to recent versions, allowing for more flexible schema version management. 
Key changes: - Allow migration to version 0.6.10 in addition to the current version - Implement SIMPLE_DOWNGRADES mechanism for safe field removal during downgrade - Remove restriction preventing downgrade to lower schema versions - Add validation to prevent data loss when downgrading with populated fields - Add comprehensive tests for downgrade functionality with releaseNotes field The downgrade mechanism ensures data integrity by raising an error if a field being removed during downgrade contains a non-empty value. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Conflicts: dandischema/metadata.py -- both sides wanted to always set the schema version to the migrated-to version dandischema/tests/test_metadata.py -- only the placement of the newly added tests conflicted --- dandischema/consts.py | 7 ++-- dandischema/metadata.py | 51 +++++++++++++++++------------- dandischema/tests/test_metadata.py | 50 +++++++++++++++++++++-------- 3 files changed, 68 insertions(+), 40 deletions(-) diff --git a/dandischema/consts.py b/dandischema/consts.py index cf02329d..e9872583 100644 --- a/dandischema/consts.py +++ b/dandischema/consts.py @@ -19,10 +19,9 @@ DANDI_SCHEMA_VERSION, ] -# ATM we allow only for a single target version which is current -# migrate has a guard now for this since it cannot migrate to anything but current -# version -ALLOWED_TARGET_SCHEMAS = [DANDI_SCHEMA_VERSION] +# We establish migrations (back) to only a few recent versions. +# When adding changes, please consider whether a migration path should be added. +ALLOWED_TARGET_SCHEMAS = ["0.6.10", DANDI_SCHEMA_VERSION] # This allows multiple schemas for validation, whereas target schemas focus on # migration. diff --git a/dandischema/metadata.py b/dandischema/metadata.py index 19064262..a6aaca30 100644 --- a/dandischema/metadata.py +++ b/dandischema/metadata.py @@ -358,24 +358,18 @@ def migrate( schema version of the provided instance """ - # ATM, we only support the latest schema version as a target. 
See definition of - # `ALLOWED_TARGET_SCHEMAS` for details - if len(ALLOWED_TARGET_SCHEMAS) > 1: - msg = f"Only migration to current version, {DANDI_SCHEMA_VERSION}, is supported" - raise NotImplementedError(msg) - # -------------------------------------------------------------- # Validate DANDI schema version provided in the metadata instance # -------------------------------------------------------------- # DANDI schema version of the provided instance - obj_ver = obj.get("schemaVersion") - if obj_ver is None: + obj_version = obj.get("schemaVersion") + if obj_version is None: msg = ( - "The provided Dandiset metadata instance does not have a " + "The provided metadata instance does not have a " "`schemaVersion` field for specifying the DANDI schema version." ) raise ValueError(msg) - if not isinstance(obj_ver, str): + if not isinstance(obj_version, str): msg = ( "The provided Dandiset metadata instance has a non-string " "`schemaVersion` field for specifying the DANDI schema version." @@ -384,17 +378,17 @@ def migrate( # Check if `obj_ver` is a valid DANDI schema version try: # DANDI schema version of the provided instance in tuple form - obj_ver_tuple = version2tuple(obj_ver) + obj_version_tuple = version2tuple(obj_version) except ValueError as e: msg = ( "The provided Dandiset metadata instance has an invalid " "`schemaVersion` field for specifying the DANDI schema version." ) raise ValueError(msg) from e - if obj_ver not in ALLOWED_INPUT_SCHEMAS: + if obj_version not in ALLOWED_INPUT_SCHEMAS: msg = ( f"The DANDI schema version of the provided Dandiset metadata instance, " - f"{obj_ver!r}, is not one of the supported versions for input " + f"{obj_version!r}, is not one of the supported versions for input " f"Dandiset metadata instances. The supported versions are " f"{ALLOWED_INPUT_SCHEMAS}." 
) @@ -407,7 +401,7 @@ def migrate( # Check if `to_version` is a valid DANDI schema version try: # The target DANDI schema version in tuple form - target_ver_tuple = version2tuple(to_version) + to_version_tuple = version2tuple(to_version) except ValueError as e: msg = ( "The provided target version, {to_version!r}, is not a valid DANDI schema " @@ -424,22 +418,17 @@ def migrate( raise ValueError(msg) # ---------------------------------------------------------------- - # Ensure the target DANDI schema version is at least the DANDI schema version - # of the provided instance - if obj_ver_tuple > target_ver_tuple: - raise ValueError(f"Cannot migrate from {obj_ver} to lower {to_version}.") - # Optionally validate the instance against the DANDI schema it specifies # before migration if not skip_validation: - _validate_obj_json(obj, _get_jsonschema_validator(obj_ver, "Dandiset")) + _validate_obj_json(obj, _get_jsonschema_validator(obj_version, "Dandiset")) obj_migrated = deepcopy(obj) - if obj_ver_tuple == target_ver_tuple: + if obj_version_tuple == to_version_tuple: return obj_migrated - if obj_ver_tuple < version2tuple("0.6.0") <= target_ver_tuple: + if obj_version_tuple < version2tuple("0.6.0") <= to_version_tuple: for val in obj_migrated.get("about", []): if "schemaKey" not in val: if "identifier" in val and "UBERON" in val["identifier"]: @@ -459,6 +448,24 @@ def migrate( if "schemaKey" not in obj_migrated: obj_migrated["schemaKey"] = "Dandiset" + # Downgrades + + # Simple downgrades that just require removing fields, which is totally fine + # if they are empty + SIMPLE_DOWNGRADES = [ + # version added, fields to remove + ("0.6.11", ["releaseNotes"]), + ] + for ver_added, fields in SIMPLE_DOWNGRADES: + # additional guards are via ALLOWED_TARGET_SCHEMAS + if (to_version_tuple < version2tuple(ver_added) <= obj_version_tuple): + for field in fields: + if field in obj_migrated: + if val := obj_migrated.get(field): + raise ValueError(f"Cannot downgrade to {to_version} from 
" + f"{obj_version} with {field}={val!r} present") + del obj_migrated[field] + # Always update schemaVersion when migrating obj_migrated["schemaVersion"] = to_version return obj_migrated diff --git a/dandischema/tests/test_metadata.py b/dandischema/tests/test_metadata.py index 6da7fff3..4b9fa678 100644 --- a/dandischema/tests/test_metadata.py +++ b/dandischema/tests/test_metadata.py @@ -17,6 +17,7 @@ DOI_PREFIX, INSTANCE_NAME, METADATA_DIR, + basic_publishmeta, skipif_instance_name_not_dandi, skipif_no_network, skipif_no_test_dandiset_metadata_dir, @@ -424,20 +425,6 @@ def test_migrate_value_errors(obj: Dict[str, Any], target: Any, msg: str) -> Non migrate(obj, to_version=target, skip_validation=True) -def test_migrate_value_errors_lesser_target(monkeypatch: pytest.MonkeyPatch) -> None: - """ - Test cases when `migrate()` is expected to raise a `ValueError` exception - when the target schema version is lesser than the schema version of the metadata - instance - """ - from dandischema import metadata - - monkeypatch.setattr(metadata, "ALLOWED_TARGET_SCHEMAS", ["0.6.0"]) - - with pytest.raises(ValueError, match="Cannot migrate from .* to lower"): - migrate({"schemaVersion": "0.6.7"}, to_version="0.6.0", skip_validation=True) - - @skipif_no_network @skipif_no_test_dandiset_metadata_dir # Skip for instance name not being DANDI because JSON schema version at `0.4.4`, the @@ -509,6 +496,41 @@ def test_migrate_schemaversion_update() -> None: ) +@pytest.mark.ai_generated +def test_migrate_downgrade_releasenotes() -> None: + """Test downgrade from 0.6.11 to 0.6.10 handling releaseNotes field""" + + # Create a basic PublishedDandiset metadata in 0.6.11 format + meta_dict = { + "schemaVersion": "0.6.11", + } + meta_dict.update(basic_publishmeta(dandi_id="999999")) + + # Test 1: Downgrade without releaseNotes (should succeed) + downgraded = migrate(meta_dict, to_version="0.6.10", skip_validation=True) + assert downgraded["schemaVersion"] == "0.6.10" + assert "releaseNotes" not 
in downgraded + + # Test 2: Downgrade with empty releaseNotes (should succeed) + meta_dict["releaseNotes"] = "" + downgraded = migrate(meta_dict, to_version="0.6.10", skip_validation=True) + assert downgraded["schemaVersion"] == "0.6.10" + assert "releaseNotes" not in downgraded + + # Test 3: Downgrade with non-empty releaseNotes (should fail) + meta_dict["releaseNotes"] = "Releasing during testing" + with pytest.raises(ValueError, match="Cannot downgrade to 0.6.10 from"): + migrate(meta_dict, to_version="0.6.10", skip_validation=True) + + # Test 4: No-op migration (already at target version) + meta_dict_0610 = meta_dict.copy() + meta_dict_0610["schemaVersion"] = "0.6.10" + meta_dict_0610.pop("releaseNotes") + migrated = migrate(meta_dict_0610, to_version="0.6.10", skip_validation=True) + assert migrated == meta_dict_0610 + assert migrated is not meta_dict_0610 # but we do create a copy + + @pytest.mark.parametrize( "files, summary", [ From 427310a2127f5b0e2074fe3d06b1ad8c327e35ef Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 20 Nov 2025 16:27:16 -0800 Subject: [PATCH 3/7] Consider "empty" only None and empty containers and add a comment about that. --- dandischema/metadata.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/dandischema/metadata.py b/dandischema/metadata.py index a6aaca30..fe6b7c04 100644 --- a/dandischema/metadata.py +++ b/dandischema/metadata.py @@ -451,20 +451,28 @@ def migrate( # Downgrades # Simple downgrades that just require removing fields, which is totally fine - # if they are empty + # if they are empty, as they are None or empty containers (list, tuple, etc). + # List only those for which such notion of "empty" applies. 
SIMPLE_DOWNGRADES = [ # version added, fields to remove ("0.6.11", ["releaseNotes"]), ] for ver_added, fields in SIMPLE_DOWNGRADES: # additional guards are via ALLOWED_TARGET_SCHEMAS - if (to_version_tuple < version2tuple(ver_added) <= obj_version_tuple): + if to_version_tuple < version2tuple(ver_added) <= obj_version_tuple: for field in fields: if field in obj_migrated: - if val := obj_migrated.get(field): - raise ValueError(f"Cannot downgrade to {to_version} from " - f"{obj_version} with {field}={val!r} present") - del obj_migrated[field] + value = obj_migrated.get(field) + # Explicit check for "empty" value per above description. + if value is None or ( + not value and isinstance(value, (list, tuple, dict, set)) + ): + del obj_migrated[field] + else: + raise ValueError( + f"Cannot downgrade to {to_version} from " + f"{obj_version} with {field}={value!r} present" + ) # Always update schemaVersion when migrating obj_migrated["schemaVersion"] = to_version From 87b20402407d31fba03730fdc07c8df9f1ef68bd Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 4 Mar 2026 12:43:06 -0500 Subject: [PATCH 4/7] Fix downgrade migrations: correct version, add sameAs, handle empty strings - SIMPLE_DOWNGRADES: "0.6.11" -> "0.7.0" (no 0.6.11 was ever released) - Add sameAs to downgrade fields (added on master via PR #364) - Include str in empty-value check so releaseNotes="" is treated as empty - Rewrite test to use DANDI_SCHEMA_VERSION and minimal metadata dict (0.6.11 was not in ALLOWED_INPUT_SCHEMAS, basic_publishmeta needed instance_name positional arg) - Add test coverage for sameAs downgrade (empty list, non-empty list) Co-Authored-By: Claude Code 2.1.63 / Claude Opus 4.6 --- dandischema/metadata.py | 7 +++-- dandischema/tests/test_metadata.py | 49 ++++++++++++++++++++++-------- 2 files changed, 40 insertions(+), 16 deletions(-) diff --git a/dandischema/metadata.py b/dandischema/metadata.py index fe6b7c04..c2cd4fbd 100644 --- a/dandischema/metadata.py +++ 
b/dandischema/metadata.py @@ -451,11 +451,12 @@ def migrate( # Downgrades # Simple downgrades that just require removing fields, which is totally fine - # if they are empty, as they are None or empty containers (list, tuple, etc). + # if they are empty, as they are None or empty containers (list, tuple, etc) + # or empty strings. # List only those for which such notion of "empty" applies. SIMPLE_DOWNGRADES = [ # version added, fields to remove - ("0.6.11", ["releaseNotes"]), + ("0.7.0", ["sameAs", "releaseNotes"]), ] for ver_added, fields in SIMPLE_DOWNGRADES: # additional guards are via ALLOWED_TARGET_SCHEMAS @@ -465,7 +466,7 @@ def migrate( value = obj_migrated.get(field) # Explicit check for "empty" value per above description. if value is None or ( - not value and isinstance(value, (list, tuple, dict, set)) + not value and isinstance(value, (list, tuple, dict, set, str)) ): del obj_migrated[field] else: diff --git a/dandischema/tests/test_metadata.py b/dandischema/tests/test_metadata.py index 4b9fa678..da68c399 100644 --- a/dandischema/tests/test_metadata.py +++ b/dandischema/tests/test_metadata.py @@ -17,7 +17,6 @@ DOI_PREFIX, INSTANCE_NAME, METADATA_DIR, - basic_publishmeta, skipif_instance_name_not_dandi, skipif_no_network, skipif_no_test_dandiset_metadata_dir, @@ -497,19 +496,21 @@ def test_migrate_schemaversion_update() -> None: @pytest.mark.ai_generated -def test_migrate_downgrade_releasenotes() -> None: - """Test downgrade from 0.6.11 to 0.6.10 handling releaseNotes field""" +def test_migrate_downgrade() -> None: + """Test downgrade from 0.7.0 to 0.6.10 handling releaseNotes and sameAs fields""" - # Create a basic PublishedDandiset metadata in 0.6.11 format - meta_dict = { - "schemaVersion": "0.6.11", + # Minimal metadata at current (0.7.0) version + meta_dict: dict = { + "schemaKey": "Dandiset", + "schemaVersion": DANDI_SCHEMA_VERSION, + "identifier": "DANDI:000000", } - meta_dict.update(basic_publishmeta(dandi_id="999999")) - # Test 1: Downgrade 
without releaseNotes (should succeed) + # Test 1: Downgrade without new fields (should succeed) downgraded = migrate(meta_dict, to_version="0.6.10", skip_validation=True) assert downgraded["schemaVersion"] == "0.6.10" assert "releaseNotes" not in downgraded + assert "sameAs" not in downgraded # Test 2: Downgrade with empty releaseNotes (should succeed) meta_dict["releaseNotes"] = "" @@ -517,15 +518,37 @@ def test_migrate_downgrade_releasenotes() -> None: assert downgraded["schemaVersion"] == "0.6.10" assert "releaseNotes" not in downgraded - # Test 3: Downgrade with non-empty releaseNotes (should fail) + # Test 3: Downgrade with None releaseNotes (should succeed) + meta_dict["releaseNotes"] = None + downgraded = migrate(meta_dict, to_version="0.6.10", skip_validation=True) + assert downgraded["schemaVersion"] == "0.6.10" + assert "releaseNotes" not in downgraded + + # Test 4: Downgrade with empty sameAs list (should succeed) + meta_dict.pop("releaseNotes") + meta_dict["sameAs"] = [] + downgraded = migrate(meta_dict, to_version="0.6.10", skip_validation=True) + assert downgraded["schemaVersion"] == "0.6.10" + assert "sameAs" not in downgraded + + # Test 5: Downgrade with non-empty releaseNotes (should fail) + meta_dict.pop("sameAs") meta_dict["releaseNotes"] = "Releasing during testing" with pytest.raises(ValueError, match="Cannot downgrade to 0.6.10 from"): migrate(meta_dict, to_version="0.6.10", skip_validation=True) - # Test 4: No-op migration (already at target version) - meta_dict_0610 = meta_dict.copy() - meta_dict_0610["schemaVersion"] = "0.6.10" - meta_dict_0610.pop("releaseNotes") + # Test 6: Downgrade with non-empty sameAs (should fail) + meta_dict.pop("releaseNotes") + meta_dict["sameAs"] = ["dandi://DANDI-SANDBOX/123456"] + with pytest.raises(ValueError, match="Cannot downgrade to 0.6.10 from"): + migrate(meta_dict, to_version="0.6.10", skip_validation=True) + + # Test 7: No-op migration (already at target version) + meta_dict_0610 = { + "schemaKey": 
"Dandiset", + "schemaVersion": "0.6.10", + "identifier": "DANDI:000000", + } migrated = migrate(meta_dict_0610, to_version="0.6.10", skip_validation=True) assert migrated == meta_dict_0610 assert migrated is not meta_dict_0610 # but we do create a copy From 40ab34d30e951418d6747906844e30a5a80b3452 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 4 Mar 2026 15:34:03 -0500 Subject: [PATCH 5/7] Fix downgrade as the sameAs was added after 0.7.0 schema release --- dandischema/metadata.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dandischema/metadata.py b/dandischema/metadata.py index c2cd4fbd..a94ad2f1 100644 --- a/dandischema/metadata.py +++ b/dandischema/metadata.py @@ -456,7 +456,8 @@ def migrate( # List only those for which such notion of "empty" applies. SIMPLE_DOWNGRADES = [ # version added, fields to remove - ("0.7.0", ["sameAs", "releaseNotes"]), + ("0.7.0", ["releaseNotes"]), + ("0.8.0", ["sameAs"]), ] for ver_added, fields in SIMPLE_DOWNGRADES: # additional guards are via ALLOWED_TARGET_SCHEMAS From eb5264d232a02626d4bf5aa22e88b87995c7f2a1 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 4 Mar 2026 15:36:18 -0500 Subject: [PATCH 6/7] Prep for 0.8.0 schema release to reflect addition of sameAs --- dandischema/consts.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dandischema/consts.py b/dandischema/consts.py index e9872583..863d7647 100644 --- a/dandischema/consts.py +++ b/dandischema/consts.py @@ -1,6 +1,6 @@ from packaging.version import Version as _Version -DANDI_SCHEMA_VERSION = "0.7.0" +DANDI_SCHEMA_VERSION = "0.8.0" ALLOWED_INPUT_SCHEMAS = [ "0.4.4", "0.5.1", @@ -16,12 +16,13 @@ "0.6.8", "0.6.9", "0.6.10", + "0.7.0", DANDI_SCHEMA_VERSION, ] # We establish migrations (back) to only a few recent versions. # When adding changes, please consider whether a migration path should be added. 
-ALLOWED_TARGET_SCHEMAS = ["0.6.10", DANDI_SCHEMA_VERSION] +ALLOWED_TARGET_SCHEMAS = ["0.6.10", "0.7.0", DANDI_SCHEMA_VERSION] # This allows multiple schemas for validation, whereas target schemas focus on # migration. From b07b2ec31ab3f1f7223825d7ab35ab9fac55bca1 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 4 Mar 2026 15:54:56 -0500 Subject: [PATCH 7/7] Aim to release as schema 0.7.1 --- dandischema/consts.py | 2 +- dandischema/metadata.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dandischema/consts.py b/dandischema/consts.py index 863d7647..8ee5d97a 100644 --- a/dandischema/consts.py +++ b/dandischema/consts.py @@ -1,6 +1,6 @@ from packaging.version import Version as _Version -DANDI_SCHEMA_VERSION = "0.8.0" +DANDI_SCHEMA_VERSION = "0.7.1" ALLOWED_INPUT_SCHEMAS = [ "0.4.4", "0.5.1", diff --git a/dandischema/metadata.py b/dandischema/metadata.py index a94ad2f1..96613e13 100644 --- a/dandischema/metadata.py +++ b/dandischema/metadata.py @@ -457,7 +457,7 @@ def migrate( SIMPLE_DOWNGRADES = [ # version added, fields to remove ("0.7.0", ["releaseNotes"]), - ("0.8.0", ["sameAs"]), + ("0.7.1", ["sameAs"]), ] for ver_added, fields in SIMPLE_DOWNGRADES: # additional guards are via ALLOWED_TARGET_SCHEMAS