diff --git a/dandischema/models.py b/dandischema/models.py index b35275fe..bb3b8f2e 100644 --- a/dandischema/models.py +++ b/dandischema/models.py @@ -85,7 +85,8 @@ "[a-f0-9]{8}[-]*[a-f0-9]{4}[-]*" "[a-f0-9]{4}[-]*[a-f0-9]{4}[-]*[a-f0-9]{12}$" ) ASSET_UUID_PATTERN = r"^dandiasset:" + UUID_PATTERN -VERSION_PATTERN = r"\d{6}/\d+\.\d+\.\d+" +VERSION_NUM_PATTERN = r"\d+\.\d+\.\d+" +VERSION_PATTERN = rf"\d{{6}}/{VERSION_NUM_PATTERN}" _INNER_DANDI_DOI_PATTERN = ( rf"{DOI_PREFIX_PATTERN}/{ID_PATTERN.lower()}\.{VERSION_PATTERN}" ) @@ -1666,7 +1667,7 @@ def contributor_musthave_contact( id: str = Field( description="Uniform resource identifier", pattern=( - rf"^({ID_PATTERN}|{ID_PATTERN.lower()}):\d{{6}}(/(draft|\d+\.\d+\.\d+))$" + rf"^({ID_PATTERN}|{ID_PATTERN.lower()}):\d{{6}}(/(draft|{VERSION_NUM_PATTERN}))$" ), json_schema_extra={"readOnly": True}, ) @@ -1677,6 +1678,28 @@ def contributor_musthave_contact( pattern=rf"^{ID_PATTERN}:\d{{6}}$", json_schema_extra={"readOnly": True, "nskey": "schema"}, ) + + sameAs: Annotated[ + Optional[ + list[ + Annotated[ + str, + StringConstraints( + pattern=( + rf"^dandi://{UNVENDORED_ID_PATTERN}/\d{{6}}" + rf"(@(draft|{VERSION_NUM_PATTERN}))?(/\S+)?$" + ) + ), + ] + ] + ], + Field( + default=None, + description="Known DANDI URLs of the Dandiset at other DANDI instances.", + json_schema_extra={"readOnly": True, "nskey": "schema"}, + ), + ] + name: str = Field( title="Dandiset title", description="A title associated with the Dandiset.", diff --git a/dandischema/tests/test_models.py b/dandischema/tests/test_models.py index 8bb87ccb..2a50b174 100644 --- a/dandischema/tests/test_models.py +++ b/dandischema/tests/test_models.py @@ -41,6 +41,53 @@ _INSTANCE_CONFIG = get_instance_config() +@pytest.fixture +def base_dandiset_metadata() -> dict[str, Any]: + """ + Fixture providing basic Dandiset metadata for constructing a `Dandiset` instance. + + Returns: + Dict[str, Any] + A dictionary containing basic Dandiset metadata without `doi`, `datePublished`, + and `publishedBy`, suitable for constructing a `Dandiset` instance but not a + `PublishedDandiset` instance. + + Note: + This metadata is returned by a fixture to ensure that each test receives a fresh + copy of the metadata dictionary. + """ + + return { + "identifier": f"{INSTANCE_NAME}:999999", + "id": f"{INSTANCE_NAME}:999999/draft", + "version": "1.0.0", + "name": "testing dataset", + "description": "testing", + "contributor": [ + { + "name": "last name, first name", + "email": "someone@dandiarchive.org", + "roleName": [RoleType("dcite:ContactPerson")], + "schemaKey": "Person", + } + ], + "license": [LicenseType("spdx:CC-BY-4.0")], + "citation": "Last, first (2021). Test citation.", + "assetsSummary": { + "numberOfBytes": 0, + "numberOfFiles": 0, + "dataStandard": [{"name": "NWB"}], + "approach": [{"name": "electrophysiology"}], + "measurementTechnique": [{"name": "two-photon microscopy technique"}], + "species": [{"name": "Human"}], + }, + "manifestLocation": [ + "https://api.dandiarchive.org/api/dandisets/999999/versions/draft/assets/" + ], + "url": "https://dandiarchive.org/dandiset/999999/draft", + } + + @pytest.mark.parametrize( ("y_type", "anys_value"), [ @@ -403,46 +450,15 @@ def test_autogenerated_titles() -> None: @skipif_no_doi_prefix -def test_dandimeta_1() -> None: +def test_dandimeta_1(base_dandiset_metadata: dict[str, Any]) -> None: """checking basic metadata for publishing""" assert DOI_PREFIX is not None - # metadata without doi, datePublished and publishedBy - meta_dict: Dict[str, Any] = { - "identifier": f"{INSTANCE_NAME}:999999", - "id": f"{INSTANCE_NAME}:999999/draft", - "version": "1.0.0", - "name": "testing dataset", - "description": "testing", - "contributor": [ - { - "name": "last name, first name", - "email": "someone@dandiarchive.org", - "roleName": [RoleType("dcite:ContactPerson")], - "schemaKey": "Person", - } - ], - "license": [LicenseType("spdx:CC-BY-4.0")], - "citation": "Last, first (2021). Test citation.", - "assetsSummary": { - "numberOfBytes": 0, - "numberOfFiles": 0, - "dataStandard": [{"name": "NWB"}], - "approach": [{"name": "electrophysiology"}], - "measurementTechnique": [{"name": "two-photon microscopy technique"}], - "species": [{"name": "Human"}], - }, - "manifestLocation": [ - "https://api.dandiarchive.org/api/dandisets/999999/versions/draft/assets/" - ], - "url": "https://dandiarchive.org/dandiset/999999/draft", - } - # should work for Dandiset but PublishedDandiset should raise an error - Dandiset(**meta_dict) + Dandiset(**base_dandiset_metadata) with pytest.raises(ValidationError) as exc: - PublishedDandiset(**meta_dict) + PublishedDandiset(**base_dandiset_metadata) ErrDetail = namedtuple("ErrDetail", ["type", "msg"]) @@ -490,21 +506,23 @@ def test_dandimeta_1() -> None: # after adding basic meta required to publish: doi, datePublished, publishedBy, assetsSummary, # so PublishedDandiset should work - meta_dict["url"] = "https://dandiarchive.org/dandiset/999999/0.0.0" - meta_dict["id"] = f"{INSTANCE_NAME}:999999/0.0.0" - meta_dict["version"] = "0.0.0" - meta_dict.update( + base_dandiset_metadata["url"] = "https://dandiarchive.org/dandiset/999999/0.0.0" + base_dandiset_metadata["id"] = f"{INSTANCE_NAME}:999999/0.0.0" + base_dandiset_metadata["version"] = "0.0.0" + base_dandiset_metadata.update( basic_publishmeta(INSTANCE_NAME, dandi_id="999999", prefix=DOI_PREFIX) ) - meta_dict["assetsSummary"].update(**{"numberOfBytes": 1, "numberOfFiles": 1}) + base_dandiset_metadata["assetsSummary"].update( + **{"numberOfBytes": 1, "numberOfFiles": 1} + ) # Test that releaseNotes is optional (can be omitted) - dandiset_without_notes = PublishedDandiset(**meta_dict) + dandiset_without_notes = PublishedDandiset(**base_dandiset_metadata) assert dandiset_without_notes.releaseNotes is None # Test that releaseNotes can be set to a string value - meta_dict["releaseNotes"] = "Releasing during testing" - dandiset_with_notes = PublishedDandiset(**meta_dict) + base_dandiset_metadata["releaseNotes"] = "Releasing during testing" + dandiset_with_notes = PublishedDandiset(**base_dandiset_metadata) assert dandiset_with_notes.releaseNotes == "Releasing during testing" # Test that releaseNotes appears in model_dump @@ -983,3 +1001,78 @@ class VendoredFieldModel(BaseModel): # Validate the invalid vendored fields against the vendored patterns with pytest.raises(ValidationError): VendoredFieldModel.model_validate(invalid_vendored_fields) + + +class TestDandisetSameAs: + def test_not_specified(self, base_dandiset_metadata: dict[str, Any]) -> None: + """ + Test the case that `sameAs` is not specified in instantiating a `Dandiset` + """ + dandiset = Dandiset.model_validate(base_dandiset_metadata) + assert dandiset.sameAs is None + + def test_empty_list(self, base_dandiset_metadata: dict[str, Any]) -> None: + """ + Test the case that `sameAs` in a `Dandiset` is initialized to an empty list + """ + base_dandiset_metadata["sameAs"] = [] + dandiset = Dandiset.model_validate(base_dandiset_metadata) + assert dandiset.sameAs == [] + + @pytest.mark.parametrize( + "dandi_urls", + [ + ["dandi://DANDI-SANDBOX/123456"], + ["dandi://DANDI-SANDBOX/123456@draft"], + ["dandi://DANDI-SANDBOX/123456@1.22.33"], + ["dandi://DANDI-SANDBOX/123456/path"], + ["dandi://DANDI-SANDBOX/123456@draft/path"], + ["dandi://DANDI-SANDBOX/123456@1.22.33/path"], + ["dandi://EMBER-DANDI/123456"], + ["dandi://DANDI-SANDBOX/123456", "dandi://EMBER-DANDI/123456"], + ["dandi://A/123456", "dandi://B/654321"], + ], + ) + def test_with_valid_dandi_urls( + self, dandi_urls: list[str], base_dandiset_metadata: dict[str, Any] + ) -> None: + """ + Test the case that `sameAs` is initialized to a list of valid DANDI URLs + """ + base_dandiset_metadata["sameAs"] = dandi_urls + dandiset = Dandiset.model_validate(base_dandiset_metadata) + assert dandiset.sameAs == dandi_urls + + @pytest.mark.parametrize( + "dandi_urls", + [ + # List of invalid DANDI URLs + ["dandi://DANDI-SANDBOX/123456@abc"], + ["dandi://DANDI-SANDBOX/123456@1.22.33.44"], + ["dandi://DANDI-SANDBOX/123456/"], + ["dandi://DANDI-SANDBOX/123456@draft/"], + ["dandi://DANDI-SANDBOX/123456@1.22.33/"], + ["http://DANDI-SANDBOX/123456"], # Not dandi:// scheme + ["dandi://DANDI- SANDBOX/123456"], # Containing a space + ["dandi://"], # Missing instance name and dandiset id + ["dandi://DANDI-SANDBOX"], # Missing dandiset id + ["dandi://DANDI-SANDBOX/12345"], # Dandiset id too short + ["dandi://-DANDI/123456"], # Invalid instance name + ["dandi://EMBER3DANDI/123456"], # Invalid instance name + ["dandi://DANDI-SANDBOX/123456", "dandi://DANDI- SANDBOX/123456"], + [42], + # Value that is not a list + "DANDI-SANDBOX:123456", + 42, + ], + ) + def test_with_invalid_dandi_urls( + self, dandi_urls: Any, base_dandiset_metadata: dict[str, Any] + ) -> None: + """ + Test the case that `sameAs` is initialized to an invalid list of DANDI URLs + or a value that is not a list + """ + base_dandiset_metadata["sameAs"] = dandi_urls + with pytest.raises(ValidationError): + Dandiset.model_validate(base_dandiset_metadata)