Skip to content

Commit 07aa22c

Browse files
authored
Merge pull request #364 from dandi/add-sameas
Add `sameAs` field to the `Dandiset` model
2 parents be7d361 + 419313b commit 07aa22c

File tree

2 files changed

+160
-44
lines changed

2 files changed

+160
-44
lines changed

dandischema/models.py

Lines changed: 25 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -85,7 +85,8 @@
8585
"[a-f0-9]{8}[-]*[a-f0-9]{4}[-]*" "[a-f0-9]{4}[-]*[a-f0-9]{4}[-]*[a-f0-9]{12}$"
8686
)
8787
ASSET_UUID_PATTERN = r"^dandiasset:" + UUID_PATTERN
88-
VERSION_PATTERN = r"\d{6}/\d+\.\d+\.\d+"
88+
VERSION_NUM_PATTERN = r"\d+\.\d+\.\d+"
89+
VERSION_PATTERN = rf"\d{{6}}/{VERSION_NUM_PATTERN}"
8990
_INNER_DANDI_DOI_PATTERN = (
9091
rf"{DOI_PREFIX_PATTERN}/{ID_PATTERN.lower()}\.{VERSION_PATTERN}"
9192
)
@@ -1666,7 +1667,7 @@ def contributor_musthave_contact(
16661667
id: str = Field(
16671668
description="Uniform resource identifier",
16681669
pattern=(
1669-
rf"^({ID_PATTERN}|{ID_PATTERN.lower()}):\d{{6}}(/(draft|\d+\.\d+\.\d+))$"
1670+
rf"^({ID_PATTERN}|{ID_PATTERN.lower()}):\d{{6}}(/(draft|{VERSION_NUM_PATTERN}))$"
16701671
),
16711672
json_schema_extra={"readOnly": True},
16721673
)
@@ -1677,6 +1678,28 @@ def contributor_musthave_contact(
16771678
pattern=rf"^{ID_PATTERN}:\d{{6}}$",
16781679
json_schema_extra={"readOnly": True, "nskey": "schema"},
16791680
)
1681+
1682+
sameAs: Annotated[
1683+
Optional[
1684+
list[
1685+
Annotated[
1686+
str,
1687+
StringConstraints(
1688+
pattern=(
1689+
rf"^dandi://{UNVENDORED_ID_PATTERN}/\d{{6}}"
1690+
rf"(@(draft|{VERSION_NUM_PATTERN}))?(/\S+)?$"
1691+
)
1692+
),
1693+
]
1694+
]
1695+
],
1696+
Field(
1697+
default=None,
1698+
description="Known DANDI URLs of the Dandiset at other DANDI instances.",
1699+
json_schema_extra={"readOnly": True, "nskey": "schema"},
1700+
),
1701+
]
1702+
16801703
name: str = Field(
16811704
title="Dandiset title",
16821705
description="A title associated with the Dandiset.",

dandischema/tests/test_models.py

Lines changed: 135 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,53 @@
4141
_INSTANCE_CONFIG = get_instance_config()
4242

4343

44+
@pytest.fixture
45+
def base_dandiset_metadata() -> dict[str, Any]:
46+
"""
47+
Fixture providing basic Dandiset metadata for constructing a `Dandiset` instance.
48+
49+
Returns:
50+
Dict[str, Any]
51+
A dictionary containing basic Dandiset metadata without `doi`, `datePublished`,
52+
and `publishedBy`, suitable for constructing a `Dandiset` instance but not a
53+
`PublishedDandiset` instance.
54+
55+
Note:
56+
This metadata is returned by a fixture to ensure that each test receives a fresh
57+
copy of the metadata dictionary.
58+
"""
59+
60+
return {
61+
"identifier": f"{INSTANCE_NAME}:999999",
62+
"id": f"{INSTANCE_NAME}:999999/draft",
63+
"version": "1.0.0",
64+
"name": "testing dataset",
65+
"description": "testing",
66+
"contributor": [
67+
{
68+
"name": "last name, first name",
69+
"email": "someone@dandiarchive.org",
70+
"roleName": [RoleType("dcite:ContactPerson")],
71+
"schemaKey": "Person",
72+
}
73+
],
74+
"license": [LicenseType("spdx:CC-BY-4.0")],
75+
"citation": "Last, first (2021). Test citation.",
76+
"assetsSummary": {
77+
"numberOfBytes": 0,
78+
"numberOfFiles": 0,
79+
"dataStandard": [{"name": "NWB"}],
80+
"approach": [{"name": "electrophysiology"}],
81+
"measurementTechnique": [{"name": "two-photon microscopy technique"}],
82+
"species": [{"name": "Human"}],
83+
},
84+
"manifestLocation": [
85+
"https://api.dandiarchive.org/api/dandisets/999999/versions/draft/assets/"
86+
],
87+
"url": "https://dandiarchive.org/dandiset/999999/draft",
88+
}
89+
90+
4491
@pytest.mark.parametrize(
4592
("y_type", "anys_value"),
4693
[
@@ -403,46 +450,15 @@ def test_autogenerated_titles() -> None:
403450

404451

405452
@skipif_no_doi_prefix
406-
def test_dandimeta_1() -> None:
453+
def test_dandimeta_1(base_dandiset_metadata: dict[str, Any]) -> None:
407454
"""checking basic metadata for publishing"""
408455

409456
assert DOI_PREFIX is not None
410457

411-
# metadata without doi, datePublished and publishedBy
412-
meta_dict: Dict[str, Any] = {
413-
"identifier": f"{INSTANCE_NAME}:999999",
414-
"id": f"{INSTANCE_NAME}:999999/draft",
415-
"version": "1.0.0",
416-
"name": "testing dataset",
417-
"description": "testing",
418-
"contributor": [
419-
{
420-
"name": "last name, first name",
421-
"email": "someone@dandiarchive.org",
422-
"roleName": [RoleType("dcite:ContactPerson")],
423-
"schemaKey": "Person",
424-
}
425-
],
426-
"license": [LicenseType("spdx:CC-BY-4.0")],
427-
"citation": "Last, first (2021). Test citation.",
428-
"assetsSummary": {
429-
"numberOfBytes": 0,
430-
"numberOfFiles": 0,
431-
"dataStandard": [{"name": "NWB"}],
432-
"approach": [{"name": "electrophysiology"}],
433-
"measurementTechnique": [{"name": "two-photon microscopy technique"}],
434-
"species": [{"name": "Human"}],
435-
},
436-
"manifestLocation": [
437-
"https://api.dandiarchive.org/api/dandisets/999999/versions/draft/assets/"
438-
],
439-
"url": "https://dandiarchive.org/dandiset/999999/draft",
440-
}
441-
442458
# should work for Dandiset but PublishedDandiset should raise an error
443-
Dandiset(**meta_dict)
459+
Dandiset(**base_dandiset_metadata)
444460
with pytest.raises(ValidationError) as exc:
445-
PublishedDandiset(**meta_dict)
461+
PublishedDandiset(**base_dandiset_metadata)
446462

447463
ErrDetail = namedtuple("ErrDetail", ["type", "msg"])
448464

@@ -490,21 +506,23 @@ def test_dandimeta_1() -> None:
490506

491507
# after adding basic meta required to publish: doi, datePublished, publishedBy, assetsSummary,
492508
# so PublishedDandiset should work
493-
meta_dict["url"] = "https://dandiarchive.org/dandiset/999999/0.0.0"
494-
meta_dict["id"] = f"{INSTANCE_NAME}:999999/0.0.0"
495-
meta_dict["version"] = "0.0.0"
496-
meta_dict.update(
509+
base_dandiset_metadata["url"] = "https://dandiarchive.org/dandiset/999999/0.0.0"
510+
base_dandiset_metadata["id"] = f"{INSTANCE_NAME}:999999/0.0.0"
511+
base_dandiset_metadata["version"] = "0.0.0"
512+
base_dandiset_metadata.update(
497513
basic_publishmeta(INSTANCE_NAME, dandi_id="999999", prefix=DOI_PREFIX)
498514
)
499-
meta_dict["assetsSummary"].update(**{"numberOfBytes": 1, "numberOfFiles": 1})
515+
base_dandiset_metadata["assetsSummary"].update(
516+
**{"numberOfBytes": 1, "numberOfFiles": 1}
517+
)
500518

501519
# Test that releaseNotes is optional (can be omitted)
502-
dandiset_without_notes = PublishedDandiset(**meta_dict)
520+
dandiset_without_notes = PublishedDandiset(**base_dandiset_metadata)
503521
assert dandiset_without_notes.releaseNotes is None
504522

505523
# Test that releaseNotes can be set to a string value
506-
meta_dict["releaseNotes"] = "Releasing during testing"
507-
dandiset_with_notes = PublishedDandiset(**meta_dict)
524+
base_dandiset_metadata["releaseNotes"] = "Releasing during testing"
525+
dandiset_with_notes = PublishedDandiset(**base_dandiset_metadata)
508526
assert dandiset_with_notes.releaseNotes == "Releasing during testing"
509527

510528
# Test that releaseNotes appears in model_dump
@@ -983,3 +1001,78 @@ class VendoredFieldModel(BaseModel):
9831001
# Validate the invalid vendored fields against the vendored patterns
9841002
with pytest.raises(ValidationError):
9851003
VendoredFieldModel.model_validate(invalid_vendored_fields)
1004+
1005+
1006+
class TestDandisetSameAs:
1007+
def test_not_specified(self, base_dandiset_metadata: dict[str, Any]) -> None:
1008+
"""
1009+
Test the case that `sameAs` is not specified in instantiating a `Dandiset`
1010+
"""
1011+
dandiset = Dandiset.model_validate(base_dandiset_metadata)
1012+
assert dandiset.sameAs is None
1013+
1014+
def test_empty_list(self, base_dandiset_metadata: dict[str, Any]) -> None:
1015+
"""
1016+
Test the case that `sameAs` in a `Dandiset` is initialized to an empty list
1017+
"""
1018+
base_dandiset_metadata["sameAs"] = []
1019+
dandiset = Dandiset.model_validate(base_dandiset_metadata)
1020+
assert dandiset.sameAs == []
1021+
1022+
@pytest.mark.parametrize(
1023+
"dandi_urls",
1024+
[
1025+
["dandi://DANDI-SANDBOX/123456"],
1026+
["dandi://DANDI-SANDBOX/123456@draft"],
1027+
["dandi://DANDI-SANDBOX/123456@1.22.33"],
1028+
["dandi://DANDI-SANDBOX/123456/path"],
1029+
["dandi://DANDI-SANDBOX/123456@draft/path"],
1030+
["dandi://DANDI-SANDBOX/123456@1.22.33/path"],
1031+
["dandi://EMBER-DANDI/123456"],
1032+
["dandi://DANDI-SANDBOX/123456", "dandi://EMBER-DANDI/123456"],
1033+
["dandi://A/123456", "dandi://B/654321"],
1034+
],
1035+
)
1036+
def test_with_valid_dandi_urls(
1037+
self, dandi_urls: list[str], base_dandiset_metadata: dict[str, Any]
1038+
) -> None:
1039+
"""
1040+
Test the case that `sameAs` is initialized to a list of valid DANDI URLs
1041+
"""
1042+
base_dandiset_metadata["sameAs"] = dandi_urls
1043+
dandiset = Dandiset.model_validate(base_dandiset_metadata)
1044+
assert dandiset.sameAs == dandi_urls
1045+
1046+
@pytest.mark.parametrize(
1047+
"dandi_urls",
1048+
[
1049+
# List of invalid DANDI URLs
1050+
["dandi://DANDI-SANDBOX/123456@abc"],
1051+
["dandi://DANDI-SANDBOX/123456@1.22.33.44"],
1052+
["dandi://DANDI-SANDBOX/123456/"],
1053+
["dandi://DANDI-SANDBOX/123456@draft/"],
1054+
["dandi://DANDI-SANDBOX/123456@1.22.33/"],
1055+
["http://DANDI-SANDBOX/123456"], # Not dandi:// scheme
1056+
["dandi://DANDI- SANDBOX/123456"], # Containing a space
1057+
["dandi://"], # Missing instance name and dandiset id
1058+
["dandi://DANDI-SANDBOX"], # Missing dandiset id
1059+
["dandi://DANDI-SANDBOX/12345"], # Dandiset id too short
1060+
["dandi://-DANDI/123456"], # Invalid instance name
1061+
["dandi://EMBER3DANDI/123456"], # Invalid instance name
1062+
["dandi://DANDI-SANDBOX/123456", "dandi://DANDI- SANDBOX/123456"],
1063+
[42],
1064+
# Value that is not a list
1065+
"DANDI-SANDBOX:123456",
1066+
42,
1067+
],
1068+
)
1069+
def test_with_invalid_dandi_urls(
1070+
self, dandi_urls: Any, base_dandiset_metadata: dict[str, Any]
1071+
) -> None:
1072+
"""
1073+
Test the case that `sameAs` is initialized to an invalid list of DANDI URLs
1074+
or a value that is not a list
1075+
"""
1076+
base_dandiset_metadata["sameAs"] = dandi_urls
1077+
with pytest.raises(ValidationError):
1078+
Dandiset.model_validate(base_dandiset_metadata)

0 commit comments

Comments
 (0)