Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 25 additions & 2 deletions dandischema/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,8 @@
"[a-f0-9]{8}[-]*[a-f0-9]{4}[-]*" "[a-f0-9]{4}[-]*[a-f0-9]{4}[-]*[a-f0-9]{12}$"
)
ASSET_UUID_PATTERN = r"^dandiasset:" + UUID_PATTERN
VERSION_PATTERN = r"\d{6}/\d+\.\d+\.\d+"
VERSION_NUM_PATTERN = r"\d+\.\d+\.\d+"
VERSION_PATTERN = rf"\d{{6}}/{VERSION_NUM_PATTERN}"
_INNER_DANDI_DOI_PATTERN = (
rf"{DOI_PREFIX_PATTERN}/{ID_PATTERN.lower()}\.{VERSION_PATTERN}"
)
Expand Down Expand Up @@ -1666,7 +1667,7 @@ def contributor_musthave_contact(
id: str = Field(
description="Uniform resource identifier",
pattern=(
rf"^({ID_PATTERN}|{ID_PATTERN.lower()}):\d{{6}}(/(draft|\d+\.\d+\.\d+))$"
rf"^({ID_PATTERN}|{ID_PATTERN.lower()}):\d{{6}}(/(draft|{VERSION_NUM_PATTERN}))$"
),
json_schema_extra={"readOnly": True},
)
Expand All @@ -1677,6 +1678,28 @@ def contributor_musthave_contact(
pattern=rf"^{ID_PATTERN}:\d{{6}}$",
json_schema_extra={"readOnly": True, "nskey": "schema"},
)

sameAs: Annotated[
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

note: I thought we had merged

already... apparently not, so later we would need to add handling of sameAs as well there

Optional[
list[
Annotated[
str,
StringConstraints(
pattern=(
rf"^dandi://{UNVENDORED_ID_PATTERN}/\d{{6}}"
rf"(@(draft|{VERSION_NUM_PATTERN}))?(/\S+)?$"
Comment on lines +1689 to +1690
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only comment I have is whether this should be a DANDI-specific sameAs or whether it may point anywhere. Say someone puts the same dataset on Zenodo: does that get added here, or in related resources?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would keep it constrained to point only to our instances, while coding tools defensively (the value could be anything), and indeed refer people to related resources for anything extra. Although, we could potentially use sameAs to point to DataLad dandisets here... WDYT?

)
),
]
]
],
Field(
default=None,
description="Known DANDI URLs of the Dandiset at other DANDI instances.",
json_schema_extra={"readOnly": True, "nskey": "schema"},
),
]

name: str = Field(
title="Dandiset title",
description="A title associated with the Dandiset.",
Expand Down
177 changes: 135 additions & 42 deletions dandischema/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,53 @@
_INSTANCE_CONFIG = get_instance_config()


@pytest.fixture
def base_dandiset_metadata() -> dict[str, Any]:
    """
    Build a minimal metadata dictionary for instantiating a `Dandiset`.

    The dictionary deliberately leaves out `doi`, `datePublished`, and
    `publishedBy`, so it is sufficient for a `Dandiset` instance but not for a
    `PublishedDandiset` instance.

    Returns:
        dict[str, Any]
            The basic Dandiset metadata.

    Note:
        The metadata is produced by a fixture so that every test gets its own
        freshly built dictionary and may modify it freely.
    """
    # The single contributor, carrying the contact-person role
    contact_person = {
        "name": "last name, first name",
        "email": "someone@dandiarchive.org",
        "roleName": [RoleType("dcite:ContactPerson")],
        "schemaKey": "Person",
    }

    # Summary of an empty Dandiset (no bytes, no files)
    assets_summary = {
        "numberOfBytes": 0,
        "numberOfFiles": 0,
        "dataStandard": [{"name": "NWB"}],
        "approach": [{"name": "electrophysiology"}],
        "measurementTechnique": [{"name": "two-photon microscopy technique"}],
        "species": [{"name": "Human"}],
    }

    metadata: dict[str, Any] = {
        "identifier": f"{INSTANCE_NAME}:999999",
        "id": f"{INSTANCE_NAME}:999999/draft",
        "version": "1.0.0",
        "name": "testing dataset",
        "description": "testing",
        "contributor": [contact_person],
        "license": [LicenseType("spdx:CC-BY-4.0")],
        "citation": "Last, first (2021). Test citation.",
        "assetsSummary": assets_summary,
        "manifestLocation": [
            "https://api.dandiarchive.org/api/dandisets/999999/versions/draft/assets/"
        ],
        "url": "https://dandiarchive.org/dandiset/999999/draft",
    }
    return metadata


@pytest.mark.parametrize(
("y_type", "anys_value"),
[
Expand Down Expand Up @@ -403,46 +450,15 @@ def test_autogenerated_titles() -> None:


@skipif_no_doi_prefix
def test_dandimeta_1() -> None:
def test_dandimeta_1(base_dandiset_metadata: dict[str, Any]) -> None:
"""checking basic metadata for publishing"""

assert DOI_PREFIX is not None

# metadata without doi, datePublished and publishedBy
meta_dict: Dict[str, Any] = {
"identifier": f"{INSTANCE_NAME}:999999",
"id": f"{INSTANCE_NAME}:999999/draft",
"version": "1.0.0",
"name": "testing dataset",
"description": "testing",
"contributor": [
{
"name": "last name, first name",
"email": "someone@dandiarchive.org",
"roleName": [RoleType("dcite:ContactPerson")],
"schemaKey": "Person",
}
],
"license": [LicenseType("spdx:CC-BY-4.0")],
"citation": "Last, first (2021). Test citation.",
"assetsSummary": {
"numberOfBytes": 0,
"numberOfFiles": 0,
"dataStandard": [{"name": "NWB"}],
"approach": [{"name": "electrophysiology"}],
"measurementTechnique": [{"name": "two-photon microscopy technique"}],
"species": [{"name": "Human"}],
},
"manifestLocation": [
"https://api.dandiarchive.org/api/dandisets/999999/versions/draft/assets/"
],
"url": "https://dandiarchive.org/dandiset/999999/draft",
}

# should work for Dandiset but PublishedDandiset should raise an error
Dandiset(**meta_dict)
Dandiset(**base_dandiset_metadata)
with pytest.raises(ValidationError) as exc:
PublishedDandiset(**meta_dict)
PublishedDandiset(**base_dandiset_metadata)

ErrDetail = namedtuple("ErrDetail", ["type", "msg"])

Expand Down Expand Up @@ -490,21 +506,23 @@ def test_dandimeta_1() -> None:

# after adding basic meta required to publish: doi, datePublished, publishedBy, assetsSummary,
# so PublishedDandiset should work
meta_dict["url"] = "https://dandiarchive.org/dandiset/999999/0.0.0"
meta_dict["id"] = f"{INSTANCE_NAME}:999999/0.0.0"
meta_dict["version"] = "0.0.0"
meta_dict.update(
base_dandiset_metadata["url"] = "https://dandiarchive.org/dandiset/999999/0.0.0"
base_dandiset_metadata["id"] = f"{INSTANCE_NAME}:999999/0.0.0"
base_dandiset_metadata["version"] = "0.0.0"
base_dandiset_metadata.update(
basic_publishmeta(INSTANCE_NAME, dandi_id="999999", prefix=DOI_PREFIX)
)
meta_dict["assetsSummary"].update(**{"numberOfBytes": 1, "numberOfFiles": 1})
base_dandiset_metadata["assetsSummary"].update(
**{"numberOfBytes": 1, "numberOfFiles": 1}
)

# Test that releaseNotes is optional (can be omitted)
dandiset_without_notes = PublishedDandiset(**meta_dict)
dandiset_without_notes = PublishedDandiset(**base_dandiset_metadata)
assert dandiset_without_notes.releaseNotes is None

# Test that releaseNotes can be set to a string value
meta_dict["releaseNotes"] = "Releasing during testing"
dandiset_with_notes = PublishedDandiset(**meta_dict)
base_dandiset_metadata["releaseNotes"] = "Releasing during testing"
dandiset_with_notes = PublishedDandiset(**base_dandiset_metadata)
assert dandiset_with_notes.releaseNotes == "Releasing during testing"

# Test that releaseNotes appears in model_dump
Expand Down Expand Up @@ -983,3 +1001,78 @@ class VendoredFieldModel(BaseModel):
# Validate the invalid vendored fields against the vendored patterns
with pytest.raises(ValidationError):
VendoredFieldModel.model_validate(invalid_vendored_fields)


class TestDandisetSameAs:
    """Validation tests for the `sameAs` field of `Dandiset`."""

    def test_not_specified(self, base_dandiset_metadata: dict[str, Any]) -> None:
        """
        `sameAs` is `None` when the metadata used to build a `Dandiset` omits it
        """
        validated = Dandiset.model_validate(base_dandiset_metadata)
        assert validated.sameAs is None

    def test_empty_list(self, base_dandiset_metadata: dict[str, Any]) -> None:
        """
        An empty list given for `sameAs` is accepted and kept as an empty list
        """
        base_dandiset_metadata.update(sameAs=[])
        validated = Dandiset.model_validate(base_dandiset_metadata)
        assert validated.sameAs == []

    @pytest.mark.parametrize(
        "dandi_urls",
        [
            ["dandi://DANDI-SANDBOX/123456"],
            ["dandi://DANDI-SANDBOX/123456@draft"],
            ["dandi://DANDI-SANDBOX/123456@1.22.33"],
            ["dandi://DANDI-SANDBOX/123456/path"],
            ["dandi://DANDI-SANDBOX/123456@draft/path"],
            ["dandi://DANDI-SANDBOX/123456@1.22.33/path"],
            ["dandi://EMBER-DANDI/123456"],
            ["dandi://DANDI-SANDBOX/123456", "dandi://EMBER-DANDI/123456"],
            ["dandi://A/123456", "dandi://B/654321"],
        ],
    )
    def test_with_valid_dandi_urls(
        self, dandi_urls: list[str], base_dandiset_metadata: dict[str, Any]
    ) -> None:
        """
        A list of valid DANDI URLs given for `sameAs` is accepted and stored
        unchanged
        """
        base_dandiset_metadata.update(sameAs=dandi_urls)
        validated = Dandiset.model_validate(base_dandiset_metadata)
        assert validated.sameAs == dandi_urls

    @pytest.mark.parametrize(
        "dandi_urls",
        [
            # Lists containing at least one invalid DANDI URL
            ["dandi://DANDI-SANDBOX/123456@abc"],
            ["dandi://DANDI-SANDBOX/123456@1.22.33.44"],
            ["dandi://DANDI-SANDBOX/123456/"],
            ["dandi://DANDI-SANDBOX/123456@draft/"],
            ["dandi://DANDI-SANDBOX/123456@1.22.33/"],
            ["http://DANDI-SANDBOX/123456"],  # Scheme is not dandi://
            ["dandi://DANDI- SANDBOX/123456"],  # Contains a space
            ["dandi://"],  # No instance name and no dandiset id
            ["dandi://DANDI-SANDBOX"],  # No dandiset id
            ["dandi://DANDI-SANDBOX/12345"],  # Dandiset id is too short
            ["dandi://-DANDI/123456"],  # Invalid instance name
            ["dandi://EMBER3DANDI/123456"],  # Invalid instance name
            ["dandi://DANDI-SANDBOX/123456", "dandi://DANDI- SANDBOX/123456"],
            [42],
            # Values that are not lists at all
            "DANDI-SANDBOX:123456",
            42,
        ],
    )
    def test_with_invalid_dandi_urls(
        self, dandi_urls: Any, base_dandiset_metadata: dict[str, Any]
    ) -> None:
        """
        Validation fails when `sameAs` is a list holding an invalid DANDI URL
        or is not a list at all
        """
        base_dandiset_metadata.update(sameAs=dandi_urls)
        with pytest.raises(ValidationError):
            Dandiset.model_validate(base_dandiset_metadata)
Loading