Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 19 additions & 5 deletions dandischema/datacite/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,14 @@
from jsonschema import Draft7Validator
import requests

from ..models import NAME_PATTERN, Organization, Person, PublishedDandiset, RoleType
from ..models import (
NAME_PATTERN,
Dandiset,
Organization,
Person,
PublishedDandiset,
RoleType,
)

DATACITE_CONTRTYPE = {
"ContactPerson",
Expand Down Expand Up @@ -71,8 +78,13 @@ def to_datacite(
publish: bool = False,
) -> dict:
"""Convert published Dandiset metadata to Datacite"""
if not isinstance(meta, PublishedDandiset):
meta = PublishedDandiset(**meta)

# checking the version, create Dandiset for draft version and PublishedDandiset otherwise
if isinstance(meta, dict):
if meta.get("version") == "draft":
meta = Dandiset(**meta)
else:
meta = PublishedDandiset(**meta)

attributes: Dict[str, Any] = {}
if publish:
Expand All @@ -89,7 +101,8 @@ def to_datacite(
},
]

attributes["doi"] = meta.doi
if hasattr(meta, "doi"):
attributes["doi"] = meta.doi
if meta.version:
attributes["version"] = meta.version
attributes["titles"] = [{"title": meta.name}]
Expand All @@ -103,7 +116,8 @@ def to_datacite(
"publisherIdentifierScheme": "RRID",
"lang": "en",
}
attributes["publicationYear"] = str(meta.datePublished.year)
if hasattr(meta, "datePublished"):
attributes["publicationYear"] = str(meta.datePublished.year)
# not sure about it dandi-api had "resourceTypeGeneral": "NWB"
attributes["types"] = {
"resourceType": "Neural Data",
Expand Down
171 changes: 165 additions & 6 deletions dandischema/datacite/tests/test_datacite.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from copy import deepcopy
import json
import os
from pathlib import Path
Expand All @@ -9,6 +10,7 @@
import requests

from dandischema.models import (
Dandiset,
LicenseType,
PublishedDandiset,
RelationType,
Expand All @@ -21,7 +23,7 @@
from .. import _get_datacite_schema, to_datacite


def datacite_post(datacite: dict, doi: str) -> None:
def datacite_post(datacite: dict, doi: str, clean: bool = True) -> None:
"""Post the datacite object and check the status of the request"""

# removing doi in case it exists
Expand All @@ -35,21 +37,178 @@ def datacite_post(datacite: dict, doi: str) -> None:
auth=("DARTLIB.DANDI", os.environ["DATACITE_DEV_PASSWORD"]),
)
rp.raise_for_status()

print("\n in datacite_post, after posting", doi, rp.status_code)
# checking if i'm able to get the url
rg = requests.get(url=f"https://api.test.datacite.org/dois/{doi}/activities")
rg.raise_for_status()

# cleaning url
_clean_doi(doi)
if clean:
# cleaning url
_clean_doi(doi)


def datacite_update(datacite: dict, doi: str) -> None:
    """Send the datacite object as an update for ``doi`` and verify the result.

    The payload is PUT to the doi's entry on api.test.datacite.org, after
    which the doi's ``activities`` url is fetched. A failing status on either
    request raises through ``raise_for_status``.
    """
    doi_url = f"https://api.test.datacite.org/dois/{doi}"
    credentials = ("DARTLIB.DANDI", os.environ["DATACITE_DEV_PASSWORD"])

    put_response = requests.put(
        url=doi_url,
        json=datacite,
        headers={"Content-Type": "application/vnd.api+json"},
        auth=credentials,
    )
    put_response.raise_for_status()

    # make sure the doi can still be reached after the update
    requests.get(url=f"{doi_url}/activities").raise_for_status()


def _clean_doi(doi: str) -> int:
    """Remove doi and return the status code of the DELETE request.

    The status code is not checked here (no ``raise_for_status``); it is
    printed and returned so the caller can decide how to react to it.
    """
    rq = requests.delete(
        f"https://api.test.datacite.org/dois/{doi}",
        auth=("DARTLIB.DANDI", os.environ["DATACITE_DEV_PASSWORD"]),
    )
    print("\n in _clean_doi", doi, rq.status_code)
    return rq.status_code


@pytest.mark.skip(
reason="to not produced too many dois, not sure if we want to keep it as a test"
)
def test_datacite_lifecycle() -> None:
"""testing the lifecycle of a public dandiset and doi (from draft to published)"""

# checking which doi is available
doi_available = False
while not doi_available:
dandi_id = f"000{random.randrange(500, 999)}"
print(f"searching for available doi, trying dandi_id: {dandi_id}")
doi_root = f"10.80507/dandi.{dandi_id}"
if _clean_doi(doi_root) != 405:
doi_available = True
print(f"found available doi, dandi_id: {dandi_id}")

dandi_id_prefix = f"DANDI:{dandi_id}"
# creating the main/root doi and url
doi_root = f"10.80507/dandi.{dandi_id}"
url_root = f"https://dandiarchive.org/dandiset/{dandi_id}"

# creating draft dandiset with minimal metadata
version = "draft"
meta_dict = {
"identifier": dandi_id_prefix,
"id": f"{dandi_id_prefix}/{version}",
"name": "Testing Dataset: lifecycle",
"description": "testing lifecycle of a dataset and doi: draft",
"version": version,
"contributor": [
{
"name": "A_last, A_first",
"email": "nemo@example.com",
"roleName": [RoleType("dcite:ContactPerson")],
"schemaKey": "Person",
}
],
"license": [LicenseType("spdx:CC-BY-4.0")],
"citation": "A_last, A_first 2021",
"manifestLocation": [
f"https://api.dandiarchive.org/api/dandisets/{dandi_id}/versions/{version}/assets/"
],
"assetsSummary": {
"schemaKey": "AssetsSummary",
"numberOfBytes": 10,
"numberOfFiles": 1,
},
Comment on lines +117 to +121
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hopefully it does not matter, but when a dandiset is created there is no assets summary yet -- it is done asynchronously IIRC, hence

Suggested change
"assetsSummary": {
"schemaKey": "AssetsSummary",
"numberOfBytes": 10,
"numberOfFiles": 1,
},

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

assetsSummary is a mandatory field for Dandiset

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In dandi-archive we do the following if it is not provided in the metadata:

        if 'assetsSummary' not in metadata:
            metadata['assetsSummary'] = {
                'schemaKey': 'AssetsSummary',
                'numberOfBytes': 0,
                'numberOfFiles': 0,
            }

http://github.com/dandi/dandi-archive/blob/HEAD/dandiapi/api/models/version.py

}
# in addition to minimal metadata, we need to add doi and url if we want to create draft doi
meta_dict["doi"] = doi_root
meta_dict["url"] = url_root
# creating draft dandiset
dset = Dandiset(**meta_dict)

# creating datacite object and posting the main doi entry (should be draft)
datacite = to_datacite(dset)
datacite_post(datacite, doi_root, clean=False)

# updating the draft but not enough to create PublishDandiset
meta_dict["description"] = "testing lifecycle of a dataset and doi: new draft"
# the dandi workflow should check if we can create a datacite that can be validated and published
# try: datacite_new = to_datacite(meta_dict, validate=True, publish=True)
# if the metadata is not enough to create a valid datacite, we should update the draft doi
datacite_new = to_datacite(meta_dict)
datacite_update(datacite_new, doi_root)

# creating v1.0.0
version = "1.0.0"
# adding contributors and updating description
meta_dict["contributor"].append(
{
"name": "B_last, B_first",
"email": "nemo@example.com",
"roleName": [RoleType("dcite:DataCurator")],
"schemaKey": "Person",
}
)
meta_dict["description"] = "testing lifecycle of a dataset and doi: v1.0.0"
# adding mandatory metadata for PublishDandiset
publish_meta = {
"datePublished": "2020",
"publishedBy": {
"id": "urn:uuid:08fffc59-9f1b-44d6-8e02-6729d266d1b6",
"name": "DANDI publish",
"startDate": "2021-05-18T19:58:39.310338-04:00",
"endDate": "2021-05-18T19:58:39.310361-04:00",
"wasAssociatedWith": [
{
"id": "urn:uuid:9267d2e1-4a37-463b-9b10-dad3c66d8eaa",
"identifier": "RRID:SCR_017571",
"name": "DANDI API",
"version": version,
"schemaKey": "Software",
}
],
"schemaKey": "PublishActivity",
},
}
meta_dict.update(publish_meta)
# updating the version, id etc.
meta_dict["version"] = version
meta_dict["id"] = f"{dandi_id_prefix}/{version}"
meta_dict["doi"] = f"{doi_root}/{version}"
meta_dict["url"] = f"https://dandiarchive.org/dandiset/{dandi_id}/{version}"
# creating new published dandiset
dset_v1 = PublishedDandiset(**meta_dict)
# creating datacite object and posting (should be findable)
datacite_v1 = to_datacite(dset_v1, publish=True, validate=True)
datacite_post(datacite_v1, meta_dict["doi"], clean=False)

# updating the main doi but keeping the root doi and url
datacite = deepcopy(datacite_v1)
datacite["data"]["attributes"]["doi"] = doi_root
datacite["data"]["attributes"]["url"] = url_root
# updating the doi (should change from draft to findable)
datacite_update(datacite, doi_root)

# creating v2.0.0
version = "2.0.0"
# updating description
meta_dict["description"] = "testing lifecycle of a dataset and doi: v2.0.0"
meta_dict["version"] = version
meta_dict["id"] = f"{dandi_id_prefix}/{version}"
meta_dict["doi"] = f"{doi_root}/{version}"
meta_dict["url"] = f"https://dandiarchive.org/dandiset/{dandi_id}/{version}"
# creating new published dandiset
dset_v2 = PublishedDandiset(**meta_dict)
# creating datacite object and posting (should be findable)
datacite_v2 = to_datacite(dset_v2, publish=True, validate=True)
datacite_post(datacite_v2, meta_dict["doi"], clean=False)

# updating the main doi to v2 but keeping the root doi and url
datacite = deepcopy(datacite_v2)
datacite["data"]["attributes"]["doi"] = doi_root
datacite["data"]["attributes"]["url"] = url_root
# updating the findable doi
datacite_update(datacite, doi_root)


@pytest.fixture(scope="module")
Expand All @@ -59,7 +218,7 @@ def schema() -> Any:

@pytest.fixture(scope="function")
def metadata_basic() -> Dict[str, Any]:
dandi_id_noprefix = f"000{random.randrange(100, 999)}"
dandi_id_noprefix = f"000{random.randrange(100, 499)}"
dandi_id = f"DANDI:{dandi_id_noprefix}"
version = "0.0.0"
# meta data without doi, datePublished and publishedBy
Expand Down
2 changes: 1 addition & 1 deletion dandischema/tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def _basic_publishmeta(
"id": "urn:uuid:9267d2e1-4a37-463b-9b10-dad3c66d8eaa",
"identifier": "RRID:SCR_017571",
"name": "DANDI API",
"version": "0.1.0",
"version": version,
"schemaKey": "Software",
}
],
Expand Down