Skip to content

Commit 4519e35

Browse files
committed
Merge branch 'master' into devendorize
2 parents c5f6327 + 6d6c7b1 commit 4519e35

File tree

3 files changed

+175
-16
lines changed

3 files changed

+175
-16
lines changed

dandischema/datacite/__init__.py

Lines changed: 49 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,14 @@
1414
from jsonschema import Draft7Validator
1515
import requests
1616

17-
from ..models import NAME_PATTERN, Organization, Person, PublishedDandiset, RoleType
17+
from ..models import (
18+
NAME_PATTERN,
19+
LicenseType,
20+
Organization,
21+
Person,
22+
PublishedDandiset,
23+
RoleType,
24+
)
1825

1926
DATACITE_CONTRTYPE = {
2027
"ContactPerson",
@@ -65,6 +72,46 @@
6572
DATACITE_MAP = {el.lower(): el for el in DATACITE_IDENTYPE}
6673

6774

75+
def _licenses_to_rights_list(licenses: list[LicenseType]) -> list[dict[str, str]]:
    """
    Construct the `rightsList` in DataCite metadata per given list of `LicenseType`
    objects.

    Parameters
    ----------
    licenses : list[LicenseType]
        The list of `LicenseType` objects. The `value` of each object must be a
        string of the form ``"scheme:identifier"`` containing no whitespace,
        e.g. ``"spdx:CC0-1.0"``.

    Returns
    -------
    list[dict[str, str]]
        One `rightsList` entry per given license, in the order given.

    Raises
    ------
    AssertionError
        If the value of a license is not of the form ``"scheme:identifier"``.
    NotImplementedError
        If the scheme of a license is not SPDX (compared case-insensitively).
    """
    # Both groups use `+`, so a successful match already guarantees that the
    # scheme and the identifier are non-empty.
    license_pattern = re.compile(r"([^:\s]+):(\S+)")

    rights_list = []
    for license_ in licenses:
        # `fullmatch` is used instead of a `$`-anchored `match` because `$` also
        # matches just before a trailing newline, which would let a value such as
        # "spdx:CC0-1.0\n" through.
        license_match = license_pattern.fullmatch(license_.value)
        if license_match is None:
            # Raised explicitly instead of through an `assert` statement so that
            # the check is not stripped when Python runs with `-O`. The exception
            # type and message are the ones an `assert` would have produced.
            raise AssertionError(
                'License is not of the expected format of "scheme:identifier"'
            )
        scheme, identifier = license_match.groups()

        if scheme.upper() != "SPDX":
            raise NotImplementedError(
                f"License scheme {scheme} is not supported. "
                "Currently only SPDX licenses are supported."
            )

        # SPDX license
        rights_list.append(
            {
                "rightsIdentifier": identifier,
                "rightsIdentifierScheme": "SPDX",
                "schemeUri": "https://spdx.org/licenses/",
            }
        )

    return rights_list
113+
114+
68115
def to_datacite(
69116
meta: Union[dict, PublishedDandiset],
70117
validate: bool = False,
@@ -111,15 +158,7 @@ def to_datacite(
111158
}
112159
# meta also has a `url` attribute, but it is often empty
113160
attributes["url"] = str(meta.url or "")
114-
# assuming that all licenses are from SPDX?
115-
attributes["rightsList"] = [
116-
{
117-
"schemeUri": "https://spdx.org/licenses/",
118-
"rightsIdentifierScheme": "SPDX",
119-
"rightsIdentifier": el.name,
120-
}
121-
for el in meta.license
122-
]
161+
attributes["rightsList"] = _licenses_to_rights_list(meta.license)
123162
attributes["schemaVersion"] = "http://datacite.org/schema/kernel-4"
124163

125164
contributors = []

dandischema/datacite/tests/test_datacite.py

Lines changed: 116 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1+
from enum import Enum
12
import json
23
import os
34
import random
4-
from typing import Any, Dict, Tuple
5+
from typing import TYPE_CHECKING, Any, Dict, Tuple, cast
56

67
from jsonschema import Draft7Validator
78
import pytest
@@ -25,7 +26,118 @@
2526
skipif_no_test_dandiset_metadata_dir,
2627
)
2728

28-
from .. import _get_datacite_schema, to_datacite
29+
from .. import _get_datacite_schema, _licenses_to_rights_list, to_datacite
30+
31+
32+
class TestLicensesToRightsList:
    """
    Unit tests exercising the `_licenses_to_rights_list()` helper function.
    """

    @pytest.mark.parametrize(
        "licenses",
        [
            [" "],
            ["bad_license"],
            [" spdx:CC0-1.0"],
            ["spdx:CC0-1.0 "],
            ["spdx: CC0-1.0"],
            ["spdx:CC0-1.0", "foo-license"],
        ],
    )
    def test_bad_format(self, licenses: list[str]) -> None:
        """
        A license whose value is not of the expected format must be rejected
        with an `AssertionError`
        """
        # The enum is built dynamically from the parametrized values; the empty
        # class in the `TYPE_CHECKING` branch only gives static checkers a
        # definition to work with.
        if TYPE_CHECKING:

            # noinspection PyUnusedLocal
            class BadLicenseType(Enum):
                ...  # fmt: skip

        else:
            members = [(f"M{idx}", value) for idx, value in enumerate(licenses)]
            # noinspection PyPep8Naming
            BadLicenseType = Enum("BadLicenseType", members)

        bad_licenses = list(BadLicenseType)
        with pytest.raises(AssertionError, match="not of the expected format"):
            _licenses_to_rights_list(bad_licenses)

    @pytest.mark.parametrize(
        "licenses",
        [
            ["foo:license"],
            ["bar:license"],
            ["foo:license", "bar:license"],
        ],
    )
    def test_non_spdx_license(self, licenses: list[str]) -> None:
        """
        A well-formed license whose scheme is not `"spdx"`, i.e. a license that
        is not in the SPDX license list at https://spdx.org/licenses/, must be
        rejected with a `NotImplementedError`
        """
        if TYPE_CHECKING:

            # noinspection PyUnusedLocal
            class NonSpdxLicenseType(Enum):
                ...  # fmt: skip

        else:
            members = [(f"M{idx}", value) for idx, value in enumerate(licenses)]
            # noinspection PyPep8Naming
            NonSpdxLicenseType = Enum("NonSpdxLicenseType", members)

        non_spdx_licenses = list(NonSpdxLicenseType)
        with pytest.raises(
            NotImplementedError, match="Currently only SPDX licenses are supported"
        ):
            _licenses_to_rights_list(non_spdx_licenses)

    @pytest.mark.parametrize(
        "licenses",
        [
            ["spdx:CC0-1.0"],
            ["spdx:CC-BY-4.0"],
            ["spdx:CC0-1.0", "spdx:CC-BY-4.0"],
        ],
    )
    def test_valid_input(self, licenses: list[str]) -> None:
        """
        Valid SPDX licenses are converted to one `rightsList` entry each, in
        the order given
        """
        if TYPE_CHECKING:

            # noinspection PyUnusedLocal
            class ValidLicenseType(Enum):
                ...  # fmt: skip

        else:
            # Each member is named after its own value
            members = [(value, value) for value in licenses]
            # noinspection PyPep8Naming
            ValidLicenseType = Enum("ValidLicenseType", members)

        expected_rights_list = []
        for value in licenses:
            expected_rights_list.append(
                {
                    "rightsIdentifier": value.removeprefix("spdx:"),
                    "rightsIdentifierScheme": "SPDX",
                    "schemeUri": "https://spdx.org/licenses/",
                }
            )

        valid_licenses = cast(
            list[LicenseType],
            [ValidLicenseType(value) for value in licenses],
        )
        actual_rights_list = _licenses_to_rights_list(valid_licenses)

        assert actual_rights_list == expected_rights_list
29141

30142

31143
def datacite_post(datacite: dict, doi: str) -> None:
@@ -171,7 +283,7 @@ def test_datacite(dandi_id: str, schema: Any) -> None:
171283
),
172284
"rightsList": (
173285
1,
174-
{"rightsIdentifierScheme": "SPDX", "rightsIdentifier": "CC_BY_40"},
286+
{"rightsIdentifierScheme": "SPDX", "rightsIdentifier": "CC-BY-4.0"},
175287
),
176288
"types": (
177289
None,
@@ -493,7 +605,7 @@ def test_datacite_publish(metadata_basic: Dict[str, Any]) -> None:
493605
},
494606
"rightsList": [
495607
{
496-
"rightsIdentifier": "CC_BY_40",
608+
"rightsIdentifier": "CC-BY-4.0",
497609
"rightsIdentifierScheme": "SPDX",
498610
"schemeUri": "https://spdx.org/licenses/",
499611
}

dandischema/utils.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -217,10 +217,18 @@ def jsonschema_validator(
217217

218218
if check_format:
219219
# Return a validator with format checking enabled
220-
return validator_cls(schema, format_checker=validator_cls.FORMAT_CHECKER)
220+
# TODO: Static type checking is temporarily disabled partially in the following line
221+
# because of https://github.com/python-jsonschema/jsonschema/issues/1382.
222+
# It should be re-enabled once the issue is resolved.
223+
return validator_cls(
224+
schema, format_checker=validator_cls.FORMAT_CHECKER
225+
) # type: ignore[call-arg]
221226

222227
# Return a validator with format checking disabled
223-
return validator_cls(schema)
228+
# TODO: Static type checking is temporarily disabled partially in the following line
229+
# because of https://github.com/python-jsonschema/jsonschema/issues/1382.
230+
# It should be re-enabled once the issue is resolved.
231+
return validator_cls(schema) # type: ignore[call-arg]
224232

225233

226234
def validate_json(instance: Any, validator: JsonschemaValidator) -> None:

0 commit comments

Comments
 (0)