Skip to content

Commit d1bc7ef

Browse files
karkraeg and Karl Krägelin authored
gnd: improve validation and normalization
* accept both `http://d-nb.info/gnd/<id>` and `https://d-nb.info/gnd/<id>` as pasted GND URIs and normalize them
* re-use gnd_resolver_url var in regex
* use the regex in the normalize function
* utils: remove var and improve regex
* validators: don't use var
* utils: adapt to correct URL match regex
* validators: remove additional check
* utils: improve regex to match IDs without the http prefix, so that the additional check in validators can be removed
* validators: adhere to pydocstyle

---------

Co-authored-by: Karl Krägelin <mail@karlkraeglin.de>
1 parent ffd5a6b commit d1bc7ef

File tree

3 files changed

+4
-12
lines changed

3 files changed

+4
-12
lines changed

idutils/normalizers.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -54,11 +54,8 @@ def normalize_orcid(val):
 
 def normalize_gnd(val):
     """Normalize a GND identifier."""
-    if val.startswith(gnd_resolver_url):
-        val = val[len(gnd_resolver_url) :]
-    if val.lower().startswith("gnd:"):
-        val = val[len("gnd:") :]
-    return "gnd:{0}".format(val)
+    m = gnd_regexp.match(val)
+    return f"gnd:{m.group(2)}"
 
 
 def normalize_urn(val):

idutils/utils.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,16 +82,15 @@
 """
 
 gnd_regexp = re.compile(
-    r"(gnd:|GND:)?("
-    r"(1|10)\d{7}[0-9X]|"
+    r"(gnd:|GND:|https?://d-nb\.info/gnd/|d-nb\.info/gnd/)?("
+    r"1[012]?\d{7}[0-9X]|"
     r"[47]\d{6}-\d|"
     r"[1-9]\d{0,7}-[0-9X]|"
     r"3\d{7}[0-9X]"
     r")"
 )
 """See https://www.wikidata.org/wiki/Property:P227."""
 
-gnd_resolver_url = "http://d-nb.info/gnd/"
 
 urn_resolver_url = "https://nbn-resolving.org/"
 
idutils/validators.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
 
 """Utility file containing ID validators."""
 
-
 import unicodedata
 from urllib.parse import urlparse
 
@@ -237,9 +236,6 @@ def is_pmcid(val):
 
 def is_gnd(val):
     """Test if argument is a GND Identifier."""
-    if val.startswith(gnd_resolver_url):
-        val = val[len(gnd_resolver_url) :]
-
     return gnd_regexp.match(val)
 
 
0 commit comments

Comments
 (0)