Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions idutils/normalizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,8 @@ def normalize_orcid(val):

def normalize_gnd(val):
"""Normalize a GND identifier."""
if val.startswith(gnd_resolver_url):
val = val[len(gnd_resolver_url) :]
if val.lower().startswith("gnd:"):
val = val[len("gnd:") :]
return "gnd:{0}".format(val)
m = gnd_regexp.match(val)
return f"gnd:{m.group(2)}"


def normalize_urn(val):
Expand Down
5 changes: 2 additions & 3 deletions idutils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,16 +82,15 @@
"""

gnd_regexp = re.compile(
r"(gnd:|GND:)?("
r"(1|10)\d{7}[0-9X]|"
r"(gnd:|GND:|https?://d-nb\.info/gnd/)?("
r"1[012]?\d{7}[0-9X]|"
r"[47]\d{6}-\d|"
r"[1-9]\d{0,7}-[0-9X]|"
r"3\d{7}[0-9X]"
r")"
)
"""See https://www.wikidata.org/wiki/Property:P227."""

gnd_resolver_url = "http://d-nb.info/gnd/"

urn_resolver_url = "https://nbn-resolving.org/"

Expand Down
5 changes: 2 additions & 3 deletions idutils/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@

"""Utility file containing ID validators."""


import unicodedata
from urllib.parse import urlparse

Expand Down Expand Up @@ -237,8 +236,8 @@ def is_pmcid(val):

def is_gnd(val):
"""Test if argument is a GND Identifier."""
if val.startswith(gnd_resolver_url):
val = val[len(gnd_resolver_url) :]
if val.startswith("d-nb.info/gnd/"):
val = val[len("d-nb.info/gnd/") :]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

minor: is this actually needed now that the regex contains the URL? I think this `if ...` clause would only match identifiers without the http(s) protocol in front, e.g. `d-nb.info/gnd/12345`. Maybe this logic can be captured in the regex?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree, this can probably be removed.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed that by changing the regex accordingly, see https://regex101.com/r/C9VJpH/1


return gnd_regexp.match(val)

Expand Down
Loading