Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions idutils/normalizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,10 @@ def normalize_orcid(val):

def normalize_gnd(val):
"""Normalize a GND identifier."""
if val.startswith(gnd_resolver_url):
val = val[len(gnd_resolver_url) :]
if val.startswith("http://" + gnd_resolver_url):
val = val[len("http://" + gnd_resolver_url) :]
elif val.startswith("https://" + gnd_resolver_url):
val = val[len("https://" + gnd_resolver_url) :]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you just use the regex in the normalize function like

def normalize_pmid(val):

if val.lower().startswith("gnd:"):
val = val[len("gnd:") :]
return "gnd:{0}".format(val)
Expand Down
5 changes: 3 additions & 2 deletions idutils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,10 @@
https://support.orcid.org/hc/en-us/articles/360006897674-Structure-of-the-ORCID-Identifier
"""

gnd_resolver_url = "d-nb.info/gnd/"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think gnd_resolver_url is not needed any more.

It's used at the beginning of the validate function, but I don't think that's needed after this code change

def is_gnd(val):

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's also used at

if val.startswith(gnd_resolver_url):
but I guess the URL does not need to be configurable.


gnd_regexp = re.compile(
r"(gnd:|GND:)?("
rf"(gnd:|GND:|http://{re.escape(gnd_resolver_url)}|https://{re.escape(gnd_resolver_url)})?("
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you can use an optional `s?` (i.e. `https?://`) instead of repeating http and https, like

r"(pmid:|https?://pubmed.ncbi.nlm.nih.gov/)?(\d+)/?$", flags=re.I
.

I would also lean toward putting the url in the regex instead of having a variable

r"(1|10)\d{7}[0-9X]|"
r"[47]\d{6}-\d|"
r"[1-9]\d{0,7}-[0-9X]|"
Expand All @@ -91,7 +93,6 @@
)
"""See https://www.wikidata.org/wiki/Property:P227."""

gnd_resolver_url = "http://d-nb.info/gnd/"

urn_resolver_url = "https://nbn-resolving.org/"

Expand Down