Skip to content

Commit 08fbdf5

Browse files
committed
Almost working name script
1 parent 521c44a commit 08fbdf5

File tree

2 files changed

+32
-10
lines changed

2 files changed

+32
-10
lines changed

caltechdata_api/customize_schema.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,21 @@
55
from datetime import date
66
import yaml
77
from pathlib import Path
8+
import requests
9+
10+
11+
def grid_to_ror(grid):
    """Return the ROR identifier URL that corresponds to a GRID identifier.

    Temporary shim until InvenioRDM stops emitting GRID identifiers.

    Args:
        grid: GRID identifier string, e.g. ``"grid.5805.8"``.

    Returns:
        The ``id`` field of the first matching ROR record (for the manually
        mapped identifiers, a ``https://ror.org/...`` URL).

    Raises:
        requests.HTTPError: if the ROR API answers with an error status.
        IndexError: if the ROR API returns no record for ``grid``.
    """
    # Temporary until InvenioRDM stops spitting out GRIDS
    # We manually handle some incorrect/redundant GRID Ids
    manual_overrides = {
        "grid.451078.f": "https://ror.org/00hm6j694",
        "grid.5805.8": "https://ror.org/02en5vm52",
    }
    if grid in manual_overrides:
        return manual_overrides[grid]

    url = f"https://api.ror.org/organizations?query.advanced=external_ids.GRID.all:{grid}"
    # Without a timeout a stalled connection would block the caller forever.
    results = requests.get(url, timeout=30)
    # Fail loudly on HTTP errors instead of trying to parse an error body.
    results.raise_for_status()
    items = results.json()["items"]
    if not items:
        # Same exception type as the bare items[0] lookup, but with context.
        raise IndexError(f"No ROR record found for GRID identifier {grid!r}")
    return items[0]["id"]
823

924

1025
def get_vocabularies():
@@ -320,6 +335,11 @@ def customize_schema_rdm(json_record):
320335
ror = ror.split("ror.org/")[1]
321336
funder["id"] = ror
322337
fund.pop("funderIdentifierType")
338+
elif fund["funderIdentifierType"] == "GRID":
339+
#We need this temporarily to round-trip data
340+
ror = grid_to_ror(fund.pop("funderIdentifier"))
341+
funder["id"] = ror
342+
fund.pop("funderIdentifierType")
323343
else:
324344
print(f'Unknown Type mapping {fund["funderIdentifierType"]}')
325345
if "awardTitle" in fund:

fix_names.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,7 @@
44
from caltechdata_api import caltechdata_edit
55

66

7-
def fix_name(metadata):
8-
fixed = False
7+
def fix_name(metadata, fixed):
98
for name in metadata:
109
if name["nameType"] == "Personal":
1110
if "givenName" not in name:
@@ -15,37 +14,40 @@ def fix_name(metadata):
1514
return metadata, fixed
1615

1716

18-
url = "https://data.caltech.edu/api/records"
17+
url = 'https://data.caltech.edu/api/records?q=-metadata.related_identifiers.identifier%3A"10.25989%2Fes8t-kswe"'
1918

2019
headers = {
2120
"accept": "application/vnd.datacite.datacite+json",
2221
}
2322

24-
response = requests.get(f"{url}?search_type=scan&scroll=5m")
23+
response = requests.get(f"{url}&search_type=scan&scroll=5m")
2524

2625
total = response.json()["hits"]["total"]
2726
pages = math.ceil(int(total) / 1000)
2827
hits = [] # [{'id':'a7f64-a8k10'}]
2928
print(total)
30-
for c in progressbar(range(1, 2)): # , pages + 1)):
31-
chunkurl = f"{url}?&sort=newest&size=1000&page={c}"
29+
for c in progressbar(range(1, pages + 1)):
30+
chunkurl = f"{url}&sort=newest&size=1000&page={c}"
3231
response = requests.get(chunkurl)
3332
response = response.json()
34-
3533
hits += response["hits"]["hits"]
3634

35+
36+
url = 'https://data.caltech.edu/api/records'
37+
3738
for h in progressbar(hits):
3839
idv = str(h["id"])
40+
3941
response = requests.get(f"{url}/{idv}", headers=headers)
4042
if response.status_code != 200:
4143
print(response.text)
4244
exit()
4345
else:
46+
fixed = False
4447
metadata = response.json()
45-
metadata["creators"], fixed = fix_name(metadata["creators"])
48+
metadata["creators"], fixed = fix_name(metadata["creators"], fixed)
4649
if "contributors" in metadata:
47-
metadata["contributors"] = fix_name(metadata["contributors"])
50+
metadata["contributors"], fixed = fix_name(metadata["contributors"], fixed)
4851
if fixed:
4952
print(idv)
5053
caltechdata_edit(idv, metadata, production=True, publish=True)
51-
exit()

0 commit comments

Comments
 (0)