Skip to content

Commit 5c80ad6

Browse files
committed
Fix imports and in progress name script
1 parent ca6feb8 commit 5c80ad6

File tree

3 files changed

+32
-10
lines changed

3 files changed

+32
-10
lines changed

caltechdata_api/__init__.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
from .caltechdata_write import caltechdata_write, send_s3, write_files_rdm
2-
from .caltechdata_edit import caltechdata_add, caltechdata_edit, caltechdata_unembargo
1+
from .caltechdata_write import caltechdata_write, write_files_rdm
2+
from .caltechdata_edit import caltechdata_edit, caltechdata_unembargo
33
from .customize_schema import customize_schema
4-
from .decustomize_schema import decustomize_schema
54
from .get_metadata import get_metadata
65
from .download_file import download_file, download_url

caltechdata_api/caltechdata_edit.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import requests
44
from requests import session
55

6-
from caltechdata_api import customize_schema, send_s3, write_files_rdm
6+
from caltechdata_api import customize_schema, write_files_rdm
77

88

99
def caltechdata_unembargo(token, ids, production=False):
@@ -37,8 +37,7 @@ def caltechdata_edit(
3737
ids = [ids]
3838

3939
data = customize_schema.customize_schema(
40-
copy.deepcopy(metadata), schema=schema, pilot=True
41-
)
40+
copy.deepcopy(metadata), schema=schema)
4241
if production == True:
4342
url = "https://data.caltech.edu/"
4443
verify = True

fix_names.py

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,25 @@
11
import requests
22
import math
33
from progressbar import progressbar
4+
from caltechdata_api import caltechdata_edit
5+
6+
def fix_name(metadata):
    """Fill in a missing ``givenName`` on personal name entries.

    Each entry is expected to be a dict such as a creator or contributor
    record.  For entries whose ``nameType`` is ``'Personal'`` and that have
    no ``givenName`` yet, the given name is taken from the text after the
    first comma of ``name`` (``"Family, Given"``) with surrounding
    whitespace stripped.

    Entries are left untouched when they have no ``nameType``, are not
    personal names, already carry a ``givenName``, or have a ``name`` with
    no comma or nothing after it; in those cases there is no given name to
    extract.

    Args:
        metadata: Iterable of name dicts; entries are modified in place.

    Returns:
        A ``(metadata, fixed)`` tuple: the same object that was passed in,
        and ``True`` if at least one ``givenName`` was added.
    """
    fixed = False
    for name in metadata:
        # Only personal names that still lack a givenName need attention.
        if name.get('nameType') != 'Personal' or 'givenName' in name:
            continue
        parts = name.get('name', '').split(',')
        if len(parts) < 2:
            # No "Family, Given" structure (e.g. a single-word name).
            continue
        given = parts[1].strip()
        if not given:
            continue
        name['givenName'] = given
        # Flag the record only once a value has actually been written.
        fixed = True
    return metadata, fixed
415

516
url = "https://data.caltech.edu/api/records"
617

718
headers = {
819
"accept": "application/vnd.datacite.datacite+json",
920
}
1021

11-
response = requests.get(f"{url}")
22+
response = requests.get(f"{url}?search_type=scan&scroll=5m")
1223

1324
total = response.json()["hits"]["total"]
1425
pages = math.ceil(int(total) / 1000)
@@ -18,10 +29,23 @@
1829
chunkurl = (
1930
f"{url}?&sort=newest&size=1000&page={c}"
2031
)
21-
response = requests.get(chunkurl).json()
32+
response = requests.get(chunkurl)
33+
response = response.json()
2234

2335
hits += response["hits"]["hits"]
2436

2537
for h in progressbar(hits):
26-
rid = str(h["id"])
27-
print(rid)
38+
idv = str(h["id"])
39+
response = requests.get(f'{url}/{idv}', headers=headers)
40+
if response.status_code != 200:
41+
print(response.text)
42+
exit()
43+
else:
44+
metadata = response.json()
45+
metadata['creators'], fixed = fix_name(metadata['creators'])
46+
if 'contributors' in metadata:
47+
# fix_name returns (entries, fixed); unpack it so the tuple itself is not stored as the contributors value, and update the record when only contributors changed.
metadata['contributors'], contributors_fixed = fix_name(metadata['contributors'])
fixed = fixed or contributors_fixed
48+
if fixed:
49+
print(idv)
50+
caltechdata_edit(idv,metadata,production=True,publish=True)
51+
exit()

0 commit comments

Comments
 (0)