Skip to content

Commit 5c70ddc

Browse files
committed
Fix for records without description and add some update scripts
1 parent 165caed commit 5c70ddc

File tree

3 files changed

+110
-10
lines changed

3 files changed

+110
-10
lines changed

caltechdata_api/get_metadata.py

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ def get_metadata(
4444
}
4545

4646
if token:
47+
base_headers["Authorization"] = "Bearer %s" % token
4748
headers["Authorization"] = "Bearer %s" % token
4849

4950
response = requests.get(url + idv, headers=headers, verify=verify)
@@ -58,16 +59,25 @@ def get_metadata(
5859
else:
5960
instance = response.json()
6061
base_metadata = instance["metadata"]
61-
metadata["descriptions"][0]["description"] = base_metadata.get(
62-
"description"
63-
)
64-
additional_descriptions = base_metadata.get(
65-
"additional_descriptions", []
66-
)
67-
count = 1
68-
for desc in additional_descriptions:
69-
metadata["descriptions"][count]["description"] = desc["description"]
70-
count += 1
62+
if "descriptions" in metadata:
63+
metadata["descriptions"][0]["description"] = base_metadata.get(
64+
"description"
65+
)
66+
additional_descriptions = base_metadata.get(
67+
"additional_descriptions", []
68+
)
69+
count = 1
70+
if (
71+
len(metadata["descriptions"])
72+
== len(additional_descriptions) + 1
73+
):
74+
for desc in additional_descriptions:
75+
metadata["descriptions"][count]["description"] = desc[
76+
"description"
77+
]
78+
count += 1
79+
else:
80+
print(f"Record {idv} does not have a description.")
7181
if "formats" in metadata:
7282
metadata["formats"] = list(set(metadata["formats"]))
7383
if validate:

update_descriptions.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import os, requests, json, math
2+
from caltechdata_api import get_metadata, caltechdata_edit
3+
4+
# One-off maintenance script: give every record in one CaltechDATA community
# that has no description a description equal to its title, then republish it.

# API token is taken from the environment; a missing RDMTOK raises KeyError.
token = os.environ["RDMTOK"]

url = "https://data.caltech.edu/api/communities/0497183f-f3b1-483d-b8bb-133c731c939a/records"
# URL-encoded query "NOT _exists_:metadata.description".
# NOTE(review): the second script in this commit passes "allversions=true"
# instead of "f=allversions:true" -- confirm which form the API honours.
query = "?q=NOT%20_exists_%3Ametadata.description&f=allversions:true"

headers = {
    "Authorization": "Bearer %s" % token,
    "Content-type": "application/json",
}

RECORDS_API = "https://data.caltech.edu/api/records/"
PAGE_SIZE = 10
# Seconds to wait on any single HTTP call; without it a stalled connection
# would hang the script indefinitely.
REQUEST_TIMEOUT = 60

url = url + query


def _collect_hits(search_url):
    """Return all search hits for ``search_url`` as a list.

    The whole result set is fetched *before* any record is modified.  The
    query selects records without a description, so fixing records while
    still paging would shrink the result set and shift later pages, causing
    records to be skipped.

    Raises:
        requests.HTTPError: if any search request returns an error status.
    """
    first = requests.get(search_url, headers=headers, timeout=REQUEST_TIMEOUT)
    first.raise_for_status()
    total = int(first.json()["hits"]["total"])
    pages = math.ceil(total / PAGE_SIZE)

    hits = []
    for page in range(1, pages + 1):
        chunk = requests.get(
            f"{search_url}&size={PAGE_SIZE}&page={page}",
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )
        chunk.raise_for_status()
        hits.extend(chunk.json()["hits"]["hits"])
    return hits


def _strip_expanded_fields(record_metadata):
    """Remove display-only vocabulary fields from a draft's metadata in place.

    The draft returned by the API carries extra keys ("title", "icon", ...)
    on vocabulary-backed entries; they are dropped before the draft is sent
    back.  NOTE(review): presumably the API rejects these keys on PUT --
    confirm against the InvenioRDM documentation.

    Every section is optional here, so records lacking e.g. languages or
    rights no longer raise KeyError/IndexError.
    """
    for desc in record_metadata.get("additional_descriptions", []):
        desc.get("type", {}).pop("title", None)
    for date in record_metadata.get("dates", []):
        date.get("type", {}).pop("title", None)
    for right in record_metadata.get("rights", []):
        # Only entries carrying an "icon" are cleaned, as in the original
        # check; entries without one keep their own title/description.
        if "icon" in right:
            for key in ("icon", "title", "description", "props"):
                right.pop(key, None)
    for language in record_metadata.get("languages", []):
        language.pop("title", None)
    record_metadata.get("resource_type", {}).pop("title", None)


for hit in _collect_hits(url):
    idv = hit["id"]
    print(idv)
    draft_url = RECORDS_API + idv + "/draft"

    # Open an edit draft for the published record.
    result = requests.post(draft_url, headers=headers, timeout=REQUEST_TIMEOUT)
    if result.status_code != 201:
        raise Exception(result.text)

    metadata = result.json()
    # Use the title as the description.
    metadata["metadata"]["description"] = hit["metadata"]["title"]
    _strip_expanded_fields(metadata["metadata"])

    # Save the modified draft.
    result = requests.put(
        draft_url,
        headers=headers,
        json=metadata,
        timeout=REQUEST_TIMEOUT,
    )
    if result.status_code != 200:
        raise Exception(result.text)

    # Publish the draft so the change becomes visible.
    publish_link = (
        f"https://data.caltech.edu/api/records/{idv}/draft/actions/publish"
    )
    result = requests.post(
        publish_link, headers=headers, timeout=REQUEST_TIMEOUT
    )
    if result.status_code != 202:
        raise Exception(result.text)

update_osn_links.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import os, requests, json, math
2+
from caltechdata_api import get_metadata, caltechdata_edit
3+
4+
# One-off maintenance script: rewrite the OSN host name inside record
# descriptions from renc.osn.xsede.org to sdsc.osn.xsede.org and republish.

# API token is taken from the environment; a missing RDMTOK raises KeyError.
token = os.environ["RDMTOK"]

url = "https://data.caltech.edu/api/records"
query = '?q=metadata.additional_descriptions.description:"renc.osn.xsede.org"&allversions=true'

OLD_HOST = "renc.osn.xsede.org"
NEW_HOST = "sdsc.osn.xsede.org"
PAGE_SIZE = 10
# Seconds to wait on any single search call; without it a stalled connection
# would hang the script indefinitely.
REQUEST_TIMEOUT = 60

headers = {
    "Authorization": "Bearer %s" % token,
    "Content-type": "application/json",
}

url = url + query


def _collect_record_ids(search_url):
    """Return the ids of all records matching ``search_url``.

    All ids are gathered *before* any record is edited.  The query matches
    on the old host name, so editing records while still paging would remove
    them from the result set and shift later pages, causing records to be
    skipped.

    Raises:
        requests.HTTPError: if any search request returns an error status.
    """
    first = requests.get(search_url, headers=headers, timeout=REQUEST_TIMEOUT)
    first.raise_for_status()
    total = int(first.json()["hits"]["total"])
    pages = math.ceil(total / PAGE_SIZE)

    record_ids = []
    for page in range(1, pages + 1):
        chunk = requests.get(
            f"{search_url}&size={PAGE_SIZE}&page={page}",
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )
        chunk.raise_for_status()
        record_ids.extend(hit["id"] for hit in chunk.json()["hits"]["hits"])
    return record_ids


for idv in _collect_record_ids(url):
    print(idv)
    metadata = get_metadata(idv, token=token, validate=False)
    for desc in metadata.get("descriptions", []):
        text = desc.get("description")
        # A description entry may hold no text (None); calling .replace on
        # it would raise AttributeError, so only strings are rewritten.
        if isinstance(text, str):
            desc["description"] = text.replace(OLD_HOST, NEW_HOST)
    caltechdata_edit(idv, metadata, token=token, production=True, publish=True)

0 commit comments

Comments
 (0)