Skip to content

Commit efb9ecd

Browse files
committed
Improved description handling
1 parent c6ec803 commit efb9ecd

File tree

3 files changed

+38
-6
lines changed

3 files changed

+38
-6
lines changed

caltechdata_api/caltechdata_edit.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,14 +124,22 @@ def caltechdata_edit(
124124
# Check if file links were provided in the metadata
125125
descriptions = []
126126
ex_file_links = []
127+
ex_file_descriptions = []
127128
if "descriptions" in metadata:
128129
for d in metadata["descriptions"]:
129130
if d["description"].startswith("Files available via S3"):
130131
file_text = d["description"]
131132
file_list = file_text.split('href="')
133+
# Check if we have file_descriptions
134+
split_comma = file_list[0].split(", ")
135+
if len(split_comma) == 3:
136+
ex_file_descriptions.append(split_comma[1])
132137
# Loop over links in description, skip header text
133138
for file in file_list[1:]:
134139
ex_file_links.append(file.split('"\n')[0])
140+
split_comma = file.split(", ")
141+
if len(split_comma) == 3:
142+
ex_file_descriptions.append(split_comma[1])
135143
else:
136144
descriptions.append(d)
137145
# We remove file link descriptions, and re-add below
@@ -145,7 +153,7 @@ def caltechdata_edit(
145153
# Otherwise we add file links found in the metadata file
146154
elif ex_file_links:
147155
metadata = add_file_links(
148-
metadata, ex_file_links, file_descriptions, s3_link=s3_link
156+
metadata, ex_file_links, ex_file_descriptions, s3_link=s3_link
149157
)
150158

151159
if authors == False:

caltechdata_api/caltechdata_write.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,11 @@ def add_file_links(
8181
size = s3.info(path)["size"]
8282
size = humanbytes(size)
8383
try:
84-
desc = file_descriptions[index] + ","
84+
description = file_descriptions[index]
85+
if description != " ":
86+
desc = description + ","
87+
else:
88+
desc = ""
8589
except IndexError:
8690
desc = ""
8791
if link_string == "":

caltechdata_api/get_metadata.py

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,12 @@ def get_metadata(
3131
url = "https://data.caltechlibrary.dev/api/records/"
3232
verify = True
3333

34+
base_headers = {
35+
"accept": "application/json",
36+
}
37+
3438
if authors:
35-
headers = {
36-
"accept": "application/json",
37-
}
39+
headers = base_headers
3840
validate = False
3941
else:
4042
headers = {
@@ -49,7 +51,25 @@ def get_metadata(
4951
raise Exception(response.text)
5052
else:
5153
metadata = response.json()
52-
54+
if not authors:
55+
response = requests.get(url + idv, headers=base_headers, verify=verify)
56+
if response.status_code != 200:
57+
raise Exception(response.text)
58+
else:
59+
instance = response.json()
60+
base_metadata = instance["metadata"]
61+
metadata["descriptions"][0]["description"] = base_metadata.get(
62+
"description"
63+
)
64+
additional_descriptions = base_metadata.get(
65+
"additional_descriptions", []
66+
)
67+
count = 1
68+
for desc in additional_descriptions:
69+
metadata["descriptions"][count]["description"] = desc["description"]
70+
count += 1
71+
if "formats" in metadata:
72+
metadata["formats"] = list(set(metadata["formats"]))
5373
if validate:
5474
if schema == "43":
5575
try:

0 commit comments

Comments
 (0)