
Commit 3672edc

Merge pull request #18 from caltechlibrary/authors
Add in authors support
2 parents 3906e16 + 9a58da8 commit 3672edc

File tree

7 files changed (+269, -117 lines)

caltechdata_api/caltechdata_edit.py

Lines changed: 20 additions & 1 deletion
@@ -72,8 +72,27 @@ def caltechdata_edit(
     if isinstance(files, str) == True:
         files = [files]

+    # Check if file links were provided in the metadata
+    descriptions = []
+    for d in metadata["descriptions"]:
+        if d["description"].startswith("Files available via S3"):
+            ex_file_links = []
+            file_text = d["description"]
+            file_list = file_text.split('href="')
+            # Loop over links in description, skip header text
+            for file in file_list[1:]:
+                ex_file_links.append(file.split('"\n')[0])
+        else:
+            descriptions.append(d)
+    # We remove file link descriptions, and re-add below
+    metadata["descriptions"] = descriptions
+
+    # If user has provided file links as a cli option, we add those
     if file_links:
         metadata = add_file_links(metadata, file_links)
+    # Otherwise we add file links found in the metadata file
+    elif ex_file_links:
+        metadata = add_file_links(metadata, ex_file_links)

     if production == True:
         url = "https://data.caltech.edu"
@@ -101,7 +120,7 @@ def caltechdata_edit(
            headers=headers,
        )
        if existing.status_code != 200:
-            raise Exception(existing.text)
+            raise Exception(f"Record {idv} does not exist, cannot edit")

        status = existing.json()["status"]
caltechdata_api/caltechdata_write.py

Lines changed: 46 additions & 23 deletions
@@ -63,11 +63,8 @@ def add_file_links(metadata, file_links):
     for link in file_links:
         file = link.split("/")[-1]
         path = link.split(endpoint)[1]
-        try:
-            size = s3.info(path)["Size"]
-            size = humanbytes(size)
-        except:
-            size = 0
+        size = s3.info(path)["size"]
+        size = humanbytes(size)
         if link_string == "":
             cleaned = link.strip(file)
             link_string = f"Files available via S3 at {cleaned}</p>"
@@ -124,6 +121,7 @@ def caltechdata_write(
     file_links=[],
     s3=None,
     community=None,
+    authors=False,
 ):
     """
     File links are links to files existing in external systems that will
@@ -148,35 +146,59 @@ def caltechdata_write(
     else:
         repo_prefix = "10.33569"
     pids = {}
-    if "identifiers" in metadata:
-        for identifier in metadata["identifiers"]:
+    identifiers = []
+    if "metadata" in metadata:
+        # we have rdm schema
+        if "identifiers" in metadata["metadata"]:
+            identifiers = metadata["metadata"]["identifiers"]
+    elif "identifiers" in metadata:
+        identifiers = metadata["identifiers"]
+    for identifier in identifiers:
+        if "identifierType" in identifier:
             if identifier["identifierType"] == "DOI":
                 doi = identifier["identifier"]
                 prefix = doi.split("/")[0]
-
-                if prefix == repo_prefix:
-                    pids["doi"] = {
-                        "identifier": doi,
-                        "provider": "datacite",
-                        "client": "datacite",
-                    }
-                else:
-                    pids["doi"] = {
-                        "identifier": doi,
-                        "provider": "external",
-                    }
             elif identifier["identifierType"] == "oai":
                 pids["oai"] = {
                     "identifier": identifier["identifier"],
                     "provider": "oai",
                 }
+        elif "scheme" in identifier:
+            # We have RDM internal metadata
+            if identifier["scheme"] == "doi":
+                doi = identifier["identifier"]
+                prefix = doi.split("/")[0]
+            else:
+                doi = False
+        else:
+            doi = False
+        if doi != False:
+            if prefix == repo_prefix:
+                pids["doi"] = {
+                    "identifier": doi,
+                    "provider": "datacite",
+                    "client": "datacite",
+                }
+            else:
+                pids["doi"] = {
+                    "identifier": doi,
+                    "provider": "external",
+                }
+
     metadata["pids"] = pids

-    data = customize_schema.customize_schema(copy.deepcopy(metadata), schema=schema)
-    if production == True:
-        url = "https://data.caltech.edu/"
+    if authors == False:
+        data = customize_schema.customize_schema(copy.deepcopy(metadata), schema=schema)
+        if production == True:
+            url = "https://data.caltech.edu/"
+        else:
+            url = "https://data.caltechlibrary.dev/"
     else:
-        url = "https://data.caltechlibrary.dev/"
+        data = metadata
+        if production == True:
+            url = "https://authors.caltech.edu/"
+        else:
+            url = "https://authors.caltechlibrary.dev/"

     headers = {
         "Authorization": "Bearer %s" % token,
@@ -194,6 +216,7 @@ def caltechdata_write(
         data["files"] = {"default_preview": "README.txt"}

     # Make draft and publish
+    print(data)
     result = requests.post(url + "/api/records", headers=headers, json=data)
     if result.status_code != 201:
         raise Exception(result.text)
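For reference, the draft creation shown here is a plain POST to /api/records. A minimal sketch with placeholder token, test URL, and payload (none of these values are real):

```python
import requests

token = "YOUR-RDM-TOKEN"                           # placeholder token
url = "https://data.caltechlibrary.dev/"           # test instance, as above
headers = {"Authorization": "Bearer %s" % token}
data = {"metadata": {"title": "Example record"}}   # illustrative payload

result = requests.post(url + "/api/records", headers=headers, json=data)
if result.status_code != 201:
    raise Exception(result.text)
```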

caltechdata_api/customize_schema.py

Lines changed: 2 additions & 2 deletions
@@ -81,8 +81,8 @@ def rdm_creators_contributors(person_list, peopleroles):
         cre["type"] = "personal"
         change_label(cre, "givenName", "given_name")
         change_label(cre, "familyName", "family_name")
-        if 'name' not in cre:
-            cre['name'] = cre['family_name']+','+cre['given_name']
+        if "name" not in cre:
+            cre["name"] = cre["family_name"] + "," + cre["given_name"]
         change_label(cre, "nameIdentifiers", "identifiers")
         if "identifiers" in cre:
             new_id = []
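The reformatted lines above build a DataCite-style "Family,Given" name when none is supplied. A tiny illustration with a made-up creator record:

```python
# Illustrative creator record; the names are made up.
cre = {"family_name": "Doe", "given_name": "Jane"}

if "name" not in cre:
    # "Family,Given" with no space after the comma, matching the code above.
    cre["name"] = cre["family_name"] + "," + cre["given_name"]

print(cre["name"])  # Doe,Jane
```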

edit_osn.py

Lines changed: 61 additions & 0 deletions
@@ -0,0 +1,61 @@
+import argparse, os, json
+import s3fs
+from datacite import schema43
+from caltechdata_api import caltechdata_edit, get_metadata
+
+
+parser = argparse.ArgumentParser(
+    description="Edits a CaltechDATA record by adding OSN-stored pilot files"
+)
+parser.add_argument("folder", nargs=1, help="Folder")
+parser.add_argument("-id", nargs=1, help="")
+
+args = parser.parse_args()
+
+# Get access token as environment variable
+token = os.environ["RDMTOK"]
+
+endpoint = "https://renc.osn.xsede.org/"
+
+# Get metadata and files from bucket
+s3 = s3fs.S3FileSystem(anon=True, client_kwargs={"endpoint_url": endpoint})
+
+folder = args.folder[0]
+
+path = "ini210004tommorrell/" + folder + "/"
+
+idv = args.id[0]
+metadata = get_metadata(idv, schema="43")
+
+# Find the files
+files = s3.glob(path + "/*")
+
+file_links = []
+for link in files:
+    fname = link.split("/")[-1]
+    if "." not in fname:
+        # If there is a directory, get files
+        folder_files = s3.glob(link + "/*")
+        for file in folder_files:
+            name = file.split("/")[-1]
+            if "." not in name:
+                level_2_files = s3.glob(file + "/*")
+                for f in level_2_files:
+                    name = f.split("/")[-1]
+                    if "." not in name:
+                        level_3_files = s3.glob(f + "/*")
+                        for l3 in level_3_files:
+                            file_links.append(endpoint + l3)
+                    else:
+                        file_links.append(endpoint + f)
+            else:
+                file_links.append(endpoint + file)
+    else:
+        file_links.append(endpoint + link)
+
+production = True
+
+response = caltechdata_edit(
+    idv, metadata, token, [], production, "43", publish=True, file_links=file_links
+)
+print(response)
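The script would be invoked as, for example, python edit_osn.py <folder> -id <record-id>. Its nested glob loops walk at most three directory levels; as an aside (not part of this commit), fsspec's find() lists objects at any depth, which would collapse the traversal into one call, assuming every object under the folder should be linked:

```python
import s3fs

endpoint = "https://renc.osn.xsede.org/"
s3 = s3fs.S3FileSystem(anon=True, client_kwargs={"endpoint_url": endpoint})

path = "ini210004tommorrell/example-folder/"  # folder name is illustrative
# find() returns keys for all objects below path, at any depth.
file_links = [endpoint + key for key in s3.find(path)]
print(len(file_links), "files found")
```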

outdated/edit_pilot_phase1.py

Lines changed: 0 additions & 91 deletions
This file was deleted.
