
Commit e6f71d0

Move to exceptions, and enhance caltechdata_edit
1 parent 66d7d96 commit e6f71d0

6 files changed (+118 / -100 lines)

caltechdata_api/__init__.py

Lines changed: 6 additions & 1 deletion
@@ -1,4 +1,9 @@
-from .caltechdata_write import caltechdata_write, write_files_rdm
+from .caltechdata_write import (
+    caltechdata_write,
+    write_files_rdm,
+    add_file_links,
+    send_to_community,
+)
 from .caltechdata_edit import caltechdata_edit, caltechdata_unembargo
 from .customize_schema import customize_schema
 from .get_metadata import get_metadata
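With this change, add_file_links and send_to_community join the package's public exports. A quick sketch of the expanded import surface; the metadata dict and file URL below are placeholders, and the exact fields that add_file_links writes are not shown in this diff:

from caltechdata_api import (
    add_file_links,
    caltechdata_edit,
    caltechdata_write,
    send_to_community,
)

# add_file_links(metadata, file_links) returns the metadata dict with the
# given external file URLs recorded; the values here are hypothetical
metadata = add_file_links(
    {"titles": [{"title": "Example dataset"}]},
    ["https://example.org/data/file1.h5"],
)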

caltechdata_api/caltechdata_edit.py

Lines changed: 49 additions & 35 deletions
@@ -3,7 +3,12 @@
 import requests
 from requests import session
 
-from caltechdata_api import customize_schema, write_files_rdm
+from caltechdata_api import (
+    customize_schema,
+    write_files_rdm,
+    add_file_links,
+    send_to_community,
+)
 
 
 def caltechdata_unembargo(token, ids, production=False):
@@ -19,10 +24,10 @@ def caltechdata_edit(
     production=False,
     schema="43",
     publish=False,
+    file_links=[],
+    s3=None,
+    community=None,
 ):
-    """Including files will only replaces files if they have the same name
-    The delete option will delete any existing files with a given file extension
-    There are more file operations that could be implemented"""
 
     # If no token is provided, get from RDMTOK environment variable
     if not token:
@@ -36,6 +41,9 @@ def caltechdata_edit(
     if isinstance(ids, str):
         ids = [ids]
 
+    if file_links:
+        metadata = add_file_links(metadata, file_links)
+
     data = customize_schema.customize_schema(copy.deepcopy(metadata), schema=schema)
     if production == True:
         url = "https://data.caltech.edu"
@@ -88,52 +96,58 @@ def caltechdata_edit(
             write_files_rdm(files, file_link, headers, f_headers, verify)
 
         else:
-            # just update metadata
-            # result = requests.post(
-            #     url + "/api/records/" + idv + "/draft",
-            #     headers=headers,
-            #     verify=verify,
-            # )
-            # if result.status_code != 200:
-            #     print(result.text)
-            #     exit()
-            # print(result.json())
-            # exit()
-            # print(url + "/api/records/" + idv + "/draft")
+            # Check for existing draft
             result = requests.get(
-                url + "/api/records/" + idv,
+                url + "/api/records/" + idv + "/draft",
                 headers=headers,
                 verify=verify,
             )
             if result.status_code != 200:
+                draft = False
+            else:
+                draft = True
+            if draft == False:
                 result = requests.get(
-                    url + "/api/records/" + idv + "/draft",
-                    headers=headers,
-                    verify=verify,
+                    url + "/api/records/" + idv,
+                    headers=headers,
+                    verify=verify,
                 )
                 if result.status_code != 200:
-                    print(result.text)
-                    exit()
+                    raise Exception(result.text)
             # We want files to stay the same as the existing record
             data["files"] = result.json()["files"]
             print(url + "/api/records/" + idv + "/draft")
-            result = requests.put(
-                url + "/api/records/" + idv + "/draft",
-                headers=headers,
-                json=data,
-                verify=verify,
-            )
-            if result.status_code != 200:
-                print(result.text)
-                exit()
+            if draft == True:
+                result = requests.put(
+                    url + "/api/records/" + idv + "/draft",
+                    headers=headers,
+                    json=data,
+                    verify=verify,
+                )
+                if result.status_code != 200:
+                    raise Exception(result.text)
+            else:
+                result = requests.post(
+                    url + "/api/records/" + idv + "/draft",
+                    headers=headers,
+                    json=data,
+                    verify=verify,
+                )
+                if result.status_code != 201:
+                    raise Exception(result.text)
 
-        if publish:
+        if community:
+            review_link = result.json()["links"]["review"]
+            result = send_to_community(
+                review_link, data, headers, verify, publish, community
+            )
+            doi = result.json()["pids"]["doi"]["identifier"]
+            completed.append(doi)
+        elif publish:
             publish_link = f"{url}/api/records/{idv}/draft/actions/publish"
-            print(publish_link)
             result = requests.post(publish_link, headers=headers, verify=verify)
             if result.status_code != 202:
-                print(result.text)
-                exit()
+                raise Exception(result.text)
             doi = result.json()["pids"]["doi"]["identifier"]
            completed.append(doi)
        else:
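Taken together, caltechdata_edit now accepts file_links, s3, and community keyword arguments, updates an existing draft with PUT when one is present and creates one with POST otherwise, and raises exceptions on API errors instead of printing and exiting. A minimal caller sketch under those assumptions; the token, record id, metadata, and file URL are placeholders, and keyword arguments are used because the full parameter order is not visible in these hunks:

import os

from caltechdata_api import caltechdata_edit

metadata = {"titles": [{"title": "Updated example record"}]}  # placeholder metadata

try:
    caltechdata_edit(
        token=os.environ.get("RDMTOK"),  # the function also falls back to RDMTOK itself
        ids="abcd1-23456",               # placeholder record id
        metadata=metadata,
        production=False,
        publish=True,
        file_links=["https://example.org/external/file1.h5"],  # merged via add_file_links
        community=None,                  # or a community id to route the draft through review
    )
except Exception as err:
    # With this commit, failed API calls raise instead of calling exit()
    print(f"caltechdata_edit failed: {err}")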

caltechdata_api/caltechdata_write.py

Lines changed: 44 additions & 47 deletions
@@ -1,6 +1,6 @@
 import copy
 import json
-import os, sys, requests
+import os, requests
 
 import s3fs
 from requests import session
@@ -19,8 +19,7 @@ def write_files_rdm(files, file_link, headers, f_headers, verify, s3=None):
     result = requests.post(file_link, headers=headers, json=f_json, verify=verify)
     print("upload links")
     if result.status_code != 201:
-        print(result.text)
-        exit()
+        raise Exception(result.text)
     # Now we have the upload links
     for entry in result.json()["entries"]:
         link = entry["links"]["content"]
@@ -36,12 +35,10 @@ def write_files_rdm(files, file_link, headers, f_headers, verify, s3=None):
         print(link)
         result = requests.put(link, headers=f_headers, verify=verify, data=infile)
         if result.status_code != 200:
-            print(result.text)
-            exit()
+            raise Exception(result.text)
         result = requests.post(commit, headers=headers, verify=verify)
         if result.status_code != 200:
-            print(result.text)
-            exit()
+            raise Exception(result.text)
 
 
 def add_file_links(metadata, file_links):
@@ -70,6 +67,42 @@ def add_file_links(metadata, file_links):
     return metadata
 
 
+def send_to_community(review_link, data, headers, verify, publish, community):
+
+    data = {
+        "receiver": {"community": community},
+        "type": "community-submission",
+    }
+    result = requests.put(review_link, json=data, headers=headers, verify=verify)
+    if result.status_code != 200:
+        print(result.status_code)
+        raise Exception(result.text)
+    submit_link = result.json()["links"]["actions"]["submit"]
+    data = comment = {
+        "payload": {
+            "content": "This record is submitted automatically with the CaltechDATA API",
+            "format": "html",
+        }
+    }
+    result = requests.post(submit_link, json=data, headers=headers, verify=verify)
+    if result.status_code != 200:
+        print(result.status_code)
+        raise Exception(result.text)
+    if publish:
+        accept_link = result.json()["links"]["actions"]["accept"]
+        data = comment = {
+            "payload": {
+                "content": "This record is accepted automatically with the CaltechDATA API",
+                "format": "html",
+            }
+        }
+        result = requests.post(accept_link, json=data, headers=headers, verify=verify)
+        if result.status_code != 200:
+            print(result.status_code)
+            raise Exception(result.text)
+    return result
+
+
 def caltechdata_write(
     metadata,
     token=None,
@@ -128,8 +161,7 @@ def caltechdata_write(
         url + "/api/records", headers=headers, json=data, verify=verify
     )
     if result.status_code != 201:
-        print(result.text)
-        exit()
+        raise Exception(result.text)
     idv = result.json()["id"]
     print(f"record {idv} created")
     publish_link = result.json()["links"]["publish"]
@@ -141,46 +173,11 @@
 
     if community:
         review_link = result.json()["links"]["review"]
-        data = {
-            "receiver": {"community": community},
-            "type": "community-submission",
-        }
-        result = requests.put(review_link, json=data, headers=headers, verify=verify)
-        if result.status_code != 200:
-            print(result.status_code)
-            print(result.text)
-            exit()
-        submit_link = result.json()["links"]["actions"]["submit"]
-        data = comment = {
-            "payload": {
-                "content": "This record is submitted automatically with the CaltechDATA API",
-                "format": "html",
-            }
-        }
-        result = requests.post(submit_link, json=data, headers=headers, verify=verify)
-        if result.status_code != 200:
-            print(result.status_code)
-            print(result.text)
-            exit()
-        if publish:
-            accept_link = result.json()["links"]["actions"]["accept"]
-            data = comment = {
-                "payload": {
-                    "content": "This record is accepted automatically with the CaltechDATA API",
-                    "format": "html",
-                }
-            }
-            result = requests.post(
-                accept_link, json=data, headers=headers, verify=verify
-            )
-            if result.status_code != 200:
-                print(result.status_code)
-                print(result.text)
-                exit()
+        send_to_community(review_link, data, headers, verify, publish, community)
+
     else:
         if publish:
             result = requests.post(publish_link, headers=headers, verify=verify)
             if result.status_code != 202:
-                print(result.text)
-                exit()
+                raise Exception(result.text)
     return idv
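send_to_community factors the community review, submit, and optional accept steps out of caltechdata_write so the same flow can be reused by caltechdata_edit, and the remaining error paths in this module now raise as well. A sketch of the community path, assuming placeholder metadata, token, and community identifier; only keyword names visible in these diffs are relied on:

import os

from caltechdata_api import caltechdata_write

metadata = {"titles": [{"title": "Example deposit"}]}  # placeholder metadata

# With community set, the new draft is submitted for community review;
# publish=True also accepts the review automatically via send_to_community.
idv = caltechdata_write(
    metadata,
    token=os.environ.get("RDMTOK"),  # assumed token source, mirroring caltechdata_edit
    schema="43",
    publish=True,
    community="my-community-id",     # placeholder community identifier
)
print(f"record {idv} created")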

completed_dois.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

fix_names.py

Lines changed: 1 addition & 1 deletion
@@ -23,7 +23,7 @@ def fix_name(metadata):
 
 total = response.json()["hits"]["total"]
 pages = math.ceil(int(total) / 1000)
-hits = []
+hits = []#[{'id':'15e0h-t0t34'}]
 print(total)
 for c in progressbar(range(1,2)):#, pages + 1)):
     chunkurl = (

write_hte.py

Lines changed: 17 additions & 15 deletions
@@ -12,9 +12,9 @@
 s3 = s3fs.S3FileSystem(anon=True, client_kwargs={"endpoint_url": endpoint})
 
 # Set up datacite client
-# password = os.environ["DATACITE"]
+password = os.environ["DATACITE"]
 prefix = "10.25989"
-# datacite = DataCiteRESTClient(username="CALTECH.HTE", password=password, prefix=prefix)
+datacite = DataCiteRESTClient(username="CALTECH.HTE", password=password, prefix=prefix)
 
 path = "ini210004tommorrell/" + folder + "/"
 dirs = s3.ls(path)
@@ -177,14 +177,15 @@
 
 production = True
 
-result = requests.get(f'https://api.datacite.org/dois/{doi}')
-if result.status_code != 200:
-    print('DATACITE Failed')
-    print(result.text)
-    exit()
+#We're now doing new records, so redirects are not needed
+#result = requests.get(f'https://api.datacite.org/dois/{doi}')
+#if result.status_code != 200:
+#    print('DATACITE Failed')
+#    print(result.text)
+#    exit()
 
-url = result.json()['data']['attributes']['url']
-old_id = url.split('data.caltech.edu/records/')[1]
+#url = result.json()['data']['attributes']['url']
+#old_id = url.split('data.caltech.edu/records/')[1]
 new_id = caltechdata_write(
     metadata,
     schema="43",
@@ -195,14 +196,15 @@
     community=community,
 )
 print(new_id)
-
-# url = response.split("record ")[1].strip()[:-1]
+url = f'https://data.caltech.edu/records/{new_id}'
 
-record_ids[old_id] = new_id
-with open("new_ids.json", "w") as outfile:
-    json.dump(record_ids, outfile)
+#record_ids[old_id] = new_id
+#with open("new_ids.json", "w") as outfile:
+#    json.dump(record_ids, outfile)
 
-# doi = datacite.update_doi(doi=record, metadata=metadata, url=url)['doi']
+doi = datacite.update_doi(doi=record, metadata=metadata, url=url)['doi']
 completed.append(doi)
 with open("completed_dois.json", "w") as outfile:
     data = json.dump(completed, outfile)
+
+exit()
