Skip to content

Commit 521c44a

Browse files
committed
Finish up edit modifications and general cleanup
1 parent 21cb146 commit 521c44a

File tree

9 files changed

+79
-97
lines changed

9 files changed

+79
-97
lines changed

caltechdata_api/caltechdata_edit.py

Lines changed: 17 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ def caltechdata_edit(
2020
metadata={},
2121
token=None,
2222
files={},
23-
delete={},
2423
production=False,
2524
schema="43",
2625
publish=False,
@@ -61,13 +60,6 @@ def caltechdata_edit(
6160
"Content-type": "application/octet-stream",
6261
}
6362

64-
if delete:
65-
print(
66-
"""WARNING: Delete command is no longer supported; only the
67-
files listed in the file option will be added to new version of
68-
record"""
69-
)
70-
7163
completed = []
7264

7365
for idv in ids:
@@ -80,8 +72,7 @@ def caltechdata_edit(
8072
verify=verify,
8173
)
8274
if result.status_code != 201:
83-
print(result.text)
84-
exit()
75+
raise Exception(result.text)
8576
# Get the id of the new version
8677
idv = result.json()["id"]
8778
# Update metadata
@@ -103,38 +94,31 @@ def caltechdata_edit(
10394
verify=verify,
10495
)
10596
if result.status_code != 200:
106-
draft = False
107-
else:
108-
draft = True
109-
if draft == False:
110-
result = requests.get(
111-
url + "/api/records/" + idv,
112-
headers=headers,
113-
verify=verify,
114-
)
115-
if result.status_code != 200:
116-
raise Exception(result.text)
117-
# We want files to stay the same as the existing record
118-
data["files"] = result.json()["files"]
119-
print(url + "/api/records/" + idv + "/draft")
120-
if draft == True:
121-
result = requests.put(
97+
# We make a draft
98+
result = requests.post(
12299
url + "/api/records/" + idv + "/draft",
123100
headers=headers,
124-
json=data,
125101
verify=verify,
126102
)
127-
if result.status_code != 200:
103+
if result.status_code != 201:
128104
raise Exception(result.text)
129-
else:
130-
result = requests.post(
131-
url + "/api/records/" + idv + "/draft",
105+
result = requests.get(
106+
url + "/api/records/" + idv,
132107
headers=headers,
133-
json=data,
134108
verify=verify,
135109
)
136-
if result.status_code != 201:
110+
if result.status_code != 200:
137111
raise Exception(result.text)
112+
# We want files to stay the same as the existing record
113+
data["files"] = result.json()["files"]
114+
result = requests.put(
115+
url + "/api/records/" + idv + "/draft",
116+
headers=headers,
117+
json=data,
118+
verify=verify,
119+
)
120+
if result.status_code != 200:
121+
raise Exception(result.text)
138122

139123
if community:
140124
review_link = result.json()["links"]["review"]

caltechdata_api/caltechdata_write.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ def write_files_rdm(files, file_link, headers, f_headers, verify, s3=None):
1717
f_json.append({"key": filename})
1818
f_list[filename] = f
1919
result = requests.post(file_link, headers=headers, json=f_json, verify=verify)
20-
print("upload links")
2120
if result.status_code != 201:
2221
raise Exception(result.text)
2322
# Now we have the upload links
@@ -31,8 +30,6 @@ def write_files_rdm(files, file_link, headers, f_headers, verify, s3=None):
3130
infile = open(f_list[name], "rb")
3231
# size = infile.seek(0, 2)
3332
# infile.seek(0, 0) # reset at beginning
34-
print("upload")
35-
print(link)
3633
result = requests.put(link, headers=f_headers, verify=verify, data=infile)
3734
if result.status_code != 200:
3835
raise Exception(result.text)
@@ -75,7 +72,6 @@ def send_to_community(review_link, data, headers, verify, publish, community):
7572
}
7673
result = requests.put(review_link, json=data, headers=headers, verify=verify)
7774
if result.status_code != 200:
78-
print(result.status_code)
7975
raise Exception(result.text)
8076
submit_link = result.json()["links"]["actions"]["submit"]
8177
data = comment = {
@@ -86,7 +82,6 @@ def send_to_community(review_link, data, headers, verify, publish, community):
8682
}
8783
result = requests.post(submit_link, json=data, headers=headers, verify=verify)
8884
if result.status_code != 200:
89-
print(result.status_code)
9085
raise Exception(result.text)
9186
if publish:
9287
accept_link = result.json()["links"]["actions"]["accept"]
@@ -98,7 +93,6 @@ def send_to_community(review_link, data, headers, verify, publish, community):
9893
}
9994
result = requests.post(accept_link, json=data, headers=headers, verify=verify)
10095
if result.status_code != 200:
101-
print(result.status_code)
10296
raise Exception(result.text)
10397
return result
10498

@@ -154,22 +148,18 @@ def caltechdata_write(
154148
if "README.txt" in files:
155149
data["files"] = {"default_preview": "README.txt"}
156150

157-
print(json.dumps(data))
158-
159151
# Make draft and publish
160152
result = requests.post(
161153
url + "/api/records", headers=headers, json=data, verify=verify
162154
)
163155
if result.status_code != 201:
164156
raise Exception(result.text)
165157
idv = result.json()["id"]
166-
print(f"record {idv} created")
167158
publish_link = result.json()["links"]["publish"]
168159

169160
if files:
170161
file_link = result.json()["links"]["files"]
171162
write_files_rdm(files, file_link, headers, f_headers, verify, s3)
172-
print("files added")
173163

174164
if community:
175165
review_link = result.json()["links"]["review"]

caltechdata_api/customize_schema.py

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -220,29 +220,27 @@ def customize_schema_rdm(json_record):
220220
pids = {}
221221
if "identifiers" in json_record:
222222
identifiers = []
223+
pids = {}
223224
for identifier in json_record["identifiers"]:
224225
if identifier["identifierType"] == "DOI":
225226
doi = identifier["identifier"]
226227
prefix = doi.split("/")[0]
227228
if prefix == "10.22002":
228-
pids = {
229-
"doi": {
230-
"identifier": doi,
231-
"provider": "datacite",
232-
"client": "datacite",
233-
}
229+
pids["doi"] = {
230+
"identifier": doi,
231+
"provider": "datacite",
232+
"client": "datacite",
234233
}
235234
else:
236-
pids = {
237-
"doi": {
238-
"identifier": doi,
239-
"provider": "external",
240-
}
235+
pids["doi"] = {
236+
"identifier": doi,
237+
"provider": "external",
241238
}
242239
elif identifier["identifierType"] == "oai":
243-
# All OAI identifiers are system generated, and are not accepted
244-
# via this API
245-
print("Discarding oai identifier")
240+
pids["oai"] = {
241+
"identifier": identifier["identifier"],
242+
"provider": "oai",
243+
}
246244
else:
247245
identifier["scheme"] = identifiertypes[identifier.pop("identifierType")]
248246
identifiers.append(identifier)

outdated/edit.py renamed to edit.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,12 @@
1313
)
1414
parser.add_argument("-ids", nargs="*", help="CaltechDATA IDs")
1515
parser.add_argument("-fnames", nargs="*", help="New Files")
16-
parser.add_argument("-schema", default="40", help="Metadata Schema")
17-
parser.add_argument(
18-
"-delete", nargs="*", default="{}", help="Filename or extension to delete"
19-
)
16+
parser.add_argument("-flinks", nargs="*", help="New File Links")
17+
parser.add_argument("-schema", default="43", help="Metadata Schema")
2018
args = parser.parse_args()
2119

22-
# Get access token from TIND set as environment variable with source token.bash
23-
token = os.environ["TINDTOK"]
20+
# Get access token set as environment variable with source token.bash
21+
token = os.environ["RDMTOK"]
2422

2523
if args.json_file:
2624
metaf = open(args.json_file, "r")
@@ -29,8 +27,16 @@
2927
metadata = {}
3028

3129
production = True
30+
publish = True
3231

3332
response = caltechdata_edit(
34-
args.ids, metadata, token, args.fnames, args.delete, production, args.schema
33+
args.ids,
34+
metadata,
35+
token,
36+
args.fnames,
37+
production,
38+
args.schema,
39+
publish,
40+
args.flinks,
3541
)
3642
print(response)

fix_names.py

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3,49 +3,49 @@
33
from progressbar import progressbar
44
from caltechdata_api import caltechdata_edit
55

6+
67
def fix_name(metadata):
78
fixed = False
89
for name in metadata:
9-
if name['nameType'] == 'Personal':
10-
if 'givenName' not in name:
10+
if name["nameType"] == "Personal":
11+
if "givenName" not in name:
1112
fixed = True
12-
given = name['name'].split(',')[1]
13-
name['givenName'] = given.strip()
14-
return metadata,fixed
13+
given = name["name"].split(",")[1]
14+
name["givenName"] = given.strip()
15+
return metadata, fixed
16+
1517

1618
url = "https://data.caltech.edu/api/records"
1719

1820
headers = {
19-
"accept": "application/vnd.datacite.datacite+json",
20-
}
21+
"accept": "application/vnd.datacite.datacite+json",
22+
}
2123

2224
response = requests.get(f"{url}?search_type=scan&scroll=5m")
2325

2426
total = response.json()["hits"]["total"]
2527
pages = math.ceil(int(total) / 1000)
26-
hits = []#[{'id':'15e0h-t0t34'}]
28+
hits = [] # [{'id':'a7f64-a8k10'}]
2729
print(total)
28-
for c in progressbar(range(1,2)):#, pages + 1)):
29-
chunkurl = (
30-
f"{url}?&sort=newest&size=1000&page={c}"
31-
)
30+
for c in progressbar(range(1, 2)): # , pages + 1)):
31+
chunkurl = f"{url}?&sort=newest&size=1000&page={c}"
3232
response = requests.get(chunkurl)
3333
response = response.json()
34-
34+
3535
hits += response["hits"]["hits"]
3636

3737
for h in progressbar(hits):
3838
idv = str(h["id"])
39-
response = requests.get(f'{url}/{idv}', headers=headers)
39+
response = requests.get(f"{url}/{idv}", headers=headers)
4040
if response.status_code != 200:
4141
print(response.text)
4242
exit()
4343
else:
4444
metadata = response.json()
45-
metadata['creators'], fixed = fix_name(metadata['creators'])
46-
if 'contributors' in metadata:
47-
metadata['contributors'] = fix_name(metadata['contributors'])
45+
metadata["creators"], fixed = fix_name(metadata["creators"])
46+
if "contributors" in metadata:
47+
metadata["contributors"] = fix_name(metadata["contributors"])
4848
if fixed:
4949
print(idv)
50-
caltechdata_edit(idv,metadata,production=True,publish=True)
50+
caltechdata_edit(idv, metadata, production=True, publish=True)
5151
exit()

setup.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,14 @@ def read(fname):
1818
src = f.read()
1919
return src
2020

21+
2122
def package_files(package, directory):
2223
os.chdir(package)
2324
paths = glob.glob(directory + "/**", recursive=True)
2425
os.chdir("..")
2526
return paths
2627

28+
2729
codemeta_json = "codemeta.json"
2830

2931
# Let's pickup as much metadata as we need from codemeta.json
@@ -65,7 +67,7 @@ def package_files(package, directory):
6567
# 'fancy feature': ['django'],
6668
}
6769

68-
files = package_files("caltechdata_api", "vocabularies")
70+
files = package_files("caltechdata_api", "vocabularies")
6971
files.append("vocabularies.yaml")
7072

7173
# The rest you shouldn't have to touch too much :)

token.bash

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
export TINDTOK="token"
1+
export RDMTOK="token"
22

write.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,5 +23,7 @@
2323
production = True
2424
publish = False
2525

26-
response = caltechdata_write(metadata, token, args.fnames, production, args.schema, publish)
26+
response = caltechdata_write(
27+
metadata, token, args.fnames, production, args.schema, publish
28+
)
2729
print(response)

write_hte.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@
148148
new_cre.append(creator)
149149
metadata["creators"] = new_cre
150150

151-
doi = metadata['doi'].lower()
151+
doi = metadata["doi"].lower()
152152
unnecessary = [
153153
"id",
154154
"doi",
@@ -177,15 +177,15 @@
177177

178178
production = True
179179

180-
#We're now doing new records, so redirects are not needed
181-
#result = requests.get(f'https://api.datacite.org/dois/{doi}')
182-
#if result.status_code != 200:
180+
# We're now doing new records, so redirects are not needed
181+
# result = requests.get(f'https://api.datacite.org/dois/{doi}')
182+
# if result.status_code != 200:
183183
# print('DATACITE Failed')
184184
# print(result.text)
185185
# exit()
186-
187-
#url = result.json()['data']['attributes']['url']
188-
#old_id = url.split('data.caltech.edu/records/')[1]
186+
187+
# url = result.json()['data']['attributes']['url']
188+
# old_id = url.split('data.caltech.edu/records/')[1]
189189
new_id = caltechdata_write(
190190
metadata,
191191
schema="43",
@@ -196,13 +196,13 @@
196196
community=community,
197197
)
198198
print(new_id)
199-
url = f'https://data.caltech.edu/records/{new_id}'
200-
201-
#record_ids[old_id] = new_id
202-
#with open("new_ids.json", "w") as outfile:
199+
url = f"https://data.caltech.edu/records/{new_id}"
200+
201+
# record_ids[old_id] = new_id
202+
# with open("new_ids.json", "w") as outfile:
203203
# json.dump(record_ids, outfile)
204204

205-
doi = datacite.update_doi(doi=record, metadata=metadata, url=url)['doi']
205+
doi = datacite.update_doi(doi=record, metadata=metadata, url=url)["doi"]
206206
completed.append(doi)
207207
with open("completed_dois.json", "w") as outfile:
208208
data = json.dump(completed, outfile)

0 commit comments

Comments
 (0)