Skip to content

Commit 521c44a

Browse files
committed
Finish up edit modifications and general cleanup
1 parent 21cb146 commit 521c44a

File tree

9 files changed

+79
-97
lines changed

9 files changed

+79
-97
lines changed

caltechdata_api/caltechdata_edit.py

Lines changed: 17 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ def caltechdata_edit(
2020
metadata={},
2121
token=None,
2222
files={},
23-
delete={},
2423
production=False,
2524
schema="43",
2625
publish=False,
@@ -61,13 +60,6 @@ def caltechdata_edit(
6160
"Content-type": "application/octet-stream",
6261
}
6362

64-
if delete:
65-
print(
66-
"""WARNING: Delete command is no longer supported; only the
67-
files listed in the file option will be added to new version of
68-
record"""
69-
)
70-
7163
completed = []
7264

7365
for idv in ids:
@@ -80,8 +72,7 @@ def caltechdata_edit(
8072
verify=verify,
8173
)
8274
if result.status_code != 201:
83-
print(result.text)
84-
exit()
75+
raise Exception(result.text)
8576
# Get the id of the new version
8677
idv = result.json()["id"]
8778
# Update metadata
@@ -103,38 +94,31 @@ def caltechdata_edit(
10394
verify=verify,
10495
)
10596
if result.status_code != 200:
106-
draft = False
107-
else:
108-
draft = True
109-
if draft == False:
110-
result = requests.get(
111-
url + "/api/records/" + idv,
112-
headers=headers,
113-
verify=verify,
114-
)
115-
if result.status_code != 200:
116-
raise Exception(result.text)
117-
# We want files to stay the same as the existing record
118-
data["files"] = result.json()["files"]
119-
print(url + "/api/records/" + idv + "/draft")
120-
if draft == True:
121-
result = requests.put(
97+
# We make a draft
98+
result = requests.post(
12299
url + "/api/records/" + idv + "/draft",
123100
headers=headers,
124-
json=data,
125101
verify=verify,
126102
)
127-
if result.status_code != 200:
103+
if result.status_code != 201:
128104
raise Exception(result.text)
129-
else:
130-
result = requests.post(
131-
url + "/api/records/" + idv + "/draft",
105+
result = requests.get(
106+
url + "/api/records/" + idv,
132107
headers=headers,
133-
json=data,
134108
verify=verify,
135109
)
136-
if result.status_code != 201:
110+
if result.status_code != 200:
137111
raise Exception(result.text)
112+
# We want files to stay the same as the existing record
113+
data["files"] = result.json()["files"]
114+
result = requests.put(
115+
url + "/api/records/" + idv + "/draft",
116+
headers=headers,
117+
json=data,
118+
verify=verify,
119+
)
120+
if result.status_code != 200:
121+
raise Exception(result.text)
138122

139123
if community:
140124
review_link = result.json()["links"]["review"]

caltechdata_api/caltechdata_write.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ def write_files_rdm(files, file_link, headers, f_headers, verify, s3=None):
1717
f_json.append({"key": filename})
1818
f_list[filename] = f
1919
result = requests.post(file_link, headers=headers, json=f_json, verify=verify)
20-
print("upload links")
2120
if result.status_code != 201:
2221
raise Exception(result.text)
2322
# Now we have the upload links
@@ -31,8 +30,6 @@ def write_files_rdm(files, file_link, headers, f_headers, verify, s3=None):
3130
infile = open(f_list[name], "rb")
3231
# size = infile.seek(0, 2)
3332
# infile.seek(0, 0) # reset at beginning
34-
print("upload")
35-
print(link)
3633
result = requests.put(link, headers=f_headers, verify=verify, data=infile)
3734
if result.status_code != 200:
3835
raise Exception(result.text)
@@ -75,7 +72,6 @@ def send_to_community(review_link, data, headers, verify, publish, community):
7572
}
7673
result = requests.put(review_link, json=data, headers=headers, verify=verify)
7774
if result.status_code != 200:
78-
print(result.status_code)
7975
raise Exception(result.text)
8076
submit_link = result.json()["links"]["actions"]["submit"]
8177
data = comment = {
@@ -86,7 +82,6 @@ def send_to_community(review_link, data, headers, verify, publish, community):
8682
}
8783
result = requests.post(submit_link, json=data, headers=headers, verify=verify)
8884
if result.status_code != 200:
89-
print(result.status_code)
9085
raise Exception(result.text)
9186
if publish:
9287
accept_link = result.json()["links"]["actions"]["accept"]
@@ -98,7 +93,6 @@ def send_to_community(review_link, data, headers, verify, publish, community):
9893
}
9994
result = requests.post(accept_link, json=data, headers=headers, verify=verify)
10095
if result.status_code != 200:
101-
print(result.status_code)
10296
raise Exception(result.text)
10397
return result
10498

@@ -154,22 +148,18 @@ def caltechdata_write(
154148
if "README.txt" in files:
155149
data["files"] = {"default_preview": "README.txt"}
156150

157-
print(json.dumps(data))
158-
159151
# Make draft and publish
160152
result = requests.post(
161153
url + "/api/records", headers=headers, json=data, verify=verify
162154
)
163155
if result.status_code != 201:
164156
raise Exception(result.text)
165157
idv = result.json()["id"]
166-
print(f"record {idv} created")
167158
publish_link = result.json()["links"]["publish"]
168159

169160
if files:
170161
file_link = result.json()["links"]["files"]
171162
write_files_rdm(files, file_link, headers, f_headers, verify, s3)
172-
print("files added")
173163

174164
if community:
175165
review_link = result.json()["links"]["review"]

caltechdata_api/customize_schema.py

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -220,29 +220,27 @@ def customize_schema_rdm(json_record):
220220
pids = {}
221221
if "identifiers" in json_record:
222222
identifiers = []
223+
pids = {}
223224
for identifier in json_record["identifiers"]:
224225
if identifier["identifierType"] == "DOI":
225226
doi = identifier["identifier"]
226227
prefix = doi.split("/")[0]
227228
if prefix == "10.22002":
228-
pids = {
229-
"doi": {
230-
"identifier": doi,
231-
"provider": "datacite",
232-
"client": "datacite",
233-
}
229+
pids["doi"] = {
230+
"identifier": doi,
231+
"provider": "datacite",
232+
"client": "datacite",
234233
}
235234
else:
236-
pids = {
237-
"doi": {
238-
"identifier": doi,
239-
"provider": "external",
240-
}
235+
pids["doi"] = {
236+
"identifier": doi,
237+
"provider": "external",
241238
}
242239
elif identifier["identifierType"] == "oai":
243-
# All OAI identifiers are system generated, and are not accepted
244-
# via this API
245-
print("Discarding oai identifier")
240+
pids["oai"] = {
241+
"identifier": identifier["identifier"],
242+
"provider": "oai",
243+
}
246244
else:
247245
identifier["scheme"] = identifiertypes[identifier.pop("identifierType")]
248246
identifiers.append(identifier)

outdated/edit.py renamed to edit.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,12 @@
1313
)
1414
parser.add_argument("-ids", nargs="*", help="CaltechDATA IDs")
1515
parser.add_argument("-fnames", nargs="*", help="New Files")
16-
parser.add_argument("-schema", default="40", help="Metadata Schema")
17-
parser.add_argument(
18-
"-delete", nargs="*", default="{}", help="Filename or extension to delete"
19-
)
16+
parser.add_argument("-flinks", nargs="*", help="New File Links")
17+
parser.add_argument("-schema", default="43", help="Metadata Schema")
2018
args = parser.parse_args()
2119

22-
# Get access token from TIND set as environment variable with source token.bash
23-
token = os.environ["TINDTOK"]
20+
# Get access token set as environment variable with source token.bash
21+
token = os.environ["RDMTOK"]
2422

2523
if args.json_file:
2624
metaf = open(args.json_file, "r")
@@ -29,8 +27,16 @@
2927
metadata = {}
3028

3129
production = True
30+
publish = True
3231

3332
response = caltechdata_edit(
34-
args.ids, metadata, token, args.fnames, args.delete, production, args.schema
33+
args.ids,
34+
metadata,
35+
token,
36+
args.fnames,
37+
production,
38+
args.schema,
39+
publish,
40+
args.flinks,
3541
)
3642
print(response)

fix_names.py

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3,49 +3,49 @@
33
from progressbar import progressbar
44
from caltechdata_api import caltechdata_edit
55

6+
67
def fix_name(metadata):
78
fixed = False
89
for name in metadata:
9-
if name['nameType'] == 'Personal':
10-
if 'givenName' not in name:
10+
if name["nameType"] == "Personal":
11+
if "givenName" not in name:
1112
fixed = True
12-
given = name['name'].split(',')[1]
13-
name['givenName'] = given.strip()
14-
return metadata,fixed
13+
given = name["name"].split(",")[1]
14+
name["givenName"] = given.strip()
15+
return metadata, fixed
16+
1517

1618
url = "https://data.caltech.edu/api/records"
1719

1820
headers = {
19-
"accept": "application/vnd.datacite.datacite+json",
20-
}
21+
"accept": "application/vnd.datacite.datacite+json",
22+
}
2123

2224
response = requests.get(f"{url}?search_type=scan&scroll=5m")
2325

2426
total = response.json()["hits"]["total"]
2527
pages = math.ceil(int(total) / 1000)
26-
hits = []#[{'id':'15e0h-t0t34'}]
28+
hits = [] # [{'id':'a7f64-a8k10'}]
2729
print(total)
28-
for c in progressbar(range(1,2)):#, pages + 1)):
29-
chunkurl = (
30-
f"{url}?&sort=newest&size=1000&page={c}"
31-
)
30+
for c in progressbar(range(1, 2)): # , pages + 1)):
31+
chunkurl = f"{url}?&sort=newest&size=1000&page={c}"
3232
response = requests.get(chunkurl)
3333
response = response.json()
34-
34+
3535
hits += response["hits"]["hits"]
3636

3737
for h in progressbar(hits):
3838
idv = str(h["id"])
39-
response = requests.get(f'{url}/{idv}', headers=headers)
39+
response = requests.get(f"{url}/{idv}", headers=headers)
4040
if response.status_code != 200:
4141
print(response.text)
4242
exit()
4343
else:
4444
metadata = response.json()
45-
metadata['creators'], fixed = fix_name(metadata['creators'])
46-
if 'contributors' in metadata:
47-
metadata['contributors'] = fix_name(metadata['contributors'])
45+
metadata["creators"], fixed = fix_name(metadata["creators"])
46+
if "contributors" in metadata:
47+
metadata["contributors"] = fix_name(metadata["contributors"])
4848
if fixed:
4949
print(idv)
50-
caltechdata_edit(idv,metadata,production=True,publish=True)
50+
caltechdata_edit(idv, metadata, production=True, publish=True)
5151
exit()

setup.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,14 @@ def read(fname):
1818
src = f.read()
1919
return src
2020

21+
2122
def package_files(package, directory):
2223
os.chdir(package)
2324
paths = glob.glob(directory + "/**", recursive=True)
2425
os.chdir("..")
2526
return paths
2627

28+
2729
codemeta_json = "codemeta.json"
2830

2931
# Let's pickup as much metadata as we need from codemeta.json
@@ -65,7 +67,7 @@ def package_files(package, directory):
6567
# 'fancy feature': ['django'],
6668
}
6769

68-
files = package_files("caltechdata_api", "vocabularies")
70+
files = package_files("caltechdata_api", "vocabularies")
6971
files.append("vocabularies.yaml")
7072

7173
# The rest you shouldn't have to touch too much :)

token.bash

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
export TINDTOK="token"
1+
export RDMTOK="token"
22

write.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,5 +23,7 @@
2323
production = True
2424
publish = False
2525

26-
response = caltechdata_write(metadata, token, args.fnames, production, args.schema, publish)
26+
response = caltechdata_write(
27+
metadata, token, args.fnames, production, args.schema, publish
28+
)
2729
print(response)

write_hte.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@
148148
new_cre.append(creator)
149149
metadata["creators"] = new_cre
150150

151-
doi = metadata['doi'].lower()
151+
doi = metadata["doi"].lower()
152152
unnecessary = [
153153
"id",
154154
"doi",
@@ -177,15 +177,15 @@
177177

178178
production = True
179179

180-
#We're now doing new records, so redirects are not needed
181-
#result = requests.get(f'https://api.datacite.org/dois/{doi}')
182-
#if result.status_code != 200:
180+
# We're now doing new records, so redirects are not needed
181+
# result = requests.get(f'https://api.datacite.org/dois/{doi}')
182+
# if result.status_code != 200:
183183
# print('DATACITE Failed')
184184
# print(result.text)
185185
# exit()
186-
187-
#url = result.json()['data']['attributes']['url']
188-
#old_id = url.split('data.caltech.edu/records/')[1]
186+
187+
# url = result.json()['data']['attributes']['url']
188+
# old_id = url.split('data.caltech.edu/records/')[1]
189189
new_id = caltechdata_write(
190190
metadata,
191191
schema="43",
@@ -196,13 +196,13 @@
196196
community=community,
197197
)
198198
print(new_id)
199-
url = f'https://data.caltech.edu/records/{new_id}'
200-
201-
#record_ids[old_id] = new_id
202-
#with open("new_ids.json", "w") as outfile:
199+
url = f"https://data.caltech.edu/records/{new_id}"
200+
201+
# record_ids[old_id] = new_id
202+
# with open("new_ids.json", "w") as outfile:
203203
# json.dump(record_ids, outfile)
204204

205-
doi = datacite.update_doi(doi=record, metadata=metadata, url=url)['doi']
205+
doi = datacite.update_doi(doi=record, metadata=metadata, url=url)["doi"]
206206
completed.append(doi)
207207
with open("completed_dois.json", "w") as outfile:
208208
data = json.dump(completed, outfile)

0 commit comments

Comments
 (0)