
Commit e6f71d0

Move to exceptions, and enhance caltechdata_edit
1 parent 66d7d96 commit e6f71d0

6 files changed (+118 / -100 lines)

caltechdata_api/__init__.py

Lines changed: 6 additions & 1 deletion
@@ -1,4 +1,9 @@
-from .caltechdata_write import caltechdata_write, write_files_rdm
+from .caltechdata_write import (
+    caltechdata_write,
+    write_files_rdm,
+    add_file_links,
+    send_to_community,
+)
 from .caltechdata_edit import caltechdata_edit, caltechdata_unembargo
 from .customize_schema import customize_schema
 from .get_metadata import get_metadata
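With this change, add_file_links and send_to_community join the package's public exports. A quick sketch of the expanded import surface; the metadata dict and file URL below are placeholders, and the exact fields that add_file_links writes are not shown in this diff:

from caltechdata_api import (
    add_file_links,
    caltechdata_edit,
    caltechdata_write,
    send_to_community,
)

# add_file_links(metadata, file_links) returns the metadata dict with the
# given external file URLs recorded; the values here are hypothetical
metadata = add_file_links(
    {"titles": [{"title": "Example dataset"}]},
    ["https://example.org/data/file1.h5"],
)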

caltechdata_api/caltechdata_edit.py

Lines changed: 49 additions & 35 deletions
@@ -3,7 +3,12 @@
 import requests
 from requests import session
 
-from caltechdata_api import customize_schema, write_files_rdm
+from caltechdata_api import (
+    customize_schema,
+    write_files_rdm,
+    add_file_links,
+    send_to_community,
+)
 
 
 def caltechdata_unembargo(token, ids, production=False):
@@ -19,10 +24,10 @@ def caltechdata_edit(
     production=False,
     schema="43",
     publish=False,
+    file_links=[],
+    s3=None,
+    community=None,
 ):
-    """Including files will only replaces files if they have the same name
-    The delete option will delete any existing files with a given file extension
-    There are more file operations that could be implemented"""
 
     # If no token is provided, get from RDMTOK environment variable
     if not token:
@@ -36,6 +41,9 @@ def caltechdata_edit(
     if isinstance(ids, str):
         ids = [ids]
 
+    if file_links:
+        metadata = add_file_links(metadata, file_links)
+
     data = customize_schema.customize_schema(copy.deepcopy(metadata), schema=schema)
     if production == True:
         url = "https://data.caltech.edu"
@@ -88,52 +96,58 @@ def caltechdata_edit(
             write_files_rdm(files, file_link, headers, f_headers, verify)
 
         else:
-            # just update metadata
-            # result = requests.post(
-            #     url + "/api/records/" + idv + "/draft",
-            #     headers=headers,
-            #     verify=verify,
-            # )
-            # if result.status_code != 200:
-            #     print(result.text)
-            #     exit()
-            # print(result.json())
-            # exit()
-            # print(url + "/api/records/" + idv + "/draft")
+            # Check for existing draft
             result = requests.get(
-                url + "/api/records/" + idv,
+                url + "/api/records/" + idv + "/draft",
                 headers=headers,
                 verify=verify,
             )
             if result.status_code != 200:
+                draft = False
+            else:
+                draft = True
+            if draft == False:
                 result = requests.get(
-                    url + "/api/records/" + idv + "/draft",
-                    headers=headers,
-                    verify=verify,
+                    url + "/api/records/" + idv,
+                    headers=headers,
+                    verify=verify,
                 )
                 if result.status_code != 200:
-                    print(result.text)
-                    exit()
+                    raise Exception(result.text)
             # We want files to stay the same as the existing record
             data["files"] = result.json()["files"]
             print(url + "/api/records/" + idv + "/draft")
-            result = requests.put(
-                url + "/api/records/" + idv + "/draft",
-                headers=headers,
-                json=data,
-                verify=verify,
-            )
-            if result.status_code != 200:
-                print(result.text)
-                exit()
+            if draft == True:
+                result = requests.put(
+                    url + "/api/records/" + idv + "/draft",
+                    headers=headers,
+                    json=data,
+                    verify=verify,
+                )
+                if result.status_code != 200:
+                    raise Exception(result.text)
+            else:
+                result = requests.post(
+                    url + "/api/records/" + idv + "/draft",
+                    headers=headers,
+                    json=data,
+                    verify=verify,
+                )
+                if result.status_code != 201:
+                    raise Exception(result.text)
 
-        if publish:
+        if community:
+            review_link = result.json()["links"]["review"]
+            result = send_to_community(
+                review_link, data, headers, verify, publish, community
+            )
+            doi = result.json()["pids"]["doi"]["identifier"]
+            completed.append(doi)
+        elif publish:
             publish_link = f"{url}/api/records/{idv}/draft/actions/publish"
-            print(publish_link)
             result = requests.post(publish_link, headers=headers, verify=verify)
             if result.status_code != 202:
-                print(result.text)
-                exit()
+                raise Exception(result.text)
             doi = result.json()["pids"]["doi"]["identifier"]
            completed.append(doi)
        else:
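Taken together, caltechdata_edit now accepts file_links, s3, and community keyword arguments, updates an existing draft with PUT when one is present and creates one with POST otherwise, and raises exceptions on API errors instead of printing and exiting. A minimal caller sketch under those assumptions; the token, record id, metadata, and file URL are placeholders, and keyword arguments are used because the full parameter order is not visible in these hunks:

import os

from caltechdata_api import caltechdata_edit

metadata = {"titles": [{"title": "Updated example record"}]}  # placeholder metadata

try:
    caltechdata_edit(
        token=os.environ.get("RDMTOK"),  # the function also falls back to RDMTOK itself
        ids="abcd1-23456",               # placeholder record id
        metadata=metadata,
        production=False,
        publish=True,
        file_links=["https://example.org/external/file1.h5"],  # merged via add_file_links
        community=None,                  # or a community id to route the draft through review
    )
except Exception as err:
    # With this commit, failed API calls raise instead of calling exit()
    print(f"caltechdata_edit failed: {err}")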

caltechdata_api/caltechdata_write.py

Lines changed: 44 additions & 47 deletions
@@ -1,6 +1,6 @@
 import copy
 import json
-import os, sys, requests
+import os, requests
 
 import s3fs
 from requests import session
@@ -19,8 +19,7 @@ def write_files_rdm(files, file_link, headers, f_headers, verify, s3=None):
     result = requests.post(file_link, headers=headers, json=f_json, verify=verify)
     print("upload links")
     if result.status_code != 201:
-        print(result.text)
-        exit()
+        raise Exception(result.text)
     # Now we have the upload links
     for entry in result.json()["entries"]:
         link = entry["links"]["content"]
@@ -36,12 +35,10 @@ def write_files_rdm(files, file_link, headers, f_headers, verify, s3=None):
         print(link)
         result = requests.put(link, headers=f_headers, verify=verify, data=infile)
         if result.status_code != 200:
-            print(result.text)
-            exit()
+            raise Exception(result.text)
         result = requests.post(commit, headers=headers, verify=verify)
         if result.status_code != 200:
-            print(result.text)
-            exit()
+            raise Exception(result.text)
 
 
 def add_file_links(metadata, file_links):
@@ -70,6 +67,42 @@ def add_file_links(metadata, file_links):
     return metadata
 
 
+def send_to_community(review_link, data, headers, verify, publish, community):
+
+    data = {
+        "receiver": {"community": community},
+        "type": "community-submission",
+    }
+    result = requests.put(review_link, json=data, headers=headers, verify=verify)
+    if result.status_code != 200:
+        print(result.status_code)
+        raise Exception(result.text)
+    submit_link = result.json()["links"]["actions"]["submit"]
+    data = comment = {
+        "payload": {
+            "content": "This record is submitted automatically with the CaltechDATA API",
+            "format": "html",
+        }
+    }
+    result = requests.post(submit_link, json=data, headers=headers, verify=verify)
+    if result.status_code != 200:
+        print(result.status_code)
+        raise Exception(result.text)
+    if publish:
+        accept_link = result.json()["links"]["actions"]["accept"]
+        data = comment = {
+            "payload": {
+                "content": "This record is accepted automatically with the CaltechDATA API",
+                "format": "html",
+            }
+        }
+        result = requests.post(accept_link, json=data, headers=headers, verify=verify)
+        if result.status_code != 200:
+            print(result.status_code)
+            raise Exception(result.text)
+    return result
+
+
 def caltechdata_write(
     metadata,
     token=None,
@@ -128,8 +161,7 @@ def caltechdata_write(
         url + "/api/records", headers=headers, json=data, verify=verify
     )
     if result.status_code != 201:
-        print(result.text)
-        exit()
+        raise Exception(result.text)
     idv = result.json()["id"]
     print(f"record {idv} created")
     publish_link = result.json()["links"]["publish"]
@@ -141,46 +173,11 @@
 
     if community:
         review_link = result.json()["links"]["review"]
-        data = {
-            "receiver": {"community": community},
-            "type": "community-submission",
-        }
-        result = requests.put(review_link, json=data, headers=headers, verify=verify)
-        if result.status_code != 200:
-            print(result.status_code)
-            print(result.text)
-            exit()
-        submit_link = result.json()["links"]["actions"]["submit"]
-        data = comment = {
-            "payload": {
-                "content": "This record is submitted automatically with the CaltechDATA API",
-                "format": "html",
-            }
-        }
-        result = requests.post(submit_link, json=data, headers=headers, verify=verify)
-        if result.status_code != 200:
-            print(result.status_code)
-            print(result.text)
-            exit()
-        if publish:
-            accept_link = result.json()["links"]["actions"]["accept"]
-            data = comment = {
-                "payload": {
-                    "content": "This record is accepted automatically with the CaltechDATA API",
-                    "format": "html",
-                }
-            }
-            result = requests.post(
-                accept_link, json=data, headers=headers, verify=verify
-            )
-            if result.status_code != 200:
-                print(result.status_code)
-                print(result.text)
-                exit()
+        send_to_community(review_link, data, headers, verify, publish, community)
+
     else:
         if publish:
             result = requests.post(publish_link, headers=headers, verify=verify)
             if result.status_code != 202:
-                print(result.text)
-                exit()
+                raise Exception(result.text)
     return idv
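send_to_community factors the community review, submit, and optional accept steps out of caltechdata_write so the same flow can be reused by caltechdata_edit, and the remaining error paths in this module now raise as well. A sketch of the community path, assuming placeholder metadata, token, and community identifier; only keyword names visible in these diffs are relied on:

import os

from caltechdata_api import caltechdata_write

metadata = {"titles": [{"title": "Example deposit"}]}  # placeholder metadata

# With community set, the new draft is submitted for community review;
# publish=True also accepts the review automatically via send_to_community.
idv = caltechdata_write(
    metadata,
    token=os.environ.get("RDMTOK"),  # assumed token source, mirroring caltechdata_edit
    schema="43",
    publish=True,
    community="my-community-id",     # placeholder community identifier
)
print(f"record {idv} created")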

completed_dois.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

fix_names.py

Lines changed: 1 addition & 1 deletion
@@ -23,7 +23,7 @@ def fix_name(metadata):
 
 total = response.json()["hits"]["total"]
 pages = math.ceil(int(total) / 1000)
-hits = []
+hits = []#[{'id':'15e0h-t0t34'}]
 print(total)
 for c in progressbar(range(1,2)):#, pages + 1)):
     chunkurl = (

write_hte.py

Lines changed: 17 additions & 15 deletions
@@ -12,9 +12,9 @@
 s3 = s3fs.S3FileSystem(anon=True, client_kwargs={"endpoint_url": endpoint})
 
 # Set up datacite client
-# password = os.environ["DATACITE"]
+password = os.environ["DATACITE"]
 prefix = "10.25989"
-# datacite = DataCiteRESTClient(username="CALTECH.HTE", password=password, prefix=prefix)
+datacite = DataCiteRESTClient(username="CALTECH.HTE", password=password, prefix=prefix)
 
 path = "ini210004tommorrell/" + folder + "/"
 dirs = s3.ls(path)
@@ -177,14 +177,15 @@
 
 production = True
 
-result = requests.get(f'https://api.datacite.org/dois/{doi}')
-if result.status_code != 200:
-    print('DATACITE Failed')
-    print(result.text)
-    exit()
+#We're now doing new records, so redirects are not needed
+#result = requests.get(f'https://api.datacite.org/dois/{doi}')
+#if result.status_code != 200:
+#    print('DATACITE Failed')
+#    print(result.text)
+#    exit()
 
-url = result.json()['data']['attributes']['url']
-old_id = url.split('data.caltech.edu/records/')[1]
+#url = result.json()['data']['attributes']['url']
+#old_id = url.split('data.caltech.edu/records/')[1]
 new_id = caltechdata_write(
     metadata,
     schema="43",
@@ -195,14 +196,15 @@
     community=community,
 )
 print(new_id)
-
-# url = response.split("record ")[1].strip()[:-1]
+url = f'https://data.caltech.edu/records/{new_id}'
 
-record_ids[old_id] = new_id
-with open("new_ids.json", "w") as outfile:
-    json.dump(record_ids, outfile)
+#record_ids[old_id] = new_id
+#with open("new_ids.json", "w") as outfile:
+#    json.dump(record_ids, outfile)
 
-# doi = datacite.update_doi(doi=record, metadata=metadata, url=url)['doi']
+doi = datacite.update_doi(doi=record, metadata=metadata, url=url)['doi']
 completed.append(doi)
 with open("completed_dois.json", "w") as outfile:
     data = json.dump(completed, outfile)
+
+exit()
