
Commit 3672edc

Merge pull request #18 from caltechlibrary/authors
Add in authors support
2 parents 3906e16 + 9a58da8 commit 3672edc

File tree

7 files changed (+269, -117 lines)

caltechdata_api/caltechdata_edit.py

Lines changed: 20 additions & 1 deletion
@@ -72,8 +72,27 @@ def caltechdata_edit(
     if isinstance(files, str) == True:
         files = [files]

+    # Check if file links were provided in the metadata
+    descriptions = []
+    for d in metadata["descriptions"]:
+        if d["description"].startswith("Files available via S3"):
+            ex_file_links = []
+            file_text = d["description"]
+            file_list = file_text.split('href="')
+            # Loop over links in description, skip header text
+            for file in file_list[1:]:
+                ex_file_links.append(file.split('"\n')[0])
+        else:
+            descriptions.append(d)
+    # We remove file link descriptions, and re-add below
+    metadata["descriptions"] = descriptions
+
+    # If user has provided file links as a cli option, we add those
     if file_links:
         metadata = add_file_links(metadata, file_links)
+    # Otherwise we add file links found in the metadata file
+    elif ex_file_links:
+        metadata = add_file_links(metadata, ex_file_links)

     if production == True:
         url = "https://data.caltech.edu"
@@ -101,7 +120,7 @@ def caltechdata_edit(
            headers=headers,
        )
        if existing.status_code != 200:
-            raise Exception(existing.text)
+            raise Exception(f"Record {idv} does not exist, cannot edit")

        status = existing.json()["status"]
caltechdata_api/caltechdata_write.py

Lines changed: 46 additions & 23 deletions
@@ -63,11 +63,8 @@ def add_file_links(metadata, file_links):
     for link in file_links:
         file = link.split("/")[-1]
         path = link.split(endpoint)[1]
-        try:
-            size = s3.info(path)["Size"]
-            size = humanbytes(size)
-        except:
-            size = 0
+        size = s3.info(path)["size"]
+        size = humanbytes(size)
         if link_string == "":
             cleaned = link.strip(file)
             link_string = f"Files available via S3 at {cleaned}</p>"
@@ -124,6 +121,7 @@ def caltechdata_write(
     file_links=[],
     s3=None,
     community=None,
+    authors=False,
 ):
     """
     File links are links to files existing in external systems that will
@@ -148,35 +146,59 @@ def caltechdata_write(
     else:
         repo_prefix = "10.33569"
     pids = {}
-    if "identifiers" in metadata:
-        for identifier in metadata["identifiers"]:
+    identifiers = []
+    if "metadata" in metadata:
+        # we have rdm schema
+        if "identifiers" in metadata["metadata"]:
+            identifiers = metadata["metadata"]["identifiers"]
+    elif "identifiers" in metadata:
+        identifiers = metadata["identifiers"]
+    for identifier in identifiers:
+        if "identifierType" in identifier:
             if identifier["identifierType"] == "DOI":
                 doi = identifier["identifier"]
                 prefix = doi.split("/")[0]
-
-                if prefix == repo_prefix:
-                    pids["doi"] = {
-                        "identifier": doi,
-                        "provider": "datacite",
-                        "client": "datacite",
-                    }
-                else:
-                    pids["doi"] = {
-                        "identifier": doi,
-                        "provider": "external",
-                    }
             elif identifier["identifierType"] == "oai":
                 pids["oai"] = {
                     "identifier": identifier["identifier"],
                     "provider": "oai",
                 }
+        elif "scheme" in identifier:
+            # We have RDM internal metadata
+            if identifier["scheme"] == "doi":
+                doi = identifier["identifier"]
+                prefix = doi.split("/")[0]
+            else:
+                doi = False
+        else:
+            doi = False
+        if doi != False:
+            if prefix == repo_prefix:
+                pids["doi"] = {
+                    "identifier": doi,
+                    "provider": "datacite",
+                    "client": "datacite",
+                }
+            else:
+                pids["doi"] = {
+                    "identifier": doi,
+                    "provider": "external",
+                }
+
     metadata["pids"] = pids

-    data = customize_schema.customize_schema(copy.deepcopy(metadata), schema=schema)
-    if production == True:
-        url = "https://data.caltech.edu/"
+    if authors == False:
+        data = customize_schema.customize_schema(copy.deepcopy(metadata), schema=schema)
+        if production == True:
+            url = "https://data.caltech.edu/"
+        else:
+            url = "https://data.caltechlibrary.dev/"
     else:
-        url = "https://data.caltechlibrary.dev/"
+        data = metadata
+        if production == True:
+            url = "https://authors.caltech.edu/"
+        else:
+            url = "https://authors.caltechlibrary.dev/"

     headers = {
         "Authorization": "Bearer %s" % token,
@@ -194,6 +216,7 @@ def caltechdata_write(
         data["files"] = {"default_preview": "README.txt"}

     # Make draft and publish
+    print(data)
     result = requests.post(url + "/api/records", headers=headers, json=data)
     if result.status_code != 201:
         raise Exception(result.text)
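For reference, the draft creation shown here is a plain POST to /api/records. A minimal sketch with placeholder token, test URL, and payload (none of these values are real):

```python
import requests

token = "YOUR-RDM-TOKEN"                           # placeholder token
url = "https://data.caltechlibrary.dev/"           # test instance, as above
headers = {"Authorization": "Bearer %s" % token}
data = {"metadata": {"title": "Example record"}}   # illustrative payload

result = requests.post(url + "/api/records", headers=headers, json=data)
if result.status_code != 201:
    raise Exception(result.text)
```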

caltechdata_api/customize_schema.py

Lines changed: 2 additions & 2 deletions
@@ -81,8 +81,8 @@ def rdm_creators_contributors(person_list, peopleroles):
         cre["type"] = "personal"
         change_label(cre, "givenName", "given_name")
         change_label(cre, "familyName", "family_name")
-        if 'name' not in cre:
-            cre['name'] = cre['family_name']+','+cre['given_name']
+        if "name" not in cre:
+            cre["name"] = cre["family_name"] + "," + cre["given_name"]
         change_label(cre, "nameIdentifiers", "identifiers")
         if "identifiers" in cre:
             new_id = []
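The reformatted lines above build a DataCite-style "Family,Given" name when none is supplied. A tiny illustration with a made-up creator record:

```python
# Illustrative creator record; the names are made up.
cre = {"family_name": "Doe", "given_name": "Jane"}

if "name" not in cre:
    # "Family,Given" with no space after the comma, matching the code above.
    cre["name"] = cre["family_name"] + "," + cre["given_name"]

print(cre["name"])  # Doe,Jane
```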

edit_osn.py

Lines changed: 61 additions & 0 deletions
@@ -0,0 +1,61 @@
+import argparse, os, json
+import s3fs
+from datacite import schema43
+from caltechdata_api import caltechdata_edit, get_metadata
+
+
+parser = argparse.ArgumentParser(
+    description="Edits a CaltechDATA record by adding OSN-stored pilot files"
+)
+parser.add_argument("folder", nargs=1, help="Folder")
+parser.add_argument("-id", nargs=1, help="")
+
+args = parser.parse_args()
+
+# Get access token as environment variable
+token = os.environ["RDMTOK"]
+
+endpoint = "https://renc.osn.xsede.org/"
+
+# Get metadata and files from bucket
+s3 = s3fs.S3FileSystem(anon=True, client_kwargs={"endpoint_url": endpoint})
+
+folder = args.folder[0]
+
+path = "ini210004tommorrell/" + folder + "/"
+
+idv = args.id[0]
+metadata = get_metadata(idv, schema="43")
+
+# Find the files
+files = s3.glob(path + "/*")
+
+file_links = []
+for link in files:
+    fname = link.split("/")[-1]
+    if "." not in fname:
+        # If there is a directory, get files
+        folder_files = s3.glob(link + "/*")
+        for file in folder_files:
+            name = file.split("/")[-1]
+            if "." not in name:
+                level_2_files = s3.glob(file + "/*")
+                for f in level_2_files:
+                    name = f.split("/")[-1]
+                    if "." not in name:
+                        level_3_files = s3.glob(f + "/*")
+                        for l3 in level_3_files:
+                            file_links.append(endpoint + l3)
+                    else:
+                        file_links.append(endpoint + f)
+            else:
+                file_links.append(endpoint + file)
+    else:
+        file_links.append(endpoint + link)
+
+production = True
+
+response = caltechdata_edit(
+    idv, metadata, token, [], production, "43", publish=True, file_links=file_links
+)
+print(response)
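The script would be invoked as, for example, python edit_osn.py <folder> -id <record-id>. Its nested glob loops walk at most three directory levels; as an aside (not part of this commit), fsspec's find() lists objects at any depth, which would collapse the traversal into one call, assuming every object under the folder should be linked:

```python
import s3fs

endpoint = "https://renc.osn.xsede.org/"
s3 = s3fs.S3FileSystem(anon=True, client_kwargs={"endpoint_url": endpoint})

path = "ini210004tommorrell/example-folder/"  # folder name is illustrative
# find() returns keys for all objects below path, at any depth.
file_links = [endpoint + key for key in s3.find(path)]
print(len(file_links), "files found")
```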

outdated/edit_pilot_phase1.py

Lines changed: 0 additions & 91 deletions
This file was deleted.
