Skip to content
36 changes: 14 additions & 22 deletions caltechdata_api/caltechdata_write.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import copy
import json
import os, requests

import os
import requests
import s3fs
from requests import session
from json.decoder import JSONDecodeError
Expand Down Expand Up @@ -49,8 +49,6 @@ def write_files_rdm(files, file_link, headers, f_headers, s3=None, keepfiles=Fal
infile = open(name, "rb")
else:
infile = open(f_list[name], "rb")
# size = infile.seek(0, 2)
# infile.seek(0, 0) # reset at beginning
result = requests.put(link, headers=f_headers, data=infile)
if result.status_code != 200:
raise Exception(result.text)
Expand All @@ -65,10 +63,11 @@ def write_files_rdm(files, file_link, headers, f_headers, s3=None, keepfiles=Fal
raise Exception(result.text)



def add_file_links(
metadata, file_links, file_descriptions=[], additional_descriptions="", s3_link=None
):
# Currently configured for S3 links, assuming all are at same endpoint
# Currently configured for S3 links, assuming all are at the same endpoint
link_string = ""
endpoint = "https://" + file_links[0].split("/")[2]
s3 = s3fs.S3FileSystem(anon=True, client_kwargs={"endpoint_url": endpoint})
Expand Down Expand Up @@ -152,13 +151,8 @@ def caltechdata_write(
s3_link=None,
default_preview=None,
review_message=None,
keep_file=False, # New parameter
):
"""
File links are links to files that exist in external systems and will
be added directly to a CaltechDATA record instead of uploading the file.

S3 is an s3fs object for directly opening files
"""
# Make a copy so that none of our changes leak out
metadata = copy.deepcopy(metadata)

Expand All @@ -167,7 +161,7 @@ def caltechdata_write(
token = os.environ["RDMTOK"]

# If files is a single string, wrap it in a one-element list
if isinstance(files, str) == True:
if isinstance(files, str):
files = [files]

if file_links:
Expand All @@ -176,14 +170,13 @@ def caltechdata_write(
)

# Pull out pid information
if production == True:
if production:
repo_prefix = "10.22002"
else:
repo_prefix = "10.33569"
pids = {}
identifiers = []
if "metadata" in metadata:
# we have rdm schema
if "identifiers" in metadata["metadata"]:
identifiers = metadata["metadata"]["identifiers"]
elif "identifiers" in metadata:
Expand All @@ -200,11 +193,10 @@ def caltechdata_write(
"provider": "oai",
}
elif "scheme" in identifier:
# We have RDM internal metadata
if identifier["scheme"] == "doi":
doi = identifier["identifier"]
prefix = doi.split("/")[0]
if doi != False:
if doi:
if prefix == repo_prefix:
pids["doi"] = {
"identifier": doi,
Expand All @@ -220,25 +212,25 @@ def caltechdata_write(
if "pids" not in metadata:
metadata["pids"] = pids

if authors == False:
if not authors:
data = customize_schema.customize_schema(metadata, schema=schema)
if production == True:
if production:
url = "https://data.caltech.edu/"
else:
url = "https://data.caltechlibrary.dev/"
else:
data = metadata
if production == True:
if production:
url = "https://authors.library.caltech.edu/"
else:
url = "https://authors.caltechlibrary.dev/"

headers = {
"Authorization": "Bearer %s" % token,
"Authorization": f"Bearer {token}",
"Content-type": "application/json",
}
f_headers = {
"Authorization": "Bearer %s" % token,
"Authorization": f"Bearer {token}",
"Content-type": "application/octet-stream",
}

Expand All @@ -256,7 +248,7 @@ def caltechdata_write(

if files:
file_link = result.json()["links"]["files"]
write_files_rdm(files, file_link, headers, f_headers, s3)
write_files_rdm(files, file_link, headers, f_headers, s3, keep_file)

if community:
review_link = result.json()["links"]["review"]
Expand Down
17 changes: 10 additions & 7 deletions caltechdata_api/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,19 +60,22 @@ def decrypt_token(encrypted_token, key):


# Function to get or set token
def get_or_set_token():

def get_or_set_token(production=True):
key = load_or_generate_key()
token_file = os.path.join(caltechdata_directory, "token.txt")

# Use different token files for production and test environments
token_filename = "token.txt" if production else "token_test.txt"
token_file = os.path.join(caltechdata_directory, token_filename)

try:
with open(token_file, "rb") as f:
encrypted_token = f.read()
token = decrypt_token(encrypted_token, key)
return token
except FileNotFoundError:
while True:
token = input("Enter your CaltechDATA token: ").strip()
confirm_token = input("Confirm your CaltechDATA token: ").strip()
token = input(f"Enter your {'Production' if production else 'Test'} CaltechDATA token: ").strip()
confirm_token = input(f"Confirm your {'Production' if production else 'Test'} CaltechDATA token: ").strip()
if token == confirm_token:
encrypted_token = encrypt_token(token, key)
with open(token_file, "wb") as f:
Expand Down Expand Up @@ -403,7 +406,7 @@ def main():


def create_record(production):
token = get_or_set_token()
token = get_or_set_token(production)
print("Using CaltechDATA token:", token)
while True:
choice = get_user_input(
Expand Down Expand Up @@ -526,7 +529,7 @@ def print_upload_message(rec_id, production):

def edit_record(production):
record_id = input("Enter the CaltechDATA record ID: ")
token = get_or_set_token()
token = get_or_set_token(production)
file_name = download_file_by_id(record_id, token)

if file_name:
Expand Down