diff --git a/archiveIt.py b/archiveIt.py
index 8281a6b..944d4fa 100644
--- a/archiveIt.py
+++ b/archiveIt.py
@@ -1,22 +1,49 @@
-import requests, json, secrets, authenticate, runtime
-
+import requests, json, runtime
+from asnake.client import ASnakeClient
# provide instructions
print('This script is used to generate new digital objects within an ArchivesSpace collection for websites crawled in an Archive-It collection. Please note: This is a "proof of concept" script, NOT completed work. Do not use in production scenarios.')
input('Press Enter to continue...')
-# This is where we connect to ArchivesSpace. See authenticate.py
-baseURL, headers = authenticate.login()
+# This is where we connect to ArchivesSpace.
+client = ASnakeClient()
+client.authorize() # login, using default values
# archiveit_coll = raw_input('Enter the Archive-It collection number: ')
archiveit_coll = '3181'
# search AS for archival_object's with level "Web archive"
-query = '/search?page=1&filter={"query":{"jsonmodel_type":"boolean_query","op":"AND","subqueries":[{"jsonmodel_type":"field_query","field":"primary_type","value":"archival_object","literal":true},{"jsonmodel_type":"field_query","field":"level","value":"Web archive","literal":true},{"jsonmodel_type":"field_query","field":"types","value":"pui","literal":true}]}}'
-ASoutput = requests.get(baseURL + query, headers=headers).json()
-print('Found ' + str(len(ASoutput['results'])) + ' archival objects with the instance type "Web archive."')
+
+warchives = list(client.get_paged( # get_paged returns an iterator, so wrap in list since we use it multiple times
+ 'search', # the query URL
+ params={
+ "filter": json.dumps( # use json.dumps to serialize the query JSON into a string - remember that query is passed as a GET param in the URL
+ {"query":
+ {"jsonmodel_type": "boolean_query",
+ "op":"AND",
+ "subqueries":[
+ {"jsonmodel_type":"field_query",
+ "field":"primary_type",
+ "value":"archival_object",
+             "literal":True},
+ {"jsonmodel_type":"field_query",
+ "field":"level",
+ "value":"Web archive",
+             "literal":True},
+ {"jsonmodel_type":"field_query",
+ "field":"types",
+ "value":"pui",
+             "literal":True}
+ ]
+ }
+ } # end query
+ ) # end json.dumps
+ } # end params
+)) # end list and client.get_paged
+
+print('Found ' + str(len(warchives)) + ' archival objects with the instance type "Web archive."')
# grab needed fields out of ao
-for ao in ASoutput['results']:
+for ao in warchives:
url = ao['title']
uri = ao['uri']
@@ -28,8 +55,7 @@
# take AI json lists and convert to python dicts
keys = AIoutput[0]
crawlList = []
- for i in range (1, len (AIoutput)):
- AIlist = AIoutput[i]
+ for AIlist in AIoutput[1:]:
crawl = {}
for j in range (0, len(AIlist)):
crawl[keys[j]] = AIlist[j]
@@ -40,29 +66,47 @@
newInstances = []
for crawl in crawlList:
doid = 'https://wayback.archive-it.org' + '/' + archiveit_coll + '/' + crawl['timestamp'] + '/' + crawl['original']
- query = '/search?page=1&filter={"query":{"jsonmodel_type":"boolean_query","op":"AND","subqueries":[{"jsonmodel_type":"field_query","field":"primary_type","value":"digital_object","literal":true},{"jsonmodel_type":"field_query","field":"digital_object_id","value":"' + doid + '","literal":true}]}}'
- existingdoID = requests.get(baseURL + query, headers=headers).json()
+ filter_query=json.dumps({
+ "query":{
+ "jsonmodel_type":"boolean_query",
+ "op":"AND",
+ "subqueries":[
+ {
+ "jsonmodel_type":"field_query",
+ "field":"primary_type",
+ "value":"digital_object",
+ "literal":True},
+ {
+ "jsonmodel_type":"field_query",
+ "field":"digital_object_id",
+ "value": str(doid),
+ "literal":True
+ }
+ ]
+ }
+ })
+
+        existingdoID = list(client.get_paged('search', params={"filter": filter_query}))
doPost = {}
- if len(existingdoID['results']) != 0:
+ if len(existingdoID) != 0:
print('Digital object already exists.')
else:
doPost['digital_object_id'] = doid
doPost['title'] = 'Web crawl of ' + crawl['original']
doPost['dates'] = [{'expression': crawl['timestamp'], 'date_type': 'single', 'label': 'creation'}]
doPost['file_versions'] = [{'file_uri': crawl['filename'], 'checksum': crawl['digest'], 'checksum_method': 'sha-1'}]
- doJson = json.dumps(doPost)
if doPost != {}:
- post = requests.post(baseURL + '/repositories/2/digital_objects', headers=headers, data=doJson).json()
+            post = client.post('/repositories/2/digital_objects', json=doPost).json()
print(post)
doItem = {}
doItem['digital_object'] = {'ref': post['uri']}
doItem['instance_type'] = 'digital_object'
newInstances.append(doItem)
- aoGet = requests.get(baseURL + uri, headers=headers).json()
+ aoGet = client.get(uri).json()
existingInstances = aoGet['instances']
existingInstances = existingInstances + newInstances
aoGet['instances'] = existingInstances
- aoUpdate = requests.post(baseURL + uri, headers=headers, data=json.dumps(aoGet)).json()
+ aoUpdate = client.post(uri, json=aoGet).json()
print('The following archival objects have been updated in ArchivesSpace:')
print(aoUpdate)
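
The pattern above (build the boolean query as a plain dict, serialize it with json.dumps, and let get_paged walk every page of results) recurs in several of the scripts below. A minimal sketch of just that pattern, assuming a working ArchivesSnake configuration pointed at the same local instance:

import json
from asnake.client import ASnakeClient

client = ASnakeClient()
client.authorize()  # credentials come from the ArchivesSnake configuration

# The /search endpoint expects the filter as a JSON string in a GET parameter,
# so build the query as a dict and serialize it with json.dumps.
filter_query = json.dumps({
    "query": {
        "jsonmodel_type": "boolean_query",
        "op": "AND",
        "subqueries": [
            {"jsonmodel_type": "field_query",
             "field": "primary_type",
             "value": "archival_object",
             "literal": True}
        ]
    }
})

# get_paged yields result records one at a time across all pages of results;
# wrap it in list() when the results need to be counted or reused.
results = list(client.get_paged('search', params={"filter": filter_query}))
print('Found ' + str(len(results)) + ' archival objects.')
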
diff --git a/asLinkProfiles.py b/asLinkProfiles.py
index 0b90933..6e4cae4 100644
--- a/asLinkProfiles.py
+++ b/asLinkProfiles.py
@@ -1,4 +1,5 @@
-import json, requests, authenticate, runtime
+import json, requests, runtime
+from asnake.client import ASnakeClient
# function to find key in nested dictionaries: see http://stackoverflow.com/questions/9807634/find-all-occurences-of-a-key-in-nested-python-dictionaries-and-lists
# and now we're getting fancy!
@@ -15,8 +16,9 @@ def gen_dict_extract(key, var):
for result in gen_dict_extract(key, d):
yield result
-# This is where we connect to ArchivesSpace. See authenticate.py
-baseURL, headers = authenticate.login()
+# This is where we connect to ArchivesSpace.
+client = ASnakeClient()
+client.authorize() # login, using default values
# provide instructions
print ('This script is used to link all top_containers in a single collection (identified by the ArchivesSpace resource ID number) to a single container_profile (identified by the ArchivesSpace container_profile ID number).')
@@ -26,18 +28,24 @@ def gen_dict_extract(key, var):
resource_id = input('Enter resource ID (in this case, you should enter 1): ')
# search for top_containers linked to entered resource id
-endpoint = '/repositories/2/top_containers/search?page=1&aq={"filter_term":{"field":"collection_uri_u_sstr", "value":"/repositories/2/resources/ + resource_id", "jsonmodel_type":"field_query"}}'
-output = requests.get(baseURL + endpoint, headers=headers).json()
+endpoint = '/repositories/2/top_containers/search'
+advanced_query = json.dumps({
+ "filter_term": {
+ "field": "collection_uri_u_sstr",
+ "value": "/repositories/2/resources/" + resource_id,
+ "jsonmodel_type":"field_query"}
+})
+results = client.get(endpoint, params={'aq': advanced_query}).json()
# populate top_containers with the ids of each top_container in search results
top_containers = []
-for value in gen_dict_extract('id', output):
+for value in gen_dict_extract('id', results):
top_containers.append(value)
# GET each top_container listed in top_containers and add to records
records = []
for top_container in top_containers:
- output = requests.get(baseURL + top_container, headers=headers).json()
+ output = client.get(top_container).json()
records.append(output)
# have user enter container profile id
@@ -47,7 +55,7 @@ def gen_dict_extract(key, var):
print ('The following records have been updated in ArchivesSpace:')
for record in records:
record['container_profile'] = {'ref': '/container_profiles/' + profile_id}
- jsonLine = json.dumps(record)
+ jsonLine = record
uri = record['uri']
- post = requests.post(baseURL + uri, headers=headers, data=jsonLine).json()
+ post = client.post(uri, json=jsonLine).json()
print(post)
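
Unlike the plain /search endpoint, the top_containers search above takes its query through an aq parameter and returns a more deeply nested document, which is why the script digs the container URIs out with gen_dict_extract. A rough sketch of that request on its own, assuming repository 2 and resource 1 as in the script:

import json
from asnake.client import ASnakeClient

client = ASnakeClient()
client.authorize()

# filter_term restricts the search to top containers linked to one resource
advanced_query = json.dumps({
    "filter_term": {
        "field": "collection_uri_u_sstr",
        "value": "/repositories/2/resources/1",
        "jsonmodel_type": "field_query"
    }
})

# The serialized query travels in the aq GET parameter; the response is nested
# JSON, so the script pulls the 'id' values (container URIs) out of it with
# gen_dict_extract before each container is fetched and updated.
response = client.get('/repositories/2/top_containers/search',
                      params={'aq': advanced_query}).json()
print(response)
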
diff --git a/authenticate.py b/authenticate.py
deleted file mode 100644
index 521a933..0000000
--- a/authenticate.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import requests, json, secrets
-from requests.compat import urljoin, quote
-
-def login():
- # import secrets
- baseURL = secrets.baseURL
- user = secrets.user
- password = secrets.password
-
- # attempt to authenticate
- # following the approach used in ArchivesSnake
- response = requests.post(urljoin(baseURL, '/users/{user}/login'.format(user=quote(user))),
- params={"password": password, "expiring": False})
-
- if response.status_code != 200:
- print('Login failed! Check credentials and try again')
- exit()
- else:
- session = json.loads(response.text)['session']
- headers = {'X-ArchivesSpace-Session':session, 'Content_Type':'application/json'}
- print('Login successful!')
- return baseURL, headers
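
With authenticate.py gone (and secrets.py removed further down), the connection details move into ArchivesSnake's own configuration. ASnakeClient() with no arguments reads them from an ~/.archivessnake.yml file; the same values can also be passed as keyword arguments. A sketch using the local test values from the deleted secrets.py:

from asnake.client import ASnakeClient

# These values mirror the deleted secrets.py (local dev instance, default admin
# login); in practice they would usually live in ~/.archivessnake.yml instead.
client = ASnakeClient(baseurl='http://localhost:8089',
                      username='admin',
                      password='admin')
client.authorize()  # raises ASnakeAuthError if the login is rejected
print('Login successful!')
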
diff --git a/postBarcodes.py b/postBarcodes.py
index e1cebee..0e78c4e 100644
--- a/postBarcodes.py
+++ b/postBarcodes.py
@@ -1,11 +1,12 @@
-import json, requests, csv, authenticate, runtime
-
+import json, csv, runtime
+from asnake.client import ASnakeClient
# print instructions
print ('This script replaces existing fauxcodes with real barcodes (linked in a separate csv file) in ArchivesSpace.')
input('Press Enter to connect to ArchivesSpace and post those barcodes...')
-# This is where we connect to ArchivesSpace. See authenticate.py
+# This is where we connect to ArchivesSpace.
-baseURL, headers = authenticate.login()
+client = ASnakeClient()
+client.authorize()
# open csv and generate dict
reader = csv.DictReader(open('barcodes.csv'))
@@ -14,7 +15,7 @@
print ('The following barcodes have been updated in ArchivesSpace:')
for row in reader:
uri = row['uri']
- output = requests.get(baseURL + uri, headers=headers).json()
- output['barcode'] = row['real']
- post = requests.post(baseURL + uri, headers=headers, data=json.dumps(output)).json()
- print (post)
+ container = client.get(uri).json()
+ container['barcode'] = row['real']
+ post = client.post(uri, json=container).json()
+ print(post)
diff --git a/postContainerProfiles.py b/postContainerProfiles.py
index 21baa27..796fa34 100644
--- a/postContainerProfiles.py
+++ b/postContainerProfiles.py
@@ -1,30 +1,26 @@
-import json, requests, secrets, time, runtime
+import json, requests, time, runtime
+from asnake.client import ASnakeClient
+from asnake.client.web_client import ASnakeAuthError
-# import secrets
-baseURL = secrets.baseURL
-user = secrets.user
-password = secrets.password
-
-#authenticate
-auth = requests.post(baseURL + '/users/'+user+'/login?password='+password).json()
-session = auth["session"]
-headers = {'X-ArchivesSpace-Session':session, 'Content_Type':'application/json'}
+# Create a client
+client = ASnakeClient()
+client.authorize() # login, using default values
# test for successful connection
def test_connection():
- try:
- requests.get(baseURL)
- print ('Connected!')
- return True
+ try:
+        client.get('/')
+ print ('Connected!')
+ return True
- except requests.exceptions.ConnectionError:
- print ('Connection error. Please confirm ArchivesSpace is running. Trying again in 10 seconds.')
+ except (requests.exceptions.ConnectionError, ASnakeAuthError) as e:
+ print ('Connection error. Please confirm ArchivesSpace is running. Trying again in 10 seconds.')
is_connected = test_connection()
while not is_connected:
- time.sleep(10)
- is_connected = test_connection()
+ time.sleep(10)
+ is_connected = test_connection()
# print instructions
print ("This script will add the container_profiles included in a separate json file to ArchivesSpace.")
@@ -34,9 +30,8 @@ def test_connection():
print ("The following container profiles have been added to ArchivesSpace:")
jsonfile = open("containerProfiles.json")
jsonfile = json.load(jsonfile)
-for line in jsonfile:
- toPost = json.dumps(line)
- post = requests.post(baseURL + "/container_profiles", headers=headers, data=toPost).json()
- print (post)
+for container_profile in jsonfile:
+ post = client.post("/container_profiles", json=container_profile).json()
+ print (post)
print ("You've just completed your first API POST. Congratulations!")
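
One caveat with the version above: client.authorize() runs before test_connection() is ever called, so a backend that is still starting up will raise before the retry loop can do its job. A possible variation, not part of this changeset, that moves the login inside the retry:

import time
import requests
from asnake.client import ASnakeClient
from asnake.client.web_client import ASnakeAuthError

client = ASnakeClient()

def test_connection():
    try:
        client.authorize()  # the login itself doubles as the connection test
        print('Connected!')
        return True
    except (requests.exceptions.ConnectionError, ASnakeAuthError):
        print('Connection error. Please confirm ArchivesSpace is running. Trying again in 10 seconds.')
        return False

while not test_connection():
    time.sleep(10)
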
diff --git a/postDos.py b/postDos.py
index 431fcc1..1839ff1 100644
--- a/postDos.py
+++ b/postDos.py
@@ -1,55 +1,82 @@
-import json, requests, csv, os, authenticate, runtime
+import json, csv, os, runtime
+from asnake.client import ASnakeClient
-# This is where we connect to ArchivesSpace. See authenticate.py
-baseURL, headers = authenticate.login()
+# This is where we connect to ArchivesSpace.
+client = ASnakeClient()
+client.authorize() # login, using default values
# User supplied filename
do_csv = input('Enter csv filename: ')
-base_file_name = os.path.splitext(do_csv)[0]
+base_file_name = os.path.splitext(os.path.basename(do_csv))[0]
+
# Open csv, create new csv
csv_dict = csv.DictReader(open(do_csv, 'r', encoding='utf-8'))
f=csv.writer(open(base_file_name + '.just_posted.csv', 'w'))
-f.writerow(['title']+['digital_object_id']+['digital_object_uri']+['archival_object_uri'])
+f.writerow(['title', 'digital_object_id', 'digital_object_uri', 'archival_object_uri'])
- # Note: if this script is re-run to create new digital objects or updated to include additional rows
- # (e.g. after changing the do object IDs, or if you attempt to add another row that references a preceding archival object)
- # then only the most-recently created digital objects will be linked
- # and the previously-created digital objects will be orphaned records.
+# Note: if this script is re-run to create new digital objects or updated to include additional rows
+# (e.g. after changing the do object IDs, or if you attempt to add another row that references a preceding archival object)
+# then only the most-recently created digital objects will be linked
+# and the previously-created digital objects will be orphaned records.
# Parse csv and update ArchivesSpace.
for row in csv_dict:
- file_uri = row['fileuri']
- title = row['title']
- digital_object_id = row['objectid']
- ref_ID = row['refID']
- # Construct new digital object from csv
- doRecord = {'title': title, 'digital_object_id': digital_object_id, 'publish': False}
- doRecord['file_versions'] = [{'file_uri': file_uri, 'publish': False, 'file_format_name': 'jpeg'}]
- doRecord = json.dumps(doRecord)
- doPost = requests.post(baseURL + '/repositories/2/digital_objects', headers=headers, data=doRecord).json()
- print(doPost)
- # Store uri of newly posted digital objects because we'll need it
- uri = doPost['uri']
- # Find AOs based on refIDs supplied in csv
- AOquery = '/search?page=1&filter={"query":{"jsonmodel_type":"boolean_query","op":"AND","subqueries":[{"jsonmodel_type":"field_query","field":"primary_type","value":"archival_object","literal":true},{"jsonmodel_type":"field_query","field":"ref_id","value":"' + ref_ID + '","literal":true},{"jsonmodel_type":"field_query","field":"types","value":"pui","literal":true}]}}'
- aoSearch = requests.get(baseURL + AOquery, headers=headers).json()
- linked_ao_uri = aoSearch['results'][0]['uri']
- # Get and store archival objects from above search
- aoRecord = requests.get(baseURL + linked_ao_uri, headers=headers).json()
- # Find existing instances and create new ones from new digital objects
- exising_instance = aoRecord['instances'][0]
- new_instance = '{"instance_type": "digital_object", "digital_object": {"ref": "' + uri + '"}}'
- new_instance = json.loads(new_instance)
- # Merge old and new instances
- instances_new = []
- instances_new.append(exising_instance)
- instances_new.append(new_instance)
- aoRecord['instances'] = instances_new
- # Post updated archival objects
- aoPost = requests.post(baseURL + linked_ao_uri, headers=headers, data=json.dumps(aoRecord)).json()
- print(aoPost)
- # Save select information to new csv file
- f.writerow([title]+[digital_object_id]+[uri]+[linked_ao_uri])
+ file_uri = row['fileuri']
+ title = row['title']
+ digital_object_id = row['objectid']
+ ref_ID = row['refID']
+ # Construct new digital object from csv
+ doRecord = {'title': title, 'digital_object_id': digital_object_id, 'publish': False}
+ doRecord['file_versions'] = [{'file_uri': file_uri, 'publish': False, 'file_format_name': 'jpeg'}]
+
+ doPost = client.post('/repositories/2/digital_objects', json=doRecord).json()
+ print(doPost)
+ # Store uri of newly posted digital objects because we'll need it
+ uri = doPost['uri']
+ # Find AOs based on refIDs supplied in csv
+ AOquery = json.dumps({
+ "query": {
+ "jsonmodel_type":"boolean_query",
+ "op":"AND",
+ "subqueries":[
+ {
+ "jsonmodel_type":"field_query",
+ "field":"primary_type",
+ "value":"archival_object",
+ "literal":True
+ },
+ {
+ "jsonmodel_type":"field_query",
+ "field":"ref_id",
+ "value": ref_ID, "literal":True
+ },
+ {
+ "jsonmodel_type":"field_query",
+ "field":"types",
+ "value":"pui",
+ "literal":True
+ }
+ ]
+ }
+ })
+    aoSearch = list(client.get_paged('search', params={"filter": AOquery}))
+ linked_ao_uri = aoSearch[0]['uri']
+ # Get and store archival objects from above search
+ aoRecord = client.get(linked_ao_uri).json()
+ # Find existing instances and create new ones from new digital objects
+    existing_instance = aoRecord['instances'][0]
+ new_instance = {"instance_type": "digital_object", "digital_object": {"ref": uri}}
+
+ # Merge old and new instances
+ instances_new = []
+    instances_new.append(existing_instance)
+ instances_new.append(new_instance)
+ aoRecord['instances'] = instances_new
+ # Post updated archival objects
+ aoPost = client.post(linked_ao_uri, json=aoRecord).json()
+ print(aoPost)
+ # Save select information to new csv file
+ f.writerow([title, digital_object_id, uri, linked_ao_uri])
# Feedback to user
print ('New .csv saved to working directory. Go have a look!')
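
The instance link posted above has the same shape archiveIt.py builds: a digital_object instance whose ref points at the new digital object's URI, appended alongside whatever instances the archival object already carries. Sketched in isolation, with hypothetical URIs standing in for the values the script gets from the digital object POST and the ref_id search:

from asnake.client import ASnakeClient

client = ASnakeClient()
client.authorize()

# Hypothetical URIs for illustration; in the script these come from the
# digital object POST response and the archival object search, respectively.
digital_object_uri = '/repositories/2/digital_objects/1'
archival_object_uri = '/repositories/2/archival_objects/1'

# Fetch the archival object, append a digital_object instance, and post it back.
ao = client.get(archival_object_uri).json()
ao['instances'].append({
    'instance_type': 'digital_object',
    'digital_object': {'ref': digital_object_uri}
})
update = client.post(archival_object_uri, json=ao).json()
print(update)
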
diff --git a/postVIAFOrganizations.py b/postVIAFOrganizations.py
index 4185ea7..886b627 100644
--- a/postVIAFOrganizations.py
+++ b/postVIAFOrganizations.py
@@ -1,11 +1,13 @@
-import json, requests, csv, authenticate, runtime
+import json, csv, runtime
+from asnake.client import ASnakeClient
# print instructions
print('This script takes viafCorporateResults.csv and posts the organizations as corporate_entities to ArchivesSpace.')
input('Press Enter to continue...')
-# This is where we connect to ArchivesSpace. See authenticate.py
-baseURL, headers = authenticate.login()
+# This is where we connect to ArchivesSpace.
+client = ASnakeClient()
+client.authorize() # login, using default values
targetFile = 'viafCorporateResults.csv'
@@ -14,11 +16,11 @@
orgList = []
for row in csv:
orgRecord = {}
- # changed this since ASpace doesn't come with 'viaf' as an option for source of the box.
+ # changed this since ASpace doesn't come with 'viaf' as an option for source out of the box.
source = 'naf' if row.get('lc') is not None else 'local'
orgRecord['names'] = [{'primary_name': row['result'], 'sort_name': row['result'], 'source': source, 'authority_id': row['lc']}]
- orgRecord = json.dumps(orgRecord)
- post = requests.post(baseURL + '/agents/corporate_entities', headers=headers, data=orgRecord).json()
+
+ post = client.post('/agents/corporate_entities', json=orgRecord).json()
print(post, '\n')
print("Check out your instance of ArchivesSpace to see what's new.")
diff --git a/secrets.py b/secrets.py
deleted file mode 100644
index 204e2dc..0000000
--- a/secrets.py
+++ /dev/null
@@ -1,3 +0,0 @@
-baseURL='http://localhost:8089'
-user='admin'
-password='admin'
diff --git a/updatePersnames.py b/updatePersnames.py
index a3052a4..ad440b4 100644
--- a/updatePersnames.py
+++ b/updatePersnames.py
@@ -1,4 +1,5 @@
-import requests, csv, json, urllib, time, authenticate
+import requests, csv, json, urllib, time
+from asnake.client import ASnakeClient
viafURL = 'http://viaf.org/viaf/search?query=local.personalNames+%3D+%22'
@@ -7,17 +8,35 @@
input('Press Enter to continue...')
-# This is where we connect to ArchivesSpace. See authenticate.py
+# This is where we connect to ArchivesSpace.
-baseURL, headers = authenticate.login()
+client = ASnakeClient()
+client.authorize() # login, using default values
# search AS for person agents with source "viaf"
-query = '/search?page=1&filter={"query":{"jsonmodel_type":"boolean_query","op":"AND","subqueries":[{"jsonmodel_type":"field_query","field":"primary_type","value":"agent_person","literal":true},{"jsonmodel_type":"field_query","field":"source","value":"viaf","literal":true}]}}'
-ASoutput = requests.get(baseURL + query, headers=headers).json()
-print('Found ' + str(len(ASoutput['results'])) + ' agents.')
+query = json.dumps({"query":{
+ "jsonmodel_type":"boolean_query",
+ "op":"AND",
+ "subqueries":[
+ {
+ "jsonmodel_type":"field_query",
+ "field":"primary_type",
+ "value":"agent_person",
+ "literal":True
+ },
+ {
+ "jsonmodel_type":"field_query",
+ "field":"source",
+ "value":"viaf",
+ "literal":True
+ }
+ ]
+}})
+ASoutput = list(client.get_paged('search', params={"filter": query}))
+print('Found ' + str(len(ASoutput)) + ' agents.')
# grab uri out of agent
-for person in ASoutput['results']:
+for person in ASoutput:
uri = person['uri']
- personRecord = requests.get(baseURL + uri, headers=headers).json()
+ personRecord = client.get(uri).json()
lockVersion = str(personRecord['lock_version'])
primary_name = personRecord['names'][0]['primary_name']
try:
@@ -54,6 +73,21 @@
if viafid != '':
links = json.loads(requests.get('http://viaf.org/viaf/'+viafid+'/justlinks.json').text)
viafid = 'http://viaf.org/viaf/'+viafid
- toPost = '{"lock_version": ' + lockVersion + ',"names": [{"primary_name":"' + properPrimary.strip() + '","rest_of_name":"' + properSecondary.strip() + '","dates":"' + properDates.strip() + '","sort_name":"' + properName + '","authorized":true, "is_display_name": true, "source": "viaf", "rules": "dacs", "name_order": "inverted", "jsonmodel_type": "name_person", "authority_id":"' + viafid + '"}]}'
- post = requests.post(baseURL + uri, headers=headers, data=toPost).json()
+    toPost = {"lock_version": int(lockVersion),
+ "names": [
+ {"primary_name": properPrimary.strip(),
+ "rest_of_name": properSecondary.strip(),
+ "dates": properDates.strip(),
+ "sort_name":properName,
+ "authorized":True,
+ "is_display_name": True,
+ "source": "viaf",
+ "rules": "dacs",
+ "name_order": "inverted",
+ "jsonmodel_type": "name_person",
+ "authority_id": viafid
+ }
+ ]
+ }
+ post = client.post(uri, json=toPost).json()
print(post)