SynBioDex
diff --git a/‎SBOL3_simple_library4.nt‎
Lines changed: 98 additions & 98 deletions b/‎SBOL3_simple_library4.nt‎
Lines changed: 98 additions & 98 deletions
diff --git a/‎SBOL3_simple_library4.xlsx‎
99 Bytes b/‎SBOL3_simple_library4.xlsx‎
99 Bytes
diff --git a/‎excelutils/excel_sbol_utils/helpers.py‎
Lines changed: 50 additions & 0 deletions b/‎excelutils/excel_sbol_utils/helpers.py‎
Lines changed: 50 additions & 0 deletions
diff --git a/‎excelutils/excel_sbol_utils/library3.py‎
Lines changed: 19 additions & 9 deletions b/‎excelutils/excel_sbol_utils/library3.py‎
Lines changed: 19 additions & 9 deletions
@@ -1,5 +1,6 @@
 import re
 import string
+import rdflib
 from openpyxl.worksheet import cell_range, worksheet
 from pathlib import Path
 
@@ -146,3 +147,52 @@ def read_variant_table(excel_file: Path) -> tuple[str, str, list[list]]:
     variant_lists = [[v for v in column if v] for column in column_iterator]  # drop the empty cells from each range
 
     return library_name, base_sequence, variant_lists
+
+def update_uri_refs(doc, update_dict, use_derived=True, derived_ls = ['_sequence']):
+    """
+    This updates a set of referenced uris (may be a namespace or identity update)
+
+    Args:
+        doc (SBOL3 Document): document to be updated
+        update_dict (dict): dictionary of the form {old_uri:new_uri}
+        use_derived (bool, optional): Whether or not to also update derived uris. Defaults to True.
+        derived_ls (list, optional): List of derivations e.g. also version of the uri
+                                     with _sequence added to the end. Defaults to ['_sequence'].
+
+    Returns:
+        doc (SBOL3 Document): updated document
+    """
+    # create all the additional uris that will need to be updated
+    derived_keys = []
+    for deriv in derived_ls:
+        der_update = [f'{x}{deriv}' for x in  update_dict.keys()]
+        derived_keys.extend(der_update)
+
+    # pull the graph from the document
+    g = doc.graph()
+    for index, (subject, predicate, _object) in enumerate(g):
+        # if the object is one of the items to be updated do so
+        if str(_object) in update_dict:
+            g.remove((subject, predicate, _object))
+            new = rdflib.URIRef(update_dict[str(_object)])
+            g.add((subject, predicate, new))
+        # update any derived objects
+        elif use_derived and str(_object) in derived_keys:
+            suffix = str(_object).split('_')[-1]  # assumes suffix starts with '_'
+            suffix = f'_{suffix}'
+            g.remove((subject, predicate, _object))
+            old = str(_object)
+            new = f"{update_dict[old.replace(suffix, '')]}{suffix}"
+            new = rdflib.URIRef(new)
+            g.add((subject, predicate, new))
+        # update any derived subjects
+        if use_derived and str(subject) in derived_keys:
+            suffix = str(subject).split('_')[-1]  # assumes suffix starts with '_'
+            suffix = f'_{suffix}'
+            g.remove((subject, predicate, _object))
+            old = str(subject)
+            new = f"{update_dict[old.replace(suffix, '')]}{suffix}"
+            new = rdflib.URIRef(new)
+            g.add((new, predicate, _object))
+    doc._parse_graph(g)
+    return doc
@@ -154,12 +154,13 @@ def dataSource(rowobj):
 		val = vals[list(vals.keys())[colnum]]
 
 		datasource_dict = {'GenBank':{'Replace Example':'https://www.ncbi.nlm.nih.gov/nuccore/{REPLACE_HERE}', 'Literal Part':'TRUE', 'Namespace':'https://www.ncbi.nlm.nih.gov/nuccore', 'Prefix':'gb'},
-				   'PubMed':{'Replace Example':'https://pubmed.ncbi.nlm.nih.gov/{REPLACE_HERE}/', 'Literal Part':'FALSE', 'Namespace':'', 'Prefix':''},
+				   'PubMed':{'Replace Example':'https://pubmed.ncbi.nlm.nih.gov/{REPLACE_HERE}/', 'Literal Part':'FALSE', 'Namespace':'', 'Prefix':'', 'derived_from':''},
 				   'iGEM registry':{'Replace Example':'http://parts.igem.org/Part:{REPLACE_HERE}', 'Literal Part':'TRUE', 'Namespace':'http://parts.igem.org', 'Prefix':'igem'},
 				   'AddGene':{'Replace Example':'https://www.addgene.org/{REPLACE_HERE}/', 'Literal Part':'FALSE', 'Namespace':'', 'Prefix':''},
 				   'Seva plasmids':{'Replace Example':'http://www.sevahub.es/public/Canonical/{REPLACE_HERE}/1', 'Literal Part':'TRUE', 'Namespace':'', 'Prefix':''},
 				   'Tax_id':{'Replace Example':'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id={REPLACE_HERE}', 'Literal Part':'FALSE', 'Namespace':'', 'Prefix':''},
 				   'SynBioHub':{'Replace Example':'{REPLACE_HERE}', 'Literal Part':'TRUE', 'Namespace':'', 'Prefix':''},
+				   'URL':{'Replace Example':'{REPLACE_HERE}', 'Literal Part':'FALSE', 'Namespace':val, 'Prefix':'', 'derived_from':f'{val}/{rowobj.obj.displayId}'},
 				   'Local Sequence File':{'Replace Example':'', 'Literal Part':'FALSE', 'Namespace':'', 'Prefix':''},
 				   'URL for GenBank file':{'Replace Example':'{REPLACE_HERE}', 'Literal Part':'TRUE', 'Namespace':'', 'Prefix':''},
 				   'URL for FASTA file':{'Replace Example':'{REPLACE_HERE}', 'Literal Part':'TRUE', 'Namespace':'', 'Prefix':''}
@@ -168,28 +169,37 @@ def dataSource(rowobj):
 		literal = datasource_dict[pref]['Literal Part']
 
 		if literal == 'FALSE':
-			rowobj.obj.wasDerivedFrom = val
+			if len(datasource_dict[pref]['derived_from']) > 0:
+				rowobj.obj.derived_from = [datasource_dict[pref]['derived_from']]
+			ns = datasource_dict[pref]['Namespace']
+			if len(ns) > 0:
+				if len(datasource_dict[pref]['Prefix']) > 0:
+					if datasource_dict[pref]['Prefix'] not in rowobj.doc_pref_terms:
+						rowobj.doc.bind(datasource_dict[pref]['Prefix'], ns)
+						rowobj.doc_pref_terms.append(datasource_dict[pref]['Prefix'])
+				
+				old_id = rowobj.obj.identity
+				rowobj.doc.change_object_namespace([rowobj.obj], ns)
+				new_id = rowobj.obj.identity
+				rowobj.data_source_id_to_update[old_id] = new_id
 
 		else:
 			ns = datasource_dict[pref]['Namespace']
 			if len(ns) > 0:
-				if datasource_dict[pref]['Prefix'] not in rowobj.doc_pref_terms:
-					rowobj.doc.bind(datasource_dict[pref]['Prefix'], ns)
-					rowobj.doc_pref_terms.append(datasource_dict[pref]['Prefix'])
+				if len(datasource_dict[pref]['Prefix']) > 0:
+					if datasource_dict[pref]['Prefix'] not in rowobj.doc_pref_terms:
+						rowobj.doc.bind(datasource_dict[pref]['Prefix'], ns)
+						rowobj.doc_pref_terms.append(datasource_dict[pref]['Prefix'])
 
 				old_id = rowobj.obj.identity
 				rowobj.doc.change_object_namespace([rowobj.obj], ns)
 				new_id = rowobj.obj.identity
 				rowobj.data_source_id_to_update[old_id] = new_id
 				if val != rowobj.obj.display_id:
-					# rowobj.data_source_id_to_update[rowobj.obj.identity] = {'current_id': rowobj.obj.display_id,
-					# 													'update_id': val}
 					new_identity = str(rowobj.obj.identity).replace(rowobj.obj.display_id, helpers.check_name(val))
 					id_map = {rowobj.obj.identity:new_identity}
-					# print(str(id_map))
 					rowobj.obj.set_identity(new_identity)
 					rowobj.obj.update_all_dependents(id_map) # this function doesn't yet do everything it should
-					warnings.warn('not yet possible to have display id that is different from source value')
 					rowobj.data_source_id_to_update[old_id] = new_identity
 
 def sequence(rowobj):