Skip to content

Commit dc08169

Browse files
committed
fix namespace updates
1 parent cd1100b commit dc08169

File tree

6 files changed

+178
-804
lines changed

6 files changed

+178
-804
lines changed

SBOL3_simple_library4.nt

Lines changed: 98 additions & 98 deletions
Large diffs are not rendered by default.

SBOL3_simple_library4.xlsx

99 Bytes
Binary file not shown.

excelutils/excel_sbol_utils/helpers.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import re
22
import string
3+
import rdflib
34
from openpyxl.worksheet import cell_range, worksheet
45
from pathlib import Path
56

@@ -146,3 +147,52 @@ def read_variant_table(excel_file: Path) -> tuple[str, str, list[list]]:
146147
variant_lists = [[v for v in column if v] for column in column_iterator] # drop the empty cells from each range
147148

148149
return library_name, base_sequence, variant_lists
150+
151+
def update_uri_refs(doc, update_dict, use_derived=True, derived_ls=None):
    """
    This updates a set of referenced uris (may be a namespace or identity update)

    Every triple of the document graph whose object matches a key of
    ``update_dict`` is rewritten to point at the mapped uri. When
    ``use_derived`` is true, objects *and* subjects that equal an old uri
    with one of the ``derived_ls`` suffixes appended are rewritten to the
    new uri with the same suffix appended. Subjects that match a key of
    ``update_dict`` directly are left untouched.

    Args:
        doc (SBOL3 Document): document to be updated
        update_dict (dict): dictionary of the form {old_uri:new_uri}
        use_derived (bool, optional): Whether or not to also update derived uris. Defaults to True.
        derived_ls (list, optional): List of derivations e.g. also version of the uri
                                    with _sequence added to the end. Defaults to ['_sequence']
                                    (also used when None is passed).

    Returns:
        doc (SBOL3 Document): updated document
    """
    if derived_ls is None:
        derived_ls = ['_sequence']

    # Map each derived old uri directly to its derived new uri. Building the
    # mapping up front avoids having to recover the suffix from the uri text
    # afterwards, which is ambiguous when the suffix contains several
    # underscores or also occurs earlier in the uri.
    derived_map = {}
    if use_derived:
        derived_map = {
            f'{old}{deriv}': f'{new}{deriv}'
            for deriv in derived_ls
            for old, new in update_dict.items()
        }

    # pull the graph from the document
    g = doc.graph()

    # Iterate over a snapshot: triples are removed from and added to the
    # graph inside the loop, which must not disturb the iteration.
    for subject, predicate, _object in list(g):
        old_subject = str(subject)
        old_object = str(_object)

        # update any derived subjects
        new_subject = subject
        if old_subject in derived_map:
            new_subject = rdflib.URIRef(derived_map[old_subject])

        # if the object is one of the items to be updated do so; a direct
        # match takes precedence over a derived match
        new_object = _object
        if old_object in update_dict:
            new_object = rdflib.URIRef(update_dict[old_object])
        elif old_object in derived_map:
            new_object = rdflib.URIRef(derived_map[old_object])

        # Replace the triple once, with both ends updated together, so a
        # triple needing both a subject and an object update does not end
        # up as two half-updated triples.
        if new_subject is not subject or new_object is not _object:
            g.remove((subject, predicate, _object))
            g.add((new_subject, predicate, new_object))

    # NOTE(review): relies on the private Document._parse_graph to load the
    # edited graph back into the document - confirm it fully replaces the
    # previous document contents.
    doc._parse_graph(g)
    return doc

excelutils/excel_sbol_utils/library3.py

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -154,12 +154,13 @@ def dataSource(rowobj):
154154
val = vals[list(vals.keys())[colnum]]
155155

156156
datasource_dict = {'GenBank':{'Replace Example':'https://www.ncbi.nlm.nih.gov/nuccore/{REPLACE_HERE}', 'Literal Part':'TRUE', 'Namespace':'https://www.ncbi.nlm.nih.gov/nuccore', 'Prefix':'gb'},
157-
'PubMed':{'Replace Example':'https://pubmed.ncbi.nlm.nih.gov/{REPLACE_HERE}/', 'Literal Part':'FALSE', 'Namespace':'', 'Prefix':''},
157+
'PubMed':{'Replace Example':'https://pubmed.ncbi.nlm.nih.gov/{REPLACE_HERE}/', 'Literal Part':'FALSE', 'Namespace':'', 'Prefix':'', 'derived_from':''},
158158
'iGEM registry':{'Replace Example':'http://parts.igem.org/Part:{REPLACE_HERE}', 'Literal Part':'TRUE', 'Namespace':'http://parts.igem.org', 'Prefix':'igem'},
159159
'AddGene':{'Replace Example':'https://www.addgene.org/{REPLACE_HERE}/', 'Literal Part':'FALSE', 'Namespace':'', 'Prefix':''},
160160
'Seva plasmids':{'Replace Example':'http://www.sevahub.es/public/Canonical/{REPLACE_HERE}/1', 'Literal Part':'TRUE', 'Namespace':'', 'Prefix':''},
161161
'Tax_id':{'Replace Example':'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id={REPLACE_HERE}', 'Literal Part':'FALSE', 'Namespace':'', 'Prefix':''},
162162
'SynBioHub':{'Replace Example':'{REPLACE_HERE}', 'Literal Part':'TRUE', 'Namespace':'', 'Prefix':''},
163+
'URL':{'Replace Example':'{REPLACE_HERE}', 'Literal Part':'FALSE', 'Namespace':val, 'Prefix':'', 'derived_from':f'{val}/{rowobj.obj.displayId}'},
163164
'Local Sequence File':{'Replace Example':'', 'Literal Part':'FALSE', 'Namespace':'', 'Prefix':''},
164165
'URL for GenBank file':{'Replace Example':'{REPLACE_HERE}', 'Literal Part':'TRUE', 'Namespace':'', 'Prefix':''},
165166
'URL for FASTA file':{'Replace Example':'{REPLACE_HERE}', 'Literal Part':'TRUE', 'Namespace':'', 'Prefix':''}
@@ -168,28 +169,37 @@ def dataSource(rowobj):
168169
literal = datasource_dict[pref]['Literal Part']
169170

170171
if literal == 'FALSE':
171-
rowobj.obj.wasDerivedFrom = val
172+
if len(datasource_dict[pref]['derived_from']) > 0:
173+
rowobj.obj.derived_from = [datasource_dict[pref]['derived_from']]
174+
ns = datasource_dict[pref]['Namespace']
175+
if len(ns) > 0:
176+
if len(datasource_dict[pref]['Prefix']) > 0:
177+
if datasource_dict[pref]['Prefix'] not in rowobj.doc_pref_terms:
178+
rowobj.doc.bind(datasource_dict[pref]['Prefix'], ns)
179+
rowobj.doc_pref_terms.append(datasource_dict[pref]['Prefix'])
180+
181+
old_id = rowobj.obj.identity
182+
rowobj.doc.change_object_namespace([rowobj.obj], ns)
183+
new_id = rowobj.obj.identity
184+
rowobj.data_source_id_to_update[old_id] = new_id
172185

173186
else:
174187
ns = datasource_dict[pref]['Namespace']
175188
if len(ns) > 0:
176-
if datasource_dict[pref]['Prefix'] not in rowobj.doc_pref_terms:
177-
rowobj.doc.bind(datasource_dict[pref]['Prefix'], ns)
178-
rowobj.doc_pref_terms.append(datasource_dict[pref]['Prefix'])
189+
if len(datasource_dict[pref]['Prefix']) > 0:
190+
if datasource_dict[pref]['Prefix'] not in rowobj.doc_pref_terms:
191+
rowobj.doc.bind(datasource_dict[pref]['Prefix'], ns)
192+
rowobj.doc_pref_terms.append(datasource_dict[pref]['Prefix'])
179193

180194
old_id = rowobj.obj.identity
181195
rowobj.doc.change_object_namespace([rowobj.obj], ns)
182196
new_id = rowobj.obj.identity
183197
rowobj.data_source_id_to_update[old_id] = new_id
184198
if val != rowobj.obj.display_id:
185-
# rowobj.data_source_id_to_update[rowobj.obj.identity] = {'current_id': rowobj.obj.display_id,
186-
# 'update_id': val}
187199
new_identity = str(rowobj.obj.identity).replace(rowobj.obj.display_id, helpers.check_name(val))
188200
id_map = {rowobj.obj.identity:new_identity}
189-
# print(str(id_map))
190201
rowobj.obj.set_identity(new_identity)
191202
rowobj.obj.update_all_dependents(id_map) # this function doesn't yet do everything it should
192-
warnings.warn('not yet possible to have display id that is different from source value')
193203
rowobj.data_source_id_to_update[old_id] = new_identity
194204

195205
def sequence(rowobj):

0 commit comments

Comments
 (0)