diff --git a/notebooks/Tricahue_XDC.ipynb b/notebooks/Tricahue_XDC.ipynb new file mode 100644 index 0000000..69b308a --- /dev/null +++ b/notebooks/Tricahue_XDC.ipynb @@ -0,0 +1,574 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import tricahue\n", + "import os\n", + "import getpass" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# XDC Test" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "fj_url = \"charmmefj-api.synbiohub.org\"\n", + "fj_user = input()\n", + "fj_pass = getpass.getpass()" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "sbh_url = \"https://synbiohub.org\"\n", + "sbh_user = \"test@test.test\"\n", + "sbh_pass = \"test123\"\n", + "sbh_collec = \"XDC_package_test\"" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "test_file_path ='../tests/test_files'\n", + "excel_path = os.path.join(test_file_path, 'Medias.xlsm')\n", + "\n", + "homespace = 'https://ebugs.synbiohub.org/gonza10v'\n", + "\n", + "fj_overwrite = False\n", + "sbh_overwrite=False" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "xdc = tricahue.XDC(input_excel_path = excel_path,\n", + " fj_url = fj_url,\n", + " fj_user = fj_user, \n", + " fj_pass = fj_pass, \n", + " sbh_url = sbh_url, \n", + " sbh_user = sbh_user, \n", + " sbh_pass = sbh_pass, \n", + " sbh_collection = sbh_collec, \n", + " sbh_collection_description = 'Collection made using tricahue',\n", + " sbh_overwrite = sbh_overwrite, \n", + " fj_overwrite = fj_overwrite, \n", + " homespace = homespace,\n", + " fj_token = None, \n", + " sbh_token = None)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "xdc.initialize()" + 
] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "None\n" + ] + } + ], + "source": [ + "print(xdc.fj_token)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "No user logged in.\n" + ] + } + ], + "source": [ + "xdc.log_in_fj()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(xdc.fj_token)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "xdc.log_in_sbh()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "xdc.sbh_token" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "xdc.x2f.sheets = ['Media']" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Media']" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "xdc.x2f.sheets" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing the welcome page...\n", + "Welcome Page Metadata:\n", + "Author: Gonzalo Vidal\n", + "Email: gonzalo.vidalpena@colorado.edu\n", + "Lab: Genetic Logic Lab\n", + "Institution: University of Colorado Boulder\n", + "Library Name: medias\n", + "Description: Repository of medias\n", + "PubId: \n", + "Date: \n", + "Final: False\n", + "Domain: \n", + "Master Collection: \n", + "Conversion will happen with sbol version 2 as specified in the excel sheet\n", + "Media\n" + ] + } + ], + "source": [ + "xdc.convert_to_sbol()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, 
+ "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://ebugs.synbiohub.org/gonza10v/LB/1\n", + "https://ebugs.synbiohub.org/gonza10v/PBS/1\n", + "https://ebugs.synbiohub.org/gonza10v/Water/1\n" + ] + } + ], + "source": [ + "for tl in xdc.sbol_doc:\n", + " print(tl)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{}" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "xdc.sbol_hash_map" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "xdc.generate_sbol_hash_map()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'LB': 'https://synbiohub.org/user/synbiotest/XDC_package_test//LB/1',\n", + " 'PBS': 'https://synbiohub.org/user/synbiotest/XDC_package_test//PBS/1',\n", + " 'Water': 'https://synbiohub.org/user/synbiotest/XDC_package_test//Water/1'}" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "xdc.sbol_hash_map" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "xdc.x2f.index_skiprows = 3" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "xdc.x2f.index_skiprows" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Returning exististing model, media\n", + "id 3\n", + "name LB\n", + "description Rich Broth\n", + "sboluri \n", + "\n", + "Returning exististing model, media\n", + "id 4\n", + "name PBS\n", + 
"description Phosphate Buffered Saline\n", + "sboluri \n", + "\n", + "Returning exististing model, media\n", + "id 5\n", + "name Water\n", + "description Water\n", + "sboluri \n", + "\n" + ] + } + ], + "source": [ + "xdc.upload_to_fj()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namedescriptionobjectflapjackidsbol_uri
Media ID
LBLBRich BrothMedia3
PBSPBSPhosphate Buffered SalineMedia4
WaterWaterWaterMedia5
\n", + "
" + ], + "text/plain": [ + " name description object flapjackid sbol_uri\n", + "Media ID \n", + "LB LB Rich Broth Media 3 \n", + "PBS PBS Phosphate Buffered Saline Media 4 \n", + "Water Water Water Media 5 " + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "xdc.x2f.df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "xdc.sbol_doc" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'https://synbiohub.org/user/synbiotest/XDC_package_test/XDC_package_test_collection/1'" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "xdc.upload_to_sbh()" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing the welcome page...\n", + "Welcome Page Metadata:\n", + "Author: Gonzalo Vidal\n", + "Email: gonzalo.vidalpena@colorado.edu\n", + "Lab: Genetic Logic Lab\n", + "Institution: University of Colorado Boulder\n", + "Library Name: medias\n", + "Description: Repository of medias\n", + "PubId: \n", + "Date: \n", + "Final: False\n", + "Domain: \n", + "Master Collection: \n", + "Conversion will happen with sbol version 2 as specified in the excel sheet\n", + "Media\n", + "Returning exististing model, media\n", + "id 3\n", + "name LB\n", + "description Rich Broth\n", + "sboluri \n", + "\n", + "Returning exististing model, media\n", + "id 4\n", + "name PBS\n", + "description Phosphate Buffered Saline\n", + "sboluri \n", + "\n", + "Returning exististing model, media\n", + "id 5\n", + "name Water\n", + "description 
Water\n", + "sboluri \n", + "\n" + ] + }, + { + "data": { + "text/plain": [ + "'https://synbiohub.org/user/synbiotest/XDC_package_test/XDC_package_test_collection/1'" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#complete run\n", + "\n", + "xdc.log_in_fj()\n", + "xdc.log_in_sbh()\n", + "xdc.convert_to_sbol()\n", + "xdc.x2f.sheets = ['Media']\n", + "xdc.x2f.index_skiprows = 3\n", + "xdc.generate_sbol_hash_map()\n", + "xdc.upload_to_fj()\n", + "xdc.upload_to_sbh()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# The End" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "GLLDB", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.19" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/XDC_package_test_SBOL_Fj_doc.xml b/notebooks/XDC_package_test_SBOL_Fj_doc.xml new file mode 100644 index 0000000..1acf07f --- /dev/null +++ b/notebooks/XDC_package_test_SBOL_Fj_doc.xml @@ -0,0 +1,29 @@ + + + 1 + Rich Broth + LB + LB + + + + + + Phosphate Buffered Saline + PBS + PBS + + 1 + + + + + Water + + + Water + 1 + Water + 962.0 + + diff --git a/pyproject.toml b/pyproject.toml index f5b3b10..0a1116c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,11 +4,11 @@ build-backend = "setuptools.build_meta" [project] name = "tricahue" -version = "0.0.b1" +version = "0.0.b3" description = "This package extracts experimental data and metadata, converts to stardard formats, uploads to SynBioHub and Flapjack, and connects them" readme = "README.md" dependencies = [ - "excel2flapjack==1.0.9", + "excel2flapjack==1.0.10", "excel2sbol==1.0.29" ] requires-python = ">=3.7" diff --git a/src/tricahue.py b/src/tricahue.py 
def generate_sbol_hash_map(self):
    """Map every SBOL displayId to the URI it will have on SynBioHub.

    Fetches the logged-in user's profile from ``{sbh_url}/profile`` to learn
    the ``graphUri``, re-reads the converted SBOL file (``file_path_out``)
    into ``self.sbol_doc`` and, for each top-level object, rewrites its
    persistent identity from the local homespace to
    ``<graphUri>/<sbh_collection>/<...>/1``.

    Side effects:
        * ``self.sbol_graph_uri`` is set to the ``graphUri`` from the profile.
        * ``self.sbol_doc`` is (re)loaded from ``self.file_path_out``.
        * ``self.sbol_hash_map`` is replaced by the new
          ``{displayId: SynBioHub URI}`` dict; the same dict is handed to
          ``self.x2f`` when that helper exists.

    Raises:
        requests.HTTPError: if the profile request is answered with an
            error status (e.g. a missing or invalid ``sbh_token``).
        KeyError: if the profile has no ``graphUri`` or a top-level object
            lacks a persistentIdentity/displayId property.
    """
    response = requests.get(
        f'{self.sbh_url}/profile',
        headers={
            'Accept': 'text/plain',
            'X-authorization': self.sbh_token,
        },
    )
    # Surface the HTTP status instead of an opaque JSON decoding error when
    # the request was rejected.
    response.raise_for_status()
    self.sbol_graph_uri = response.json()['graphUri']

    # Neither side of the substitution may end in '/': the remainder of the
    # persistent identity already starts with one, and a trailing slash here
    # used to yield URIs such as '.../XDC_package_test//LB/1'.
    graph_uri = self.sbol_graph_uri.rstrip('/')
    collection_url = f'{graph_uri}/{self.sbh_collection}'
    homespace = self.homespace.rstrip('/')

    self.sbol_doc.read(self.file_path_out)
    sbol_hash_map = {}
    for top_level in self.sbol_doc:
        persistent_id = str(
            top_level.properties['http://sbols.org/v2#persistentIdentity'][0]
        )
        display_id = str(
            top_level.properties['http://sbols.org/v2#displayId'][0]
        )
        hosted_uri = persistent_id.replace(homespace, collection_url)
        # '/1' is the version suffix; presumably it matches the version '1'
        # submitted to SynBioHub on upload -- confirm if versions ever change.
        sbol_hash_map[display_id] = f'{hosted_uri}/1'

    self.sbol_hash_map = sbol_hash_map
    # convert_to_sbol() shares its map with the X2F helper; keep that helper
    # in sync here too, otherwise it keeps the stale (possibly empty) map.
    if self.x2f is not None:
        self.x2f.sbol_hash_map = sbol_hash_map
+ sbh_collection_description = 'Tricahue XDC package test collection', sbh_overwrite = sbh_overwrite, fj_overwrite = fj_overwrite, homespace = homespace,