diff --git a/appyters/CFDE-Gene-Partnership/CFDE-Gene-Partnership.ipynb b/appyters/CFDE-Gene-Partnership/CFDE-Gene-Partnership.ipynb index c6e92625b..d27ee28de 100644 --- a/appyters/CFDE-Gene-Partnership/CFDE-Gene-Partnership.ipynb +++ b/appyters/CFDE-Gene-Partnership/CFDE-Gene-Partnership.ipynb @@ -103,11 +103,15 @@ "console.setFormatter(formatter)\n", "logger.addHandler(console)\n", "\n", - "from itables_javascript import show\n", "import itables.options as opt\n", "opt.maxBytes = 0\n", "opt.showIndex = False\n", "\n", + "def show(df, *kargs, **kwargs):\n", + " from itables_javascript import show\n", + " try: return show(df, *kargs, **kwargs)\n", + " except: return display(df)\n", + "\n", "class RenderJSON(object):\n", " ''' https://gist.github.com/t27/48b3ac73a1479914f9fe9383e5d45325 '''\n", " def __init__(self, json_data):\n", @@ -364,17 +368,32 @@ "outputs": [], "source": [ "@lru_cache()\n", - "def gtex_singleTissueEqtl(geneSymbol, datasetId='gtex_v8'):\n", + "def gtex_resolve_genecode_id(geneId):\n", + " res = requests.get(\n", + " 'https://gtexportal.org/api/v2/reference/gene',\n", + " params=dict(\n", + " format='json',\n", + " geneId=geneId,\n", + " pageSize=1,\n", + " )\n", + " )\n", + " if not res.ok: raise Exception(f\"The gene {geneId} could not be found in GTEx\")\n", + " return res.json()['data'][0]['gencodeId']\n", + "\n", + "@lru_cache()\n", + "def gtex_medianGeneExpression(geneSymbol, datasetId='gtex_v8'):\n", + " gencodeId = gtex_resolve_genecode_id(geneSymbol)\n", " res = requests.get(\n", - " 'https://gtexportal.org/rest/v1/association/singleTissueEqtl',\n", + " 'https://gtexportal.org/api/v2/expression/medianGeneExpression',\n", " params=dict(\n", " format='json',\n", - " geneSymbol=geneSymbol,\n", + " gencodeId=gencodeId,\n", " datasetId=datasetId,\n", " )\n", " )\n", - " results = res.json()['singleTissueEqtl']\n", - " if len(results) == 0: raise Exception(f\"No information for gene with identifier {geneSymbol} found in GTEx\")\n", + " if not res.ok: raise Exception(f\"Failed to resolve information about {geneSymbol} ({gencodeId}) from GTEx ({datasetId})\")\n", + " results = res.json()['data']\n", + " if len(results) == 0: raise Exception(f\"No information for gene {geneSymbol} ({gencodeId}) found in GTEx ({datasetId})\")\n", " return pd.DataFrame(results)" ] }, @@ -386,13 +405,8 @@ "outputs": [], "source": [ "with ignore_exceptions(f\"Gene with identifier {gene_info['symbol']} currently not available in GTEx\"):\n", - " gtex_results = gtex_singleTissueEqtl(gene_info['symbol'])\n", - " columns = list(gtex_results.columns)\n", - " columns.insert(0, columns.pop(columns.index('nes')))\n", - " columns.insert(0, columns.pop(columns.index('pValue')))\n", - " columns.insert(0, columns.pop(columns.index('tissueSiteDetailId')))\n", - " gtex_results = gtex_results[columns]\n", - " show(gtex_results, order=[[gtex_results.columns.get_loc('pValue'), 'asc']])" + " gtex_results = gtex_medianGeneExpression(gene_info['symbol']).sort_values('median', ascending=False)\n", + " show(gtex_results, order=[[gtex_results.columns.get_loc('median'), 'desc']])" ] }, { @@ -403,20 +417,12 @@ "outputs": [], "source": [ "with ignore_exceptions('Could not process GTEx output'):\n", - " gtex_combined_stouffer_statistic = gtex_results.groupby('tissueSiteDetailId')['pValue'] \\\n", - " .agg(partial(combine_pvalues, method='stouffer', select='statistic')) \\\n", - " .to_frame('combined_stouffer_statistic') \\\n", - " .reset_index() \\\n", - " .sort_values('combined_stouffer_statistic', ascending=False)\n", - " gtex_combined_stouffer_statistic['group'] = gtex_combined_stouffer_statistic['tissueSiteDetailId'].apply(lambda name: name.split('_', maxsplit=1)[0])\n", - "\n", " fig = px.bar(\n", - " gtex_combined_stouffer_statistic,\n", - " y='combined_stouffer_statistic',\n", + " gtex_results,\n", + " y='median',\n", " x='tissueSiteDetailId',\n", - " color='group',\n", " orientation='v',\n", - " title=f\"Tissues with significant expression of {gene_info['symbol']} in GTEx\",\n", + " title=f\"Tissues with median expression of {gene_info['symbol']} in GTEx\",\n", " height=1000,\n", " )\n", " fig.show()" @@ -449,7 +455,7 @@ "metadata": {}, "outputs": [], "source": [ - "import matplotlib.cm as cm\n", + "import matplotlib; cm = matplotlib.colormaps\n", "import matplotlib.colors as colors\n", "from bokeh.io import output_notebook\n", "from bokeh.plotting import figure, show as bokeh_show\n", @@ -506,8 +512,8 @@ " ]\n", " # generate plot and relevant plot labels\n", " plot = figure(\n", - " plot_width=700,\n", - " plot_height=500,\n", + " width=700,\n", + " height=500,\n", " tooltips=tools\n", " )\n", " \n", @@ -684,7 +690,7 @@ "source": [ "with ignore_exceptions(f\"No information for gene with identifier {gene_info['symbol']} found in IPMC\"):\n", " impc_results = impc_phenotype(gene_info['symbol'].capitalize())\n", - " show(impc_results[[\n", + " show(impc_results.loc[:, impc_results.columns.isin([\n", " 'marker_accession_id',\n", " 'mp_term_id',\n", " 'mp_term_name',\n", @@ -693,7 +699,7 @@ " 'phenotyping_center',\n", " 'percentage_change',\n", " 'statistical_method',\n", - " ]])" + " ])])" ] }, { @@ -751,20 +757,20 @@ "outputs": [], "source": [ "@lru_cache()\n", - "def glygen_geneNameSearch(recommended_gene_name, organism_taxon_id=9606):\n", - " res = requests.get(\n", - " 'https://api.glygen.org/directsearch/gene/',\n", - " params=dict(\n", - " query=json.dumps(dict(\n", - " recommended_gene_name=recommended_gene_name,\n", - " organism=dict(\n", - " id=organism_taxon_id\n", - " ),\n", - " )),\n", - " ),\n", - " verify=False, # not sure why on my system I get SSL errors\n", - " )\n", - " return res.json()" + "def glygen_protein_search(gene_name):\n", + " search_results = requests.post('https://api.glygen.org/protein/search/', json={'gene_name': gene_name}, verify=False).json()\n", + " return requests.post('https://api.glygen.org/protein/list/', json={'id': search_results['list_id']}, verify=False).json()\n", + "\n", + "@lru_cache()\n", + "def glygen_protein_detail(uniprot_canonical_ac):\n", + " return requests.post(f'https://api.glygen.org/protein/detail/{uniprot_canonical_ac}/', json={}, verify=False).json()\n", + "\n", + "@lru_cache()\n", + "def glygen_geneNameSearch(recommended_gene_name):\n", + " protein_list_results = glygen_protein_search(recommended_gene_name)\n", + " protein_uniprot_canonical_ac = protein_list_results['results'][0]['uniprot_canonical_ac']\n", + " protein_details = requests.post(f'https://api.glygen.org/protein/detail/{protein_uniprot_canonical_ac}/', json={}, verify=False).json()\n", + " return protein_details" ] }, { @@ -777,7 +783,7 @@ "with ignore_exceptions(f\"No information for gene with identifier {gene_info['symbol']} found in GlyGen\"):\n", " glygen_geneInfo = glygen_geneNameSearch(gene_info['symbol'])\n", " display(RenderJSON(glygen_geneInfo))\n", - " d = pd.DataFrame(glygen_geneInfo['results'][0]['glycosylation'])\n", + " d = pd.DataFrame(glygen_geneInfo['glycosylation'])\n", " d['evidence'] = d['evidence'].apply(\n", " lambda evidence: ' '.join(f\"{e['url']}>\" for e in evidence if 'url' in e)\n", " )\n", diff --git a/appyters/CFDE-Gene-Partnership/appyter.json b/appyters/CFDE-Gene-Partnership/appyter.json index 70507ef1a..3f7ccfb48 100644 --- a/appyters/CFDE-Gene-Partnership/appyter.json +++ b/appyters/CFDE-Gene-Partnership/appyter.json @@ -2,7 +2,7 @@ "$schema": "https://raw.githubusercontent.com/MaayanLab/appyter-catalog/main/schema/appyter-validator.json", "name": "CFDE-Gene-Partnership", "title": "CFDE Gene Partnership", - "version": "0.2.9", + "version": "0.3.0", "description": "This Appyter uses FAIR APIs from different DCCs to Resolve gene-centric information from CF DCCs.", "image": "cfde-gene-partnership.png", "public": true,