1717 SnpsReference as SoybeanSnpsReference ,
1818 SamplesLookup as SoybeanSampleNames ,
1919)
20+ from api .models .canola_nssnp import (
21+ CanolaProteinReference as CanolaProteinReference ,
22+ CanolaSnpsToProtein as CanolaSnpsToProtein
23+ )
24+ from api .models .homologs_db import homologs as HomologsDB
2025from api .utils .bar_utils import BARUtils
2126from flask import request
2227import re
@@ -141,7 +146,7 @@ class GeneNameAlias(Resource):
141146 def get (self , species = "" , gene_id = "" ):
142147 """Endpoint returns annotated SNP poplar data in order of (to match A th API format):
143148 AA pos (zero-indexed), sample id, 'missense_variant','MODERATE', 'MISSENSE', codon/DNA base change,
144- AA change (DH), pro length, gene ID, 'protein_coding', 'CODING', transcript id, biotype
149+ AA change (DH), pro length, gene ID, 'protein_coding', 'CODING', transcript id, biotype (allele frequency for canola)
145150 values with single quotes are fixed"""
146151 results_json = []
147152
@@ -160,43 +165,76 @@ def get(self, species="", gene_id=""):
160165 protein_reference = SoybeanProteinReference
161166 snps_to_protein = SoybeanSnpsToProtein
162167 snps_reference = SoybeanSnpsReference
168+ elif species == "canola" and BARUtils .is_canola_gene_valid (gene_id ):
169+ protein_reference = CanolaProteinReference
170+ snps_to_protein = CanolaSnpsToProtein
163171 else :
164172 return BARUtils .error_exit ("Invalid gene id" ), 400
165173
166- rows = (
167- db .session .execute (
168- db .select (protein_reference , snps_to_protein , snps_reference )
169- .select_from (protein_reference )
170- .join (snps_to_protein )
171- .join (snps_reference )
172- .where (protein_reference .gene_identifier == gene_id )
174+ if species == "canola" and BARUtils .is_canola_gene_valid (gene_id ):
175+ rows = (
176+ db .session .execute (
177+ db .select (protein_reference , snps_to_protein )
178+ .select_from (protein_reference )
179+ .join (snps_to_protein )
180+ .where (protein_reference .gene_identifier == gene_id )
181+ )
182+ .tuples ()
183+ .all ()
184+ )
185+ for protein , snptoprotein in rows :
186+ itm_lst = [
187+ snptoprotein .chromosome ,
188+ snptoprotein .aa_pos - 1 , # zero index-ed
189+ None ,
190+ "missense_variant" ,
191+ "MODERATE" ,
192+ "MISSENSE" ,
193+ str (snptoprotein .transcript_pos ) + snptoprotein .ref_DNA + ">" + snptoprotein .alt_DNA ,
194+ snptoprotein .ref_aa + snptoprotein .alt_aa ,
195+ None ,
196+ gene_id ,
197+ "protein_coding" ,
198+ "CODING" ,
199+ protein .gene_name ,
200+ float (snptoprotein .alt_freq ),
201+ ]
202+ results_json .append (itm_lst )
203+ else :
204+ rows = (
205+ db .session .execute (
206+ db .select (protein_reference , snps_to_protein , snps_reference )
207+ .select_from (protein_reference )
208+ .join (snps_to_protein )
209+ .join (snps_reference )
210+ .where (protein_reference .gene_identifier == gene_id )
211+ )
212+ .tuples ()
213+ .all ()
173214 )
174- .tuples ()
175- .all ()
176- )
177215
178- # BAR A Th API format is chr, AA pos (zero-indexed), sample id, 'missense_variant',
179- # 'MODERATE', 'MISSENSE', codon/DNA base change, AA change (DH),
180- # pro length, gene ID, 'protein_coding', 'CODING', transcript id, biotype
181- for protein , snpsjoin , snpstbl in rows :
182- itm_lst = [
183- snpstbl .chromosome ,
184- # snpstbl.chromosomal_loci,
185- snpsjoin .aa_pos - 1 , # zero index-ed
186- snpstbl .sample_id ,
187- "missense_variant" ,
188- "MODERATE" ,
189- "MISSENSE" ,
190- str (snpsjoin .transcript_pos ) + snpsjoin .ref_DNA + ">" + snpsjoin .alt_DNA ,
191- snpsjoin .ref_aa + snpsjoin .alt_aa ,
192- None ,
193- re .sub (r".\d$" , "" , protein .gene_identifier ),
194- "protein_coding" ,
195- "CODING" ,
196- protein .gene_identifier ,
197- None ,
198- ]
199- results_json .append (itm_lst )
216+ # BAR A Th API format is chr, AA pos (zero-indexed), sample id, 'missense_variant',
217+ # 'MODERATE', 'MISSENSE', codon/DNA base change, AA change (DH),
218+ # pro length, gene ID, 'protein_coding', 'CODING', transcript id, biotype
219+ for protein , snpsjoin , snpstbl in rows :
220+ itm_lst = [
221+ snpstbl .chromosome ,
222+ # snpstbl.chromosomal_loci,
223+ snpsjoin .aa_pos - 1 , # zero index-ed
224+ snpstbl .sample_id ,
225+ "missense_variant" ,
226+ "MODERATE" ,
227+ "MISSENSE" ,
228+ str (snpsjoin .transcript_pos ) + snpsjoin .ref_DNA + ">" + snpsjoin .alt_DNA ,
229+ snpsjoin .ref_aa + snpsjoin .alt_aa ,
230+ None ,
231+ re .sub (r".\d$" , "" , protein .gene_identifier ),
232+ "protein_coding" ,
233+ "CODING" ,
234+ protein .gene_identifier ,
235+ None ,
236+ ]
237+ results_json .append (itm_lst )
200238
201239 # Return results if there are data
202240 if len (results_json ) > 0 :
@@ -502,3 +540,49 @@ def get(self, pval="", araid="", popid=""):
502540 pop_both_sig_idx = HotspotUtils .get_sig_index (pop_both_sig )
503541 output = {"ara_id" : araid , "pop_id" : popid , "ara_hotspots" : ara_both_sig_idx , "pop_hotspots" : pop_both_sig_idx }
504542 return BARUtils .success_exit (output )
543+
544+
545+ @snps .route ("/homologs/<string:search_species>/<string:search_gene>/<string:target_species>" )
546+ class Homologs (Resource ):
547+ @snps .param ("search_species" , _in = "path" , default = "canola" )
548+ @snps .param ("search_gene" , _in = "path" , default = "BnaA07g31480D" )
549+ @snps .param ("target_species" , _in = "path" , default = "arabidopsis" )
550+ def get (self , search_species = "" , search_gene = "" , target_species = "" ):
551+ """This endpoint shows the homologs proteins of search_gene in target_species.
552+ The endpoint returns a list of homologous pairs of proteins in following format:
553+ Percent_id(percent identity get by blast); e score
554+ """
555+ # Escape input
556+ search_species = escape (search_species )
557+ gene_id = escape (search_gene )
558+ target_species = escape (target_species )
559+ supported = ["arabidopsis" , "canola" ]
560+ if (search_species not in supported ) or (target_species not in supported ):
561+ return BARUtils .error_exit ("Species not supported" ), 400
562+ elif (search_species == "arabidopsis" and BARUtils .is_arabidopsis_gene_valid (gene_id )) or (search_species == "canola" and BARUtils .is_canola_gene_valid (gene_id )):
563+ results = HomologsDB .query .filter_by (
564+ search_protein_name = gene_id ,
565+ search_species_name = search_species ,
566+ result_species_name = target_species
567+ ).all ()
568+ if not results :
569+ return BARUtils .error_exit ("No homologs found for the given query" ), 400
570+
571+ homologs_list = [
572+ {
573+ "search_species_name" : search_species ,
574+ "search_protein_name" : gene_id ,
575+ "result_species_name" : target_species ,
576+ "result_protein_name" : homolog .result_protein_name ,
577+ "Percent_id" : float (homolog .Percent_id ),
578+ "e_score" : float (homolog .e_score )
579+ }
580+ for homolog in results
581+ ]
582+ homologs_list .sort (key = lambda x : x ["e_score" ])
583+ if len (homologs_list ) >= 5 :
584+ homologs_list = homologs_list [:5 ]
585+ response = {"homologs" : homologs_list }
586+ return BARUtils .success_exit (response ), 200
587+ else :
588+ return BARUtils .error_exit ("Invalid gene id" ), 400
0 commit comments