1+ import os
12import re
3+ import subprocess
4+ import tempfile
25from io import StringIO
36from typing import Dict , Optional
47
@@ -24,6 +27,14 @@ class UniProtSearch(BaseSearcher):
2427 3) Search with FASTA sequence (BLAST searcher).
2528 """
2629
30+ def __init__ (self , use_local_blast : bool = False , local_blast_db : str = "sp_db" ):
31+ super ().__init__ ()
32+ self .use_local_blast = use_local_blast
33+ self .local_blast_db = local_blast_db
34+ if self .use_local_blast and not os .path .isfile (f"{ self .local_blast_db } .phr" ):
35+ logger .error ("Local BLAST database files not found. Please check the path." )
36+ self .use_local_blast = False
37+
2738 def get_by_accession (self , accession : str ) -> Optional [dict ]:
2839 try :
2940 handle = ExPASy .get_sprot_raw (accession )
@@ -101,38 +112,86 @@ def get_by_fasta(self, fasta_sequence: str, threshold: float) -> Optional[Dict]:
101112 logger .error ("Empty FASTA sequence provided." )
102113 return None
103114
104- # UniProtKB/Swiss-Prot BLAST API
105- try :
106- logger .debug ("Performing BLAST searcher for the given sequence: %s" , seq )
107- result_handle = NCBIWWW .qblast (
108- program = "blastp" ,
109- database = "swissprot" ,
110- sequence = seq ,
111- hitlist_size = 1 ,
112- expect = threshold ,
113- )
114- blast_record = NCBIXML .read (result_handle )
115- except RequestException :
116- raise
117- except Exception as e : # pylint: disable=broad-except
118- logger .error ("BLAST searcher failed: %s" , e )
119- return None
115+ accession = None
116+ if self .use_local_blast :
117+ accession = self ._local_blast (seq , threshold )
118+ if accession :
119+ logger .debug ("Local BLAST found accession: %s" , accession )
120+
121+ if not accession :
122+ logger .debug ("Falling back to NCBIWWW.qblast." )
123+
124+ # UniProtKB/Swiss-Prot BLAST API
125+ try :
126+ logger .debug (
127+ "Performing BLAST searcher for the given sequence: %s" , seq
128+ )
129+ result_handle = NCBIWWW .qblast (
130+ program = "blastp" ,
131+ database = "swissprot" ,
132+ sequence = seq ,
133+ hitlist_size = 1 ,
134+ expect = threshold ,
135+ )
136+ blast_record = NCBIXML .read (result_handle )
137+ except RequestException :
138+ raise
139+ except Exception as e : # pylint: disable=broad-except
140+ logger .error ("BLAST searcher failed: %s" , e )
141+ return None
120142
121- if not blast_record .alignments :
122- logger .info ("No BLAST hits found for the given sequence." )
123- return None
143+ if not blast_record .alignments :
144+ logger .info ("No BLAST hits found for the given sequence." )
145+ return None
124146
125- best_alignment = blast_record .alignments [0 ]
126- best_hsp = best_alignment .hsps [0 ]
127- if best_hsp .expect > threshold :
128- logger .info ("No BLAST hits below the threshold E-value." )
129- return None
130- hit_id = best_alignment .hit_id
147+ best_alignment = blast_record .alignments [0 ]
148+ best_hsp = best_alignment .hsps [0 ]
149+ if best_hsp .expect > threshold :
150+ logger .info ("No BLAST hits below the threshold E-value." )
151+ return None
152+ hit_id = best_alignment .hit_id
131153
132- # like sp|P01308.1|INS_HUMAN
133- accession = hit_id .split ("|" )[1 ].split ("." )[0 ] if "|" in hit_id else hit_id
154+ # like sp|P01308.1|INS_HUMAN
155+ accession = hit_id .split ("|" )[1 ].split ("." )[0 ] if "|" in hit_id else hit_id
134156 return self .get_by_accession (accession )
135157
158+ def _local_blast (self , seq : str , threshold : float ) -> Optional [str ]:
159+ """
160+ Perform local BLAST search using local BLAST database.
161+ :param seq: The protein sequence.
162+ :param threshold: E-value threshold for BLAST searcher.
163+ :return: The accession number of the best hit or None if not found.
164+ """
165+ try :
166+ with tempfile .NamedTemporaryFile (
167+ mode = "w+" , suffix = ".fa" , delete = False
168+ ) as tmp :
169+ tmp .write (f">query\n { seq } \n " )
170+ tmp_name = tmp .name
171+
172+ cmd = [
173+ "blastp" ,
174+ "-db" ,
175+ self .local_blast_db ,
176+ "-query" ,
177+ tmp_name ,
178+ "-evalue" ,
179+ str (threshold ),
180+ "-max_target_seqs" ,
181+ "1" ,
182+ "-outfmt" ,
183+ "6 sacc" , # only return accession
184+ ]
185+ logger .debug ("Running local blastp: %s" , " " .join (cmd ))
186+ out = subprocess .check_output (cmd , text = True ).strip ()
187+ os .remove (tmp_name )
188+ if out :
189+ return out .split ("\n " , maxsplit = 1 )[0 ]
190+ return None
191+ except Exception as exc : # pylint: disable=broad-except
192+ logger .error ("Local blastp failed: %s" , exc )
193+ return None
194+
136195 @retry (
137196 stop = stop_after_attempt (5 ),
138197 wait = wait_exponential (multiplier = 1 , min = 4 , max = 10 ),
@@ -173,3 +232,6 @@ async def search(
173232 if result :
174233 result ["_search_query" ] = query
175234 return result
235+
236+
237+ # TODO: use local UniProt database for large-scale searchs
0 commit comments