Skip to content

Commit f0f3fe1

Browse files
feat: add search_any in uniprot_search
1 parent 08bf2f0 commit f0f3fe1

File tree

1 file changed

+28
-0
lines changed

1 file changed

+28
-0
lines changed

graphgen/models/search/db/uniprot_search.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import re
12
from io import StringIO
23
from typing import Dict, Optional
34

@@ -116,3 +117,30 @@ def get_by_fasta(self, fasta_sequence: str, threshold: float) -> Optional[Dict]:
116117
# like sp|P01308.1|INS_HUMAN
117118
accession = hit_id.split("|")[1].split(".")[0] if "|" in hit_id else hit_id
118119
return self.get_by_accession(accession)
120+
121+
def get_any(self, query: str, threshold: float = 1e-5) -> Optional[Dict]:
    """
    Search UniProt with either an accession number, keyword, or FASTA sequence.

    The query type is auto-detected, in this order:

    1. FASTA / raw sequence: the query starts with ``>`` or consists only of
       the 20 standard amino-acid letters and whitespace -> ``get_by_fasta``.
    2. Accession number: the query matches the UniProt accession format
       (e.g. ``P01308``, ``Q9Y6K9``, ``A0A024R161``) -> ``get_by_accession``.
    3. Anything else is treated as a keyword -> ``get_best_hit``.

    :param query: The search query (accession number, keyword, or FASTA sequence).
        Leading and trailing whitespace is ignored.
    :param threshold: E-value threshold, forwarded to ``get_by_fasta``.
    :return: The result of the selected lookup, or None if the query is not a
        string or is empty after stripping whitespace.
    """
    # Validate on the stripped value so a whitespace-only query is rejected
    # here instead of being forwarded as an empty keyword search.
    if not isinstance(query, str) or not query.strip():
        logger.error("Empty or non-string input.")
        return None
    query = query.strip()

    # check if fasta sequence
    # NOTE(review): short keywords made only of amino-acid letters
    # (e.g. "kinase") also match the raw-sequence pattern; confirm whether a
    # minimum length should be required before treating input as a sequence.
    if query.startswith(">") or re.fullmatch(
        r"[ACDEFGHIKLMNPQRSTVWY\s]+", query, re.I
    ):
        return self.get_by_fasta(query, threshold)

    # check if accession number
    # UniProt accession format: 6 characters starting with O/P/Q, or 6 or 10
    # characters starting with any other letter. A plain
    # "[A-NR-Z0-9]{6,10}" class would miss every O/P/Q accession and would
    # accept ordinary words such as "tubulin".
    if re.fullmatch(
        r"[OPQ][0-9][A-Z0-9]{3}[0-9]"
        r"|[A-NR-Z][0-9](?:[A-Z][A-Z0-9]{2}[0-9]){1,2}",
        query,
        re.I,
    ):
        return self.get_by_accession(query)

    # otherwise treat as keyword
    return self.get_best_hit(query)

0 commit comments

Comments
 (0)