|
| 1 | +import re |
1 | 2 | from io import StringIO |
2 | 3 | from typing import Dict, Optional |
3 | 4 |
|
@@ -116,3 +117,30 @@ def get_by_fasta(self, fasta_sequence: str, threshold: float) -> Optional[Dict]: |
116 | 117 | # like sp|P01308.1|INS_HUMAN |
117 | 118 | accession = hit_id.split("|")[1].split(".")[0] if "|" in hit_id else hit_id |
118 | 119 | return self.get_by_accession(accession) |
| 120 | + |
def get_any(self, query: str, threshold: float = 1e-5) -> Optional[Dict]:
    """
    Search UniProt with an accession number, a keyword, or a FASTA sequence.

    The query type is auto-detected and the call is delegated, in this order:

    1. FASTA record (starts with ``>``) or a bare run of amino-acid letters
       -> ``get_by_fasta(query, threshold)``
    2. UniProt accession number -> ``get_by_accession(query)``
    3. anything else -> ``get_best_hit(query)`` (keyword search)

    :param query: The search query (accession number, keyword, or FASTA sequence).
                  Leading and trailing whitespace is ignored.
    :param threshold: E-value threshold, only used for the FASTA/BLAST route.
    :return: The result of the delegated lookup, or None if the query is
             not a string or is empty after stripping.
    """
    # Strip before the emptiness check so a whitespace-only query is rejected
    # instead of being forwarded as an empty keyword search.
    query = query.strip() if isinstance(query, str) else ""
    if not query:
        logger.error("Empty or non-string input.")
        return None

    # FASTA record (header line) or a bare sequence made only of the 20
    # standard amino-acid letters and whitespace.
    # NOTE(review): a keyword spelled only with those letters (e.g. "kinase")
    # is still routed to BLAST; a minimum sequence length may be worth
    # adding once the expected inputs are confirmed.
    is_sequence = query.startswith(">") or re.fullmatch(
        r"[ACDEFGHIKLMNPQRSTVWY\s]+", query, re.I
    )
    if is_sequence:
        return self.get_by_fasta(query, threshold)

    # UniProt accession format: 6 characters starting with O/P/Q, or 6 or 10
    # characters starting with any other letter. A digit is always required
    # in second position, so ordinary words such as "insulin" no longer
    # match, while P/Q/O accessions such as "P01308" now do.
    is_accession = re.fullmatch(
        r"[OPQ][0-9][A-Z0-9]{3}[0-9]"
        r"|[A-NR-Z][0-9](?:[A-Z][A-Z0-9]{2}[0-9]){1,2}",
        query,
        re.I,
    )
    if is_accession:
        return self.get_by_accession(query)

    # Otherwise treat the query as a free-text keyword.
    return self.get_best_hit(query)
0 commit comments