Skip to content

Commit a9c7422

Browse files
committed
Check if MANE transcript exists in UTA, if not select longest compatible
1 parent 6cf9577 commit a9c7422

File tree

4 files changed

+42
-8
lines changed

4 files changed

+42
-8
lines changed

src/cool_seq_tool/app.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ def __init__(
107107
self.ex_g_coords_mapper = ExonGenomicCoordsMapper(
108108
self.seqrepo_access,
109109
self.uta_db,
110+
self.mane_transcript,
110111
self.mane_transcript_mappings,
111112
self.liftover,
112113
)

src/cool_seq_tool/mappers/exon_genomic_coords.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@
77

88
from cool_seq_tool.handlers.seqrepo_access import SeqRepoAccess
99
from cool_seq_tool.mappers.liftover import LiftOver
10+
from cool_seq_tool.mappers.mane_transcript import ManeTranscript
1011
from cool_seq_tool.schemas import (
12+
AnnotationLayer,
1113
Assembly,
1214
BaseModelForbidExtra,
1315
ServiceMeta,
@@ -232,6 +234,7 @@ def __init__(
232234
self,
233235
seqrepo_access: SeqRepoAccess,
234236
uta_db: UtaDatabase,
237+
mane_transcript: ManeTranscript,
235238
mane_transcript_mappings: ManeTranscriptMappings,
236239
liftover: LiftOver,
237240
) -> None:
@@ -261,6 +264,7 @@ def __init__(
261264
"""
262265
self.seqrepo_access = seqrepo_access
263266
self.uta_db = uta_db
267+
self.mane_transcript = mane_transcript
264268
self.mane_transcript_mappings = mane_transcript_mappings
265269
self.liftover = liftover
266270

@@ -796,18 +800,23 @@ async def _genomic_to_tx_segment(
796800
mane_transcripts = self.mane_transcript_mappings.get_gene_mane_data(
797801
gene
798802
)
799-
800-
if mane_transcripts:
803+
if mane_transcripts and await self.uta_db.validate_mane_transcript_acc(
804+
mane_transcripts
805+
):
801806
transcript = mane_transcripts[0]["RefSeq_nuc"]
802807
else:
803808
# Attempt to find a coding transcript if a MANE transcript
804809
# cannot be found
805-
results = await self.uta_db.get_transcripts(
806-
gene=gene, alt_ac=genomic_ac
810+
results = (
811+
await self.mane_transcript.get_longest_compatible_transcript(
812+
start_pos=genomic_pos,
813+
end_pos=genomic_pos,
814+
start_annotation_layer=AnnotationLayer.GENOMIC,
815+
gene=gene,
816+
)
807817
)
808-
809-
if not results.is_empty():
810-
transcript = results[0]["tx_ac"][0]
818+
if results:
819+
transcript = results.refseq
811820
else:
812821
# Run if gene is for a noncoding transcript
813822
query = f"""
@@ -826,7 +835,6 @@ async def _genomic_to_tx_segment(
826835
f"Could not find a transcript for {gene} on {genomic_ac}"
827836
]
828837
)
829-
830838
tx_exons = await self._get_all_exon_coords(
831839
tx_ac=transcript, genomic_ac=genomic_ac
832840
)

src/cool_seq_tool/sources/uta_database.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,21 @@ async def validate_genomic_ac(self, ac: str) -> bool:
378378
result = await self.execute_query(query)
379379
return result[0][0]
380380

381+
async def validate_mane_transcript_acc(self, transcript_list: list[dict]) -> bool:
382+
"""Return whether or not a MANE transcript exists in UTA
383+
384+
:param transcript_list: A list of MANE transcripts
385+
:return ``True`` if transcript accession exists in UTA. ``False`` otherwise
386+
"""
387+
transcript = transcript_list[0]["RefSeq_nuc"]
388+
query = f"""
389+
SELECT *
390+
FROM {self.schema}.tx_exon_aln_v
391+
WHERE tx_ac = '{transcript}'
392+
""" # noqa: S608
393+
results = await self.execute_query(query)
394+
return bool(results)
395+
381396
async def get_ac_descr(self, ac: str) -> str | None:
382397
"""Return accession description. This is typically available only for accessions
383398
from older (pre-GRCh38) builds.

tests/mappers/test_exon_genomic_coords.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1044,6 +1044,16 @@ async def test_genomic_to_transcript(test_egc_mapper, tpm3_exon1, tpm3_exon8):
10441044
)
10451045
genomic_tx_seg_checks(resp, tpm3_exon8)
10461046

1047+
# Check if transcript exists in UTA. If not, return longest compatible transcript
1048+
resp = await test_egc_mapper._genomic_to_tx_segment(
1049+
22513522,
1050+
genomic_ac="NC_000016.10",
1051+
is_seg_start=False,
1052+
gene="NPIPB5",
1053+
get_nearest_transcript_junction=True,
1054+
)
1055+
assert resp.tx_ac == "NM_001135865.2"
1056+
10471057

10481058
@pytest.mark.asyncio()
10491059
async def test_tpm3(

0 commit comments

Comments
 (0)