Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions src/cool_seq_tool/mappers/exon_genomic_coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
CoordinateType,
ServiceMeta,
Strand,
TranscriptPriority,
)
from cool_seq_tool.sources.mane_transcript_mappings import ManeTranscriptMappings
from cool_seq_tool.sources.uta_database import GenomicAlnData, UtaDatabase
Expand Down Expand Up @@ -113,6 +114,9 @@ class GenomicTxSeg(BaseModelForbidExtra):
)
genomic_ac: StrictStr | None = Field(None, description="RefSeq genomic accession.")
tx_ac: StrictStr | None = Field(None, description="RefSeq transcript accession.")
tx_status: TranscriptPriority | None = Field(
None, description="Transcript priority for RefSeq transcript accession"
)
strand: Strand | None = Field(
None, description="The strand that the transcript accession exists on."
)
Expand Down Expand Up @@ -144,6 +148,7 @@ def check_errors(cls, values: dict) -> dict: # noqa: N805
"gene": "TPM3",
"genomic_ac": "NC_000001.11",
"tx_ac": "NM_152263.3",
"tx_status": "longest_compatible_remaining",
"strand": -1,
"seg": {
"exon_ord": 0,
Expand Down Expand Up @@ -172,6 +177,9 @@ class GenomicTxSegService(BaseModelForbidExtra):
)
genomic_ac: StrictStr | None = Field(None, description="RefSeq genomic accession.")
tx_ac: StrictStr | None = Field(None, description="RefSeq transcript accession.")
tx_status: TranscriptPriority | None = Field(
None, description="Transcript priority for RefSeq transcript accession"
)
strand: Strand | None = Field(
None, description="The strand that the transcript exists on."
)
Expand Down Expand Up @@ -211,6 +219,7 @@ def add_meta_check_errors(cls, values: dict) -> dict: # noqa: N805
"gene": "TPM3",
"genomic_ac": "NC_000001.11",
"tx_ac": "NM_152263.3",
"tx_status": "longest_compatible_remaining",
"strand": -1,
"seg_start": {
"exon_ord": 0,
Expand Down Expand Up @@ -431,6 +440,7 @@ async def tx_segment_to_genomic(
gene=gene,
genomic_ac=genomic_ac,
tx_ac=transcript,
tx_status=self.mane_transcript_mappings.get_transcript_status(transcript),
strand=strand,
seg_start=seg_start,
seg_end=seg_end,
Expand Down Expand Up @@ -522,6 +532,7 @@ async def genomic_to_tx_segment(
params["gene"] = start_tx_seg_data.gene
params["genomic_ac"] = start_tx_seg_data.genomic_ac
params["tx_ac"] = start_tx_seg_data.tx_ac
params["tx_status"] = start_tx_seg_data.tx_status
params["strand"] = start_tx_seg_data.strand
params["seg_start"] = start_tx_seg_data.seg
else:
Expand Down Expand Up @@ -557,6 +568,7 @@ async def genomic_to_tx_segment(
params["gene"] = end_tx_seg_data.gene
params["genomic_ac"] = end_tx_seg_data.genomic_ac
params["tx_ac"] = end_tx_seg_data.tx_ac
params["tx_status"] = end_tx_seg_data.tx_status
params["strand"] = end_tx_seg_data.strand

params["seg_end"] = end_tx_seg_data.seg
Expand Down Expand Up @@ -962,6 +974,7 @@ async def _genomic_to_tx_segment(
gene=gene,
genomic_ac=genomic_ac,
tx_ac=transcript,
tx_status=self.mane_transcript_mappings.get_transcript_status(transcript),
strand=strand,
seg=TxSegment(
exon_ord=exon_num,
Expand Down
18 changes: 17 additions & 1 deletion src/cool_seq_tool/sources/mane_transcript_mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import polars as pl

from cool_seq_tool.resources.data_files import DataFile, get_data_file
from cool_seq_tool.schemas import ManeGeneData
from cool_seq_tool.schemas import ManeGeneData, TranscriptPriority

_logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -85,6 +85,22 @@ def get_mane_from_transcripts(self, transcripts: list[str]) -> list[dict]:
return []
return mane_rows.to_dicts()

def get_transcript_status(self, tx_ac: str) -> TranscriptPriority:
    """Determine the transcript priority for a RefSeq transcript accession.

    The accession is looked up via ``get_mane_from_transcripts``; only the first
    returned record is inspected.

    :param tx_ac: A RefSeq transcript accession
    :return: ``TranscriptPriority.LONGEST_COMPATIBLE_REMAINING`` when no MANE
        record is returned for ``tx_ac``; ``TranscriptPriority.MANE_SELECT`` when
        the first record's ``MANE_status`` equals ``"MANE Select"``; otherwise
        ``TranscriptPriority.MANE_PLUS_CLINICAL``
    """
    mane_rows = self.get_mane_from_transcripts([tx_ac])

    # No MANE record for this accession: report the fallback priority.
    if not mane_rows:
        return TranscriptPriority.LONGEST_COMPATIBLE_REMAINING

    # Every status other than "MANE Select" is reported as MANE Plus Clinical.
    if mane_rows[0]["MANE_status"] == "MANE Select":
        return TranscriptPriority.MANE_SELECT
    return TranscriptPriority.MANE_PLUS_CLINICAL

def get_mane_data_from_chr_pos(
self, alt_ac: str, start: int, end: int
) -> list[dict]:
Expand Down
21 changes: 21 additions & 0 deletions tests/mappers/test_exon_genomic_coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ def tpm3_exon1():
"gene": "TPM3",
"genomic_ac": "NC_000001.11",
"tx_ac": "NM_152263.3",
"tx_status": "longest_compatible_remaining",
"strand": -1,
"seg": {
"exon_ord": 0,
Expand All @@ -197,6 +198,7 @@ def tpm3_exon8():
"gene": "TPM3",
"genomic_ac": "NC_000001.11",
"tx_ac": "NM_152263.3",
"tx_status": "longest_compatible_remaining",
"strand": -1,
"seg": {
"exon_ord": 7,
Expand All @@ -222,6 +224,7 @@ def tpm3_exon1_g(tpm3_exon1):
"gene": tpm3_exon1.gene,
"genomic_ac": tpm3_exon1.genomic_ac,
"tx_ac": tpm3_exon1.tx_ac,
"tx_status": tpm3_exon1.tx_status,
"strand": tpm3_exon1.strand,
"seg_start": tpm3_exon1.seg,
}
Expand All @@ -235,6 +238,7 @@ def tpm3_exon8_g(tpm3_exon8):
"gene": tpm3_exon8.gene,
"genomic_ac": tpm3_exon8.genomic_ac,
"tx_ac": tpm3_exon8.tx_ac,
"tx_status": tpm3_exon8.tx_status,
"strand": tpm3_exon8.strand,
"seg_end": tpm3_exon8.seg,
}
Expand All @@ -248,6 +252,7 @@ def tpm3_exon1_exon8(tpm3_exon1, tpm3_exon8):
"gene": tpm3_exon8.gene,
"genomic_ac": tpm3_exon8.genomic_ac,
"tx_ac": tpm3_exon8.tx_ac,
"tx_status": tpm3_exon8.tx_status,
"strand": tpm3_exon8.strand,
"seg_start": tpm3_exon1.seg,
"seg_end": tpm3_exon8.seg,
Expand All @@ -269,6 +274,7 @@ def tpm3_exon1_exon8_offset(tpm3_exon1, tpm3_exon8):
"gene": "TPM3",
"genomic_ac": "NC_000001.11",
"tx_ac": "NM_152263.3",
"tx_status": "longest_compatible_remaining",
"strand": -1,
"seg_start": tpm3_exon1_cpy.seg,
"seg_end": tpm3_exon8_cpy.seg,
Expand All @@ -283,6 +289,7 @@ def mane_braf():
"gene": "BRAF",
"genomic_ac": "NC_000007.14",
"tx_ac": "NM_004333.6",
"tx_status": "mane_select",
"strand": -1,
"seg_start": {
"exon_ord": 5,
Expand Down Expand Up @@ -321,6 +328,7 @@ def wee1_exon2_exon11():
"gene": "WEE1",
"genomic_ac": "NC_000011.10",
"tx_ac": "NM_003390.3",
"tx_status": "longest_compatible_remaining",
"strand": 1,
"seg_start": {
"exon_ord": 1,
Expand Down Expand Up @@ -359,6 +367,7 @@ def mane_wee1_exon2_exon11():
"gene": "WEE1",
"genomic_ac": "NC_000011.10",
"tx_ac": "NM_003390.4",
"tx_status": "mane_select",
"strand": 1,
"seg_start": {
"exon_ord": 1,
Expand Down Expand Up @@ -397,6 +406,7 @@ def ntrk1_exon10_exon17():
"gene": "NTRK1",
"genomic_ac": "NC_000001.11",
"tx_ac": "NM_002529.3",
"tx_status": "longest_compatible_remaining",
"strand": 1,
"seg_start": {
"exon_ord": 9,
Expand Down Expand Up @@ -435,6 +445,7 @@ def zbtb10_exon3_end():
"gene": "ZBTB10",
"genomic_ac": "NC_000008.11",
"tx_ac": "NM_001105539.3",
"tx_status": "mane_select",
"strand": 1,
"seg_start": None,
"seg_end": {
Expand All @@ -461,6 +472,7 @@ def zbtb10_exon5_start():
"gene": "ZBTB10",
"genomic_ac": "NC_000008.11",
"tx_ac": "NM_001105539.3",
"tx_status": "mane_select",
"strand": 1,
"seg_start": {
"exon_ord": 4,
Expand All @@ -487,6 +499,7 @@ def tpm3_exon6_end():
"gene": "TPM3",
"genomic_ac": "NC_000001.11",
"tx_ac": "NM_152263.4",
"tx_status": "mane_select",
"strand": -1,
"seg_start": None,
"seg_end": {
Expand All @@ -513,6 +526,7 @@ def tpm3_exon5_start():
"gene": "TPM3",
"genomic_ac": "NC_000001.11",
"tx_ac": "NM_152263.4",
"tx_status": "mane_select",
"strand": -1,
"seg_start": {
"exon_ord": 4,
Expand All @@ -539,6 +553,7 @@ def gusbp3_exon2_end():
"gene": "GUSBP3",
"genomic_ac": "NC_000005.10",
"tx_ac": "NR_027386.2",
"tx_status": "longest_compatible_remaining",
"strand": -1,
"seg_start": None,
"seg_end": {
Expand All @@ -565,6 +580,7 @@ def eln_grch38_intronic():
"gene": "ELN",
"genomic_ac": "NC_000007.14",
"tx_ac": "NM_000501.4",
"tx_status": "mane_select",
"strand": 1,
"seg_start": {
"exon_ord": 0,
Expand Down Expand Up @@ -603,6 +619,7 @@ def gusbp3_exon5_start():
"gene": "GUSBP3",
"genomic_ac": "NC_000005.10",
"tx_ac": "NR_027386.2",
"tx_status": "longest_compatible_remaining",
"strand": -1,
"seg_start": {
"exon_ord": 4,
Expand Down Expand Up @@ -645,6 +662,7 @@ def genomic_tx_seg_service_checks(actual, expected=None, is_valid=True):
assert actual.gene == expected.gene
assert actual.genomic_ac == expected.genomic_ac
assert actual.tx_ac == expected.tx_ac
assert actual.tx_status == expected.tx_status
assert actual.strand == expected.strand

for seg_attr in ["seg_start", "seg_end"]:
Expand Down Expand Up @@ -676,6 +694,7 @@ def genomic_tx_seg_service_checks(actual, expected=None, is_valid=True):
assert actual.gene is None
assert actual.genomic_ac is None
assert actual.tx_ac is None
assert actual.tx_status is None
assert actual.strand is None
assert actual.seg_start is None
assert actual.seg_end is None
Expand Down Expand Up @@ -720,6 +739,7 @@ def genomic_tx_seg_checks(actual, expected=None, is_valid=True):
assert actual.gene == expected.gene
assert actual.genomic_ac == expected.genomic_ac
assert actual.tx_ac == expected.tx_ac
assert actual.tx_status == expected.tx_status
assert actual.strand == expected.strand

expected_seg = expected.seg
Expand Down Expand Up @@ -747,6 +767,7 @@ def genomic_tx_seg_checks(actual, expected=None, is_valid=True):
assert actual.gene is None
assert actual.genomic_ac is None
assert actual.tx_ac is None
assert actual.tx_status is None
assert actual.strand is None
assert actual.seg is None
assert len(actual.errors) > 0
Expand Down
11 changes: 10 additions & 1 deletion tests/sources/test_mane_transcript_mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import polars as pl
import pytest

from cool_seq_tool.schemas import ManeGeneData
from cool_seq_tool.schemas import ManeGeneData, TranscriptPriority


@pytest.fixture(scope="module")
Expand Down Expand Up @@ -168,6 +168,15 @@ def test_get_mane_from_transcripts(
assert resp == []


def test_get_transcript_status(test_mane_transcript_mappings):
    """Check the priority returned by get_transcript_status for two accessions"""
    get_status = test_mane_transcript_mappings.get_transcript_status

    # NM_152263.4 is expected to be reported as MANE Select
    select_status = get_status("NM_152263.4")
    assert select_status == TranscriptPriority.MANE_SELECT

    # NM_152263.3 is expected to get the longest-compatible-remaining priority
    fallback_status = get_status("NM_152263.3")
    assert fallback_status == TranscriptPriority.LONGEST_COMPATIBLE_REMAINING


def test_get_mane_data_from_chr_pos(
test_mane_transcript_mappings, braf_select, braf_plus_clinical
):
Expand Down