diff --git a/src/cool_seq_tool/mappers/exon_genomic_coords.py b/src/cool_seq_tool/mappers/exon_genomic_coords.py index 4c52890..07e218b 100644 --- a/src/cool_seq_tool/mappers/exon_genomic_coords.py +++ b/src/cool_seq_tool/mappers/exon_genomic_coords.py @@ -14,6 +14,7 @@ CoordinateType, ServiceMeta, Strand, + TranscriptPriority, ) from cool_seq_tool.sources.mane_transcript_mappings import ManeTranscriptMappings from cool_seq_tool.sources.uta_database import GenomicAlnData, UtaDatabase @@ -113,6 +114,9 @@ class GenomicTxSeg(BaseModelForbidExtra): ) genomic_ac: StrictStr | None = Field(None, description="RefSeq genomic accession.") tx_ac: StrictStr | None = Field(None, description="RefSeq transcript accession.") + tx_status: TranscriptPriority | None = Field( + None, description="Transcript priority for RefSeq transcript accession" + ) strand: Strand | None = Field( None, description="The strand that the transcript accession exists on." ) @@ -144,6 +148,7 @@ def check_errors(cls, values: dict) -> dict: # noqa: N805 "gene": "TPM3", "genomic_ac": "NC_000001.11", "tx_ac": "NM_152263.3", + "tx_status": "longest_compatible_remaining", "strand": -1, "seg": { "exon_ord": 0, @@ -172,6 +177,9 @@ class GenomicTxSegService(BaseModelForbidExtra): ) genomic_ac: StrictStr | None = Field(None, description="RefSeq genomic accession.") tx_ac: StrictStr | None = Field(None, description="RefSeq transcript accession.") + tx_status: TranscriptPriority | None = Field( + None, description="Transcript priority for RefSeq transcript accession" + ) strand: Strand | None = Field( None, description="The strand that the transcript exists on." 
) @@ -211,6 +219,7 @@ def add_meta_check_errors(cls, values: dict) -> dict: # noqa: N805 "gene": "TPM3", "genomic_ac": "NC_000001.11", "tx_ac": "NM_152263.3", + "tx_status": "longest_compatible_remaining", "strand": -1, "seg_start": { "exon_ord": 0, @@ -431,6 +440,7 @@ async def tx_segment_to_genomic( gene=gene, genomic_ac=genomic_ac, tx_ac=transcript, + tx_status=self.mane_transcript_mappings.get_transcript_status(transcript), strand=strand, seg_start=seg_start, seg_end=seg_end, @@ -522,6 +532,7 @@ async def genomic_to_tx_segment( params["gene"] = start_tx_seg_data.gene params["genomic_ac"] = start_tx_seg_data.genomic_ac params["tx_ac"] = start_tx_seg_data.tx_ac + params["tx_status"] = start_tx_seg_data.tx_status params["strand"] = start_tx_seg_data.strand params["seg_start"] = start_tx_seg_data.seg else: @@ -557,6 +568,7 @@ async def genomic_to_tx_segment( params["gene"] = end_tx_seg_data.gene params["genomic_ac"] = end_tx_seg_data.genomic_ac params["tx_ac"] = end_tx_seg_data.tx_ac + params["tx_status"] = end_tx_seg_data.tx_status params["strand"] = end_tx_seg_data.strand params["seg_end"] = end_tx_seg_data.seg @@ -962,6 +974,7 @@ async def _genomic_to_tx_segment( gene=gene, genomic_ac=genomic_ac, tx_ac=transcript, + tx_status=self.mane_transcript_mappings.get_transcript_status(transcript), strand=strand, seg=TxSegment( exon_ord=exon_num, diff --git a/src/cool_seq_tool/sources/mane_transcript_mappings.py b/src/cool_seq_tool/sources/mane_transcript_mappings.py index 2199afe..d38edfb 100644 --- a/src/cool_seq_tool/sources/mane_transcript_mappings.py +++ b/src/cool_seq_tool/sources/mane_transcript_mappings.py @@ -8,7 +8,7 @@ import polars as pl from cool_seq_tool.resources.data_files import DataFile, get_data_file -from cool_seq_tool.schemas import ManeGeneData +from cool_seq_tool.schemas import ManeGeneData, TranscriptPriority _logger = logging.getLogger(__name__) @@ -85,6 +85,22 @@ def get_mane_from_transcripts(self, transcripts: list[str]) -> list[dict]: return 
def get_transcript_status(self, tx_ac: str) -> TranscriptPriority:
    """Get the transcript priority implied by a transcript's MANE status.

    Looks up ``tx_ac`` via ``get_mane_from_transcripts`` and inspects the
    ``MANE_status`` value of the first row returned.

    :param tx_ac: A RefSeq transcript accession
    :return: ``TranscriptPriority.MANE_SELECT`` when the status is
        "MANE Select", ``TranscriptPriority.MANE_PLUS_CLINICAL`` when it is
        "MANE Plus Clinical", and
        ``TranscriptPriority.LONGEST_COMPATIBLE_REMAINING`` when no MANE row is
        found for the transcript or the status value is not recognized
    """
    mane_rows = self.get_mane_from_transcripts([tx_ac])
    if not mane_rows:
        return TranscriptPriority.LONGEST_COMPATIBLE_REMAINING

    mane_status = mane_rows[0]["MANE_status"]
    if mane_status == "MANE Select":
        return TranscriptPriority.MANE_SELECT
    if mane_status == "MANE Plus Clinical":
        return TranscriptPriority.MANE_PLUS_CLINICAL

    # Do not assume "anything that is not MANE Select is MANE Plus Clinical":
    # an unexpected or missing status must not be reported as a MANE
    # designation, so fall back to the non-MANE priority and make it visible.
    _logger.warning(
        "Unrecognized MANE status %r for transcript %s", mane_status, tx_ac
    )
    return TranscriptPriority.LONGEST_COMPATIBLE_REMAINING
def tpm3_exon1_exon8_offset(tpm3_exon1, tpm3_exon8): "gene": "TPM3", "genomic_ac": "NC_000001.11", "tx_ac": "NM_152263.3", + "tx_status": "longest_compatible_remaining", "strand": -1, "seg_start": tpm3_exon1_cpy.seg, "seg_end": tpm3_exon8_cpy.seg, @@ -283,6 +289,7 @@ def mane_braf(): "gene": "BRAF", "genomic_ac": "NC_000007.14", "tx_ac": "NM_004333.6", + "tx_status": "mane_select", "strand": -1, "seg_start": { "exon_ord": 5, @@ -321,6 +328,7 @@ def wee1_exon2_exon11(): "gene": "WEE1", "genomic_ac": "NC_000011.10", "tx_ac": "NM_003390.3", + "tx_status": "longest_compatible_remaining", "strand": 1, "seg_start": { "exon_ord": 1, @@ -359,6 +367,7 @@ def mane_wee1_exon2_exon11(): "gene": "WEE1", "genomic_ac": "NC_000011.10", "tx_ac": "NM_003390.4", + "tx_status": "mane_select", "strand": 1, "seg_start": { "exon_ord": 1, @@ -397,6 +406,7 @@ def ntrk1_exon10_exon17(): "gene": "NTRK1", "genomic_ac": "NC_000001.11", "tx_ac": "NM_002529.3", + "tx_status": "longest_compatible_remaining", "strand": 1, "seg_start": { "exon_ord": 9, @@ -435,6 +445,7 @@ def zbtb10_exon3_end(): "gene": "ZBTB10", "genomic_ac": "NC_000008.11", "tx_ac": "NM_001105539.3", + "tx_status": "mane_select", "strand": 1, "seg_start": None, "seg_end": { @@ -461,6 +472,7 @@ def zbtb10_exon5_start(): "gene": "ZBTB10", "genomic_ac": "NC_000008.11", "tx_ac": "NM_001105539.3", + "tx_status": "mane_select", "strand": 1, "seg_start": { "exon_ord": 4, @@ -487,6 +499,7 @@ def tpm3_exon6_end(): "gene": "TPM3", "genomic_ac": "NC_000001.11", "tx_ac": "NM_152263.4", + "tx_status": "mane_select", "strand": -1, "seg_start": None, "seg_end": { @@ -513,6 +526,7 @@ def tpm3_exon5_start(): "gene": "TPM3", "genomic_ac": "NC_000001.11", "tx_ac": "NM_152263.4", + "tx_status": "mane_select", "strand": -1, "seg_start": { "exon_ord": 4, @@ -539,6 +553,7 @@ def gusbp3_exon2_end(): "gene": "GUSBP3", "genomic_ac": "NC_000005.10", "tx_ac": "NR_027386.2", + "tx_status": "longest_compatible_remaining", "strand": -1, "seg_start": None, 
"seg_end": { @@ -565,6 +580,7 @@ def eln_grch38_intronic(): "gene": "ELN", "genomic_ac": "NC_000007.14", "tx_ac": "NM_000501.4", + "tx_status": "mane_select", "strand": 1, "seg_start": { "exon_ord": 0, @@ -603,6 +619,7 @@ def gusbp3_exon5_start(): "gene": "GUSBP3", "genomic_ac": "NC_000005.10", "tx_ac": "NR_027386.2", + "tx_status": "longest_compatible_remaining", "strand": -1, "seg_start": { "exon_ord": 4, @@ -645,6 +662,7 @@ def genomic_tx_seg_service_checks(actual, expected=None, is_valid=True): assert actual.gene == expected.gene assert actual.genomic_ac == expected.genomic_ac assert actual.tx_ac == expected.tx_ac + assert actual.tx_status == expected.tx_status assert actual.strand == expected.strand for seg_attr in ["seg_start", "seg_end"]: @@ -676,6 +694,7 @@ def genomic_tx_seg_service_checks(actual, expected=None, is_valid=True): assert actual.gene is None assert actual.genomic_ac is None assert actual.tx_ac is None + assert actual.tx_status is None assert actual.strand is None assert actual.seg_start is None assert actual.seg_end is None @@ -720,6 +739,7 @@ def genomic_tx_seg_checks(actual, expected=None, is_valid=True): assert actual.gene == expected.gene assert actual.genomic_ac == expected.genomic_ac assert actual.tx_ac == expected.tx_ac + assert actual.tx_status == expected.tx_status assert actual.strand == expected.strand expected_seg = expected.seg @@ -747,6 +767,7 @@ def genomic_tx_seg_checks(actual, expected=None, is_valid=True): assert actual.gene is None assert actual.genomic_ac is None assert actual.tx_ac is None + assert actual.tx_status is None assert actual.strand is None assert actual.seg is None assert len(actual.errors) > 0 diff --git a/tests/sources/test_mane_transcript_mappings.py b/tests/sources/test_mane_transcript_mappings.py index 2565897..f0e51be 100644 --- a/tests/sources/test_mane_transcript_mappings.py +++ b/tests/sources/test_mane_transcript_mappings.py @@ -5,7 +5,7 @@ import polars as pl import pytest -from cool_seq_tool.schemas 
def test_get_transcript_status(test_mane_transcript_mappings):
    """Check the priority reported for a MANE Select and a non-MANE transcript."""
    # Transcript accession -> priority the lookup is expected to report
    expected_by_accession = {
        "NM_152263.4": TranscriptPriority.MANE_SELECT,
        "NM_152263.3": TranscriptPriority.LONGEST_COMPATIBLE_REMAINING,
    }
    for accession, expected_status in expected_by_accession.items():
        status = test_mane_transcript_mappings.get_transcript_status(accession)
        assert status == expected_status