Skip to content

Commit 9e53827

Browse files
committed
Add tx_status field in tx segment output
1 parent 31b379b commit 9e53827

File tree

2 files changed

+50
-0
lines changed

2 files changed

+50
-0
lines changed

src/cool_seq_tool/mappers/exon_genomic_coords.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
CoordinateType,
1515
ServiceMeta,
1616
Strand,
17+
TranscriptPriority,
1718
)
1819
from cool_seq_tool.sources.mane_transcript_mappings import ManeTranscriptMappings
1920
from cool_seq_tool.sources.uta_database import GenomicAlnData, UtaDatabase
@@ -113,6 +114,9 @@ class GenomicTxSeg(BaseModelForbidExtra):
113114
)
114115
genomic_ac: StrictStr | None = Field(None, description="RefSeq genomic accession.")
115116
tx_ac: StrictStr | None = Field(None, description="RefSeq transcript accession.")
117+
tx_status: TranscriptPriority | None = Field(
118+
None, description="Transcript priority for RefSeq transcript accession"
119+
)
116120
strand: Strand | None = Field(
117121
None, description="The strand that the transcript accession exists on."
118122
)
@@ -144,6 +148,7 @@ def check_errors(cls, values: dict) -> dict: # noqa: N805
144148
"gene": "TPM3",
145149
"genomic_ac": "NC_000001.11",
146150
"tx_ac": "NM_152263.3",
151+
"tx_status": "longest_compatible_remaining",
147152
"strand": -1,
148153
"seg": {
149154
"exon_ord": 0,
@@ -172,6 +177,9 @@ class GenomicTxSegService(BaseModelForbidExtra):
172177
)
173178
genomic_ac: StrictStr | None = Field(None, description="RefSeq genomic accession.")
174179
tx_ac: StrictStr | None = Field(None, description="RefSeq transcript accession.")
180+
tx_status: TranscriptPriority | None = Field(
181+
None, description="Transcript priority for RefSeq transcript accession"
182+
)
175183
strand: Strand | None = Field(
176184
None, description="The strand that the transcript exists on."
177185
)
@@ -211,6 +219,7 @@ def add_meta_check_errors(cls, values: dict) -> dict: # noqa: N805
211219
"gene": "TPM3",
212220
"genomic_ac": "NC_000001.11",
213221
"tx_ac": "NM_152263.3",
222+
"tx_status": "longest_compatible_remaining",
214223
"strand": -1,
215224
"seg_start": {
216225
"exon_ord": 0,
@@ -296,6 +305,22 @@ def __init__(
296305
self.mane_transcript_mappings = mane_transcript_mappings
297306
self.liftover = liftover
298307

308+
def _get_mane_status_transcript(self, tx_ac: str) -> TranscriptPriority:
    """Determine the transcript priority for a RefSeq transcript accession.

    The accession is looked up through
    ``self.mane_transcript_mappings.get_mane_from_transcripts``; only the
    first record returned is consulted.

    :param tx_ac: A RefSeq transcript accession
    :return: ``TranscriptPriority.LONGEST_COMPATIBLE_REMAINING`` when the
        lookup returns nothing, ``TranscriptPriority.MANE_SELECT`` when the
        first record's ``MANE_status`` is ``"MANE Select"``, and
        ``TranscriptPriority.MANE_PLUS_CLINICAL`` for any other status value
    """
    mane_records = self.mane_transcript_mappings.get_mane_from_transcripts([tx_ac])

    # No MANE record for this accession: fall back to the non-MANE priority.
    if not mane_records:
        return TranscriptPriority.LONGEST_COMPATIBLE_REMAINING

    mane_status = mane_records[0]["MANE_status"]
    if mane_status == "MANE Select":
        return TranscriptPriority.MANE_SELECT

    # Every status other than "MANE Select" is reported as MANE Plus Clinical.
    return TranscriptPriority.MANE_PLUS_CLINICAL
323+
299324
async def tx_segment_to_genomic(
300325
self,
301326
transcript: str,
@@ -431,6 +456,7 @@ async def tx_segment_to_genomic(
431456
gene=gene,
432457
genomic_ac=genomic_ac,
433458
tx_ac=transcript,
459+
tx_status=self._get_mane_status_transcript(transcript),
434460
strand=strand,
435461
seg_start=seg_start,
436462
seg_end=seg_end,
@@ -522,6 +548,7 @@ async def genomic_to_tx_segment(
522548
params["gene"] = start_tx_seg_data.gene
523549
params["genomic_ac"] = start_tx_seg_data.genomic_ac
524550
params["tx_ac"] = start_tx_seg_data.tx_ac
551+
params["tx_status"] = start_tx_seg_data.tx_status
525552
params["strand"] = start_tx_seg_data.strand
526553
params["seg_start"] = start_tx_seg_data.seg
527554
else:
@@ -557,6 +584,7 @@ async def genomic_to_tx_segment(
557584
params["gene"] = end_tx_seg_data.gene
558585
params["genomic_ac"] = end_tx_seg_data.genomic_ac
559586
params["tx_ac"] = end_tx_seg_data.tx_ac
587+
params["tx_status"] = end_tx_seg_data.tx_status
560588
params["strand"] = end_tx_seg_data.strand
561589

562590
params["seg_end"] = end_tx_seg_data.seg
@@ -962,6 +990,7 @@ async def _genomic_to_tx_segment(
962990
gene=gene,
963991
genomic_ac=genomic_ac,
964992
tx_ac=transcript,
993+
tx_status=self._get_mane_status_transcript(transcript),
965994
strand=strand,
966995
seg=TxSegment(
967996
exon_ord=exon_num,

tests/mappers/test_exon_genomic_coords.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ def tpm3_exon1():
172172
"gene": "TPM3",
173173
"genomic_ac": "NC_000001.11",
174174
"tx_ac": "NM_152263.3",
175+
"tx_status": "longest_compatible_remaining",
175176
"strand": -1,
176177
"seg": {
177178
"exon_ord": 0,
@@ -197,6 +198,7 @@ def tpm3_exon8():
197198
"gene": "TPM3",
198199
"genomic_ac": "NC_000001.11",
199200
"tx_ac": "NM_152263.3",
201+
"tx_status": "longest_compatible_remaining",
200202
"strand": -1,
201203
"seg": {
202204
"exon_ord": 7,
@@ -222,6 +224,7 @@ def tpm3_exon1_g(tpm3_exon1):
222224
"gene": tpm3_exon1.gene,
223225
"genomic_ac": tpm3_exon1.genomic_ac,
224226
"tx_ac": tpm3_exon1.tx_ac,
227+
"tx_status": tpm3_exon1.tx_status,
225228
"strand": tpm3_exon1.strand,
226229
"seg_start": tpm3_exon1.seg,
227230
}
@@ -235,6 +238,7 @@ def tpm3_exon8_g(tpm3_exon8):
235238
"gene": tpm3_exon8.gene,
236239
"genomic_ac": tpm3_exon8.genomic_ac,
237240
"tx_ac": tpm3_exon8.tx_ac,
241+
"tx_status": tpm3_exon8.tx_status,
238242
"strand": tpm3_exon8.strand,
239243
"seg_end": tpm3_exon8.seg,
240244
}
@@ -248,6 +252,7 @@ def tpm3_exon1_exon8(tpm3_exon1, tpm3_exon8):
248252
"gene": tpm3_exon8.gene,
249253
"genomic_ac": tpm3_exon8.genomic_ac,
250254
"tx_ac": tpm3_exon8.tx_ac,
255+
"tx_status": tpm3_exon8.tx_status,
251256
"strand": tpm3_exon8.strand,
252257
"seg_start": tpm3_exon1.seg,
253258
"seg_end": tpm3_exon8.seg,
@@ -269,6 +274,7 @@ def tpm3_exon1_exon8_offset(tpm3_exon1, tpm3_exon8):
269274
"gene": "TPM3",
270275
"genomic_ac": "NC_000001.11",
271276
"tx_ac": "NM_152263.3",
277+
"tx_status": "longest_compatible_remaining",
272278
"strand": -1,
273279
"seg_start": tpm3_exon1_cpy.seg,
274280
"seg_end": tpm3_exon8_cpy.seg,
@@ -283,6 +289,7 @@ def mane_braf():
283289
"gene": "BRAF",
284290
"genomic_ac": "NC_000007.14",
285291
"tx_ac": "NM_004333.6",
292+
"tx_status": "mane_select",
286293
"strand": -1,
287294
"seg_start": {
288295
"exon_ord": 5,
@@ -321,6 +328,7 @@ def wee1_exon2_exon11():
321328
"gene": "WEE1",
322329
"genomic_ac": "NC_000011.10",
323330
"tx_ac": "NM_003390.3",
331+
"tx_status": "longest_compatible_remaining",
324332
"strand": 1,
325333
"seg_start": {
326334
"exon_ord": 1,
@@ -359,6 +367,7 @@ def mane_wee1_exon2_exon11():
359367
"gene": "WEE1",
360368
"genomic_ac": "NC_000011.10",
361369
"tx_ac": "NM_003390.4",
370+
"tx_status": "mane_select",
362371
"strand": 1,
363372
"seg_start": {
364373
"exon_ord": 1,
@@ -397,6 +406,7 @@ def ntrk1_exon10_exon17():
397406
"gene": "NTRK1",
398407
"genomic_ac": "NC_000001.11",
399408
"tx_ac": "NM_002529.3",
409+
"tx_status": "longest_compatible_remaining",
400410
"strand": 1,
401411
"seg_start": {
402412
"exon_ord": 9,
@@ -435,6 +445,7 @@ def zbtb10_exon3_end():
435445
"gene": "ZBTB10",
436446
"genomic_ac": "NC_000008.11",
437447
"tx_ac": "NM_001105539.3",
448+
"tx_status": "mane_select",
438449
"strand": 1,
439450
"seg_start": None,
440451
"seg_end": {
@@ -461,6 +472,7 @@ def zbtb10_exon5_start():
461472
"gene": "ZBTB10",
462473
"genomic_ac": "NC_000008.11",
463474
"tx_ac": "NM_001105539.3",
475+
"tx_status": "mane_select",
464476
"strand": 1,
465477
"seg_start": {
466478
"exon_ord": 4,
@@ -487,6 +499,7 @@ def tpm3_exon6_end():
487499
"gene": "TPM3",
488500
"genomic_ac": "NC_000001.11",
489501
"tx_ac": "NM_152263.4",
502+
"tx_status": "mane_select",
490503
"strand": -1,
491504
"seg_start": None,
492505
"seg_end": {
@@ -513,6 +526,7 @@ def tpm3_exon5_start():
513526
"gene": "TPM3",
514527
"genomic_ac": "NC_000001.11",
515528
"tx_ac": "NM_152263.4",
529+
"tx_status": "mane_select",
516530
"strand": -1,
517531
"seg_start": {
518532
"exon_ord": 4,
@@ -539,6 +553,7 @@ def gusbp3_exon2_end():
539553
"gene": "GUSBP3",
540554
"genomic_ac": "NC_000005.10",
541555
"tx_ac": "NR_027386.2",
556+
"tx_status": "longest_compatible_remaining",
542557
"strand": -1,
543558
"seg_start": None,
544559
"seg_end": {
@@ -565,6 +580,7 @@ def eln_grch38_intronic():
565580
"gene": "ELN",
566581
"genomic_ac": "NC_000007.14",
567582
"tx_ac": "NM_000501.4",
583+
"tx_status": "mane_select",
568584
"strand": 1,
569585
"seg_start": {
570586
"exon_ord": 0,
@@ -603,6 +619,7 @@ def gusbp3_exon5_start():
603619
"gene": "GUSBP3",
604620
"genomic_ac": "NC_000005.10",
605621
"tx_ac": "NR_027386.2",
622+
"tx_status": "longest_compatible_remaining",
606623
"strand": -1,
607624
"seg_start": {
608625
"exon_ord": 4,
@@ -645,6 +662,7 @@ def genomic_tx_seg_service_checks(actual, expected=None, is_valid=True):
645662
assert actual.gene == expected.gene
646663
assert actual.genomic_ac == expected.genomic_ac
647664
assert actual.tx_ac == expected.tx_ac
665+
assert actual.tx_status == expected.tx_status
648666
assert actual.strand == expected.strand
649667

650668
for seg_attr in ["seg_start", "seg_end"]:
@@ -676,6 +694,7 @@ def genomic_tx_seg_service_checks(actual, expected=None, is_valid=True):
676694
assert actual.gene is None
677695
assert actual.genomic_ac is None
678696
assert actual.tx_ac is None
697+
assert actual.tx_status is None
679698
assert actual.strand is None
680699
assert actual.seg_start is None
681700
assert actual.seg_end is None
@@ -720,6 +739,7 @@ def genomic_tx_seg_checks(actual, expected=None, is_valid=True):
720739
assert actual.gene == expected.gene
721740
assert actual.genomic_ac == expected.genomic_ac
722741
assert actual.tx_ac == expected.tx_ac
742+
assert actual.tx_status == expected.tx_status
723743
assert actual.strand == expected.strand
724744

725745
expected_seg = expected.seg
@@ -747,6 +767,7 @@ def genomic_tx_seg_checks(actual, expected=None, is_valid=True):
747767
assert actual.gene is None
748768
assert actual.genomic_ac is None
749769
assert actual.tx_ac is None
770+
assert actual.tx_status is None
750771
assert actual.strand is None
751772
assert actual.seg is None
752773
assert len(actual.errors) > 0

0 commit comments

Comments
 (0)