diff --git a/src/cool_seq_tool/mappers/exon_genomic_coords.py b/src/cool_seq_tool/mappers/exon_genomic_coords.py index 189ec38..bbffc21 100644 --- a/src/cool_seq_tool/mappers/exon_genomic_coords.py +++ b/src/cool_seq_tool/mappers/exon_genomic_coords.py @@ -93,6 +93,9 @@ class GenomicTxSeg(BaseModelForbidExtra): ) genomic_ac: StrictStr | None = Field(None, description="RefSeq genomic accession.") tx_ac: StrictStr | None = Field(None, description="RefSeq transcript accession.") + strand: Strand | None = Field( + None, description="The strand that the transcript accession exists on." + ) errors: list[StrictStr] = Field([], description="Error messages.") @model_validator(mode="before") @@ -121,6 +124,7 @@ def check_errors(cls, values: dict) -> dict: # noqa: N805 "gene": "TPM3", "genomic_ac": "NC_000001.11", "tx_ac": "NM_152263.3", + "strand": -1, "seg": { "exon_ord": 0, "offset": 0, @@ -147,6 +151,9 @@ class GenomicTxSegService(BaseModelForbidExtra): ) genomic_ac: StrictStr | None = Field(None, description="RefSeq genomic accession.") tx_ac: StrictStr | None = Field(None, description="RefSeq transcript accession.") + strand: Strand | None = Field( + None, description="The strand that the transcript exists on." + ) seg_start: TxSegment | None = Field(None, description="Start transcript segment.") seg_end: TxSegment | None = Field(None, description="End transcript segment.") errors: list[StrictStr] = Field([], description="Error messages.") @@ -183,6 +190,7 @@ def add_meta_check_errors(cls, values: dict) -> dict: # noqa: N805 "gene": "TPM3", "genomic_ac": "NC_000001.11", "tx_ac": "NM_152263.3", + "strand": -1, "seg_start": { "exon_ord": 0, "offset": 0, @@ -400,6 +408,7 @@ async def tx_segment_to_genomic( gene=gene, genomic_ac=genomic_ac, tx_ac=transcript, + strand=strand, seg_start=seg_start, seg_end=seg_end, ) @@ -490,6 +499,7 @@ async def genomic_to_tx_segment( params["gene"] = start_tx_seg_data.gene params["genomic_ac"] = start_tx_seg_data.genomic_ac params["tx_ac"] = start_tx_seg_data.tx_ac + params["strand"] = start_tx_seg_data.strand params["seg_start"] = start_tx_seg_data.seg else: start_tx_seg_data = None @@ -524,6 +534,7 @@ async def genomic_to_tx_segment( params["gene"] = end_tx_seg_data.gene params["genomic_ac"] = end_tx_seg_data.genomic_ac params["tx_ac"] = end_tx_seg_data.tx_ac + params["strand"] = end_tx_seg_data.strand params["seg_end"] = end_tx_seg_data.seg @@ -902,6 +913,7 @@ async def _genomic_to_tx_segment( gene=gene, genomic_ac=genomic_ac, tx_ac=transcript, + strand=strand, seg=TxSegment( exon_ord=exon_num, offset=offset, diff --git a/tests/mappers/test_exon_genomic_coords.py b/tests/mappers/test_exon_genomic_coords.py index 114f3e1..ca737c9 100644 --- a/tests/mappers/test_exon_genomic_coords.py +++ b/tests/mappers/test_exon_genomic_coords.py @@ -171,6 +171,7 @@ def tpm3_exon1(): "gene": "TPM3", "genomic_ac": "NC_000001.11", "tx_ac": "NM_152263.3", + "strand": -1, "seg": { "exon_ord": 0, "offset": 0, @@ -194,6 +195,7 @@ def tpm3_exon8(): "gene": "TPM3", "genomic_ac": "NC_000001.11", "tx_ac": "NM_152263.3", + "strand": -1, "seg": { "exon_ord": 7, "offset": 0, @@ -217,6 +219,7 @@ def tpm3_exon1_g(tpm3_exon1): "gene": tpm3_exon1.gene, "genomic_ac": tpm3_exon1.genomic_ac, "tx_ac": tpm3_exon1.tx_ac, + "strand": tpm3_exon1.strand, "seg_start": tpm3_exon1.seg, } return GenomicTxSegService(**params) @@ -229,6 +232,7 @@ def tpm3_exon8_g(tpm3_exon8): "gene": tpm3_exon8.gene, "genomic_ac": tpm3_exon8.genomic_ac, "tx_ac": tpm3_exon8.tx_ac, + "strand": tpm3_exon8.strand, "seg_end": tpm3_exon8.seg, } return GenomicTxSegService(**params) @@ -241,6 +245,7 @@ def tpm3_exon1_exon8(tpm3_exon1, tpm3_exon8): "gene": tpm3_exon8.gene, "genomic_ac": tpm3_exon8.genomic_ac, "tx_ac": tpm3_exon8.tx_ac, + "strand": tpm3_exon8.strand, "seg_start": tpm3_exon1.seg, "seg_end": tpm3_exon8.seg, } @@ -261,6 +266,7 @@ def tpm3_exon1_exon8_offset(tpm3_exon1, tpm3_exon8): "gene": "TPM3", "genomic_ac": "NC_000001.11", "tx_ac": "NM_152263.3", + "strand": -1, "seg_start": tpm3_exon1_cpy.seg, "seg_end": tpm3_exon8_cpy.seg, } @@ -274,6 +280,7 @@ def mane_braf(): "gene": "BRAF", "genomic_ac": "NC_000007.14", "tx_ac": "NM_004333.6", + "strand": -1, "seg_start": { "exon_ord": 5, "offset": 1, @@ -309,6 +316,7 @@ def wee1_exon2_exon11(): "gene": "WEE1", "genomic_ac": "NC_000011.10", "tx_ac": "NM_003390.3", + "strand": 1, "seg_start": { "exon_ord": 1, "offset": 205, @@ -344,6 +352,7 @@ def mane_wee1_exon2_exon11(): "gene": "WEE1", "genomic_ac": "NC_000011.10", "tx_ac": "NM_003390.4", + "strand": 1, "seg_start": { "exon_ord": 1, "offset": 205, @@ -379,6 +388,7 @@ def ntrk1_exon10_exon17(): "gene": "NTRK1", "genomic_ac": "NC_000001.11", "tx_ac": "NM_002529.3", + "strand": 1, "seg_start": { "exon_ord": 9, "offset": 0, @@ -414,6 +424,7 @@ def zbtb10_exon3_end(): "gene": "ZBTB10", "genomic_ac": "NC_000008.11", "tx_ac": "NM_001105539.3", + "strand": 1, "seg_start": None, "seg_end": { "exon_ord": 2, @@ -438,6 +449,7 @@ def zbtb10_exon5_start(): "gene": "ZBTB10", "genomic_ac": "NC_000008.11", "tx_ac": "NM_001105539.3", + "strand": 1, "seg_start": { "exon_ord": 4, "offset": -201, @@ -462,6 +474,7 @@ def tpm3_exon6_end(): "gene": "TPM3", "genomic_ac": "NC_000001.11", "tx_ac": "NM_152263.4", + "strand": -1, "seg_start": None, "seg_end": { "exon_ord": 5, @@ -486,6 +499,7 @@ def tpm3_exon5_start(): "gene": "TPM3", "genomic_ac": "NC_000001.11", "tx_ac": "NM_152263.4", + "strand": -1, "seg_start": { "exon_ord": 4, "offset": -102, @@ -510,6 +524,7 @@ def gusbp3_exon2_end(): "gene": "GUSBP3", "genomic_ac": "NC_000005.10", "tx_ac": "NR_027386.2", + "strand": -1, "seg_start": None, "seg_end": { "exon_ord": 1, @@ -534,6 +549,7 @@ def eln_grch38_intronic(): "gene": "ELN", "genomic_ac": "NC_000007.14", "tx_ac": "NM_000501.4", + "strand": 1, "seg_start": { "exon_ord": 0, "offset": 1, @@ -569,6 +585,7 @@ def gusbp3_exon5_start(): "gene": "GUSBP3", "genomic_ac": "NC_000005.10", "tx_ac": "NR_027386.2", + "strand": -1, "seg_start": { "exon_ord": 4, "offset": -3589, @@ -609,6 +626,7 @@ def genomic_tx_seg_service_checks(actual, expected=None, is_valid=True): assert actual.gene == expected.gene assert actual.genomic_ac == expected.genomic_ac assert actual.tx_ac == expected.tx_ac + assert actual.strand == expected.strand for seg_attr in ["seg_start", "seg_end"]: expected_seg = getattr(expected, seg_attr) @@ -635,6 +653,7 @@ def genomic_tx_seg_service_checks(actual, expected=None, is_valid=True): assert actual.gene is None assert actual.genomic_ac is None assert actual.tx_ac is None + assert actual.strand is None assert actual.seg_start is None assert actual.seg_end is None assert len(actual.errors) > 0 @@ -678,6 +697,7 @@ def genomic_tx_seg_checks(actual, expected=None, is_valid=True): assert actual.gene == expected.gene assert actual.genomic_ac == expected.genomic_ac assert actual.tx_ac == expected.tx_ac + assert actual.strand == expected.strand expected_seg = expected.seg if expected_seg: @@ -700,6 +720,7 @@ def genomic_tx_seg_checks(actual, expected=None, is_valid=True): assert actual.gene is None assert actual.genomic_ac is None assert actual.tx_ac is None + assert actual.strand is None assert actual.seg is None assert len(actual.errors) > 0