From 6e0fc67f6368ba82f90cd4bc26430776d9ac4db1 Mon Sep 17 00:00:00 2001 From: jarbesfeld Date: Fri, 5 Sep 2025 13:50:59 -0400 Subject: [PATCH 1/4] Add is_exonic field to transcript segment output --- src/cool_seq_tool/mappers/exon_genomic_coords.py | 9 +++++++++ tests/mappers/test_exon_genomic_coords.py | 14 ++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/src/cool_seq_tool/mappers/exon_genomic_coords.py b/src/cool_seq_tool/mappers/exon_genomic_coords.py index 33ccd65..65d25ad 100644 --- a/src/cool_seq_tool/mappers/exon_genomic_coords.py +++ b/src/cool_seq_tool/mappers/exon_genomic_coords.py @@ -65,6 +65,7 @@ class TxSegment(BaseModelForbidExtra): genomic_location: SequenceLocation = Field( ..., description="The genomic position of a transcript segment." ) + is_exonic: bool = Field(default=True, description="If the position occurs on an exon") model_config = ConfigDict( json_schema_extra={ @@ -79,6 +80,7 @@ class TxSegment(BaseModelForbidExtra): }, "end": 154192135, }, + "is_exonic": True, } } ) @@ -136,6 +138,7 @@ def check_errors(cls, values: dict) -> dict: # noqa: N805 }, "end": 154192135, }, + "is_exonic": True, }, "errors": [], } @@ -202,6 +205,7 @@ def add_meta_check_errors(cls, values: dict) -> dict: # noqa: N805 }, "end": 154192135, }, + "is_exonic": True, }, "seg_end": { "exon_ord": 7, @@ -214,6 +218,7 @@ def add_meta_check_errors(cls, values: dict) -> dict: # noqa: N805 }, "start": 154170399, }, + "is_exonic": True, }, } } @@ -894,7 +899,9 @@ async def _genomic_to_tx_segment( # Check if breakpoint occurs on an exon. # If not, determine the adjacent exon given the selected transcript + is_exonic = True if not self._is_exonic_breakpoint(genomic_pos, tx_exons): + is_exonic = False exon_num = self._get_adjacent_exon( tx_exons_genomic_coords=tx_exons, strand=strand, @@ -925,6 +932,7 @@ async def _genomic_to_tx_segment( if err_msg: return GenomicTxSeg(errors=[err_msg]) + #print(is_exonic) return GenomicTxSeg( gene=gene, genomic_ac=genomic_ac, @@ -934,6 +942,7 @@ async def _genomic_to_tx_segment( exon_ord=exon_num, offset=offset, genomic_location=genomic_location, + is_exonic=is_exonic, ), ) diff --git a/tests/mappers/test_exon_genomic_coords.py b/tests/mappers/test_exon_genomic_coords.py index 578a88d..5a1ca7a 100644 --- a/tests/mappers/test_exon_genomic_coords.py +++ b/tests/mappers/test_exon_genomic_coords.py @@ -184,6 +184,7 @@ def tpm3_exon1(): }, "end": 154192135, }, + "is_exonic": True, }, } return GenomicTxSeg(**params) @@ -208,6 +209,7 @@ def tpm3_exon8(): }, "start": 154170399, }, + "is_exonic": True, }, } return GenomicTxSeg(**params) @@ -293,6 +295,7 @@ def mane_braf(): }, "end": 140801559, }, + "is_exonic": True, }, "seg_end": { "exon_ord": 14, @@ -305,6 +308,7 @@ def mane_braf(): }, "start": 140753336, }, + "is_exonic": True, }, } return GenomicTxSegService(**params) @@ -438,6 +442,7 @@ def zbtb10_exon3_end(): }, "end": 80514010, }, + "is_exonic": False, }, } return GenomicTxSegService(**params) @@ -462,6 +467,7 @@ def zbtb10_exon5_start(): }, "start": 80518580, }, + "is_exonic": False, }, "seg_end": None, } @@ -488,6 +494,7 @@ def tpm3_exon6_end(): }, "start": 154171410, }, + "is_exonic": False, }, } return GenomicTxSegService(**params) @@ -512,6 +519,7 @@ def tpm3_exon5_start(): }, "end": 154173080, }, + "is_exonic": False, }, "seg_end": None, } @@ -538,6 +546,7 @@ def gusbp3_exon2_end(): }, "start": 69680764, }, + "is_exonic": False, }, } return GenomicTxSegService(**params) @@ -562,6 +571,7 @@ def eln_grch38_intronic(): }, "start": 74028173, }, + "is_exonic": True }, "seg_end": { "exon_ord": 7, @@ -574,6 +584,7 @@ def eln_grch38_intronic(): }, "end": 74043599, }, + "is_exonic": False }, } return GenomicTxSegService(**params) @@ -598,6 +609,7 @@ def gusbp3_exon5_start(): }, "end": 69645878, }, + "is_exonic": False }, "seg_end": None, } @@ -648,6 +660,7 @@ def genomic_tx_seg_service_checks(actual, expected=None, is_valid=True): assert ( actual_seg.genomic_location.end == expected_seg.genomic_location.end ) + assert actual_seg.is_exonic == expected_seg.is_exonic assert actual.errors == expected.errors else: @@ -715,6 +728,7 @@ def genomic_tx_seg_checks(actual, expected=None, is_valid=True): actual_seg.genomic_location.start == expected_seg.genomic_location.start ) assert actual_seg.genomic_location.end == expected_seg.genomic_location.end + assert actual_seg.is_exonic == expected_seg.is_exonic assert actual.errors == expected.errors else: From dc9706ffd423355702c9a38ba74d4d85af7c63d7 Mon Sep 17 00:00:00 2001 From: jarbesfeld Date: Fri, 5 Sep 2025 13:51:31 -0400 Subject: [PATCH 2/4] Remove print statement --- src/cool_seq_tool/mappers/exon_genomic_coords.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/cool_seq_tool/mappers/exon_genomic_coords.py b/src/cool_seq_tool/mappers/exon_genomic_coords.py index 65d25ad..0abd334 100644 --- a/src/cool_seq_tool/mappers/exon_genomic_coords.py +++ b/src/cool_seq_tool/mappers/exon_genomic_coords.py @@ -932,7 +932,6 @@ async def _genomic_to_tx_segment( if err_msg: return GenomicTxSeg(errors=[err_msg]) - #print(is_exonic) return GenomicTxSeg( gene=gene, genomic_ac=genomic_ac, From 0dc16d3304f07a68304827786907a8226e10de3b Mon Sep 17 00:00:00 2001 From: jarbesfeld Date: Fri, 5 Sep 2025 13:55:17 -0400 Subject: [PATCH 3/4] Run ruff --- src/cool_seq_tool/mappers/exon_genomic_coords.py | 4 +++- tests/mappers/test_exon_genomic_coords.py | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/cool_seq_tool/mappers/exon_genomic_coords.py b/src/cool_seq_tool/mappers/exon_genomic_coords.py index 0abd334..02da5af 100644 --- a/src/cool_seq_tool/mappers/exon_genomic_coords.py +++ b/src/cool_seq_tool/mappers/exon_genomic_coords.py @@ -65,7 +65,9 @@ class TxSegment(BaseModelForbidExtra): genomic_location: SequenceLocation = Field( ..., description="The genomic position of a transcript segment." ) - is_exonic: bool = Field(default=True, description="If the position occurs on an exon") + is_exonic: bool = Field( + default=True, description="If the position occurs on an exon" + ) model_config = ConfigDict( json_schema_extra={ diff --git a/tests/mappers/test_exon_genomic_coords.py b/tests/mappers/test_exon_genomic_coords.py index 5a1ca7a..078f131 100644 --- a/tests/mappers/test_exon_genomic_coords.py +++ b/tests/mappers/test_exon_genomic_coords.py @@ -571,7 +571,7 @@ def eln_grch38_intronic(): }, "start": 74028173, }, - "is_exonic": True + "is_exonic": True, }, "seg_end": { "exon_ord": 7, @@ -584,7 +584,7 @@ def eln_grch38_intronic(): }, "end": 74043599, }, - "is_exonic": False + "is_exonic": False, }, } return GenomicTxSegService(**params) @@ -609,7 +609,7 @@ def gusbp3_exon5_start(): }, "end": 69645878, }, - "is_exonic": False + "is_exonic": False, }, "seg_end": None, } From 591670359d317895491ee25964f15020114a4c05 Mon Sep 17 00:00:00 2001 From: jarbesfeld Date: Sun, 7 Sep 2025 17:30:24 -0400 Subject: [PATCH 4/4] Add is_exonic in else --- src/cool_seq_tool/mappers/exon_genomic_coords.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cool_seq_tool/mappers/exon_genomic_coords.py b/src/cool_seq_tool/mappers/exon_genomic_coords.py index 02da5af..caecf70 100644 --- a/src/cool_seq_tool/mappers/exon_genomic_coords.py +++ b/src/cool_seq_tool/mappers/exon_genomic_coords.py @@ -901,7 +901,6 @@ async def _genomic_to_tx_segment( # Check if breakpoint occurs on an exon. # If not, determine the adjacent exon given the selected transcript - is_exonic = True if not self._is_exonic_breakpoint(genomic_pos, tx_exons): is_exonic = False exon_num = self._get_adjacent_exon( @@ -911,6 +910,7 @@ async def _genomic_to_tx_segment( end=genomic_pos if not is_seg_start else None, ) else: + is_exonic = True exon_data = await self.uta_db.get_tx_exon_aln_v_data( transcript, genomic_pos,