Skip to content

Commit 3ad1054

Browse files
authored
feat!: Create extension for is_exonic (#428)
closes #427
1 parent 8faa9b9 commit 3ad1054

File tree

2 files changed

+41
-24
lines changed

2 files changed

+41
-24
lines changed

src/cool_seq_tool/mappers/exon_genomic_coords.py

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import logging
44

5+
from ga4gh.core.models import Extension
56
from ga4gh.vrs.models import SequenceLocation, SequenceReference
67
from pydantic import ConfigDict, Field, StrictInt, StrictStr, model_validator
78

@@ -65,9 +66,6 @@ class TxSegment(BaseModelForbidExtra):
6566
genomic_location: SequenceLocation = Field(
6667
..., description="The genomic position of a transcript segment."
6768
)
68-
is_exonic: bool = Field(
69-
default=True, description="If the position occurs on an exon"
70-
)
7169

7270
@model_validator(mode="before")
7371
def check_seg_pos(cls, values: dict) -> dict: # noqa: N805
@@ -99,8 +97,8 @@ def check_seg_pos(cls, values: dict) -> dict: # noqa: N805
9997
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
10098
},
10199
"end": 154192135,
100+
"extensions": [{"name": "is_exonic", "value": True}],
102101
},
103-
"is_exonic": True,
104102
}
105103
}
106104
)
@@ -157,8 +155,8 @@ def check_errors(cls, values: dict) -> dict: # noqa: N805
157155
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
158156
},
159157
"end": 154192135,
158+
"extensions": [{"name": "is_exonic", "value": True}],
160159
},
161-
"is_exonic": True,
162160
},
163161
"errors": [],
164162
}
@@ -224,8 +222,8 @@ def add_meta_check_errors(cls, values: dict) -> dict: # noqa: N805
224222
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
225223
},
226224
"end": 154192135,
225+
"extensions": [{"name": "is_exonic", "value": True}],
227226
},
228-
"is_exonic": True,
229227
},
230228
"seg_end": {
231229
"exon_ord": 7,
@@ -237,8 +235,8 @@ def add_meta_check_errors(cls, values: dict) -> dict: # noqa: N805
237235
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
238236
},
239237
"start": 154170399,
238+
"extensions": [{"name": "is_exonic", "value": True}],
240239
},
241-
"is_exonic": True,
242240
},
243241
}
244242
}
@@ -730,7 +728,12 @@ def _get_tx_segment(
730728
), None
731729

732730
def _get_vrs_seq_loc(
733-
self, genomic_ac: str, genomic_pos: int, is_seg_start: bool, strand: Strand
731+
self,
732+
genomic_ac: str,
733+
genomic_pos: int,
734+
is_seg_start: bool,
735+
strand: Strand,
736+
is_exonic: bool = True,
734737
) -> tuple[SequenceLocation | None, str | None]:
735738
"""Create VRS Sequence Location for genomic position where transcript segment
736739
occurs
@@ -740,6 +743,8 @@ def _get_vrs_seq_loc(
740743
:param is_seg_start: ``True`` if ``genomic_pos`` is where the transcript segment
741744
starts. ``False`` if ``genomic_pos`` is where the transcript segment ends.
742745
:param strand: Strand
746+
:param is_exonic: A boolean indicating if the genomic breakpoint occurs
747+
on an exon. By default, this is set to ``True``.
743748
:return: Tuple containing VRS location (if successful) and error message (if
744749
unable to get GA4GH identifier for ``genomic_ac``).
745750
"""
@@ -759,6 +764,7 @@ def _get_vrs_seq_loc(
759764
),
760765
start=genomic_pos if use_start else None,
761766
end=genomic_pos if not use_start else None,
767+
extensions=[Extension(name="is_exonic", value=is_exonic)],
762768
), None
763769

764770
async def _genomic_to_tx_segment(
@@ -947,7 +953,7 @@ async def _genomic_to_tx_segment(
947953
)
948954

949955
genomic_location, err_msg = self._get_vrs_seq_loc(
950-
genomic_ac, genomic_pos, is_seg_start, strand
956+
genomic_ac, genomic_pos, is_seg_start, strand, is_exonic
951957
)
952958
if err_msg:
953959
return GenomicTxSeg(errors=[err_msg])
@@ -961,7 +967,6 @@ async def _genomic_to_tx_segment(
961967
exon_ord=exon_num,
962968
offset=offset,
963969
genomic_location=genomic_location,
964-
is_exonic=is_exonic,
965970
),
966971
)
967972

tests/mappers/test_exon_genomic_coords.py

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -183,8 +183,8 @@ def tpm3_exon1():
183183
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
184184
},
185185
"end": 154192135,
186+
"extensions": [{"name": "is_exonic", "value": True}],
186187
},
187-
"is_exonic": True,
188188
},
189189
}
190190
return GenomicTxSeg(**params)
@@ -208,8 +208,8 @@ def tpm3_exon8():
208208
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
209209
},
210210
"start": 154170399,
211+
"extensions": [{"name": "is_exonic", "value": True}],
211212
},
212-
"is_exonic": True,
213213
},
214214
}
215215
return GenomicTxSeg(**params)
@@ -294,8 +294,8 @@ def mane_braf():
294294
"refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul",
295295
},
296296
"end": 140801559,
297+
"extensions": [{"name": "is_exonic", "value": True}],
297298
},
298-
"is_exonic": True,
299299
},
300300
"seg_end": {
301301
"exon_ord": 14,
@@ -307,8 +307,8 @@ def mane_braf():
307307
"refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul",
308308
},
309309
"start": 140753336,
310+
"extensions": [{"name": "is_exonic", "value": True}],
310311
},
311-
"is_exonic": True,
312312
},
313313
}
314314
return GenomicTxSegService(**params)
@@ -332,6 +332,7 @@ def wee1_exon2_exon11():
332332
"refgetAccession": "SQ.2NkFm8HK88MqeNkCgj78KidCAXgnsfV1",
333333
},
334334
"start": 9576092,
335+
"extensions": [{"name": "is_exonic", "value": True}],
335336
},
336337
},
337338
"seg_end": {
@@ -344,6 +345,7 @@ def wee1_exon2_exon11():
344345
"refgetAccession": "SQ.2NkFm8HK88MqeNkCgj78KidCAXgnsfV1",
345346
},
346347
"end": 9588449,
348+
"extensions": [{"name": "is_exonic", "value": True}],
347349
},
348350
},
349351
}
@@ -368,6 +370,7 @@ def mane_wee1_exon2_exon11():
368370
"refgetAccession": "SQ.2NkFm8HK88MqeNkCgj78KidCAXgnsfV1",
369371
},
370372
"start": 9576092,
373+
"extensions": [{"name": "is_exonic", "value": True}],
371374
},
372375
},
373376
"seg_end": {
@@ -380,6 +383,7 @@ def mane_wee1_exon2_exon11():
380383
"refgetAccession": "SQ.2NkFm8HK88MqeNkCgj78KidCAXgnsfV1",
381384
},
382385
"end": 9588449,
386+
"extensions": [{"name": "is_exonic", "value": True}],
383387
},
384388
},
385389
}
@@ -404,6 +408,7 @@ def ntrk1_exon10_exon17():
404408
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
405409
},
406410
"start": 156874570,
411+
"extensions": [{"name": "is_exonic", "value": True}],
407412
},
408413
},
409414
"seg_end": {
@@ -416,6 +421,7 @@ def ntrk1_exon10_exon17():
416421
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
417422
},
418423
"end": 156881850,
424+
"extensions": [{"name": "is_exonic", "value": True}],
419425
},
420426
},
421427
}
@@ -441,8 +447,8 @@ def zbtb10_exon3_end():
441447
"refgetAccession": "SQ.209Z7zJ-mFypBEWLk4rNC6S_OxY5p7bs",
442448
},
443449
"end": 80514010,
450+
"extensions": [{"name": "is_exonic", "value": False}],
444451
},
445-
"is_exonic": False,
446452
},
447453
}
448454
return GenomicTxSegService(**params)
@@ -466,8 +472,8 @@ def zbtb10_exon5_start():
466472
"refgetAccession": "SQ.209Z7zJ-mFypBEWLk4rNC6S_OxY5p7bs",
467473
},
468474
"start": 80518580,
475+
"extensions": [{"name": "is_exonic", "value": False}],
469476
},
470-
"is_exonic": False,
471477
},
472478
"seg_end": None,
473479
}
@@ -493,8 +499,8 @@ def tpm3_exon6_end():
493499
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
494500
},
495501
"start": 154171410,
502+
"extensions": [{"name": "is_exonic", "value": False}],
496503
},
497-
"is_exonic": False,
498504
},
499505
}
500506
return GenomicTxSegService(**params)
@@ -518,8 +524,8 @@ def tpm3_exon5_start():
518524
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
519525
},
520526
"end": 154173080,
527+
"extensions": [{"name": "is_exonic", "value": False}],
521528
},
522-
"is_exonic": False,
523529
},
524530
"seg_end": None,
525531
}
@@ -545,8 +551,8 @@ def gusbp3_exon2_end():
545551
"refgetAccession": "SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI",
546552
},
547553
"start": 69680764,
554+
"extensions": [{"name": "is_exonic", "value": False}],
548555
},
549-
"is_exonic": False,
550556
},
551557
}
552558
return GenomicTxSegService(**params)
@@ -570,8 +576,8 @@ def eln_grch38_intronic():
570576
"refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul",
571577
},
572578
"start": 74028173,
579+
"extensions": [{"name": "is_exonic", "value": True}],
573580
},
574-
"is_exonic": True,
575581
},
576582
"seg_end": {
577583
"exon_ord": 7,
@@ -583,8 +589,8 @@ def eln_grch38_intronic():
583589
"refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul",
584590
},
585591
"end": 74043599,
592+
"extensions": [{"name": "is_exonic", "value": False}],
586593
},
587-
"is_exonic": False,
588594
},
589595
}
590596
return GenomicTxSegService(**params)
@@ -608,8 +614,8 @@ def gusbp3_exon5_start():
608614
"refgetAccession": "SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI",
609615
},
610616
"end": 69645878,
617+
"extensions": [{"name": "is_exonic", "value": False}],
611618
},
612-
"is_exonic": False,
613619
},
614620
"seg_end": None,
615621
}
@@ -660,7 +666,10 @@ def genomic_tx_seg_service_checks(actual, expected=None, is_valid=True):
660666
assert (
661667
actual_seg.genomic_location.end == expected_seg.genomic_location.end
662668
)
663-
assert actual_seg.is_exonic == expected_seg.is_exonic
669+
assert (
670+
actual_seg.genomic_location.extensions
671+
== expected_seg.genomic_location.extensions
672+
)
664673

665674
assert actual.errors == expected.errors
666675
else:
@@ -728,7 +737,10 @@ def genomic_tx_seg_checks(actual, expected=None, is_valid=True):
728737
actual_seg.genomic_location.start == expected_seg.genomic_location.start
729738
)
730739
assert actual_seg.genomic_location.end == expected_seg.genomic_location.end
731-
assert actual_seg.is_exonic == expected_seg.is_exonic
740+
assert (
741+
actual_seg.genomic_location.extensions
742+
== expected_seg.genomic_location.extensions
743+
)
732744

733745
assert actual.errors == expected.errors
734746
else:

0 commit comments

Comments
 (0)