Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 15 additions & 10 deletions src/cool_seq_tool/mappers/exon_genomic_coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import logging

from ga4gh.core.models import Extension
from ga4gh.vrs.models import SequenceLocation, SequenceReference
from pydantic import ConfigDict, Field, StrictInt, StrictStr, model_validator

Expand Down Expand Up @@ -65,9 +66,6 @@ class TxSegment(BaseModelForbidExtra):
genomic_location: SequenceLocation = Field(
..., description="The genomic position of a transcript segment."
)
is_exonic: bool = Field(
default=True, description="If the position occurs on an exon"
)

@model_validator(mode="before")
def check_seg_pos(cls, values: dict) -> dict: # noqa: N805
Expand Down Expand Up @@ -99,8 +97,8 @@ def check_seg_pos(cls, values: dict) -> dict: # noqa: N805
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
},
"end": 154192135,
"extensions": [{"name": "is_exonic", "value": True}],
},
"is_exonic": True,
}
}
)
Expand Down Expand Up @@ -157,8 +155,8 @@ def check_errors(cls, values: dict) -> dict: # noqa: N805
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
},
"end": 154192135,
"extensions": [{"name": "is_exonic", "value": True}],
},
"is_exonic": True,
},
"errors": [],
}
Expand Down Expand Up @@ -224,8 +222,8 @@ def add_meta_check_errors(cls, values: dict) -> dict: # noqa: N805
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
},
"end": 154192135,
"extensions": [{"name": "is_exonic", "value": True}],
},
"is_exonic": True,
},
"seg_end": {
"exon_ord": 7,
Expand All @@ -237,8 +235,8 @@ def add_meta_check_errors(cls, values: dict) -> dict: # noqa: N805
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
},
"start": 154170399,
"extensions": [{"name": "is_exonic", "value": True}],
},
"is_exonic": True,
},
}
}
Expand Down Expand Up @@ -730,7 +728,12 @@ def _get_tx_segment(
), None

def _get_vrs_seq_loc(
self, genomic_ac: str, genomic_pos: int, is_seg_start: bool, strand: Strand
self,
genomic_ac: str,
genomic_pos: int,
is_seg_start: bool,
strand: Strand,
is_exonic: bool = True,
) -> tuple[SequenceLocation | None, str | None]:
"""Create a VRS Sequence Location for the genomic position where a transcript
segment occurs
Expand All @@ -740,6 +743,8 @@ def _get_vrs_seq_loc(
:param is_seg_start: ``True`` if ``genomic_pos`` is where the transcript segment
starts. ``False`` if ``genomic_pos`` is where the transcript segment ends.
:param strand: Strand
:param is_exonic: ``True`` if the genomic breakpoint occurs on an exon,
``False`` otherwise. Defaults to ``True``. The value is attached to the
returned location as an ``is_exonic`` extension.
:return: Tuple containing VRS location (if successful) and error message (if
unable to get GA4GH identifier for ``genomic_ac``).
"""
Expand All @@ -759,6 +764,7 @@ def _get_vrs_seq_loc(
),
start=genomic_pos if use_start else None,
end=genomic_pos if not use_start else None,
extensions=[Extension(name="is_exonic", value=is_exonic)],
), None

async def _genomic_to_tx_segment(
Expand Down Expand Up @@ -947,7 +953,7 @@ async def _genomic_to_tx_segment(
)

genomic_location, err_msg = self._get_vrs_seq_loc(
genomic_ac, genomic_pos, is_seg_start, strand
genomic_ac, genomic_pos, is_seg_start, strand, is_exonic
)
if err_msg:
return GenomicTxSeg(errors=[err_msg])
Expand All @@ -961,7 +967,6 @@ async def _genomic_to_tx_segment(
exon_ord=exon_num,
offset=offset,
genomic_location=genomic_location,
is_exonic=is_exonic,
),
)

Expand Down
40 changes: 26 additions & 14 deletions tests/mappers/test_exon_genomic_coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,8 @@ def tpm3_exon1():
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
},
"end": 154192135,
"extensions": [{"name": "is_exonic", "value": True}],
},
"is_exonic": True,
},
}
return GenomicTxSeg(**params)
Expand All @@ -208,8 +208,8 @@ def tpm3_exon8():
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
},
"start": 154170399,
"extensions": [{"name": "is_exonic", "value": True}],
},
"is_exonic": True,
},
}
return GenomicTxSeg(**params)
Expand Down Expand Up @@ -294,8 +294,8 @@ def mane_braf():
"refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul",
},
"end": 140801559,
"extensions": [{"name": "is_exonic", "value": True}],
},
"is_exonic": True,
},
"seg_end": {
"exon_ord": 14,
Expand All @@ -307,8 +307,8 @@ def mane_braf():
"refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul",
},
"start": 140753336,
"extensions": [{"name": "is_exonic", "value": True}],
},
"is_exonic": True,
},
}
return GenomicTxSegService(**params)
Expand All @@ -332,6 +332,7 @@ def wee1_exon2_exon11():
"refgetAccession": "SQ.2NkFm8HK88MqeNkCgj78KidCAXgnsfV1",
},
"start": 9576092,
"extensions": [{"name": "is_exonic", "value": True}],
},
},
"seg_end": {
Expand All @@ -344,6 +345,7 @@ def wee1_exon2_exon11():
"refgetAccession": "SQ.2NkFm8HK88MqeNkCgj78KidCAXgnsfV1",
},
"end": 9588449,
"extensions": [{"name": "is_exonic", "value": True}],
},
},
}
Expand All @@ -368,6 +370,7 @@ def mane_wee1_exon2_exon11():
"refgetAccession": "SQ.2NkFm8HK88MqeNkCgj78KidCAXgnsfV1",
},
"start": 9576092,
"extensions": [{"name": "is_exonic", "value": True}],
},
},
"seg_end": {
Expand All @@ -380,6 +383,7 @@ def mane_wee1_exon2_exon11():
"refgetAccession": "SQ.2NkFm8HK88MqeNkCgj78KidCAXgnsfV1",
},
"end": 9588449,
"extensions": [{"name": "is_exonic", "value": True}],
},
},
}
Expand All @@ -404,6 +408,7 @@ def ntrk1_exon10_exon17():
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
},
"start": 156874570,
"extensions": [{"name": "is_exonic", "value": True}],
},
},
"seg_end": {
Expand All @@ -416,6 +421,7 @@ def ntrk1_exon10_exon17():
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
},
"end": 156881850,
"extensions": [{"name": "is_exonic", "value": True}],
},
},
}
Expand All @@ -441,8 +447,8 @@ def zbtb10_exon3_end():
"refgetAccession": "SQ.209Z7zJ-mFypBEWLk4rNC6S_OxY5p7bs",
},
"end": 80514010,
"extensions": [{"name": "is_exonic", "value": False}],
},
"is_exonic": False,
},
}
return GenomicTxSegService(**params)
Expand All @@ -466,8 +472,8 @@ def zbtb10_exon5_start():
"refgetAccession": "SQ.209Z7zJ-mFypBEWLk4rNC6S_OxY5p7bs",
},
"start": 80518580,
"extensions": [{"name": "is_exonic", "value": False}],
},
"is_exonic": False,
},
"seg_end": None,
}
Expand All @@ -493,8 +499,8 @@ def tpm3_exon6_end():
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
},
"start": 154171410,
"extensions": [{"name": "is_exonic", "value": False}],
},
"is_exonic": False,
},
}
return GenomicTxSegService(**params)
Expand All @@ -518,8 +524,8 @@ def tpm3_exon5_start():
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
},
"end": 154173080,
"extensions": [{"name": "is_exonic", "value": False}],
},
"is_exonic": False,
},
"seg_end": None,
}
Expand All @@ -545,8 +551,8 @@ def gusbp3_exon2_end():
"refgetAccession": "SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI",
},
"start": 69680764,
"extensions": [{"name": "is_exonic", "value": False}],
},
"is_exonic": False,
},
}
return GenomicTxSegService(**params)
Expand All @@ -570,8 +576,8 @@ def eln_grch38_intronic():
"refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul",
},
"start": 74028173,
"extensions": [{"name": "is_exonic", "value": True}],
},
"is_exonic": True,
},
"seg_end": {
"exon_ord": 7,
Expand All @@ -583,8 +589,8 @@ def eln_grch38_intronic():
"refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul",
},
"end": 74043599,
"extensions": [{"name": "is_exonic", "value": False}],
},
"is_exonic": False,
},
}
return GenomicTxSegService(**params)
Expand All @@ -608,8 +614,8 @@ def gusbp3_exon5_start():
"refgetAccession": "SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI",
},
"end": 69645878,
"extensions": [{"name": "is_exonic", "value": False}],
},
"is_exonic": False,
},
"seg_end": None,
}
Expand Down Expand Up @@ -660,7 +666,10 @@ def genomic_tx_seg_service_checks(actual, expected=None, is_valid=True):
assert (
actual_seg.genomic_location.end == expected_seg.genomic_location.end
)
assert actual_seg.is_exonic == expected_seg.is_exonic
assert (
actual_seg.genomic_location.extensions
== expected_seg.genomic_location.extensions
)

assert actual.errors == expected.errors
else:
Expand Down Expand Up @@ -728,7 +737,10 @@ def genomic_tx_seg_checks(actual, expected=None, is_valid=True):
actual_seg.genomic_location.start == expected_seg.genomic_location.start
)
assert actual_seg.genomic_location.end == expected_seg.genomic_location.end
assert actual_seg.is_exonic == expected_seg.is_exonic
assert (
actual_seg.genomic_location.extensions
== expected_seg.genomic_location.extensions
)

assert actual.errors == expected.errors
else:
Expand Down