Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/cool_seq_tool/mappers/exon_genomic_coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ class TxSegment(BaseModelForbidExtra):
genomic_location: SequenceLocation = Field(
..., description="The genomic position of a transcript segment."
)
is_exonic: bool = Field(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is only loosely related to this PR, but if the genomic location should contain only one of `start` or `end`, then we should add a validator to enforce that (as a separate issue).

The reason I bring this up is that there could be a case where the location is a region that includes both exonic and intronic sequence, in which case a single `is_exonic` flag could not describe it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That makes sense to me. `seg_start` and `seg_end` should each contain only one coordinate, since each describes either the start or the end of the transcript segment. To confirm, is this something we could add in a different PR?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, please create an issue for this.

default=True, description="If the position occurs on an exon"
)

model_config = ConfigDict(
json_schema_extra={
Expand All @@ -79,6 +82,7 @@ class TxSegment(BaseModelForbidExtra):
},
"end": 154192135,
},
"is_exonic": True,
}
}
)
Expand Down Expand Up @@ -136,6 +140,7 @@ def check_errors(cls, values: dict) -> dict: # noqa: N805
},
"end": 154192135,
},
"is_exonic": True,
},
"errors": [],
}
Expand Down Expand Up @@ -202,6 +207,7 @@ def add_meta_check_errors(cls, values: dict) -> dict: # noqa: N805
},
"end": 154192135,
},
"is_exonic": True,
},
"seg_end": {
"exon_ord": 7,
Expand All @@ -214,6 +220,7 @@ def add_meta_check_errors(cls, values: dict) -> dict: # noqa: N805
},
"start": 154170399,
},
"is_exonic": True,
},
}
}
Expand Down Expand Up @@ -895,13 +902,15 @@ async def _genomic_to_tx_segment(
# Check if breakpoint occurs on an exon.
# If not, determine the adjacent exon given the selected transcript
if not self._is_exonic_breakpoint(genomic_pos, tx_exons):
is_exonic = False
exon_num = self._get_adjacent_exon(
tx_exons_genomic_coords=tx_exons,
strand=strand,
start=genomic_pos if is_seg_start else None,
end=genomic_pos if not is_seg_start else None,
)
else:
is_exonic = True
exon_data = await self.uta_db.get_tx_exon_aln_v_data(
transcript,
genomic_pos,
Expand Down Expand Up @@ -934,6 +943,7 @@ async def _genomic_to_tx_segment(
exon_ord=exon_num,
offset=offset,
genomic_location=genomic_location,
is_exonic=is_exonic,
),
)

Expand Down
14 changes: 14 additions & 0 deletions tests/mappers/test_exon_genomic_coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ def tpm3_exon1():
},
"end": 154192135,
},
"is_exonic": True,
},
}
return GenomicTxSeg(**params)
Expand All @@ -208,6 +209,7 @@ def tpm3_exon8():
},
"start": 154170399,
},
"is_exonic": True,
},
}
return GenomicTxSeg(**params)
Expand Down Expand Up @@ -293,6 +295,7 @@ def mane_braf():
},
"end": 140801559,
},
"is_exonic": True,
},
"seg_end": {
"exon_ord": 14,
Expand All @@ -305,6 +308,7 @@ def mane_braf():
},
"start": 140753336,
},
"is_exonic": True,
},
}
return GenomicTxSegService(**params)
Expand Down Expand Up @@ -438,6 +442,7 @@ def zbtb10_exon3_end():
},
"end": 80514010,
},
"is_exonic": False,
},
}
return GenomicTxSegService(**params)
Expand All @@ -462,6 +467,7 @@ def zbtb10_exon5_start():
},
"start": 80518580,
},
"is_exonic": False,
},
"seg_end": None,
}
Expand All @@ -488,6 +494,7 @@ def tpm3_exon6_end():
},
"start": 154171410,
},
"is_exonic": False,
},
}
return GenomicTxSegService(**params)
Expand All @@ -512,6 +519,7 @@ def tpm3_exon5_start():
},
"end": 154173080,
},
"is_exonic": False,
},
"seg_end": None,
}
Expand All @@ -538,6 +546,7 @@ def gusbp3_exon2_end():
},
"start": 69680764,
},
"is_exonic": False,
},
}
return GenomicTxSegService(**params)
Expand All @@ -562,6 +571,7 @@ def eln_grch38_intronic():
},
"start": 74028173,
},
"is_exonic": True,
},
"seg_end": {
"exon_ord": 7,
Expand All @@ -574,6 +584,7 @@ def eln_grch38_intronic():
},
"end": 74043599,
},
"is_exonic": False,
},
}
return GenomicTxSegService(**params)
Expand All @@ -598,6 +609,7 @@ def gusbp3_exon5_start():
},
"end": 69645878,
},
"is_exonic": False,
},
"seg_end": None,
}
Expand Down Expand Up @@ -648,6 +660,7 @@ def genomic_tx_seg_service_checks(actual, expected=None, is_valid=True):
assert (
actual_seg.genomic_location.end == expected_seg.genomic_location.end
)
assert actual_seg.is_exonic == expected_seg.is_exonic

assert actual.errors == expected.errors
else:
Expand Down Expand Up @@ -715,6 +728,7 @@ def genomic_tx_seg_checks(actual, expected=None, is_valid=True):
actual_seg.genomic_location.start == expected_seg.genomic_location.start
)
assert actual_seg.genomic_location.end == expected_seg.genomic_location.end
assert actual_seg.is_exonic == expected_seg.is_exonic

assert actual.errors == expected.errors
else:
Expand Down