Skip to content

Commit 10a4357

Browse files
committed
Add try_longest_compatible
1 parent 12d0f7e commit 10a4357

File tree

1 file changed

+18
-3
lines changed

1 file changed

+18
-3
lines changed

src/cool_seq_tool/mappers/exon_genomic_coords.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,7 @@ async def genomic_to_tx_segment(
416416
seg_end_genomic: int | None = None,
417417
transcript: str | None = None,
418418
get_nearest_transcript_junction: bool = False,
419+
try_longest_compatible: bool = True,
419420
gene: str | None = None,
420421
) -> GenomicTxSegService:
421422
"""Get transcript segment data for genomic data, lifted over to GRCh38.
@@ -461,6 +462,8 @@ async def genomic_to_tx_segment(
461462
following the breakpoint for the 3' end. For the negative strand, adjacent
462463
is defined as the exon following the breakpoint for the 5' end and the exon
463464
preceding the breakpoint for the 3' end.
465+
:param try_longest_compatible: ``True`` to try the longest compatible remaining
466+
transcript if the MANE transcript was not compatible. ``False`` otherwise.
464467
:param gene: A valid, case-sensitive HGNC symbol. Must be given if no ``transcript``
465468
value is provided.
466469
:param coordinate_type: Coordinate type for ``seg_start_genomic`` and
@@ -489,6 +492,7 @@ async def genomic_to_tx_segment(
489492
transcript=transcript,
490493
gene=gene,
491494
get_nearest_transcript_junction=get_nearest_transcript_junction,
495+
try_longest_compatible=try_longest_compatible,
492496
is_seg_start=True,
493497
)
494498
if start_tx_seg_data.errors:
@@ -509,6 +513,7 @@ async def genomic_to_tx_segment(
509513
transcript=transcript,
510514
gene=gene,
511515
get_nearest_transcript_junction=get_nearest_transcript_junction,
516+
try_longest_compatible=try_longest_compatible,
512517
is_seg_start=False,
513518
)
514519
if end_tx_seg_data.errors:
@@ -739,6 +744,7 @@ async def _genomic_to_tx_segment(
739744
transcript: str | None = None,
740745
gene: str | None = None,
741746
get_nearest_transcript_junction: bool = False,
747+
try_longest_compatible: bool = True,
742748
is_seg_start: bool = True,
743749
) -> GenomicTxSeg:
744750
"""Given genomic data, generate a boundary for a transcript segment.
@@ -766,6 +772,8 @@ async def _genomic_to_tx_segment(
766772
following the breakpoint for the 3' end. For the negative strand, adjacent
767773
is defined as the exon following the breakpoint for the 5' end and the exon
768774
preceding the breakpoint for the 3' end.
775+
:param try_longest_compatible: ``True`` to try the longest compatible remaining
776+
transcript if the MANE transcript was not compatible. ``False`` otherwise.
769777
:param is_seg_start: ``True`` if ``genomic_pos`` is where the transcript segment starts.
770778
``False`` if ``genomic_pos`` is where the transcript segment ends.
771779
:return: Data for a transcript segment boundary (inter-residue coordinates)
@@ -806,9 +814,16 @@ async def _genomic_to_tx_segment(
806814
):
807815
transcript = mane_transcripts[0]["RefSeq_nuc"]
808816
else:
809-
transcript = await self._select_optimal_transcript(
810-
genomic_pos, genomic_ac, gene
811-
)
817+
if try_longest_compatible:
818+
transcript = await self._select_optimal_transcript(
819+
genomic_pos, genomic_ac, gene
820+
)
821+
else:
822+
return GenomicTxSeg(
823+
errors=[
824+
"A MANE transcript either does not exist or was not found in UTA. Please set `try_longest_compatible` to ``True`` to re-run"
825+
]
826+
)
812827
tx_exons = await self._get_all_exon_coords(
813828
tx_ac=transcript, genomic_ac=genomic_ac
814829
)

0 commit comments

Comments
 (0)