1010from cool_seq_tool .schemas import (
1111 Assembly ,
1212 BaseModelForbidExtra ,
13+ CoordinateType ,
1314 ServiceMeta ,
1415 Strand ,
1516)
@@ -412,6 +413,7 @@ async def genomic_to_tx_segment(
412413 transcript : str | None = None ,
413414 get_nearest_transcript_junction : bool = False ,
414415 gene : str | None = None ,
416+ coordinate_type : CoordinateType = CoordinateType .INTER_RESIDUE ,
415417 ) -> GenomicTxSegService :
416418 """Get transcript segment data for genomic data, lifted over to GRCh38.
417419
@@ -485,6 +487,7 @@ async def genomic_to_tx_segment(
485487 gene = gene ,
486488 get_nearest_transcript_junction = get_nearest_transcript_junction ,
487489 is_seg_start = True ,
490+ coordinate_type = coordinate_type ,
488491 )
489492 if start_tx_seg_data .errors :
490493 return _return_service_errors (start_tx_seg_data .errors )
@@ -505,6 +508,7 @@ async def genomic_to_tx_segment(
505508 gene = gene ,
506509 get_nearest_transcript_junction = get_nearest_transcript_junction ,
507510 is_seg_start = False ,
511+ coordinate_type = coordinate_type ,
508512 )
509513 if end_tx_seg_data .errors :
510514 return _return_service_errors (end_tx_seg_data .errors )
@@ -735,6 +739,7 @@ async def _genomic_to_tx_segment(
735739 gene : str | None = None ,
736740 get_nearest_transcript_junction : bool = False ,
737741 is_seg_start : bool = True ,
742+ coordinate_type : CoordinateType = CoordinateType .INTER_RESIDUE ,
738743 ) -> GenomicTxSeg :
739744 """Given genomic data, generate a boundary for a transcript segment.
740745
@@ -763,6 +768,8 @@ async def _genomic_to_tx_segment(
763768 preceding the breakpoint for the 3' end.
764769 :param is_seg_start: ``True`` if ``genomic_pos`` is where the transcript segment starts.
765770 ``False`` if ``genomic_pos`` is where the transcript segment ends.
771+ :param coordinate_type: Coordinate type for ``seg_start_genomic`` and
772+ ``seg_end_genomic``
766773 :return: Data for a transcript segment boundary (inter-residue coordinates)
767774 """
768775 params = {key : None for key in GenomicTxSeg .model_fields }
@@ -835,6 +842,13 @@ async def _genomic_to_tx_segment(
835842
836843 strand = Strand (tx_exons [0 ].alt_strand )
837844 params ["strand" ] = strand
845+ use_alt_start_i = self ._use_alt_start_i (
846+ is_seg_start = is_seg_start , strand = strand
847+ )
848+ if use_alt_start_i and coordinate_type == CoordinateType .RESIDUE :
849+ genomic_pos = (
850+ genomic_pos - 1
851+ ) # Convert residue coordinate to inter-residue
838852
839853 # Check if breakpoint occurs on an exon.
840854 # If not, determine the adjacent exon given the selected transcript
@@ -847,15 +861,11 @@ async def _genomic_to_tx_segment(
847861 )
848862
849863 offset = self ._get_exon_offset (
850- start_i = tx_exons [exon_num ].alt_start_i ,
851- end_i = tx_exons [exon_num ].alt_end_i ,
864+ genomic_pos = genomic_pos ,
865+ exon_boundary = tx_exons [exon_num ].alt_start_i
866+ if use_alt_start_i
867+ else tx_exons [exon_num ].alt_end_i ,
852868 strand = strand ,
853- use_start_i = strand == Strand .POSITIVE
854- if is_seg_start
855- else strand != Strand .POSITIVE ,
856- is_in_exon = False ,
857- start = genomic_pos if is_seg_start else None ,
858- end = genomic_pos if not is_seg_start else None ,
859869 )
860870
861871 genomic_location , err_msg = self ._get_vrs_seq_loc (
@@ -931,7 +941,12 @@ async def _genomic_to_tx_segment(
931941 )
932942
933943 return await self ._get_tx_seg_genomic_metadata (
934- genomic_ac , genomic_pos , is_seg_start , gene , tx_ac = transcript
944+ genomic_ac ,
945+ genomic_pos ,
946+ is_seg_start ,
947+ gene ,
948+ tx_ac = transcript ,
949+ coordinate_type = coordinate_type ,
935950 )
936951
937952 async def _get_grch38_ac_pos (
@@ -1049,6 +1064,7 @@ async def _get_tx_seg_genomic_metadata(
10491064 is_seg_start : bool ,
10501065 gene : str ,
10511066 tx_ac : str | None ,
1067+ coordinate_type : CoordinateType = CoordinateType .INTER_RESIDUE ,
10521068 ) -> GenomicTxSeg :
10531069 """Get transcript segment data and associated genomic metadata.
10541070
@@ -1110,15 +1126,18 @@ async def _get_tx_seg_genomic_metadata(
11101126 )
11111127
11121128 tx_exon_aln_data = tx_exon_aln_data [0 ]
1113-
1129+ strand = Strand (tx_exon_aln_data .alt_strand )
1130+ use_alt_start_i = self ._use_alt_start_i (
1131+ is_seg_start = is_seg_start , strand = strand
1132+ )
1133+ if use_alt_start_i and coordinate_type == CoordinateType .RESIDUE :
1134+ genomic_pos = genomic_pos - 1 # Convert residue coordinate to inter-residue
11141135 offset = self ._get_exon_offset (
1115- start_i = tx_exon_aln_data .alt_start_i ,
1116- end_i = tx_exon_aln_data .alt_end_i ,
1117- strand = Strand (tx_exon_aln_data .alt_strand ),
1118- use_start_i = False , # This doesn't impact anything since we're on the exon
1119- is_in_exon = True ,
1120- start = genomic_pos if is_seg_start else None ,
1121- end = genomic_pos if not is_seg_start else None ,
1136+ genomic_pos = genomic_pos ,
1137+ exon_boundary = tx_exon_aln_data .alt_start_i
1138+ if use_alt_start_i
1139+ else tx_exon_aln_data .alt_end_i ,
1140+ strand = strand ,
11221141 )
11231142
11241143 genomic_location , err_msg = self ._get_vrs_seq_loc (
@@ -1150,6 +1169,24 @@ def _is_exonic_breakpoint(pos: int, tx_genomic_coords: list[_ExonCoord]) -> bool
11501169 exon .alt_start_i <= pos <= exon .alt_end_i for exon in tx_genomic_coords
11511170 )
11521171
@staticmethod
def _use_alt_start_i(is_seg_start: bool, strand: Strand) -> bool:
    """Decide which UTA exon boundary the exon offset is measured against.

    ``alt_start_i`` is chosen for a segment start on the positive strand or a
    segment end on the negative strand; otherwise ``alt_end_i`` is chosen.

    :param is_seg_start: ``True`` if ``genomic_pos`` is where the transcript
        segment starts. ``False`` if ``genomic_pos`` is where the transcript
        segment ends.
    :param strand: The transcribed strand
    :return: ``True`` if alt_start_i should be used, ``False`` if alt_end_i
        should be used
    """
    # A segment start pairs with the positive strand, a segment end with the
    # negative strand; any other combination falls back to alt_end_i.
    strand_selecting_start_i = Strand.POSITIVE if is_seg_start else Strand.NEGATIVE
    return bool(strand == strand_selecting_start_i)
1189+
11531190 @staticmethod
11541191 def _get_adjacent_exon (
11551192 tx_exons_genomic_coords : list [_ExonCoord ],
@@ -1205,38 +1242,21 @@ def _get_adjacent_exon(
12051242
@staticmethod
def _get_exon_offset(
    genomic_pos: int,
    exon_boundary: int,
    strand: Strand,
) -> int:
    """Compute the offset of a genomic position from an exon boundary.

    :param genomic_pos: The supplied genomic position. This can represent, for
        example, a fusion junction breakpoint
    :param exon_boundary: The genomic position for the exon boundary that the
        offset is being computed against
    :param strand: The transcribed strand
    :return: ``genomic_pos - exon_boundary`` on the positive strand; the same
        difference with its sign flipped on any other strand
    """
    distance = genomic_pos - exon_boundary
    if strand == Strand.POSITIVE:
        return distance
    # Sign is flipped for any strand other than the positive one.
    return distance * -1
0 commit comments