@@ -805,36 +805,9 @@ async def _genomic_to_tx_segment(
805805 ):
806806 transcript = mane_transcripts [0 ]["RefSeq_nuc" ]
807807 else :
808- # Attempt to find a coding transcript if a MANE transcript
809- # cannot be found
810- results = (
811- await self .mane_transcript .get_longest_compatible_transcript (
812- start_pos = genomic_pos ,
813- end_pos = genomic_pos ,
814- start_annotation_layer = AnnotationLayer .GENOMIC ,
815- gene = gene ,
816- )
808+ transcript = await self ._select_optimal_transcript (
809+ genomic_pos , genomic_ac , gene
817810 )
818- if results :
819- transcript = results .refseq
820- else :
821- # Run if gene is for a noncoding transcript
822- query = f"""
823- SELECT DISTINCT tx_ac
824- FROM { self .uta_db .schema } .tx_exon_aln_v
825- WHERE hgnc = '{ gene } '
826- AND alt_ac = '{ genomic_ac } '
827- """ # noqa: S608
828- result = await self .uta_db .execute_query (query )
829-
830- if result :
831- transcript = result [0 ]["tx_ac" ]
832- else :
833- return GenomicTxSeg (
834- errors = [
835- f"Could not find a transcript for { gene } on { genomic_ac } "
836- ]
837- )
838811 tx_exons = await self ._get_all_exon_coords (
839812 tx_ac = transcript , genomic_ac = genomic_ac
840813 )
@@ -942,6 +915,45 @@ async def _genomic_to_tx_segment(
942915 genomic_ac , genomic_pos , is_seg_start , gene , tx_ac = transcript
943916 )
944917
918+ async def _select_optimal_transcript (
919+ self , genomic_pos : int , genomic_ac : str , gene : str
920+ ) -> str :
921+ """Select the optimal transcript given a genomic position, accession, and gene.
922+
923+ :param genomic_pos: Genomic position where the transcript segment starts or ends
924+ (inter-residue based)
925+ :param genomic_ac: Genomic accession
926+ :param gene: Valid, case-sensitive HGNC gene symbol
927+ :return A string representing the optimal transcript given the above data
928+ """
929+ # Attempt to find a coding transcript if a MANE transcript cannot be found
930+ transcript = None
931+ results = await self .mane_transcript .get_longest_compatible_transcript (
932+ start_pos = genomic_pos ,
933+ end_pos = genomic_pos ,
934+ start_annotation_layer = AnnotationLayer .GENOMIC ,
935+ gene = gene ,
936+ )
937+ if results :
938+ transcript = results .refseq
939+ else :
940+ # Run if gene is for a noncoding transcript
941+ query = f"""
942+ SELECT DISTINCT tx_ac
943+ FROM { self .uta_db .schema } .tx_exon_aln_v
944+ WHERE hgnc = '{ gene } '
945+ AND alt_ac = '{ genomic_ac } '
946+ """ # noqa: S608
947+ result = await self .uta_db .execute_query (query )
948+
949+ if result :
950+ transcript = result [0 ]["tx_ac" ]
951+ else :
952+ return GenomicTxSeg (
953+ errors = [f"Could not find a transcript for { gene } on { genomic_ac } " ]
954+ )
955+ return transcript
956+
945957 async def _get_grch38_ac_pos (
946958 self , genomic_ac : str , genomic_pos : int , grch38_ac : str | None = None
947959 ) -> tuple [str | None , int | None , str | None ]:
@@ -1089,33 +1101,9 @@ async def _get_tx_seg_genomic_metadata(
10891101 err_msg += f" on gene { gene } "
10901102 _logger .warning (err_msg )
10911103 if not await self .uta_db .validate_mane_transcript_acc (mane_data ):
1092- results = await self .mane_transcript .get_longest_compatible_transcript (
1093- start_pos = genomic_pos ,
1094- end_pos = genomic_pos ,
1095- start_annotation_layer = AnnotationLayer .GENOMIC ,
1096- gene = gene ,
1104+ non_mane_transcript = await self ._select_optimal_transcript (
1105+ genomic_pos , genomic_ac , gene
10971106 )
1098- if results :
1099- non_mane_transcript = results .refseq
1100- else :
1101- # Run if gene is for a noncoding transcript
1102- query = f"""
1103- SELECT DISTINCT tx_ac
1104- FROM { self .uta_db .schema } .tx_exon_aln_v
1105- WHERE hgnc = '{ gene } '
1106- AND alt_ac = '{ genomic_ac } '
1107- """ # noqa: S608
1108- result = await self .uta_db .execute_query (query )
1109-
1110- if result :
1111- non_mane_transcript = result [0 ]["tx_ac" ]
1112- else :
1113- return GenomicTxSeg (
1114- errors = [
1115- f"Could not find a transcript for { gene } on { genomic_ac } "
1116- ]
1117- )
1118- return GenomicTxSeg (errors = [err_msg ])
11191107
11201108 if non_mane_transcript :
11211109 tx_ac = non_mane_transcript
0 commit comments