3737
3838async def _get_compatible_transcripts (
3939 target_gene : TargetGene , align_result : AlignmentResult
40- ) -> list [list [str ]]:
41- """Acquire matching transcripts
40+ ) -> set [str ]:
41+ """Acquire transcripts which overlap with all hit subranges
42+ of an alignment result.
4243
4344 :param target_gene: metadata for scoreset
4445 :param align_result: output of ``align()`` method
45- :return: List of list of compatible transcripts
46+ :return: Set of compatible transcripts
4647 """
4748 if align_result .chrom .startswith ("chr" ):
4849 aligned_chrom = align_result .chrom [3 :]
@@ -55,29 +56,15 @@ async def _get_compatible_transcripts(
5556 f"Unable to find gene symbol for target gene { target_gene .target_gene_name } "
5657 )
5758 raise TxSelectError (msg )
58- transcript_matches = []
59+ transcript_matches : set [ str ] = set ()
5960 for hit_range in align_result .hit_subranges :
6061 matches_list = await get_transcripts (
6162 gene_symbol , chromosome , hit_range .start , hit_range .end
6263 )
63- if matches_list :
64- transcript_matches .append (matches_list )
64+ transcript_matches .intersection_update (matches_list )
6565 return transcript_matches
6666
6767
68- def _reduce_compatible_transcripts (matching_transcripts : list [list [str ]]) -> list [str ]:
69- """Reduce list of list of transcripts to a list containing only entries present
70- in each sublist
71-
72- :param matching_transcripts: list of list of transcript accession IDs
73- :return: list of transcripts shared by all sublists
74- """
75- common_transcripts_set = set (matching_transcripts [0 ])
76- for sublist in matching_transcripts [1 :]:
77- common_transcripts_set .intersection_update (sublist )
78- return list (common_transcripts_set )
79-
80-
8168def _choose_best_mane_transcript (
8269 mane_transcripts : list [ManeDescription ],
8370) -> ManeDescription | None :
@@ -157,11 +144,8 @@ async def _select_protein_reference(
157144 reference sequence
158145 """
159146 matching_transcripts = await _get_compatible_transcripts (target_gene , align_result )
147+
160148 if not matching_transcripts :
161- common_transcripts = None
162- else :
163- common_transcripts = _reduce_compatible_transcripts (matching_transcripts )
164- if not common_transcripts :
165149 if not target_gene .target_uniprot_ref :
166150 msg = f"Unable to find matching transcripts for target gene { target_gene .target_gene_name } "
167151 raise TxSelectError (msg )
@@ -174,10 +158,12 @@ async def _select_protein_reference(
174158 nm_accession = None
175159 tx_mode = None
176160 else :
177- mane_transcripts = get_mane_transcripts (common_transcripts )
161+ mane_transcripts = get_mane_transcripts (matching_transcripts )
178162 best_tx = _choose_best_mane_transcript (mane_transcripts )
179163 if not best_tx :
180- best_tx = await _get_longest_compatible_transcript (common_transcripts )
164+ best_tx = await _get_longest_compatible_transcript (
165+ list (matching_transcripts )
166+ )
181167 if not best_tx :
182168 msg = f"Unable to find matching MANE transcripts for target gene { target_gene .target_gene_name } "
183169 raise TxSelectError (msg )
0 commit comments