Skip to content

Commit e421e82

Browse files
committed
refactor: change transcript parameter type to set for simplified transcript reduction
1 parent d5e5d75 commit e421e82

File tree

2 files changed

+12
-26
lines changed

2 files changed

+12
-26
lines changed

src/dcd_mapping/lookup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -596,7 +596,7 @@ def translate_hgvs_to_vrs(hgvs: str) -> Allele:
596596
# ----------------------------------- MANE ----------------------------------- #
597597

598598

599-
def get_mane_transcripts(transcripts: list[str]) -> list[ManeDescription]:
599+
def get_mane_transcripts(transcripts: set[str]) -> list[ManeDescription]:
600600
"""Get corresponding MANE data for transcripts. Results given in order of
601601
transcript preference.
602602

src/dcd_mapping/transcripts.py

Lines changed: 11 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,13 @@
3737

3838
async def _get_compatible_transcripts(
3939
target_gene: TargetGene, align_result: AlignmentResult
40-
) -> list[list[str]]:
41-
"""Acquire matching transcripts
40+
) -> set[str]:
41+
"""Acquire transcripts which overlap with all hit subranges
42+
of an alignment result.
4243
4344
:param target_gene: target gene metadata for scoreset
4445
:param align_result: output of ``align()`` method
45-
:return: List of list of compatible transcripts
46+
:return: Set of compatible transcripts
4647
"""
4748
if align_result.chrom.startswith("chr"):
4849
aligned_chrom = align_result.chrom[3:]
@@ -55,29 +56,15 @@ async def _get_compatible_transcripts(
5556
f"Unable to find gene symbol for target gene {target_gene.target_gene_name}"
5657
)
5758
raise TxSelectError(msg)
58-
transcript_matches = []
59+
transcript_matches: set[str] = set()
5960
for hit_range in align_result.hit_subranges:
6061
matches_list = await get_transcripts(
6162
gene_symbol, chromosome, hit_range.start, hit_range.end
6263
)
63-
if matches_list:
64-
transcript_matches.append(matches_list)
64+
transcript_matches.intersection_update(matches_list)
6565
return transcript_matches
6666

6767

68-
def _reduce_compatible_transcripts(matching_transcripts: list[list[str]]) -> list[str]:
69-
"""Reduce list of list of transcripts to a list containing only entries present
70-
in each sublist
71-
72-
:param matching_transcripts: list of list of transcript accession IDs
73-
:return: list of transcripts shared by all sublists
74-
"""
75-
common_transcripts_set = set(matching_transcripts[0])
76-
for sublist in matching_transcripts[1:]:
77-
common_transcripts_set.intersection_update(sublist)
78-
return list(common_transcripts_set)
79-
80-
8168
def _choose_best_mane_transcript(
8269
mane_transcripts: list[ManeDescription],
8370
) -> ManeDescription | None:
@@ -157,11 +144,8 @@ async def _select_protein_reference(
157144
reference sequence
158145
"""
159146
matching_transcripts = await _get_compatible_transcripts(target_gene, align_result)
147+
160148
if not matching_transcripts:
161-
common_transcripts = None
162-
else:
163-
common_transcripts = _reduce_compatible_transcripts(matching_transcripts)
164-
if not common_transcripts:
165149
if not target_gene.target_uniprot_ref:
166150
msg = f"Unable to find matching transcripts for target gene {target_gene.target_gene_name}"
167151
raise TxSelectError(msg)
@@ -174,10 +158,12 @@ async def _select_protein_reference(
174158
nm_accession = None
175159
tx_mode = None
176160
else:
177-
mane_transcripts = get_mane_transcripts(common_transcripts)
161+
mane_transcripts = get_mane_transcripts(matching_transcripts)
178162
best_tx = _choose_best_mane_transcript(mane_transcripts)
179163
if not best_tx:
180-
best_tx = await _get_longest_compatible_transcript(common_transcripts)
164+
best_tx = await _get_longest_compatible_transcript(
165+
list(matching_transcripts)
166+
)
181167
if not best_tx:
182168
msg = f"Unable to find matching MANE transcripts for target gene {target_gene.target_gene_name}"
183169
raise TxSelectError(msg)

0 commit comments

Comments
 (0)