Skip to content

Commit b43065c

Browse files
committed
Add support when get_nearest_transcript_junction is false
1 parent a9c7422 commit b43065c

File tree

2 files changed

+43
-12
lines changed

2 files changed

+43
-12
lines changed

src/cool_seq_tool/mappers/exon_genomic_coords.py

Lines changed: 43 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1073,24 +1073,56 @@ async def _get_tx_seg_genomic_metadata(
10731073
transcript
10741074
:return: Transcript segment data and associated genomic metadata
10751075
"""
1076-
if tx_ac:
1077-
# We should always try to liftover
1078-
grch38_ac = await self.uta_db.get_newest_assembly_ac(genomic_ac)
1079-
if not grch38_ac:
1080-
return GenomicTxSeg(errors=[f"Invalid genomic accession: {genomic_ac}"])
1081-
grch38_ac = grch38_ac[0]
1082-
else:
1076+
# We should always try to liftover
1077+
grch38_ac = await self.uta_db.get_newest_assembly_ac(genomic_ac)
1078+
if not grch38_ac:
1079+
return GenomicTxSeg(errors=[f"Invalid genomic accession: {genomic_ac}"])
1080+
grch38_ac = grch38_ac[0]
1081+
1082+
non_mane_transcript = None
1083+
1084+
if not tx_ac:
10831085
mane_data = self.mane_transcript_mappings.get_gene_mane_data(gene)
10841086
if not mane_data:
10851087
err_msg = f"Unable to find mane data for {genomic_ac} with position {genomic_pos}"
10861088
if gene:
10871089
err_msg += f" on gene {gene}"
10881090
_logger.warning(err_msg)
1089-
return GenomicTxSeg(errors=[err_msg])
1091+
if not await self.uta_db.validate_mane_transcript_acc(mane_data):
1092+
results = await self.mane_transcript.get_longest_compatible_transcript(
1093+
start_pos=genomic_pos,
1094+
end_pos=genomic_pos,
1095+
start_annotation_layer=AnnotationLayer.GENOMIC,
1096+
gene=gene,
1097+
)
1098+
if results:
1099+
non_mane_transcript = results.refseq
1100+
else:
1101+
# Run if gene is for a noncoding transcript
1102+
query = f"""
1103+
SELECT DISTINCT tx_ac
1104+
FROM {self.uta_db.schema}.tx_exon_aln_v
1105+
WHERE hgnc = '{gene}'
1106+
AND alt_ac = '{genomic_ac}'
1107+
""" # noqa: S608
1108+
result = await self.uta_db.execute_query(query)
1109+
1110+
if result:
1111+
non_mane_transcript = result[0]["tx_ac"]
1112+
else:
1113+
return GenomicTxSeg(
1114+
errors=[
1115+
f"Could not find a transcript for {gene} on {genomic_ac}"
1116+
]
1117+
)
1118+
return GenomicTxSeg(errors=[err_msg])
10901119

1091-
mane_data = mane_data[0]
1092-
tx_ac = mane_data["RefSeq_nuc"]
1093-
grch38_ac = mane_data["GRCh38_chr"]
1120+
if non_mane_transcript:
1121+
tx_ac = non_mane_transcript
1122+
else:
1123+
mane_data = mane_data[0]
1124+
tx_ac = mane_data["RefSeq_nuc"]
1125+
grch38_ac = mane_data["GRCh38_chr"]
10941126

10951127
# Always liftover to GRCh38
10961128
genomic_ac, genomic_pos, err_msg = await self._get_grch38_ac_pos(

tests/mappers/test_exon_genomic_coords.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1050,7 +1050,6 @@ async def test_genomic_to_transcript(test_egc_mapper, tpm3_exon1, tpm3_exon8):
10501050
genomic_ac="NC_000016.10",
10511051
is_seg_start=False,
10521052
gene="NPIPB5",
1053-
get_nearest_transcript_junction=True,
10541053
)
10551054
assert resp.tx_ac == "NM_001135865.2"
10561055

0 commit comments

Comments
 (0)