Skip to content

Commit 25f4332

Browse files
committed
API support for accession-based mapping
Note: this change is mostly structured to handle multi-target mapping, but it does not yet contain everything that multi-target mapping requires (specifically, the final output / reference sequence structure). Those remaining changes would require corresponding changes in mavedb-api, which we are not prepared to deploy yet.
1 parent 8fb8e0b commit 25f4332

File tree

1 file changed

+47
-17
lines changed

1 file changed

+47
-17
lines changed

src/api/routers/map.py

Lines changed: 47 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from fastapi.responses import JSONResponse
77
from requests import HTTPError
88

9-
from dcd_mapping.align import AlignmentError, BlatNotFoundError, align
9+
from dcd_mapping.align import AlignmentError, BlatNotFoundError, build_alignment_result
1010
from dcd_mapping.annotate import (
1111
_get_computed_reference_sequence,
1212
_get_mapped_reference_sequence,
@@ -23,7 +23,7 @@
2323
)
2424
from dcd_mapping.resource_utils import ResourceAcquisitionError
2525
from dcd_mapping.schemas import ScoreAnnotation, ScoresetMapping, VrsVersion
26-
from dcd_mapping.transcripts import TxSelectError, select_transcript
26+
from dcd_mapping.transcripts import select_transcripts
2727
from dcd_mapping.vrs_map import VrsMapError, vrs_map
2828

2929
router = APIRouter(
@@ -37,9 +37,7 @@ async def map_scoreset(urn: str, store_path: Path | None = None) -> ScoresetMapp
3737
"""Perform end-to-end mapping for a scoreset.
3838
3939
:param urn: identifier for a scoreset.
40-
:param output_path: optional path to save output at
41-
:param vrs_version: version of VRS objects to output (1.3 or 2)
42-
:param silent: if True, suppress console information output
40+
:param store_path: optional path to save output at
4341
"""
4442
try:
4543
metadata = get_scoreset_metadata(urn, store_path)
@@ -62,7 +60,7 @@ async def map_scoreset(urn: str, store_path: Path | None = None) -> ScoresetMapp
6260
)
6361

6462
try:
65-
alignment_result = align(metadata, True)
63+
alignment_results = build_alignment_result(metadata, True)
6664
except BlatNotFoundError as e:
6765
msg = "BLAT command appears missing. Ensure it is available on the $PATH or use the environment variable BLAT_BIN_PATH to point to it. See instructions in the README prerequisites section for more."
6866
raise HTTPException(status_code=500, detail=msg) from e
@@ -75,54 +73,82 @@ async def map_scoreset(urn: str, store_path: Path | None = None) -> ScoresetMapp
7573
metadata=metadata, error_message=str(e).strip("'")
7674
).model_dump(exclude_none=True)
7775
)
78-
79-
try:
80-
transcript = await select_transcript(metadata, records, alignment_result)
81-
except (TxSelectError, KeyError, ValueError) as e:
76+
except ScoresetNotSupportedError as e:
8277
return JSONResponse(
8378
content=ScoresetMapping(
8479
metadata=metadata, error_message=str(e).strip("'")
8580
).model_dump(exclude_none=True)
8681
)
82+
83+
try:
84+
transcripts = await select_transcripts(metadata, records, alignment_results)
85+
# NOTE: transcript selection errors are handled in select_transcripts,
86+
# and they do not cause the entire mapping process to exit; instead, an error will be reported
87+
# on the target level and on the variant level for variants relative to that target.
88+
# HTTPErrors and DataLookupErrors cause the mapping process to exit because these indicate
89+
# underlying issues with data providers.
8790
except HTTPError as e:
8891
msg = f"HTTP error occurred during transcript selection: {e}"
8992
raise HTTPException(status_code=500, detail=msg) from e
9093
except DataLookupError as e:
9194
msg = f"Data lookup error occurred during transcript selection: {e}"
9295
raise HTTPException(status_code=500, detail=msg) from e
9396

97+
vrs_results = {}
9498
try:
95-
vrs_results = vrs_map(metadata, alignment_result, records, transcript, True)
99+
for target_gene in metadata.target_genes:
100+
vrs_results[target_gene] = vrs_map(
101+
metadata=metadata.target_genes[target_gene],
102+
align_result=alignment_results[target_gene],
103+
records=records[target_gene],
104+
transcript=transcripts[target_gene],
105+
silent=True,
106+
)
96107
except VrsMapError as e:
97108
return JSONResponse(
98109
content=ScoresetMapping(
99110
metadata=metadata, error_message=str(e).strip("'")
100111
).model_dump(exclude_none=True)
101112
)
102-
if vrs_results is None:
113+
# TODO: this should instead check whether all values in the dict are None; this check might not be needed at all.
114+
if vrs_results is None or len(vrs_results) == 0:
103115
return ScoresetMapping(
104116
metadata=metadata,
105117
error_message="No variant mappings available for this score set",
106118
)
107119

120+
annotated_vrs_results = {}
108121
try:
109-
vrs_results = annotate(vrs_results, transcript, metadata, VrsVersion.V_2)
122+
for target_gene in vrs_results:
123+
annotated_vrs_results[target_gene] = annotate(
124+
vrs_results[target_gene],
125+
transcripts[target_gene],
126+
metadata.target_genes[target_gene],
127+
metadata.urn,
128+
VrsVersion.V_2,
129+
)
110130
except Exception as e:
111131
return JSONResponse(
112132
content=ScoresetMapping(
113133
metadata=metadata, error_message=str(e).strip("'")
114134
).model_dump(exclude_none=True)
115135
)
116-
if vrs_results is None:
136+
# TODO: this should instead check whether all values in the dict are None; this check might not be needed at all.
137+
if vrs_results is None or len(vrs_results) == 0:
117138
return ScoresetMapping(
118139
metadata=metadata,
119140
error_message="No annotated variant mappings available for this score set",
120141
)
121142

143+
# TODO this will need to be changed to support multi-target score sets.
144+
# This version works for accession based score sets.
145+
# Not implementing multi-target changes because this will require corresponding changes on mavedb-api and we want to get this on staging quickly right now.
146+
# For now, only accept single-target score sets so that we don't need to change structure of JSON output.
147+
target_gene = list(metadata["target_genes"].keys())[0] # noqa: RUF015
122148
try:
123149
raw_metadata = get_raw_scoreset_metadata(urn, store_path)
124150
preferred_layers = {
125-
_set_scoreset_layer(urn, vrs_results),
151+
_set_scoreset_layer(urn, vrs_results[target_gene]),
126152
}
127153

128154
reference_sequences = {
@@ -136,10 +162,14 @@ async def map_scoreset(urn: str, store_path: Path | None = None) -> ScoresetMapp
136162
for layer in preferred_layers:
137163
reference_sequences[layer][
138164
"computed_reference_sequence"
139-
] = _get_computed_reference_sequence(metadata, layer, transcript)
165+
] = _get_computed_reference_sequence(
166+
metadata.target_genes[target_gene], layer, transcripts[target_gene]
167+
)
140168
reference_sequences[layer][
141169
"mapped_reference_sequence"
142-
] = _get_mapped_reference_sequence(layer, transcript, alignment_result)
170+
] = _get_mapped_reference_sequence(
171+
layer, transcripts[target_gene], alignment_results[target_gene]
172+
)
143173

144174
mapped_scores: list[ScoreAnnotation] = []
145175
for m in vrs_results:

0 commit comments

Comments
 (0)