Skip to content

Commit 7c23872

Browse files
authored
Merge pull request #27 from VariantEffect/feature/bencap/26/tmp-files-for-score-metadata
Temp Files for Score and Metadata Files
2 parents eee87ff + b8251bc commit 7c23872

File tree

4 files changed

+50
-28
lines changed

4 files changed

+50
-28
lines changed

src/api/routers/map.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
""""Provide mapping router"""
2+
from pathlib import Path
3+
24
from cool_seq_tool.schemas import AnnotationLayer
35
from fastapi import APIRouter, HTTPException
46
from fastapi.responses import JSONResponse
@@ -17,6 +19,7 @@
1719
get_raw_scoreset_metadata,
1820
get_scoreset_metadata,
1921
get_scoreset_records,
22+
with_mavedb_score_set,
2023
)
2124
from dcd_mapping.resource_utils import ResourceAcquisitionError
2225
from dcd_mapping.schemas import ScoreAnnotation, ScoresetMapping, VrsVersion
@@ -29,7 +32,8 @@
2932

3033

3134
@router.post(path="/map/{urn}", status_code=200, response_model=ScoresetMapping)
32-
async def map_scoreset(urn: str) -> ScoresetMapping:
35+
@with_mavedb_score_set
36+
async def map_scoreset(urn: str, store_path: Path | None = None) -> ScoresetMapping:
3337
"""Perform end-to-end mapping for a scoreset.
3438
3539
:param urn: identifier for a scoreset.
@@ -38,8 +42,8 @@ async def map_scoreset(urn: str) -> ScoresetMapping:
3842
:param silent: if True, suppress console information output
3943
"""
4044
try:
41-
metadata = get_scoreset_metadata(urn)
42-
records = get_scoreset_records(urn, True)
45+
metadata = get_scoreset_metadata(urn, store_path)
46+
records = get_scoreset_records(urn, True, store_path)
4347
except ScoresetNotSupportedError as e:
4448
return ScoresetMapping(
4549
metadata=None,
@@ -116,7 +120,7 @@ async def map_scoreset(urn: str) -> ScoresetMapping:
116120
)
117121

118122
try:
119-
raw_metadata = get_raw_scoreset_metadata(urn)
123+
raw_metadata = get_raw_scoreset_metadata(urn, store_path)
120124
preferred_layers = {
121125
_set_scoreset_layer(urn, vrs_results),
122126
}
@@ -132,7 +136,7 @@ async def map_scoreset(urn: str) -> ScoresetMapping:
132136
for layer in preferred_layers:
133137
reference_sequences[layer][
134138
"computed_reference_sequence"
135-
] = _get_computed_reference_sequence(urn, layer, transcript)
139+
] = _get_computed_reference_sequence(metadata, layer, transcript)
136140
reference_sequences[layer][
137141
"mapped_reference_sequence"
138142
] = _get_mapped_reference_sequence(layer, transcript, alignment_result)

src/dcd_mapping/annotate.py

Lines changed: 6 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
get_seqrepo,
3030
get_vrs_id_from_identifier,
3131
)
32-
from dcd_mapping.mavedb_data import get_raw_scoreset_metadata, get_scoreset_metadata
3332
from dcd_mapping.resource_utils import LOCAL_STORE_PATH
3433
from dcd_mapping.schemas import (
3534
AlignmentResult,
@@ -409,7 +408,7 @@ def annotate(
409408

410409

411410
def _get_computed_reference_sequence(
412-
ss: str,
411+
metadata: ScoresetMetadata,
413412
layer: AnnotationLayer,
414413
tx_output: TxSelectResult | None = None,
415414
) -> ComputedReferenceSequence:
@@ -429,7 +428,6 @@ def _get_computed_reference_sequence(
429428
sequence_type=TargetSequenceType.PROTEIN,
430429
sequence_id=seq_id,
431430
)
432-
metadata = get_scoreset_metadata(ss)
433431
seq_id = f"ga4gh:SQ.{sha512t24u(metadata.target_sequence.encode('ascii'))}"
434432
return ComputedReferenceSequence(
435433
sequence=metadata.target_sequence,
@@ -516,7 +514,7 @@ def write_scoreset_mapping_to_json(
516514

517515

518516
def save_mapped_output_json(
519-
urn: str,
517+
metadata: ScoresetMetadata,
520518
mappings: list[ScoreAnnotationWithLayer],
521519
align_result: AlignmentResult,
522520
tx_output: TxSelectResult | None,
@@ -533,10 +531,9 @@ def save_mapped_output_json(
533531
<dcd_mapping_data_dir>/urn:mavedb:00000XXX-X-X_mapping_<ISO8601 datetime>.json
534532
:return: output location
535533
"""
536-
metadata = get_raw_scoreset_metadata(urn)
537534
if preferred_layer_only:
538535
preferred_layers = {
539-
_set_scoreset_layer(urn, mappings),
536+
_set_scoreset_layer(metadata.urn, mappings),
540537
}
541538
else:
542539
preferred_layers = {mapping.annotation_layer for mapping in mappings}
@@ -549,20 +546,10 @@ def save_mapped_output_json(
549546
for layer in preferred_layers:
550547
reference_sequences[layer][
551548
"computed_reference_sequence"
552-
] = _get_computed_reference_sequence(urn, layer, tx_output)
549+
] = _get_computed_reference_sequence(metadata, layer, tx_output)
553550
reference_sequences[layer][
554551
"mapped_reference_sequence"
555552
] = _get_mapped_reference_sequence(layer, tx_output, align_result)
556-
# except Exception as e:
557-
# _logger.warning(
558-
# str(e)
559-
# )
560-
# output = ScoresetMapping(
561-
# metadata=metadata,
562-
# error_message = str(e).strip("'")
563-
# )
564-
565-
# return write_scoreset_mapping_to_json
566553

567554
mapped_scores: list[ScoreAnnotation] = []
568555
for m in mappings:
@@ -573,7 +560,7 @@ def save_mapped_output_json(
573560
mapped_scores.append(ScoreAnnotation(**m.model_dump()))
574561

575562
output = ScoresetMapping(
576-
metadata=metadata,
563+
metadata=metadata.model_dump(),
577564
computed_protein_reference_sequence=reference_sequences[
578565
AnnotationLayer.PROTEIN
579566
]["computed_reference_sequence"],
@@ -589,4 +576,4 @@ def save_mapped_output_json(
589576
mapped_scores=mapped_scores,
590577
)
591578

592-
return write_scoreset_mapping_to_json(urn, output, output_path)
579+
return write_scoreset_mapping_to_json(metadata.urn, output, output_path)

src/dcd_mapping/main.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
ScoresetNotSupportedError,
2525
get_scoreset_metadata,
2626
get_scoreset_records,
27+
with_mavedb_score_set,
2728
)
2829
from dcd_mapping.resource_utils import ResourceAcquisitionError
2930
from dcd_mapping.schemas import (
@@ -264,7 +265,7 @@ async def map_scoreset(
264265
return
265266
try:
266267
final_output = save_mapped_output_json(
267-
metadata.urn,
268+
metadata,
268269
vrs_results,
269270
alignment_result,
270271
transcript,
@@ -287,12 +288,14 @@ async def map_scoreset(
287288
_emit_info(f"Annotated scores saved to: {final_output}.", silent)
288289

289290

291+
@with_mavedb_score_set
290292
async def map_scoreset_urn(
291293
urn: str,
292294
output_path: Path | None = None,
293295
vrs_version: VrsVersion = VrsVersion.V_2,
294296
prefer_genomic: bool = False,
295297
silent: bool = True,
298+
store_path: Path | None = None,
296299
) -> None:
297300
"""Perform end-to-end mapping for a scoreset.
298301
@@ -302,8 +305,8 @@ async def map_scoreset_urn(
302305
:param silent: if True, suppress console information output
303306
"""
304307
try:
305-
metadata = get_scoreset_metadata(urn)
306-
records = get_scoreset_records(urn, silent)
308+
metadata = get_scoreset_metadata(urn, store_path)
309+
records = get_scoreset_records(urn, silent, store_path)
307310
except ScoresetNotSupportedError as e:
308311
_emit_info(f"Score set not supported: {e}", silent, logging.ERROR)
309312
final_output = write_scoreset_mapping_to_json(

src/dcd_mapping/mavedb_data.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,14 @@
22
33
Much of this can/should be replaced by the ``mavetools`` library? (and/or ``wags-tails``.)
44
"""
5+
56
import csv
67
import json
78
import logging
89
import tempfile
910
import zipfile
11+
from collections.abc import Callable
12+
from functools import wraps
1013
from pathlib import Path
1114
from typing import Any
1215

@@ -20,7 +23,7 @@
2023
authentication_header,
2124
http_download,
2225
)
23-
from dcd_mapping.schemas import ScoreRow, ScoresetMetadata, UniProtRef
26+
from dcd_mapping.schemas import ScoreRow, ScoresetMapping, ScoresetMetadata, UniProtRef
2427

2528
__all__ = [
2629
"get_scoreset_urns",
@@ -135,6 +138,7 @@ def get_raw_scoreset_metadata(
135138
"""
136139
if not dcd_mapping_dir:
137140
dcd_mapping_dir = LOCAL_STORE_PATH
141+
138142
metadata_file = dcd_mapping_dir / f"{scoreset_urn}_metadata.json"
139143
if not metadata_file.exists():
140144
url = f"{MAVEDB_BASE_URL}/api/v1/score-sets/{scoreset_urn}"
@@ -265,3 +269,27 @@ def get_scoreset_records(
265269
raise ResourceAcquisitionError(msg) from e
266270

267271
return _load_scoreset_records(scores_csv)
272+
273+
274+
def with_mavedb_score_set(fn: Callable) -> Callable:
    """Run an async mapping entry point against a per-call temporary data store.

    The returned wrapper creates a temporary directory inside
    ``LOCAL_STORE_PATH``, calls :func:`get_scoreset_metadata` and
    :func:`get_scoreset_records` with that directory so the score set's files
    are placed there up front, and then awaits ``fn`` with the directory passed
    as the ``store_path`` keyword argument. The directory is removed when the
    wrapped call finishes, whether it returns or raises.

    :param fn: coroutine function to wrap. It must accept a ``store_path``
        keyword argument and take the score set URN either as its first
        positional argument or as the ``urn`` keyword argument.
    :return: coroutine function with the same call signature as ``fn``; awaiting
        it yields whatever ``fn`` returns.
    :raise KeyError: (from the wrapper) if the URN is given neither
        positionally nor as ``urn``.
    :raise TypeError: (from the wrapper) if the supplied arguments cannot be
        bound to the signature of ``fn``.
    """
    # Local import: only needed here, to resolve ``silent`` from the call arguments.
    import inspect

    signature = inspect.signature(fn)

    @wraps(fn)
    async def wrapper(*args, **kwargs) -> ScoresetMapping:  # noqa: ANN002
        urn = args[0] if args else kwargs["urn"]
        # Honor ``silent`` whether it was passed by keyword or by position;
        # fall back to False when the caller did not supply it at all.
        silent = signature.bind_partial(*args, **kwargs).arguments.get(
            "silent", False
        )

        # ``dir=`` is the documented way to choose the parent directory; it does
        # not depend on how a path embedded in ``prefix`` happens to be joined.
        with tempfile.TemporaryDirectory(dir=LOCAL_STORE_PATH) as temp_dir:
            # Set up metadata and scores for the current run. Now they will be
            # accessible by these functions without the need to download the
            # data again.
            temp_dir_as_path = Path(temp_dir)
            get_scoreset_metadata(urn, temp_dir_as_path)
            get_scoreset_records(urn, silent, temp_dir_as_path)

            # Pass the storage path of the temp directory to the wrapped
            # function as a kwarg. Awaiting inside the ``with`` block keeps the
            # directory alive for the full duration of the wrapped call.
            kwargs["store_path"] = temp_dir_as_path
            return await fn(*args, **kwargs)

    return wrapper

0 commit comments

Comments
 (0)