|
| 1 | +"""Provide mapping router""" |
| 2 | +from cool_seq_tool.schemas import AnnotationLayer |
| 3 | +from fastapi import APIRouter, HTTPException |
| 4 | +from fastapi.responses import JSONResponse |
| 5 | +from requests import HTTPError |
| 6 | + |
| 7 | +from dcd_mapping.align import AlignmentError, BlatNotFoundError, align |
| 8 | +from dcd_mapping.annotate import ( |
| 9 | + _get_computed_reference_sequence, |
| 10 | + _get_mapped_reference_sequence, |
| 11 | + _set_scoreset_layer, |
| 12 | + annotate, |
| 13 | +) |
| 14 | +from dcd_mapping.lookup import DataLookupError |
| 15 | +from dcd_mapping.mavedb_data import ( |
| 16 | + ScoresetNotSupportedError, |
| 17 | + get_raw_scoreset_metadata, |
| 18 | + get_scoreset_metadata, |
| 19 | + get_scoreset_records, |
| 20 | +) |
| 21 | +from dcd_mapping.resource_utils import ResourceAcquisitionError |
| 22 | +from dcd_mapping.schemas import ScoreAnnotation, ScoresetMapping, VrsVersion |
| 23 | +from dcd_mapping.transcripts import TxSelectError, select_transcript |
| 24 | +from dcd_mapping.vrs_map import VrsMapError, vrs_map |
| 25 | + |
# Router that groups the mapping endpoints under the versioned API prefix.
router = APIRouter(
    prefix="/api/v1",
    tags=["mappings"],
    responses={404: {"description": "Not found"}},
)
| 29 | + |
| 30 | + |
def _mapping_response(mapping: ScoresetMapping) -> JSONResponse:
    """Serialize a mapping result, omitting every field whose value is None.

    :param mapping: mapping result (successful or error-bearing) to send back
    :return: JSON response built from the dumped model
    """
    return JSONResponse(content=mapping.model_dump(exclude_none=True))


def _error_response(metadata, error: object) -> JSONResponse:  # noqa: ANN001
    """Build the response reporting why a score set could not be mapped.

    :param metadata: score set metadata to echo back, or None if it could not
        be retrieved
    :param error: exception or plain message describing the failure
    :return: JSON response whose body carries the message in ``error_message``
    """
    # str() of a KeyError is the repr of the missing key, so strip the single
    # quotes that would otherwise wrap the message.
    return _mapping_response(
        ScoresetMapping(metadata=metadata, error_message=str(error).strip("'"))
    )


@router.post(path="/map/{urn}", status_code=200, response_model=ScoresetMapping)
async def map_scoreset(urn: str) -> ScoresetMapping:
    """Perform end-to-end mapping for a scoreset.

    Runs, in order: metadata/record retrieval, alignment, transcript selection,
    VRS mapping and annotation, then assembles the reference sequences and
    mapped scores for the preferred annotation layer.

    Failures that are specific to the score set (unsupported score set,
    alignment, transcript selection, VRS mapping or annotation errors, or no
    mappings produced) are reported in the ``error_message`` field of the
    response body rather than as an HTTP error. Every response body is dumped
    with ``exclude_none=True`` so all paths share the same shape.

    :param urn: identifier for a scoreset.
    :return: JSON response containing the dumped ``ScoresetMapping``
    :raise HTTPException: with status 500 if a required resource cannot be
        acquired, the BLAT binary is missing, or an HTTP/data lookup error
        occurs during transcript selection
    """
    try:
        metadata = get_scoreset_metadata(urn)
        records = get_scoreset_records(urn, True)
    except ScoresetNotSupportedError as e:
        # No metadata is available at this point, so only the message is sent.
        return _error_response(None, e)
    except ResourceAcquisitionError as e:
        msg = f"Unable to acquire resource from MaveDB: {e}"
        raise HTTPException(status_code=500, detail=msg) from e

    try:
        alignment_result = align(metadata, True)
    except BlatNotFoundError as e:
        msg = "BLAT command appears missing. Ensure it is available on the $PATH or use the environment variable BLAT_BIN_PATH to point to it. See instructions in the README prerequisites section for more."
        raise HTTPException(status_code=500, detail=msg) from e
    except ResourceAcquisitionError as e:
        msg = f"BLAT resource could not be acquired: {e}"
        raise HTTPException(status_code=500, detail=msg) from e
    except AlignmentError as e:
        return _error_response(metadata, e)

    try:
        transcript = await select_transcript(metadata, records, alignment_result)
    except (TxSelectError, KeyError, ValueError) as e:
        return _error_response(metadata, e)
    except HTTPError as e:
        msg = f"HTTP error occurred during transcript selection: {e}"
        raise HTTPException(status_code=500, detail=msg) from e
    except DataLookupError as e:
        msg = f"Data lookup error occurred during transcript selection: {e}"
        raise HTTPException(status_code=500, detail=msg) from e

    try:
        vrs_results = vrs_map(metadata, alignment_result, records, transcript, True)
    except VrsMapError as e:
        return _error_response(metadata, e)
    if vrs_results is None:
        return _error_response(
            metadata, "No variant mappings available for this score set"
        )

    try:
        vrs_results = annotate(vrs_results, transcript, metadata, VrsVersion.V_2)
    except Exception as e:  # noqa: BLE001 - any annotation failure is reported to the client
        return _error_response(metadata, e)
    if vrs_results is None:
        return _error_response(
            metadata, "No annotated variant mappings available for this score set"
        )

    try:
        raw_metadata = get_raw_scoreset_metadata(urn)
        preferred_layers = {_set_scoreset_layer(urn, vrs_results)}

        # Start with empty slots for every layer; only preferred layers are filled.
        reference_sequences = {
            layer: {
                "computed_reference_sequence": None,
                "mapped_reference_sequence": None,
            }
            for layer in AnnotationLayer
        }
        for layer in preferred_layers:
            reference_sequences[layer][
                "computed_reference_sequence"
            ] = _get_computed_reference_sequence(urn, layer, transcript)
            reference_sequences[layer][
                "mapped_reference_sequence"
            ] = _get_mapped_reference_sequence(layer, transcript, alignment_result)

        # drop annotation layer from mapping object
        mapped_scores: list[ScoreAnnotation] = [
            ScoreAnnotation(**m.model_dump())
            for m in vrs_results
            if m.annotation_layer in preferred_layers
        ]
    except Exception as e:  # noqa: BLE001 - any assembly failure is reported to the client
        return _error_response(metadata, e)

    protein_sequences = reference_sequences[AnnotationLayer.PROTEIN]
    genomic_sequences = reference_sequences[AnnotationLayer.GENOMIC]
    return _mapping_response(
        ScoresetMapping(
            metadata=raw_metadata,
            computed_protein_reference_sequence=protein_sequences[
                "computed_reference_sequence"
            ],
            mapped_protein_reference_sequence=protein_sequences[
                "mapped_reference_sequence"
            ],
            computed_genomic_reference_sequence=genomic_sequences[
                "computed_reference_sequence"
            ],
            mapped_genomic_reference_sequence=genomic_sequences[
                "mapped_reference_sequence"
            ],
            mapped_scores=mapped_scores,
        )
    )
0 commit comments