66from fastapi .responses import JSONResponse
77from requests import HTTPError
88
9- from dcd_mapping .align import AlignmentError , BlatNotFoundError , align
9+ from dcd_mapping .align import AlignmentError , BlatNotFoundError , build_alignment_result
1010from dcd_mapping .annotate import (
1111 _get_computed_reference_sequence ,
1212 _get_mapped_reference_sequence ,
2222 with_mavedb_score_set ,
2323)
2424from dcd_mapping .resource_utils import ResourceAcquisitionError
25- from dcd_mapping .schemas import ScoreAnnotation , ScoresetMapping , VrsVersion
26- from dcd_mapping .transcripts import TxSelectError , select_transcript
25+ from dcd_mapping .schemas import (
26+ ScoreAnnotation ,
27+ ScoresetMapping ,
28+ TargetType ,
29+ TxSelectResult ,
30+ VrsVersion ,
31+ )
32+ from dcd_mapping .transcripts import select_transcripts
2733from dcd_mapping .vrs_map import VrsMapError , vrs_map
2834
2935router = APIRouter (
3339
3440@router .post (path = "/map/{urn}" , status_code = 200 , response_model = ScoresetMapping )
3541@with_mavedb_score_set
36- async def map_scoreset (urn : str , store_path : Path | None = None ) -> ScoresetMapping :
42+ async def map_scoreset (urn : str , store_path : Path | None = None ) -> JSONResponse :
3743 """Perform end-to-end mapping for a scoreset.
3844
3945 :param urn: identifier for a scoreset.
40- :param output_path: optional path to save output at
41- :param vrs_version: version of VRS objects to output (1.3 or 2)
42- :param silent: if True, suppress console information output
46+ :param store_path: optional path to save output at
4347 """
4448 try :
4549 metadata = get_scoreset_metadata (urn , store_path )
46- records = get_scoreset_records (urn , True , store_path )
50+ records = get_scoreset_records (metadata , True , store_path )
4751 except ScoresetNotSupportedError as e :
48- return ScoresetMapping (
49- metadata = None ,
50- error_message = str (e ).strip ("'" ),
52+ return JSONResponse (
53+ content = ScoresetMapping (
54+ metadata = None ,
55+ error_message = str (e ).strip ("'" ),
56+ ).model_dump (exclude_none = True )
5157 )
5258 except ResourceAcquisitionError as e :
5359 msg = f"Unable to acquire resource from MaveDB: { e } "
@@ -62,7 +68,7 @@ async def map_scoreset(urn: str, store_path: Path | None = None) -> ScoresetMapp
6268 )
6369
6470 try :
65- alignment_result = align (metadata , True )
71+ alignment_results = build_alignment_result (metadata , True )
6672 except BlatNotFoundError as e :
6773 msg = "BLAT command appears missing. Ensure it is available on the $PATH or use the environment variable BLAT_BIN_PATH to point to it. See instructions in the README prerequisites section for more."
6874 raise HTTPException (status_code = 500 , detail = msg ) from e
@@ -75,77 +81,151 @@ async def map_scoreset(urn: str, store_path: Path | None = None) -> ScoresetMapp
7581 metadata = metadata , error_message = str (e ).strip ("'" )
7682 ).model_dump (exclude_none = True )
7783 )
78-
79- try :
80- transcript = await select_transcript (metadata , records , alignment_result )
81- except (TxSelectError , KeyError , ValueError ) as e :
84+ except ScoresetNotSupportedError as e :
8285 return JSONResponse (
8386 content = ScoresetMapping (
8487 metadata = metadata , error_message = str (e ).strip ("'" )
8588 ).model_dump (exclude_none = True )
8689 )
90+
91+ try :
92+ transcripts = await select_transcripts (metadata , records , alignment_results )
93+ # NOTE: transcript selection errors are handled in select_transcripts,
94+ # and they do not cause the entire mapping process to exit; instead, an error will be reported
95+ # on the target level and on the variant level for variants relative to that target
96+ # HTTPErrors and DataLookupErrors cause the mapping process to exit because these indicate
97+ # underlying issues with data providers.
8798 except HTTPError as e :
8899 msg = f"HTTP error occurred during transcript selection: { e } "
89100 raise HTTPException (status_code = 500 , detail = msg ) from e
90101 except DataLookupError as e :
91102 msg = f"Data lookup error occurred during transcript selection: { e } "
92103 raise HTTPException (status_code = 500 , detail = msg ) from e
93104
105+ vrs_results = {}
94106 try :
95- vrs_results = vrs_map (metadata , alignment_result , records , transcript , True )
107+ for target_gene in metadata .target_genes :
108+ vrs_results [target_gene ] = vrs_map (
109+ metadata = metadata .target_genes [target_gene ],
110+ align_result = alignment_results [target_gene ],
111+ records = records [target_gene ],
112+ transcript = transcripts [target_gene ],
113+ silent = True ,
114+ )
96115 except VrsMapError as e :
97116 return JSONResponse (
98117 content = ScoresetMapping (
99118 metadata = metadata , error_message = str (e ).strip ("'" )
100119 ).model_dump (exclude_none = True )
101120 )
102- if vrs_results is None :
103- return ScoresetMapping (
104- metadata = metadata ,
105- error_message = "No variant mappings available for this score set" ,
121+ if not vrs_results or all (
122+ mapping_result is None for mapping_result in vrs_results .values ()
123+ ):
124+ return JSONResponse (
125+ content = ScoresetMapping (
126+ metadata = metadata ,
127+ error_message = "No variant mappings available for this score set" ,
128+ ).model_dump (exclude_none = True )
106129 )
107130
131+ annotated_vrs_results = {}
108132 try :
109- vrs_results = annotate (vrs_results , transcript , metadata , VrsVersion .V_2 )
133+ for target_gene in vrs_results :
134+ annotated_vrs_results [target_gene ] = annotate (
135+ vrs_results [target_gene ],
136+ transcripts [target_gene ],
137+ metadata .target_genes [target_gene ],
138+ metadata .urn ,
139+ VrsVersion .V_2 ,
140+ )
110141 except Exception as e :
111142 return JSONResponse (
112143 content = ScoresetMapping (
113144 metadata = metadata , error_message = str (e ).strip ("'" )
114145 ).model_dump (exclude_none = True )
115146 )
116- if vrs_results is None :
117- return ScoresetMapping (
118- metadata = metadata ,
119- error_message = "No annotated variant mappings available for this score set" ,
147+ if not annotated_vrs_results or all (
148+ mapping_result is None for mapping_result in annotated_vrs_results .values ()
149+ ):
150+ return JSONResponse (
151+ content = ScoresetMapping (
152+ metadata = metadata ,
153+ error_message = "No annotated variant mappings available for this score set" ,
154+ ).model_dump (exclude_none = True )
120155 )
121156
122157 try :
123158 raw_metadata = get_raw_scoreset_metadata (urn , store_path )
124- preferred_layers = {
125- _set_scoreset_layer (urn , vrs_results ),
126- }
127-
128- reference_sequences = {
129- layer : {
130- "computed_reference_sequence" : None ,
131- "mapped_reference_sequence" : None ,
159+ reference_sequences : dict [str , dict ] = {}
160+ mapped_scores : list [ScoreAnnotation ] = []
161+ for target_gene in annotated_vrs_results :
162+ preferred_layers = {
163+ _set_scoreset_layer (urn , annotated_vrs_results [target_gene ]),
132164 }
133- for layer in AnnotationLayer
134- }
135-
136- for layer in preferred_layers :
137- reference_sequences [layer ][
138- "computed_reference_sequence"
139- ] = _get_computed_reference_sequence (metadata , layer , transcript )
140- reference_sequences [layer ][
141- "mapped_reference_sequence"
142- ] = _get_mapped_reference_sequence (layer , transcript , alignment_result )
165+ target_gene_name = metadata .target_genes [target_gene ].target_gene_name
166+ reference_sequences [target_gene_name ] = {
167+ layer : {
168+ "computed_reference_sequence" : None ,
169+ "mapped_reference_sequence" : None ,
170+ }
171+ for layer in preferred_layers
172+ }
173+ # sometimes Nonetype layers show up in preferred layers dict; remove these
174+ preferred_layers .discard (None )
175+ for layer in preferred_layers :
176+ reference_sequences [target_gene_name ][layer ][
177+ "computed_reference_sequence"
178+ ] = _get_computed_reference_sequence (
179+ metadata .target_genes [target_gene ], layer , transcripts [target_gene ]
180+ )
181+ reference_sequences [target_gene_name ][layer ][
182+ "mapped_reference_sequence"
183+ ] = _get_mapped_reference_sequence (
184+ metadata .target_genes [target_gene ],
185+ layer ,
186+ transcripts [target_gene ],
187+ alignment_results [target_gene ],
188+ )
189+
190+ for m in annotated_vrs_results [target_gene ]:
191+ if m .pre_mapped is None :
192+ mapped_scores .append (ScoreAnnotation (** m .model_dump ()))
193+ elif m .annotation_layer in preferred_layers :
194+ # drop annotation layer from mapping object
195+ mapped_scores .append (ScoreAnnotation (** m .model_dump ()))
196+
197+ # drop Nonetype reference sequences
198+ for target_gene in reference_sequences :
199+ for layer in list (reference_sequences [target_gene ].keys ()):
200+ if (
201+ reference_sequences [target_gene ][layer ][
202+ "mapped_reference_sequence"
203+ ]
204+ is None
205+ and reference_sequences [target_gene ][layer ][
206+ "computed_reference_sequence"
207+ ]
208+ is None
209+ ) or layer is None :
210+ del reference_sequences [target_gene ][layer ]
211+
212+ # if genomic layer, not accession-based, and target gene type is coding, add cdna entry (just the sequence accession) to reference_sequences dict
213+ if (
214+ AnnotationLayer .GENOMIC in reference_sequences [target_gene_name ]
215+ and metadata .target_genes [target_gene ].target_gene_category
216+ == TargetType .PROTEIN_CODING
217+ and metadata .target_genes [target_gene ].target_accession_id is None
218+ and transcripts [target_gene ] is not None
219+ and isinstance (transcripts [target_gene ], TxSelectResult )
220+ and transcripts [target_gene ].nm is not None
221+ ):
222+ reference_sequences [target_gene_name ][AnnotationLayer .CDNA ] = {
223+ "computed_reference_sequence" : None ,
224+ "mapped_reference_sequence" : {
225+ "sequence_accessions" : [transcripts [target_gene ].nm ]
226+ },
227+ }
143228
144- mapped_scores : list [ScoreAnnotation ] = []
145- for m in vrs_results :
146- if m .annotation_layer in preferred_layers :
147- # drop annotation layer from mapping object
148- mapped_scores .append (ScoreAnnotation (** m .model_dump ()))
149229 except Exception as e :
150230 return JSONResponse (
151231 content = ScoresetMapping (
@@ -156,18 +236,7 @@ async def map_scoreset(urn: str, store_path: Path | None = None) -> ScoresetMapp
156236 return JSONResponse (
157237 content = ScoresetMapping (
158238 metadata = raw_metadata ,
159- computed_protein_reference_sequence = reference_sequences [
160- AnnotationLayer .PROTEIN
161- ]["computed_reference_sequence" ],
162- mapped_protein_reference_sequence = reference_sequences [
163- AnnotationLayer .PROTEIN
164- ]["mapped_reference_sequence" ],
165- computed_genomic_reference_sequence = reference_sequences [
166- AnnotationLayer .GENOMIC
167- ]["computed_reference_sequence" ],
168- mapped_genomic_reference_sequence = reference_sequences [
169- AnnotationLayer .GENOMIC
170- ]["mapped_reference_sequence" ],
239+ reference_sequences = reference_sequences ,
171240 mapped_scores = mapped_scores ,
172241 ).model_dump (exclude_none = True )
173242 )
0 commit comments