|
35 | 35 | NonexistentMappingResultsError, |
36 | 36 | ) |
37 | 37 | from mavedb.lib.logging.context import format_raised_exception_info_as_dict |
| 38 | +from mavedb.lib.mapping import ANNOTATION_LAYERS |
38 | 39 | from mavedb.lib.score_sets import ( |
39 | 40 | columns_for_dataset, |
40 | 41 | create_variants, |
@@ -390,55 +391,43 @@ async def map_variants_for_score_set( |
390 | 391 | score_set.mapping_state = MappingState.failed |
391 | 392 | score_set.mapping_errors = {"error_message": mapping_results.get("error_message")} |
392 | 393 | else: |
393 | | - # TODO(VariantEffect/dcd-mapping2#2) after adding multi target mapping support: |
394 | | - # this assumes single-target mapping, will need to be changed to support multi-target mapping |
395 | | - # just in case there are multiple target genes in the db for a score set (this point shouldn't be reached |
396 | | - # while we only support single-target mapping), match up the target sequence with the one in the computed genomic reference sequence. |
397 | | - # TODO(VariantEffect/dcd-mapping2#3) after adding accession-based score set mapping support: |
398 | | - # this also assumes that the score set is based on a target sequence, not a target accession |
399 | | - |
400 | | - computed_genomic_ref = mapping_results.get("computed_genomic_reference_sequence") |
401 | | - mapped_genomic_ref = mapping_results.get("mapped_genomic_reference_sequence") |
402 | | - computed_protein_ref = mapping_results.get("computed_protein_reference_sequence") |
403 | | - mapped_protein_ref = mapping_results.get("mapped_protein_reference_sequence") |
404 | | - |
405 | | - if computed_genomic_ref: |
406 | | - target_sequence = computed_genomic_ref["sequence"] # noqa: F841 |
407 | | - elif computed_protein_ref: |
408 | | - target_sequence = computed_protein_ref["sequence"] # noqa: F841 |
409 | | - else: |
| 394 | + reference_metadata = mapping_results.get("reference_sequences") |
| 395 | + if not reference_metadata: |
410 | 396 | raise NonexistentMappingReferenceError() |
411 | 397 |
|
412 | | - # TODO(VariantEffect/dcd_mapping2#2): Handle variant mappings for score sets with more than 1 target. |
413 | | - target_gene = score_set.target_genes[0] |
414 | | - |
415 | | - excluded_pre_mapped_keys = {"sequence"} |
416 | | - if computed_genomic_ref and mapped_genomic_ref: |
417 | | - pre_mapped_metadata = computed_genomic_ref |
418 | | - target_gene.pre_mapped_metadata = cast( |
419 | | - { |
420 | | - "genomic": { |
421 | | - k: pre_mapped_metadata[k] |
422 | | - for k in set(list(pre_mapped_metadata.keys())) - excluded_pre_mapped_keys |
423 | | - } |
424 | | - }, |
425 | | - JSONB, |
| 398 | + for target_gene_identifier in reference_metadata: |
| 399 | + target_gene = next( |
| 400 | + ( |
| 401 | + target_gene |
| 402 | + for target_gene in score_set.target_genes |
| 403 | + if target_gene.name == target_gene_identifier |
| 404 | + ), |
| 405 | + None, |
426 | 406 | ) |
427 | | - target_gene.post_mapped_metadata = cast({"genomic": mapped_genomic_ref}, JSONB) |
428 | | - elif computed_protein_ref and mapped_protein_ref: |
429 | | - pre_mapped_metadata = computed_protein_ref |
430 | | - target_gene.pre_mapped_metadata = cast( |
431 | | - { |
432 | | - "protein": { |
433 | | - k: pre_mapped_metadata[k] |
434 | | - for k in set(list(pre_mapped_metadata.keys())) - excluded_pre_mapped_keys |
| 407 | + if not target_gene: |
| 408 | + raise ValueError( |
| 409 | + f"Target gene {target_gene_identifier} not found in database for score set {score_set.urn}." |
| 410 | + ) |
| 411 | + # allow for multiple annotation layers |
| 412 | + pre_mapped_metadata = {} |
| 413 | + post_mapped_metadata = {} |
| 414 | + excluded_pre_mapped_keys = {"sequence"} |
| 415 | + for annotation_layer in reference_metadata[target_gene_identifier]: |
| 416 | + layer_premapped = reference_metadata[target_gene_identifier][annotation_layer].get( |
| 417 | + "computed_reference_sequence" |
| 418 | + ) |
| 419 | + if layer_premapped: |
| 420 | + pre_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = { |
| 421 | + k: layer_premapped[k] |
| 422 | + for k in set(list(layer_premapped.keys())) - excluded_pre_mapped_keys |
435 | 423 | } |
436 | | - }, |
437 | | - JSONB, |
438 | | - ) |
439 | | - target_gene.post_mapped_metadata = cast({"protein": mapped_protein_ref}, JSONB) |
440 | | - else: |
441 | | - raise NonexistentMappingReferenceError() |
| 424 | + layer_postmapped = reference_metadata[target_gene_identifier][annotation_layer].get( |
| 425 | + "mapped_reference_sequence" |
| 426 | + ) |
| 427 | + if layer_postmapped: |
| 428 | + post_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = layer_postmapped |
| 429 | + target_gene.pre_mapped_metadata = cast(pre_mapped_metadata, JSONB) |
| 430 | + target_gene.post_mapped_metadata = cast(post_mapped_metadata, JSONB) |
442 | 431 |
|
443 | 432 | total_variants = 0 |
444 | 433 | successful_mapped_variants = 0 |
|
0 commit comments