|
50 | 50 | from mavedb.models.enums.processing_state import ProcessingState |
51 | 51 | from mavedb.models.mapped_variant import MappedVariant |
52 | 52 | from mavedb.models.published_variant import PublishedVariantsMV |
| 53 | +from mavedb.models.target_gene import TargetGene |
53 | 54 | from mavedb.models.score_set import ScoreSet |
54 | 55 | from mavedb.models.user import User |
55 | 56 | from mavedb.models.variant import Variant |
@@ -247,6 +248,12 @@ async def create_variants_for_score_set( |
247 | 248 | # Mapping variants |
248 | 249 | #################################################################################################### |
249 | 250 |
|
| 251 | +ANNOTATION_LAYERS = { |
| 252 | + "g": "genomic", |
| 253 | + "p": "protein", |
| 254 | + "c": "cdna", |
| 255 | +} |
| 256 | + |
250 | 257 |
|
251 | 258 | @asynccontextmanager |
252 | 259 | async def mapping_in_execution(redis: ArqRedis, job_id: str): |
@@ -397,48 +404,43 @@ async def map_variants_for_score_set( |
397 | 404 | # TODO(VariantEffect/dcd-mapping2#3) after adding accession-based score set mapping support: |
398 | 405 | # this also assumes that the score set is based on a target sequence, not a target accession |
399 | 406 |
|
400 | | - computed_genomic_ref = mapping_results.get("computed_genomic_reference_sequence") |
401 | | - mapped_genomic_ref = mapping_results.get("mapped_genomic_reference_sequence") |
402 | | - computed_protein_ref = mapping_results.get("computed_protein_reference_sequence") |
403 | | - mapped_protein_ref = mapping_results.get("mapped_protein_reference_sequence") |
404 | | - |
405 | | - if computed_genomic_ref: |
406 | | - target_sequence = computed_genomic_ref["sequence"] # noqa: F841 |
407 | | - elif computed_protein_ref: |
408 | | - target_sequence = computed_protein_ref["sequence"] # noqa: F841 |
409 | | - else: |
| 407 | + reference_metadata = mapping_results.get("reference_sequences") |
| 408 | + if not reference_metadata: |
410 | 409 | raise NonexistentMappingReferenceError() |
411 | 410 |
|
412 | | - # TODO(VariantEffect/dcd_mapping2#2): Handle variant mappings for score sets with more than 1 target. |
413 | | - target_gene = score_set.target_genes[0] |
414 | | - |
415 | | - excluded_pre_mapped_keys = {"sequence"} |
416 | | - if computed_genomic_ref and mapped_genomic_ref: |
417 | | - pre_mapped_metadata = computed_genomic_ref |
418 | | - target_gene.pre_mapped_metadata = cast( |
419 | | - { |
420 | | - "genomic": { |
421 | | - k: pre_mapped_metadata[k] |
422 | | - for k in set(list(pre_mapped_metadata.keys())) - excluded_pre_mapped_keys |
423 | | - } |
424 | | - }, |
425 | | - JSONB, |
426 | | - ) |
427 | | - target_gene.post_mapped_metadata = cast({"genomic": mapped_genomic_ref}, JSONB) |
428 | | - elif computed_protein_ref and mapped_protein_ref: |
429 | | - pre_mapped_metadata = computed_protein_ref |
430 | | - target_gene.pre_mapped_metadata = cast( |
431 | | - { |
432 | | - "protein": { |
433 | | - k: pre_mapped_metadata[k] |
434 | | - for k in set(list(pre_mapped_metadata.keys())) - excluded_pre_mapped_keys |
| 411 | + for target_gene_identifier in reference_metadata: |
| 412 | + target_gene = db.scalars( |
| 413 | + select( |
| 414 | + TargetGene.where( |
| 415 | + TargetGene.name == target_gene_identifier, TargetGene.score_set_id == score_set.id |
| 416 | + ) |
| 417 | + ) |
| 418 | + ).one_or_none() |
| 419 | + if not target_gene: |
| 420 | + raise ValueError( |
| 421 | + f"Target gene {target_gene_identifier} not found in database for score set {score_set.urn}." |
| 422 | + ) |
| 423 | + # allow for multiple annotation layers |
| 424 | + pre_mapped_metadata = {} |
| 425 | + post_mapped_metadata = {} |
| 426 | + excluded_pre_mapped_keys = {"sequence"} |
| 427 | + for annotation_layer in reference_metadata[target_gene_identifier]: |
| 428 | + layer_premapped = reference_metadata[target_gene_identifier][annotation_layer].get( |
| 429 | + "computed_reference_sequence" |
| 430 | + ) |
| 431 | + if layer_premapped: |
| 432 | + pre_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = { |
| 433 | + k: layer_premapped[k] |
| 434 | + for k in set(list(layer_premapped.keys())) |
| 435 | + - excluded_pre_mapped_keys # TODO does this work if no 'sequence' key? |
435 | 436 | } |
436 | | - }, |
437 | | - JSONB, |
438 | | - ) |
439 | | - target_gene.post_mapped_metadata = cast({"protein": mapped_protein_ref}, JSONB) |
440 | | - else: |
441 | | - raise NonexistentMappingReferenceError() |
| 437 | + layer_postmapped = reference_metadata[target_gene_identifier][annotation_layer].get( |
| 438 | + "mapped_reference_sequence" |
| 439 | + ) |
| 440 | + if layer_postmapped: |
| 441 | + post_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = layer_postmapped |
| 442 | + target_gene.pre_mapped_metadata = cast(pre_mapped_metadata, JSONB) |
| 443 | + target_gene.post_mapped_metadata = cast(post_mapped_metadata, JSONB) |
442 | 444 |
|
443 | 445 | total_variants = 0 |
444 | 446 | successful_mapped_variants = 0 |
|
0 commit comments