Skip to content

Commit a68de0a

Browse files
committed
Support mapper update for multi-target score sets
1 parent ce029be commit a68de0a

File tree

1 file changed

+41
-39
lines changed

1 file changed

+41
-39
lines changed

src/mavedb/worker/jobs.py

Lines changed: 41 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
from mavedb.models.enums.processing_state import ProcessingState
5151
from mavedb.models.mapped_variant import MappedVariant
5252
from mavedb.models.published_variant import PublishedVariantsMV
53+
from mavedb.models.target_gene import TargetGene
5354
from mavedb.models.score_set import ScoreSet
5455
from mavedb.models.user import User
5556
from mavedb.models.variant import Variant
@@ -247,6 +248,12 @@ async def create_variants_for_score_set(
247248
# Mapping variants
248249
####################################################################################################
249250

251+
ANNOTATION_LAYERS = {
252+
"g": "genomic",
253+
"p": "protein",
254+
"c": "cdna",
255+
}
256+
250257

251258
@asynccontextmanager
252259
async def mapping_in_execution(redis: ArqRedis, job_id: str):
@@ -397,48 +404,43 @@ async def map_variants_for_score_set(
397404
# TODO(VariantEffect/dcd-mapping2#3) after adding accession-based score set mapping support:
398405
# this also assumes that the score set is based on a target sequence, not a target accession
399406

400-
computed_genomic_ref = mapping_results.get("computed_genomic_reference_sequence")
401-
mapped_genomic_ref = mapping_results.get("mapped_genomic_reference_sequence")
402-
computed_protein_ref = mapping_results.get("computed_protein_reference_sequence")
403-
mapped_protein_ref = mapping_results.get("mapped_protein_reference_sequence")
404-
405-
if computed_genomic_ref:
406-
target_sequence = computed_genomic_ref["sequence"] # noqa: F841
407-
elif computed_protein_ref:
408-
target_sequence = computed_protein_ref["sequence"] # noqa: F841
409-
else:
407+
reference_metadata = mapping_results.get("reference_sequences")
408+
if not reference_metadata:
410409
raise NonexistentMappingReferenceError()
411410

412-
# TODO(VariantEffect/dcd_mapping2#2): Handle variant mappings for score sets with more than 1 target.
413-
target_gene = score_set.target_genes[0]
414-
415-
excluded_pre_mapped_keys = {"sequence"}
416-
if computed_genomic_ref and mapped_genomic_ref:
417-
pre_mapped_metadata = computed_genomic_ref
418-
target_gene.pre_mapped_metadata = cast(
419-
{
420-
"genomic": {
421-
k: pre_mapped_metadata[k]
422-
for k in set(list(pre_mapped_metadata.keys())) - excluded_pre_mapped_keys
423-
}
424-
},
425-
JSONB,
426-
)
427-
target_gene.post_mapped_metadata = cast({"genomic": mapped_genomic_ref}, JSONB)
428-
elif computed_protein_ref and mapped_protein_ref:
429-
pre_mapped_metadata = computed_protein_ref
430-
target_gene.pre_mapped_metadata = cast(
431-
{
432-
"protein": {
433-
k: pre_mapped_metadata[k]
434-
for k in set(list(pre_mapped_metadata.keys())) - excluded_pre_mapped_keys
411+
for target_gene_identifier in reference_metadata:
412+
target_gene = db.scalars(
413+
select(
414+
TargetGene.where(
415+
TargetGene.name == target_gene_identifier, TargetGene.score_set_id == score_set.id
416+
)
417+
)
418+
).one_or_none()
419+
if not target_gene:
420+
raise ValueError(
421+
f"Target gene {target_gene_identifier} not found in database for score set {score_set.urn}."
422+
)
423+
# allow for multiple annotation layers
424+
pre_mapped_metadata = {}
425+
post_mapped_metadata = {}
426+
excluded_pre_mapped_keys = {"sequence"}
427+
for annotation_layer in reference_metadata[target_gene_identifier]:
428+
layer_premapped = reference_metadata[target_gene_identifier][annotation_layer].get(
429+
"computed_reference_sequence"
430+
)
431+
if layer_premapped:
432+
pre_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = {
433+
k: layer_premapped[k]
434+
for k in set(list(layer_premapped.keys()))
435+
- excluded_pre_mapped_keys # NOTE: set difference ignores keys that are absent, so this is safe when there is no 'sequence' key
435436
}
436-
},
437-
JSONB,
438-
)
439-
target_gene.post_mapped_metadata = cast({"protein": mapped_protein_ref}, JSONB)
440-
else:
441-
raise NonexistentMappingReferenceError()
437+
layer_postmapped = reference_metadata[target_gene_identifier][annotation_layer].get(
438+
"mapped_reference_sequence"
439+
)
440+
if layer_postmapped:
441+
post_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = layer_postmapped
442+
target_gene.pre_mapped_metadata = cast(pre_mapped_metadata, JSONB)
443+
target_gene.post_mapped_metadata = cast(post_mapped_metadata, JSONB)
442444

443445
total_variants = 0
444446
successful_mapped_variants = 0

0 commit comments

Comments
 (0)