Skip to content

Commit 06d1996

Browse files
committed
Update VRSMap class and script for multi-target mapper changes
1 parent 0d6efc6 commit 06d1996

File tree

2 files changed

+39
-45
lines changed

2 files changed

+39
-45
lines changed

src/mavedb/lib/mapping.py

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
from datetime import date
2-
from typing import Optional, TypedDict
2+
from typing import Optional, TypedDict, Union
33

44
import requests
55

@@ -17,10 +17,7 @@ class ScoreSetMappingResults(TypedDict):
1717
metadata: Optional[dict[str, str]]
1818
dcd_mapping_version: str
1919
mapped_date_utc: date
20-
computed_genomic_reference_sequence: Optional[dict[str, str]]
21-
mapped_genomic_reference_sequence: Optional[dict[str, str]]
22-
computed_protein_reference_sequence: Optional[dict[str, str]]
23-
mapped_protein_reference_sequence: Optional[dict[str, str]]
20+
reference_sequences: Optional[dict[str, dict[str, dict[str, dict[str, Union[str, list[str]]]]]]]
2421
mapped_scores: Optional[list[dict]]
2522
error_message: Optional[str]
2623

src/mavedb/scripts/populate_mapped_variants.py

Lines changed: 37 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -8,11 +8,12 @@
88
from sqlalchemy.orm import Session
99

1010
from mavedb.data_providers.services import vrs_mapper
11+
from mavedb.lib.exceptions import NonexistentMappingReferenceError
1112
from mavedb.lib.logging.context import format_raised_exception_info_as_dict
13+
from mavedb.lib.mapping import ANNOTATION_LAYERS
1214
from mavedb.models.enums.mapping_state import MappingState
1315
from mavedb.models.score_set import ScoreSet
1416
from mavedb.models.mapped_variant import MappedVariant
15-
from mavedb.models.target_gene import TargetGene
1617
from mavedb.models.variant import Variant
1718

1819
from mavedb.scripts.environment import script_environment, with_database_session
@@ -91,47 +92,43 @@ def populate_mapped_variant_data(db: Session, urns: Sequence[Optional[str]], all
9192
db.commit()
9293
logger.info(f"No mapped variants available for {score_set.urn}.")
9394
else:
94-
computed_genomic_ref = mapped_scoreset.get("computed_genomic_reference_sequence")
95-
mapped_genomic_ref = mapped_scoreset.get("mapped_genomic_reference_sequence")
96-
computed_protein_ref = mapped_scoreset.get("computed_protein_reference_sequence")
97-
mapped_protein_ref = mapped_scoreset.get("mapped_protein_reference_sequence")
98-
99-
# assumes one target gene per score set, which is currently true in mavedb as of sept. 2024.
100-
target_gene = db.scalars(
101-
select(TargetGene)
102-
.join(ScoreSet)
103-
.where(
104-
ScoreSet.urn == str(score_set.urn),
95+
reference_metadata = mapped_scoreset.get("reference_sequences")
96+
if not reference_metadata:
97+
raise NonexistentMappingReferenceError()
98+
99+
for target_gene_identifier in reference_metadata:
100+
target_gene = next(
101+
(
102+
target_gene
103+
for target_gene in score_set.target_genes
104+
if target_gene.name == target_gene_identifier
105+
),
106+
None,
105107
)
106-
).one()
107-
108-
excluded_pre_mapped_keys = {"sequence"}
109-
if computed_genomic_ref and mapped_genomic_ref:
110-
pre_mapped_metadata = computed_genomic_ref
111-
target_gene.pre_mapped_metadata = cast(
112-
{
113-
"genomic": {
114-
k: pre_mapped_metadata[k]
115-
for k in set(list(pre_mapped_metadata.keys())) - excluded_pre_mapped_keys
108+
if not target_gene:
109+
raise ValueError(
110+
f"Target gene {target_gene_identifier} not found in database for score set {score_set.urn}."
111+
)
112+
# allow for multiple annotation layers
113+
pre_mapped_metadata = {}
114+
post_mapped_metadata = {}
115+
excluded_pre_mapped_keys = {"sequence"}
116+
for annotation_layer in reference_metadata[target_gene_identifier]:
117+
layer_premapped = reference_metadata[target_gene_identifier][annotation_layer].get(
118+
"computed_reference_sequence"
119+
)
120+
if layer_premapped:
121+
pre_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = {
122+
k: layer_premapped[k]
123+
for k in set(list(layer_premapped.keys())) - excluded_pre_mapped_keys
116124
}
117-
},
118-
JSONB,
119-
)
120-
target_gene.post_mapped_metadata = cast({"genomic": mapped_genomic_ref}, JSONB)
121-
elif computed_protein_ref and mapped_protein_ref:
122-
pre_mapped_metadata = computed_protein_ref
123-
target_gene.pre_mapped_metadata = cast(
124-
{
125-
"protein": {
126-
k: pre_mapped_metadata[k]
127-
for k in set(list(pre_mapped_metadata.keys())) - excluded_pre_mapped_keys
128-
}
129-
},
130-
JSONB,
131-
)
132-
target_gene.post_mapped_metadata = cast({"protein": mapped_protein_ref}, JSONB)
133-
else:
134-
raise ValueError(f"incomplete or inconsistent metadata for score set {score_set.urn}")
125+
layer_postmapped = reference_metadata[target_gene_identifier][annotation_layer].get(
126+
"mapped_reference_sequence"
127+
)
128+
if layer_postmapped:
129+
post_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = layer_postmapped
130+
target_gene.pre_mapped_metadata = cast(pre_mapped_metadata, JSONB)
131+
target_gene.post_mapped_metadata = cast(post_mapped_metadata, JSONB)
135132

136133
mapped_variants = [
137134
variant_from_mapping(db=db, mapping=mapped_score, dcd_mapping_version=dcd_mapping_version)

0 commit comments

Comments
 (0)