|
8 | 8 | from sqlalchemy.orm import Session |
9 | 9 |
|
10 | 10 | from mavedb.data_providers.services import vrs_mapper |
| 11 | +from mavedb.lib.exceptions import NonexistentMappingReferenceError |
11 | 12 | from mavedb.lib.logging.context import format_raised_exception_info_as_dict |
| 13 | +from mavedb.lib.mapping import ANNOTATION_LAYERS |
12 | 14 | from mavedb.models.enums.mapping_state import MappingState |
13 | 15 | from mavedb.models.score_set import ScoreSet |
14 | 16 | from mavedb.models.mapped_variant import MappedVariant |
15 | | -from mavedb.models.target_gene import TargetGene |
16 | 17 | from mavedb.models.variant import Variant |
17 | 18 |
|
18 | 19 | from mavedb.scripts.environment import script_environment, with_database_session |
@@ -91,47 +92,43 @@ def populate_mapped_variant_data(db: Session, urns: Sequence[Optional[str]], all |
91 | 92 | db.commit() |
92 | 93 | logger.info(f"No mapped variants available for {score_set.urn}.") |
93 | 94 | else: |
94 | | - computed_genomic_ref = mapped_scoreset.get("computed_genomic_reference_sequence") |
95 | | - mapped_genomic_ref = mapped_scoreset.get("mapped_genomic_reference_sequence") |
96 | | - computed_protein_ref = mapped_scoreset.get("computed_protein_reference_sequence") |
97 | | - mapped_protein_ref = mapped_scoreset.get("mapped_protein_reference_sequence") |
98 | | - |
99 | | - # assumes one target gene per score set, which is currently true in mavedb as of sept. 2024. |
100 | | - target_gene = db.scalars( |
101 | | - select(TargetGene) |
102 | | - .join(ScoreSet) |
103 | | - .where( |
104 | | - ScoreSet.urn == str(score_set.urn), |
| 95 | + reference_metadata = mapped_scoreset.get("reference_sequences") |
| 96 | + if not reference_metadata: |
| 97 | + raise NonexistentMappingReferenceError() |
| 98 | + |
| 99 | + for target_gene_identifier in reference_metadata: |
| 100 | + target_gene = next( |
| 101 | + ( |
| 102 | + target_gene |
| 103 | + for target_gene in score_set.target_genes |
| 104 | + if target_gene.name == target_gene_identifier |
| 105 | + ), |
| 106 | + None, |
105 | 107 | ) |
106 | | - ).one() |
107 | | - |
108 | | - excluded_pre_mapped_keys = {"sequence"} |
109 | | - if computed_genomic_ref and mapped_genomic_ref: |
110 | | - pre_mapped_metadata = computed_genomic_ref |
111 | | - target_gene.pre_mapped_metadata = cast( |
112 | | - { |
113 | | - "genomic": { |
114 | | - k: pre_mapped_metadata[k] |
115 | | - for k in set(list(pre_mapped_metadata.keys())) - excluded_pre_mapped_keys |
| 108 | + if not target_gene: |
| 109 | + raise ValueError( |
| 110 | + f"Target gene {target_gene_identifier} not found in database for score set {score_set.urn}." |
| 111 | + ) |
| 112 | + # allow for multiple annotation layers |
| 113 | + pre_mapped_metadata = {} |
| 114 | + post_mapped_metadata = {} |
| 115 | + excluded_pre_mapped_keys = {"sequence"} |
| 116 | + for annotation_layer in reference_metadata[target_gene_identifier]: |
| 117 | + layer_premapped = reference_metadata[target_gene_identifier][annotation_layer].get( |
| 118 | + "computed_reference_sequence" |
| 119 | + ) |
| 120 | + if layer_premapped: |
| 121 | + pre_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = { |
| 122 | + k: layer_premapped[k] |
| 123 | + for k in set(list(layer_premapped.keys())) - excluded_pre_mapped_keys |
116 | 124 | } |
117 | | - }, |
118 | | - JSONB, |
119 | | - ) |
120 | | - target_gene.post_mapped_metadata = cast({"genomic": mapped_genomic_ref}, JSONB) |
121 | | - elif computed_protein_ref and mapped_protein_ref: |
122 | | - pre_mapped_metadata = computed_protein_ref |
123 | | - target_gene.pre_mapped_metadata = cast( |
124 | | - { |
125 | | - "protein": { |
126 | | - k: pre_mapped_metadata[k] |
127 | | - for k in set(list(pre_mapped_metadata.keys())) - excluded_pre_mapped_keys |
128 | | - } |
129 | | - }, |
130 | | - JSONB, |
131 | | - ) |
132 | | - target_gene.post_mapped_metadata = cast({"protein": mapped_protein_ref}, JSONB) |
133 | | - else: |
134 | | - raise ValueError(f"incomplete or inconsistent metadata for score set {score_set.urn}") |
| 125 | + layer_postmapped = reference_metadata[target_gene_identifier][annotation_layer].get( |
| 126 | + "mapped_reference_sequence" |
| 127 | + ) |
| 128 | + if layer_postmapped: |
| 129 | + post_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = layer_postmapped |
| 130 | + target_gene.pre_mapped_metadata = cast(pre_mapped_metadata, JSONB) |
| 131 | + target_gene.post_mapped_metadata = cast(post_mapped_metadata, JSONB) |
135 | 132 |
|
136 | 133 | mapped_variants = [ |
137 | 134 | variant_from_mapping(db=db, mapping=mapped_score, dcd_mapping_version=dcd_mapping_version) |
|
0 commit comments