Skip to content

Commit 3f92d23

Browse files
committed
wip: VA-Spec implementation for mapped variants
1 parent 18ca242 commit 3f92d23

File tree

9 files changed

+327
-0
lines changed

9 files changed

+327
-0
lines changed

src/mavedb/lib/annotation/agent.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import logging
2+
3+
from ga4gh.core.entity_models import Agent, Extension, AgentSubtype
4+
5+
from mavedb import __version__
6+
7+
logger = logging.getLogger(__name__)
8+
9+
10+
def mavedb_api_agent():
    """Build the GA4GH ``Agent`` that represents the MaveDB API software.

    The package ``__version__`` appears twice: in the agent's free-text
    description and as the value of an attached ``mavedb_api_version`` extension.
    """
    return Agent(
        subtype=AgentSubtype.SOFTWARE,
        label="mavedb api",
        description=f"mavedb api agent, version {__version__}",
        extensions=[
            Extension(
                name="mavedb_api_version",
                value=__version__,
                # TODO: stitched from constant base URL?
                description="The MaveDB API version used to generate this record. See: https://github.com/VariantEffect/mavedb-api/releases",
            )
        ],
    )
24+
25+
26+
def mavedb_vrs_agent(version: str):
    """Build the GA4GH ``Agent`` that represents the VRS mapping software.

    Args:
        version: Version string of the mapper; stored on a ``mavedb_vrs_version``
            extension attached to the agent.
    """
    mapper_version_extension = Extension(
        name="mavedb_vrs_version",
        value=version,
        # TODO: stitched from constant base URL?
        description="The VRS mapping version used to generate this record. See: https://github.com/VariantEffect/dcd_mapping2/releases",
    )

    # The description interpolates the value held by the extension, not the raw argument.
    agent_description = f"mavedb vrs mapping agent, version {mapper_version_extension.value}"

    return Agent(
        subtype=AgentSubtype.SOFTWARE,
        label="mavedb vrs mapper",
        description=agent_description,
        extensions=[mapper_version_extension],
    )
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import logging
2+
3+
from fastapi.encoders import jsonable_encoder
4+
from ga4gh.va_spec.profiles.assay_var_effect import AveFunctionalClassification
5+
6+
from mavedb.models.mapped_variant import MappedVariant
7+
from mavedb.lib.validation.utilities import inf_or_float
8+
from mavedb.view_models.score_set import ScoreRanges
9+
10+
logger = logging.getLogger(__name__)
11+
12+
13+
def functional_classification_of_variant(mapped_variant: MappedVariant) -> AveFunctionalClassification:
    """Classify a mapped variant's score against the ranges of its score set.

    Returns ``INDETERMINATE`` when the score set has no score ranges, or when the
    score lies in none of them. Otherwise the first range containing the score
    (strictly above its lower bound, at or below its upper bound) decides the
    outcome: ``NORMAL`` if that range's classification is ``"normal"``,
    ``ABNORMAL`` for any other classification value.
    """
    variant = mapped_variant.variant
    stored_ranges = variant.score_set.score_ranges
    if stored_ranges is None:
        return AveFunctionalClassification.INDETERMINATE

    # This view model object is much simpler to work with.
    parsed_ranges = ScoreRanges(**jsonable_encoder(stored_ranges))

    # This property of this column is guaranteed to be defined.
    score: float = variant.data["score"]  # type: ignore
    for score_range in parsed_ranges.ranges:
        low = inf_or_float(score_range.range[0], lower=True)
        high = inf_or_float(score_range.range[1], lower=False)
        if low < score <= high:
            if score_range.classification == "normal":
                return AveFunctionalClassification.NORMAL
            return AveFunctionalClassification.ABNORMAL

    return AveFunctionalClassification.INDETERMINATE
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import logging
2+
from datetime import datetime
3+
4+
from ga4gh.core.entity_models import Contribution
5+
6+
from mavedb.models.mapped_variant import MappedVariant
7+
from mavedb.lib.annotation.agent import mavedb_api_agent, mavedb_vrs_agent
8+
from mavedb.lib.annotation.method import mavedb_api_as_method, mavedb_vrs_as_method
9+
10+
11+
logger = logging.getLogger(__name__)
12+
13+
14+
def mavedb_api_contribution():
    """Build the ``Contribution`` attributed to the MaveDB API itself.

    The contribution is dated with ``datetime.today()`` at call time.
    """
    contributors = [mavedb_api_agent()]
    generated_at = datetime.today()
    methods = [mavedb_api_as_method()]
    return Contribution(contributor=contributors, date=generated_at, specifiedBy=methods)
16+
17+
18+
def mavedb_vrs_contribution(mapped_variant: MappedVariant):
    """Build the ``Contribution`` attributed to the VRS mapper for one mapped variant.

    Args:
        mapped_variant: Supplies the mapper version (``mapping_api_version``) for
            the contributing agent and the contribution date (``mapped_date``).
    """
    mapper_agent = mavedb_vrs_agent(mapped_variant.mapping_api_version)
    mapped_on = mapped_variant.mapped_date
    mapper_method = mavedb_vrs_as_method()
    return Contribution(contributor=[mapper_agent], date=mapped_on, specifiedBy=[mapper_method])
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import logging
2+
3+
from ga4gh.core.entity_models import DataSet
4+
5+
from mavedb.models.score_set import ScoreSet
6+
7+
from mavedb.lib.annotation.document import score_set_to_document
8+
9+
logger = logging.getLogger(__name__)
10+
11+
12+
# TODO: How deep should these objects get? See: https://github.com/ga4gh/va-spec/blob/1.x/examples/mavedb/pten-variant-example.json
13+
def score_set_to_data_set(score_set: ScoreSet) -> DataSet:
    """
    Build the GA4GH DataSet representation of a score set.

    The data set is identified by the score set URN, carries the short name of
    the score set's license, and is reported in the score set's own document.
    """
    data_set_id = score_set.urn
    license_name = score_set.license.short_name
    # TODO: Stitch together constants to create this string.
    # TODO: Better to use this IRI reference, or the actual `Document` GA4GH object?
    source_document = score_set_to_document(score_set)

    return DataSet(
        id=data_set_id,
        subtype="variant effect data set",
        license=license_name,
        reportedIn=source_document,
    )
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import logging
2+
import urllib.parse
3+
4+
from ga4gh.core.entity_models import Document
5+
6+
from mavedb.models.experiment import Experiment
7+
from mavedb.models.score_set import ScoreSet
8+
from mavedb.models.variant import Variant
9+
10+
logger = logging.getLogger(__name__)
11+
12+
# TODO: move centrally
13+
# Site root that the experiment and score set document URLs below are prefixed with.
BASE_URL = "https://mavedb.org"
14+
15+
16+
def experiment_to_document(experiment: Experiment) -> Document:
    """Represent a MaveDB experiment as a GA4GH ``Document``.

    Args:
        experiment: The experiment whose URN and title populate the document.

    Returns:
        A ``Document`` identified by the experiment URN, whose only URL is
        ``{BASE_URL}/experiments/{urn}``.
    """
    return Document(
        id=experiment.urn,
        # TODO: necessary? better as label?
        subtype="MaveDB experiment",
        title=experiment.title,
        # `urls` is a collection of locations, so the single URL is wrapped in a list.
        urls=[f"{BASE_URL}/experiments/{experiment.urn}"],
    )
24+
25+
26+
def score_set_to_document(score_set: ScoreSet) -> Document:
    """Represent a MaveDB score set as a GA4GH ``Document``.

    Args:
        score_set: The score set whose URN and title populate the document.

    Returns:
        A ``Document`` identified by the score set URN, whose only URL is
        ``{BASE_URL}/score-sets/{urn}``.
    """
    return Document(
        id=score_set.urn,
        # TODO: necessary? better as label?
        subtype="MaveDB score set",
        title=score_set.title,
        # `urls` is a collection of locations, so the single URL is wrapped in a list.
        urls=[f"{BASE_URL}/score-sets/{score_set.urn}"],
    )
34+
35+
36+
def variant_to_document(variant: Variant) -> Document:
    """Represent a MaveDB variant as a GA4GH ``Document``.

    Args:
        variant: The variant to describe; its URN becomes the document id and is
            URL-quoted into the ``variant`` query parameter of the link.

    Returns:
        A ``Document`` whose only URL points at the variant's score set page under
        ``BASE_URL``, with the variant selected through ``?variant=``.
    """
    return Document(
        id=variant.urn,
        # TODO: necessary? better as label?
        subtype="MaveDB variant",
        # TODO: Maybe the variant specific page? Maybe nothing, if we don't guarantee a
        #       variant specific page exists?
        # TODO(#372)
        # `urls` is a collection of locations, so the single URL is wrapped in a list.
        # The host comes from BASE_URL, matching the other document builders.
        urls=[f"{BASE_URL}/score-sets/{variant.score_set.urn}?variant={urllib.parse.quote_plus(variant.urn)}"],  # type: ignore
    )
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
import logging
2+
from typing import Optional, Sequence, Union
3+
4+
from ga4gh.core.entity_models import Method
5+
6+
from mavedb.models.publication_identifier import PublicationIdentifier
7+
from mavedb.models.score_set_publication_identifier import ScoreSetPublicationIdentifierAssociation
8+
from mavedb.models.experiment_publication_identifier import ExperimentPublicationIdentifierAssociation
9+
10+
logger = logging.getLogger(__name__)
11+
12+
13+
# TODO: How deep should these objects get?
14+
def publication_identifier_to_method(publication: PublicationIdentifier) -> Method:
    """
    Build a GA4GH method whose ``reportedIn`` is the URL of the given publication.
    """
    publication_url = publication.url
    # TODO: Is this necessarily true?
    method_subtype = "experimental protocol"
    return Method(subtype=method_subtype, reportedIn=publication_url)
23+
24+
25+
# A sequence of publication associations taken from either a score set or an experiment.
PublicationIdentifierAssociations = Union[
    Sequence[ScoreSetPublicationIdentifierAssociation], Sequence[ExperimentPublicationIdentifierAssociation]
]
29+
30+
31+
def publication_identifiers_to_method(publications: PublicationIdentifierAssociations) -> Optional[Method]:
    """
    Build a GA4GH method from the first primary publication among the associations.

    Returns None when no association is flagged primary, or when the first
    primary association carries no publication.
    """
    for association in publications:
        if not association.primary:
            continue

        # Only the first primary association is ever considered.
        primary_publication = association.publication
        if not primary_publication:
            return None

        return Method(
            # TODO: Is this necessarily true?
            subtype="experimental protocol",
            reportedIn=primary_publication.url,
        )

    return None
47+
48+
49+
def mavedb_api_as_method():
    """Build the ``Method`` that points at the MaveDB API release listing."""
    releases_url = "https://github.com/VariantEffect/mavedb-api/releases"
    return Method(subtype="software version", reportedIn=releases_url)
51+
52+
53+
def mavedb_vrs_as_method():
    """Build the ``Method`` that points at the VRS mapper (dcd_mapping2) release listing."""
    releases_url = "https://github.com/VariantEffect/dcd_mapping2/releases"
    return Method(subtype="software version", reportedIn=releases_url)
55+
56+
57+
def variant_interpretation_guideline_method():
    """Build the ``Method`` citing the variant interpretation guideline publication."""
    return Method(subtype="Variant Interpretation Guideline", reportedIn="https://pubmed.ncbi.nlm.nih.gov/29785012/")


def variant_interpretation_functional_guideline_method():
    """Build the guideline ``Method`` used by functional classification statements.

    The statement builders import this name, so it must exist for that module to
    load. It currently returns the shared guideline method.
    """
    # TODO: cite a guideline specific to functional classification once one is chosen.
    return variant_interpretation_guideline_method()


def variant_interpretation_clinical_guideline_method():
    """Build the guideline ``Method`` used by clinical classification statements.

    The statement builders import this name, so it must exist for that module to
    load. It currently returns the shared guideline method.
    """
    # TODO: cite a guideline specific to clinical classification once one is chosen.
    return variant_interpretation_guideline_method()

src/mavedb/lib/annotation/py.typed

Whitespace-only changes.
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
import logging
2+
3+
from ga4gh.va_spec.profiles import (
4+
AssayVariantEffectMeasurementStudyResult,
5+
AssayVariantEffectFunctionalClassificationStatement,
6+
AssayVariantEffectClinicalClassificationStatement,
7+
)
8+
9+
10+
from mavedb.models.mapped_variant import MappedVariant
11+
from mavedb.lib.annotation.classification import functional_classification_of_variant
12+
from mavedb.lib.annotation.contribution import mavedb_api_contribution, mavedb_vrs_contribution
13+
from mavedb.lib.annotation.dataset import score_set_to_data_set
14+
from mavedb.lib.annotation.method import (
15+
publication_identifiers_to_method,
16+
variant_interpretation_functional_guideline_method,
17+
variant_interpretation_clinical_guideline_method,
18+
)
19+
from mavedb.lib.annotation.document import variant_to_document, score_set_to_document
20+
21+
22+
logger = logging.getLogger(__name__)
23+
24+
25+
def mapped_variant_to_variant_effect_measurement_study_result(
    mapped_variant: MappedVariant,
) -> AssayVariantEffectMeasurementStudyResult:
    """Build the VA-Spec measurement study result for a mapped variant.

    The result is identified by the variant URN, focuses on the post-mapped
    variant, and carries the variant's ``score``. Its method comes from the score
    set's publication associations, and both the MaveDB API and the VRS mapper
    are recorded as contributors.
    """
    variant = mapped_variant.variant

    result_id = variant.urn
    source_data_set = score_set_to_data_set(variant.score_set)
    focus_variant = mapped_variant.post_mapped
    # This property of this column is guaranteed to be defined.
    score = variant.data["score"]  # type: ignore
    method = publication_identifiers_to_method(variant.score_set.publication_identifier_associations)
    variant_document = variant_to_document(variant)
    contributions = [mavedb_api_contribution(), mavedb_vrs_contribution(mapped_variant)]
    # TODO: Is the score set more representative of the content, or the mapping?
    authored_on = mapped_variant.mapped_date

    return AssayVariantEffectMeasurementStudyResult(
        id=result_id,
        sourceDataSet=source_data_set,
        focusVariant=focus_variant,
        score=score,
        specifiedBy=method,
        reportedIn=variant_document,
        contributions=contributions,
        dateAuthored=authored_on,
    )
42+
43+
44+
def mapped_variant_to_functional_classification_statement(
    mapped_variant: MappedVariant,
) -> AssayVariantEffectFunctionalClassificationStatement:
    """Build the VA-Spec functional classification statement for a mapped variant.

    The classification is derived from the variant's score via
    ``functional_classification_of_variant``. The statement is dated with the
    score set's modification date, reported in both the score set and variant
    documents, and attributed to the VRS mapper only.
    """
    variant = mapped_variant.variant

    statement_id = variant.urn
    subject_variant = mapped_variant.post_mapped
    classification = functional_classification_of_variant(mapped_variant)
    guideline = variant_interpretation_functional_guideline_method()
    # TODO: Creation or modification?
    # TODO: Is the score set more representative of the content, or the mapping?
    authored_on = variant.score_set.modification_date
    documents = [score_set_to_document(variant.score_set), variant_to_document(variant)]
    # TODO: Do we want user contributions?
    # TODO: Is this misleading if we don't track all user contributions?
    contributions = [mavedb_vrs_contribution(mapped_variant)]

    return AssayVariantEffectFunctionalClassificationStatement(
        id=statement_id,
        subjectVariant=subject_variant,
        # TODO: What is going here? Example points to a MAVE minimum information document.
        #       Maybe something from the keywords?
        objectAssay="",
        classification=classification,
        specifiedBy=guideline,
        dateAuthored=authored_on,
        reportedIn=documents,
        contributions=contributions,
    )
66+
67+
68+
def mapped_variant_to_clinical_classification_statement(
    mapped_variant: MappedVariant,
) -> AssayVariantEffectClinicalClassificationStatement:
    """Build the VA-Spec clinical classification statement for a mapped variant.

    Args:
        mapped_variant: The mapped variant to describe. Its variant URN becomes the
            statement id and its post-mapped object becomes the subject variant.

    Returns:
        A clinical classification statement dated with the score set's modification
        date, reported in both the score set and variant documents, and attributed
        to the VRS mapper. No classification is computed yet: ``classification`` is
        passed as ``None`` and ``objectAssay`` as an empty string.
    """
    return AssayVariantEffectClinicalClassificationStatement(
        id=mapped_variant.variant.urn,
        subjectVariant=mapped_variant.post_mapped,
        # TODO: What is going here? Example points to a MAVE minimum information document.
        #       Maybe something from the keywords?
        objectAssay="",
        classification=None,
        specifiedBy=variant_interpretation_clinical_guideline_method(),
        # TODO: Creation or modification?
        # TODO: Is the score set more representative of the content, or the mapping?
        dateAuthored=mapped_variant.variant.score_set.modification_date,
        reportedIn=[
            score_set_to_document(mapped_variant.variant.score_set),
            variant_to_document(mapped_variant.variant),
        ],
        # TODO: Do we want user contributions?
        # TODO: Is this misleading if we don't track all user contributions?
        contributions=[mavedb_vrs_contribution(mapped_variant)],
    )
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
from typing import TypedDict, Optional
2+
3+
from ga4gh.va_spec.profiles import (
4+
AssayVariantEffectMeasurementStudyResult,
5+
AssayVariantEffectFunctionalClassificationStatement,
6+
AssayVariantEffectClinicalClassificationStatement,
7+
)
8+
9+
from mavedb.view_models.base.base import BaseModel
10+
11+
12+
class VariantAnnotationSpecification(TypedDict):
    """The VA-Spec objects produced for one variant, keyed by the name of their profile class."""

    # Always present.
    AssayVariantEffectMeasurementStudyResult: AssayVariantEffectMeasurementStudyResult

    # The two classification statements may each be None.
    AssayVariantEffectFunctionalClassificationStatement: Optional[AssayVariantEffectFunctionalClassificationStatement]
    AssayVariantEffectClinicalClassificationStatement: Optional[AssayVariantEffectClinicalClassificationStatement]
16+
17+
18+
class AnnotatedVariant(BaseModel):
    """A mapping from a string key to that entry's VA-Spec annotation objects."""

    # NOTE(review): `__root__` is the pydantic v1 custom-root convention; pydantic v2
    # replaces it with `RootModel`. Confirm which major version this model runs under.
    # NOTE(review): keys are presumably variant URNs; verify against the caller.
    __root__: dict[str, VariantAnnotationSpecification]

0 commit comments

Comments
 (0)