Skip to content

Commit 71df4cb

Browse files
committed
Refactor score ranges into score calibrations.
- Refactored the score range JSONB in score sets into a separate data model with database tables and publication associations.
- Removed tests related to score ranges and replaced them with tests for score calibrations.
- Updated test cases to validate the creation of score sets with investigator-provided calibrations.
- Ensured that score set creation fails when non-investigator-provided calibrations are included.
- Adjusted error assertions to reflect changes in validation logic for score calibrations.
1 parent 34ba357 commit 71df4cb

35 files changed

+7231
-2670
lines changed

src/mavedb/lib/annotation/classification.py

Lines changed: 86 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,7 @@
66
from ga4gh.va_spec.base.enums import StrengthOfEvidenceProvided
77

88
from mavedb.models.mapped_variant import MappedVariant
9-
from mavedb.lib.annotation.constants import ZEIBERG_CALIBRATION_CALIBRATION_STRENGTH_OF_EVIDENCE_MAP
10-
from mavedb.lib.validation.utilities import inf_or_float
11-
from mavedb.view_models.score_range import ScoreSetRanges
9+
from mavedb.view_models.score_calibration import FunctionalRange
1210

1311
logger = logging.getLogger(__name__)
1412

@@ -24,18 +22,30 @@ class ExperimentalVariantFunctionalImpactClassification(StrEnum):
2422
def functional_classification_of_variant(
2523
mapped_variant: MappedVariant,
2624
) -> ExperimentalVariantFunctionalImpactClassification:
27-
if mapped_variant.variant.score_set.score_ranges is None:
25+
"""Classify a variant's functional impact as normal, abnormal, or indeterminate.
26+
27+
Uses the primary score calibration and its functional ranges.
28+
Raises ValueError if required calibration or score is missing.
29+
"""
30+
if not mapped_variant.variant.score_set.score_calibrations:
2831
raise ValueError(
29-
f"Variant {mapped_variant.variant.urn} does not have a score set with score ranges."
32+
f"Variant {mapped_variant.variant.urn} does not have a score set with score calibrations."
3033
" Unable to classify functional impact."
3134
)
3235

33-
# This view model object is much simpler to work with.
34-
score_ranges = ScoreSetRanges(**mapped_variant.variant.score_set.score_ranges).investigator_provided
36+
# TODO#494: Support for multiple calibrations (all non-research use only).
37+
score_calibrations = mapped_variant.variant.score_set.score_calibrations or []
38+
primary_calibration = next((c for c in score_calibrations if c.primary), None)
39+
40+
if not primary_calibration:
41+
raise ValueError(
42+
f"Variant {mapped_variant.variant.urn} does not have a primary score calibration."
43+
" Unable to classify functional impact."
44+
)
3545

36-
if not score_ranges or not score_ranges.ranges:
46+
if not primary_calibration.functional_ranges:
3747
raise ValueError(
38-
f"Variant {mapped_variant.variant.urn} does not have investigator-provided score ranges."
48+
f"Variant {mapped_variant.variant.urn} does not have ranges defined in its primary score calibration."
3949
" Unable to classify functional impact."
4050
)
4151

@@ -47,33 +57,48 @@ def functional_classification_of_variant(
4757
" Unable to classify functional impact."
4858
)
4959

50-
for range in score_ranges.ranges:
51-
lower_bound, upper_bound = inf_or_float(range.range[0], lower=True), inf_or_float(range.range[1], lower=False)
52-
if functional_score > lower_bound and functional_score <= upper_bound:
53-
if range.classification == "normal":
60+
for functional_range in primary_calibration.functional_ranges:
61+
# It's easier to reason with the view model objects for functional ranges than the JSONB fields in the raw database object.
62+
functional_range_view = FunctionalRange.model_validate(functional_range)
63+
64+
if functional_range_view.is_contained_by_range(functional_score):
65+
if functional_range_view.classification == "normal":
5466
return ExperimentalVariantFunctionalImpactClassification.NORMAL
55-
elif range.classification == "abnormal":
67+
elif functional_range_view.classification == "abnormal":
5668
return ExperimentalVariantFunctionalImpactClassification.ABNORMAL
5769
else:
5870
return ExperimentalVariantFunctionalImpactClassification.INDETERMINATE
5971

6072
return ExperimentalVariantFunctionalImpactClassification.INDETERMINATE
6173

6274

63-
def zeiberg_calibration_clinical_classification_of_variant(
75+
def pathogenicity_classification_of_variant(
6476
mapped_variant: MappedVariant,
6577
) -> tuple[VariantPathogenicityEvidenceLine.Criterion, Optional[StrengthOfEvidenceProvided]]:
66-
if mapped_variant.variant.score_set.score_ranges is None:
78+
"""Classify a variant's pathogenicity and evidence strength using clinical calibration.
79+
80+
Uses the primary score calibration and its functional ranges.
81+
Raises ValueError if the required calibration or score is missing, or if the matched range has an invalid evidence strength or criterion.
82+
"""
83+
if not mapped_variant.variant.score_set.score_calibrations:
6784
raise ValueError(
68-
f"Variant {mapped_variant.variant.urn} does not have a score set with score thresholds."
85+
f"Variant {mapped_variant.variant.urn} does not have a score set with score calibrations."
6986
" Unable to classify clinical impact."
7087
)
7188

72-
score_ranges = ScoreSetRanges(**mapped_variant.variant.score_set.score_ranges).zeiberg_calibration
89+
# TODO#494: Support multiple clinical calibrations.
90+
score_calibrations = mapped_variant.variant.score_set.score_calibrations or []
91+
primary_calibration = next((c for c in score_calibrations if c.primary), None)
92+
93+
if not primary_calibration:
94+
raise ValueError(
95+
f"Variant {mapped_variant.variant.urn} does not have a primary score calibration."
96+
" Unable to classify clinical impact."
97+
)
7398

74-
if not score_ranges or not score_ranges.ranges:
99+
if not primary_calibration.functional_ranges:
75100
raise ValueError(
76-
f"Variant {mapped_variant.variant.urn} does not have pillar project score ranges."
101+
f"Variant {mapped_variant.variant.urn} does not have ranges defined in its primary score calibration."
77102
" Unable to classify clinical impact."
78103
)
79104

@@ -85,9 +110,44 @@ def zeiberg_calibration_clinical_classification_of_variant(
85110
" Unable to classify clinical impact."
86111
)
87112

88-
for range in score_ranges.ranges:
89-
lower_bound, upper_bound = inf_or_float(range.range[0], lower=True), inf_or_float(range.range[1], lower=False)
90-
if functional_score > lower_bound and functional_score <= upper_bound:
91-
return ZEIBERG_CALIBRATION_CALIBRATION_STRENGTH_OF_EVIDENCE_MAP[range.evidence_strength]
92-
93-
return ZEIBERG_CALIBRATION_CALIBRATION_STRENGTH_OF_EVIDENCE_MAP[0]
113+
for pathogenicity_range in primary_calibration.functional_ranges:
114+
# It's easier to reason with the view model objects for functional ranges than the JSONB fields in the raw database object.
115+
pathogenicity_range_view = FunctionalRange.model_validate(pathogenicity_range)
116+
117+
if pathogenicity_range_view.is_contained_by_range(functional_score):
118+
if pathogenicity_range_view.acmg_classification is None:
119+
return (VariantPathogenicityEvidenceLine.Criterion.PS3, None)
120+
121+
# More of a type guard, as the ACMGClassification model we construct above enforces that
122+
# criterion and evidence strength are mutually defined.
123+
if (
124+
pathogenicity_range_view.acmg_classification.evidence_strength is None
125+
or pathogenicity_range_view.acmg_classification.criterion is None
126+
): # pragma: no cover - enforced by model validators in FunctionalRange view model
127+
return (VariantPathogenicityEvidenceLine.Criterion.PS3, None)
128+
129+
# TODO#540: Handle moderate+
130+
if (
131+
pathogenicity_range_view.acmg_classification.evidence_strength.name
132+
not in StrengthOfEvidenceProvided._member_names_
133+
):
134+
raise ValueError(
135+
f"Variant {mapped_variant.variant.urn} is contained in a clinical calibration range with an invalid evidence strength."
136+
" Unable to classify clinical impact."
137+
)
138+
139+
if (
140+
pathogenicity_range_view.acmg_classification.criterion.name
141+
not in VariantPathogenicityEvidenceLine.Criterion._member_names_
142+
): # pragma: no cover - enforced by model validators in FunctionalRange view model
143+
raise ValueError(
144+
f"Variant {mapped_variant.variant.urn} is contained in a clinical calibration range with an invalid criterion."
145+
" Unable to classify clinical impact."
146+
)
147+
148+
return (
149+
VariantPathogenicityEvidenceLine.Criterion[pathogenicity_range_view.acmg_classification.criterion.name],
150+
StrengthOfEvidenceProvided[pathogenicity_range_view.acmg_classification.evidence_strength.name],
151+
)
152+
153+
return (VariantPathogenicityEvidenceLine.Criterion.PS3, None)
Lines changed: 0 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,2 @@
1-
from ga4gh.va_spec.acmg_2015 import VariantPathogenicityEvidenceLine
2-
from ga4gh.va_spec.base.enums import StrengthOfEvidenceProvided
3-
41
GENERIC_DISEASE_MEDGEN_CODE = "C0012634"
52
MEDGEN_SYSTEM = "https://www.ncbi.nlm.nih.gov/medgen/"
6-
7-
ZEIBERG_CALIBRATION_CALIBRATION_STRENGTH_OF_EVIDENCE_MAP = {
8-
# No evidence
9-
0: (VariantPathogenicityEvidenceLine.Criterion.PS3, None),
10-
# Supporting evidence
11-
-1: (VariantPathogenicityEvidenceLine.Criterion.BS3, StrengthOfEvidenceProvided.SUPPORTING),
12-
1: (VariantPathogenicityEvidenceLine.Criterion.PS3, StrengthOfEvidenceProvided.SUPPORTING),
13-
# Moderate evidence
14-
-2: (VariantPathogenicityEvidenceLine.Criterion.BS3, StrengthOfEvidenceProvided.MODERATE),
15-
2: (VariantPathogenicityEvidenceLine.Criterion.PS3, StrengthOfEvidenceProvided.MODERATE),
16-
-3: (VariantPathogenicityEvidenceLine.Criterion.BS3, StrengthOfEvidenceProvided.MODERATE),
17-
3: (VariantPathogenicityEvidenceLine.Criterion.PS3, StrengthOfEvidenceProvided.MODERATE),
18-
# Strong evidence
19-
-4: (VariantPathogenicityEvidenceLine.Criterion.BS3, StrengthOfEvidenceProvided.STRONG),
20-
4: (VariantPathogenicityEvidenceLine.Criterion.PS3, StrengthOfEvidenceProvided.STRONG),
21-
-5: (VariantPathogenicityEvidenceLine.Criterion.BS3, StrengthOfEvidenceProvided.STRONG),
22-
5: (VariantPathogenicityEvidenceLine.Criterion.PS3, StrengthOfEvidenceProvided.STRONG),
23-
-6: (VariantPathogenicityEvidenceLine.Criterion.BS3, StrengthOfEvidenceProvided.STRONG),
24-
6: (VariantPathogenicityEvidenceLine.Criterion.PS3, StrengthOfEvidenceProvided.STRONG),
25-
-7: (VariantPathogenicityEvidenceLine.Criterion.BS3, StrengthOfEvidenceProvided.STRONG),
26-
7: (VariantPathogenicityEvidenceLine.Criterion.PS3, StrengthOfEvidenceProvided.STRONG),
27-
# Very Strong evidence
28-
-8: (VariantPathogenicityEvidenceLine.Criterion.BS3, StrengthOfEvidenceProvided.VERY_STRONG),
29-
8: (VariantPathogenicityEvidenceLine.Criterion.PS3, StrengthOfEvidenceProvided.VERY_STRONG),
30-
}
31-
32-
# TODO#493
33-
FUNCTIONAL_RANGES = ["investigator_provided"]
34-
CLINICAL_RANGES = ["zeiberg_calibration"]

src/mavedb/lib/annotation/evidence_line.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
VariantPathogenicityProposition,
1313
)
1414

15-
from mavedb.lib.annotation.classification import zeiberg_calibration_clinical_classification_of_variant
15+
from mavedb.lib.annotation.classification import pathogenicity_classification_of_variant
1616
from mavedb.lib.annotation.contribution import (
1717
mavedb_api_contribution,
1818
mavedb_vrs_contribution,
@@ -33,7 +33,7 @@ def acmg_evidence_line(
3333
proposition: VariantPathogenicityProposition,
3434
evidence: list[Union[StudyResult, EvidenceLineType, StatementType, iriReference]],
3535
) -> Optional[VariantPathogenicityEvidenceLine]:
36-
evidence_outcome, evidence_strength = zeiberg_calibration_clinical_classification_of_variant(mapped_variant)
36+
evidence_outcome, evidence_strength = pathogenicity_classification_of_variant(mapped_variant)
3737

3838
if not evidence_strength:
3939
evidence_outcome_code = f"{evidence_outcome.value}_not_met"

src/mavedb/lib/annotation/util.py

Lines changed: 37 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from typing import Literal
12
from ga4gh.core.models import Extension
23
from ga4gh.vrs.models import (
34
MolecularVariation,
@@ -8,9 +9,9 @@
89
Expression,
910
LiteralSequenceExpression,
1011
)
11-
from mavedb.lib.annotation.constants import CLINICAL_RANGES, FUNCTIONAL_RANGES
1212
from mavedb.models.mapped_variant import MappedVariant
1313
from mavedb.lib.annotation.exceptions import MappingDataDoesntExistException
14+
from mavedb.view_models.score_calibration import SavedScoreCalibration
1415

1516

1617
def allele_from_mapped_variant_dictionary_result(allelic_mapping_results: dict) -> Allele:
@@ -162,43 +163,51 @@ def _can_annotate_variant_base_assumptions(mapped_variant: MappedVariant) -> boo
162163
return True
163164

164165

165-
def _variant_score_ranges_have_required_keys_and_ranges_for_annotation(
166-
mapped_variant: MappedVariant, key_options: list[str]
166+
def _variant_score_calibrations_have_required_calibrations_and_ranges_for_annotation(
167+
mapped_variant: MappedVariant, annotation_type: Literal["pathogenicity", "functional"]
167168
) -> bool:
168169
"""
169-
Check if a mapped variant's score set contains any of the required score range keys for annotation and is present.
170+
Check if a mapped variant's score set contains any of the required calibrations for annotation.
170171
171172
Args:
172173
mapped_variant (MappedVariant): The mapped variant object containing the variant with score set data.
173-
key_options (list[str]): List of possible score range keys to check for in the score set.
174+
annotation_type (Literal["pathogenicity", "functional"]): The type of annotation to check for.
175+
Must be either "pathogenicity" or "functional".
174176
175177
Returns:
176-
bool: False if none of the required keys are found or if all found keys have None values or if all found keys
177-
do not have range data.
178-
Returns True (implicitly) if at least one required key exists with a non-None value.
178+
bool: False if none of the required kinds are found or if all found calibrations have None or empty functional
179+
range values/do not have range data.
180+
Returns True (implicitly) if at least one required kind exists and has a non-empty functional range.
179181
"""
180-
if mapped_variant.variant.score_set.score_ranges is None:
182+
if mapped_variant.variant.score_set.score_calibrations is None:
181183
return False
182184

183-
if not any(
184-
range_key in mapped_variant.variant.score_set.score_ranges
185-
and mapped_variant.variant.score_set.score_ranges[range_key] is not None
186-
and mapped_variant.variant.score_set.score_ranges[range_key]["ranges"]
187-
for range_key in key_options
188-
):
185+
# TODO#494: Support for multiple calibrations (all non-research use only).
186+
primary_calibration = next((c for c in mapped_variant.variant.score_set.score_calibrations if c.primary), None)
187+
if not primary_calibration:
189188
return False
190189

190+
saved_calibration = SavedScoreCalibration.model_validate(primary_calibration)
191+
if annotation_type == "pathogenicity":
192+
return (
193+
saved_calibration.functional_ranges is not None
194+
and len(saved_calibration.functional_ranges) > 0
195+
and any(fr.acmg_classification is not None for fr in saved_calibration.functional_ranges)
196+
)
197+
198+
if annotation_type == "functional":
199+
return saved_calibration.functional_ranges is not None and len(saved_calibration.functional_ranges) > 0
200+
191201
return True
192202

193203

194204
def can_annotate_variant_for_pathogenicity_evidence(mapped_variant: MappedVariant) -> bool:
195205
"""
196206
Determine if a mapped variant can be annotated for pathogenicity evidence.
197207
198-
This function checks whether a given mapped variant meets all the necessary
199-
requirements to receive pathogenicity evidence annotations. It validates
200-
both basic annotation assumptions and the presence of required clinical
201-
score range keys.
208+
This function checks if a variant meets all the necessary conditions to receive
209+
pathogenicity evidence annotations by validating base assumptions and ensuring the variant's
210+
score calibrations contain the required kinds for pathogenicity evidence annotation.
202211
203212
Args:
204213
mapped_variant (MappedVariant): The mapped variant object to evaluate
@@ -211,14 +220,16 @@ def can_annotate_variant_for_pathogenicity_evidence(mapped_variant: MappedVarian
211220
Notes:
212221
The function performs two main validation checks:
213222
1. Basic annotation assumptions via _can_annotate_variant_base_assumptions
214-
2. Required clinical range keys via _variant_score_ranges_have_required_keys_and_ranges_for_annotation
223+
2. Verifies score calibrations have an appropriate calibration for pathogenicity evidence annotation.
215224
216225
Both checks must pass for the variant to be considered eligible for
217226
pathogenicity evidence annotation.
218227
"""
219228
if not _can_annotate_variant_base_assumptions(mapped_variant):
220229
return False
221-
if not _variant_score_ranges_have_required_keys_and_ranges_for_annotation(mapped_variant, CLINICAL_RANGES):
230+
if not _variant_score_calibrations_have_required_calibrations_and_ranges_for_annotation(
231+
mapped_variant, "pathogenicity"
232+
):
222233
return False
223234

224235
return True
@@ -230,7 +241,7 @@ def can_annotate_variant_for_functional_statement(mapped_variant: MappedVariant)
230241
231242
This function checks if a variant meets all the necessary conditions to receive
232243
functional annotations by validating base assumptions and ensuring the variant's
233-
score ranges contain the required keys for functional annotation.
244+
score calibrations contain the required kinds for functional annotation.
234245
235246
Args:
236247
mapped_variant (MappedVariant): The variant object to check for annotation
@@ -243,11 +254,13 @@ def can_annotate_variant_for_functional_statement(mapped_variant: MappedVariant)
243254
Notes:
244255
The function performs two main checks:
245256
1. Validates base assumptions using _can_annotate_variant_base_assumptions
246-
2. Verifies score ranges have required keys using FUNCTIONAL_RANGES
257+
2. Verifies score calibrations have an appropriate calibration for functional annotation.
247258
"""
248259
if not _can_annotate_variant_base_assumptions(mapped_variant):
249260
return False
250-
if not _variant_score_ranges_have_required_keys_and_ranges_for_annotation(mapped_variant, FUNCTIONAL_RANGES):
261+
if not _variant_score_calibrations_have_required_calibrations_and_ranges_for_annotation(
262+
mapped_variant, "functional"
263+
):
251264
return False
252265

253266
return True

0 commit comments

Comments
 (0)