Skip to content

Commit acf5a6a

Browse files
committed
Support for Score Ranges with an Unspecified Classification
To support all pillar project data sets, it is necessary to support score ranges without an explicit classification. This requires some changes to the existing validation logic:
- The wild type score is no longer always required. If you provide a score range with a `normal` classification, the wild type score is required and must fall within that range.
- If you provide a wild type score, you must also provide at least one score range with a `normal` classification.
- Users may provide a new `Not Specified` classification, which carries neither a normal nor an abnormal connotation.
- All other validation restrictions remain in place and also apply to the new classification.

As part of these changes, a new file `utils.py` has been added to the mavedb lib code. At present this file contains only one new function, which helps with string sanitization for score ranges, but it should be used for other shared library utilities. At some point, we should make an effort to refactor shared utilities into it.
1 parent 60d4e0d commit acf5a6a

File tree

4 files changed

+70
-17
lines changed

4 files changed

+70
-17
lines changed

src/mavedb/lib/utils.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
import re
2+
3+
4+
def sanitize_string(s: str):
5+
"""
6+
Sanitize a string to a consistent format:
7+
- Strip leading and trailing whitespace
8+
- Convert to lowercase
9+
- Replace internal whitespace with underscores
10+
"""
11+
return re.sub(r"\s+", "_", s.strip().lower())
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
default_ranges = ["normal", "abnormal"]
1+
default_ranges = ["normal", "abnormal", "not_specified"]

src/mavedb/view_models/score_set.py

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from mavedb.lib.validation.constants.score_set import default_ranges
1212
from mavedb.lib.validation.exceptions import ValidationError
1313
from mavedb.lib.validation.utilities import inf_or_float, is_null
14+
from mavedb.lib.utils import sanitize_string
1415
from mavedb.models.enums.mapping_state import MappingState
1516
from mavedb.models.enums.processing_state import ProcessingState
1617
from mavedb.view_models import PublicationIdentifiersGetter, record_type_validator, set_record_type
@@ -64,7 +65,7 @@ class ScoreRange(BaseModel):
6465

6566
@validator("classification")
6667
def range_classification_value_is_accepted(cls, field_value: str):
67-
classification = field_value.strip().lower()
68+
classification = sanitize_string(field_value)
6869
if classification not in default_ranges:
6970
raise ValidationError(
7071
f"Unexpected classification value(s): {classification}. Permitted values: {default_ranges}"
@@ -89,7 +90,7 @@ def ranges_are_not_backwards(cls, field_value: tuple[Any]):
8990

9091

9192
class ScoreRanges(BaseModel):
92-
wt_score: float
93+
wt_score: Optional[float]
9394
ranges: list[ScoreRange] # type: ignore
9495

9596

@@ -209,17 +210,16 @@ def score_range_labels_must_be_unique(cls, field_value: Optional[ScoreRanges]):
209210
return field_value
210211

211212
@validator("score_ranges")
212-
def ranges_contain_normal_and_abnormal(cls, field_value: Optional[ScoreRanges]):
213+
def score_range_normal_classification_exists_if_wild_type_score_provided(cls, field_value: Optional[ScoreRanges]):
213214
if field_value is None:
214215
return None
215216

216-
ranges = set([range_model.classification for range_model in field_value.ranges])
217-
if not set(default_ranges).issubset(ranges):
218-
raise ValidationError(
219-
"Both `normal` and `abnormal` ranges must be provided.",
220-
# Raise this error inside the first classification provided by the model.
221-
custom_loc=["body", "scoreRanges", "ranges", 0, "classification"],
222-
)
217+
if field_value.wt_score is not None:
218+
if not any([range_model.classification == "normal" for range_model in field_value.ranges]):
219+
raise ValidationError(
220+
"A wild type score has been provided, but no normal classification range exists.",
221+
custom_loc=["body", "scoreRanges", "wtScore"],
222+
)
223223

224224
return field_value
225225

@@ -264,6 +264,16 @@ def wild_type_score_in_normal_range(cls, field_value: Optional[ScoreRanges]):
264264
normal_ranges = [
265265
range_model.range for range_model in field_value.ranges if range_model.classification == "normal"
266266
]
267+
268+
if normal_ranges and field_value.wt_score is None:
269+
raise ValidationError(
270+
"A normal range has been provided, but no wild type score has been provided.",
271+
custom_loc=["body", "scoreRanges", "wtScore"],
272+
)
273+
274+
if field_value.wt_score is None:
275+
return field_value
276+
267277
for range in normal_ranges:
268278
if field_value.wt_score >= inf_or_float(range[0], lower=True) and field_value.wt_score < inf_or_float(
269279
range[1], lower=False

tests/view_models/test_score_set.py

Lines changed: 38 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -440,20 +440,35 @@ def test_cannot_create_score_set_with_wild_type_outside_normal_range():
440440
)
441441

442442

443-
@pytest.mark.parametrize("present_name", default_ranges)
444-
def test_cannot_create_score_set_without_default_range(present_name):
443+
def test_cannot_create_score_set_with_wild_type_score_and_no_normal_range():
444+
wt_score = -0.5
445445
score_set_test = TEST_MINIMAL_SEQ_SCORESET.copy()
446446
score_set_test["score_ranges"] = {
447-
"wt_score": -1.5,
447+
"wt_score": wt_score,
448448
"ranges": [
449-
{"label": "range_2", "classification": f"{present_name}", "range": (-3, -1)},
449+
{"label": "range_1", "classification": "abnormal", "range": (-1, 0)},
450+
],
451+
}
452+
453+
with pytest.raises(ValueError) as exc_info:
454+
ScoreSetModify(**jsonable_encoder(score_set_test))
455+
456+
assert "A wild type score has been provided, but no normal classification range exists." in str(exc_info.value)
457+
458+
459+
def test_cannot_create_score_set_with_normal_range_and_no_wild_type_score():
460+
score_set_test = TEST_MINIMAL_SEQ_SCORESET.copy()
461+
score_set_test["score_ranges"] = {
462+
"wt_score": None,
463+
"ranges": [
464+
{"label": "range_1", "classification": "normal", "range": (-1, 0)},
450465
],
451466
}
452467

453468
with pytest.raises(ValueError) as exc_info:
454469
ScoreSetModify(**jsonable_encoder(score_set_test))
455470

456-
assert "Both `normal` and `abnormal` ranges must be provided." in str(exc_info.value)
471+
assert "A normal range has been provided, but no wild type score has been provided." in str(exc_info.value)
457472

458473

459474
def test_cannot_create_score_set_without_default_ranges():
@@ -468,4 +483,21 @@ def test_cannot_create_score_set_without_default_ranges():
468483
with pytest.raises(ValueError) as exc_info:
469484
ScoreSetModify(**jsonable_encoder(score_set_test))
470485

471-
assert "Unexpected classification value(s): other. Permitted values: ['normal', 'abnormal']" in str(exc_info.value)
486+
assert (
487+
"Unexpected classification value(s): other. Permitted values: ['normal', 'abnormal', 'not_specified']"
488+
in str(exc_info.value)
489+
)
490+
491+
492+
@pytest.mark.parametrize("classification", default_ranges)
493+
def test_can_create_score_set_with_any_range_classification(classification):
494+
wt_score = -0.5 if classification == "normal" else None
495+
score_set_test = TEST_MINIMAL_SEQ_SCORESET.copy()
496+
score_set_test["score_ranges"] = {
497+
"wt_score": wt_score,
498+
"ranges": [
499+
{"label": "range_1", "classification": classification, "range": (-1, 0)},
500+
],
501+
}
502+
503+
ScoreSetModify(**jsonable_encoder(score_set_test))

0 commit comments

Comments
 (0)