Skip to content

Commit 66cbd6a

Browse files
committed
Compound scoring: Cap rankscore values to (min, ) range
* compound scoring: When correcting rankscore, cap to min bound * Update READMEs on compound scoring
1 parent 293e7b1 commit 66cbd6a

File tree

5 files changed

+89
-1
lines changed

5 files changed

+89
-1
lines changed

docs/commands/score-compounds.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Score Compounds
2+
3+
This module performs ranking of compound variants.
4+
5+
During the ranking of these compounds the rank score might be modified in place.
6+
See `genmod/score_variants/compound_scorer.py:L248`.
7+
8+
## Rankscore Capping
9+
Since the rank scores are modified in place in this module, there's a risk
10+
that the modified rank score might fall outside the valid range of normalization
11+
bounds `(MIN, MAX)` that were established in the `score_variants` module.
12+
13+
This applies to variants belonging to the lower range of rank scores.
14+
15+
When this happens, the modified rank score is capped to `(MIN, )` if it's of `RankScore` type
16+
or `(0, 1)` if it's of `RankScoreNormalized` type.
17+
18+
In previous Genmod versions, no such capping rule was in effect.
19+
Variants ranked by earlier versions of the `compounds` module might therefore show lower rank
20+
scores compared to this implementation.

docs/commands/score-variants.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,7 @@ and `CategorySumMin` is the sum of minimal score values for all categories,
1313
i.e. `CategorySumMin = SUM(CategoryMin_n) for 0...n categories`.
1414
The same applies to `CategorySumMax = SUM(CategoryMax_n) for 0...n categories`.
1515

16-
Refer to `score_variants.py::score()` method for implementation details.
16+
Refer to `score_variants.py::score()` method for implementation details.
17+
18+
Additionally, see `score-compounds.md` for the compound scoring step, which affects
19+
final rank score values.
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from genmod.score_variants.score_variant import MIN_SCORE_NORMALIZED
2+
from genmod.score_variants.rank_score_variant_definitions import RANK_SCORE_TYPE_NAMES
3+
4+
5+
def cap_rank_score_to_min_bound(rank_score_type: str,
                                rank_score,
                                min_rank_score_value: float) -> float:
    """
    Clamp a rank score so that it never drops below the lower normalization bound.

    Only the lower bound is enforced; values above it are returned untouched.

    Args:
        rank_score_type: Name of the rank score type, must be one of RANK_SCORE_TYPE_NAMES
        rank_score: The value to bounds check
        min_rank_score_value: Lower bound applied to every type except 'RankScoreNormalized'
    Returns:
        rank_score itself when it is not below the applicable lower bound, otherwise
        the bound: MIN_SCORE_NORMALIZED for 'RankScoreNormalized', min_rank_score_value
        for any other known type.
    Raises:
        ValueError: If rank_score_type is not listed in RANK_SCORE_TYPE_NAMES.
    """
    known_types = set(RANK_SCORE_TYPE_NAMES)
    if rank_score_type not in known_types:
        raise ValueError(f'Unknown rank score type {rank_score_type}')

    # Normalized scores ignore the caller supplied bound and use the module constant.
    lower_bound = (MIN_SCORE_NORMALIZED
                   if rank_score_type == 'RankScoreNormalized'
                   else min_rank_score_value)

    return lower_bound if rank_score < lower_bound else rank_score

genmod/score_variants/compound_scorer.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323

2424
from genmod.score_variants.score_variant import as_normalized_max_min, MIN_SCORE_NORMALIZED, MAX_SCORE_NORMALIZED
2525
from genmod.score_variants.rank_score_variant_definitions import RANK_SCORE_TYPE_NAMES
26+
from genmod.score_variants.cap_rank_score_to_min_bound import cap_rank_score_to_min_bound
2627

2728
logger = logging.getLogger(__name__)
2829

@@ -249,6 +250,12 @@ def run(self):
249250
min_rank_score_value=variant_rankscore_normalization_bounds[variant_id][0],
250251
max_rank_score_value=variant_rankscore_normalization_bounds[variant_id][1]
251252
)
253+
# In case the current_rank_score falls outside normalization bounds after modification,
254+
# cap it to within the MIN normalization bound.
255+
current_rank_score = cap_rank_score_to_min_bound(rank_score_type=rank_score_type,
256+
rank_score=current_rank_score,
257+
min_rank_score_value=variant_rankscore_normalization_bounds[variant_id][0]
258+
)
252259

253260
for compound_id in compound_list:
254261
logger.debug("Checking compound {0}".format(compound_id))
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from genmod.score_variants.cap_rank_score_to_min_bound import cap_rank_score_to_min_bound, MIN_SCORE_NORMALIZED
2+
3+
4+
MIN_SCORE: float = -5.0
5+
6+
7+
def test_rankscore_normalized_capping():
    """
    Capped RankScoreNormalized values must never fall below MIN_SCORE_NORMALIZED.
    """
    # GIVEN normalized rank scores covering the integers -10..9, passed as floats
    for raw_value in range(-10, 10):
        candidate = float(raw_value)
        # WHEN running cap method
        capped = cap_rank_score_to_min_bound(rank_score_type='RankScoreNormalized',
                                             rank_score=candidate,
                                             min_rank_score_value=MIN_SCORE_NORMALIZED)
        # THEN expect the capped score to be at or above the min bound
        assert capped >= MIN_SCORE_NORMALIZED
18+
19+
20+
def test_rankscore_capping():
    """
    Capped RankScore values must never fall below the supplied MIN_SCORE bound.
    """
    # GIVEN rank scores covering the integers -10..9
    for candidate in range(-10, 10):
        # WHEN running cap method
        capped = cap_rank_score_to_min_bound(rank_score_type='RankScore',
                                             rank_score=candidate,
                                             min_rank_score_value=MIN_SCORE)
        # THEN expect the capped score to be at or above the min bound
        assert capped >= MIN_SCORE

0 commit comments

Comments
 (0)