Skip to content

Commit ee4ccc5

Browse files
authored
Updated scoring, moving normalization into moderators. Trend annotate in bitstomach updated to use the Trend signal. Added MPM table to esteemer. (#326)
1 parent 2a06a71 commit ee4ccc5

File tree

11 files changed

+71
-63
lines changed

11 files changed

+71
-63
lines changed

bit_stomach/trend_annotate.py

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from rdflib import Literal, URIRef, BNode
55
from rdflib.namespace import RDF
66
#from calc_gaps_slopes import gap_calc,trend_calc,monotonic_pred,mod_collector
7-
7+
from bitstomach2.signals import Trend
88

99

1010

@@ -101,14 +101,5 @@ def theil_reg(df, xcol, ycol):
101101
return pd.Series(model)
102102

103103
def calculate_trend(df, month, performance_rate):
104-
performance_rates = list(df[performance_rate])
105-
last_index= len(performance_rates) - 1
106-
change_this_month = performance_rates[last_index ] - performance_rates[last_index - 1]
107-
change_last_month = performance_rates[last_index - 1] - performance_rates[last_index - 2]
108-
109-
if change_this_month * change_last_month < 0:
110-
return 0
111-
112-
return (performance_rates[last_index ] - performance_rates[last_index - 2]) / 2
113-
114-
104+
df["passed_rate"]=df[performance_rate]
105+
return Trend._detect(df)

bitstomach2/bitstomach.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,8 @@ def fix_up(performance_data):
4343
performance_df.rename(
4444
columns={"MPOG_goal": "goal_comparator_content"}, inplace=True
4545
)
46-
performance_df["passed_percentage"] = (
47-
performance_df["passed_count"] / performance_df["denominator"] * 100.0
46+
performance_df["passed_rate"] = (
47+
performance_df["passed_count"] / performance_df["denominator"]
4848
)
4949

5050
return performance_df

bitstomach2/signals/_comparison.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def detect(perf_data: pd.DataFrame) -> Optional[List[Resource]]:
2828
if perf_data.empty:
2929
raise ValueError
3030

31-
level = perf_data["passed_percentage"][-1:].to_list()[0]
31+
level = perf_data["passed_rate"][-1:].to_list()[0]
3232

3333
resources = []
3434
comp_cols = [
@@ -37,12 +37,12 @@ def detect(perf_data: pd.DataFrame) -> Optional[List[Resource]]:
3737
"peer_90th_percentile_benchmark",
3838
"goal_comparator_content",
3939
]
40-
comparators = perf_data[-1:][comp_cols].to_dict(orient="records")[0]
40+
comparators = perf_data[-1:][comp_cols].to_dict(orient="records")[0]
4141

4242
for key, value in comparators.items():
43-
gap = Comparison._detect(level, value)
43+
gap = Comparison._detect(level, value / 100)
4444

45-
r = Comparison._resource(gap, key, value)
45+
r = Comparison._resource(gap, key, value / 100)
4646

4747
resources.append(r)
4848

@@ -88,9 +88,9 @@ def moderators(cls, motivating_informations: List[Resource]) -> List[dict]:
8888

8989
for signal in super().select(motivating_informations):
9090
motivating_info_dict = super().moderators(signal)
91-
motivating_info_dict["gap_size"] = signal.value(
91+
motivating_info_dict["gap_size"] = round(abs(signal.value(
9292
SLOWMO.PerformanceGapSize
93-
).value
93+
).value),4)
9494
motivating_info_dict["comparator_type"] = signal.value(
9595
SLOWMO.RegardingComparator / RDF.type
9696
).identifier

bitstomach2/signals/_trend.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def detect(perf_data: pd.DataFrame) -> Optional[List[Resource]]:
2121
if perf_data.empty:
2222
raise ValueError
2323

24-
if perf_data["passed_percentage"].count() < 3:
24+
if perf_data["passed_rate"].count() < 3:
2525
return None
2626

2727
slope = Trend._detect(perf_data)
@@ -57,9 +57,9 @@ def moderators(cls, motivating_informations: List[Resource]) -> List[dict]:
5757

5858
for signal in super().select(motivating_informations):
5959
motivating_info_dict = super().moderators(signal)
60-
motivating_info_dict["trend_size"] = signal.value(
60+
motivating_info_dict["trend_size"] = round(abs(signal.value(
6161
SLOWMO.PerformanceTrendSlope
62-
).value
62+
).value),4)
6363

6464
mods.append(motivating_info_dict)
6565

@@ -70,10 +70,13 @@ def _detect(perf_data: pd.DataFrame) -> float:
7070
"""
7171
Calculates the slope of a monotonically increasing or decreasing trend over three months.
7272
"""
73-
performance_rates = perf_data["passed_percentage"]
73+
performance_rates = perf_data["passed_rate"]
7474
change_this_month = performance_rates.iloc[-1] - performance_rates.iloc[-2]
7575
change_last_month = performance_rates.iloc[-2] - performance_rates.iloc[-3]
7676

77+
if change_this_month == 0:
78+
return 0
79+
7780
if change_this_month * change_last_month < 0:
7881
return 0
7982

esteemer/esteemer.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@
88
from esteemer.signals import History
99
from utils.namespace import SLOWMO
1010

11+
MPM = {"Social Worse": {Comparison.signal_type: 0.5, History.signal_type: -0.5},
12+
"Social better": {Comparison.signal_type: 0.5, History.signal_type: -0.1},
13+
"Improving": {Trend.signal_type: 0.8, History.signal_type: -0.1},
14+
"Worsening": {Trend.signal_type: 0.8, History.signal_type: -0.5}
15+
}
1116

1217
def score(candidate_resource: Resource, history: json, preferences: json) -> Resource:
1318
"""
@@ -33,7 +38,11 @@ def score(candidate_resource: Resource, history: json, preferences: json) -> Res
3338
preference_score = calculate_preference_score(candidate_resource, preferences)
3439

3540
# calculate final score = function of sub-scores
36-
final_score = motivating_info["score"] + -history_info["score"] + preference_score
41+
final_score = motivating_info["score"] + history_info["score"] + preference_score
42+
43+
candidate_resource[URIRef("motivating_score")] = Literal(motivating_info["score"] , datatype=XSD.double)
44+
candidate_resource[URIRef("history_score")] = Literal(history_info["score"], datatype=XSD.double)
45+
3746

3847
candidate_resource[SLOWMO.Score] = Literal(final_score, datatype=XSD.double)
3948

@@ -64,6 +73,7 @@ def calculate_motivating_info_score(candidate_resource: Resource) -> dict:
6473
]
6574

6675
mod = {}
76+
6777
match causal_pathway.value:
6878
case "Social Worse":
6979
comparator_type = candidate_resource.value(SLOWMO.IsAbout).identifier
@@ -76,7 +86,7 @@ def calculate_motivating_info_score(candidate_resource: Resource) -> dict:
7686
if moderator["comparator_type"] == comparator_type
7787
][0]
7888

79-
mod["score"] = round(abs(mod["gap_size"] / 100), 4) / 5 - 0.02
89+
mod["score"] = (mod["gap_size"] / 5 - 0.02) * MPM[causal_pathway.value][Comparison.signal_type]
8090
case "Social better":
8191
comparator_type = candidate_resource.value(SLOWMO.IsAbout).identifier
8292
moderators = Comparison.moderators(motivating_informations)
@@ -87,13 +97,13 @@ def calculate_motivating_info_score(candidate_resource: Resource) -> dict:
8797
if moderator["comparator_type"] == comparator_type
8898
][0]
8999

90-
mod["score"] = round(abs(mod["gap_size"] / 100), 4) + 0.02
100+
mod["score"] = (mod["gap_size"] + 0.02) * MPM[causal_pathway.value][Comparison.signal_type]
91101
case "Improving":
92102
mod = Trend.moderators(motivating_informations)[0]
93-
mod["score"] = round(abs(mod["trend_size"] / 100), 4) * 5
103+
mod["score"] = (mod["trend_size"] * 5) * MPM[causal_pathway.value][Trend.signal_type]
94104
case "Worsening":
95105
mod = Trend.moderators(motivating_informations)[0]
96-
mod["score"] = round(abs(mod["trend_size"] / 100), 4)
106+
mod["score"] = (mod["trend_size"]) * MPM[causal_pathway.value][Trend.signal_type]
97107
case _:
98108
mod["score"] = 0.0
99109
return mod
@@ -105,7 +115,7 @@ def calculate_history_score(candidate_resource: Resource, history: dict) -> dict
105115
106116
Parameters:
107117
- candidate_resource (Resource): The candidate resource.
108-
- history (json): The history of messages.
118+
- history (dict): The history of messages.
109119
110120
Returns:
111121
float: history sub-score.
@@ -124,8 +134,10 @@ def calculate_history_score(candidate_resource: Resource, history: dict) -> dict
124134
return {"score": 0}
125135

126136
mod = History.moderators(signals)[0]
127-
128-
mod["score"] = mod["occurance"] / 11
137+
138+
causal_pathway = list(candidate_resource.objects(URIRef("slowmo:acceptable_by")))[0]
139+
140+
mod["score"] = mod["recurrence_count"] * MPM[causal_pathway.value][History.signal_type]
129141

130142
return mod
131143

esteemer/signals/_history.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,15 @@ def detect(message_history: dict) -> Optional[List[Resource]]:
2020
history = pd.DataFrame.from_dict(message_history, orient="index")
2121
history = history.sort_index()
2222

23-
occurance = History._detect(history)
23+
recurrence = History._detect(history)
2424

25-
return [History._resource(occurance)]
25+
return [History._resource(recurrence)]
2626

2727
@classmethod
28-
def _resource(cls, occurance: int) -> Resource:
28+
def _resource(cls, recurrence_count: int) -> Resource:
2929
base = super()._resource()
3030

31-
base[URIRef("occurance")] = Literal(occurance, datatype=XSD.integer)
31+
base[URIRef("recurrence_count")] = Literal(recurrence_count, datatype=XSD.integer)
3232
return base
3333

3434
@classmethod
@@ -37,7 +37,7 @@ def moderators(cls, signals: List[Resource]) -> List[dict]:
3737

3838
for signal in super().select(signals):
3939
history_dict = {}
40-
history_dict["occurance"] = signal.value(URIRef("occurance")).value
40+
history_dict["recurrence_count"] = round( signal.value(URIRef("recurrence_count")).value / 11, 4)
4141
mods.append(history_dict)
4242

4343
return mods

esteemer/utils.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ def candidates_records(performer_graph: Graph) -> List[List]:
156156
Returns:
157157
dict: The representation of candidates as a dictionary.
158158
"""
159-
candidate_list = [["staff_number", "measure", "month", "score", "name", "acceptable_by", "selected"]]
159+
candidate_list = [["staff_number", "measure", "month", "score","motivating_score", "history_score", "name", "acceptable_by", "selected"]]
160160

161161

162162
for a_candidate in candidates(performer_graph):
@@ -174,7 +174,9 @@ def candidate_as_record(a_candidate: Resource) -> List:
174174
representation.append(a_candidate.value(SLOWMO.RegardingMeasure).identifier)
175175
representation.append("N/A")
176176
score = a_candidate.value(SLOWMO.Score)
177-
representation.append(round(float(score.value), 4) if score else None)
177+
representation.append(float(score.value) if score else None)
178+
representation.append(float( a_candidate.value(URIRef("motivating_score"))) if score else None)
179+
representation.append(float( a_candidate.value(URIRef("history_score"))) if score else None)
178180
representation.append(a_candidate.value( SLOWMO.name))
179181
representation.append(a_candidate.value( URIRef("slowmo:acceptable_by")))
180182
representation.append(a_candidate.value( URIRef("slowmo:selected")))

tests/bitstomach2/test_comparisons.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def perf_data() -> pd.DataFrame:
2424
"staff_number",
2525
"measure",
2626
"month",
27-
"passed_percentage",
27+
"passed_rate",
2828
"passed_count",
2929
"flagged_count",
3030
"denominator",
@@ -33,8 +33,8 @@ def perf_data() -> pd.DataFrame:
3333
"peer_90th_percentile_benchmark",
3434
"goal_comparator_content",
3535
],
36-
[157, "BP01", "2022-08-01", 85.0, 85.0, 0, 100.0, 84.0, 88.0, 90.0, 99.0],
37-
[157, "BP01", "2022-09-01", 90.0, 90.0, 0, 100.0, 85.0, 89.0, 91.0, 100.0],
36+
[157, "BP01", "2022-08-01", 0.85, 85.0, 0, 100.0, 84.0, 88.0, 90.0, 99.0],
37+
[157, "BP01", "2022-09-01", 0.90, 90.0, 0, 100.0, 85.0, 89.0, 91.0, 100.0],
3838
]
3939
return pd.DataFrame(performance_data[1:], columns=performance_data[0])
4040

@@ -72,7 +72,7 @@ def test_multiple_gap_values(perf_data):
7272

7373
assert 4 == len(signals)
7474

75-
expected_gap_sizes = [5.0, 1.0, -1.0, -10.0]
75+
expected_gap_sizes = [0.05, 0.01, -0.01, -0.1]
7676

7777
for index, signal in enumerate(signals):
7878
v = signal.value(SLOWMO.PerformanceGapSize).value
@@ -84,7 +84,7 @@ def test_comparator_node(perf_data):
8484

8585
signals = signal.detect(perf_data)
8686

87-
expected_comparator_values = [85.0, 89.0, 91.0, 100.0]
87+
expected_comparator_values = [0.85, 0.89, 0.91, 1.0]
8888

8989
for index, signal in enumerate(signals):
9090
assert Literal(expected_comparator_values[index]) == signal.value(

tests/bitstomach2/test_trends.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def test_empty_perf_data_raises_value_error():
2121
def test_no_trend_returns_none():
2222
mi = Trend.detect(
2323
pd.DataFrame(
24-
{"passed_percentage": [90, 90, 90]},
24+
{"passed_rate": [90, 90, 90]},
2525
)
2626
)
2727
assert mi is None
@@ -30,24 +30,24 @@ def test_no_trend_returns_none():
3030
## Signal detection tests
3131
def test_trend_is_detected():
3232
slope = Trend._detect(
33-
pd.DataFrame(columns=["passed_percentage"], data=[[90], [91], [92]])
33+
pd.DataFrame(columns=["passed_rate"], data=[[90], [91], [92]])
3434
)
3535
assert slope == 1
3636

3737
slope = Trend._detect(
38-
pd.DataFrame(columns=["passed_percentage"], data=[[90], [92], [94]])
38+
pd.DataFrame(columns=["passed_rate"], data=[[90], [92], [94]])
3939
)
4040
assert slope == 2
4141

4242
slope = Trend._detect(
43-
pd.DataFrame(columns=["passed_percentage"], data=[[90], [92], [90], [92], [94]])
43+
pd.DataFrame(columns=["passed_rate"], data=[[90], [92], [90], [92], [94]])
4444
)
4545
assert slope == 2
4646

4747

4848
def test_trend_as_resource():
4949
signal = Trend.detect(
50-
pd.DataFrame(columns=["passed_percentage"], data=[[90], [91], [92]])
50+
pd.DataFrame(columns=["passed_rate"], data=[[90], [91], [92]])
5151
).pop()
5252

5353
assert isinstance(signal, Resource)
@@ -99,7 +99,7 @@ def test_select():
9999
pd.DataFrame(
100100
columns=[
101101
"measure",
102-
"passed_percentage",
102+
"passed_rate",
103103
"peer_average_comparator",
104104
"peer_75th_percentile_benchmark",
105105
"peer_90th_percentile_benchmark",
@@ -111,7 +111,7 @@ def test_select():
111111

112112
r2 = Trend.detect(
113113
pd.DataFrame(
114-
{"passed_percentage": [89, 90, 91]},
114+
{"passed_rate": [89, 90, 91]},
115115
)
116116
)
117117

@@ -141,12 +141,12 @@ def test_select():
141141
def test_trend_identity():
142142
r1 = Trend.detect(
143143
pd.DataFrame(
144-
{"passed_percentage": [89, 90, 91]},
144+
{"passed_rate": [89, 90, 91]},
145145
)
146146
)
147147
r2 = Trend.detect(
148148
pd.DataFrame(
149-
{"passed_percentage": [89, 90, 91]},
149+
{"passed_rate": [89, 90, 91]},
150150
)
151151
)
152152

@@ -162,7 +162,7 @@ def test_detect_creates_correct_signal_with_magick_mock_calc():
162162

163163
signal = Trend.detect(
164164
pd.DataFrame(
165-
{"passed_percentage": [89, 90, 91]}, # slope 1.0
165+
{"passed_rate": [89, 90, 91]}, # slope 1.0
166166
)
167167
)
168168

@@ -176,7 +176,7 @@ def test_detect_with_decoy_calc(decoy: Decoy):
176176

177177
signal = Trend.detect(
178178
pd.DataFrame(
179-
{"passed_percentage": [89, 90, 91]}, # slope 1.0
179+
{"passed_rate": [89, 90, 91]}, # slope 1.0
180180
)
181181
)
182182

0 commit comments

Comments
 (0)