
Commit 921cd99

pboisver and faridseifi authored
created a context file to handle init and update of a context which hosts preferences and history and provides them for esteemer (#463)
Co-authored-by: Farid Seifi <[email protected]>
1 parent 1a0602f commit 921cd99

File tree

14 files changed: +280, -190 lines changed
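The new scaffold/context.py module itself does not appear in the hunks shown below; only its call sites do (context.init() in batch_csv and context.update(...) in the API and batch commands). As a rough sketch of what such a module might look like, assuming preferences and history arrive under "Preferences" and "History" keys and are held as module-level state for the esteemer, which this diff does not confirm:

```python
# Hypothetical sketch of scaffold/context.py -- names, keys, and the module-global
# design are assumptions inferred from the call sites in this commit.

preferences: dict = {}
history: dict = {}


def init() -> None:
    """Reset the context, e.g. at the start of a CSV batch run."""
    preferences.clear()
    history.clear()


def update(req_info: dict) -> None:
    """Refresh the context from an incoming input message."""
    preferences.clear()
    preferences.update(req_info.get("Preferences") or {})
    history.clear()
    history.update(req_info.get("History") or {})
```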

bulk-up/src/bulk_up/log_to_data.py

Lines changed: 8 additions & 1 deletion
```diff
@@ -4,6 +4,7 @@
 
 import pandas as pd
 from rdflib import RDF, RDFS, Graph, URIRef
+from scaffold.utils.utils import get_performance_month
 
 module_path = ".."  # run this script from the bulk-up folder. This will add the directory above (which is precision-feedback-pipeline/) to system path to be able to import pipeline modules
 sys.path.append(module_path)
@@ -103,7 +104,13 @@ def generate_response(output_message, input_message):
 
 
 def add_signal_properties(row, output_message, input_message):
-    performance_df = prepare(input_message)
+    performance_month = get_performance_month(input_message)
+    performance_df = pd.DataFrame(
+        input_message["Performance_data"][1:],
+        columns=input_message["Performance_data"][0],
+    )
+
+    performance_df = prepare(performance_month, performance_df)
     performance_df = performance_df[
         performance_df["measure"] == output_message["selected_candidate"]["measure"]
     ].tail(12)
```
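Here, as in scaffold/api.py and scaffold/cli.py below, the Performance_data payload is treated as a list of rows whose first entry is the header row. A minimal illustration of that construction; the column values below are invented for the example, and only staff_number, measure, month, and MPOG_goal are actually referenced elsewhere in this diff:

```python
import pandas as pd

# Invented payload for illustration; real messages carry the pipeline's own columns.
input_message = {
    "Performance_data": [
        ["staff_number", "measure", "month", "MPOG_goal"],  # header row
        [42, "BP-01", "2024-01-01", 0.9],                   # data rows
        [42, "BP-01", "2024-02-01", 0.9],
    ]
}

performance_df = pd.DataFrame(
    input_message["Performance_data"][1:],         # rows after the header
    columns=input_message["Performance_data"][0],  # header row as column names
)
print(performance_df)
```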

bulk-up/src/bulk_up/prepare_csv_inputs.py

Lines changed: 7 additions & 5 deletions
```diff
@@ -63,16 +63,18 @@ def extract_number(filename):
     )
     writer.writerows([preferences])
 
-all_keys = set(["staff_number"])
+all_keys = set(["staff_number", "month", "history"])
 for input_file in input_files:
     input_data = orjson.loads(input_file.read_bytes())
-    all_keys.update(input_data["History"].keys())
 with open("history.csv", "w", newline="") as file:
     writer = csv.DictWriter(file, fieldnames=all_keys)
     writer.writeheader()
     for index, input_file in enumerate(input_files):
         input_data = orjson.loads(input_file.read_bytes())
-        if input_data["History"]:
-            history = {"staff_number": input_data["Performance_data"][1][0]}
-            history.update(input_data["History"])
+        for key, value in input_data["History"].items():
+            history = {
+                "staff_number": input_data["Performance_data"][1][0],
+                "month": key,
+                "history": value,
+            }
             writer.writerows([history])
```
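The effect of this change is that history.csv moves from one wide row per provider (with whatever keys each History dict happened to contain) to a long format with fixed staff_number, month, history columns and one row per recorded month. A self-contained sketch of the new write loop; the History contents here are invented for illustration:

```python
import csv

# Invented example input; a real one would come from orjson.loads(input_file.read_bytes()).
input_data = {
    "Performance_data": [["staff_number", "measure"], [42, "BP-01"]],
    "History": {"2024-01-01": {"message": "congrats"}, "2024-02-01": {"message": "reminder"}},
}

with open("history.csv", "w", newline="") as file:
    writer = csv.DictWriter(file, fieldnames=["staff_number", "month", "history"])
    writer.writeheader()
    for key, value in input_data["History"].items():
        writer.writerows(
            [{"staff_number": input_data["Performance_data"][1][0], "month": key, "history": value}]
        )
```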

scaffold/api.py

Lines changed: 22 additions & 3 deletions
```diff
@@ -1,9 +1,13 @@
-from fastapi import FastAPI, Request
+import pandas as pd
+from fastapi import FastAPI, HTTPException, Request
 from fastapi.responses import RedirectResponse
 
-from scaffold.pipeline import run_pipeline
+from scaffold import context
+from scaffold.bitstomach.bitstomach import prepare
+from scaffold.pipeline import pipeline
 from scaffold.startup import startup
 from scaffold.utils.settings import settings
+from scaffold.utils.utils import get_performance_month
 
 app = FastAPI()
 
@@ -30,6 +34,21 @@ async def template():
 @app.post("/createprecisionfeedback/")
 async def createprecisionfeedback(info: Request):
     req_info = await info.json()
-    full_message = run_pipeline(req_info)
+    context.update(req_info)
+
+    performance_month = get_performance_month(req_info)
+    performance_df = prepare(
+        performance_month,
+        pd.DataFrame(
+            req_info["Performance_data"][1:], columns=req_info["Performance_data"][0]
+        ),
+    )
+    try:
+        full_message = pipeline(performance_df)
+        full_message["message_instance_id"] = req_info["message_instance_id"]
+        full_message["performance_data"] = req_info["Performance_data"]
+    except HTTPException as e:
+        e.detail["message_instance_id"] = req_info["message_instance_id"]
+        raise e
 
     return full_message
```
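For reference, a hedged example of exercising the updated endpoint. Only message_instance_id, performance_month, and Performance_data are visible in this diff; the Preferences and History keys are assumptions based on context.update() now receiving the raw request body, and the column values are invented:

```python
import httpx  # any HTTP client would do; httpx is used only for this example

payload = {
    "message_instance_id": "demo-001",
    "performance_month": "2024-02-01",
    "Performance_data": [
        ["staff_number", "measure", "month", "MPOG_goal"],
        [42, "BP-01", "2024-01-01", 0.9],
        [42, "BP-01", "2024-02-01", 0.9],
    ],
    "Preferences": {},  # assumed key, consumed by context.update()
    "History": {},      # assumed key, consumed by context.update()
}

response = httpx.post("http://localhost:8000/createprecisionfeedback/", json=payload)
print(response.status_code, response.json().get("message_instance_id"))
```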

scaffold/bitstomach/bitstomach.py

Lines changed: 5 additions & 14 deletions
```diff
@@ -2,7 +2,6 @@
 from rdflib import RDF, BNode, Graph, Literal
 
 from scaffold.bitstomach.signals import SIGNALS
-from scaffold.utils import settings
 from scaffold.utils.namespace import PSDO, SLOWMO
 
 
@@ -33,23 +32,15 @@ def extract_signals(perf_df: pd.DataFrame) -> Graph:
     return g
 
 
-def prepare(req_info):
-    performance_month = req_info["performance_month"]
-    if settings.settings.performance_month:
-        performance_month = settings.settings.performance_month
-
-
-    performance_data = req_info["Performance_data"]
-    performance_df = pd.DataFrame(performance_data[1:], columns=performance_data[0])
-
-    return prepare_performance_df(performance_month, performance_df)
-
-def prepare_performance_df(performance_month, performance_df):
+def prepare(performance_month, performance_df):
+    # we would have multiple staff performance data at this point so this like won't work right
     performance_df.attrs["staff_number"] = int(performance_df.at[0, "staff_number"])
 
     performance_df["goal_comparator_content"] = performance_df["MPOG_goal"]
 
-    performance_df.attrs["performance_month"] = performance_month if performance_month else performance_df["month"].max()
+    performance_df.attrs["performance_month"] = (
+        performance_month if performance_month else performance_df["month"].max()
+    )
 
 
     performance_df = performance_df[
         performance_df["month"] <= performance_df.attrs["performance_month"]
```

scaffold/cli.py

Lines changed: 36 additions & 49 deletions
```diff
@@ -6,19 +6,20 @@
 import orjson
 import pandas as pd
 import typer
+from fastapi import HTTPException
 from loguru import logger
 
-from scaffold.bitstomach.bitstomach import prepare_performance_df
-from scaffold.pipeline import pipeline, run_pipeline
-from scaffold.startup import set_preferences, startup
+from scaffold import context
+from scaffold.bitstomach.bitstomach import prepare
+from scaffold.pipeline import pipeline
+from scaffold.startup import startup
 from scaffold.utils.utils import (
     add_candidates,
     add_response,
     analyse_candidates,
     analyse_responses,
     extract_number,
-    get_history,
-    get_preferences,
+    get_performance_month,
 )
 
 cli = typer.Typer(no_args_is_help=True)
@@ -53,17 +54,30 @@ def batch(
         )
         raise SystemExit(1)
 
-    if max_files is not None:
-        input_files = input_files[:max_files]
-
     success_count = 0
     failure_count = 0
 
-    for input_file in input_files:
+    for input_file in input_files[:max_files]:
         try:
             input_data = orjson.loads(input_file.read_bytes())
 
-            response_data = run_pipeline(input_data)
+            context.update(input_data)
+
+            performance_month = get_performance_month(input_data)
+            performance_df = prepare(
+                performance_month,
+                pd.DataFrame(
+                    input_data["Performance_data"][1:],
+                    columns=input_data["Performance_data"][0],
+                ),
+            )
+            try:
+                full_message = pipeline(performance_df)
+                full_message["message_instance_id"] = input_data["message_instance_id"]
+                full_message["performance_data"] = input_data["Performance_data"]
+            except HTTPException as e:
+                e.detail["message_instance_id"] = input_data["message_instance_id"]
+                raise e
 
             if not stats_only:
                 directory = input_file.parent / "messages"
@@ -76,7 +90,7 @@ def batch(
                 output_path = directory / new_filename
 
                 output_path.write_bytes(
-                    orjson.dumps(response_data, option=orjson.OPT_INDENT_2)
+                    orjson.dumps(full_message, option=orjson.OPT_INDENT_2)
                 )
                 logger.info(f"Message created at {output_path}")
             else:
@@ -86,13 +100,13 @@ def batch(
         except Exception as e:
             logger.error(f"✘ Failed to process {input_file}: {e}")
             failure_count += 1
-            response_data = e.detail
+            full_message = e.detail
 
-        add_response(response_data)
+        add_response(full_message)
         if not stats_only:
-            add_candidates(response_data, input_data["performance_month"])
+            add_candidates(full_message, input_data["performance_month"])
 
-    logger.info(f"Total files scanned: {len(input_files)}")
+    logger.info(f"Total files scanned: {len(input_files[:max_files])}")
     logger.info(f"Successful: {success_count}, Failed: {failure_count}")
     analyse_responses()
     if not stats_only:
@@ -105,14 +119,6 @@ def batch_csv(
         pathlib.Path,
         typer.Argument(help="Path to a CSV file containing performance data"),
     ],
-    preferences_path: Annotated[
-        pathlib.Path,
-        typer.Argument(help="Path to a CSV file containing the preferences"),
-    ],
-    history_path: Annotated[
-        pathlib.Path,
-        typer.Argument(help="Path to a CSV file containing the history"),
-    ],
     max_files: Annotated[
         int, typer.Option("--max-files", help="Maximum number of files to process")
     ] = None,
@@ -128,44 +134,26 @@ def batch_csv(
     ] = False,
 ):
     startup()
+    context.init()
 
-    all_performance_data = pd.read_csv(performance_data_path, parse_dates=["month"])
-    all_preferences = pd.read_csv(preferences_path)
-    all_hostory = pd.read_csv(history_path)
-
-    if max_files is not None:
-        first_n_staff = (
-            all_performance_data["staff_number"].drop_duplicates().head(max_files)
-        )
-        performance_data = all_performance_data[
-            all_performance_data["staff_number"].isin(first_n_staff)
-        ].reset_index(drop=True)
-        # performance_data = all_performance_data[all_performance_data['staff_number'].isin(set(range(1, max_files + 1)))].reset_index(drop=True)
+    performance_data = pd.read_csv(performance_data_path, parse_dates=["month"])
     success_count = 0
     failure_count = 0
-    for provider_id in performance_data["staff_number"].unique().tolist():
+    for provider_id in (
+        performance_data["staff_number"].drop_duplicates().head(max_files)
+    ):
         try:
-            preferences = set_preferences(
-                get_preferences(
-                    all_preferences[all_preferences["staff_number"] == provider_id]
-                )
-            )
-            history = get_history(
-                all_hostory[all_hostory["staff_number"] == provider_id]
-            )
-
-            performance_df = prepare_performance_df(
+            performance_df = prepare(
                 performance_month,
                 performance_data[
                     performance_data["staff_number"] == provider_id
                 ].reset_index(drop=True),
            )
-            result = pipeline(preferences, history, performance_df)
+            result = pipeline(performance_df)
             if not stats_only:
                 directory = performance_data_path.parent / "messages"
                 os.makedirs(directory, exist_ok=True)
 
-                performance_month = performance_month
                 new_filename = (
                     f"Provider_{provider_id} - message for {performance_month}.json"
                 )
@@ -197,7 +185,6 @@ def batch_csv(
 
 @cli.command()
 def web(workers: int = 5):
-    # uvicorn.run(["scaffold.api:app","--workers", str(workers)], reload=False, use_colors=True)
     subprocess.run(
         ["uvicorn", "scaffold.api:app", "--workers", str(workers), "--use-colors"]
     )
```
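With the preferences and history CSV arguments removed, batch-csv now takes only the performance CSV; preferences and history come from the context instead. A rough invocation sketch using Typer's test runner (option names beyond --max-files are not shown in these hunks, so this call sticks to what the diff confirms):

```python
from typer.testing import CliRunner

from scaffold.cli import cli

runner = CliRunner()
# "batch-csv" is Typer's default command name for the batch_csv function.
result = runner.invoke(cli, ["batch-csv", "performance.csv", "--max-files", "5"])
print(result.exit_code)
```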
