 import orjson
 import pandas as pd
 import typer
+from fastapi import HTTPException
 from loguru import logger
 
-from scaffold.bitstomach.bitstomach import prepare_performance_df
-from scaffold.pipeline import pipeline, run_pipeline
-from scaffold.startup import set_preferences, startup
+from scaffold import context
+from scaffold.bitstomach.bitstomach import prepare
+from scaffold.pipeline import pipeline
+from scaffold.startup import startup
 from scaffold.utils.utils import (
     add_candidates,
     add_response,
     analyse_candidates,
     analyse_responses,
     extract_number,
-    get_history,
-    get_preferences,
+    get_performance_month,
 )
 
 cli = typer.Typer(no_args_is_help=True)
@@ -53,17 +54,30 @@ def batch(
         )
         raise SystemExit(1)
 
-    if max_files is not None:
-        input_files = input_files[:max_files]
-
     success_count = 0
     failure_count = 0
 
-    for input_file in input_files:
+    for input_file in input_files[:max_files]:
         try:
             input_data = orjson.loads(input_file.read_bytes())
 
-            response_data = run_pipeline(input_data)
+            context.update(input_data)
+
+            performance_month = get_performance_month(input_data)
+            performance_df = prepare(
+                performance_month,
+                pd.DataFrame(
+                    input_data["Performance_data"][1:],
+                    columns=input_data["Performance_data"][0],
+                ),
+            )
+            try:
+                full_message = pipeline(performance_df)
+                full_message["message_instance_id"] = input_data["message_instance_id"]
+                full_message["performance_data"] = input_data["Performance_data"]
+            except HTTPException as e:
+                e.detail["message_instance_id"] = input_data["message_instance_id"]
+                raise e
 
             if not stats_only:
                 directory = input_file.parent / "messages"
@@ -76,7 +90,7 @@ def batch(
                 output_path = directory / new_filename
 
                 output_path.write_bytes(
-                    orjson.dumps(response_data, option=orjson.OPT_INDENT_2)
+                    orjson.dumps(full_message, option=orjson.OPT_INDENT_2)
                 )
                 logger.info(f"Message created at {output_path}")
             else:
@@ -86,13 +100,13 @@ def batch(
         except Exception as e:
             logger.error(f"✘ Failed to process {input_file}: {e}")
             failure_count += 1
-            response_data = e.detail
+            full_message = e.detail
 
-        add_response(response_data)
+        add_response(full_message)
         if not stats_only:
-            add_candidates(response_data, input_data["performance_month"])
+            add_candidates(full_message, input_data["performance_month"])
 
-    logger.info(f"Total files scanned: {len(input_files)}")
+    logger.info(f"Total files scanned: {len(input_files[:max_files])}")
     logger.info(f"Successful: {success_count}, Failed: {failure_count}")
     analyse_responses()
     if not stats_only:
@@ -105,14 +119,6 @@ def batch_csv(
         pathlib.Path,
         typer.Argument(help="Path to a CSV file containing performance data"),
     ],
-    preferences_path: Annotated[
-        pathlib.Path,
-        typer.Argument(help="Path to a CSV file containing the preferences"),
-    ],
-    history_path: Annotated[
-        pathlib.Path,
-        typer.Argument(help="Path to a CSV file containing the history"),
-    ],
     max_files: Annotated[
         int, typer.Option("--max-files", help="Maximum number of files to process")
     ] = None,
@@ -128,44 +134,26 @@ def batch_csv(
     ] = False,
 ):
     startup()
+    context.init()
 
-    all_performance_data = pd.read_csv(performance_data_path, parse_dates=["month"])
-    all_preferences = pd.read_csv(preferences_path)
-    all_hostory = pd.read_csv(history_path)
-
-    if max_files is not None:
-        first_n_staff = (
-            all_performance_data["staff_number"].drop_duplicates().head(max_files)
-        )
-        performance_data = all_performance_data[
-            all_performance_data["staff_number"].isin(first_n_staff)
-        ].reset_index(drop=True)
-        # performance_data = all_performance_data[all_performance_data['staff_number'].isin(set(range(1, max_files + 1)))].reset_index(drop=True)
+    performance_data = pd.read_csv(performance_data_path, parse_dates=["month"])
     success_count = 0
     failure_count = 0
-    for provider_id in performance_data["staff_number"].unique().tolist():
+    for provider_id in (
+        performance_data["staff_number"].drop_duplicates().head(max_files)
+    ):
         try:
-            preferences = set_preferences(
-                get_preferences(
-                    all_preferences[all_preferences["staff_number"] == provider_id]
-                )
-            )
-            history = get_history(
-                all_hostory[all_hostory["staff_number"] == provider_id]
-            )
-
-            performance_df = prepare_performance_df(
+            performance_df = prepare(
                 performance_month,
                 performance_data[
                     performance_data["staff_number"] == provider_id
                 ].reset_index(drop=True),
             )
-            result = pipeline(preferences, history, performance_df)
+            result = pipeline(performance_df)
             if not stats_only:
                 directory = performance_data_path.parent / "messages"
                 os.makedirs(directory, exist_ok=True)
 
-                performance_month = performance_month
                 new_filename = (
                     f"Provider_{provider_id} - message for {performance_month}.json"
                 )
@@ -197,7 +185,6 @@ def batch_csv(
 
 @cli.command()
 def web(workers: int = 5):
-    # uvicorn.run(["scaffold.api:app","--workers", str(workers)], reload=False, use_colors=True)
     subprocess.run(
         ["uvicorn", "scaffold.api:app", "--workers", str(workers), "--use-colors"]
     )
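
For context on the new batch flow above: "Performance_data" arrives as a list of rows whose first entry is the header row, and the remaining rows become the records of the DataFrame handed to prepare(). Below is a minimal, self-contained sketch of that conversion; the header-row convention comes from this diff, while the column names and values are hypothetical.

import pandas as pd

# Hypothetical input shaped like the batch JSON files: the first row of
# "Performance_data" is the header, the remaining rows are data records.
input_data = {
    "message_instance_id": "demo-0001",  # made-up id for illustration
    "Performance_data": [
        ["staff_number", "measure", "month", "passed_count", "denominator"],
        [42, "BP01", "2024-06-01", 93, 100],
        [42, "GLU01", "2024-06-01", 41, 60],
    ],
}

performance_df = pd.DataFrame(
    input_data["Performance_data"][1:],         # data rows
    columns=input_data["Performance_data"][0],  # header row
)
print(performance_df)

This also shows why the explicit max_files guards could be dropped on both paths: a slice such as input_files[:None] returns the whole list, and pandas' Series.head(None) likewise falls back to all rows (head slices with iloc[:n]), so the default max_files=None naturally means "no limit".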