@@ -98,18 +98,20 @@ def post_json_message(filename):
9898
9999 with lock :
100100 add_response (response , response_data )
101- add_candidates (response_data )
101+ add_candidates (response_data , data [ "performance_month" ] )
102102
103103 except Exception as e :
104104 print (f"Error processing { filename } : { e } " )
105105
106106
def add_candidates(response_data: dict, performance_month: str):
    """Append the candidates table of one response to the global ``candidate_df``.

    ``response_data["candidates"]`` is read as a sequence whose first item
    holds the column names and whose remaining items are the rows. Every row
    added is tagged with ``performance_month``. Nothing happens when the key
    is missing or its value is empty.

    The function rebinds the module-level ``candidate_df``; the visible
    caller invokes it while holding ``lock``.
    """
    global candidate_df

    table = response_data.get("candidates", None)
    if not table:
        return

    header, rows = table[0], table[1:]
    tagged = pd.DataFrame(rows, columns=header).assign(
        performance_month=performance_month
    )
    candidate_df = pd.concat([candidate_df, tagged], ignore_index=True)
114116
115117
@@ -126,7 +128,7 @@ def add_response(response: requests.Response, response_data):
126128 response_df = pd .concat (
127129 [response_df , pd .DataFrame (response_dict )], ignore_index = True
128130 )
129- print (response_dict )
131+ print (response_dict , end = ' \r ' )
130132
131133
132134def analyse_responses ():
@@ -157,72 +159,70 @@ def analyse_candidates():
157159 if OUTPUT :
158160 candidate_df .to_csv (OUTPUT , index = False )
159161
160- # causal pathways
162+
161163 candidate_df .rename (columns = {"acceptable_by" : "causal_pathway" }, inplace = True )
162- causal_pathway = (
163- candidate_df .groupby ("causal_pathway" )["selected" ]
164+ candidate_df ["score" ] = candidate_df ["score" ].astype (float )
165+ candidate_df .rename (columns = {"name" : "message" }, inplace = True )
166+
167+ # causal pathways
168+ causal_pathway_report = build_table ("causal_pathway" )
169+ print (causal_pathway_report , "\n " )
170+
171+ # messages
172+ message_report = build_table ("message" )
173+ print (message_report , "\n " )
174+
175+ # measures
176+ measure_report = build_table ("measure" )
177+ print (measure_report , "\n " )
178+
179+
def build_table(grouping_column, candidates=None):
    """Build a per-group summary of acceptable and selected candidates.

    Args:
        grouping_column: Name of the column to group by (the visible caller
            passes "causal_pathway", "message" and "measure").
        candidates: Optional DataFrame to summarise. It must contain
            ``grouping_column``, a ``selected`` column usable as a boolean
            mask, and a numeric ``score`` column. Defaults to the
            module-level ``candidate_df``.

    Returns:
        A DataFrame with one row per group and the columns
        ``grouping_column``, "acceptable" (row count), "% acceptable"
        (share of all rows), "acceptable_score" (mean score of all rows),
        "selected" (sum of ``selected``), "% selected" (share of all
        selected rows), "selected_score" (mean score of selected rows, NaN
        for groups with none selected) and "% of acceptable selected".
    """
    if candidates is None:
        candidates = candidate_df

    report_table = (
        candidates.groupby(grouping_column)["selected"]
        .agg(acceptable="count", selected="sum")
        .reset_index()
    )

    # Round while the grouping key is still the index: rounding after
    # reset_index() would also round a float-valued key and break the
    # merge below.
    scores = (
        candidates.groupby(grouping_column)["score"]
        .agg(acceptable_score="mean")
        .round(2)
        .reset_index()
    )
    report_table = pd.merge(report_table, scores, on=grouping_column, how="left")

    report_table["% acceptable"] = round(
        report_table["acceptable"] / report_table["acceptable"].sum() * 100, 1
    )
    report_table["% selected"] = round(
        report_table["selected"] / report_table["selected"].sum() * 100, 1
    )
    report_table["% of acceptable selected"] = round(
        report_table["selected"] / report_table["acceptable"] * 100, 1
    )

    # Mean score restricted to the selected rows; groups with no selected
    # row are absent here and become NaN through the left merge.
    selected_scores = (
        candidates[candidates["selected"]]
        .groupby(grouping_column)["score"]
        .agg(selected_score="mean")
        .round(2)
        .reset_index()
    )
    report_table = pd.merge(
        report_table, selected_scores, on=grouping_column, how="left"
    )

    report_table = report_table[
        [
            grouping_column,
            "acceptable",
            "% acceptable",
            "acceptable_score",
            "selected",
            "% selected",
            "selected_score",
            "% of acceptable selected",
        ]
    ]

    return report_table
226226
227227
228228def extract_number (filename ):
@@ -246,7 +246,7 @@ def main():
246246 if SAMPLE :
247247 n = min (SAMPLE , len (input_files ))
248248 input_files = sorted (random .sample (input_files , n ), key = extract_number )
249-
249+
250250 with ThreadPoolExecutor (WORKERS ) as executor :
251251 executor .map (post_json_message , input_files )
252252
0 commit comments