Skip to content

Commit 995aeeb

Browse files
authored
refactoring the bulk up app and minor updates. (#387)
1 parent 3c1da58 commit 995aeeb

File tree

1 file changed

+48
-48
lines changed

1 file changed

+48
-48
lines changed

bulk-up/src/bulk_up/req.py

Lines changed: 48 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -98,18 +98,20 @@ def post_json_message(filename):
9898

9999
with lock:
100100
add_response(response, response_data)
101-
add_candidates(response_data)
101+
add_candidates(response_data, data["performance_month"])
102102

103103
except Exception as e:
104104
print(f"Error processing {filename}: {e}")
105105

106106

107-
def add_candidates(response_data: dict):
107+
def add_candidates(response_data: dict, performance_month: str):
108108
global candidate_df
109109
data = response_data.get("candidates", None)
110110
if data:
111+
candidates = pd.DataFrame(data[1:], columns=data[0])
112+
candidates["performance_month"] = performance_month
111113
candidate_df = pd.concat(
112-
[candidate_df, pd.DataFrame(data[1:], columns=data[0])], ignore_index=True
114+
[candidate_df, candidates], ignore_index=True
113115
)
114116

115117

@@ -126,7 +128,7 @@ def add_response(response: requests.Response, response_data):
126128
response_df = pd.concat(
127129
[response_df, pd.DataFrame(response_dict)], ignore_index=True
128130
)
129-
print(response_dict)
131+
print(response_dict, end='\r')
130132

131133

132134
def analyse_responses():
@@ -157,72 +159,70 @@ def analyse_candidates():
157159
if OUTPUT:
158160
candidate_df.to_csv(OUTPUT, index=False)
159161

160-
# causal pathways
162+
161163
candidate_df.rename(columns={"acceptable_by": "causal_pathway"}, inplace=True)
162-
causal_pathway = (
163-
candidate_df.groupby("causal_pathway")["selected"]
164+
candidate_df["score"] = candidate_df["score"].astype(float)
165+
candidate_df.rename(columns={"name": "message"}, inplace=True)
166+
167+
# causal pathways
168+
causal_pathway_report = build_table("causal_pathway")
169+
print(causal_pathway_report, "\n")
170+
171+
# messages
172+
message_report = build_table("message")
173+
print(message_report, "\n")
174+
175+
# measures
176+
measure_report = build_table("measure")
177+
print(measure_report, "\n")
178+
179+
180+
def build_table(grouping_column):
181+
report_table = (
182+
candidate_df.groupby(grouping_column)["selected"]
164183
.agg(acceptable=("count"), selected=("sum"))
165184
.reset_index()
166185
)
167-
candidate_df["score"] = candidate_df["score"].astype(float)
168-
scores = (
169-
candidate_df.groupby("causal_pathway")["score"]
186+
scores = round(
187+
candidate_df.groupby(grouping_column)["score"]
170188
.agg(acceptable_score=("mean"))
171189
.reset_index()
172-
)
173-
causal_pathway = pd.merge(causal_pathway, scores, on="causal_pathway", how="left")
190+
,2)
191+
report_table = pd.merge(report_table, scores, on=grouping_column, how="left")
174192

175-
causal_pathway["% acceptable"] = round(
176-
causal_pathway["acceptable"] / causal_pathway["acceptable"].sum() * 100, 1
193+
report_table["% acceptable"] = round(
194+
report_table["acceptable"] / report_table["acceptable"].sum() * 100, 1
177195
)
178-
causal_pathway["% selected"] = round(
179-
causal_pathway["selected"] / causal_pathway["acceptable"] * 100, 1
196+
report_table["% selected"] = round(
197+
report_table["selected"] / report_table["selected"].sum() * 100, 1
180198
)
181-
selected_scores = (
199+
report_table["% of acceptable selected"] = round(
200+
report_table["selected"] / report_table["acceptable"] * 100, 1
201+
)
202+
selected_scores = round(
182203
candidate_df[candidate_df["selected"]]
183-
.groupby("causal_pathway")["score"]
204+
.groupby(grouping_column)["score"]
184205
.agg(selected_score=("mean"))
185206
.reset_index()
186-
)
187-
causal_pathway = pd.merge(
188-
causal_pathway, selected_scores, on="causal_pathway", how="left"
207+
,2)
208+
report_table = pd.merge(
209+
report_table, selected_scores, on=grouping_column, how="left"
189210
)
190211

191-
causal_pathway = causal_pathway[
212+
report_table = report_table[
192213
[
193-
"causal_pathway",
214+
grouping_column,
194215
"acceptable",
195216
"% acceptable",
196217
"acceptable_score",
197218
"selected",
198219
"% selected",
199220
"selected_score",
221+
"% of acceptable selected",
200222
]
201223
]
202-
print(causal_pathway, "\n")
203-
204-
# messages
205-
candidate_df.rename(columns={"name": "message"}, inplace=True)
206-
message = (
207-
candidate_df.groupby("message")["selected"]
208-
.agg(total=("count"), selected=("sum"))
209-
.reset_index()
210-
)
211-
message["%"] = round(message["total"] / message["total"].sum() * 100, 1)
212-
message["% selected"] = round(message["selected"] / message["total"] * 100, 1)
213-
message = message[["message", "%", "total", "selected", "% selected"]]
214-
print(message, "\n")
215-
216-
# measures
217-
measure = (
218-
candidate_df.groupby("measure")["selected"]
219-
.agg(total=("count"), selected=("sum"))
220-
.reset_index()
221-
)
222-
measure["%"] = round(measure["total"] / measure["total"].sum() * 100, 1)
223-
measure["% selected"] = round(measure["selected"] / measure["total"] * 100, 1)
224-
measure = measure[["measure", "%", "total", "selected", "% selected"]]
225-
print(measure, "\n")
224+
225+
return report_table
226226

227227

228228
def extract_number(filename):
@@ -246,7 +246,7 @@ def main():
246246
if SAMPLE:
247247
n = min(SAMPLE, len(input_files))
248248
input_files = sorted(random.sample(input_files, n), key=extract_number)
249-
249+
250250
with ThreadPoolExecutor(WORKERS) as executor:
251251
executor.map(post_json_message, input_files)
252252

0 commit comments

Comments
 (0)