Skip to content

Commit e3b69b8

Browse files
authored
script to generate input files and response reports from logs is added. (#403)
1 parent b0fc63c commit e3b69b8

File tree

4 files changed

+155
-2
lines changed

4 files changed

+155
-2
lines changed

bulk-up/poetry.lock

Lines changed: 27 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

bulk-up/pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ python = "^3.9"
1414
google-auth = "^2.29.0"
1515
requests = "^2.31.0"
1616
pandas = "^2.2.2"
17+
openpyxl = "^3.1.2"
1718

1819

1920
[tool.poetry.group.dev.dependencies]
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import json
2+
import os
3+
4+
import pandas as pd
5+
6+
# Directory under which one sub-folder per performance month is created.
# Defaults to the current working directory.
OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "")
# Path of the Excel workbook to read (despite the name, a file, not a folder).
INPUT_DIR = os.environ.get("INPUT_DIR", "pfp.xlsx")

sheet_name = "Sheet1"  # Change this to the name of the sheet in your .xlsx file
df = pd.read_excel(INPUT_DIR, sheet_name=sheet_name, engine="openpyxl")

for message in df["Input_Message"]:
    # Empty cells carry no message to export.
    if pd.isnull(message):
        continue

    # The "_x000D_" marker is stripped so the cell parses as JSON; the same
    # cleaned text is what gets written out, so the generated .json files are
    # parseable too.
    cleaned_message = message.replace("_x000D_", "")
    message_json = json.loads(cleaned_message)

    # Messages without a performance month cannot be filed anywhere; skip them
    # before touching any other field.
    performance_month = message_json.get("performance_month")
    if not performance_month:
        continue

    # NOTE(review): assumes row 0 of Performance_data is a header and the
    # first column of row 1 is the staff number -- confirm against the schema.
    staff_number = message_json["Performance_data"][1][0]

    directory = os.path.join(OUTPUT_DIR, performance_month)
    os.makedirs(directory, exist_ok=True)

    file_name = f"Provider_{staff_number}.json"
    file_path = os.path.join(directory, file_name)

    with open(file_path, "w", encoding="utf-8") as file:
        file.write(cleaned_message)

print("Text files have been created for each cell in the 'Input_Message' column.")
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
import json
2+
import os
3+
from datetime import datetime
4+
5+
import pandas as pd
6+
7+
# Path of the Excel workbook to analyse; override with the INPUT_DIR
# environment variable. (Despite the name, this is a file path, not a folder.)
# The default is a relative path so the script does not depend on any one
# developer's home directory.
INPUT_DIR = os.environ.get("INPUT_DIR", "pfp.xlsx")
SHEET_NAME = "Sheet1"  # Change this to the name of the sheet in your .xlsx file
9+
10+
11+
def add_response(response_data):
    """Append one parsed response to the module-level ``response_df``.

    Args:
        response_data: Mapping parsed from an output message. The keys read
            are ``staff_number``, ``performance_month`` (formatted like
            ``"January 2024"``) and ``selected_candidate``.

    Raises:
        ValueError: If ``performance_month`` is present but does not match
            the ``"%B %Y"`` format.
        KeyError: If a selected candidate lacks ``acceptable_by`` or
            ``measure``.
    """
    global response_df

    selected_candidate = response_data.get("selected_candidate")
    pm = response_data.get("performance_month")
    pm = datetime.strptime(pm, "%B %Y") if pm else "missing"

    if selected_candidate:
        causal_pathway = selected_candidate["acceptable_by"]
        measure = selected_candidate["measure"]
        message = selected_candidate.get("message_template_name", "missing")
    else:
        # No candidate was selected: record the response with empty candidate
        # fields instead of failing on a None subscript.
        causal_pathway = measure = message = None

    response_dict: dict = {
        "staff_number": [response_data.get("staff_number")],
        "performance_month": [pm],
        "causal_pathway": causal_pathway,
        "measure": measure,
        "message": message,
    }
    response_df = pd.concat(
        [response_df, pd.DataFrame(response_dict)], ignore_index=True
    )
29+
30+
31+
def _print_breakdown(responses, column):
    """Print, per performance month, the count and percentage share of ``column``."""
    breakdown = (
        responses.groupby(["performance_month", column])["staff_number"]
        .count()
        .reset_index(name="count")
    )

    # Total number of counted responses in each month, repeated on every row
    # of that month so the share can be computed row-wise.
    breakdown["monthly_total"] = breakdown.groupby("performance_month")[
        "count"
    ].transform("sum")
    breakdown["% "] = round(
        breakdown["count"] / breakdown["monthly_total"] * 100, 1
    )

    breakdown = breakdown[
        ["performance_month", "monthly_total", column, "count", "% "]
    ]
    print(f"\n {breakdown} \n")


def analyse_responses():
    """Print monthly breakdowns of the collected responses.

    Reads the module-level ``response_df`` and prints three tables, in this
    order: by ``causal_pathway``, by ``message`` and by ``measure``. Each
    table shows the per-month count of ``staff_number`` values and the
    percentage of that month's total.

    When no responses have been collected, a short notice is printed instead
    of failing on the missing columns.
    """
    if response_df.empty:
        print("No responses to analyse.")
        return

    for column in ("causal_pathway", "message", "measure"):
        _print_breakdown(response_df, column)
79+
80+
81+
# Load the workbook and start with an empty accumulator that add_response()
# extends one row at a time.
df = pd.read_excel(INPUT_DIR, sheet_name=SHEET_NAME, engine="openpyxl")
response_df: pd.DataFrame = pd.DataFrame()

for message in df["Output_Message"]:
    # Blank cells have nothing to parse.
    if pd.isnull(message):
        continue

    # When the message carries an "image" member, keep only the text before
    # it and append the two closing braces needed to make that prefix parse.
    # NOTE(review): this relies on "image" sitting exactly two levels deep and
    # being the last member at that level -- confirm against the log format.
    head, separator, _ = message.partition(',"image":')
    payload = head + "}}" if separator else message
    message_json = json.loads(payload)

    add_response(message_json)

analyse_responses()

0 commit comments

Comments
 (0)