|
| 1 | +import json |
| 2 | +import os |
| 3 | +from datetime import datetime |
| 4 | + |
| 5 | +import pandas as pd |
| 6 | + |
# Path of the .xlsx workbook to analyse. NOTE: despite the name, this is passed
# to pandas as a workbook *file* path, not a directory. Override it with the
# INPUT_DIR environment variable; the default is a developer-local path.
INPUT_DIR = os.environ.get("INPUT_DIR", "/home/faridsei/dev/test/pfp2.xlsx")
# Name of the worksheet to read. Override it with the SHEET_NAME environment
# variable instead of editing this file.
SHEET_NAME = os.environ.get("SHEET_NAME", "Sheet1")
| 9 | + |
| 10 | + |
def add_response(response_data):
    """Append one decoded response as a new row of the global ``response_df``.

    Args:
        response_data: Dict decoded from one output message. The keys read
            are ``staff_number``, ``performance_month`` (parsed with the
            format ``"%B %Y"``, e.g. ``"January 2024"``) and
            ``selected_candidate``. A missing or empty ``performance_month``
            is recorded as the string ``"missing"``.

    Raises:
        ValueError: If ``performance_month`` is present but does not match
            ``"%B %Y"``.
        KeyError: If a non-empty ``selected_candidate`` lacks
            ``acceptable_by`` or ``measure``.
    """
    global response_df

    selected_candidate = response_data.get("selected_candidate")
    pm = response_data.get("performance_month")
    pm = datetime.strptime(pm, "%B %Y") if pm else "missing"

    if selected_candidate:
        # NOTE(review): these values are broadcast against the one-element
        # lists below, so a list-valued field must hold exactly one item or
        # the DataFrame constructor raises -- confirm against the producer.
        causal_pathway = selected_candidate["acceptable_by"]
        measure = selected_candidate["measure"]
        message = selected_candidate.get("message_template_name", "missing")
    else:
        # No candidate was selected: keep the row (staff number and month are
        # still meaningful) and leave every candidate-derived column empty
        # instead of failing on ``None["acceptable_by"]``.
        causal_pathway = measure = message = None

    response_dict: dict = {
        "staff_number": [response_data.get("staff_number")],
        "performance_month": [pm],
        "causal_pathway": causal_pathway,
        "measure": measure,
        "message": message,
    }
    response_df = pd.concat(
        [response_df, pd.DataFrame(response_dict)], ignore_index=True
    )
| 29 | + |
| 30 | + |
def _summarise_by(responses, column):
    """Return per-month counts and percentage shares for each ``column`` value.

    Rows are grouped by ``performance_month`` and ``column``; ``count`` is the
    number of non-null ``staff_number`` entries in each group,
    ``monthly_total`` is the sum of those counts within the month, and ``"% "``
    is the group's share of that total, rounded to one decimal place.
    """
    summary = (
        responses.groupby(["performance_month", column])["staff_number"]
        .agg(count="count")
        .reset_index()
    )
    summary["monthly_total"] = summary.groupby("performance_month")[
        "count"
    ].transform("sum")
    summary["% "] = round(summary["count"] / summary["monthly_total"] * 100, 1)
    return summary[["performance_month", "monthly_total", column, "count", "% "]]


def analyse_responses():
    """Print monthly breakdowns of the collected responses.

    Reads the global ``response_df`` and prints three tables in order: by
    ``causal_pathway``, by ``message`` and by ``measure``. If no responses
    have been collected, a short notice is printed instead.
    """
    global response_df

    # An empty frame has none of the grouping columns, so grouping it would
    # raise KeyError; report the situation instead.
    if response_df.empty:
        print("\n No responses to analyse \n")
        return

    for column in ("causal_pathway", "message", "measure"):
        print(f"\n {_summarise_by(response_df, column)} \n")
| 79 | + |
| 80 | + |
# Driver: load the worksheet, decode every non-empty "Output_Message" cell as
# JSON, collect the decoded responses into ``response_df`` and print the
# summary tables.
df = pd.read_excel(INPUT_DIR, sheet_name=SHEET_NAME, engine="openpyxl")
response_df: pd.DataFrame = pd.DataFrame()

for raw_message in df["Output_Message"]:
    if pd.isnull(raw_message):
        continue

    # Cut the payload at the first ',"image":' key, if any, so that nothing
    # from that key onwards is parsed. Appending "}}" re-closes the JSON
    # after the cut.
    # NOTE(review): this assumes exactly two objects are still open at the
    # cut point -- confirm against the message producer.
    head, marker, _ = raw_message.partition(',"image":')
    message_json = json.loads(head + "}}" if marker else raw_message)

    add_response(message_json)

analyse_responses()
0 commit comments