Add scripts to generate input files and response reports from logs. (#403)

faridseifi · web-flow · commit e3b69b89478a · 2024-05-16T16:45:17.000-04:00
diff --git a/bulk-up/poetry.lock b/bulk-up/poetry.lock
diff --git a/bulk-up/pyproject.toml b/bulk-up/pyproject.toml
@@ -14,6 +14,7 @@ python = "^3.9"
 google-auth = "^2.29.0"
 requests = "^2.31.0"
 pandas = "^2.2.2"
+openpyxl = "^3.1.2"
 
 
 [tool.poetry.group.dev.dependencies]
diff --git a/bulk-up/src/bulk_up/log_to_inputs.py b/bulk-up/src/bulk_up/log_to_inputs.py
@@ -0,0 +1,53 @@
+"""Split logged input messages from an Excel export into per-provider files.
+
+Reads the ``Input_Message`` column of one worksheet and, for every message
+that carries a ``performance_month``, writes the message text to
+``<OUTPUT_DIR>/<performance_month>/Provider_<staff_number>.json``.
+
+Environment variables:
+    INPUT_DIR: path of the .xlsx workbook to read (default ``pfp.xlsx``).
+    OUTPUT_DIR: directory the month folders are created in (default: the
+        current working directory).
+"""
+
+import json
+import os
+
+import pandas as pd
+
+OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "")
+INPUT_DIR = os.environ.get("INPUT_DIR", "pfp.xlsx")
+
+sheet_name = "Sheet1"  # Change this to the name of the sheet in your .xlsx file
+df = pd.read_excel(INPUT_DIR, sheet_name=sheet_name, engine="openpyxl")
+
+for message in df["Input_Message"]:
+    if pd.isnull(message):
+        continue
+
+    # "_x000D_" is presumably the escaped carriage return that .xlsx cell text
+    # can carry; it is not valid JSON, so drop it before parsing.
+    cleaned_message = str(message).replace("_x000D_", "")
+    message_json = json.loads(cleaned_message)
+
+    performance_month = message_json.get("performance_month", None)
+    if not performance_month:
+        continue
+
+    # Looked up only for messages that are kept, so a skipped message with no
+    # usable "Performance_data" cannot abort the run.
+    # NOTE(review): assumes row 1, column 0 of "Performance_data" holds the
+    # staff number (row 0 presumably being a header) - confirm.
+    staff_number = message_json["Performance_data"][1][0]
+
+    directory = os.path.join(OUTPUT_DIR, performance_month)
+    os.makedirs(directory, exist_ok=True)
+
+    file_name = f"Provider_{staff_number}.json"
+    file_path = os.path.join(directory, file_name)
+
+    with open(file_path, "w", encoding="utf-8") as file:
+        # Write the cleaned text rather than the raw cell value, so the
+        # ".json" file holds exactly what json.loads accepted above.
+        file.write(cleaned_message)
+print("Text files have been created for each cell in the 'Input_Message' column.")
diff --git a/bulk-up/src/bulk_up/log_to_reports.py b/bulk-up/src/bulk_up/log_to_reports.py
@@ -0,0 +1,126 @@
+"""Summarise logged responses from an Excel export.
+
+Parses every non-empty cell of the ``Output_Message`` column as JSON, collects
+one row per response, and prints, for each performance month, how often each
+causal pathway, message template and measure was selected.
+
+Environment variables:
+    INPUT_DIR: path of the .xlsx workbook to read.
+"""
+
+import json
+import os
+from datetime import datetime
+
+import pandas as pd
+
+# NOTE(review): the fallback is a path on one developer's machine; set INPUT_DIR
+# when running anywhere else.
+INPUT_DIR = os.environ.get("INPUT_DIR", "/home/faridsei/dev/test/pfp2.xlsx")
+SHEET_NAME = "Sheet1"  # Change this to the name of the sheet in your .xlsx file
+
+
+def add_response(response_data):
+    """Append one parsed response to the module-level ``response_df``.
+
+    Args:
+        response_data: dict decoded from one ``Output_Message`` cell. The keys
+            read are ``staff_number``, ``performance_month`` (text in
+            ``"%B %Y"`` form, e.g. "May 2024") and ``selected_candidate``.
+
+    A response with no ``selected_candidate`` is still recorded, with ``None``
+    for causal pathway, measure and message, instead of raising ``TypeError``
+    on the missing mapping. pandas ``groupby`` drops missing keys by default,
+    so such rows do not appear in the tallies of ``analyse_responses``.
+
+    Raises:
+        KeyError: if a selected candidate lacks ``acceptable_by`` or
+            ``measure``.
+        ValueError: if ``performance_month`` is not in ``"%B %Y"`` form.
+    """
+    global response_df
+
+    selected_candidate = response_data.get("selected_candidate", None)
+    pm = response_data.get("performance_month", None)
+    pm = datetime.strptime(pm, "%B %Y") if pm else "missing"
+
+    if selected_candidate:
+        causal_pathway = selected_candidate["acceptable_by"]
+        measure = selected_candidate["measure"]
+        message = selected_candidate.get("message_template_name", "missing")
+    else:
+        causal_pathway = measure = message = None
+
+    # The one-element lists fix the frame at a single row; scalar values are
+    # broadcast to it.
+    response_dict: dict = {
+        "staff_number": [response_data.get("staff_number", None)],
+        "performance_month": [pm],
+        "causal_pathway": causal_pathway,
+        "measure": measure,
+        "message": message,
+    }
+    response_df = pd.concat(
+        [response_df, pd.DataFrame(response_dict)], ignore_index=True
+    )
+
+
+def _print_breakdown(column):
+    """Print the per-month count and percentage of each value of ``column``.
+
+    Counts rows of ``response_df`` (non-null ``staff_number`` values) grouped
+    by performance month and ``column``, then adds each month's total and the
+    group's share of that total, rounded to one decimal place.
+    """
+    breakdown = (
+        response_df.groupby(["performance_month", column])["staff_number"]
+        .agg(count="count")
+        .reset_index()
+    )
+    breakdown["monthly_total"] = breakdown.groupby("performance_month")[
+        "count"
+    ].transform("sum")
+    breakdown["%  "] = (
+        breakdown["count"] / breakdown["monthly_total"] * 100
+    ).round(1)
+    breakdown = breakdown[
+        ["performance_month", "monthly_total", column, "count", "%  "]
+    ]
+    print(f"\n {breakdown} \n")
+
+
+def analyse_responses():
+    """Print monthly breakdowns by causal pathway, message and measure.
+
+    Reads the module-level ``response_df`` filled by ``add_response``. When no
+    response was collected there is nothing to group, so a short notice is
+    printed instead of failing on the missing columns.
+    """
+    if response_df.empty:
+        print("No responses to analyse.")
+        return
+
+    for column in ("causal_pathway", "message", "measure"):
+        _print_breakdown(column)
+
+
+df = pd.read_excel(INPUT_DIR, sheet_name=SHEET_NAME, engine="openpyxl")
+response_df: pd.DataFrame = pd.DataFrame()
+
+for message in df["Output_Message"]:
+    if pd.isnull(message):
+        continue
+
+    # Everything from the "image" key onward is cut off and the two objects
+    # left open are closed again, so the image payload is never parsed.
+    # NOTE(review): assumes "image" is the last key, nested exactly one level
+    # deep - confirm against the log format.
+    message_parts = message.split(',"image":')
+    if len(message_parts) > 1:
+        message_json = json.loads(message_parts[0] + "}}")
+    else:
+        message_json = json.loads(message)
+
+    add_response(message_json)
+
+analyse_responses()