|
| 1 | +import copy |
1 | 2 | import json |
2 | 3 | import os |
3 | | - |
4 | | -import pandas as pd |
5 | 4 | import uuid |
6 | | -import copy |
7 | 5 | from datetime import datetime |
| 6 | + |
| 7 | +import pandas as pd |
8 | 8 | from dateutil.relativedelta import relativedelta |
9 | 9 |
|
10 | 10 | OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "outputs") |
11 | | -INPUT_DIR = os.environ.get("INPUT_DIR", "/home/faridsei/dev/test/OBI/obi_cat2_dystocia_compliance.xlsx") |
| 11 | +INPUT_DIR = os.environ.get( |
| 12 | + "INPUT_DIR", "/home/faridsei/dev/test/OBI/obi_cat2_dystocia_compliance.xlsx" |
| 13 | +) |
12 | 14 | PERFORMANCE_MONTH = os.environ.get("PERFORMANCE_MONTH", None) |
13 | | -QUARTERLY_DATA=os.environ.get("QUARTERLY_DATA", False) |
| 15 | +QUARTERLY_DATA = os.environ.get("QUARTERLY_DATA", False) |
14 | 16 | INPUT_TEMPLATE = { |
15 | | - "@context": { |
16 | | - "@vocab": "http://schema.org/", |
17 | | - "slowmo": "http://example.com/slowmo#", |
18 | | - "csvw": "http://www.w3.org/ns/csvw#", |
19 | | - "dc": "http://purl.org/dc/terms/", |
20 | | - "psdo": "http://purl.obolibrary.org/obo/", |
21 | | - "slowmo:Measure": "http://example.com/slowmo#Measure", |
22 | | - "slowmo:IsAboutPerformer": "http://example.com/slowmo#IsAboutPerformer", |
23 | | - "slowmo:ColumnUse": "http://example.com/slowmo#ColumnUse", |
24 | | - "slowmo:IsAboutTemplate": "http://example.com/slowmo#IsAboutTemplate", |
25 | | - "slowmo:spek": "http://example.com/slowmo#spek", |
26 | | - "slowmo:IsAboutCausalPathway": "http://example.com/slowmo#IsAboutCausalPathway", |
27 | | - "slowmo:IsAboutOrganization": "http://example.com/slowmo#IsAboutOrganization", |
28 | | - "slowmo:IsAboutMeasure": "http://example.com/slowmo#IsAboutMeasure", |
29 | | - "slowmo:InputTable": "http://example.com/slowmo#InputTable", |
30 | | - "slowmo:WithComparator": "http://example.com/slowmo#WithComparator", |
31 | | - "has_part": "http://purl.obolibrary.org/obo/bfo#BFO_0000051", |
32 | | - "has_disposition": "http://purl.obolibrary.org/obo/RO_0000091" |
33 | | - }, |
34 | | - "message_instance_id": "", |
35 | | - "performance_month": "", |
36 | | - "Performance_data": [ |
37 | | - [ "staff_number", "measure", "month", "passed_count", "flagged_count", "denominator", "peer_average_comparator", "peer_75th_percentile_benchmark", "peer_90th_percentile_benchmark", "MPOG_goal" ], |
38 | | - |
39 | | - ], |
40 | | - "History": { |
41 | | - |
42 | | - }, |
43 | | - "Preferences": {} |
| 17 | + "@context": { |
| 18 | + "@vocab": "http://schema.org/", |
| 19 | + "slowmo": "http://example.com/slowmo#", |
| 20 | + "csvw": "http://www.w3.org/ns/csvw#", |
| 21 | + "dc": "http://purl.org/dc/terms/", |
| 22 | + "psdo": "http://purl.obolibrary.org/obo/", |
| 23 | + "slowmo:Measure": "http://example.com/slowmo#Measure", |
| 24 | + "slowmo:IsAboutPerformer": "http://example.com/slowmo#IsAboutPerformer", |
| 25 | + "slowmo:ColumnUse": "http://example.com/slowmo#ColumnUse", |
| 26 | + "slowmo:IsAboutTemplate": "http://example.com/slowmo#IsAboutTemplate", |
| 27 | + "slowmo:spek": "http://example.com/slowmo#spek", |
| 28 | + "slowmo:IsAboutCausalPathway": "http://example.com/slowmo#IsAboutCausalPathway", |
| 29 | + "slowmo:IsAboutOrganization": "http://example.com/slowmo#IsAboutOrganization", |
| 30 | + "slowmo:IsAboutMeasure": "http://example.com/slowmo#IsAboutMeasure", |
| 31 | + "slowmo:InputTable": "http://example.com/slowmo#InputTable", |
| 32 | + "slowmo:WithComparator": "http://example.com/slowmo#WithComparator", |
| 33 | + "has_part": "http://purl.obolibrary.org/obo/bfo#BFO_0000051", |
| 34 | + "has_disposition": "http://purl.obolibrary.org/obo/RO_0000091", |
| 35 | + }, |
| 36 | + "message_instance_id": "", |
| 37 | + "performance_month": "", |
| 38 | + "Performance_data": [ |
| 39 | + [ |
| 40 | + "staff_number", |
| 41 | + "measure", |
| 42 | + "month", |
| 43 | + "passed_count", |
| 44 | + "flagged_count", |
| 45 | + "denominator", |
| 46 | + "peer_average_comparator", |
| 47 | + "peer_75th_percentile_benchmark", |
| 48 | + "peer_90th_percentile_benchmark", |
| 49 | + "MPOG_goal", |
| 50 | + ], |
| 51 | + ], |
| 52 | + "History": {}, |
| 53 | + "Preferences": {}, |
44 | 54 | } |
45 | 55 |
|
46 | | -measure_name_to_id={ |
| 56 | +measure_name_to_id = { |
47 | 57 | "Cat II Compliance - 12 month rolling average": "CATII12", |
48 | 58 | "Cat II Compliance - Monthly": "CATII", |
49 | 59 | "Dystocia Compliance - 12 month rolling average": "DC12", |
50 | | - "Dystocia Compliance - Monthly": "DC" |
51 | | - |
| 60 | + "Dystocia Compliance - Monthly": "DC", |
52 | 61 | } |
53 | 62 |
|
54 | | -sheet_name = "obi_cat2_dystocia_compliance" #"Sheet1" # Change this to the name of the sheet in your .xlsx file |
| 63 | +sheet_name = "obi_cat2_dystocia_compliance" # "Sheet1" # Change this to the name of the sheet in your .xlsx file |
55 | 64 | df = pd.read_excel(INPUT_DIR, sheet_name=sheet_name, engine="openpyxl") |
56 | | -df['Time interval'] = pd.to_datetime(df['Time interval']).dt.strftime('%Y-%m-%d') |
| 65 | +df["Time interval"] = pd.to_datetime(df["Time interval"]).dt.strftime("%Y-%m-%d") |
57 | 66 |
|
58 | | -unique_site_ids = df['site_id'].unique() |
| 67 | +unique_site_ids = df["site_id"].unique() |
59 | 68 | if not PERFORMANCE_MONTH: |
60 | | - PERFORMANCE_MONTH = df['Time interval'].max() |
61 | | -site_id= None |
| 69 | + PERFORMANCE_MONTH = df["Time interval"].max() |
| 70 | +site_id = None |
62 | 71 | for site_id in unique_site_ids: |
63 | | - site_rows = df[df['site_id'] == site_id] |
| 72 | + site_rows = df[df["site_id"] == site_id] |
64 | 73 | input_file = copy.deepcopy(INPUT_TEMPLATE) |
65 | 74 | measure = "" |
66 | 75 | Numerator = 0 |
67 | | - Denominator = 0 |
| 76 | + Denominator = 0 |
68 | 77 | for _, row in site_rows.iterrows(): |
69 | 78 | Numerator += row["Numerator"] |
70 | 79 | Denominator += row["Denominator"] |
71 | 80 | time_interval = row["Time interval"] |
72 | 81 | if QUARTERLY_DATA: |
73 | | - |
74 | | - date1 = datetime.strptime(row["Time interval"], '%Y-%m-%d') |
75 | | - date2 = datetime.strptime(PERFORMANCE_MONTH, '%Y-%m-%d') |
76 | | - year_diff = date2.year - date1.year |
77 | | - month_diff = date2.month - date1.month |
| 82 | + date1 = datetime.strptime(row["Time interval"], "%Y-%m-%d") |
| 83 | + date2 = datetime.strptime(PERFORMANCE_MONTH, "%Y-%m-%d") |
| 84 | + year_diff = date2.year - date1.year |
| 85 | + month_diff = date2.month - date1.month |
78 | 86 |
|
79 | | - # Total months difference |
80 | | - total_months = year_diff * 12 + month_diff |
| 87 | + # Total months difference |
| 88 | + total_months = year_diff * 12 + month_diff |
81 | 89 |
|
82 | | - # If the difference is not a multiple of 3, just continue to the next row (the Numerators and Denominators have already been added up; only check that the measure is the same) |
83 | | - if total_months % 3 != 0: |
84 | | - |
85 | | - #make sure the quarter data is for the same measure |
86 | | - if measure == "": |
87 | | - measure = measure_name_to_id[row["Performance measure name"]] |
88 | | - if measure != measure_name_to_id[row["Performance measure name"]]: |
89 | | - raise ValueError("Sorry, wrong quarterly data issue for site "+ site_id + "meassure "+ measure + "date "+ row["Time interval"]) |
90 | | - continue |
91 | | - time_interval = (date2 - relativedelta(months=total_months / 3)).strftime('%Y-%m-%d') |
| 90 | + # If the difference is not a multiple of 3, just continue to the next row (the Numerators and Denominators have already been added up; only check that the measure is the same) |
| 91 | + if total_months % 3 != 0: |
| 92 | + # make sure the quarter data is for the same measure |
| 93 | + if measure == "": |
| 94 | + measure = measure_name_to_id[row["Performance measure name"]] |
| 95 | + if measure != measure_name_to_id[row["Performance measure name"]]: |
| 96 | + raise ValueError( |
| 97 | + "Sorry, wrong quarterly data issue for site " |
| 98 | + + site_id |
| 99 | + + "meassure " |
| 100 | + + measure |
| 101 | + + "date " |
| 102 | + + row["Time interval"] |
| 103 | + ) |
| 104 | + continue |
| 105 | + time_interval = (date2 - relativedelta(months=total_months / 3)).strftime( |
| 106 | + "%Y-%m-%d" |
| 107 | + ) |
92 | 108 | # if not row["Performance level (monthly rate)"]: |
93 | 109 | # continue |
94 | 110 | # Format the row and write it to the file |
95 | | - input_file["Performance_data"].append([int(site_id),measure_name_to_id[row["Performance measure name"]],time_interval, |
96 | | - Numerator,Denominator-Numerator,Denominator,0,0,0,row["Target"]*100 ]) |
| 111 | + input_file["Performance_data"].append( |
| 112 | + [ |
| 113 | + int(site_id), |
| 114 | + measure_name_to_id[row["Performance measure name"]], |
| 115 | + time_interval, |
| 116 | + Numerator, |
| 117 | + Denominator - Numerator, |
| 118 | + Denominator, |
| 119 | + 0, |
| 120 | + 0, |
| 121 | + 0, |
| 122 | + row["Target"] * 100, |
| 123 | + ] |
| 124 | + ) |
97 | 125 | input_file["message_instance_id"] = str(uuid.uuid4()) |
98 | 126 | input_file["performance_month"] = PERFORMANCE_MONTH |
99 | | - |
100 | | - measure ="" |
| 127 | + |
| 128 | + measure = "" |
101 | 129 | Numerator = 0 |
102 | 130 | Denominator = 0 |
103 | 131 |
|
|
106 | 134 | file_path = os.path.join(OUTPUT_DIR, output_file_name) |
107 | 135 | with open(file_path, "w") as file: |
108 | 136 | json.dump(input_file, file, indent=4) |
109 | | - |
|
0 commit comments