Skip to content

Commit 80d4cae

Browse files
authored
475 create a script to generate random performance data (#476)
* random performance data generator created.
1 parent d5a48e2 commit 80d4cae

File tree

6 files changed

+305
-4
lines changed

6 files changed

+305
-4
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,4 @@ PFPenv/
3131
.env.local
3232
.ruff_cache/
3333
.pytest_cache/
34+
bulk-up/random_performance_data/

bulk-up/poetry.lock

Lines changed: 82 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

bulk-up/pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ openpyxl = "^3.1.2"
1818
rdflib = "^7.0.0"
1919
pyyaml = "^6.0.2"
2020
loguru = "^0.7.2"
21+
orjson = "^3.11.0"
2122

2223

2324
[tool.poetry.group.dev.dependencies]
Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
import json
2+
import random
3+
import uuid
4+
from datetime import datetime
5+
from pathlib import Path
6+
7+
from dateutil.relativedelta import relativedelta
8+
9+
10+
def generate_preferences(probability=0.035):
    """Return a random preferences payload, or an empty dict most of the time.

    One ``random.random()`` draw decides the outcome: if the draw is greater
    than ``probability`` the result is ``{}``; otherwise a populated structure
    is built.

    Args:
        probability: Threshold compared against a uniform draw in [0, 1).

    Returns:
        ``{}``, or ``{"Utilities": {"Message_Format": ..., "Display_Format":
        ...}}``. Every ``Message_Format`` value is a float rounded to two
        decimals and converted to ``str``; exactly one ``Display_Format``
        entry is 1 and the rest are 0.
    """
    roll = random.random()
    if roll > probability:
        return {}

    # Exactly one display format is switched on; all the others stay at 0.
    chart_kinds = ("Bar chart", "Line chart", "Text-only", "System-generated")
    picked = random.choice(chart_kinds)
    display_flags = dict.fromkeys(chart_kinds, 0)
    display_flags[picked] = 1

    # (key, lower bound, upper bound) for each message-format utility.
    # The order fixes both the output key order and the order of the draws.
    message_bounds = (
        ("Social gain", 0.01, 0.1),
        ("Social stayed better", -0.2, -0.05),
        ("Worsening", -0.2, -0.05),
        ("Improving", -0.2, -0.05),
        ("Social loss", 0.5, 0.8),
        ("Social stayed worse", -0.7, -0.4),
        ("Social better", -1.4, -1.0),
        ("Social worse", 0.3, 0.6),
        ("Social approach", 0.8, 1.1),
        ("Goal gain", 0.01, 0.08),
        ("Goal approach", 0.8, 1.1),
    )
    message_format = {
        label: str(round(random.uniform(low, high), 2))
        for label, low, high in message_bounds
    }

    return {
        "Utilities": {
            "Message_Format": message_format,
            "Display_Format": display_flags,
        }
    }
44+
45+
46+
# Configuration for the generated data set.

# Most recent month to report on, as a string and as a parsed datetime.
performance_month = "2025-01-01"
performance_date = datetime.strptime(performance_month, "%Y-%m-%d")

# Measure identifiers; every staff member gets a row per measure per month.
measures = [
    "BP01", "BP02", "BP03", "BP04", "BP05", "BP06",
    "GLU01", "GLU02", "GLU03", "GLU04", "GLU05",
    "NMB01", "NMB02", "NMB03",
    "PAIN01", "PAIN02",
    "PONV01", "PONV04", "PONV05",
    "PUL01", "PUL02", "PUL03",
    "SMOK01", "SMOK02", "SMOK03",
    "SUS01", "SUS02", "SUS03", "SUS04", "SUS05",
    "TEMP01", "TEMP02", "TEMP03",
    "TOC01", "TOC02", "TOC03",
    "TRAN01",
]

# Institution ids 1 through 50.
institutions = [*range(1, 51)]

# Month window ending at performance_month, listed oldest first.
num_months = 12
months = [
    (performance_date - relativedelta(months=months_back)).strftime("%Y-%m-%d")
    for months_back in range(num_months - 1, -1, -1)
]

# Directory (relative to the current working directory) for the JSON output.
output_dir = Path("random_performance_data")
output_dir.mkdir(exist_ok=True)
98+
99+
# Build the comparator table: comparators[institution][measure][month] holds
# the peer average, the 75th/90th percentile benchmarks and the fixed goal.


def _draw_comparator_values():
    """Draw one set of comparator values with average < 75th < 90th."""
    peer_average = round(random.uniform(55.0, 90.0), 1)
    peer_90th = round(random.uniform(90.1, 99.9), 1)
    peer_75th = round(random.uniform(peer_average + 0.1, peer_90th - 0.1), 1)

    # Rounding (or a very narrow gap) can collapse the ordering; push the
    # higher benchmark up by 0.5 whenever that happens.
    if peer_75th <= peer_average:
        peer_75th = round(peer_average + 0.5, 1)
    if peer_90th <= peer_75th:
        peer_90th = round(peer_75th + 0.5, 1)

    return {
        "peer_average_comparator": peer_average,
        "peer_75th_percentile_benchmark": peer_75th,
        "peer_90th_percentile_benchmark": peer_90th,
        "MPOG_goal": 90.0,
    }


# Output dictionary, filled institution by institution.
comparators = {}

for inst in institutions:
    by_measure = {}
    comparators[inst] = by_measure
    for measure in measures:
        by_month = {}
        by_measure[measure] = by_month
        for date in months:
            by_month[date] = _draw_comparator_values()
130+
131+
132+
# Write one JSON input message per randomly generated staff member.
#
# Each institution gets between 5 and 25 staff; every staff member gets one
# Performance_data row per (measure, month) found in `comparators`, and the
# message is written to output_dir as Provider_<counter>.json.

# JSON-LD context shared by every generated message (loop-invariant).
JSONLD_CONTEXT = {
    "@vocab": "http://schema.org/",
    "slowmo": "http://example.com/slowmo#",
    "csvw": "http://www.w3.org/ns/csvw#",
    "dc": "http://purl.org/dc/terms/",
    "psdo": "http://purl.obolibrary.org/obo/",
    "slowmo:Measure": "http://example.com/slowmo#Measure",
    "slowmo:IsAboutPerformer": "http://example.com/slowmo#IsAboutPerformer",
    "slowmo:ColumnUse": "http://example.com/slowmo#ColumnUse",
    "slowmo:IsAboutTemplate": "http://example.com/slowmo#IsAboutTemplate",
    "slowmo:spek": "http://example.com/slowmo#spek",
    "slowmo:IsAboutCausalPathway": "http://example.com/slowmo#IsAboutCausalPathway",
    "slowmo:IsAboutOrganization": "http://example.com/slowmo#IsAboutOrganization",
    "slowmo:IsAboutMeasure": "http://example.com/slowmo#IsAboutMeasure",
    "slowmo:InputTable": "http://example.com/slowmo#InputTable",
    "slowmo:WithComparator": "http://example.com/slowmo#WithComparator",
    "has_part": "http://purl.obolibrary.org/obo/bfo#BFO_0000051",
    "has_disposition": "http://purl.obolibrary.org/obo/RO_0000091",
}

# Column names that form the first row of every Performance_data table.
PERFORMANCE_DATA_HEADER = [
    "staff_number",
    "measure",
    "month",
    "passed_count",
    "flagged_count",
    "denominator",
    "peer_average_comparator",
    "peer_75th_percentile_benchmark",
    "peer_90th_percentile_benchmark",
    "MPOG_goal",
]

# Counter to ensure global uniqueness of staff ids across institutions
global_staff_counter = 1

for institution in institutions:
    num_staff = random.randint(5, 25)
    for _ in range(num_staff):
        # NOTE(review): the top-level "staff_number" is a formatted string
        # while each data row carries the bare integer counter - confirm
        # that downstream consumers expect this difference.
        staff_data = {
            "@context": JSONLD_CONTEXT,
            "message_instance_id": str(uuid.uuid4()),
            "performance_month": performance_month,
            "staff_number": f"STAFF-{global_staff_counter:06}",  # e.g., STAFF-000001
            "institution_id": institution,
            # Fresh list per message; the header is always the first row.
            "Performance_data": [list(PERFORMANCE_DATA_HEADER)],
            "History": {},
            "Preferences": generate_preferences(),
            "debug": "no",
        }

        for measure, monthly_values in comparators[institution].items():
            for month, comparator_values in monthly_values.items():
                # Random denominator; passed is at least 1, and flagged is
                # whatever remains so the two always sum to the denominator.
                denominator = random.randint(1, 40)
                passed_count = random.randint(1, denominator)
                flagged_count = denominator - passed_count

                # Row layout mirrors PERFORMANCE_DATA_HEADER.
                row = [
                    global_staff_counter,
                    measure,
                    month,
                    passed_count,
                    flagged_count,
                    denominator,
                    comparator_values["peer_average_comparator"],
                    comparator_values["peer_75th_percentile_benchmark"],
                    comparator_values["peer_90th_percentile_benchmark"],
                    comparator_values["MPOG_goal"],
                ]
                staff_data["Performance_data"].append(row)

        file_name = f"Provider_{global_staff_counter}.json"
        file_path = output_dir / file_name

        # Write JSON file; the explicit encoding keeps the output independent
        # of the platform's default locale encoding.
        with open(file_path, "w", encoding="utf-8") as f:
            json.dump(staff_data, f, indent=2)

        global_staff_counter += 1

0 commit comments

Comments
 (0)