Skip to content

Commit a09c505

Browse files
authored
453 clean up main (#454)
* remove excessive timing and logging statements from main. some formatting * api separated and simple cli for one json input added * restructuring the app to meet Python package structure to be able to build and install the app and use the cli script
1 parent 9289976 commit a09c505

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

59 files changed

+915
-691
lines changed

.env.remote

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# required knowledgebase paths
2-
mpm=https://raw.githubusercontent.com/Display-Lab/knowledge-base/v1.4/prioritization_algorithms/motivational_potential_model.csv
3-
preferences=https://raw.githubusercontent.com/Display-Lab/knowledge-base/v1.4/preferences.json
4-
manifest=https://github.com/Display-Lab/knowledge-base/releases/download/v1.4/mpog_manifest.yaml
2+
mpm=https://raw.githubusercontent.com/Display-Lab/knowledge-base/1.7/prioritization_algorithms/motivational_potential_model.csv
3+
preferences=https://raw.githubusercontent.com/Display-Lab/knowledge-base/1.7/preferences.json
4+
manifest=https://raw.githubusercontent.com/Display-Lab/knowledge-base/refs/tags/1.7/mpog_manifest.yaml
55

66
# defaults
77
# log_level=WARNING

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ outputs/*
2121
python/.vscode/settings.json
2222
**/dist/
2323
**/__pycache__/
24-
.python-version
24+
2525
ES.json
2626
venv/
2727
.venv/

.python-version

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
3.13

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,5 +48,5 @@ ENV PATH="/code/app/venv/bin:$PATH"
4848

4949
# Start up main app
5050
EXPOSE 8080
51-
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]
51+
CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "8080"]
5252

Procfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
web: gunicorn -w 1 -k uvicorn.workers.UvicornWorker main:app
1+
web: gunicorn -w 1 -k uvicorn.workers.UvicornWorker scaffold.api:app

README.md

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -79,10 +79,10 @@ manifest=file:///Users/bob/knowledge-base/mpog_local_manifest.yaml
7979
...
8080
```
8181

82-
Run SCAFFOLD
82+
Run SCAFFOLD API
8383

8484
```zsh
85-
ENV_PATH=.env.local uvicorn main:app
85+
ENV_PATH=.env.local uvicorn scaffold.api:app
8686
# Expect to see a server start message like this "INFO: Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)"
8787
```
8888

@@ -92,6 +92,19 @@ You can use Postman or your favorite tool to send a message and check the result
9292
curl --data "@tests/test_cases/input_message.json" http://localhost:8000/createprecisionfeedback/
9393
```
9494

95+
Run SCAFFOLD CLI
96+
First install the Python package. Then use the following command to run the pipeline on a single input file:
97+
98+
```zsh
99+
ENV_PATH=/user/.../dev.env pipeline single '/path/to/input/file.json'
100+
```
101+
102+
Or use the following command to start the pipeline API:
103+
104+
```zsh
105+
ENV_PATH=/user/.../dev.env pipeline web
106+
```
107+
95108
## Environment variables
96109

97110
### Knowledge base settings
@@ -154,13 +167,13 @@ If the key is a relative path, it must end with a '/'. In that case the key is g
154167
### examples
155168

156169
```zsh
157-
ENV_PATH=/user/.../dev.env log_level=INFO use_preferences=True use_coachiness=True use_mi=True generate_image=False uvicorn main:app --workers=5
170+
ENV_PATH=/user/.../dev.env log_level=INFO use_preferences=True use_coachiness=True use_mi=True generate_image=False uvicorn scaffold.api:app --workers=5
158171
```
159172

160173

161174
for windows:
162175
```powershell
163-
$env:ENV_PATH=/user/.../dev.env; $env:log_level="INFO"; $env:use_preferences="True"; $env:use_coachiness="True"; $env:use_mi="True"; $env:generate_image="False"; uvicorn main:app --workers=5
176+
$env:ENV_PATH="/user/.../dev.env"; $env:log_level="INFO"; $env:use_preferences="True"; $env:use_coachiness="True"; $env:use_mi="True"; $env:generate_image="False"; uvicorn scaffold.api:app --workers=5
164177
```
165178

166179
> :point_right: `uvicorn` can be run with multiple workers. This is useful when testing with a client that can send multiple requests.

bulk-up/src/bulk_up/dataset_to_inputs.py

Lines changed: 93 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,103 +1,131 @@
1+
import copy
12
import json
23
import os
3-
4-
import pandas as pd
54
import uuid
6-
import copy
75
from datetime import datetime
6+
7+
import pandas as pd
88
from dateutil.relativedelta import relativedelta
99

1010
OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "outputs")
11-
INPUT_DIR = os.environ.get("INPUT_DIR", "/home/faridsei/dev/test/OBI/obi_cat2_dystocia_compliance.xlsx")
11+
INPUT_DIR = os.environ.get(
12+
"INPUT_DIR", "/home/faridsei/dev/test/OBI/obi_cat2_dystocia_compliance.xlsx"
13+
)
1214
PERFORMANCE_MONTH = os.environ.get("PERFORMANCE_MONTH", None)
13-
QUARTERLY_DATA=os.environ.get("QUARTERLY_DATA", False)
15+
QUARTERLY_DATA = os.environ.get("QUARTERLY_DATA", False)
1416
INPUT_TEMPLATE = {
15-
"@context": {
16-
"@vocab": "http://schema.org/",
17-
"slowmo": "http://example.com/slowmo#",
18-
"csvw": "http://www.w3.org/ns/csvw#",
19-
"dc": "http://purl.org/dc/terms/",
20-
"psdo": "http://purl.obolibrary.org/obo/",
21-
"slowmo:Measure": "http://example.com/slowmo#Measure",
22-
"slowmo:IsAboutPerformer": "http://example.com/slowmo#IsAboutPerformer",
23-
"slowmo:ColumnUse": "http://example.com/slowmo#ColumnUse",
24-
"slowmo:IsAboutTemplate": "http://example.com/slowmo#IsAboutTemplate",
25-
"slowmo:spek": "http://example.com/slowmo#spek",
26-
"slowmo:IsAboutCausalPathway": "http://example.com/slowmo#IsAboutCausalPathway",
27-
"slowmo:IsAboutOrganization": "http://example.com/slowmo#IsAboutOrganization",
28-
"slowmo:IsAboutMeasure": "http://example.com/slowmo#IsAboutMeasure",
29-
"slowmo:InputTable": "http://example.com/slowmo#InputTable",
30-
"slowmo:WithComparator": "http://example.com/slowmo#WithComparator",
31-
"has_part": "http://purl.obolibrary.org/obo/bfo#BFO_0000051",
32-
"has_disposition": "http://purl.obolibrary.org/obo/RO_0000091"
33-
},
34-
"message_instance_id": "",
35-
"performance_month": "",
36-
"Performance_data": [
37-
[ "staff_number", "measure", "month", "passed_count", "flagged_count", "denominator", "peer_average_comparator", "peer_75th_percentile_benchmark", "peer_90th_percentile_benchmark", "MPOG_goal" ],
38-
39-
],
40-
"History": {
41-
42-
},
43-
"Preferences": {}
17+
"@context": {
18+
"@vocab": "http://schema.org/",
19+
"slowmo": "http://example.com/slowmo#",
20+
"csvw": "http://www.w3.org/ns/csvw#",
21+
"dc": "http://purl.org/dc/terms/",
22+
"psdo": "http://purl.obolibrary.org/obo/",
23+
"slowmo:Measure": "http://example.com/slowmo#Measure",
24+
"slowmo:IsAboutPerformer": "http://example.com/slowmo#IsAboutPerformer",
25+
"slowmo:ColumnUse": "http://example.com/slowmo#ColumnUse",
26+
"slowmo:IsAboutTemplate": "http://example.com/slowmo#IsAboutTemplate",
27+
"slowmo:spek": "http://example.com/slowmo#spek",
28+
"slowmo:IsAboutCausalPathway": "http://example.com/slowmo#IsAboutCausalPathway",
29+
"slowmo:IsAboutOrganization": "http://example.com/slowmo#IsAboutOrganization",
30+
"slowmo:IsAboutMeasure": "http://example.com/slowmo#IsAboutMeasure",
31+
"slowmo:InputTable": "http://example.com/slowmo#InputTable",
32+
"slowmo:WithComparator": "http://example.com/slowmo#WithComparator",
33+
"has_part": "http://purl.obolibrary.org/obo/bfo#BFO_0000051",
34+
"has_disposition": "http://purl.obolibrary.org/obo/RO_0000091",
35+
},
36+
"message_instance_id": "",
37+
"performance_month": "",
38+
"Performance_data": [
39+
[
40+
"staff_number",
41+
"measure",
42+
"month",
43+
"passed_count",
44+
"flagged_count",
45+
"denominator",
46+
"peer_average_comparator",
47+
"peer_75th_percentile_benchmark",
48+
"peer_90th_percentile_benchmark",
49+
"MPOG_goal",
50+
],
51+
],
52+
"History": {},
53+
"Preferences": {},
4454
}
4555

46-
measure_name_to_id={
56+
measure_name_to_id = {
4757
"Cat II Compliance - 12 month rolling average": "CATII12",
4858
"Cat II Compliance - Monthly": "CATII",
4959
"Dystocia Compliance - 12 month rolling average": "DC12",
50-
"Dystocia Compliance - Monthly": "DC"
51-
60+
"Dystocia Compliance - Monthly": "DC",
5261
}
5362

54-
sheet_name = "obi_cat2_dystocia_compliance" #"Sheet1" # Change this to the name of the sheet in your .xlsx file
63+
sheet_name = "obi_cat2_dystocia_compliance" # "Sheet1" # Change this to the name of the sheet in your .xlsx file
5564
df = pd.read_excel(INPUT_DIR, sheet_name=sheet_name, engine="openpyxl")
56-
df['Time interval'] = pd.to_datetime(df['Time interval']).dt.strftime('%Y-%m-%d')
65+
df["Time interval"] = pd.to_datetime(df["Time interval"]).dt.strftime("%Y-%m-%d")
5766

58-
unique_site_ids = df['site_id'].unique()
67+
unique_site_ids = df["site_id"].unique()
5968
if not PERFORMANCE_MONTH:
60-
PERFORMANCE_MONTH = df['Time interval'].max()
61-
site_id= None
69+
PERFORMANCE_MONTH = df["Time interval"].max()
70+
site_id = None
6271
for site_id in unique_site_ids:
63-
site_rows = df[df['site_id'] == site_id]
72+
site_rows = df[df["site_id"] == site_id]
6473
input_file = copy.deepcopy(INPUT_TEMPLATE)
6574
measure = ""
6675
Numerator = 0
67-
Denominator = 0
76+
Denominator = 0
6877
for _, row in site_rows.iterrows():
6978
Numerator += row["Numerator"]
7079
Denominator += row["Denominator"]
7180
time_interval = row["Time interval"]
7281
if QUARTERLY_DATA:
73-
74-
date1 = datetime.strptime(row["Time interval"], '%Y-%m-%d')
75-
date2 = datetime.strptime(PERFORMANCE_MONTH, '%Y-%m-%d')
76-
year_diff = date2.year - date1.year
77-
month_diff = date2.month - date1.month
82+
date1 = datetime.strptime(row["Time interval"], "%Y-%m-%d")
83+
date2 = datetime.strptime(PERFORMANCE_MONTH, "%Y-%m-%d")
84+
year_diff = date2.year - date1.year
85+
month_diff = date2.month - date1.month
7886

79-
# Total months difference
80-
total_months = year_diff * 12 + month_diff
87+
# Total months difference
88+
total_months = year_diff * 12 + month_diff
8189

82-
# Check if the difference is a multiple of 4 then just continue to the next row (added up the Numerators and Denominators and checked the measure to be the same)
83-
if total_months % 3 != 0:
84-
85-
#make sure the quarter data is for the same measure
86-
if measure == "":
87-
measure = measure_name_to_id[row["Performance measure name"]]
88-
if measure != measure_name_to_id[row["Performance measure name"]]:
89-
raise ValueError("Sorry, wrong quarterly data issue for site "+ site_id + "meassure "+ measure + "date "+ row["Time interval"])
90-
continue
91-
time_interval = (date2 - relativedelta(months=total_months / 3)).strftime('%Y-%m-%d')
90+
# If the month difference is not a multiple of 3, just continue to the next row (the Numerators and Denominators have already been added up; only check that the measure is the same)
91+
if total_months % 3 != 0:
92+
# make sure the quarter data is for the same measure
93+
if measure == "":
94+
measure = measure_name_to_id[row["Performance measure name"]]
95+
if measure != measure_name_to_id[row["Performance measure name"]]:
96+
raise ValueError(
97+
"Sorry, wrong quarterly data issue for site "
98+
+ site_id
99+
+ "meassure "
100+
+ measure
101+
+ "date "
102+
+ row["Time interval"]
103+
)
104+
continue
105+
time_interval = (date2 - relativedelta(months=total_months / 3)).strftime(
106+
"%Y-%m-%d"
107+
)
92108
# if not row["Performance level (monthly rate)"]:
93109
# continue
94110
# Format the row and write it to the file
95-
input_file["Performance_data"].append([int(site_id),measure_name_to_id[row["Performance measure name"]],time_interval,
96-
Numerator,Denominator-Numerator,Denominator,0,0,0,row["Target"]*100 ])
111+
input_file["Performance_data"].append(
112+
[
113+
int(site_id),
114+
measure_name_to_id[row["Performance measure name"]],
115+
time_interval,
116+
Numerator,
117+
Denominator - Numerator,
118+
Denominator,
119+
0,
120+
0,
121+
0,
122+
row["Target"] * 100,
123+
]
124+
)
97125
input_file["message_instance_id"] = str(uuid.uuid4())
98126
input_file["performance_month"] = PERFORMANCE_MONTH
99-
100-
measure =""
127+
128+
measure = ""
101129
Numerator = 0
102130
Denominator = 0
103131

@@ -106,4 +134,3 @@
106134
file_path = os.path.join(OUTPUT_DIR, output_file_name)
107135
with open(file_path, "w") as file:
108136
json.dump(input_file, file, indent=4)
109-

bulk-up/src/bulk_up/history_extractor.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,10 @@ def add_history(filename):
3232
for row in performance_data:
3333
if row[1] not in passed_rate:
3434
passed_rate[row[1]] = {}
35-
if int(row[5])!=0:
35+
if int(row[5]) != 0:
3636
passed_rate[row[1]][row[2]] = int(row[3]) / int(row[5])
3737
else:
38-
passed_rate[row[1]][row[2]]=0
38+
passed_rate[row[1]][row[2]] = 0
3939

4040
with lock:
4141
for key, value in data["History"].items():

bulk-up/src/bulk_up/log_to_data.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,7 @@
3232
df = pd.read_excel(INPUT_DIR, sheet_name=SHEET_NAME, engine="openpyxl")
3333
response_df: pd.DataFrame = pd.DataFrame()
3434

35-
graph: Graph = manifest_to_graph(
36-
KNOWLEDGE_BASE_LOCAL_MANIFEST
37-
)
35+
graph: Graph = manifest_to_graph(KNOWLEDGE_BASE_LOCAL_MANIFEST)
3836

3937
is_about_to_columns: dict = {
4038
PSDO.achievement_set: "represented set",

esteemer/signals/__init__.py

Lines changed: 0 additions & 1 deletion
This file was deleted.

0 commit comments

Comments
 (0)