Skip to content

Commit 892c21c

Browse files
authored
Merge pull request #1341 from MIT-LCP/mimic_iv_mysql_fixes
MIMIC-IV MySQL build script update
2 parents 2068cca + 09028e3 commit 892c21c

File tree

9 files changed

+940
-380
lines changed

9 files changed

+940
-380
lines changed

.github/workflows/build-db.yml

Lines changed: 97 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -42,56 +42,74 @@ jobs:
4242
- name: 'Set up Cloud SDK'
4343
uses: 'google-github-actions/setup-gcloud@v0'
4444

45-
- name: Download demo and create tables on PostgreSQL
45+
- name: Download demo data from GCP
4646
run: |
4747
echo "Downloading MIMIC-IV demo from GCP."
4848
gsutil -q -u $PROJECT_ID -m cp -r gs://mimic-iv-archive/v2.0/demo ./
49-
echo "Building and loading data into psql."
50-
psql -q -h $POSTGRES_HOST -U postgres -f mimic-iv/buildmimic/postgres/create.sql
51-
psql -q -h $POSTGRES_HOST -U postgres -v mimic_data_dir=demo -f mimic-iv/buildmimic/postgres/load_gz.sql
49+
env:
50+
PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}
51+
52+
- name: Load icu/hosp data into PostgreSQL
53+
run: |
54+
echo "Loading data into psql."
55+
psql -q -h $POSTGRES_HOST -U postgres -f ${BUILDCODE_PATH}/create.sql
56+
psql -q -h $POSTGRES_HOST -U postgres -v mimic_data_dir=demo -f ${BUILDCODE_PATH}/load_gz.sql
5257
echo "Validating build."
53-
psql -h $POSTGRES_HOST -U postgres -f mimic-iv/buildmimic/postgres/validate_demo.sql > validate_results.txt
54-
cat validate_results.txt
58+
psql -h $POSTGRES_HOST -U postgres -f ${BUILDCODE_PATH}/validate_demo.sql > results
59+
60+
# if we find "FAILED", then we did not pass the build
61+
if grep -F -q "FAILED" results; then
62+
echo "Failed the following row counts:"
63+
head -n 1 results
64+
grep "FAILED" results
65+
exit 1
66+
else
67+
echo "Built and loaded demo data successfully."
68+
cat results
69+
fi
5570
5671
env:
5772
# The hostname used to communicate with the PostgreSQL service container
5873
POSTGRES_HOST: postgres
5974
PGPASSWORD: postgres
6075
# The default PostgreSQL port
6176
POSTGRES_PORT: 5432
62-
PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}
77+
BUILDCODE_PATH: mimic-iv/buildmimic/postgres
6378

64-
build-mimic-iv-ed-psql:
65-
# Containers must run in Linux based operating systems
66-
runs-on: ubuntu-latest
67-
# Docker Hub image that `container-job` executes in
68-
container: node:latest
79+
- name: Load ed data into PostgreSQL
80+
run: |
81+
echo "Loading data into psql."
82+
psql -q -h $POSTGRES_HOST -U postgres -f ${BUILDCODE_PATH}/create.sql
83+
psql -q -h $POSTGRES_HOST -U postgres -v mimic_data_dir=demo/ed -f ${BUILDCODE_PATH}/load_gz.sql
84+
echo "Validating build."
85+
psql -h $POSTGRES_HOST -U postgres -f ${BUILDCODE_PATH}/validate_demo.sql > results
86+
87+
# if we find "FAILED", then we did not pass the build
88+
if grep -F -q "FAILED" results; then
89+
echo "Failed the following row counts:"
90+
head -n 1 results
91+
grep "FAILED" results
92+
exit 1
93+
else
94+
echo "Built and loaded demo data successfully."
95+
cat results
96+
fi
6997
70-
# Service containers to run with `container-job`
71-
services:
72-
# Label used to access the service container
73-
postgres:
74-
# Docker Hub image
75-
image: postgres
76-
# Provide the password for postgres
7798
env:
78-
POSTGRES_PASSWORD: postgres
79-
# Set health checks to wait until postgres has started
80-
options: >-
81-
--health-cmd pg_isready
82-
--health-interval 10s
83-
--health-timeout 5s
84-
--health-retries 5
99+
# The hostname used to communicate with the PostgreSQL service container
100+
POSTGRES_HOST: postgres
101+
PGPASSWORD: postgres
102+
# The default PostgreSQL port
103+
POSTGRES_PORT: 5432
104+
BUILDCODE_PATH: mimic-iv-ed/buildmimic/postgres
105+
106+
build-mimic-iv-mysql:
107+
runs-on: ubuntu-22.04
85108

86109
steps:
87110
- name: Check out repository code
88111
uses: actions/checkout@v3
89112

90-
- name: Install psql command
91-
run: |
92-
apt-get update
93-
apt-get install --yes --no-install-recommends postgresql-client
94-
95113
- id: 'auth'
96114
uses: 'google-github-actions/auth@v0'
97115
with:
@@ -101,23 +119,54 @@ jobs:
101119
- name: 'Set up Cloud SDK'
102120
uses: 'google-github-actions/setup-gcloud@v0'
103121

104-
- name: Download demo and create tables on PostgreSQL
122+
- name: Download data from GCP
105123
run: |
106124
echo "Downloading MIMIC-IV demo from GCP."
107-
gsutil -q -u $PROJECT_ID -m cp -r gs://mimic-iv-archive/v2.0/demo/ed ./
108-
MIMIC_DATA_DIR=`pwd`/ed
109-
echo "Building and loading data into psql."
110-
cd mimic-iv-ed/buildmimic/postgres
111-
psql -q -h $POSTGRES_HOST -U postgres -f create.sql
112-
psql -q -h $POSTGRES_HOST -U postgres -v mimic_data_dir=$MIMIC_DATA_DIR -f load_gz.sql
113-
echo "Validating build."
114-
psql -h $POSTGRES_HOST -U postgres -f validate_demo.sql > validate_results.txt
115-
cat validate_results.txt
116-
125+
gsutil -q -u $PROJECT_ID -m cp -r gs://mimic-iv-archive/v2.0/demo ./
126+
mv demo/hosp/*.csv.gz ./
127+
mv demo/icu/*.csv.gz ./
128+
mv demo/ed/*.csv.gz ./
129+
gzip -d *.csv.gz
117130
env:
118-
# The hostname used to communicate with the PostgreSQL service container
119-
POSTGRES_HOST: postgres
120-
PGPASSWORD: postgres
121-
# The default PostgreSQL port
122-
POSTGRES_PORT: 5432
123-
PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}
131+
PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}
132+
133+
- name: Start MySQL service
134+
run: |
135+
sudo /etc/init.d/mysql start
136+
mysql -u root -proot -e "SET GLOBAL local_infile=1;"
137+
mysql -u root -proot -e "SET GLOBAL sql_notes=0;"
138+
mysql -u root -proot -e "create database mimic"
139+
140+
- name: Load icu/hosp demo data
141+
run: |
142+
echo "Loading data into mysql."
143+
mysql -u root -proot --local-infile=1 mimic < mimic-iv/buildmimic/mysql/load.sql
144+
mysql -u root -proot mimic < mimic-iv/buildmimic/mysql/validate_demo.sql > results
145+
146+
# if we find "FAILED", then we did not pass the build
147+
if grep -F -q "FAILED" results; then
148+
echo "Failed the following row counts:"
149+
head -n 1 results
150+
grep "FAILED" results
151+
exit 1
152+
else
153+
echo "Built and loaded demo data successfully."
154+
cat results
155+
fi
156+
157+
- name: Load ed demo data
158+
run: |
159+
echo "Loading data into mysql."
160+
mysql -u root -proot --local-infile=1 mimic < mimic-iv-ed/buildmimic/mysql/load.sql
161+
mysql -u root -proot mimic < mimic-iv-ed/buildmimic/mysql/validate_demo.sql > results
162+
163+
# if we find "FAILED", then we did not pass the build
164+
if grep -F -q "FAILED" results; then
165+
echo "Failed the following row counts:"
166+
head -n 1 results
167+
grep "FAILED" results
168+
exit 1
169+
else
170+
echo "Built and loaded demo data successfully."
171+
cat results
172+
fi
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
-- --------------------------------------
2+
-- --------------------------------------
3+
-- Indexes for the MIMIC-IV-ED module --
4+
-- --------------------------------------
5+
-- --------------------------------------
6+
7+
-- Note: MySQL has no "DROP INDEX IF EXISTS ..." statement,
8+
-- hence running this script a second time will error on the existing indexes.
9+
10+
-- The load.sql script already creates primary keys for stay_id
11+
-- on edstays and triage, so we don't need to include those.
12+
13+
-- diagnosis
14+
15+
-- DROP INDEX diagnosis_idx01 ON diagnosis;
16+
CREATE INDEX diagnosis_idx01
17+
ON diagnosis (subject_id, stay_id);
18+
-- DROP INDEX diagnosis_idx02 ON diagnosis;
19+
CREATE INDEX diagnosis_idx02
20+
ON diagnosis (icd_code, icd_version);
21+
22+
23+
-- edstays
24+
25+
-- DROP INDEX edstays_idx01 ON edstays;
26+
CREATE INDEX edstays_idx01
27+
ON edstays (subject_id, hadm_id, stay_id);
28+
-- DROP INDEX edstays_idx02 ON edstays;
29+
-- CREATE UNIQUE INDEX edstays_idx02
30+
-- ON edstays (stay_id);
31+
-- DROP INDEX edstays_idx03 ON edstays;
32+
CREATE INDEX edstays_idx03
33+
ON edstays (intime, outtime);
34+
35+
-- medrecon
36+
37+
-- DROP INDEX medrecon_idx01 ON medrecon;
38+
CREATE INDEX medrecon_idx01
39+
ON medrecon (subject_id, stay_id, charttime);
40+
41+
-- pyxis
42+
43+
-- DROP INDEX pyxis_idx01 ON pyxis;
44+
CREATE INDEX pyxis_idx01
45+
ON pyxis (subject_id, stay_id, charttime);
46+
-- DROP INDEX pyxis_idx02 ON pyxis;
47+
CREATE INDEX pyxis_idx02
48+
ON pyxis (gsn);
49+
50+
-- triage
51+
52+
-- DROP INDEX triage_idx01 ON triage;
53+
CREATE INDEX triage_idx01
54+
ON triage (subject_id, stay_id);
55+
-- DROP INDEX triage_idx02 ON triage;
56+
-- CREATE INDEX triage_idx02
57+
-- ON triage (stay_id);
58+
59+
-- vitalsign
60+
61+
-- DROP INDEX vitalsign_idx01 ON vitalsign;
62+
CREATE INDEX vitalsign_idx01
63+
ON vitalsign (subject_id, stay_id, charttime);
64+

0 commit comments

Comments
 (0)