|
#!/bin/bash
# This shell script converts BigQuery .sql files into PostgreSQL .sql files.
#
# Every REGEX_* variable holds one complete sed substitution command; the
# conversion loops further down apply them with `sed -r`. PERL_REGEX_ROUND is
# applied with `perl -0777 -pe`, i.e. against the whole file at once.
#
# NOTE(review): `sed -r` uses POSIX extended regular expressions, which have no
# lazy quantifiers, so the `.+?` groups in the DATETIME_* patterns are most
# likely matched greedily. Confirm before relying on non-greedy behaviour.

# String replacements are necessary for some queries.
# Strip the back-quotes and the project prefix from table references, e.g.
# `physionet-data.mimic_icu.icustays` becomes mimic_icu.icustays. The table
# name is matched as "anything but a back-quote" so that a line containing
# several references is rewritten one reference at a time.
export REGEX_SCHEMA='s/`physionet-data\.(mimic_core|mimic_icu|mimic_derived|mimic_hosp)\.([^`]+)`/\1.\2/g'
# Note that these queries are very sensitive to changes, e.g. adding whitespace after a comma can already change the behavior.
export REGEX_DATETIME_DIFF="s/DATETIME_DIFF\((.+?),\s?(.+?),\s?(DAY|MINUTE|SECOND|HOUR|YEAR)\)/DATETIME_DIFF(\1,\2,'\3')/g"
export REGEX_DATETIME_TRUNC="s/DATETIME_TRUNC\((.+?),\s?(DAY|MINUTE|SECOND|HOUR|YEAR)\)/DATE_TRUNC('\2', \1)/g"
# Add necessary quotes to INTERVAL, e.g. "INTERVAL 5 hour" to "INTERVAL '5' hour"
# (case-insensitive match; the keyword itself is always emitted in upper case).
export REGEX_INTERVAL="s/interval\s([[:digit:]]+)\s(hour|day|month|year)/INTERVAL '\1' \2/gI"
# Add numeric cast to ROUND(), e.g. "ROUND(1.234, 2)" to "ROUND( CAST( 1.234 as numeric),2)".
# The first argument may span several lines, hence the (.|\n) alternation.
export PERL_REGEX_ROUND='s/ROUND\(((.|\n)*?)\, /ROUND\( CAST\( \1 as numeric\)\,/g'
# Specific queries for some problems that arose with some files.
export REGEX_INT="s/CAST\(hr AS INT64\)/CAST\(hr AS bigint\)/g"
export REGEX_ARRAY="s/GENERATE_ARRAY\(-24, CEIL\(DATETIME\_DIFF\(it\.outtime_hr, it\.intime_hr, HOUR\)\)\)/ARRAY\(SELECT \* FROM generate\_series\(-24, CEIL\(DATETIME\_DIFF\(it\.outtime_hr, it\.intime_hr, HOUR\)\)\)\)/g"
export REGEX_HOUR_INTERVAL="s/INTERVAL CAST\(hr AS INT64\) HOUR/interval \'1\' hour * CAST\(hr AS bigint\)/g"
export REGEX_SECONDS="s/SECOND\)/\'SECOND\'\)/g"
# NOTE(review): CONNSTR is not referenced by the conversion steps in this
# script; presumably it is consumed by whatever runs the generated SQL - confirm.
export CONNSTR='-U postgres -h localhost -p 5500 -d mimic-iv' # -d mimic
| 19 | + |
| 20 | + |
# First, we re-create the postgres-make-concepts.sql file, starting with a
# preamble of psql \echo commands for the user running the script.
# The output folder is created up front: on a fresh checkout it may not exist
# yet, and the redirect below would otherwise fail.
mkdir -p postgres

# The here-doc delimiter is quoted so the backslashes and quotes are written
# out literally.
cat > postgres/postgres-make-concepts.sql <<'EOF'
\echo ''
\echo '==='
\echo 'Beginning to create materialized views for MIMIC database.'
\echo 'Any notices of the form "NOTICE: materialized view "XXXXXX" does not exist" can be ignored.'
\echo 'The scripts drop views before creating them, and these notices indicate nothing existed prior to creating the view.'
\echo '==='
\echo ''
EOF
| 31 | + |
# Convert the tables that other concepts depend on, before anything else.

# reporting to stdout the folder being run
printf 'Dependencies:'

# output table creation calls to the make-concepts script
printf '\n-- dependencies\n' >> postgres/postgres-make-concepts.sql

# sed substitutions, applied to every line in exactly this order.
dep_sed_exprs=(
  -e "${REGEX_ARRAY}"
  -e "${REGEX_HOUR_INTERVAL}"
  -e "${REGEX_INT}"
  -e "${REGEX_DATETIME_DIFF}"
  -e "${REGEX_DATETIME_TRUNC}"
  -e "${REGEX_SCHEMA}"
  -e "${REGEX_INTERVAL}"
  -e "${REGEX_SECONDS}"
)

for dir_and_table in demographics.icustay_times demographics.weight_durations measurement.urine_output organfailure.kdigo_uo; do
  # each entry is "<folder>.<table>"
  d="${dir_and_table%%.*}"
  tbl="${dir_and_table#*.}"
  src="${d}/${tbl}.sql"
  out="postgres/${d}/${tbl}.sql"

  # make the sub-folder for postgres if it does not exist
  mkdir -p "postgres/${d}"

  # convert the bigquery script to psql and output it to the appropriate subfolder
  printf ' %s.%s ..' "${d}" "${tbl}"
  {
    printf '%s\n' "-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY."
    printf 'DROP TABLE IF EXISTS %s; CREATE TABLE %s AS \n' "${tbl}" "${tbl}"

    # for two scripts, add a perl replace to cast rounded values as numeric
    if [[ "${tbl}" == "icustay_times" || "${tbl}" == "urine_output" ]]; then
      sed -r "${dep_sed_exprs[@]}" -- "${src}" | perl -0777 -pe "${PERL_REGEX_ROUND}"
    else
      sed -r "${dep_sed_exprs[@]}" -- "${src}"
    fi
  } > "${out}"

  # write out a call to this script in the make concepts file
  printf '\\i %s/%s.sql\n' "${d}" "${tbl}" >> postgres/postgres-make-concepts.sql
done
printf ' done!\n'
| 63 | + |
# Iterate through each concept subfolder, and:
#   (1) apply the above regular expressions to update the script
#   (2) output to the postgres subfolder
#   (3) add a line to the postgres-make-concepts.sql script to generate this table

# order of the folders is important for a few tables here:
#   * scores (sofa et al) depend on labs, icustay_hourly
#   * sepsis depends on score (sofa.sql in particular)
#   * organfailure depends on measurement and firstday
# the order *only* matters during the conversion step because our loop is
# inserting table build commands into the postgres-make-concepts.sql file

# sed substitutions, applied to every line in exactly this order.
# NOTE(review): unlike the dependency and final-table loops, this chain does
# not apply REGEX_SECONDS - confirm that is intended.
concept_sed_exprs=(
  -e "${REGEX_ARRAY}"
  -e "${REGEX_HOUR_INTERVAL}"
  -e "${REGEX_INT}"
  -e "${REGEX_DATETIME_DIFF}"
  -e "${REGEX_DATETIME_TRUNC}"
  -e "${REGEX_SCHEMA}"
  -e "${REGEX_INTERVAL}"
)

for d in demographics measurement comorbidity medication treatment firstday organfailure score sepsis; do
  mkdir -p "postgres/${d}"
  printf '%s:' "${d}"
  printf '\n-- %s\n' "${d}" >> postgres/postgres-make-concepts.sql

  # only run SQL queries; globbing (rather than parsing `ls`) keeps file
  # names with unusual characters intact
  for src in "${d}"/*.sql; do
    # an unmatched glob is left as the literal pattern - skip it
    [[ -f "${src}" ]] || continue

    # table name is file name minus extension
    fn="${src##*/}"
    tbl="${fn%.sql}"

    # skip first_day_sofa as it depends on other firstday queries, we'll generate it later
    # we also skip tables generated in the "Dependencies" loop above.
    case "${tbl}" in
      first_day_sofa | icustay_times | weight_durations | urine_output | kdigo_uo | sepsis3)
        continue
        ;;
    esac

    printf ' %s ..' "${tbl}"
    {
      printf '%s\n' "-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY."
      printf 'DROP TABLE IF EXISTS %s; CREATE TABLE %s AS \n' "${tbl}" "${tbl}"
      sed -r "${concept_sed_exprs[@]}" -- "${src}" | perl -0777 -pe "${PERL_REGEX_ROUND}"
    } > "postgres/${d}/${fn}"

    printf '\\i %s/%s\n' "${d}" "${fn}" >> postgres/postgres-make-concepts.sql
  done
  printf ' done!\n'
done
| 103 | + |
# finally generate the tables which depend on concepts created above
# (first_day_sofa needs the firstday folder; sepsis3 was skipped above too)
printf '\n-- final tables dependent on previous concepts\n' >> postgres/postgres-make-concepts.sql

# sed substitutions, applied to every line in exactly this order.
final_sed_exprs=(
  -e "${REGEX_ARRAY}"
  -e "${REGEX_HOUR_INTERVAL}"
  -e "${REGEX_INT}"
  -e "${REGEX_DATETIME_DIFF}"
  -e "${REGEX_DATETIME_TRUNC}"
  -e "${REGEX_SCHEMA}"
  -e "${REGEX_INTERVAL}"
  -e "${REGEX_SECONDS}"
)

for dir_and_table in firstday.first_day_sofa sepsis.sepsis3; do
  # each entry is "<folder>.<table>"
  d="${dir_and_table%%.*}"
  tbl="${dir_and_table#*.}"
  src="${d}/${tbl}.sql"
  out="postgres/${d}/${tbl}.sql"

  # make the sub-folder for postgres if it does not exist
  mkdir -p "postgres/${d}"

  # convert the bigquery script to psql and output it to the appropriate subfolder
  printf ' %s.%s ..' "${d}" "${tbl}"
  {
    printf '%s\n' "-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY."
    printf 'DROP TABLE IF EXISTS %s; CREATE TABLE %s AS \n' "${tbl}" "${tbl}"
    sed -r "${final_sed_exprs[@]}" -- "${src}"
  } > "${out}"

  # write out a call to this script in the make concepts file
  printf '\\i %s/%s.sql\n' "${d}" "${tbl}" >> postgres/postgres-make-concepts.sql
done
printf ' done!\n'
0 commit comments