#!/bin/bash
# This file makes tables for the concepts in this subfolder.
# Be sure to run postgres-functions.sql first, as the concepts rely on those
# function definitions.
# Note that this may take a large amount of time and hard drive space.
#
# Run it from the directory that contains the concept subfolders
# (demographics, measurement, ...): all SQL files are read via relative paths.
#
# The script intentionally does not abort when a single table fails: the
# score and sepsis folders are processed twice because some of their tables
# depend on each other, so the second pass fills in what the first could not.

# ---------------------------------------------------------------------------
# Text substitutions that translate the BigQuery SQL into PostgreSQL SQL.
# ---------------------------------------------------------------------------

# Strip the backticked BigQuery project prefix, e.g.
# `physionet-data.mimic_icu.icustays` -> mimic_icu.icustays.
# The table name is matched with [^`]+ because sed -r (POSIX ERE) has no lazy
# quantifier: ".+?" is greedy there and would swallow everything up to the
# last backtick when a line contains more than one reference.
export REGEX_SCHEMA='s/`physionet-data\.(mimic_core|mimic_icu|mimic_derived|mimic_hosp)\.([^`]+)`/\1.\2/g'
# Quote the date part of DATETIME_DIFF, e.g. DATETIME_DIFF(a, b, HOUR) to
# DATETIME_DIFF(a,b,'HOUR').
# Note that these queries are very sensitive to changes, e.g. adding
# whitespace after a comma can already change the behavior.
# NOTE(review): ".+?" is greedy under sed -r here as well; the pattern is left
# untouched because the existing SQL files were tuned against it.
export REGEX_DATETIME_DIFF="s/DATETIME_DIFF\((.+?),\s?(.+?),\s?(DAY|MINUTE|SECOND|HOUR|YEAR)\)/DATETIME_DIFF(\1,\2,'\3')/g"
# Add necessary quotes to INTERVAL, e.g. "INTERVAL 5 hour" to "INTERVAL '5' hour"
export REGEX_INTERVAL="s/interval\s([[:digit:]]+)\s(hour|day|month|year)/INTERVAL '\1' \2/gI"
# Add numeric cast to ROUND(), e.g. "ROUND(1.234, 2)" to "ROUND( CAST(1.234 as numeric), 2)".
# Applied with perl in slurp mode (-0777) so the first argument may span lines.
export PERL_REGEX_ROUND='s/ROUND\(((.|\n)*?)\, /ROUND\( CAST\( \1 as numeric\)\,/g'
# Specific queries for some problems that arose with some files.
export REGEX_INT="s/CAST\(hr AS INT64\)/CAST\(hr AS bigint\)/g"
export REGEX_ARRAY="s/GENERATE_ARRAY\(-24, CEIL\(DATETIME\_DIFF\(it\.outtime_hr, it\.intime_hr, HOUR\)\)\)/ARRAY\(SELECT \* FROM generate\_series\(-24, CEIL\(DATETIME\_DIFF\(it\.outtime_hr, it\.intime_hr, HOUR\)\)\)\)/g"
export REGEX_HOUR_INTERVAL="s/INTERVAL CAST\(hr AS INT64\) HOUR/interval \'1\' hour * CAST\(hr AS bigint\)/g"
# Explicit regex for the cast of second to 'second' in organfailure/kdigo_uo.
export REGEX_SECONDS="s/SECOND\)/\'SECOND\'\)/g"
# Column renames needed by firstday/first_day_lab.
# Have to replace column names. Probably a mistake in the original SQL script.
export REGEX_LAB_1="s/abs_basophils/basophils_abs/g"
export REGEX_LAB_2="s/abs_eosinophils/eosinophils_abs/g"
export REGEX_LAB_3="s/abs_lymphocytes/lymphocytes_abs/g"
export REGEX_LAB_4="s/abs_monocytes/monocytes_abs/g"
export REGEX_LAB_5="s/abs_neutrophils/neutrophils_abs/g"
export REGEX_LAB_6="s/atyps/atypical_lymphocytes/g"
export REGEX_LAB_7="s/imm_granulocytes/immature_granulocytes/g"
export REGEX_LAB_8="s/metas/metamyelocytes/g"

# psql connection options (several space-separated words).
export CONNSTR='-U postgres -h localhost -p 5500 -d mimic-iv' # -d mimic

# This is set as the search_path variable for psql.
# A search path of "public,mimic_icu" will search both public and mimic_icu
# schemas for data, but will create tables on the public schema.
export PSQL_PREAMBLE='SET search_path TO public,mimic_icu'
export TARGET_DATASET='mimic_derived'

# ---------------------------------------------------------------------------
# Pipeline stages. Each one reads SQL on stdin and writes SQL on stdout,
# except emit_create_table (source) and run_psql (sink).
# ---------------------------------------------------------------------------

# Print "<preamble>; DROP TABLE ...; CREATE TABLE ... AS " followed by the query.
#   $1 - table name (created inside TARGET_DATASET)
#   $2 - path of the SQL file holding the SELECT statement
emit_create_table() {
  local tbl=$1
  local sql_file=$2
  echo "${PSQL_PREAMBLE}; DROP TABLE IF EXISTS ${TARGET_DATASET}.${tbl}; CREATE TABLE ${TARGET_DATASET}.${tbl} AS "
  cat "${sql_file}"
}

# Substitutions shared by every query: DATETIME_DIFF, schema prefix, INTERVAL.
# The expressions run in this order on each line, exactly as chained seds would.
translate_common() {
  sed -r -e "${REGEX_DATETIME_DIFF}" -e "${REGEX_SCHEMA}" -e "${REGEX_INTERVAL}"
}

# File-specific fixes; these must run BEFORE translate_common because
# REGEX_ARRAY and REGEX_HOUR_INTERVAL match the still-untranslated text.
translate_special() {
  sed -r -e "${REGEX_ARRAY}" -e "${REGEX_HOUR_INTERVAL}" -e "${REGEX_INT}"
}

# Rename the lab columns used by first_day_lab.
rename_lab_columns() {
  sed -r \
    -e "${REGEX_LAB_1}" -e "${REGEX_LAB_2}" -e "${REGEX_LAB_3}" \
    -e "${REGEX_LAB_4}" -e "${REGEX_LAB_5}" -e "${REGEX_LAB_6}" \
    -e "${REGEX_LAB_7}" -e "${REGEX_LAB_8}"
}

# Quote the SECOND date part (only needed for kdigo_uo).
quote_seconds() {
  sed -r -e "${REGEX_SECONDS}"
}

# Wrap the first argument of ROUND() in a numeric cast.
cast_round_args() {
  perl -0777 -pe "${PERL_REGEX_ROUND}"
}

# Execute the SQL arriving on stdin.
run_psql() {
  # CONNSTR holds several options in one string, so it is expanded unquoted
  # on purpose to let the shell split it into separate arguments.
  # shellcheck disable=SC2086
  psql ${CONNSTR}
}

# ---------------------------------------------------------------------------
# Table generation.
# ---------------------------------------------------------------------------
main() {
  local d path fn tbl

  echo ''
  echo '==='
  echo 'Beginning to create tables for MIMIC database.'
  echo 'Any notices of the form "NOTICE: TABLE "XXXXXX" does not exist" can be ignored.'
  echo 'The scripts drop views before creating them, and these notices indicate nothing existed prior to creating the view.'
  echo '==='
  echo ''

  # Tables generated up front; the folder loop below skips them.
  echo "Generating ${TARGET_DATASET}.icustay_times"
  emit_create_table icustay_times demographics/icustay_times.sql \
    | translate_common | cast_round_args | run_psql

  echo "Generating ${TARGET_DATASET}.weight_durations"
  emit_create_table weight_durations demographics/weight_durations.sql \
    | translate_common | run_psql

  echo "Generating ${TARGET_DATASET}.urine_output"
  emit_create_table urine_output measurement/urine_output.sql \
    | translate_common | cast_round_args | run_psql

  echo "Generating ${TARGET_DATASET}.kdigo_uo"
  emit_create_table kdigo_uo organfailure/kdigo_uo.sql \
    | translate_common | quote_seconds | run_psql

  # generate tables in subfolders
  # order is important for a few tables here:
  #  * firstday should go last
  #  * sepsis depends on score (sofa.sql in particular)
  #  * organfailure depends on measurement
  #  * score and sepsis are repeated at the end because some tables depend
  #    on each other
  for d in demographics measurement comorbidity medication organfailure treatment score sepsis firstday score sepsis; do
    # Glob instead of parsing `ls`, so names with whitespace stay intact.
    for path in "${d}"/*; do
      # An unmatched glob stays literal; skip it (missing or empty folder).
      [[ -e "${path}" ]] || continue
      fn="${path##*/}"
      echo "${d}"

      # only run SQL queries
      [[ "${fn}" == *.sql ]] || continue
      # table name is file name minus extension
      tbl="${fn%.sql}"

      # Create first_day_lab after measurements are done and before it is
      # used by scores: it is generated right before charlson is processed.
      if [[ "${tbl}" == "charlson" ]]; then
        echo "Generating ${TARGET_DATASET}.first_day_lab"
        emit_create_table first_day_lab firstday/first_day_lab.sql \
          | translate_common | rename_lab_columns | cast_round_args | run_psql
      fi

      # skip first_day_sofa as it depends on other firstday queries; also
      # skip the tables that were already generated explicitly.
      case "${tbl}" in
        first_day_sofa | icustay_times | weight_durations | urine_output | kdigo_uo | first_day_lab)
          continue
          ;;
      esac

      echo "Generating ${TARGET_DATASET}.${tbl}"
      emit_create_table "${tbl}" "${path}" \
        | translate_special | translate_common | cast_round_args | run_psql
    done
  done

  # generate first_day_sofa table last
  echo "Generating ${TARGET_DATASET}.first_day_sofa"
  emit_create_table first_day_sofa firstday/first_day_sofa.sql \
    | translate_common | cast_round_args | run_psql
}

main "$@"