Skip to content

Commit 842e1b6

Browse files
committed
add CAST(... AS NUMERIC) for ROUND() calls, remove perl regex
1 parent b6b3fb5 commit 842e1b6

File tree

7 files changed

+22
-23
lines changed

7 files changed

+22
-23
lines changed

mimic-iv/concepts/convert_bigquery_to_postgres.sh

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@ export REGEX_DATETIME_DIFF="s/DATETIME_DIFF\(([^,]+), ?(.*), ?(DAY|MINUTE|SECOND
88
export REGEX_DATETIME_TRUNC="s/DATETIME_TRUNC\(([^,]+), ?(DAY|MINUTE|SECOND|HOUR|YEAR)\)/DATE_TRUNC('\2', \1)/g"
99
# Add necessary quotes to INTERVAL, e.g. "INTERVAL 5 hour" to "INTERVAL '5' hour"
1010
export REGEX_INTERVAL="s/interval ([[:digit:]]+) (hour|day|month|year)/INTERVAL '\1' \2/gI"
11-
# Add numeric cast to ROUND(), e.g. "ROUND(1.234, 2)" to "ROUND( CAST(1.234 as numeric), 2)".
12-
export PERL_REGEX_ROUND='s/ROUND\(((.|\n)*?)(\, [24]\))/ROUND\( CAST\( \1 as numeric\)\3/g'
1311
# Specific queries for some problems that arose with some files.
1412
export REGEX_INT="s/CAST\(hr AS INT64\)/CAST\(hr AS bigint\)/g"
1513
export REGEX_ARRAY="s/GENERATE_ARRAY\(-24, CEIL\(DATETIME\_DIFF\(it\.outtime_hr, it\.intime_hr, HOUR\)\)\)/ARRAY\(SELECT \* FROM generate\_series\(-24, CEIL\(DATETIME\_DIFF\(it\.outtime_hr, it\.intime_hr, HOUR\)\)\)\)/g"
@@ -51,7 +49,7 @@ do
5149

5250
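# these two scripts used to need a perl replace to cast rounded values as numeric; the casts are now written directly in the SQL, so both branches below run the same sed pipeline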
5351
if [[ "${tbl}" == "icustay_times" ]] || [[ "${tbl}" == "urine_output" ]]; then
54-
cat "${d}/${tbl}.sql" | sed -r -e "${REGEX_ARRAY}" | sed -r -e "${REGEX_HOUR_INTERVAL}" | sed -r -e "${REGEX_INT}" | sed -r -e "${REGEX_DATETIME_DIFF}" | sed -r -e "${REGEX_DATETIME_TRUNC}" | sed -r -e "${REGEX_SCHEMA}" | sed -r -e "${REGEX_INTERVAL}" | sed -r -e "${REGEX_SECONDS}" | perl -0777 -pe "${PERL_REGEX_ROUND}" >> "postgres/${d}/${tbl}.sql"
52+
cat "${d}/${tbl}.sql" | sed -r -e "${REGEX_ARRAY}" | sed -r -e "${REGEX_HOUR_INTERVAL}" | sed -r -e "${REGEX_INT}" | sed -r -e "${REGEX_DATETIME_DIFF}" | sed -r -e "${REGEX_DATETIME_TRUNC}" | sed -r -e "${REGEX_SCHEMA}" | sed -r -e "${REGEX_INTERVAL}" | sed -r -e "${REGEX_SECONDS}" >> "postgres/${d}/${tbl}.sql"
5553
else
5654
cat "${d}/${tbl}.sql" | sed -r -e "${REGEX_ARRAY}" | sed -r -e "${REGEX_HOUR_INTERVAL}" | sed -r -e "${REGEX_INT}" | sed -r -e "${REGEX_DATETIME_DIFF}" | sed -r -e "${REGEX_DATETIME_TRUNC}" | sed -r -e "${REGEX_SCHEMA}" | sed -r -e "${REGEX_INTERVAL}" | sed -r -e "${REGEX_SECONDS}" >> "postgres/${d}/${tbl}.sql"
5755
fi
@@ -93,7 +91,7 @@ do
9391
echo -n " ${tbl} .."
9492
echo "-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY." > "postgres/${d}/${tbl}.sql"
9593
echo "DROP TABLE IF EXISTS ${tbl}; CREATE TABLE ${tbl} AS " >> "postgres/${d}/${tbl}.sql"
96-
cat "${d}/${tbl}.sql" | sed -r -e "${REGEX_ARRAY}" | sed -r -e "${REGEX_HOUR_INTERVAL}" | sed -r -e "${REGEX_INT}" | sed -r -e "${REGEX_DATETIME_DIFF}" | sed -r -e "${REGEX_DATETIME_TRUNC}" | sed -r -e "${REGEX_SCHEMA}" | sed -r -e "${REGEX_INTERVAL}" | perl -0777 -pe "${PERL_REGEX_ROUND}" >> "postgres/${d}/${fn}"
94+
cat "${d}/${tbl}.sql" | sed -r -e "${REGEX_ARRAY}" | sed -r -e "${REGEX_HOUR_INTERVAL}" | sed -r -e "${REGEX_INT}" | sed -r -e "${REGEX_DATETIME_DIFF}" | sed -r -e "${REGEX_DATETIME_TRUNC}" | sed -r -e "${REGEX_SCHEMA}" | sed -r -e "${REGEX_INTERVAL}" >> "postgres/${d}/${fn}"
9795

9896
echo "\i ${d}/${fn}" >> postgres/postgres-make-concepts.sql
9997
fi

mimic-iv/concepts/demographics/icustay_detail.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ SELECT ie.subject_id, ie.hadm_id, ie.stay_id
1616

1717
-- icu level factors
1818
, ie.intime as icu_intime, ie.outtime as icu_outtime
19-
, ROUND(DATETIME_DIFF(ie.outtime, ie.intime, HOUR)/24.0, 2) as los_icu
19+
, ROUND(CAST(DATETIME_DIFF(ie.outtime, ie.intime, HOUR)/24.0 AS NUMERIC), 2) as los_icu
2020
, DENSE_RANK() OVER (PARTITION BY ie.hadm_id ORDER BY ie.intime) AS icustay_seq
2121

2222
-- first ICU stay *for the current hospitalization*

mimic-iv/concepts/firstday/first_day_height.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ WITH ce AS
2121
SELECT
2222
ie.subject_id
2323
, ie.stay_id
24-
, ROUND(AVG(height), 2) AS height
24+
, ROUND(CAST(AVG(height) AS NUMERIC), 2) AS height
2525
FROM `physionet-data.mimiciv_icu.icustays` ie
2626
LEFT JOIN `physionet-data.mimiciv_derived.height` ht
2727
ON ie.stay_id = ht.stay_id

mimic-iv/concepts/measurement/blood_differential.sql

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -94,31 +94,31 @@ subject_id, hadm_id, charttime, specimen_id
9494

9595
, wbc
9696
-- impute absolute count if percentage & WBC is available
97-
, ROUND(CASE
97+
, ROUND(CAST(CASE
9898
WHEN basophils_abs IS NULL AND basophils IS NOT NULL AND impute_abs = 1
9999
THEN basophils * wbc / 100
100100
ELSE basophils_abs
101-
END, 4) AS basophils_abs
102-
, ROUND(CASE
101+
END AS NUMERIC), 4) AS basophils_abs
102+
, ROUND(CAST(CASE
103103
WHEN eosinophils_abs IS NULL AND eosinophils IS NOT NULL AND impute_abs = 1
104104
THEN eosinophils * wbc / 100
105105
ELSE eosinophils_abs
106-
END, 4) AS eosinophils_abs
107-
, ROUND(CASE
106+
END AS NUMERIC), 4) AS eosinophils_abs
107+
, ROUND(CAST(CASE
108108
WHEN lymphocytes_abs IS NULL AND lymphocytes IS NOT NULL AND impute_abs = 1
109109
THEN lymphocytes * wbc / 100
110110
ELSE lymphocytes_abs
111-
END, 4) AS lymphocytes_abs
112-
, ROUND(CASE
111+
END AS NUMERIC), 4) AS lymphocytes_abs
112+
, ROUND(CAST(CASE
113113
WHEN monocytes_abs IS NULL AND monocytes IS NOT NULL AND impute_abs = 1
114114
THEN monocytes * wbc / 100
115115
ELSE monocytes_abs
116-
END, 4) AS monocytes_abs
117-
, ROUND(CASE
116+
END AS NUMERIC), 4) AS monocytes_abs
117+
, ROUND(CAST(CASE
118118
WHEN neutrophils_abs IS NULL AND neutrophils IS NOT NULL AND impute_abs = 1
119119
THEN neutrophils * wbc / 100
120120
ELSE neutrophils_abs
121-
END, 4) AS neutrophils_abs
121+
END AS NUMERIC), 4) AS neutrophils_abs
122122

123123
, basophils
124124
, eosinophils

mimic-iv/concepts/measurement/height.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ WITH ht_in AS
44
SELECT
55
c.subject_id, c.stay_id, c.charttime
66
-- Ensure that all heights are in centimeters
7-
, ROUND(c.valuenum * 2.54, 2) AS height
7+
, ROUND(CAST(c.valuenum * 2.54 AS NUMERIC), 2) AS height
88
, c.valuenum as height_orig
99
FROM `physionet-data.mimiciv_icu.chartevents` c
1010
WHERE c.valuenum IS NOT NULL
@@ -16,7 +16,7 @@ WITH ht_in AS
1616
SELECT
1717
c.subject_id, c.stay_id, c.charttime
1818
-- Ensure that all heights are in centimeters
19-
, ROUND(c.valuenum, 2) AS height
19+
, ROUND(CAST(c.valuenum AS NUMERIC), 2) AS height
2020
FROM `physionet-data.mimiciv_icu.chartevents` c
2121
WHERE c.valuenum IS NOT NULL
2222
-- Height cm

mimic-iv/concepts/measurement/urine_output_rate.sql

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,9 @@ select
8181
, CASE WHEN uo_tm_12hr >= 12 THEN ROUND(CAST((ur.urineoutput_12hr/wd.weight/uo_tm_12hr) AS NUMERIC), 4) END AS uo_mlkghr_12hr
8282
, CASE WHEN uo_tm_24hr >= 24 THEN ROUND(CAST((ur.urineoutput_24hr/wd.weight/uo_tm_24hr) AS NUMERIC), 4) END AS uo_mlkghr_24hr
8383
-- time of earliest UO measurement that was used to calculate the rate
84-
, ROUND(uo_tm_6hr, 2) AS uo_tm_6hr
85-
, ROUND(uo_tm_12hr, 2) AS uo_tm_12hr
86-
, ROUND(uo_tm_24hr, 2) AS uo_tm_24hr
84+
, ROUND(CAST(uo_tm_6hr AS NUMERIC), 2) AS uo_tm_6hr
85+
, ROUND(CAST(uo_tm_12hr AS NUMERIC), 2) AS uo_tm_12hr
86+
, ROUND(CAST(uo_tm_24hr AS NUMERIC), 2) AS uo_tm_24hr
8787
from ur_stg ur
8888
LEFT JOIN `physionet-data.mimiciv_derived.weight_durations` wd
8989
ON ur.stay_id = wd.stay_id

mimic-iv/concepts/medication/norepinephrine_equivalent_dose.sql

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,16 @@
33
-- by Goradia et al. 2020.
44
SELECT stay_id, starttime, endtime
55
-- calculate the dose
6-
, ROUND(COALESCE(norepinephrine, 0)
6+
, ROUND(CAST(
7+
COALESCE(norepinephrine, 0)
78
+ COALESCE(epinephrine, 0)
89
+ COALESCE(phenylephrine/10, 0)
910
+ COALESCE(dopamine/100, 0)
1011
-- + metaraminol/8 -- metaraminol not used in BIDMC
1112
+ COALESCE(vasopressin*2.5, 0)
1213
-- angotensin_ii*10 -- angiotensin II rarely used, currently not incorporated
1314
-- (it could be included due to norepinephrine sparing effects)
14-
, 4) AS norepinephrine_equivalent_dose
15+
AS NUMERIC), 4) AS norepinephrine_equivalent_dose
1516
-- angotensin_ii*10 -- angiotensin II rarely used, currently not incorporated
1617
-- (it could be included due to norepinephrine sparing effects)
1718
FROM `physionet-data.mimiciv_derived.vasoactive_agent`

0 commit comments

Comments
 (0)