Skip to content

Commit b6d6b6d

Browse files
committed
mimic-iv/concepts/convert_bigquery_to_postgres: fix regular expressions
Currently, `REGEX_DATETIME_DIFF` doesn't match all occurrences of `DATETIME_DIFF`, so `postgres-make-concepts.sql` errors out. For example:

```
psql:demographics/age.sql:30: ERROR: column "year" does not exist
LINE 22: ...mittime, DATETIME(pa.anchor_year, 1, 1, 0, 0, 0), YEAR) + pa..
```

Also, `PERL_REGEX_ROUND` doesn't correctly match `ROUND(...)` with nested functions, for example with a nested `DATETIME_DIFF`:

```
psql:demographics/icustay_detail.sql:33: ERROR: syntax error at or near "as"
LINE 17: , ROUND( CAST( DATETIME_DIFF(ie.outtime as numeric),ie.intim...
```

Update both regular expressions accordingly.
1 parent 14b9f5f commit b6d6b6d

File tree

8 files changed

+20
-20
lines changed

8 files changed

+20
-20
lines changed

mimic-iv/concepts/convert_bigquery_to_postgres.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@
44
# String replacements are necessary for some queries.
55
export REGEX_SCHEMA='s/`physionet-data.(mimiciv_hosp|mimiciv_icu|mimiciv_derived).([A-Za-z0-9_-]+)`/\1.\2/g'
66
# Note that these queries are very sensitive to changes, e.g. adding whitespace after a comma can already change the behavior.
7-
export REGEX_DATETIME_DIFF="s/DATETIME_DIFF\(([^,]+), ?([^,]+), ?(DAY|MINUTE|SECOND|HOUR|YEAR)\)/DATETIME_DIFF(\1, \2, '\3')/g"
7+
export REGEX_DATETIME_DIFF="s/DATETIME_DIFF\(([^,]+), ?(.*), ?(DAY|MINUTE|SECOND|HOUR|YEAR)\)/DATETIME_DIFF(\1, \2, '\3')/g"
88
export REGEX_DATETIME_TRUNC="s/DATETIME_TRUNC\(([^,]+), ?(DAY|MINUTE|SECOND|HOUR|YEAR)\)/DATE_TRUNC('\2', \1)/g"
99
# Add necessary quotes to INTERVAL, e.g. "INTERVAL 5 hour" to "INTERVAL '5' hour"
1010
export REGEX_INTERVAL="s/interval ([[:digit:]]+) (hour|day|month|year)/INTERVAL '\1' \2/gI"
1111
# Add numeric cast to ROUND(), e.g. "ROUND(1.234, 2)" to "ROUND( CAST( 1.234 as numeric), 2)".
12-
export PERL_REGEX_ROUND='s/ROUND\(((.|\n)*?)\, /ROUND\( CAST\( \1 as numeric\)\,/g'
12+
export PERL_REGEX_ROUND='s/ROUND\(((.|\n)*?)(\, [24]\))/ROUND\( CAST\( \1 as numeric\)\3/g'
1313
# Specific queries for some problems that arose with some files.
1414
export REGEX_INT="s/CAST\(hr AS INT64\)/CAST\(hr AS bigint\)/g"
1515
export REGEX_ARRAY="s/GENERATE_ARRAY\(-24, CEIL\(DATETIME\_DIFF\(it\.outtime_hr, it\.intime_hr, HOUR\)\)\)/ARRAY\(SELECT \* FROM generate\_series\(-24, CEIL\(DATETIME\_DIFF\(it\.outtime_hr, it\.intime_hr, HOUR\)\)\)\)/g"

mimic-iv/concepts/postgres/demographics/age.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ SELECT
2323
, ad.admittime
2424
, pa.anchor_age
2525
, pa.anchor_year
26-
, DATETIME_DIFF(ad.admittime, DATETIME(pa.anchor_year, 1, 1, 0, 0, 0), YEAR) + pa.anchor_age AS age
26+
, DATETIME_DIFF(ad.admittime, DATETIME(pa.anchor_year, 1, 1, 0, 0, 0), 'YEAR') + pa.anchor_age AS age
2727
FROM mimiciv_hosp.admissions ad
2828
INNER JOIN mimiciv_hosp.patients pa
2929
ON ad.subject_id = pa.subject_id

mimic-iv/concepts/postgres/demographics/icustay_detail.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ SELECT ie.subject_id, ie.hadm_id, ie.stay_id
88
-- hospital level factors
99
, adm.admittime, adm.dischtime
1010
, DATETIME_DIFF(adm.dischtime, adm.admittime, 'DAY') as los_hospital
11-
, DATETIME_DIFF(adm.admittime, DATETIME(pat.anchor_year, 1, 1, 0, 0, 0), YEAR) + pat.anchor_age as admission_age
11+
, DATETIME_DIFF(adm.admittime, DATETIME(pat.anchor_year, 1, 1, 0, 0, 0), 'YEAR') + pat.anchor_age as admission_age
1212
, adm.race
1313
, adm.hospital_expire_flag
1414
, DENSE_RANK() OVER (PARTITION BY adm.subject_id ORDER BY adm.admittime) AS hospstay_seq
@@ -18,7 +18,7 @@ SELECT ie.subject_id, ie.hadm_id, ie.stay_id
1818

1919
-- icu level factors
2020
, ie.intime as icu_intime, ie.outtime as icu_outtime
21-
, ROUND( CAST( DATETIME_DIFF(ie.outtime as numeric),ie.intime, 'HOUR')/24.0, 2) as los_icu
21+
, ROUND( CAST( DATETIME_DIFF(ie.outtime, ie.intime, 'HOUR')/24.0 as numeric), 2) as los_icu
2222
, DENSE_RANK() OVER (PARTITION BY ie.hadm_id ORDER BY ie.intime) AS icustay_seq
2323

2424
-- first ICU stay *for the current hospitalization*

mimic-iv/concepts/postgres/firstday/first_day_height.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ WITH ce AS
2323
SELECT
2424
ie.subject_id
2525
, ie.stay_id
26-
, ROUND( CAST( AVG(height) as numeric),2) AS height
26+
, ROUND( CAST( AVG(height) as numeric), 2) AS height
2727
FROM mimiciv_icu.icustays ie
2828
LEFT JOIN mimiciv_derived.height ht
2929
ON ie.stay_id = ht.stay_id

mimic-iv/concepts/postgres/measurement/blood_differential.sql

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -100,27 +100,27 @@ subject_id, hadm_id, charttime, specimen_id
100100
WHEN basophils_abs IS NULL AND basophils IS NOT NULL AND impute_abs = 1
101101
THEN basophils * wbc / 100
102102
ELSE basophils_abs
103-
END as numeric),4) AS basophils_abs
103+
END as numeric), 4) AS basophils_abs
104104
, ROUND( CAST( CASE
105105
WHEN eosinophils_abs IS NULL AND eosinophils IS NOT NULL AND impute_abs = 1
106106
THEN eosinophils * wbc / 100
107107
ELSE eosinophils_abs
108-
END as numeric),4) AS eosinophils_abs
108+
END as numeric), 4) AS eosinophils_abs
109109
, ROUND( CAST( CASE
110110
WHEN lymphocytes_abs IS NULL AND lymphocytes IS NOT NULL AND impute_abs = 1
111111
THEN lymphocytes * wbc / 100
112112
ELSE lymphocytes_abs
113-
END as numeric),4) AS lymphocytes_abs
113+
END as numeric), 4) AS lymphocytes_abs
114114
, ROUND( CAST( CASE
115115
WHEN monocytes_abs IS NULL AND monocytes IS NOT NULL AND impute_abs = 1
116116
THEN monocytes * wbc / 100
117117
ELSE monocytes_abs
118-
END as numeric),4) AS monocytes_abs
118+
END as numeric), 4) AS monocytes_abs
119119
, ROUND( CAST( CASE
120120
WHEN neutrophils_abs IS NULL AND neutrophils IS NOT NULL AND impute_abs = 1
121121
THEN neutrophils * wbc / 100
122122
ELSE neutrophils_abs
123-
END as numeric),4) AS neutrophils_abs
123+
END as numeric), 4) AS neutrophils_abs
124124

125125
, basophils
126126
, eosinophils

mimic-iv/concepts/postgres/measurement/height.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ WITH ht_in AS
66
SELECT
77
c.subject_id, c.stay_id, c.charttime
88
-- Ensure that all heights are in centimeters
9-
, ROUND( CAST( c.valuenum * 2.54 as numeric),2) AS height
9+
, ROUND( CAST( c.valuenum * 2.54 as numeric), 2) AS height
1010
, c.valuenum as height_orig
1111
FROM mimiciv_icu.chartevents c
1212
WHERE c.valuenum IS NOT NULL
@@ -18,7 +18,7 @@ WITH ht_in AS
1818
SELECT
1919
c.subject_id, c.stay_id, c.charttime
2020
-- Ensure that all heights are in centimeters
21-
, ROUND( CAST( c.valuenum as numeric),2) AS height
21+
, ROUND( CAST( c.valuenum as numeric), 2) AS height
2222
FROM mimiciv_icu.chartevents c
2323
WHERE c.valuenum IS NOT NULL
2424
-- Height cm

mimic-iv/concepts/postgres/measurement/urine_output_rate.sql

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -79,13 +79,13 @@ select
7979
, ur.urineoutput_6hr
8080
, ur.urineoutput_12hr
8181
, ur.urineoutput_24hr
82-
, CASE WHEN uo_tm_6hr >= 6 THEN ROUND( CAST( CAST((ur.urineoutput_6hr/wd.weight/uo_tm_6hr) AS NUMERIC) as numeric),4) END AS uo_mlkghr_6hr
83-
, CASE WHEN uo_tm_12hr >= 12 THEN ROUND( CAST( CAST((ur.urineoutput_12hr/wd.weight/uo_tm_12hr) AS NUMERIC) as numeric),4) END AS uo_mlkghr_12hr
84-
, CASE WHEN uo_tm_24hr >= 24 THEN ROUND( CAST( CAST((ur.urineoutput_24hr/wd.weight/uo_tm_24hr) AS NUMERIC) as numeric),4) END AS uo_mlkghr_24hr
82+
, CASE WHEN uo_tm_6hr >= 6 THEN ROUND( CAST( CAST((ur.urineoutput_6hr/wd.weight/uo_tm_6hr) AS NUMERIC) as numeric), 4) END AS uo_mlkghr_6hr
83+
, CASE WHEN uo_tm_12hr >= 12 THEN ROUND( CAST( CAST((ur.urineoutput_12hr/wd.weight/uo_tm_12hr) AS NUMERIC) as numeric), 4) END AS uo_mlkghr_12hr
84+
, CASE WHEN uo_tm_24hr >= 24 THEN ROUND( CAST( CAST((ur.urineoutput_24hr/wd.weight/uo_tm_24hr) AS NUMERIC) as numeric), 4) END AS uo_mlkghr_24hr
8585
-- time of earliest UO measurement that was used to calculate the rate
86-
, ROUND( CAST( uo_tm_6hr as numeric),2) AS uo_tm_6hr
87-
, ROUND( CAST( uo_tm_12hr as numeric),2) AS uo_tm_12hr
88-
, ROUND( CAST( uo_tm_24hr as numeric),2) AS uo_tm_24hr
86+
, ROUND( CAST( uo_tm_6hr as numeric), 2) AS uo_tm_6hr
87+
, ROUND( CAST( uo_tm_12hr as numeric), 2) AS uo_tm_12hr
88+
, ROUND( CAST( uo_tm_24hr as numeric), 2) AS uo_tm_24hr
8989
from ur_stg ur
9090
LEFT JOIN mimiciv_derived.weight_durations wd
9191
ON ur.stay_id = wd.stay_id

mimic-iv/concepts/postgres/measurement/vitalsign.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ select
1717
, ROUND( CAST(
1818
AVG(case when itemid in (223761) and valuenum > 70 and valuenum < 120 then (valuenum-32)/1.8 -- converted to degC in valuenum call
1919
when itemid in (223762) and valuenum > 10 and valuenum < 50 then valuenum else null end)
20-
as numeric),2) as temperature
20+
as numeric), 2) as temperature
2121
, MAX(CASE WHEN itemid = 224642 THEN value ELSE NULL END) AS temperature_site
2222
, AVG(case when itemid in (220277) and valuenum > 0 and valuenum <= 100 then valuenum else null end) as spo2
2323
, AVG(case when itemid in (225664,220621,226537) and valuenum > 0 then valuenum else null end) as glucose

0 commit comments

Comments
 (0)