Skip to content

Commit 4ea0513

Browse files
authored
Merge pull request #1183 from MIT-LCP/mimiciii_misc_query_fixes
Misc query fixes to MIMIC-III
2 parents 10613e7 + bb788a4 commit 4ea0513

File tree

5 files changed

+199
-170
lines changed

5 files changed

+199
-170
lines changed

mimic-iii/concepts/durations/epinephrine_dose.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ with vasocv1 as
2424
, max(case when itemid in (30044,30119,30309) then amount else null end) as vaso_amount
2525

2626
FROM `physionet-data.mimiciii_clinical.inputevents_cv` cv
27-
left join `physionet-data.mimiciii_clinical.weight_durations` wd
27+
left join `physionet-data.mimiciii_derived.weight_durations` wd
2828
on cv.icustay_id = wd.icustay_id
2929
and cv.charttime between wd.starttime and wd.endtime
3030
where itemid in

mimic-iii/concepts/durations/norepinephrine_dose.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ with vasocv1 as
2424
, max(case when itemid in (30047,30120) then amount else null end) as vaso_amount
2525

2626
FROM `physionet-data.mimiciii_clinical.inputevents_cv` cv
27-
left join `physionet-data.mimiciii_clinical.weight_durations` wd
27+
left join `physionet-data.mimiciii_derived.weight_durations` wd
2828
on cv.icustay_id = wd.icustay_id
2929
and cv.charttime between wd.starttime and wd.endtime
3030
where itemid in (30047,30120) -- norepinephrine

mimic-iii/concepts/make-concepts.sh

Lines changed: 40 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,14 @@ echo 'Top level files..'
1717
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.code_status < code_status.sql
1818
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.echo_data < echo_data.sql
1919

20+
echo 'Running queries in 10 directories.'
21+
22+
echo 'Directory 1: demographics'
23+
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.heightweight < demographics/heightweight.sql
24+
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.icustay_detail < demographics/icustay_detail.sql
25+
2026
# Durations (usually of treatments)
21-
echo 'Directory 1 of 9: durations'
27+
echo 'Directory 2: durations'
2228
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.ventilation_classification < durations/ventilation_classification.sql
2329
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.ventilation_durations < durations/ventilation_durations.sql
2430
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.crrt_durations < durations/crrt_durations.sql
@@ -33,19 +39,41 @@ bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.phenylephrine_duratio
3339
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.vasopressin_durations < durations/vasopressin_durations.sql
3440
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.vasopressor_durations < durations/vasopressor_durations.sql
3541
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.weight_durations < durations/weight_durations.sql
42+
# dose queries for vasopressors
43+
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.dobutamine_dose < durations/dobutamine_dose.sql
44+
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.dopamine_dose < durations/dopamine_dose.sql
45+
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.epinephrine_dose < durations/epinephrine_dose.sql
46+
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.norepinephrine_dose < durations/norepinephrine_dose.sql
47+
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.phenylephrine_dose < durations/phenylephrine_dose.sql
48+
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.vasopressin_dose < durations/vasopressin_dose.sql
3649

37-
echo 'Directory 2 of 9: comorbidity'
50+
# "pivoted" tables which have icustay_id / timestamp as the primary key
51+
echo 'Directory 3: pivoted tables'
52+
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.pivoted_vital < pivot/pivoted_vital.sql
53+
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.pivoted_uo < pivot/pivoted_uo.sql
54+
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.pivoted_rrt < pivot/pivoted_rrt.sql
55+
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.pivoted_lab < pivot/pivoted_lab.sql
56+
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.pivoted_invasive_lines < pivot/pivoted_invasive_lines.sql
57+
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.pivoted_icp < pivot/pivoted_icp.sql
58+
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.pivoted_height < pivot/pivoted_height.sql
59+
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.pivoted_gcs < pivot/pivoted_gcs.sql
60+
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.pivoted_fio2 < pivot/pivoted_fio2.sql
61+
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.pivoted_bg < pivot/pivoted_bg.sql
62+
# pivoted_bg_art must be run after pivoted_bg
63+
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.pivoted_bg_art < pivot/pivoted_bg_art.sql
64+
# pivoted oasis depends on icustay_hours in demographics
65+
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.pivoted_oasis < pivot/pivoted_oasis.sql
66+
# pivoted sofa depends on many above pivoted views, ventilation_durations, and dose queries
67+
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.pivoted_sofa < pivot/pivoted_sofa.sql
68+
69+
echo 'Directory 4: comorbidity'
3870
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.elixhauser_ahrq_v37 < comorbidity/elixhauser_ahrq_v37.sql
3971
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.elixhauser_ahrq_v37_no_drg < comorbidity/elixhauser_ahrq_v37-no_drg.sql
4072
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.elixhauser_quan < comorbidity/elixhauser_quan.sql
4173
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.elixhauser_score_ahrq < comorbidity/elixhauser_score_ahrq.sql
4274
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.elixhauser_score_quan < comorbidity/elixhauser_score_quan.sql
4375

44-
echo 'Directory 3 of 9: demographics'
45-
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.heightweight < demographics/heightweight.sql
46-
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.icustay_detail < demographics/icustay_detail.sql
47-
48-
echo 'Directory 4 of 9: firstday'
76+
echo 'Directory 5: firstday'
4977
# data which is extracted from a patient's first ICU stay
5078
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.blood_gas_first_day < firstday/blood_gas_first_day.sql
5179
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.blood_gas_first_day_arterial < firstday/blood_gas_first_day_arterial.sql
@@ -57,22 +85,22 @@ bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.ventilation_first_day
5785
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.vitals_first_day < firstday/vitals_first_day.sql
5886
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.weight_first_day < firstday/weight_first_day.sql
5987

60-
echo 'Directory 5 of 9: fluid_balance'
88+
echo 'Directory 6: fluid_balance'
6189
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.urine_output < fluid_balance/urine_output.sql
6290

63-
echo 'Directory 6 of 9: sepsis'
91+
echo 'Directory 7: sepsis'
6492
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.angus < sepsis/angus.sql
6593
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.martin < sepsis/martin.sql
6694
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.explicit < sepsis/explicit.sql
6795

6896
# diagnosis mapping using CCS
69-
echo 'Directory 7 of 9: diagnosis'
97+
echo 'Directory 8: diagnosis'
7098
# load the ccs_multi_dx.csv.gz file into bq
7199
bq load --source_format=CSV ${TARGET_DATASET}.ccs_multi_dx diagnosis/ccs_multi_dx.csv.gz diagnosis/ccs_multi_dx.json
72100
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.ccs_dx < diagnosis/ccs_dx.sql
73101

74102
# Organ failure scores
75-
echo 'Directory 8 of 9: organfailure'
103+
echo 'Directory 9: organfailure'
76104
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.kdigo_creatinine < organfailure/kdigo_creatinine.sql
77105
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.kdigo_uo < organfailure/kdigo_uo.sql
78106
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.kdigo_stages < organfailure/kdigo_stages.sql
@@ -81,7 +109,7 @@ bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.kdigo_stages_48hr < o
81109
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.meld < organfailure/meld.sql
82110

83111
# Severity of illness scores (requires many views from above)
84-
echo 'Directory 9 of 9: severityscores'
112+
echo 'Directory 10: severityscores'
85113
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.oasis < severityscores/oasis.sql
86114
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.sofa < severityscores/sofa.sql
87115
bq query ${BQ_FLAGS} --destination_table=${TARGET_DATASET}.saps < severityscores/saps.sql

mimic-iii/concepts/pivot/pivoted_bg.sql

Lines changed: 0 additions & 156 deletions
Original file line numberDiff line numberDiff line change
@@ -144,159 +144,3 @@ left join iid_assign iid
144144
and grp.charttime >= iid.data_start
145145
and grp.charttime < iid.data_end
146146
order by grp.hadm_id, grp.charttime;
147-
148-
CREATE VIEW `physionet-data.mimiciii_derived.pivoted_bg_art` AS
149-
with stg_spo2 as
150-
(
151-
select hadm_id, charttime
152-
-- avg here is just used to group SpO2 by charttime
153-
, avg(valuenum) as spo2
154-
FROM `physionet-data.mimiciii_clinical.chartevents`
155-
-- o2 sat
156-
where ITEMID in
157-
(
158-
646 -- SpO2
159-
, 220277 -- O2 saturation pulseoxymetry
160-
)
161-
and valuenum > 0 and valuenum <= 100
162-
group by hadm_id, charttime
163-
)
164-
, stg_fio2 as
165-
(
166-
select hadm_id, charttime
167-
-- pre-process the FiO2s to ensure they are between 21-100%
168-
, max(
169-
case
170-
when itemid = 223835
171-
then case
172-
when valuenum > 0 and valuenum <= 1
173-
then valuenum * 100
174-
-- improperly input data - looks like O2 flow in litres
175-
when valuenum > 1 and valuenum < 21
176-
then null
177-
when valuenum >= 21 and valuenum <= 100
178-
then valuenum
179-
else null end -- unphysiological
180-
when itemid in (3420, 3422)
181-
-- all these values are well formatted
182-
then valuenum
183-
when itemid = 190 and valuenum > 0.20 and valuenum < 1
184-
-- well formatted but not in %
185-
then valuenum * 100
186-
else null end
187-
) as fio2_chartevents
188-
FROM `physionet-data.mimiciii_clinical.chartevents`
189-
where ITEMID in
190-
(
191-
3420 -- FiO2
192-
, 190 -- FiO2 set
193-
, 223835 -- Inspired O2 Fraction (FiO2)
194-
, 3422 -- FiO2 [measured]
195-
)
196-
and valuenum > 0 and valuenum < 100
197-
-- exclude rows marked as error
198-
AND (error IS NULL OR error != 1)
199-
group by hadm_id, charttime
200-
)
201-
, stg2 as
202-
(
203-
select bg.*
204-
, row_number() OVER (partition by bg.hadm_id, bg.charttime order by s1.charttime DESC) as lastrowspo2
205-
, s1.spo2
206-
from `physionet-data.mimiciii_derived` bg
207-
left join stg_spo2 s1
208-
-- same hospitalization
209-
on bg.hadm_id = s1.hadm_id
210-
-- spo2 occurred at most 2 hours before this blood gas
211-
and s1.charttime between DATETIME_SUB(bg.charttime, INTERVAL 2 HOUR) and bg.charttime
212-
where bg.po2 is not null
213-
)
214-
, stg3 as
215-
(
216-
select bg.*
217-
, row_number() OVER (partition by bg.hadm_id, bg.charttime order by s2.charttime DESC) as lastrowfio2
218-
, s2.fio2_chartevents
219-
220-
-- create our specimen prediction
221-
, 1/(1+exp(-(-0.02544
222-
+ 0.04598 * po2
223-
+ coalesce(-0.15356 * spo2 , -0.15356 * 97.49420 + 0.13429)
224-
+ coalesce( 0.00621 * fio2_chartevents , 0.00621 * 51.49550 + -0.24958)
225-
+ coalesce( 0.10559 * hemoglobin , 0.10559 * 10.32307 + 0.05954)
226-
+ coalesce( 0.13251 * so2 , 0.13251 * 93.66539 + -0.23172)
227-
+ coalesce(-0.01511 * pco2 , -0.01511 * 42.08866 + -0.01630)
228-
+ coalesce( 0.01480 * fio2 , 0.01480 * 63.97836 + -0.31142)
229-
+ coalesce(-0.00200 * aado2 , -0.00200 * 442.21186 + -0.01328)
230-
+ coalesce(-0.03220 * bicarbonate , -0.03220 * 22.96894 + -0.06535)
231-
+ coalesce( 0.05384 * totalco2 , 0.05384 * 24.72632 + -0.01405)
232-
+ coalesce( 0.08202 * lactate , 0.08202 * 3.06436 + 0.06038)
233-
+ coalesce( 0.10956 * ph , 0.10956 * 7.36233 + -0.00617)
234-
+ coalesce( 0.00848 * o2flow , 0.00848 * 7.59362 + -0.35803)
235-
))) as specimen_prob
236-
from stg2 bg
237-
left join stg_fio2 s2
238-
-- same hospitalization
239-
on bg.hadm_id = s2.hadm_id
240-
-- fio2 occurred at most 4 hours before this blood gas
241-
and s2.charttime between DATETIME_SUB(bg.charttime, INTERVAL 4 HOUR) and bg.charttime
242-
and s2.fio2_chartevents > 0
243-
where bg.lastRowSpO2 = 1 -- only the row with the most recent SpO2 (if no SpO2 found lastRowSpO2 = 1)
244-
)
245-
select
246-
stg3.hadm_id
247-
, stg3.icustay_id
248-
, stg3.charttime
249-
, specimen -- raw data indicating sample type, only present 80% of the time
250-
-- prediction of specimen for missing data
251-
, case
252-
when SPECIMEN is not null then SPECIMEN
253-
when SPECIMEN_PROB > 0.75 then 'ART'
254-
else null end as specimen_pred
255-
, specimen_prob
256-
257-
-- oxygen related parameters
258-
, so2, spo2 -- note spo2 is FROM `physionet-data.mimiciii_clinical.chartevents`
259-
, po2, pco2
260-
, fio2_chartevents, fio2
261-
, aado2
262-
-- also calculate AADO2
263-
, case
264-
when PO2 is not null
265-
and pco2 is not null
266-
and coalesce(FIO2, fio2_chartevents) is not null
267-
-- divide by 100 because FiO2 is in a % but should be a fraction
268-
then (coalesce(FIO2, fio2_chartevents)/100) * (760 - 47) - (pco2/0.8) - po2
269-
else null
270-
end as aado2_calc
271-
, case
272-
when PO2 is not null and coalesce(FIO2, fio2_chartevents) is not null
273-
-- multiply by 100 because FiO2 is in a % but should be a fraction
274-
then 100*PO2/(coalesce(FIO2, fio2_chartevents))
275-
else null
276-
end as pao2fio2ratio
277-
-- acid-base parameters
278-
, ph, baseexcess
279-
, bicarbonate, totalco2
280-
281-
-- blood count parameters
282-
, hematocrit
283-
, hemoglobin
284-
, carboxyhemoglobin
285-
, methemoglobin
286-
287-
-- chemistry
288-
, chloride, calcium
289-
, temperature
290-
, potassium, sodium
291-
, lactate
292-
, glucose
293-
294-
-- ventilation stuff that's sometimes input
295-
, intubated, tidalvolume, ventilationrate, ventilator
296-
, peep, o2flow
297-
, requiredo2
298-
from stg3
299-
where lastRowFiO2 = 1 -- only the most recent FiO2
300-
-- restrict it to *only* arterial samples
301-
and (specimen = 'ART' or specimen_prob > 0.75)
302-
order by hadm_id, charttime;

0 commit comments

Comments
 (0)