Skip to content

Commit a7425b2

Browse files
committed
update with improvements from mimic-iv script
1 parent 06e4a74 commit a7425b2

File tree

1 file changed

+13
-145
lines changed

1 file changed

+13
-145
lines changed

mimic-iii/buildmimic/duckdb/import_duckdb.sh

Lines changed: 13 additions & 145 deletions
Original file line numberDiff line numberDiff line change
@@ -57,46 +57,20 @@ elif [ -n "$3" ]; then
5757
yell "import.sh takes a maximum of two arguments."
5858
die "Usage: ./import_duckdb.sh mimic_data_dir [output_db]"
5959
elif [ -s "$OUTFILE" ]; then
60-
yell "File \"$OUTFILE\" already exists."
61-
die "Please specify an alternate output db name."
60+
yell "File \"$OUTFILE\" already exists."
61+
read -p "Continue? (y/d/n) 'y' continues, 'd' deletes original file, 'n' stops: " yn
62+
case $yn in
63+
[Yy]* ) ;; # OK
64+
[Nn]* ) exit;;
65+
[Dd]* ) rm "$OUTFILE";;
66+
* ) die "Unrecognized input.";;
67+
esac
6268
fi
6369

64-
65-
# create database schemas and tables
66-
# The SQL below is "postgres_create_tables_pg10.sql" from the mimic-iii postgres git repo,
67-
# with the following changes:
68-
# 1. Remove optional precision value from TIMESTAMP(NN) -> TIMESTAMP
69-
# duckdb does not support this.
70-
# 2. Remove PARTITION from chartevents.
70+
# create tables using DDL from postgres
71+
# minor changes: TIMESTAMP(nn) -> TIMESTAMP (duckdb does not support the precision value); PARTITION removed from chartevents
7172

7273
try duckdb "$OUTFILE" <<EOSQL
73-
-- -------------------------------------------------------------------------------
74-
--
75-
-- Create the MIMIC-III tables
76-
--
77-
-- -------------------------------------------------------------------------------
78-
79-
--------------------------------------------------------
80-
-- File created - Thursday-November-28-2015
81-
--------------------------------------------------------
82-
83-
-- If running scripts individually, you can set the schema where all tables are created as follows:
84-
-- SET search_path TO mimiciii;
85-
86-
-- Restoring the search path to its default value can be accomplished as follows:
87-
-- SET search_path TO "$user",public;
88-
89-
/* Set the mimic_data_dir variable to point to directory containing
90-
all .csv files. If using Docker, this should not be changed here.
91-
Rather, when running the docker container, use the -v option
92-
to have Docker mount a host volume to the container path /mimic_data
93-
as explained in the README file
94-
*/
95-
96-
97-
--------------------------------------------------------
98-
-- DDL for Table ADMISSIONS
99-
--------------------------------------------------------
10074
10175
DROP TABLE IF EXISTS ADMISSIONS CASCADE;
10276
CREATE TABLE ADMISSIONS
@@ -124,10 +98,6 @@ CREATE TABLE ADMISSIONS
12498
CONSTRAINT adm_hadm_unique UNIQUE (HADM_ID)
12599
) ;
126100
127-
--------------------------------------------------------
128-
-- DDL for Table CALLOUT
129-
--------------------------------------------------------
130-
131101
DROP TABLE IF EXISTS CALLOUT CASCADE;
132102
CREATE TABLE CALLOUT
133103
(
@@ -158,10 +128,6 @@ CREATE TABLE CALLOUT
158128
CONSTRAINT callout_rowid_pk PRIMARY KEY (ROW_ID)
159129
) ;
160130
161-
--------------------------------------------------------
162-
-- DDL for Table CAREGIVERS
163-
--------------------------------------------------------
164-
165131
DROP TABLE IF EXISTS CAREGIVERS CASCADE;
166132
CREATE TABLE CAREGIVERS
167133
(
@@ -173,10 +139,6 @@ CREATE TABLE CAREGIVERS
173139
CONSTRAINT cg_cgid_unique UNIQUE (CGID)
174140
) ;
175141
176-
--------------------------------------------------------
177-
-- DDL for Table CHARTEVENTS
178-
--------------------------------------------------------
179-
180142
DROP TABLE IF EXISTS chartevents CASCADE;
181143
CREATE TABLE chartevents
182144
(
@@ -197,11 +159,6 @@ CREATE TABLE chartevents
197159
STOPPED VARCHAR(50)
198160
);
199161
200-
201-
--------------------------------------------------------
202-
-- DDL for Table CPTEVENTS
203-
--------------------------------------------------------
204-
205162
DROP TABLE IF EXISTS CPTEVENTS CASCADE;
206163
CREATE TABLE CPTEVENTS
207164
(
@@ -220,10 +177,6 @@ CREATE TABLE CPTEVENTS
220177
CONSTRAINT cpt_rowid_pk PRIMARY KEY (ROW_ID)
221178
) ;
222179
223-
--------------------------------------------------------
224-
-- DDL for Table DATETIMEEVENTS
225-
--------------------------------------------------------
226-
227180
DROP TABLE IF EXISTS DATETIMEEVENTS CASCADE;
228181
CREATE TABLE DATETIMEEVENTS
229182
(
@@ -244,10 +197,6 @@ CREATE TABLE DATETIMEEVENTS
244197
CONSTRAINT datetime_rowid_pk PRIMARY KEY (ROW_ID)
245198
) ;
246199
247-
--------------------------------------------------------
248-
-- DDL for Table DIAGNOSES_ICD
249-
--------------------------------------------------------
250-
251200
DROP TABLE IF EXISTS DIAGNOSES_ICD CASCADE;
252201
CREATE TABLE DIAGNOSES_ICD
253202
(
@@ -259,10 +208,6 @@ CREATE TABLE DIAGNOSES_ICD
259208
CONSTRAINT diagnosesicd_rowid_pk PRIMARY KEY (ROW_ID)
260209
) ;
261210
262-
--------------------------------------------------------
263-
-- DDL for Table DRGCODES
264-
--------------------------------------------------------
265-
266211
DROP TABLE IF EXISTS DRGCODES CASCADE;
267212
CREATE TABLE DRGCODES
268213
(
@@ -277,10 +222,6 @@ CREATE TABLE DRGCODES
277222
CONSTRAINT drg_rowid_pk PRIMARY KEY (ROW_ID)
278223
) ;
279224
280-
--------------------------------------------------------
281-
-- DDL for Table D_CPT
282-
--------------------------------------------------------
283-
284225
DROP TABLE IF EXISTS D_CPT CASCADE;
285226
CREATE TABLE D_CPT
286227
(
@@ -297,10 +238,6 @@ CREATE TABLE D_CPT
297238
CONSTRAINT dcpt_rowid_pk PRIMARY KEY (ROW_ID)
298239
) ;
299240
300-
--------------------------------------------------------
301-
-- DDL for Table D_ICD_DIAGNOSES
302-
--------------------------------------------------------
303-
304241
DROP TABLE IF EXISTS D_ICD_DIAGNOSES CASCADE;
305242
CREATE TABLE D_ICD_DIAGNOSES
306243
(
@@ -312,10 +249,6 @@ CREATE TABLE D_ICD_DIAGNOSES
312249
CONSTRAINT d_icd_diag_rowid_pk PRIMARY KEY (ROW_ID)
313250
) ;
314251
315-
--------------------------------------------------------
316-
-- DDL for Table D_ICD_PROCEDURES
317-
--------------------------------------------------------
318-
319252
DROP TABLE IF EXISTS D_ICD_PROCEDURES CASCADE;
320253
CREATE TABLE D_ICD_PROCEDURES
321254
(
@@ -327,10 +260,6 @@ CREATE TABLE D_ICD_PROCEDURES
327260
CONSTRAINT d_icd_proc_rowid_pk PRIMARY KEY (ROW_ID)
328261
) ;
329262
330-
--------------------------------------------------------
331-
-- DDL for Table D_ITEMS
332-
--------------------------------------------------------
333-
334263
DROP TABLE IF EXISTS D_ITEMS CASCADE;
335264
CREATE TABLE D_ITEMS
336265
(
@@ -348,10 +277,6 @@ CREATE TABLE D_ITEMS
348277
CONSTRAINT ditems_rowid_pk PRIMARY KEY (ROW_ID)
349278
) ;
350279
351-
--------------------------------------------------------
352-
-- DDL for Table D_LABITEMS
353-
--------------------------------------------------------
354-
355280
DROP TABLE IF EXISTS D_LABITEMS CASCADE;
356281
CREATE TABLE D_LABITEMS
357282
(
@@ -365,10 +290,6 @@ CREATE TABLE D_LABITEMS
365290
CONSTRAINT dlabitems_rowid_pk PRIMARY KEY (ROW_ID)
366291
) ;
367292
368-
--------------------------------------------------------
369-
-- DDL for Table ICUSTAYS
370-
--------------------------------------------------------
371-
372293
DROP TABLE IF EXISTS ICUSTAYS CASCADE;
373294
CREATE TABLE ICUSTAYS
374295
(
@@ -388,10 +309,6 @@ CREATE TABLE ICUSTAYS
388309
CONSTRAINT icustay_rowid_pk PRIMARY KEY (ROW_ID)
389310
) ;
390311
391-
--------------------------------------------------------
392-
-- DDL for Table INPUTEVENTS_CV
393-
--------------------------------------------------------
394-
395312
DROP TABLE IF EXISTS INPUTEVENTS_CV CASCADE;
396313
CREATE TABLE INPUTEVENTS_CV
397314
(
@@ -420,10 +337,6 @@ CREATE TABLE INPUTEVENTS_CV
420337
CONSTRAINT inputevents_cv_rowid_pk PRIMARY KEY (ROW_ID)
421338
) ;
422339
423-
--------------------------------------------------------
424-
-- DDL for Table INPUTEVENTS_MV
425-
--------------------------------------------------------
426-
427340
DROP TABLE IF EXISTS INPUTEVENTS_MV CASCADE;
428341
CREATE TABLE INPUTEVENTS_MV
429342
(
@@ -461,10 +374,6 @@ CREATE TABLE INPUTEVENTS_MV
461374
CONSTRAINT inputevents_mv_rowid_pk PRIMARY KEY (ROW_ID)
462375
) ;
463376
464-
--------------------------------------------------------
465-
-- DDL for Table LABEVENTS
466-
--------------------------------------------------------
467-
468377
DROP TABLE IF EXISTS LABEVENTS CASCADE;
469378
CREATE TABLE LABEVENTS
470379
(
@@ -480,10 +389,6 @@ CREATE TABLE LABEVENTS
480389
CONSTRAINT labevents_rowid_pk PRIMARY KEY (ROW_ID)
481390
) ;
482391
483-
--------------------------------------------------------
484-
-- DDL for Table MICROBIOLOGYEVENTS
485-
--------------------------------------------------------
486-
487392
DROP TABLE IF EXISTS MICROBIOLOGYEVENTS CASCADE;
488393
CREATE TABLE MICROBIOLOGYEVENTS
489394
(
@@ -506,10 +411,6 @@ CREATE TABLE MICROBIOLOGYEVENTS
506411
CONSTRAINT micro_rowid_pk PRIMARY KEY (ROW_ID)
507412
) ;
508413
509-
--------------------------------------------------------
510-
-- DDL for Table NOTEEVENTS
511-
--------------------------------------------------------
512-
513414
DROP TABLE IF EXISTS NOTEEVENTS CASCADE;
514415
CREATE TABLE NOTEEVENTS
515416
(
@@ -527,10 +428,6 @@ CREATE TABLE NOTEEVENTS
527428
CONSTRAINT noteevents_rowid_pk PRIMARY KEY (ROW_ID)
528429
) ;
529430
530-
--------------------------------------------------------
531-
-- DDL for Table OUTPUTEVENTS
532-
--------------------------------------------------------
533-
534431
DROP TABLE IF EXISTS OUTPUTEVENTS CASCADE;
535432
CREATE TABLE OUTPUTEVENTS
536433
(
@@ -550,10 +447,6 @@ CREATE TABLE OUTPUTEVENTS
550447
CONSTRAINT outputevents_cv_rowid_pk PRIMARY KEY (ROW_ID)
551448
) ;
552449
553-
--------------------------------------------------------
554-
-- DDL for Table PATIENTS
555-
--------------------------------------------------------
556-
557450
DROP TABLE IF EXISTS PATIENTS CASCADE;
558451
CREATE TABLE PATIENTS
559452
(
@@ -569,10 +462,6 @@ CREATE TABLE PATIENTS
569462
CONSTRAINT pat_rowid_pk PRIMARY KEY (ROW_ID)
570463
) ;
571464
572-
--------------------------------------------------------
573-
-- DDL for Table PRESCRIPTIONS
574-
--------------------------------------------------------
575-
576465
DROP TABLE IF EXISTS PRESCRIPTIONS CASCADE;
577466
CREATE TABLE PRESCRIPTIONS
578467
(
@@ -598,10 +487,6 @@ CREATE TABLE PRESCRIPTIONS
598487
CONSTRAINT prescription_rowid_pk PRIMARY KEY (ROW_ID)
599488
) ;
600489
601-
--------------------------------------------------------
602-
-- DDL for Table PROCEDUREEVENTS_MV
603-
--------------------------------------------------------
604-
605490
DROP TABLE IF EXISTS PROCEDUREEVENTS_MV CASCADE;
606491
CREATE TABLE PROCEDUREEVENTS_MV
607492
(
@@ -633,10 +518,6 @@ CREATE TABLE PROCEDUREEVENTS_MV
633518
CONSTRAINT procedureevents_mv_rowid_pk PRIMARY KEY (ROW_ID)
634519
) ;
635520
636-
--------------------------------------------------------
637-
-- DDL for Table PROCEDURES_ICD
638-
--------------------------------------------------------
639-
640521
DROP TABLE IF EXISTS PROCEDURES_ICD CASCADE;
641522
CREATE TABLE PROCEDURES_ICD
642523
(
@@ -648,10 +529,6 @@ CREATE TABLE PROCEDURES_ICD
648529
CONSTRAINT proceduresicd_rowid_pk PRIMARY KEY (ROW_ID)
649530
) ;
650531
651-
--------------------------------------------------------
652-
-- DDL for Table SERVICES
653-
--------------------------------------------------------
654-
655532
DROP TABLE IF EXISTS SERVICES CASCADE;
656533
CREATE TABLE SERVICES
657534
(
@@ -664,10 +541,6 @@ CREATE TABLE SERVICES
664541
CONSTRAINT services_rowid_pk PRIMARY KEY (ROW_ID)
665542
) ;
666543
667-
--------------------------------------------------------
668-
-- DDL for Table TRANSFERS
669-
--------------------------------------------------------
670-
671544
DROP TABLE IF EXISTS TRANSFERS CASCADE;
672545
CREATE TABLE TRANSFERS
673546
(
@@ -695,20 +568,15 @@ make_table_name () {
695568
# strip leading directories (e.g., ./icu/hello.csv.gz -> hello.csv.gz)
696569
BASENAME=${1##*/}
697570
# strip suffix (e.g., hello.csv.gz -> hello; hello.csv -> hello)
698-
TABLE_NAME=${BASENAME%%.*}
699-
# strip basename (e.g., ./icu/hello.csv.gz -> ./icu)
700-
#PATHNAME=${1%/*}
701-
# strip leading directories from PATHNAME (e.g. ./icu -> icu)
702-
#DIRNAME=${PATHNAME##*/}
703-
TABLE_NAME="$TABLE_NAME"
571+
TABLE_NAME="${BASENAME%%.*}"
704572
}
705573

706574
# load data into database
707575
find "$MIMIC_DIR" -type f -name '*.csv???' | while IFS= read -r FILE; do
708576
make_table_name "$FILE"
709-
echo "Loading $FILE."
577+
# Print the progress prefix without a trailing newline so the later "done!" lands on the same line.
# printf is used instead of echo: bash's builtin echo prints "\c" literally unless -e or xpg_echo is enabled.
printf 'Loading %s .. ' "$FILE"
710578
try duckdb "$OUTFILE" <<-EOSQL
711579
COPY $TABLE_NAME FROM '$FILE' (HEADER);
712580
EOSQL
713-
echo "Finished loading $FILE."
581+
echo "done!"
714582
done && echo "Successfully finished loading data into $OUTFILE."

0 commit comments

Comments
 (0)