@@ -57,46 +57,20 @@ elif [ -n "$3" ]; then
5757 yell " import.sh takes a maximum of two arguments."
5858 die " Usage: ./import_duckdb.sh mimic_data_dir [output_db]"
5959elif [ -s " $OUTFILE " ]; then
60- yell " File \" $OUTFILE \" already exists."
61- die " Please specify an alternate output db name."
60+ yell " File \" $OUTFILE \" already exists."
61+ read -p " Continue? (y/d/n) 'y' continues, 'd' deletes original file, 'n' stops: " yn
62+ case $yn in
63+ [Yy]* ) ;; # OK
64+ [Nn]* ) exit ;;
65+ [Dd]* ) rm " $OUTFILE " ;;
66+ * ) die " Unrecognized input." ;;
67+ esac
6268fi
6369
64-
65- # create database schemas and tables
66- # below SQL is "postgres_creat_tables_pg10.sql" from mimic-iii postgres git repo,
67- # with the following changes:
68- # 1. Remove optional precision value from TIMESTAMP(NN) -> TIMESTAMP
69- # duckdb does not support this.
70- # 2. Remove PARTITION from chartevents.
70+ # create tables using DDL from postgres
71+ # minor changes: TIMESTAMP(nn) -> TIMESTAMP
7172
7273try duckdb " $OUTFILE " << EOSQL
73- -- -------------------------------------------------------------------------------
74- --
75- -- Create the MIMIC-III tables
76- --
77- -- -------------------------------------------------------------------------------
78-
79- --------------------------------------------------------
80- -- File created - Thursday-November-28-2015
81- --------------------------------------------------------
82-
83- -- If running scripts individually, you can set the schema where all tables are created as follows:
84- -- SET search_path TO mimiciii;
85-
86- -- Restoring the search path to its default value can be accomplished as follows:
87- -- SET search_path TO "$user ",public;
88-
89- /* Set the mimic_data_dir variable to point to directory containing
90- all .csv files. If using Docker, this should not be changed here.
91- Rather, when running the docker container, use the -v option
92- to have Docker mount a host volume to the container path /mimic_data
93- as explained in the README file
94- */
95-
96-
97- --------------------------------------------------------
98- -- DDL for Table ADMISSIONS
99- --------------------------------------------------------
10074
10175DROP TABLE IF EXISTS ADMISSIONS CASCADE;
10276CREATE TABLE ADMISSIONS
@@ -124,10 +98,6 @@ CREATE TABLE ADMISSIONS
12498 CONSTRAINT adm_hadm_unique UNIQUE (HADM_ID)
12599) ;
126100
127- --------------------------------------------------------
128- -- DDL for Table CALLOUT
129- --------------------------------------------------------
130-
131101DROP TABLE IF EXISTS CALLOUT CASCADE;
132102CREATE TABLE CALLOUT
133103(
@@ -158,10 +128,6 @@ CREATE TABLE CALLOUT
158128 CONSTRAINT callout_rowid_pk PRIMARY KEY (ROW_ID)
159129) ;
160130
161- --------------------------------------------------------
162- -- DDL for Table CAREGIVERS
163- --------------------------------------------------------
164-
165131DROP TABLE IF EXISTS CAREGIVERS CASCADE;
166132CREATE TABLE CAREGIVERS
167133(
@@ -173,10 +139,6 @@ CREATE TABLE CAREGIVERS
173139 CONSTRAINT cg_cgid_unique UNIQUE (CGID)
174140) ;
175141
176- --------------------------------------------------------
177- -- DDL for Table CHARTEVENTS
178- --------------------------------------------------------
179-
180142DROP TABLE IF EXISTS chartevents CASCADE;
181143CREATE TABLE chartevents
182144(
@@ -197,11 +159,6 @@ CREATE TABLE chartevents
197159 STOPPED VARCHAR(50)
198160);
199161
200-
201- --------------------------------------------------------
202- -- DDL for Table CPTEVENTS
203- --------------------------------------------------------
204-
205162DROP TABLE IF EXISTS CPTEVENTS CASCADE;
206163CREATE TABLE CPTEVENTS
207164(
@@ -220,10 +177,6 @@ CREATE TABLE CPTEVENTS
220177 CONSTRAINT cpt_rowid_pk PRIMARY KEY (ROW_ID)
221178) ;
222179
223- --------------------------------------------------------
224- -- DDL for Table DATETIMEEVENTS
225- --------------------------------------------------------
226-
227180DROP TABLE IF EXISTS DATETIMEEVENTS CASCADE;
228181CREATE TABLE DATETIMEEVENTS
229182(
@@ -244,10 +197,6 @@ CREATE TABLE DATETIMEEVENTS
244197 CONSTRAINT datetime_rowid_pk PRIMARY KEY (ROW_ID)
245198) ;
246199
247- --------------------------------------------------------
248- -- DDL for Table DIAGNOSES_ICD
249- --------------------------------------------------------
250-
251200DROP TABLE IF EXISTS DIAGNOSES_ICD CASCADE;
252201CREATE TABLE DIAGNOSES_ICD
253202(
@@ -259,10 +208,6 @@ CREATE TABLE DIAGNOSES_ICD
259208 CONSTRAINT diagnosesicd_rowid_pk PRIMARY KEY (ROW_ID)
260209) ;
261210
262- --------------------------------------------------------
263- -- DDL for Table DRGCODES
264- --------------------------------------------------------
265-
266211DROP TABLE IF EXISTS DRGCODES CASCADE;
267212CREATE TABLE DRGCODES
268213(
@@ -277,10 +222,6 @@ CREATE TABLE DRGCODES
277222 CONSTRAINT drg_rowid_pk PRIMARY KEY (ROW_ID)
278223) ;
279224
280- --------------------------------------------------------
281- -- DDL for Table D_CPT
282- --------------------------------------------------------
283-
284225DROP TABLE IF EXISTS D_CPT CASCADE;
285226CREATE TABLE D_CPT
286227(
@@ -297,10 +238,6 @@ CREATE TABLE D_CPT
297238 CONSTRAINT dcpt_rowid_pk PRIMARY KEY (ROW_ID)
298239) ;
299240
300- --------------------------------------------------------
301- -- DDL for Table D_ICD_DIAGNOSES
302- --------------------------------------------------------
303-
304241DROP TABLE IF EXISTS D_ICD_DIAGNOSES CASCADE;
305242CREATE TABLE D_ICD_DIAGNOSES
306243(
@@ -312,10 +249,6 @@ CREATE TABLE D_ICD_DIAGNOSES
312249 CONSTRAINT d_icd_diag_rowid_pk PRIMARY KEY (ROW_ID)
313250) ;
314251
315- --------------------------------------------------------
316- -- DDL for Table D_ICD_PROCEDURES
317- --------------------------------------------------------
318-
319252DROP TABLE IF EXISTS D_ICD_PROCEDURES CASCADE;
320253CREATE TABLE D_ICD_PROCEDURES
321254(
@@ -327,10 +260,6 @@ CREATE TABLE D_ICD_PROCEDURES
327260 CONSTRAINT d_icd_proc_rowid_pk PRIMARY KEY (ROW_ID)
328261) ;
329262
330- --------------------------------------------------------
331- -- DDL for Table D_ITEMS
332- --------------------------------------------------------
333-
334263DROP TABLE IF EXISTS D_ITEMS CASCADE;
335264CREATE TABLE D_ITEMS
336265(
@@ -348,10 +277,6 @@ CREATE TABLE D_ITEMS
348277 CONSTRAINT ditems_rowid_pk PRIMARY KEY (ROW_ID)
349278) ;
350279
351- --------------------------------------------------------
352- -- DDL for Table D_LABITEMS
353- --------------------------------------------------------
354-
355280DROP TABLE IF EXISTS D_LABITEMS CASCADE;
356281CREATE TABLE D_LABITEMS
357282(
@@ -365,10 +290,6 @@ CREATE TABLE D_LABITEMS
365290 CONSTRAINT dlabitems_rowid_pk PRIMARY KEY (ROW_ID)
366291) ;
367292
368- --------------------------------------------------------
369- -- DDL for Table ICUSTAYS
370- --------------------------------------------------------
371-
372293DROP TABLE IF EXISTS ICUSTAYS CASCADE;
373294CREATE TABLE ICUSTAYS
374295(
@@ -388,10 +309,6 @@ CREATE TABLE ICUSTAYS
388309 CONSTRAINT icustay_rowid_pk PRIMARY KEY (ROW_ID)
389310) ;
390311
391- --------------------------------------------------------
392- -- DDL for Table INPUTEVENTS_CV
393- --------------------------------------------------------
394-
395312DROP TABLE IF EXISTS INPUTEVENTS_CV CASCADE;
396313CREATE TABLE INPUTEVENTS_CV
397314(
@@ -420,10 +337,6 @@ CREATE TABLE INPUTEVENTS_CV
420337 CONSTRAINT inputevents_cv_rowid_pk PRIMARY KEY (ROW_ID)
421338) ;
422339
423- --------------------------------------------------------
424- -- DDL for Table INPUTEVENTS_MV
425- --------------------------------------------------------
426-
427340DROP TABLE IF EXISTS INPUTEVENTS_MV CASCADE;
428341CREATE TABLE INPUTEVENTS_MV
429342(
@@ -461,10 +374,6 @@ CREATE TABLE INPUTEVENTS_MV
461374 CONSTRAINT inputevents_mv_rowid_pk PRIMARY KEY (ROW_ID)
462375) ;
463376
464- --------------------------------------------------------
465- -- DDL for Table LABEVENTS
466- --------------------------------------------------------
467-
468377DROP TABLE IF EXISTS LABEVENTS CASCADE;
469378CREATE TABLE LABEVENTS
470379(
@@ -480,10 +389,6 @@ CREATE TABLE LABEVENTS
480389 CONSTRAINT labevents_rowid_pk PRIMARY KEY (ROW_ID)
481390) ;
482391
483- --------------------------------------------------------
484- -- DDL for Table MICROBIOLOGYEVENTS
485- --------------------------------------------------------
486-
487392DROP TABLE IF EXISTS MICROBIOLOGYEVENTS CASCADE;
488393CREATE TABLE MICROBIOLOGYEVENTS
489394(
@@ -506,10 +411,6 @@ CREATE TABLE MICROBIOLOGYEVENTS
506411 CONSTRAINT micro_rowid_pk PRIMARY KEY (ROW_ID)
507412) ;
508413
509- --------------------------------------------------------
510- -- DDL for Table NOTEEVENTS
511- --------------------------------------------------------
512-
513414DROP TABLE IF EXISTS NOTEEVENTS CASCADE;
514415CREATE TABLE NOTEEVENTS
515416(
@@ -527,10 +428,6 @@ CREATE TABLE NOTEEVENTS
527428 CONSTRAINT noteevents_rowid_pk PRIMARY KEY (ROW_ID)
528429) ;
529430
530- --------------------------------------------------------
531- -- DDL for Table OUTPUTEVENTS
532- --------------------------------------------------------
533-
534431DROP TABLE IF EXISTS OUTPUTEVENTS CASCADE;
535432CREATE TABLE OUTPUTEVENTS
536433(
@@ -550,10 +447,6 @@ CREATE TABLE OUTPUTEVENTS
550447 CONSTRAINT outputevents_cv_rowid_pk PRIMARY KEY (ROW_ID)
551448) ;
552449
553- --------------------------------------------------------
554- -- DDL for Table PATIENTS
555- --------------------------------------------------------
556-
557450DROP TABLE IF EXISTS PATIENTS CASCADE;
558451CREATE TABLE PATIENTS
559452(
@@ -569,10 +462,6 @@ CREATE TABLE PATIENTS
569462 CONSTRAINT pat_rowid_pk PRIMARY KEY (ROW_ID)
570463) ;
571464
572- --------------------------------------------------------
573- -- DDL for Table PRESCRIPTIONS
574- --------------------------------------------------------
575-
576465DROP TABLE IF EXISTS PRESCRIPTIONS CASCADE;
577466CREATE TABLE PRESCRIPTIONS
578467(
@@ -598,10 +487,6 @@ CREATE TABLE PRESCRIPTIONS
598487 CONSTRAINT prescription_rowid_pk PRIMARY KEY (ROW_ID)
599488) ;
600489
601- --------------------------------------------------------
602- -- DDL for Table PROCEDUREEVENTS_MV
603- --------------------------------------------------------
604-
605490DROP TABLE IF EXISTS PROCEDUREEVENTS_MV CASCADE;
606491CREATE TABLE PROCEDUREEVENTS_MV
607492(
@@ -633,10 +518,6 @@ CREATE TABLE PROCEDUREEVENTS_MV
633518 CONSTRAINT procedureevents_mv_rowid_pk PRIMARY KEY (ROW_ID)
634519) ;
635520
636- --------------------------------------------------------
637- -- DDL for Table PROCEDURES_ICD
638- --------------------------------------------------------
639-
640521DROP TABLE IF EXISTS PROCEDURES_ICD CASCADE;
641522CREATE TABLE PROCEDURES_ICD
642523(
@@ -648,10 +529,6 @@ CREATE TABLE PROCEDURES_ICD
648529 CONSTRAINT proceduresicd_rowid_pk PRIMARY KEY (ROW_ID)
649530) ;
650531
651- --------------------------------------------------------
652- -- DDL for Table SERVICES
653- --------------------------------------------------------
654-
655532DROP TABLE IF EXISTS SERVICES CASCADE;
656533CREATE TABLE SERVICES
657534(
@@ -664,10 +541,6 @@ CREATE TABLE SERVICES
664541 CONSTRAINT services_rowid_pk PRIMARY KEY (ROW_ID)
665542) ;
666543
667- --------------------------------------------------------
668- -- DDL for Table TRANSFERS
669- --------------------------------------------------------
670-
671544DROP TABLE IF EXISTS TRANSFERS CASCADE;
672545CREATE TABLE TRANSFERS
673546(
@@ -695,20 +568,15 @@ make_table_name () {
695568 # strip leading directories (e.g., ./icu/hello.csv.gz -> hello.csv.gz)
696569 BASENAME=${1##*/ }
697570 # strip suffix (e.g., hello.csv.gz -> hello; hello.csv -> hello)
698- TABLE_NAME=${BASENAME%% .* }
699- # strip basename (e.g., ./icu/hello.csv.gz -> ./icu)
700- # PATHNAME=${1%/*}
701- # strip leading directories from PATHNAME (e.g. ./icu -> icu)
702- # DIRNAME=${PATHNAME##*/}
703- TABLE_NAME=" $TABLE_NAME "
571+ TABLE_NAME=" ${BASENAME%% .* } "
704572}
705573
706574# load data into database
707575find " $MIMIC_DIR " -type f -name ' *.csv???' | while IFS= read -r FILE; do
708576 make_table_name " $FILE "
709- echo " Loading $FILE . "
577+ echo " Loading $FILE .. \c "
710578 try duckdb " $OUTFILE " << -EOSQL
711579 COPY $TABLE_NAME FROM '$FILE ' (HEADER);
712580EOSQL
713- echo " Finished loading $FILE . "
581+ echo " done! "
714582done && echo " Successfully finished loading data into $OUTFILE ."
0 commit comments