
Commit 3761d2f

Merge pull request #1335 from MIT-LCP/update_builddb_mimiciv

Update build scripts for MIMIC-IV v2.0

2 parents: 1988828 + f7514e7

34 files changed: +753 -467 lines

.github/workflows/build-db.yml

Lines changed: 62 additions & 0 deletions
@@ -0,0 +1,62 @@
+name: Test DB build scripts
+on: pull_request
+
+jobs:
+  build-mimic-iv-psql:
+    # Containers must run in Linux based operating systems
+    runs-on: ubuntu-latest
+    # Docker Hub image that `container-job` executes in
+    container: node:latest
+
+    # Service containers to run with `container-job`
+    services:
+      # Label used to access the service container
+      postgres:
+        # Docker Hub image
+        image: postgres
+        # Provide the password for postgres
+        env:
+          POSTGRES_PASSWORD: postgres
+        # Set health checks to wait until postgres has started
+        options: >-
+          --health-cmd pg_isready
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+
+    steps:
+      - name: Check out repository code
+        uses: actions/checkout@v3
+
+      - name: Install psql command
+        run: |
+          apt-get update
+          apt-get install --yes --no-install-recommends postgresql-client
+
+      - id: 'auth'
+        uses: 'google-github-actions/auth@v0'
+        with:
+          project_id: ${{ secrets.GCP_PROJECT_ID }}
+          credentials_json: ${{ secrets.GCP_SA_KEY }}
+
+      - name: 'Set up Cloud SDK'
+        uses: 'google-github-actions/setup-gcloud@v0'
+
+      - name: Download demo and create tables on PostgreSQL
+        run: |
+          echo "Downloading MIMIC-IV demo from GCP."
+          gsutil -q -u $PROJECT_ID -m cp -r gs://mimic-iv-archive/v2.0/demo ./
+          echo "Building and loading data into psql."
+          psql -q -h $POSTGRES_HOST -U postgres -f mimic-iv/buildmimic/postgres/create.sql
+          psql -q -h $POSTGRES_HOST -U postgres -v mimic_data_dir=demo -f mimic-iv/buildmimic/postgres/load_gz.sql
+          echo "Validating build."
+          psql -h $POSTGRES_HOST -U postgres -f mimic-iv/buildmimic/postgres/validate_demo.sql > validate_results.txt
+          cat validate_results.txt
+
+        env:
+          # The hostname used to communicate with the PostgreSQL service container
+          POSTGRES_HOST: postgres
+          PGPASSWORD: postgres
+          # The default PostgreSQL port
+          POSTGRES_PORT: 5432
+          PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}
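For debugging the build outside of CI, roughly the same sequence can be reproduced locally. The sketch below is a hedged stand-in, not part of the workflow: it assumes Docker and `postgresql-client` are installed, that the demo `.csv.gz` files already sit in a local `demo/` directory, and the container name `mimic-test-db` is arbitrary.

```sh
#!/bin/bash
# Hypothetical local stand-in for the CI job above; not part of the repository.
set -e

# Start a throwaway PostgreSQL container, mirroring the CI service container.
docker run --name mimic-test-db -e POSTGRES_PASSWORD=postgres -p 5432:5432 -d postgres

# Wait for postgres to accept connections (CI relies on pg_isready health checks).
until pg_isready -h localhost -p 5432 -U postgres; do sleep 2; done

# Build the schema, load the demo data, and validate, as the workflow does.
export PGPASSWORD=postgres
psql -q -h localhost -U postgres -f mimic-iv/buildmimic/postgres/create.sql
psql -q -h localhost -U postgres -v mimic_data_dir=demo -f mimic-iv/buildmimic/postgres/load_gz.sql
psql -h localhost -U postgres -f mimic-iv/buildmimic/postgres/validate_demo.sql

# Remove the container when done.
docker rm -f mimic-test-db
```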

mimic-iv/buildmimic/bigquery/schemas/ed/diagnosis.json renamed to mimic-iv-ed/buildmimic/bigquery/schemas/ed/diagnosis.json

File renamed without changes.

mimic-iv/buildmimic/bigquery/schemas/ed/edstays.json renamed to mimic-iv-ed/buildmimic/bigquery/schemas/ed/edstays.json

File renamed without changes.

mimic-iv/buildmimic/bigquery/schemas/ed/medrecon.json renamed to mimic-iv-ed/buildmimic/bigquery/schemas/ed/medrecon.json

File renamed without changes.
mimic-iv/buildmimic/bigquery/schemas/ed/pyxis.json renamed to mimic-iv-ed/buildmimic/bigquery/schemas/ed/pyxis.json

File renamed without changes.

mimic-iv/buildmimic/bigquery/schemas/ed/triage.json renamed to mimic-iv-ed/buildmimic/bigquery/schemas/ed/triage.json

File renamed without changes.

mimic-iv/buildmimic/bigquery/schemas/ed/vitalsign.json renamed to mimic-iv-ed/buildmimic/bigquery/schemas/ed/vitalsign.json

File renamed without changes.

mimic-iv/buildmimic/bigquery/schemas/ed/vitalsign_hl7.json renamed to mimic-iv-ed/buildmimic/bigquery/schemas/ed/vitalsign_hl7.json

File renamed without changes.

mimic-iv/buildmimic/bigquery/README.md

Lines changed: 22 additions & 27 deletions
@@ -1,6 +1,6 @@
 # Loading MIMIC-IV to BigQuery
 
-**YOU DO NOT NEED TO INSTALL MIMIC-IV YOURSELF!** MIMIC-IV has been loaded onto BigQuery by the LCP, and is available for credentialed researchers to access. If you are credentialed, then you may be granted access MIMIC-IV on BigQuery instantly by following the [cloud configuration tutorial](https://mimic-iv.mit.edu/docs/access/cloud/).
+**YOU DO NOT NEED TO INSTALL MIMIC-IV YOURSELF!** MIMIC-IV has been loaded onto BigQuery by the LCP, and is available for credentialed researchers to access. If you are credentialed, you may be granted access to MIMIC-IV on BigQuery instantly by following the [cloud configuration tutorial](https://mimic.mit.edu/docs/gettingstarted/cloud/).
 
 The following instructions are provided for transparency and were used to create the current copy of MIMIC-IV on BigQuery.
 
@@ -38,43 +38,39 @@ gcloud init
 
 ---
 
-## STEP 3: Verify you can access the MIMIC-IV files on Google Cloud Storage
+## STEP 3: Download the MIMIC-IV files
 
-### A) Check the content of the bucket.
+Download the MIMIC-IV dataset files. The easiest way to download them is to open a terminal, then run:
 
-```sh
-gsutil ls gs://mimiciv-1.0.physionet.org
 ```
-
-It should list a zip file, and some auxiliary files associated with the project (SHA256SUMS.txt).
-
-```sh
-gs://mimiciv-1.0.physionet.org/mimic-iv-1.0.zip
+wget -r -N -c -np --user YOURUSERNAME --ask-password https://physionet.org/files/mimiciv/2.0/
 ```
 
-Download and extract the zip file locally. Then, upload the resultant folders (`core`, `hosp`, and `icu`) to a GCP bucket of your choice:
+Replace `YOURUSERNAME` with your PhysioNet username.
+
+Then, upload the folders (`hosp` and `icu`) to a GCP bucket of your choice:
 
 ```sh
 bucket="mimic-data"
 
-unzip mimic-iv-1.0.zip
-gsutil -m cp -r core hosp icu gs://$bucket/v1.0/
+gsutil -m cp -r hosp icu gs://$bucket/v2.0/
 ```
 
 ## STEP 4: Create a new BigQuery dataset
 
-### A) Create a new dataset for MIMIC-IV version 1.0
+### A) Create a new dataset for MIMIC-IV version 2.0
 
-In this example, we have chosen **mimic4_v1_0** as the dataset name.
+In this example, we have chosen **mimic4_v2_0** as the dataset prefix for the ICU/hosp modules.
 
 ```sh
-bq mk --dataset --data_location US --description "MIMIC-IV version 1.0" mimic4_v1_0
+bq mk --dataset --data_location US --description "MIMIC-IV version 2.0 ICU data" mimic4_v2_0_icu
+bq mk --dataset --data_location US --description "MIMIC-IV version 2.0 hosp data" mimic4_v2_0_hosp
 ```
 
 ### B) Check the status of the dataset created
 
 ```sh
-bq show mimic4_v1_0
+bq show mimic4_v2_0_hosp
 ```
 
 ---
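As a quick sanity check between the upload and the load, the objects can be listed back from the bucket. This is a hedged aside rather than part of the original instructions; the bucket name comes from the example above:

```sh
# Each module directory should contain one .csv.gz file per table.
bucket="mimic-data"
for module in hosp icu; do
  gsutil ls "gs://$bucket/v2.0/$module/"
done
```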
@@ -101,13 +97,12 @@ BigQuery schemas are provided in this GitHub repository. Download the table sche
 
 ## STEP 6: Create tables and load the compressed files
 
-### A) Create a script file (ex: upload_mimic4_v1_0.sh) and copy the code below.
+### A) Create a script file (ex: upload_mimic4_v2_0.sh) and copy the code below.
 
 You will need to change the **schema_local_folder** to match the path to the schemas on your local machine.
 
 Note also that the below assumes the following dataset structure:
 
-* <dataset_prefix>_core
 * <dataset_prefix>_icu
 * <dataset_prefix>_hosp
 
@@ -118,25 +113,25 @@ If you would like all tables on the same dataset, you should modify the below sc
 
 # Initialize parameters
 bucket="mimic-data" # we chose this bucket earlier when uploading data
-dataset_prefix="mimic"
-schema_local_folder="/home/alistairewj/mimic-iv/v1.0/schemas"
+dataset_prefix="mimic4_v2_0"
+schema_local_folder="$HOME/mimic-code/mimic-iv/buildmimic/bigquery/schemas"
 
 # Get the list of files in the bucket
 
-for module in core hosp icu;
+for module in hosp icu;
 do
-FILES=$(gsutil ls gs://$bucket/v1.0/$module/*.csv.gz)
+FILES=$(gsutil ls gs://$bucket/v2.0/$module/*.csv.gz)
 
 for file in $FILES
 do
 
-# Extract the table name from the file path (ex: gs://mimic4_v1_0/ADMISSIONS.csv.gz)
+# Extract the table name from the file path (ex: gs://mimic4_v2_0/ADMISSIONS.csv.gz)
 base=${file##*/} # remove path
 filename=${base%.*} # remove .gz
 tablename=${filename%.*} # remove .csv
 
 # Create table and populate it with data from the bucket
-echo bq load --allow_quoted_newlines --skip_leading_rows=1 --source_format=CSV --replace ${dataset_prefix}_${module}.$tablename gs://$bucket/v1.0/$module/$tablename.csv.gz $schema_local_folder/$module/$tablename.json
+bq load --allow_quoted_newlines --skip_leading_rows=1 --source_format=CSV --replace ${dataset_prefix}_${module}.$tablename gs://$bucket/v2.0/$module/$tablename.csv.gz $schema_local_folder/$module/$tablename.json
 
 # Check for error
 if [ $? -eq 0 ];then
@@ -155,7 +150,7 @@ This code will get the list of files in the bucket, and for each file, it will e
 ### B) Set the CHMOD to allow the file as executable (ex: 755), and execute the script file
 
 ```sh
-./upload_mimic4_v1_0.sh
+./upload_mimic4_v2_0.sh
 ```
 
 ### C) Results of the upload process
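If one table fails while the script runs, it can be easier to re-run the `bq load` call by hand for that table alone. A hypothetical invocation for `admissions`, with the bucket, dataset prefix, and schema folder assumed from the steps above:

```sh
# One-off load of a single table, useful for isolating a failure.
bq load --allow_quoted_newlines --skip_leading_rows=1 --source_format=CSV --replace \
  mimic4_v2_0_hosp.admissions \
  gs://mimic-data/v2.0/hosp/admissions.csv.gz \
  "$HOME/mimic-code/mimic-iv/buildmimic/bigquery/schemas/hosp/admissions.json"
```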
@@ -254,7 +249,7 @@ We can test a successful build by running a check query.
 
 ```sh
 bq query --use_legacy_sql=False 'select CASE WHEN count(*) = 383220 THEN True ELSE
-False end AS check from `mimic4_v1_0.patients`'
+False end AS check from `mimic4_v2_0_hosp.patients`'
 ```
 
 This verifies we have the expected row count in the patients table. It's further possible to check the row counts of the other tables by comparing to the already existing MIMIC-IV BigQuery dataset available on `physionet-data`.
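In the same spirit, a sketch for pulling per-table row counts to compare against the hosted `physionet-data` copy; the table list here is illustrative and the dataset name is assumed from the earlier steps:

```sh
# Print a row count for each listed table in the hosp dataset.
for t in patients admissions transfers; do
  bq query --use_legacy_sql=False \
    "SELECT '$t' AS tbl, count(*) AS n FROM \`mimic4_v2_0_hosp.$t\`"
done
```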

mimic-iv/buildmimic/bigquery/schemas/core/admissions.json renamed to mimic-iv/buildmimic/bigquery/schemas/hosp/admissions.json

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-[{"name": "subject_id", "type": "INT64", "mode": "REQUIRED"}, {"name": "hadm_id", "type": "INT64", "mode": "REQUIRED"}, {"name": "admittime", "type": "DATETIME", "mode": "REQUIRED"}, {"name": "dischtime", "type": "DATETIME", "mode": "NULLABLE"}, {"name": "deathtime", "type": "DATETIME", "mode": "NULLABLE"}, {"name": "admission_type", "type": "STRING", "mode": "REQUIRED"}, {"name": "admission_location", "type": "STRING", "mode": "NULLABLE"}, {"name": "discharge_location", "type": "STRING", "mode": "NULLABLE"}, {"name": "insurance", "type": "STRING", "mode": "NULLABLE"}, {"name": "language", "type": "STRING", "mode": "NULLABLE"}, {"name": "marital_status", "type": "STRING", "mode": "NULLABLE"}, {"name": "ethnicity", "type": "STRING", "mode": "NULLABLE"}, {"name": "edregtime", "type": "DATETIME", "mode": "NULLABLE"}, {"name": "edouttime", "type": "DATETIME", "mode": "NULLABLE"}, {"name": "hospital_expire_flag", "type": "INT64", "mode": "NULLABLE"}]
+[{"name": "subject_id", "type": "INT64", "mode": "REQUIRED"}, {"name": "hadm_id", "type": "INT64", "mode": "REQUIRED"}, {"name": "admittime", "type": "DATETIME", "mode": "REQUIRED"}, {"name": "dischtime", "type": "DATETIME", "mode": "NULLABLE"}, {"name": "deathtime", "type": "DATETIME", "mode": "NULLABLE"}, {"name": "admission_type", "type": "STRING", "mode": "REQUIRED"}, {"name": "admission_location", "type": "STRING", "mode": "NULLABLE"}, {"name": "discharge_location", "type": "STRING", "mode": "NULLABLE"}, {"name": "insurance", "type": "STRING", "mode": "NULLABLE"}, {"name": "language", "type": "STRING", "mode": "NULLABLE"}, {"name": "marital_status", "type": "STRING", "mode": "NULLABLE"}, {"name": "race", "type": "STRING", "mode": "NULLABLE"}, {"name": "edregtime", "type": "DATETIME", "mode": "NULLABLE"}, {"name": "edouttime", "type": "DATETIME", "mode": "NULLABLE"}, {"name": "hospital_expire_flag", "type": "INT64", "mode": "NULLABLE"}]
