GITBOOK-468: change request with no subject merged in GitBook

fedorov · gitbook-bot · commit 2a011e160241 · 2026-02-13T22:52:55.000Z
diff --git a/data/organization-of-data/files-and-metadata.md b/data/organization-of-data/files-and-metadata.md
@@ -5,57 +5,46 @@ We gratefully acknowledge [Google Public Data Program](https://console.cloud.goo
 {% endhint %}
 
 ```mermaid
-graph TB
-    DCM["<b>DICOM FILES (.dcm)</b><br/>Named by crdc_instance_uuid, grouped by crdc_series_uuid"]
-
-    DCM -->|"stored in"| BUCKETS
-
-    subgraph BUCKETS["CLOUD STORAGE BUCKETS (AWS S3 + GCS mirrors)"]
-        direction LR
-        B1["idc-open-data<br/>~90%, CC BY"]
-        B2["idc-open-data-two / idc1<br/>head scans"]
-        B3["idc-open-data-cr / cr<br/>~4%, CC BY-NC"]
-    end
-
-    B1 & B2 & B3 -->|"all 3 buckets imported"| PROXY
-    B1 -->|"replicated into"| GHC
-
-    subgraph STORES["DICOMweb / DICOM STORES"]
-        direction LR
-        PROXY["IDC Public Proxy<br/>No auth, 100% coverage"]
-        GHC["Google Healthcare API<br/>Auth required, ~96% coverage"]
-    end
-
-    GHC -->|"DICOM metadata exported to"| BQ
-
-    subgraph BQ["BigQuery (GCP auth + billing)"]
-        BQ_DESC["All 4000+ DICOM tags &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;<br/>Tables: dicom_all, dicom_metadata, clinical"]
-    end
-
-    BQ -->|"~50 key columns queried via SQL"| IDX
-    BQ -->|"tables exported to"| S3BQ
-    S3BQ["Parquet files in AWS S3"]
-
-    subgraph IDX["idc-index PARQUET FILES (no auth)"]
-        IDX_DESC["~50 key columns per series, bundled in Python package &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;<br/>Auto-loaded: index, prior_versions_index<br/>On-demand: collections, seg, sm, ann, clinical, contrast"]
-    end
-
-    IDX -.->|"SeriesInstanceUID for DICOMweb queries"| STORES
-    IDX -.->|"series_aws_url / crdc_series_uuid maps to bucket paths"| BUCKETS
-
-    style DCM fill:#fff3e0,stroke:#FF9800,stroke-width:2px,color:#000
-    style BUCKETS fill:#e8f4fd,stroke:#2196F3,stroke-width:2px,color:#000
-    style B1 fill:#e8f4fd,stroke:#2196F3,color:#000
-    style B2 fill:#e8f4fd,stroke:#2196F3,color:#000
-    style B3 fill:#e8f4fd,stroke:#2196F3,color:#000
-    style STORES fill:#f3e5f5,stroke:#9C27B0,stroke-width:2px,color:#000
-    style PROXY fill:#f3e5f5,stroke:#9C27B0,color:#000
-    style GHC fill:#f3e5f5,stroke:#9C27B0,color:#000
-    style BQ fill:#fce4ec,stroke:#E91E63,stroke-width:2px,color:#000
-    style BQ_DESC fill:#fce4ec,stroke:none,color:#000
-    style IDX fill:#e8f5e9,stroke:#4CAF50,stroke-width:2px,color:#000
-    style S3BQ fill:#e8f4fd,stroke:#2196F3,stroke-width:2px,color:#000
-    style IDX_DESC fill:#e8f5e9,stroke:none,color:#000
+flowchart TB
+ subgraph BUCKETS["CLOUD STORAGE BUCKETS (AWS S3 + GCS mirrors)"]
+    direction LR
+        B1["gs://idc-open-data<br>s3://idc-open-data<br>~90%, CC BY"]
+        B2["gs://idc-open-idc1<br>s3://idc-open-data-two<br>potential head scans, CC BY"]
+        B3["gs://idc-open-cr<br>s3://idc-open-data-cr<br>~4%, CC BY-NC"]
+  end
+ subgraph STORES["DICOMweb / DICOM STORES"]
+    direction LR
+        PROXY["IDC DICOM store<br>IDC Public Proxy in front of Google Healthcare DICOM store<br>No auth, 100% coverage"]
+        GHC["Google Healthcare DICOM store<br>Google Healthcare API<br>Auth required, >95% coverage"]
+  end
+ subgraph BQ["BigQuery (GCP auth + billing)<br>&nbsp; &nbsp;&nbsp; &nbsp;All 4000+ DICOM tags &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;<br>Tables: dicom_all, dicom_metadata, derived metadata, clinical"]
+  end
+ subgraph IDX["idc-index PARQUET FILES (no auth)<br>~50 key columns per series, bundled in Python package &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;<br>Auto-loaded: index, prior_versions_index<br>On-demand: collections, seg, sm, ann, clinical, contrast"]
+  end
+    DCM["<b>DICOM FILES (.dcm)</b><br>Named by crdc_instance_uuid, grouped by crdc_series_uuid"] -- stored in --> BUCKETS
+    B1 -- DICOM instances<BR>imported into --> PROXY
+    B2 -- DICOM instances<BR>imported into --> PROXY
+    B3 -- DICOM instances<BR>imported into --> PROXY
+    B1 -- DICOM instances<BR>imported into --> GHC
+    PROXY -- DICOM metadata exported to --> BQ
+    BQ -- ~50 key columns queried via SQL --> IDX
+    BQ -- tables exported to --> S3BQ["Parquet files in AWS S3"]
+    IDX -. SeriesInstanceUID for DICOMweb queries .-> STORES
+    IDX -. series_aws_url / crdc_series_uuid maps to bucket paths .-> BUCKETS
+    %% Styling: one fill/stroke colour per layer - files orange, buckets and S3 export blue, stores purple, BigQuery pink, idc-index green.
+    style DCM fill:#fff3e0,stroke:#FF9800,stroke-width:2px,color:#000
+    style BUCKETS fill:#e8f4fd,stroke:#2196F3,stroke-width:2px,color:#000
+    style B1 fill:#e8f4fd,stroke:#2196F3,color:#000
+    style B2 fill:#e8f4fd,stroke:#2196F3,color:#000
+    style B3 fill:#e8f4fd,stroke:#2196F3,color:#000
+    style PROXY fill:#f3e5f5,stroke:#9C27B0,color:#000
+    style GHC fill:#f3e5f5,stroke:#9C27B0,color:#000
+    style BQ fill:#fce4ec,stroke:#E91E63,stroke-width:2px,color:#000
+    %% No style rule for a BQ_DESC node: the BigQuery description text is part of the BQ subgraph title, so no such node is declared.
+    style IDX fill:#e8f5e9,stroke:#4CAF50,stroke-width:2px,color:#000
+    style S3BQ fill:#e8f4fd,stroke:#2196F3,stroke-width:2px,color:#000
+    %% No style rule for an IDX_DESC node: the idc-index description text is part of the IDX subgraph title, so no such node is declared.
+    style STORES fill:#f3e5f5,stroke:#9C27B0,stroke-width:2px,color:#000
 ```
 
 Let's start with the overall principles of how we organize data in IDC.