|
19 | 19 | "# Read Zipped DICOM files saving time and storage\n", |
20 | 20 | "With the custom \"zipdcm\" Python Data Source, we can read zipped (and non-zipped) DICOM files directly to extract their metadata.\n", |
21 | 21 | "\n", |
22 | | - "Requirements:\n", |
23 | | - "- Recommend DBR 17.0 (Apache Spark 4.0) compute\n", |
| 22 | + "### Requirements:\n", |
| 23 | + "- Recommend DBR 17.1 (Apache Spark 4.0) dedicated compute\n", |
24 | 24 | "- Shared cluster compute compatible\n", |
25 | 25 | "- A fix for serverless compute is in progress.\n", |
26 | | - "- Requires `pydicom==3.0.1`" |
| 26 | + "- Requires `pydicom==3.0.1 pylibjpeg[all]>=2.0.1`\n", |
| 27 | + "\n", |
| 28 | + "### Synthetic PHI data source citation\n", |
| 29 | + "Rutherford, M. W., Nolan, T., Pei, L., Wagner, U., Pan, Q., Farmer, P., Smith, K., Kopchick, B., Opsahl-Ong, L., Sutton, G., Clunie, D. A., Farahani, K., & Prior, F. (2025). Data in Support of the MIDI-B Challenge (MIDI-B-Synthetic-Validation, MIDI-B-Curated-Validation, MIDI-B-Synthetic-Test, MIDI-B-Curated-Test) (Version 1) [Dataset]. The Cancer Imaging Archive. https://doi.org/10.7937/CF2P-AW56" |
27 | 30 | ] |
28 | 31 | }, |
29 | 32 | { |
|
53 | 56 | } |
54 | 57 | ], |
55 | 58 | "source": [ |
56 | | - "%pip install --quiet pydicom==3.0.1\n", |
| 59 | + "# %pip install --quiet numpy==1.26.4 pydicom==3.0.1 pylibjpeg[all]>=2.0.1\n", |
| 60 | + "%pip install --quiet numpy==2.1.3 pydicom==3.0.1 pylibjpeg[all]>=2.0.1\n", |
57 | 61 | "%restart_python" |
58 | 62 | ] |
59 | 63 | }, |
|
79 | 83 | "name": "stdout", |
80 | 84 | "output_type": "stream", |
81 | 85 | "text": [ |
82 | | - "total 57M\n-rwxrwxrwx 1 root root 12K Aug 1 21:09 1.3.199.1.2.3712432.1.402.1107814368275696879.zip\n-rwxrwxrwx 1 root root 24M Aug 1 21:09 3.5.574.1.3.9030958.6.376.1780887819048872979.zip\n-rwxrwxrwx 1 root root 12M Aug 1 21:09 3.5.574.1.3.9030958.6.376.2860280475000825621.zip\ndrwxrwxrwx 2 root root 4.0K Aug 2 17:33 x\n-rwxrwxrwx 1 root root 12M Aug 1 21:09 x.zip\ndrwxrwxrwx 2 root root 4.0K Aug 2 17:33 y\n-rwxrwxrwx 1 root root 12M Aug 1 21:09 y.zip\n" |
| 86 | + "total 57M\n-rwxrwxrwx 1 root root 12K Aug 1 21:09 1.3.199.1.2.3712432.1.402.1107814368275696879.zip\n-rwxrwxrwx 1 root root 24M Aug 1 21:09 3.5.574.1.3.9030958.6.376.1780887819048872979.zip\n-rwxrwxrwx 1 root root 12M Aug 1 21:09 3.5.574.1.3.9030958.6.376.2860280475000825621.zip\ndrwxrwxrwx 2 root root 4.0K Aug 10 04:17 x\n-rwxrwxrwx 1 root root 12M Aug 1 21:09 x.zip\ndrwxrwxrwx 2 root root 4.0K Aug 10 04:17 y\n-rwxrwxrwx 1 root root 12M Aug 1 21:09 y.zip\n" |
83 | 87 | ] |
84 | 88 | } |
85 | 89 | ], |
|
213 | 217 | "spark.dataSource.register(ZipDCMDataSource)\n", |
214 | 218 | "\n", |
215 | 219 | "# read DCMs with `numPartitions` parallelism.\n", |
216 | | - "df = spark.read.format(\"zipdcm\").option('numPartitions',4).load(\"./resources\")\n", |
| 220 | + "df = (\n", |
| 221 | + " spark.read\n", |
| 222 | + " .format(\"zipdcm\")\n", |
| 223 | + " .option(\"numPartitions\",4)\n", |
| 224 | + " .load(\"./resources\")\n", |
| 225 | + ")\n", |
217 | 226 | "df.display()" |
218 | 227 | ] |
219 | 228 | } |
220 | 229 | ], |
221 | 230 | "metadata": { |
222 | 231 | "application/vnd.databricks.v1+notebook": { |
223 | | - "computePreferences": null, |
| 232 | + "computePreferences": { |
| 233 | + "hardware": { |
| 234 | + "accelerator": null, |
| 235 | + "gpuPoolId": null, |
| 236 | + "memory": null |
| 237 | + } |
| 238 | + }, |
224 | 239 | "dashboards": [], |
225 | 240 | "environmentMetadata": { |
226 | | - "base_environment": "dbe_65bc13ea-276c-4905-a728-9fe2fb1780e2", |
| 241 | + "base_environment": "", |
227 | 242 | "environment_version": "2" |
228 | 243 | }, |
229 | 244 | "inputWidgetPreferences": null, |
230 | 245 | "language": "python", |
231 | 246 | "notebookMetadata": { |
232 | 247 | "mostRecentlyExecutedCommandWithImplicitDF": { |
233 | | - "commandId": 7424973428825328, |
| 248 | + "commandId": 5816783787054213, |
234 | 249 | "dataframes": [ |
235 | 250 | "_sqldf" |
236 | 251 | ] |
|
0 commit comments