|
20 | 20 | "With the custom \"zipdcm\" Python Data Source, we can read zipped (and non-zipped) DICOM files directly to extract their metadata.\n", |
21 | 21 | "\n", |
22 | 22 | "Requirements:\n", |
23 | | - "- Recommend DBR 17.0 (spark 4.0) compute\n", |
| 23 | + "- Recommend DBR 17.0 (Apache Spark 4.0) compute\n", |
24 | 24 | "- Shared cluster compute compatible\n", |
25 | 25 | "- Working on serverless compute fix.\n", |
26 | 26 | "- Requires `pydicom==3.0.1`" |
|
53 | 53 | } |
54 | 54 | ], |
55 | 55 | "source": [ |
56 | | - "%pip install --quiet pydicom==3.0.1" |
| 56 | + "%pip install --quiet pydicom==3.0.1\n", |
| 57 | + "%restart_python" |
57 | 58 | ] |
58 | 59 | }, |
59 | 60 | { |
|
66 | 67 | "rowLimit": 10000 |
67 | 68 | }, |
68 | 69 | "inputWidgets": {}, |
69 | | - "nuid": "58d15465-474e-4e54-b3e2-13ad6c46b717", |
| 70 | + "nuid": "d446370a-7598-4c79-bbbc-89775c62e887", |
70 | 71 | "showTitle": false, |
71 | 72 | "tableResultSettingsMap": {}, |
72 | 73 | "title": "" |
73 | 74 | } |
74 | 75 | }, |
75 | | - "outputs": [], |
| 76 | + "outputs": [ |
| 77 | + { |
| 78 | + "output_type": "stream", |
| 79 | + "name": "stdout", |
| 81 | + "text": [ |
| 82 | + "total 57M\n-rwxrwxrwx 1 root root 12K Aug 1 21:09 1.3.199.1.2.3712432.1.402.1107814368275696879.zip\n-rwxrwxrwx 1 root root 24M Aug 1 21:09 3.5.574.1.3.9030958.6.376.1780887819048872979.zip\n-rwxrwxrwx 1 root root 12M Aug 1 21:09 3.5.574.1.3.9030958.6.376.2860280475000825621.zip\ndrwxrwxrwx 2 root root 4.0K Aug 2 17:33 x\n-rwxrwxrwx 1 root root 12M Aug 1 21:09 x.zip\ndrwxrwxrwx 2 root root 4.0K Aug 2 17:33 y\n-rwxrwxrwx 1 root root 12M Aug 1 21:09 y.zip\n" |
| 83 | + ] |
| 84 | + } |
| 85 | + ], |
76 | 86 | "source": [ |
77 | | - "dbutils.library.restartPython()" |
| 87 | + "%sh ls -lh ./resources/dcms" |
78 | 88 | ] |
79 | 89 | }, |
80 | 90 | { |
|
202 | 212 | "from dbx.zip_dcm_ds import ZipDCMDataSource\n", |
203 | 213 | "spark.dataSource.register(ZipDCMDataSource)\n", |
204 | 214 | "\n", |
205 | | - "df = spark.read.format(\"zipdcm\").load(\"./resources\")\n", |
| 215 | + "# read DCMs with `numPartitions` parallelism.\n", |
| 216 | + "df = spark.read.format(\"zipdcm\").option('numPartitions',4).load(\"./resources\")\n", |
206 | 217 | "df.display()" |
207 | 218 | ] |
208 | 219 | } |
|
218 | 229 | "inputWidgetPreferences": null, |
219 | 230 | "language": "python", |
220 | 231 | "notebookMetadata": { |
| 232 | + "mostRecentlyExecutedCommandWithImplicitDF": { |
| 233 | + "commandId": 7424973428825328, |
| 234 | + "dataframes": [ |
| 235 | + "_sqldf" |
| 236 | + ] |
| 237 | + }, |
221 | 238 | "pythonIndentUnit": 4 |
222 | 239 | }, |
223 | | - "notebookName": "demo", |
| 240 | + "notebookName": "zip-dicom-demo", |
224 | 241 | "widgets": {} |
225 | 242 | }, |
226 | 243 | "language_info": { |
|
0 commit comments