|
20 | 20 | "With the custom \"zipdcm\" Python Data Source, we can read zipped (and non-zipped) DICOM files directly to extract their metadata.\n", |
21 | 21 | "\n", |
22 | 22 | "Requirements:\n", |
23 | | - "- Recommend DBR 17.0 (spark 4.0) compute\n", |
| 23 | + "- Recommend DBR 17.0 (Apache Spark 4.0) compute\n", |
24 | 24 | "- Shared cluster compute compatible\n", |
25 | 25 | "- Working on serverless compute fix.\n", |
26 | 26 | "- Requires `pydicom==3.0.1`" |
|
53 | 53 | } |
54 | 54 | ], |
55 | 55 | "source": [ |
56 | | - "%pip install --quiet pydicom==3.0.1" |
| 56 | + "%pip install --quiet pydicom==3.0.1\n", |
| 57 | + "%restart_python" |
57 | 58 | ] |
58 | 59 | }, |
59 | 60 | { |
|
66 | 67 | "rowLimit": 10000 |
67 | 68 | }, |
68 | 69 | "inputWidgets": {}, |
69 | | - "nuid": "58d15465-474e-4e54-b3e2-13ad6c46b717", |
| 70 | + "nuid": "d446370a-7598-4c79-bbbc-89775c62e887", |
70 | 71 | "showTitle": false, |
71 | 72 | "tableResultSettingsMap": {}, |
72 | 73 | "title": "" |
73 | 74 | } |
74 | 75 | }, |
75 | | - "outputs": [], |
| 76 | + "outputs": [ |
| 77 | + { |
| 78 | + "output_type": "stream", |
| 79 | + "name": "stdout", |
| 81 | + "text": [ |
| 82 | + "total 57M\n-rwxrwxrwx 1 root root 12K Aug 1 21:09 1.3.199.1.2.3712432.1.402.1107814368275696879.zip\n-rwxrwxrwx 1 root root 24M Aug 1 21:09 3.5.574.1.3.9030958.6.376.1780887819048872979.zip\n-rwxrwxrwx 1 root root 12M Aug 1 21:09 3.5.574.1.3.9030958.6.376.2860280475000825621.zip\ndrwxrwxrwx 2 root root 4.0K Aug 2 17:33 x\n-rwxrwxrwx 1 root root 12M Aug 1 21:09 x.zip\ndrwxrwxrwx 2 root root 4.0K Aug 2 17:33 y\n-rwxrwxrwx 1 root root 12M Aug 1 21:09 y.zip\n" |
| 83 | + ] |
| 84 | + } |
| 85 | + ], |
76 | 86 | "source": [ |
77 | | - "dbutils.library.restartPython()" |
| 87 | + "%sh ls -lh ./resources/dcms" |
78 | 88 | ] |
79 | 89 | }, |
80 | 90 | { |
|
202 | 212 | "from dbx.zip_dcm_ds import ZipDCMDataSource\n", |
203 | 213 | "spark.dataSource.register(ZipDCMDataSource)\n", |
204 | 214 | "\n", |
205 | | - "df = spark.read.format(\"zipdcm\").load(\"./resources\")\n", |
| 215 | + "# read DCMs with `numPartitions` parallelism.\n", |
| 216 | + "df = spark.read.format(\"zipdcm\").option('numPartitions',4).load(\"./resources\")\n", |
206 | 217 | "df.display()" |
207 | 218 | ] |
208 | 219 | } |
|
218 | 229 | "inputWidgetPreferences": null, |
219 | 230 | "language": "python", |
220 | 231 | "notebookMetadata": { |
| 232 | + "mostRecentlyExecutedCommandWithImplicitDF": { |
| 233 | + "commandId": 7424973428825328, |
| 234 | + "dataframes": [ |
| 235 | + "_sqldf" |
| 236 | + ] |
| 237 | + }, |
221 | 238 | "pythonIndentUnit": 4 |
222 | 239 | }, |
223 | | - "notebookName": "demo", |
| 240 | + "notebookName": "zip-dicom-demo", |
224 | 241 | "widgets": {} |
225 | 242 | }, |
226 | 243 | "language_info": { |
|
0 commit comments