From d24689341b1757dbcd063a57bf2b083b3a6e13cc Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Fri, 18 Oct 2024 11:35:51 +0200 Subject: [PATCH 1/8] chore: update jupyterhub-pyspark demo with newer image and dependencies --- .github/workflows/dev_pyspark-k8s-with-scikit-learn.yaml | 4 ++-- .../Dockerfile | 2 +- .../requirements.txt | 4 ++-- stacks/_templates/jupyterhub.yaml | 2 +- stacks/jupyterhub-pyspark-hdfs/notebook.ipynb | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/dev_pyspark-k8s-with-scikit-learn.yaml b/.github/workflows/dev_pyspark-k8s-with-scikit-learn.yaml index f9b497e5..c9b2264c 100644 --- a/.github/workflows/dev_pyspark-k8s-with-scikit-learn.yaml +++ b/.github/workflows/dev_pyspark-k8s-with-scikit-learn.yaml @@ -2,8 +2,8 @@ name: Build and publish pyspark-k8s-with-scikit-learn env: - IMAGE_NAME: pyspark-k8s-with-scikit-learn - IMAGE_VERSION: 3.4.0-stackable0.0.0-dev + IMAGE_NAME: spark-k8s-with-scikit-learn + IMAGE_VERSION: 3.5.0-stackable24.3.0 REGISTRY_PATH: stackable DOCKERFILE_PATH: "demos/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data/Dockerfile" diff --git a/demos/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data/Dockerfile b/demos/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data/Dockerfile index 445be34e..232b7c8b 100644 --- a/demos/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data/Dockerfile +++ b/demos/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data/Dockerfile @@ -1,4 +1,4 @@ -FROM docker.stackable.tech/stackable/spark-k8s:3.5.1-stackable24.7.0 +FROM docker.stackable.tech/stackable/spark-k8s:3.5.0-stackable24.3.0 COPY demos/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data/requirements.txt . diff --git a/demos/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data/requirements.txt b/demos/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data/requirements.txt index 27812422..1250d128 100644 --- a/demos/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data/requirements.txt +++ b/demos/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data/requirements.txt @@ -1,2 +1,2 @@ -scikit-learn==1.1.3 -pandas==1.5.1 \ No newline at end of file +scikit-learn==1.3.1 +pandas==2.0.3 \ No newline at end of file diff --git a/stacks/_templates/jupyterhub.yaml b/stacks/_templates/jupyterhub.yaml index 81481c9a..5b3c1394 100644 --- a/stacks/_templates/jupyterhub.yaml +++ b/stacks/_templates/jupyterhub.yaml @@ -39,7 +39,7 @@ options: # Inspect the Dockerfile at: # https://github.com/jupyter/docker-stacks/tree/HEAD/datascience-notebook/Dockerfile name: jupyter/pyspark-notebook - tag: python-3.9 + tag: python-3.11 serviceAccountName: spark networkPolicy: enabled: false diff --git a/stacks/jupyterhub-pyspark-hdfs/notebook.ipynb b/stacks/jupyterhub-pyspark-hdfs/notebook.ipynb index 19dcd3f4..7e0cc949 100644 --- a/stacks/jupyterhub-pyspark-hdfs/notebook.ipynb +++ b/stacks/jupyterhub-pyspark-hdfs/notebook.ipynb @@ -34,7 +34,7 @@ "spark = (SparkSession\n", " .builder\n", " .master(f'k8s://https://{os.environ[\"KUBERNETES_SERVICE_HOST\"]}:{os.environ[\"KUBERNETES_SERVICE_PORT\"]}')\n", - " .config(\"spark.kubernetes.container.image\", \"docker.stackable.tech/demos/pyspark-k8s-with-scikit-learn:3.3.0-stackable23.4\")\n", + " .config(\"spark.kubernetes.container.image\", \"docker.stackable.tech/demos/spark-k8s-with-scikit-learn:3.5.0-stackable24.3.0\")\n", " .config(\"spark.driver.port\", \"2222\")\n", " .config(\"spark.driver.blockManager.port\", \"7777\")\n", " .config(\"spark.driver.host\", \"driver-service.default.svc.cluster.local\")\n", From 43ea333396b55a0e415b99771066e7f16441d512 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Fri, 18 Oct 2024 11:40:04 +0200 Subject: [PATCH 2/8] linting --- .../requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demos/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data/requirements.txt b/demos/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data/requirements.txt index 1250d128..899fea70 100644 --- a/demos/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data/requirements.txt +++ b/demos/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data/requirements.txt @@ -1,2 +1,2 @@ scikit-learn==1.3.1 -pandas==2.0.3 \ No newline at end of file +pandas==2.0.3 From c0b0884dd95d121bcb55644193651e1bdb29f383 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Fri, 18 Oct 2024 11:51:04 +0200 Subject: [PATCH 3/8] linting II --- .../requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/demos/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data/requirements.txt b/demos/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data/requirements.txt index 899fea70..a59e6776 100644 --- a/demos/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data/requirements.txt +++ b/demos/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data/requirements.txt @@ -1,2 +1,3 @@ scikit-learn==1.3.1 pandas==2.0.3 + From a8282865eab8d45a37e9d4502321f52608bed2a5 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Fri, 18 Oct 2024 11:52:42 +0200 Subject: [PATCH 4/8] linting III --- .../requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/demos/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data/requirements.txt b/demos/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data/requirements.txt index a59e6776..899fea70 100644 --- a/demos/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data/requirements.txt +++ b/demos/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data/requirements.txt @@ -1,3 +1,2 @@ scikit-learn==1.3.1 pandas==2.0.3 - From 01aec4b548d1d9b17c25f326e3a015e5bc0f8d66 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Fri, 18 Oct 2024 11:54:48 +0200 Subject: [PATCH 5/8] linting again --- stacks/_templates/jupyterhub.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/stacks/_templates/jupyterhub.yaml b/stacks/_templates/jupyterhub.yaml index 5b3c1394..95738168 100644 --- a/stacks/_templates/jupyterhub.yaml +++ b/stacks/_templates/jupyterhub.yaml @@ -1,3 +1,4 @@ +--- releaseName: jupyterhub name: jupyterhub repo: From 049726128a31b4fef3717879c3afef62c4316f8b Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Fri, 18 Oct 2024 11:56:36 +0200 Subject: [PATCH 6/8] remove spaces in braces --- stacks/_templates/jupyterhub.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stacks/_templates/jupyterhub.yaml b/stacks/_templates/jupyterhub.yaml index 95738168..793929a7 100644 --- a/stacks/_templates/jupyterhub.yaml +++ b/stacks/_templates/jupyterhub.yaml @@ -12,7 +12,7 @@ options: allowed_users: - admin DummyAuthenticator: - password: {{ jupyterHubAdminPassword }} + password: {{jupyterHubAdminPassword}} JupyterHub: authenticator_class: dummy labels: From 1d72691af08841df9731d4a5bda17c90911e7ad3 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Fri, 18 Oct 2024 12:37:56 +0200 Subject: [PATCH 7/8] deactivate the arm runner --- .github/workflows/dev_pyspark-k8s-with-scikit-learn.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/dev_pyspark-k8s-with-scikit-learn.yaml b/.github/workflows/dev_pyspark-k8s-with-scikit-learn.yaml index c9b2264c..ae80ce6d 100644 --- a/.github/workflows/dev_pyspark-k8s-with-scikit-learn.yaml +++ b/.github/workflows/dev_pyspark-k8s-with-scikit-learn.yaml @@ -27,7 +27,9 @@ jobs: matrix: runner: - {name: "ubuntu-latest", arch: "amd64"} - - {name: "ubicloud-standard-8-arm", arch: "arm64"} + # TODO: the image 3.5.0-stackable24.3.0 does not have an arm64 build. + # Re-activate the arm runner when the image is updated to one that does. + #- {name: "ubicloud-standard-8-arm", arch: "arm64"} steps: - name: Checkout Repository uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 From e1372bda002fbce22ee9b9ff2a9ad4b18b6e4d06 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Fri, 18 Oct 2024 13:19:47 +0200 Subject: [PATCH 8/8] renaming --- ...scikit-learn.yaml => dev_spark-k8s-with-scikit-learn.yaml} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename .github/workflows/{dev_pyspark-k8s-with-scikit-learn.yaml => dev_spark-k8s-with-scikit-learn.yaml} (96%) diff --git a/.github/workflows/dev_pyspark-k8s-with-scikit-learn.yaml b/.github/workflows/dev_spark-k8s-with-scikit-learn.yaml similarity index 96% rename from .github/workflows/dev_pyspark-k8s-with-scikit-learn.yaml rename to .github/workflows/dev_spark-k8s-with-scikit-learn.yaml index ae80ce6d..635cc33f 100644 --- a/.github/workflows/dev_pyspark-k8s-with-scikit-learn.yaml +++ b/.github/workflows/dev_spark-k8s-with-scikit-learn.yaml @@ -1,5 +1,5 @@ --- -name: Build and publish pyspark-k8s-with-scikit-learn +name: Build and publish spark-k8s-with-scikit-learn env: IMAGE_NAME: spark-k8s-with-scikit-learn @@ -15,7 +15,7 @@ on: paths: - demos/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data/Dockerfile - demos/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data/requirements.txt - - .github/workflows/dev_pyspark-k8s-with-scikit-learn.yaml + - .github/workflows/dev_spark-k8s-with-scikit-learn.yaml jobs: build: