From 02ff8d96dfa5d9ff7085bc92ea69ab9196bf8f4f Mon Sep 17 00:00:00 2001 From: Lars Francke Date: Sat, 31 May 2025 19:02:22 +0200 Subject: [PATCH 1/4] chore: Remove hardcoded uid and gid --- CHANGELOG.md | 5 +++++ rust/operator-binary/src/connect/server.rs | 4 +--- rust/operator-binary/src/crd/constants.rs | 1 - rust/operator-binary/src/history/history_controller.rs | 4 +--- rust/operator-binary/src/spark_k8s_controller.rs | 2 -- 5 files changed, 7 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 69cb831c..90ea254e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,10 @@ All notable changes to this project will be documented in this file. - Use versioned common structs ([#572]). - BREAKING: Change the label `app.kubernetes.io/name` for Spark history and connect objects to use `spark-history` and `spark-connect` instead of `spark-k8s` ([#573]). - BREAKING: The history Pods now have their own ClusterRole named `spark-history-clusterrole` ([#573]). +- BREAKING: Previously this operator would hardcode the UID and GID of the Pods being created to 1000/0, this has changed now ([#575]). + - The `runAsUser` and `runAsGroup` fields will not be set anymore by the operator + - The defaults from the docker images themselves will now apply, which will be different from 1000/0 going forward + - This is marked as breaking because tools and policies might exist, which require these fields to be set ### Fixed @@ -52,6 +56,7 @@ All notable changes to this project will be documented in this file. 
[#572]: https://github.com/stackabletech/spark-k8s-operator/pull/572 [#573]: https://github.com/stackabletech/spark-k8s-operator/pull/573 [#574]: https://github.com/stackabletech/spark-k8s-operator/pull/574 +[#575]: https://github.com/stackabletech/spark-k8s-operator/pull/575 ## [25.3.0] - 2025-03-21 diff --git a/rust/operator-binary/src/connect/server.rs b/rust/operator-binary/src/connect/server.rs index 328b4abe..b4019b7a 100644 --- a/rust/operator-binary/src/connect/server.rs +++ b/rust/operator-binary/src/connect/server.rs @@ -48,7 +48,7 @@ use crate::{ constants::{ JVM_SECURITY_PROPERTIES_FILE, LISTENER_VOLUME_DIR, LISTENER_VOLUME_NAME, LOG4J2_CONFIG_FILE, MAX_SPARK_LOG_FILES_SIZE, METRICS_PROPERTIES_FILE, - POD_TEMPLATE_FILE, SPARK_DEFAULTS_FILE_NAME, SPARK_UID, VOLUME_MOUNT_NAME_CONFIG, + POD_TEMPLATE_FILE, SPARK_DEFAULTS_FILE_NAME, VOLUME_MOUNT_NAME_CONFIG, VOLUME_MOUNT_NAME_LOG, VOLUME_MOUNT_NAME_LOG_CONFIG, VOLUME_MOUNT_PATH_CONFIG, VOLUME_MOUNT_PATH_LOG, VOLUME_MOUNT_PATH_LOG_CONFIG, }, @@ -246,8 +246,6 @@ pub(crate) fn build_stateful_set( ) .context(AddVolumeSnafu)? 
.security_context(PodSecurityContext { - run_as_user: Some(SPARK_UID), - run_as_group: Some(0), fs_group: Some(1000), ..PodSecurityContext::default() }); diff --git a/rust/operator-binary/src/crd/constants.rs b/rust/operator-binary/src/crd/constants.rs index 85f4d769..7e32e392 100644 --- a/rust/operator-binary/src/crd/constants.rs +++ b/rust/operator-binary/src/crd/constants.rs @@ -86,7 +86,6 @@ pub const SPARK_DEFAULTS_FILE_NAME: &str = "spark-defaults.conf"; pub const SPARK_ENV_SH_FILE_NAME: &str = "spark-env.sh"; pub const SPARK_CLUSTER_ROLE: &str = "spark-k8s-clusterrole"; -pub const SPARK_UID: i64 = 1000; pub const METRICS_PORT: u16 = 18081; pub const HISTORY_UI_PORT: u16 = 18080; diff --git a/rust/operator-binary/src/history/history_controller.rs b/rust/operator-binary/src/history/history_controller.rs index 737a8e96..55b17c5d 100644 --- a/rust/operator-binary/src/history/history_controller.rs +++ b/rust/operator-binary/src/history/history_controller.rs @@ -58,7 +58,7 @@ use crate::{ HISTORY_UI_PORT, JVM_SECURITY_PROPERTIES_FILE, LISTENER_VOLUME_DIR, LISTENER_VOLUME_NAME, MAX_SPARK_LOG_FILES_SIZE, METRICS_PORT, OPERATOR_NAME, SECRET_ACCESS_KEY, SPARK_DEFAULTS_FILE_NAME, SPARK_ENV_SH_FILE_NAME, - SPARK_IMAGE_BASE_NAME, SPARK_UID, STACKABLE_TRUST_STORE, VOLUME_MOUNT_NAME_CONFIG, + SPARK_IMAGE_BASE_NAME, STACKABLE_TRUST_STORE, VOLUME_MOUNT_NAME_CONFIG, VOLUME_MOUNT_NAME_LOG, VOLUME_MOUNT_NAME_LOG_CONFIG, VOLUME_MOUNT_PATH_CONFIG, VOLUME_MOUNT_PATH_LOG, VOLUME_MOUNT_PATH_LOG_CONFIG, }, @@ -544,8 +544,6 @@ fn build_stateful_set( ) .context(AddVolumeSnafu)? 
.security_context(PodSecurityContext { - run_as_user: Some(SPARK_UID), - run_as_group: Some(0), fs_group: Some(1000), ..PodSecurityContext::default() }); diff --git a/rust/operator-binary/src/spark_k8s_controller.rs b/rust/operator-binary/src/spark_k8s_controller.rs index ac6c052f..7fcbf36d 100644 --- a/rust/operator-binary/src/spark_k8s_controller.rs +++ b/rust/operator-binary/src/spark_k8s_controller.rs @@ -1023,8 +1023,6 @@ fn build_spark_role_serviceaccount( fn security_context() -> PodSecurityContext { PodSecurityContext { - run_as_user: Some(SPARK_UID), - run_as_group: Some(0), fs_group: Some(1000), ..PodSecurityContext::default() } From f04723b98ea22983f392d32f6b9db55109487c0a Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 2 Jun 2025 15:25:29 +0200 Subject: [PATCH 2/4] fix(helm): give the operator the same scc as the product --- deploy/helm/spark-k8s-operator/templates/roles.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/deploy/helm/spark-k8s-operator/templates/roles.yaml b/deploy/helm/spark-k8s-operator/templates/roles.yaml index 3625c680..110e5895 100644 --- a/deploy/helm/spark-k8s-operator/templates/roles.yaml +++ b/deploy/helm/spark-k8s-operator/templates/roles.yaml @@ -151,3 +151,13 @@ rules: - patch - create - delete +{{ if .Capabilities.APIVersions.Has "security.openshift.io/v1" }} + - apiGroups: + - security.openshift.io + resources: + - securitycontextconstraints + resourceNames: + - nonroot-v2 + verbs: + - use +{{ end }} From a0305ee65cd46a323c06f7308cd4f2d169acaf73 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Tue, 3 Jun 2025 12:51:48 +0200 Subject: [PATCH 3/4] add USER directive to the example image --- apps/README.md | 8 +++++++- apps/docker/Dockerfile | 5 +++++ tests/test-definition.yaml | 2 +- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/apps/README.md b/apps/README.md index 29e1167b..3c647a16 100644 
--- a/apps/README.md +++ b/apps/README.md @@ -1,5 +1,11 @@ +# Provision Spark applications from a separate image -# Generate report from the public data set +## Build the image + + docker build -t oci.stackable.tech/stackable/ny-tlc-report:0.3.0 -f apps/docker/Dockerfile . + docker push oci.stackable.tech/stackable/ny-tlc-report:0.3.0 + +## Generate report from the public data set spark-submit --conf spark.hadoop.fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider --packages org.apache.hadoop:hadoop-aws:3.2.0,com.amazonaws:aws-java-sdk-s3:1.12.180,com.amazonaws:aws-java-sdk-core:1.12.180 ny_tlc_report.py --input 's3a://nyc-tlc/trip data/yellow_tripdata_2021-07.csv' diff --git a/apps/docker/Dockerfile b/apps/docker/Dockerfile index 34468e50..f33dc306 100644 --- a/apps/docker/Dockerfile +++ b/apps/docker/Dockerfile @@ -5,4 +5,9 @@ LABEL maintainer="Stackable GmbH" WORKDIR /jobs +# A user must be specified for the container to run on OpenShift +# with the nonroot-v2 security context constraint and +# without a runAsUser container annotation. +USER 1000 + COPY apps/ny_tlc_report.py . 
diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml index fe31933c..c96fd92d 100644 --- a/tests/test-definition.yaml +++ b/tests/test-definition.yaml @@ -33,7 +33,7 @@ dimensions: - 3.9.3 - name: ny-tlc-report values: - - 0.2.0 + - 0.3.0 - name: s3-use-tls values: - "false" From 30f8f2bb3da670d4d0e50b8089da707ba9cffedb Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Tue, 3 Jun 2025 15:41:16 +0200 Subject: [PATCH 4/4] bump ny-tlc-report image version in GH workflow --- .github/workflows/dev_ny-tlc-report.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/dev_ny-tlc-report.yaml b/.github/workflows/dev_ny-tlc-report.yaml index 1aa2d290..b627362b 100644 --- a/.github/workflows/dev_ny-tlc-report.yaml +++ b/.github/workflows/dev_ny-tlc-report.yaml @@ -3,7 +3,7 @@ name: Build and publish ny-tlc-report env: IMAGE_NAME: ny-tlc-report - IMAGE_VERSION: 0.2.0 + IMAGE_VERSION: 0.3.0 REGISTRY_PATH: stackable DOCKERFILE_PATH: "apps/docker/Dockerfile"