Skip to content

Commit 06638fc

Browse files
committed
chore: Merge branch 'main' into feat/boil
2 parents d189810 + 5f73e7c commit 06638fc

File tree

5 files changed

+121
-29
lines changed

5 files changed

+121
-29
lines changed

spark-connect-client/boil-config.toml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,10 @@ java-base = "17"
44

55
[versions."3.5.6".build-arguments]
66
python-version = "3.11"
7+
8+
[versions."4.0.0".local-images]
9+
spark-k8s = "4.0.0"
10+
java-base = "17"
11+
12+
[versions."4.0.0".build-arguments]
13+
python-version = "3.11"

spark-k8s/Dockerfile

Lines changed: 55 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,18 @@ COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patche
6161
COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR_VERSION} /stackable/src/spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR_VERSION}
6262

6363
RUN <<EOF
64-
cd "$(/stackable/patchable --images-repo-root=src checkout spark-k8s/hbase-connectors ${HBASE_CONNECTOR_VERSION})/spark"
64+
65+
# IMPORTANT: HBase connectors don't support Spark 4 yet, so we skip the build.
66+
# Watch this PR for updates: https://github.com/apache/hbase-connectors/pull/130
67+
if [[ "${PRODUCT_VERSION}" == 4* ]]; then
68+
# Create this empty directory so that following COPY layers succeed.
69+
mkdir -p /stackable/spark/jars
70+
# Create a dummy source tarball so that subsequent build steps, which expect
# the hbase-connectors archive to exist, still succeed for Spark 4.
71+
touch hbase-connector-${HBASE_CONNECTOR_VERSION}-stackable${RELEASE_VERSION}-src.tar.gz
72+
exit 0
73+
fi
74+
75+
cd "$(/stackable/patchable --images-repo-root=src checkout spark-k8s/hbase-connectors ${HBASE_CONNECTOR_VERSION})/spark"
6576

6677
NEW_VERSION="${HBASE_CONNECTOR_VERSION}-stackable${RELEASE_VERSION}"
6778

@@ -80,18 +91,10 @@ export JDK_JAVA_OPTIONS="\
8091
--add-opens java.base/java.util=ALL-UNNAMED"
8192

8293
# Get the Scala version used by Spark
83-
SCALA_VERSION=$( \
84-
mvn --quiet --non-recursive --file /stackable/spark/pom.xml \
85-
org.apache.maven.plugins:maven-help-plugin:3.5.0:evaluate \
86-
-DforceStdout \
87-
-Dexpression='project.properties(scala.version)')
94+
SCALA_VERSION=$(grep "scala.version" /stackable/spark/pom.xml | head -n1 | awk -F '[<>]' '{print $3}')
8895

8996
# Get the Scala binary version used by Spark
90-
SCALA_BINARY_VERSION=$( \
91-
mvn --quiet --non-recursive --file /stackable/spark/pom.xml \
92-
org.apache.maven.plugins:maven-help-plugin:3.5.0:evaluate \
93-
-DforceStdout \
94-
-Dexpression='project.properties(scala.binary.version)')
97+
SCALA_BINARY_VERSION=$(grep "scala.binary.version" /stackable/spark/pom.xml | head -n1 | awk -F '[<>]' '{print $3}')
9598

9699
# Build the Spark HBase connector
97100
# Skip the tests because the MiniHBaseCluster does not get ready for
@@ -110,6 +113,7 @@ mvn \
110113
--define hadoop-three.version="${HADOOP_VERSION}" \
111114
--define hbase.version="${HBASE_VERSION}" \
112115
--define skipTests \
116+
--define maven.test.skip=true \
113117
clean package
114118

115119
mkdir -p /stackable/spark/jars
@@ -159,22 +163,36 @@ COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-source-builder \
159163
COPY --from=hadoop-builder --chown=${STACKABLE_USER_UID}:0 /stackable/patched-libs /stackable/patched-libs
160164

161165
# >>> Build spark
162-
# Compiling the tests takes a lot of time, so we skip them
163-
# -Dmaven.test.skip=true skips both the compilation and execution of tests
164-
# -DskipTests skips only the execution
165166
RUN <<EOF
166167
# Make Maven aware of custom Stackable libraries
167168
mv /stackable/patched-libs/maven /root/.m2/repository
168169

169170
ORIGINAL_VERSION="${PRODUCT_VERSION}"
170171
NEW_VERSION="${PRODUCT_VERSION}-stackable${RELEASE_VERSION}"
171172

173+
MAVEN_BIN="/usr/bin/mvn"
172174
export MAVEN_OPTS="-Xss64m -Xmx2g -XX:ReservedCodeCacheSize=1g"
173175

174-
./dev/make-distribution.sh \
175-
-Dhadoop.version="${HADOOP_VERSION}-stackable${RELEASE_VERSION}" \
176-
-DskipTests \
177-
-P'hadoop-3' -Pkubernetes -Phive -Phive-thriftserver
176+
case "${PRODUCT_VERSION}" in
177+
4*)
178+
# The Spark 4 script has a --connect option which is not available in Spark 3.
179+
# This option is required to build Spark Connect.
180+
# Also this option breaks the Spark 3 build so we ensure it's only provided here.
181+
./dev/make-distribution.sh \
182+
--mvn "${MAVEN_BIN}" \
183+
--connect \
184+
-Dhadoop.version="${HADOOP_VERSION}-stackable${RELEASE_VERSION}" \
185+
-DskipTests \
186+
-P'hadoop-3' -Pkubernetes -Phive -Phive-thriftserver
187+
;;
188+
*)
189+
./dev/make-distribution.sh \
190+
--mvn "${MAVEN_BIN}" \
191+
-Dhadoop.version="${HADOOP_VERSION}-stackable${RELEASE_VERSION}" \
192+
-DskipTests \
193+
-P'hadoop-3' -Pkubernetes -Phive -Phive-thriftserver
194+
;;
195+
esac
178196

179197
sed -i "s/${NEW_VERSION}/${ORIGINAL_VERSION}/g" assembly/target/bom.json
180198
EOF
@@ -185,22 +203,30 @@ EOF
185203
# we create a new dist/connect folder, and copy them here.
186204
RUN <<EOF
187205

188-
# Get the Scala binary version
189-
SCALA_BINARY_VERSION=$( \
190-
mvn --quiet --non-recursive --file pom.xml \
191-
org.apache.maven.plugins:maven-help-plugin:3.5.0:evaluate \
192-
-DforceStdout \
193-
-Dexpression='project.properties(scala.binary.version)')
206+
SCALA_BINARY_VERSION=$(grep "scala.binary.version" pom.xml | head -n1 | awk -F '[<>]' '{print $3}')
194207

195208
mkdir -p dist/connect
196209
cd dist/connect
197210

198-
cp "/stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/connector/connect/server/target/spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}.jar" .
199-
cp "/stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/connector/connect/common/target/spark-connect-common_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}.jar" .
200-
cp "/stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/connector/connect/client/jvm/target/spark-connect-client-jvm_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}.jar" .
201-
202-
# The Spark operator expects a file named spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}.jar without the -stackable${RELEASE_VERSION} suffix.
211+
case "${PRODUCT_VERSION}" in
212+
4*)
213+
cp "/stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/sql/connect/server/target/spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}.jar" .
214+
cp "/stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/sql/connect/common/target/spark-connect-common_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}.jar" .
215+
cp "/stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/sql/connect/client/jvm/target/spark-connect-client-jvm_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}.jar" .
216+
;;
217+
*)
218+
cp "/stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/connector/connect/server/target/spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}.jar" .
219+
cp "/stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/connector/connect/common/target/spark-connect-common_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}.jar" .
220+
cp "/stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/connector/connect/client/jvm/target/spark-connect-client-jvm_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}.jar" .
221+
;;
222+
esac
223+
224+
# This link is needed by the operator and is kept for backwards compatibility.
225+
# TODO: remove it at some time in the future.
203226
ln -s "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}.jar" "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}.jar"
227+
# Link to the spark-connect jar without the stackable suffix and scala version.
228+
# This link supersedes the previous link.
229+
ln -s "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}.jar" "spark-connect-${PRODUCT_VERSION}.jar"
204230
EOF
205231

206232
# <<< Build spark

spark-k8s/boil-config.toml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,22 @@ woodstox-core-version = "6.5.1" # https://mvnrepository.com/artifact/com.fasterx
3535
jmx-exporter-version = "1.3.0"
3636
tini-version = "0.19.0"
3737
hbase-connector-version = "1.0.1"
38+
39+
[versions."4.0.0".local-images]
40+
"hadoop/hadoop" = "3.4.1"
41+
java-base = "17"
42+
java-devel = "17"
43+
hbase = "2.6.2"
44+
vector = "0.47.0"
45+
46+
[versions."4.0.0".build-arguments]
47+
python-version = "3.11"
48+
aws-java-sdk-bundle-version = "2.24.6"
49+
azure-storage-version = "7.0.1" # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.3.4
50+
azure-keyvault-core-version = "1.0.0" # https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1
51+
jackson-dataformat-xml-version = "2.15.2" # https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.13/3.5.1
52+
stax2-api-version = "4.2.1" # https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2
53+
woodstox-core-version = "6.5.1" # https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2
54+
jmx-exporter-version = "1.3.0"
55+
tini-version = "0.19.0"
56+
hbase-connector-version = "1.0.1" # This is not supported in Spark 4 yet.
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
From 2da5608928018dd017c91b904eb8f84a4f6df78a Mon Sep 17 00:00:00 2001
2+
From: Razvan-Daniel Mihai <[email protected]>
3+
Date: Fri, 4 Jul 2025 15:54:55 +0200
4+
Subject: Update CycloneDX plugin
5+
6+
---
7+
dev/make-distribution.sh | 1 -
8+
pom.xml | 5 +++++
9+
2 files changed, 5 insertions(+), 1 deletion(-)
10+
11+
diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh
12+
index 16607e45ae..44e345a245 100755
13+
--- a/dev/make-distribution.sh
14+
+++ b/dev/make-distribution.sh
15+
@@ -176,7 +176,6 @@ BUILD_COMMAND=("$MVN" clean package \
16+
-Dmaven.javadoc.skip=true \
17+
-Dmaven.scaladoc.skip=true \
18+
-Dmaven.source.skip \
19+
- -Dcyclonedx.skip=true \
20+
$@)
21+
22+
# Actually build the jar
23+
diff --git a/pom.xml b/pom.xml
24+
index 443d46a430..632920f100 100644
25+
--- a/pom.xml
26+
+++ b/pom.xml
27+
@@ -3327,6 +3327,11 @@
28+
<groupId>org.cyclonedx</groupId>
29+
<artifactId>cyclonedx-maven-plugin</artifactId>
30+
<version>2.8.0</version>
31+
+ <configuration>
32+
+ <projectType>application</projectType>
33+
+ <schemaVersion>1.5</schemaVersion>
34+
+ <skipNotDeployed>false</skipNotDeployed>
35+
+ </configuration>
36+
<executions>
37+
<execution>
38+
<phase>package</phase>
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
base = "fa33ea000a0bda9e5a3fa1af98e8e85b8cc5e4d4"
2+
mirror = "https://github.com/stackabletech/spark.git"

0 commit comments

Comments
 (0)