
Commit 023006d

init spark 4.0.0 patches
1 parent: c1842b0

File tree

3 files changed: +151 -108 lines changed


spark-k8s/Dockerfile

Lines changed: 111 additions & 108 deletions
@@ -31,101 +31,102 @@ tar -czf /stackable/spark-${PRODUCT}-stackable${RELEASE}-src.tar.gz .
 chmod g=u /stackable/spark-${PRODUCT}-stackable${RELEASE}-src.tar.gz
 EOF
 
-# hbase-connectors-builder: Build the Spark HBase connector and copy
-# required JARs into /stackable/spark/jars
-FROM stackable/image/java-devel AS hbase-connectors-builder
-
-ARG PRODUCT
-ARG RELEASE
-ARG HADOOP
-ARG HBASE
-ARG HBASE_CONNECTOR
-ARG STACKABLE_USER_UID
-
-WORKDIR /stackable
-
-# Copy the pom.xml file from the patched Spark source code to read the
-# versions used by Spark. The pom.xml defines child modules which are
-# not required and not copied, therefore mvn must be called with the
-# parameter --non-recursive.
-COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-source-builder \
-    /stackable/src/spark-k8s/patchable-work/worktree/${PRODUCT}/pom.xml \
-    spark/
-
-# Patch the hbase-connectors source code
-WORKDIR /stackable
-
-COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patches/patchable.toml /stackable/src/spark-k8s/hbase-connectors/stackable/patches/patchable.toml
-COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR} /stackable/src/spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR}
-
-RUN <<EOF
-cd "$(/stackable/patchable --images-repo-root=src checkout spark-k8s/hbase-connectors ${HBASE_CONNECTOR})/spark"
-
-NEW_VERSION="${HBASE_CONNECTOR}-stackable${RELEASE}"
-
-mvn versions:set -DnewVersion=$NEW_VERSION
-
-# Create snapshot of the source code including custom patches
-tar -czf /stackable/hbase-connector-${HBASE_CONNECTOR}-stackable${RELEASE}-src.tar.gz .
-
-# Building the hbase-connectors with JDK 17 is not yet supported, see
-# https://github.com/apache/hbase-connectors/pull/132.
-# As there are no JDK profiles, access to the non-public elements must
-# be enabled with --add-opens, see https://openjdk.org/jeps/403 and
-# https://openjdk.org/jeps/261#Breaking-encapsulation.
-export JDK_JAVA_OPTIONS="\
-    --add-opens java.base/java.lang=ALL-UNNAMED \
-    --add-opens java.base/java.util=ALL-UNNAMED"
-
-# Get the Scala version used by Spark
-SCALA_VERSION=$( \
-    mvn --quiet --non-recursive --no-transfer-progress --batch-mode --file /stackable/spark/pom.xml \
-    org.apache.maven.plugins:maven-help-plugin:3.5.0:evaluate \
-    -DforceStdout \
-    -Dexpression='project.properties(scala.version)')
-
-# Get the Scala binary version used by Spark
-SCALA_BINARY_VERSION=$( \
-    mvn --quiet --non-recursive --no-transfer-progress --batch-mode --file /stackable/spark/pom.xml \
-    org.apache.maven.plugins:maven-help-plugin:3.5.0:evaluate \
-    -DforceStdout \
-    -Dexpression='project.properties(scala.binary.version)')
-
-# Build the Spark HBase connector
-# Skip the tests because the MiniHBaseCluster does not get ready for
-# whatever reason:
-# Caused by: java.lang.RuntimeException: Master not active after 30000ms
-#   at org.apache.hadoop.hbase.util.JVMClusterUtil.waitForEvent(JVMClusterUtil.java:221)
-#   at org.apache.hadoop.hbase.util.JVMClusterUtil.startup(JVMClusterUtil.java:177)
-#   at org.apache.hadoop.hbase.LocalHBaseCluster.startup(LocalHBaseCluster.java:407)
-#   at org.apache.hadoop.hbase.MiniHBaseCluster.init(MiniHBaseCluster.java:250)
-mvn \
-    --batch-mode \
-    --no-transfer-progress \
-    --define spark.version="${PRODUCT}" \
-    --define scala.version="${SCALA_VERSION}" \
-    --define scala.binary.version="${SCALA_BINARY_VERSION}" \
-    --define hadoop-three.version="${HADOOP}" \
-    --define hbase.version="${HBASE}" \
-    --define skipTests \
-    clean package
-
-mkdir -p /stackable/spark/jars
-ln -s "$(pwd)/hbase-spark/target/hbase-spark-${HBASE_CONNECTOR}-stackable${RELEASE}.jar" /stackable/spark/jars/hbase-spark-${HBASE_CONNECTOR}-stackable${RELEASE}.jar
-
-cd /stackable/spark/jars
-
-# Download log4j-slf4j-impl-x.x.x.jar containing the StaticLoggerBinder
-# which is required by the connector.
-# Spark contains only log4j-slf4j2-impl-x.x.x.jar but not
-# log4j-slf4j-impl-x.x.x.jar. It is okay to have both JARs in the
-# classpath as long as they have the same version.
-mvn --quiet --non-recursive --no-transfer-progress --batch-mode --file /stackable/spark/pom.xml \
-    dependency:copy \
-    -Dartifact=org.apache.logging.log4j:log4j-slf4j-impl:'${log4j.version}' \
-    -DoutputDirectory=./jars
-chmod g=u /stackable/hbase-connector-${HBASE_CONNECTOR}-stackable${RELEASE}-src.tar.gz .
-EOF
+### # hbase-connectors-builder: Build the Spark HBase connector and copy
+### # required JARs into /stackable/spark/jars
+### FROM stackable/image/java-devel AS hbase-connectors-builder
+###
+### ARG PRODUCT
+### ARG RELEASE
+### ARG HADOOP
+### ARG HBASE
+### ARG HBASE_CONNECTOR
+### ARG STACKABLE_USER_UID
+###
+### WORKDIR /stackable
+###
+### # Copy the pom.xml file from the patched Spark source code to read the
+### # versions used by Spark. The pom.xml defines child modules which are
+### # not required and not copied, therefore mvn must be called with the
+### # parameter --non-recursive.
+### COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-source-builder \
+###     /stackable/src/spark-k8s/patchable-work/worktree/${PRODUCT}/pom.xml \
+###     spark/
+###
+### # Patch the hbase-connectors source code
+### WORKDIR /stackable
+###
+### COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patches/patchable.toml /stackable/src/spark-k8s/hbase-connectors/stackable/patches/patchable.toml
+### COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR} /stackable/src/spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR}
+###
+### RUN <<EOF
+### cd "$(/stackable/patchable --images-repo-root=src checkout spark-k8s/hbase-connectors ${HBASE_CONNECTOR})/spark"
+###
+### NEW_VERSION="${HBASE_CONNECTOR}-stackable${RELEASE}"
+###
+### mvn versions:set -DnewVersion=$NEW_VERSION
+###
+### # Create snapshot of the source code including custom patches
+### tar -czf /stackable/hbase-connector-${HBASE_CONNECTOR}-stackable${RELEASE}-src.tar.gz .
+###
+### # Building the hbase-connectors with JDK 17 is not yet supported, see
+### # https://github.com/apache/hbase-connectors/pull/132.
+### # As there are no JDK profiles, access to the non-public elements must
+### # be enabled with --add-opens, see https://openjdk.org/jeps/403 and
+### # https://openjdk.org/jeps/261#Breaking-encapsulation.
+### export JDK_JAVA_OPTIONS="\
+###     --add-opens java.base/java.lang=ALL-UNNAMED \
+###     --add-opens java.base/java.util=ALL-UNNAMED"
+###
+### # Get the Scala version used by Spark
+### SCALA_VERSION=$( \
+###     mvn --quiet --non-recursive --no-transfer-progress --batch-mode --file /stackable/spark/pom.xml \
+###     org.apache.maven.plugins:maven-help-plugin:3.5.0:evaluate \
+###     -DforceStdout \
+###     -Dexpression='project.properties(scala.version)')
+###
+### # Get the Scala binary version used by Spark
+### SCALA_BINARY_VERSION=$( \
+###     mvn --quiet --non-recursive --no-transfer-progress --batch-mode --file /stackable/spark/pom.xml \
+###     org.apache.maven.plugins:maven-help-plugin:3.5.0:evaluate \
+###     -DforceStdout \
+###     -Dexpression='project.properties(scala.binary.version)')
+###
+### # Build the Spark HBase connector
+### # Skip the tests because the MiniHBaseCluster does not get ready for
+### # whatever reason:
+### # Caused by: java.lang.RuntimeException: Master not active after 30000ms
+### #   at org.apache.hadoop.hbase.util.JVMClusterUtil.waitForEvent(JVMClusterUtil.java:221)
+### #   at org.apache.hadoop.hbase.util.JVMClusterUtil.startup(JVMClusterUtil.java:177)
+### #   at org.apache.hadoop.hbase.LocalHBaseCluster.startup(LocalHBaseCluster.java:407)
+### #   at org.apache.hadoop.hbase.MiniHBaseCluster.init(MiniHBaseCluster.java:250)
+### mvn \
+###     --batch-mode \
+###     --no-transfer-progress \
+###     --define spark.version="${PRODUCT}" \
+###     --define scala.version="${SCALA_VERSION}" \
+###     --define scala.binary.version="${SCALA_BINARY_VERSION}" \
+###     --define hadoop-three.version="${HADOOP}" \
+###     --define hbase.version="${HBASE}" \
+###     --define skipTests \
+###     --define maven.test.skip=true \
+###     clean package
+###
+### mkdir -p /stackable/spark/jars
+### ln -s "$(pwd)/hbase-spark/target/hbase-spark-${HBASE_CONNECTOR}-stackable${RELEASE}.jar" /stackable/spark/jars/hbase-spark-${HBASE_CONNECTOR}-stackable${RELEASE}.jar
+###
+### cd /stackable/spark/jars
+###
+### # Download log4j-slf4j-impl-x.x.x.jar containing the StaticLoggerBinder
+### # which is required by the connector.
+### # Spark contains only log4j-slf4j2-impl-x.x.x.jar but not
+### # log4j-slf4j-impl-x.x.x.jar. It is okay to have both JARs in the
+### # classpath as long as they have the same version.
+### mvn --quiet --non-recursive --no-transfer-progress --batch-mode --file /stackable/spark/pom.xml \
+###     dependency:copy \
+###     -Dartifact=org.apache.logging.log4j:log4j-slf4j-impl:'${log4j.version}' \
+###     -DoutputDirectory=./jars
+### chmod g=u /stackable/hbase-connector-${HBASE_CONNECTOR}-stackable${RELEASE}-src.tar.gz .
+### EOF
 
 
 # spark-builder: Build Spark into /stackable/spark-${PRODUCT}/dist,
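Note: the entire hbase-connectors-builder stage is disabled here by prefixing every line with "### ", presumably because the connector does not yet build against Spark 4.0.0. The commented-out version also gains "--define maven.test.skip=true"; unlike "-DskipTests", which only skips running tests, maven.test.skip=true also skips compiling the test sources. A minimal sketch (not part of the commit) of how the stage could later be re-enabled, assuming the "### " prefix is used only for this disabled block:

    # Hypothetical helper: strip the "### " comment prefix in place,
    # then syntax-check the Dockerfile (BuildKit's --check, if available).
    sed -i 's/^### \{0,1\}//' spark-k8s/Dockerfile
    docker build --check -f spark-k8s/Dockerfile .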
@@ -179,6 +180,7 @@ RUN <<EOF
 
 ./dev/make-distribution.sh \
     --mvn /tmp/apache-maven-${MAVEN_VERSION}/bin/mvn \
+    --connect \
     -Dhadoop.version="${HADOOP}-stackable${RELEASE}" \
     -DskipTests \
     -P'hadoop-3' -Pkubernetes -Phive -Phive-thriftserver \
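Note: "--connect" appears to be a Spark 4 addition to dev/make-distribution.sh that bundles the Spark Connect artifacts into the distribution. A quick, hypothetical way to confirm the checked-out source actually recognizes the flag:

    # Run from the Spark source root; shows where the flag is parsed.
    grep -n -- '--connect' dev/make-distribution.sh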
@@ -195,18 +197,19 @@ EOF
 RUN <<EOF
 
 # Get the Scala binary version
-SCALA_BINARY_VERSION=$( \
-    mvn --quiet --non-recursive --no-transfer-progress --batch-mode --file pom.xml \
-    org.apache.maven.plugins:maven-help-plugin:3.5.0:evaluate \
-    -DforceStdout \
-    -Dexpression='project.properties(scala.binary.version)')
+# SCALA_BINARY_VERSION=$( \
+#     mvn --quiet --non-recursive --no-transfer-progress --batch-mode --file pom.xml \
+#     org.apache.maven.plugins:maven-help-plugin:3.5.0:evaluate \
+#     -DforceStdout \
+#     -Dexpression='project.properties(scala.binary.version)')
+SCALA_BINARY_VERSION=$(grep "scala.binary.version" pom.xml | head -n1 | awk -F '[<>]' '{print $3}')
 
 mkdir -p dist/connect
 cd dist/connect
 
-cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/server/target/spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" .
-cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/common/target/spark-connect-common_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" .
-cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/client/jvm/target/spark-connect-client-jvm_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" .
+cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/sql/connect/server/target/spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" .
+cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/sql/connect/common/target/spark-connect-common_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" .
+cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/sql/connect/client/jvm/target/spark-connect-client-jvm_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" .
 
 # The Spark operator expects a file named spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}.jar without the -stackable${RELEASE} suffix.
 ln -s "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}.jar"
@@ -229,10 +232,10 @@ COPY --from=hadoop-builder --chown=${STACKABLE_USER_UID}:0 \
     /stackable/hadoop/share/hadoop/tools/lib/azure-keyvault-core-${AZURE_KEYVAULT_CORE}.jar \
     ./
 
-# Copy the HBase connector including required modules
-COPY --from=hbase-connectors-builder --chown=${STACKABLE_USER_UID}:0 \
-    /stackable/spark/jars/* \
-    ./
+### # Copy the HBase connector including required modules
+### COPY --from=hbase-connectors-builder --chown=${STACKABLE_USER_UID}:0 \
+###     /stackable/spark/jars/* \
+###     ./
 
 # Copy modules required to access HBase
 COPY --from=hbase-builder --chown=${STACKABLE_USER_UID}:0 \
@@ -310,7 +313,7 @@ ENV PYTHONPATH=$SPARK_HOME/python
 
 COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-builder /stackable/spark-${PRODUCT}-stackable${RELEASE}/dist /stackable/spark
 COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-source-builder /stackable/spark-${PRODUCT}-stackable${RELEASE}-src.tar.gz /stackable
-COPY --chown=${STACKABLE_USER_UID}:0 --from=hbase-connectors-builder /stackable/hbase-connector-${HBASE_CONNECTOR}-stackable${RELEASE}-src.tar.gz /stackable
+### COPY --chown=${STACKABLE_USER_UID}:0 --from=hbase-connectors-builder /stackable/hbase-connector-${HBASE_CONNECTOR}-stackable${RELEASE}-src.tar.gz /stackable
 COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-builder /stackable/spark-${PRODUCT}-stackable${RELEASE}/assembly/target/bom.json /stackable/spark/spark-${PRODUCT}-stackable${RELEASE}.cdx.json
 COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-builder /stackable/jmx /stackable/jmx
 COPY --from=spark-builder /usr/bin/tini /usr/bin/tini
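Note: both remaining consumers of the disabled stage, the jar COPY in the previous hunk and the source-tarball COPY here, must be commented out together with the stage itself, since a COPY --from= that names a nonexistent build stage fails the build. A hypothetical check for leftover references:

    # List every reference to the disabled stage (run from the repo root);
    # anything not prefixed with "### " would still break the build.
    grep -n 'hbase-connectors-builder' spark-k8s/Dockerfile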
Lines changed: 38 additions & 0 deletions
@@ -0,0 +1,38 @@
+From 2da5608928018dd017c91b904eb8f84a4f6df78a Mon Sep 17 00:00:00 2001
+From: Razvan-Daniel Mihai <[email protected]>
+Date: Fri, 4 Jul 2025 15:54:55 +0200
+Subject: Update CycloneDX plugin
+
+---
+ dev/make-distribution.sh | 1 -
+ pom.xml                  | 5 +++++
+ 2 files changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh
+index 16607e45ae..44e345a245 100755
+--- a/dev/make-distribution.sh
++++ b/dev/make-distribution.sh
+@@ -176,7 +176,6 @@ BUILD_COMMAND=("$MVN" clean package \
+     -Dmaven.javadoc.skip=true \
+     -Dmaven.scaladoc.skip=true \
+     -Dmaven.source.skip \
+-    -Dcyclonedx.skip=true \
+     $@)
+ 
+ # Actually build the jar
+diff --git a/pom.xml b/pom.xml
+index 443d46a430..632920f100 100644
+--- a/pom.xml
++++ b/pom.xml
+@@ -3327,6 +3327,11 @@
+         <groupId>org.cyclonedx</groupId>
+         <artifactId>cyclonedx-maven-plugin</artifactId>
+         <version>2.8.0</version>
++        <configuration>
++          <projectType>application</projectType>
++          <schemaVersion>1.5</schemaVersion>
++          <skipNotDeployed>false</skipNotDeployed>
++        </configuration>
+        <executions>
+          <execution>
+            <phase>package</phase>
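Note: this new patch file (its path is not shown in this view) re-enables CycloneDX SBOM generation during make-distribution.sh by dropping -Dcyclonedx.skip=true, and configures the plugin to emit an application-type BOM in schema version 1.5. Setting skipNotDeployed to false makes the plugin generate BOMs even for modules that are not deployed, which is presumably what produces the assembly/target/bom.json that the Dockerfile copies into the image. A hypothetical manual run with the same plugin version:

    # From the Spark source root; writes target/bom.json per module (sketch).
    mvn --batch-mode org.cyclonedx:cyclonedx-maven-plugin:2.8.0:makeBom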
Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
+base = "fa33ea000a0bda9e5a3fa1af98e8e85b8cc5e4d4"
+mirror = "https://github.com/stackabletech/spark.git"
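Note: this two-line patchable.toml (its path is likewise not shown here) pins the Spark 4.0.0 patch series to an upstream base commit and a mirror repository; the patchable tool checks the base commit out of the mirror and applies the patches on top, as seen for hbase-connectors earlier in the Dockerfile. An assumed invocation, by analogy with that call (the "spark-k8s/spark" target name is a guess):

    # Hypothetical checkout of the pinned base commit plus patches:
    cd "$(/stackable/patchable --images-repo-root=src checkout spark-k8s/spark ${PRODUCT})"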
