
Commit 9c23d6f

successful spark-4 build
1 parent 023006d commit 9c23d6f

File tree

1 file changed: +138 −120 lines changed

spark-k8s/Dockerfile

Lines changed: 138 additions & 120 deletions
@@ -31,102 +31,110 @@ tar -czf /stackable/spark-${PRODUCT}-stackable${RELEASE}-src.tar.gz .
 chmod g=u /stackable/spark-${PRODUCT}-stackable${RELEASE}-src.tar.gz
 EOF
 
-### # hbase-connectors-builder: Build the Spark HBase connector and copy
-### # required JARs into /stackable/spark/jars
-### FROM stackable/image/java-devel AS hbase-connectors-builder
-###
-### ARG PRODUCT
-### ARG RELEASE
-### ARG HADOOP
-### ARG HBASE
-### ARG HBASE_CONNECTOR
-### ARG STACKABLE_USER_UID
-###
-### WORKDIR /stackable
-###
-### # Copy the pom.xml file from the patched Spark source code to read the
-### # versions used by Spark. The pom.xml defines child modules which are
-### # not required and not copied, therefore mvn must be called with the
-### # parameter --non-recursive.
-### COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-source-builder \
-###     /stackable/src/spark-k8s/patchable-work/worktree/${PRODUCT}/pom.xml \
-###     spark/
-###
-### # Patch the hbase-connectors source code
-### WORKDIR /stackable
-###
-### COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patches/patchable.toml /stackable/src/spark-k8s/hbase-connectors/stackable/patches/patchable.toml
-### COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR} /stackable/src/spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR}
-###
-### RUN <<EOF
-### cd "$(/stackable/patchable --images-repo-root=src checkout spark-k8s/hbase-connectors ${HBASE_CONNECTOR})/spark"
-###
-### NEW_VERSION="${HBASE_CONNECTOR}-stackable${RELEASE}"
-###
-### mvn versions:set -DnewVersion=$NEW_VERSION
-###
-### # Create snapshot of the source code including custom patches
-### tar -czf /stackable/hbase-connector-${HBASE_CONNECTOR}-stackable${RELEASE}-src.tar.gz .
-###
-### # Building the hbase-connectors with JDK 17 is not yet supported, see
-### # https://github.com/apache/hbase-connectors/pull/132.
-### # As there are no JDK profiles, access to the non-public elements must
-### # be enabled with --add-opens, see https://openjdk.org/jeps/403 and
-### # https://openjdk.org/jeps/261#Breaking-encapsulation.
-### export JDK_JAVA_OPTIONS="\
-###     --add-opens java.base/java.lang=ALL-UNNAMED \
-###     --add-opens java.base/java.util=ALL-UNNAMED"
-###
-### # Get the Scala version used by Spark
-### SCALA_VERSION=$( \
-###     mvn --quiet --non-recursive --no-transfer-progress --batch-mode --file /stackable/spark/pom.xml \
-###         org.apache.maven.plugins:maven-help-plugin:3.5.0:evaluate \
-###         -DforceStdout \
-###         -Dexpression='project.properties(scala.version)')
-###
-### # Get the Scala binary version used by Spark
-### SCALA_BINARY_VERSION=$( \
-###     mvn --quiet --non-recursive --no-transfer-progress --batch-mode --file /stackable/spark/pom.xml \
-###         org.apache.maven.plugins:maven-help-plugin:3.5.0:evaluate \
-###         -DforceStdout \
-###         -Dexpression='project.properties(scala.binary.version)')
-###
-### # Build the Spark HBase connector
-### # Skip the tests because the MiniHBaseCluster does not get ready for
-### # whatever reason:
-### # Caused by: java.lang.RuntimeException: Master not active after 30000ms
-### #   at org.apache.hadoop.hbase.util.JVMClusterUtil.waitForEvent(JVMClusterUtil.java:221)
-### #   at org.apache.hadoop.hbase.util.JVMClusterUtil.startup(JVMClusterUtil.java:177)
-### #   at org.apache.hadoop.hbase.LocalHBaseCluster.startup(LocalHBaseCluster.java:407)
-### #   at org.apache.hadoop.hbase.MiniHBaseCluster.init(MiniHBaseCluster.java:250)
-### mvn \
-###     --batch-mode \
-###     --no-transfer-progress \
-###     --define spark.version="${PRODUCT}" \
-###     --define scala.version="${SCALA_VERSION}" \
-###     --define scala.binary.version="${SCALA_BINARY_VERSION}" \
-###     --define hadoop-three.version="${HADOOP}" \
-###     --define hbase.version="${HBASE}" \
-###     --define skipTests \
-###     --define maven.test.skip=true \
-###     clean package
-###
-### mkdir -p /stackable/spark/jars
-### ln -s "$(pwd)/hbase-spark/target/hbase-spark-${HBASE_CONNECTOR}-stackable${RELEASE}.jar" /stackable/spark/jars/hbase-spark-${HBASE_CONNECTOR}-stackable${RELEASE}.jar
-###
-### cd /stackable/spark/jars
-###
-### # Download log4j-slf4j-impl-x.x.x.jar containing the StaticLoggerBinder
-### # which is required by the connector.
-### # Spark contains only log4j-slf4j2-impl-x.x.x.jar but not
-### # log4j-slf4j-impl-x.x.x.jar. It is okay to have both JARs in the
-### # classpath as long as they have the same version.
-### mvn --quiet --non-recursive --no-transfer-progress --batch-mode --file /stackable/spark/pom.xml \
-###     dependency:copy \
-###     -Dartifact=org.apache.logging.log4j:log4j-slf4j-impl:'${log4j.version}' \
-###     -DoutputDirectory=./jars
-### chmod g=u /stackable/hbase-connector-${HBASE_CONNECTOR}-stackable${RELEASE}-src.tar.gz .
-### EOF
+# hbase-connectors-builder: Build the Spark HBase connector and copy
+# required JARs into /stackable/spark/jars
+FROM stackable/image/java-devel AS hbase-connectors-builder
+
+ARG PRODUCT
+ARG RELEASE
+ARG HADOOP
+ARG HBASE
+ARG HBASE_CONNECTOR
+ARG STACKABLE_USER_UID
+
+WORKDIR /stackable
+
+# Copy the pom.xml file from the patched Spark source code to read the
+# versions used by Spark. The pom.xml defines child modules which are
+# not required and not copied, therefore mvn must be called with the
+# parameter --non-recursive.
+COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-source-builder \
+    /stackable/src/spark-k8s/patchable-work/worktree/${PRODUCT}/pom.xml \
+    spark/
+
+# Patch the hbase-connectors source code
+WORKDIR /stackable
+
+COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patches/patchable.toml /stackable/src/spark-k8s/hbase-connectors/stackable/patches/patchable.toml
+COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR} /stackable/src/spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR}
+
+RUN <<EOF
+
+# HBase connectors don't support Spark 4 yet, so we skip the build.
+if [[ "${PRODUCT}" == 4* ]]; then
+    # Create this empty directory so that the following COPY layers succeed.
+    mkdir -p /stackable/spark/jars
+    exit 0
+fi
+
+cd "$(/stackable/patchable --images-repo-root=src checkout spark-k8s/hbase-connectors ${HBASE_CONNECTOR})/spark"
+
+NEW_VERSION="${HBASE_CONNECTOR}-stackable${RELEASE}"
+
+mvn versions:set -DnewVersion=$NEW_VERSION
+
+# Create snapshot of the source code including custom patches
+tar -czf /stackable/hbase-connector-${HBASE_CONNECTOR}-stackable${RELEASE}-src.tar.gz .
+
+# Building the hbase-connectors with JDK 17 is not yet supported, see
+# https://github.com/apache/hbase-connectors/pull/132.
+# As there are no JDK profiles, access to the non-public elements must
+# be enabled with --add-opens, see https://openjdk.org/jeps/403 and
+# https://openjdk.org/jeps/261#Breaking-encapsulation.
+export JDK_JAVA_OPTIONS="\
+    --add-opens java.base/java.lang=ALL-UNNAMED \
+    --add-opens java.base/java.util=ALL-UNNAMED"
+
+# Get the Scala version used by Spark
+SCALA_VERSION=$( \
+    mvn --quiet --non-recursive --no-transfer-progress --batch-mode --file /stackable/spark/pom.xml \
+        org.apache.maven.plugins:maven-help-plugin:3.5.0:evaluate \
+        -DforceStdout \
+        -Dexpression='project.properties(scala.version)')
+
+# Get the Scala binary version used by Spark
+SCALA_BINARY_VERSION=$( \
+    mvn --quiet --non-recursive --no-transfer-progress --batch-mode --file /stackable/spark/pom.xml \
+        org.apache.maven.plugins:maven-help-plugin:3.5.0:evaluate \
+        -DforceStdout \
+        -Dexpression='project.properties(scala.binary.version)')
+
+# Build the Spark HBase connector
+# Skip the tests because the MiniHBaseCluster does not get ready for
+# whatever reason:
+# Caused by: java.lang.RuntimeException: Master not active after 30000ms
+#   at org.apache.hadoop.hbase.util.JVMClusterUtil.waitForEvent(JVMClusterUtil.java:221)
+#   at org.apache.hadoop.hbase.util.JVMClusterUtil.startup(JVMClusterUtil.java:177)
+#   at org.apache.hadoop.hbase.LocalHBaseCluster.startup(LocalHBaseCluster.java:407)
+#   at org.apache.hadoop.hbase.MiniHBaseCluster.init(MiniHBaseCluster.java:250)
+mvn \
+    --batch-mode \
+    --no-transfer-progress \
+    --define spark.version="${PRODUCT}" \
+    --define scala.version="${SCALA_VERSION}" \
+    --define scala.binary.version="${SCALA_BINARY_VERSION}" \
+    --define hadoop-three.version="${HADOOP}" \
+    --define hbase.version="${HBASE}" \
+    --define skipTests \
+    --define maven.test.skip=true \
+    clean package
+
+mkdir -p /stackable/spark/jars
+ln -s "$(pwd)/hbase-spark/target/hbase-spark-${HBASE_CONNECTOR}-stackable${RELEASE}.jar" /stackable/spark/jars/hbase-spark-${HBASE_CONNECTOR}-stackable${RELEASE}.jar
+
+cd /stackable/spark/jars
+
+# Download log4j-slf4j-impl-x.x.x.jar containing the StaticLoggerBinder
+# which is required by the connector.
+# Spark contains only log4j-slf4j2-impl-x.x.x.jar but not
+# log4j-slf4j-impl-x.x.x.jar. It is okay to have both JARs in the
+# classpath as long as they have the same version.
+mvn --quiet --non-recursive --no-transfer-progress --batch-mode --file /stackable/spark/pom.xml \
+    dependency:copy \
+    -Dartifact=org.apache.logging.log4j:log4j-slf4j-impl:'${log4j.version}' \
+    -DoutputDirectory=./jars
+chmod g=u /stackable/hbase-connector-${HBASE_CONNECTOR}-stackable${RELEASE}-src.tar.gz .
+EOF
 
 
 # spark-builder: Build Spark into /stackable/spark-${PRODUCT}/dist,
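An aside on the new guard at the top of the RUN heredoc above: inside bash [[ ]], an unquoted right-hand side of == is treated as a glob, so the test matches every PRODUCT value that starts with 4, and exit 0 then ends the heredoc script early while still producing a successful layer. A minimal sketch of the behavior (the version strings are illustrative):

    for PRODUCT in 3.5.5 4.0.0; do
        if [[ "${PRODUCT}" == 4* ]]; then
            echo "${PRODUCT}: skip the connector build, leave an empty jars dir"
        else
            echo "${PRODUCT}: build hbase-connectors"
        fi
    done
    # prints:
    # 3.5.5: build hbase-connectors
    # 4.0.0: skip the connector build, leave an empty jars dir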
@@ -159,9 +167,6 @@ COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-source-builder \
 COPY --from=hadoop-builder --chown=${STACKABLE_USER_UID}:0 /stackable/patched-libs /stackable/patched-libs
 
 # >>> Build spark
-# Compiling the tests takes a lot of time, so we skip them
-# -Dmaven.test.skip=true skips both the compilation and execution of tests
-# -DskipTests skips only the execution
 RUN <<EOF
 # Make Maven aware of custom Stackable libraries
 mv /stackable/patched-libs/maven /root/.m2/repository
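A note on the comment dropped in this hunk: both test-skipping flags still appear in the Dockerfile (the connector stage above passes both, and make-distribution below passes -DskipTests), and they differ in standard Maven/Surefire behavior. As plain invocations:

    # -DskipTests: compile the test sources but do not execute them
    mvn clean package -DskipTests
    # -Dmaven.test.skip=true: skip both compilation and execution of tests
    mvn clean package -Dmaven.test.skip=true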
@@ -176,16 +181,35 @@ RUN <<EOF
 ORIGINAL_VERSION="${PRODUCT}"
 NEW_VERSION="${PRODUCT}-stackable${RELEASE}"
 
+STACKABLE_HADOOP_VERSION="${HADOOP}-stackable${RELEASE}"
+
+MAVEN_BIN="/tmp/apache-maven-${MAVEN_VERSION}/bin/mvn"
 export MAVEN_OPTS="-Xss64m -Xmx2g -XX:ReservedCodeCacheSize=1g"
 
-./dev/make-distribution.sh \
-    --mvn /tmp/apache-maven-${MAVEN_VERSION}/bin/mvn \
-    --connect \
-    -Dhadoop.version="${HADOOP}-stackable${RELEASE}" \
-    -DskipTests \
-    -P'hadoop-3' -Pkubernetes -Phive -Phive-thriftserver \
-    --no-transfer-progress \
-    --batch-mode
+case "${PRODUCT}" in
+4*)
+    # The Spark 4 script has a --connect option which is not available in Spark 3.
+    # This option is required to build Spark Connect.
+    # It also breaks the Spark 3 build, so we ensure it is only provided here.
+    ./dev/make-distribution.sh \
+        --mvn "${MAVEN_BIN}" \
+        --connect \
+        -Dhadoop.version="${STACKABLE_HADOOP_VERSION}" \
+        -DskipTests \
+        -P'hadoop-3' -Pkubernetes -Phive -Phive-thriftserver \
+        --no-transfer-progress \
+        --batch-mode
+    ;;
+*)
+    ./dev/make-distribution.sh \
+        --mvn "${MAVEN_BIN}" \
+        -Dhadoop.version="${STACKABLE_HADOOP_VERSION}" \
+        -DskipTests \
+        -P'hadoop-3' -Pkubernetes -Phive -Phive-thriftserver \
+        --no-transfer-progress \
+        --batch-mode
+    ;;
+esac
 
 sed -i "s/${NEW_VERSION}/${ORIGINAL_VERSION}/g" assembly/target/bom.json
 EOF
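One detail worth pausing on in the case statement above: bash only glob-matches unquoted case patterns. A quoted pattern such as "4*" matches the literal two-character string 4*, never 4.0.0, so the Spark 4 arm must be written unquoted (4*), consistent with the [[ "${PRODUCT}" == 4* ]] guard in the connector stage. A minimal check (the version is illustrative):

    PRODUCT="4.0.0"
    case "${PRODUCT}" in
    "4*") echo "literal match only";;   # would never fire for 4.0.0
    4*)   echo "glob match";;           # fires for 4.0.0, 4.1.x, ...
    *)    echo "fallback";;
    esac
    # prints: glob match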
@@ -196,12 +220,6 @@ EOF
 # we create a new dist/connect folder, and copy them here.
 RUN <<EOF
 
-# Get the Scala binary version
-# SCALA_BINARY_VERSION=$( \
-#     mvn --quiet --non-recursive --no-transfer-progress --batch-mode --file pom.xml \
-#         org.apache.maven.plugins:maven-help-plugin:3.5.0:evaluate \
-#         -DforceStdout \
-#         -Dexpression='project.properties(scala.binary.version)')
 SCALA_BINARY_VERSION=$(grep "scala.binary.version" pom.xml | head -n1 | awk -F '[<>]' '{print $3}')
 
 mkdir -p dist/connect
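The surviving line extracts the Scala binary version straight from the pom instead of invoking Maven: with -F '[<>]' awk splits the line on angle brackets, so field 3 is the element's text. A quick sketch against a representative pom fragment (the value 2.13 is illustrative):

    printf '  <scala.binary.version>2.13</scala.binary.version>\n' |
        grep "scala.binary.version" | head -n1 | awk -F '[<>]' '{print $3}'
    # prints: 2.13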
@@ -211,8 +229,8 @@ RUN <<EOF
 cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/sql/connect/common/target/spark-connect-common_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" .
 cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/sql/connect/client/jvm/target/spark-connect-client-jvm_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" .
 
-# The Spark operator expects a file named spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}.jar without the -stackable${RELEASE} suffix.
-ln -s "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}.jar"
+# The Spark operator expects a file named spark-connect-${PRODUCT}.jar.
+ln -s "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" "spark-connect-${PRODUCT}.jar"
 EOF
 
 # <<< Build spark
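For concreteness, the renamed link now drops both the Scala binary version and the -stackable suffix from the operator-facing name. With illustrative values (SCALA_BINARY_VERSION=2.13, PRODUCT=4.0.0, RELEASE=0.0.0-dev) the symlink resolves as:

    # real jar produced by the build: spark-connect_2.13-4.0.0-stackable0.0.0-dev.jar
    # operator-facing name:           spark-connect-4.0.0.jar
    ln -s "spark-connect_2.13-4.0.0-stackable0.0.0-dev.jar" "spark-connect-4.0.0.jar"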
@@ -232,10 +250,10 @@ COPY --from=hadoop-builder --chown=${STACKABLE_USER_UID}:0 \
     /stackable/hadoop/share/hadoop/tools/lib/azure-keyvault-core-${AZURE_KEYVAULT_CORE}.jar \
     ./
 
-### # Copy the HBase connector including required modules
-### COPY --from=hbase-connectors-builder --chown=${STACKABLE_USER_UID}:0 \
-###     /stackable/spark/jars/* \
-###     ./
+# Copy the HBase connector including required modules
+COPY --from=hbase-connectors-builder --chown=${STACKABLE_USER_UID}:0 \
+    /stackable/spark/jars/* \
+    ./
 
 # Copy modules required to access HBase
 COPY --from=hbase-builder --chown=${STACKABLE_USER_UID}:0 \
@@ -313,7 +331,7 @@ ENV PYTHONPATH=$SPARK_HOME/python
 
 COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-builder /stackable/spark-${PRODUCT}-stackable${RELEASE}/dist /stackable/spark
 COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-source-builder /stackable/spark-${PRODUCT}-stackable${RELEASE}-src.tar.gz /stackable
-### COPY --chown=${STACKABLE_USER_UID}:0 --from=hbase-connectors-builder /stackable/hbase-connector-${HBASE_CONNECTOR}-stackable${RELEASE}-src.tar.gz /stackable
+COPY --chown=${STACKABLE_USER_UID}:0 --from=hbase-connectors-builder /stackable/hbase-connector-${HBASE_CONNECTOR}-stackable${RELEASE}-src.tar.gz* /stackable
 COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-builder /stackable/spark-${PRODUCT}-stackable${RELEASE}/assembly/target/bom.json /stackable/spark/spark-${PRODUCT}-stackable${RELEASE}.cdx.json
 COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-builder /stackable/jmx /stackable/jmx
 COPY --from=spark-builder /usr/bin/tini /usr/bin/tini
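The trailing * on the connector tarball source is what keeps that layer working for Spark 4: when the connector build is skipped no tarball exists, and the wildcard lets the COPY match nothing instead of failing on a missing exact path, which this commit relies on. The same pattern in isolation (stage and file names hypothetical):

    # Copies optional.txt when the builder stage produced it; when the
    # glob matches nothing, the layer still succeeds (hypothetical names).
    COPY --from=builder /out/optional.txt* /dest/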
