@@ -31,102 +31,110 @@ tar -czf /stackable/spark-${PRODUCT}-stackable${RELEASE}-src.tar.gz .
 chmod g=u /stackable/spark-${PRODUCT}-stackable${RELEASE}-src.tar.gz
 EOF

- # ## # hbase-connectors-builder: Build the Spark HBase connector and copy
- # ## # required JARs into /stackable/spark/jars
- # ## FROM stackable/image/java-devel AS hbase-connectors-builder
- # ##
- # ## ARG PRODUCT
- # ## ARG RELEASE
- # ## ARG HADOOP
- # ## ARG HBASE
- # ## ARG HBASE_CONNECTOR
- # ## ARG STACKABLE_USER_UID
- # ##
- # ## WORKDIR /stackable
- # ##
- # ## # Copy the pom.xml file from the patched Spark source code to read the
- # ## # versions used by Spark. The pom.xml defines child modules which are
- # ## # not required and not copied, therefore mvn must be called with the
- # ## # parameter --non-recursive.
- # ## COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-source-builder \
- # ## /stackable/src/spark-k8s/patchable-work/worktree/${PRODUCT}/pom.xml \
- # ## spark/
- # ##
- # ## # Patch the hbase-connectors source code
- # ## WORKDIR /stackable
- # ##
- # ## COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patches/patchable.toml /stackable/src/spark-k8s/hbase-connectors/stackable/patches/patchable.toml
- # ## COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR} /stackable/src/spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR}
- # ##
- # ## RUN <<EOF
- # ## cd "$(/stackable/patchable --images-repo-root=src checkout spark-k8s/hbase-connectors ${HBASE_CONNECTOR})/spark"
- # ##
- # ## NEW_VERSION="${HBASE_CONNECTOR}-stackable${RELEASE}"
- # ##
- # ## mvn versions:set -DnewVersion=$NEW_VERSION
- # ##
- # ## # Create snapshot of the source code including custom patches
- # ## tar -czf /stackable/hbase-connector-${HBASE_CONNECTOR}-stackable${RELEASE}-src.tar.gz .
- # ##
- # ## # Building the hbase-connectors with JDK 17 is not yet supported, see
- # ## # https://github.com/apache/hbase-connectors/pull/132.
- # ## # As there are no JDK profiles, access to the non-public elements must
- # ## # be enabled with --add-opens, see https://openjdk.org/jeps/403 and
- # ## # https://openjdk.org/jeps/261#Breaking-encapsulation.
- # ## export JDK_JAVA_OPTIONS="\
- # ## --add-opens java.base/java.lang=ALL-UNNAMED \
- # ## --add-opens java.base/java.util=ALL-UNNAMED"
- # ##
- # ## # Get the Scala version used by Spark
- # ## SCALA_VERSION=$( \
- # ## mvn --quiet --non-recursive --no-transfer-progress --batch-mode --file /stackable/spark/pom.xml \
- # ## org.apache.maven.plugins:maven-help-plugin:3.5.0:evaluate \
- # ## -DforceStdout \
- # ## -Dexpression='project.properties(scala.version)')
- # ##
- # ## # Get the Scala binary version used by Spark
- # ## SCALA_BINARY_VERSION=$( \
- # ## mvn --quiet --non-recursive --no-transfer-progress --batch-mode --file /stackable/spark/pom.xml \
- # ## org.apache.maven.plugins:maven-help-plugin:3.5.0:evaluate \
- # ## -DforceStdout \
- # ## -Dexpression='project.properties(scala.binary.version)')
- # ##
- # ## # Build the Spark HBase connector
- # ## # Skip the tests because the MiniHBaseCluster does not get ready for
- # ## # whatever reason:
- # ## # Caused by: java.lang.RuntimeException: Master not active after 30000ms
- # ## # at org.apache.hadoop.hbase.util.JVMClusterUtil.waitForEvent(JVMClusterUtil.java:221)
- # ## # at org.apache.hadoop.hbase.util.JVMClusterUtil.startup(JVMClusterUtil.java:177)
- # ## # at org.apache.hadoop.hbase.LocalHBaseCluster.startup(LocalHBaseCluster.java:407)
- # ## # at org.apache.hadoop.hbase.MiniHBaseCluster.init(MiniHBaseCluster.java:250)
- # ## mvn \
- # ## --batch-mode \
- # ## --no-transfer-progress \
- # ## --define spark.version="${PRODUCT}" \
- # ## --define scala.version="${SCALA_VERSION}" \
- # ## --define scala.binary.version="${SCALA_BINARY_VERSION}" \
- # ## --define hadoop-three.version="${HADOOP}" \
- # ## --define hbase.version="${HBASE}" \
- # ## --define skipTests \
- # ## --define maven.test.skip=true \
- # ## clean package
- # ##
- # ## mkdir -p /stackable/spark/jars
- # ## ln -s "$(pwd)/hbase-spark/target/hbase-spark-${HBASE_CONNECTOR}-stackable${RELEASE}.jar" /stackable/spark/jars/hbase-spark-${HBASE_CONNECTOR}-stackable${RELEASE}.jar
- # ##
- # ## cd /stackable/spark/jars
- # ##
- # ## # Download log4j-slf4j-impl-x.x.x.jar containing the StaticLoggerBinder
- # ## # which is required by the connector.
- # ## # Spark contains only log4j-slf4j2-impl-x.x.x.jar but not
- # ## # log4j-slf4j-impl-x.x.x.jar. It is okay to have both JARs in the
- # ## # classpath as long as they have the same version.
- # ## mvn --quiet --non-recursive --no-transfer-progress --batch-mode --file /stackable/spark/pom.xml \
- # ## dependency:copy \
- # ## -Dartifact=org.apache.logging.log4j:log4j-slf4j-impl:'${log4j.version}' \
- # ## -DoutputDirectory=./jars
- # ## chmod g=u /stackable/hbase-connector-${HBASE_CONNECTOR}-stackable${RELEASE}-src.tar.gz .
- # ## EOF
+ # hbase-connectors-builder: Build the Spark HBase connector and copy
+ # required JARs into /stackable/spark/jars
+ FROM stackable/image/java-devel AS hbase-connectors-builder
+
+ ARG PRODUCT
+ ARG RELEASE
+ ARG HADOOP
+ ARG HBASE
+ ARG HBASE_CONNECTOR
+ ARG STACKABLE_USER_UID
+
+ WORKDIR /stackable
+
+ # Copy the pom.xml file from the patched Spark source code to read the
+ # versions used by Spark. The pom.xml defines child modules which are
+ # not required and not copied, therefore mvn must be called with the
+ # parameter --non-recursive.
+ COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-source-builder \
+ /stackable/src/spark-k8s/patchable-work/worktree/${PRODUCT}/pom.xml \
+ spark/
+
+ # Patch the hbase-connectors source code
+ WORKDIR /stackable
+
+ COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patches/patchable.toml /stackable/src/spark-k8s/hbase-connectors/stackable/patches/patchable.toml
+ COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR} /stackable/src/spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR}
+
+ RUN <<EOF
+
+ # HBase connectors don't support Spark 4 yet, so we skip the build.
+ if [[ "${PRODUCT}" == 4* ]]; then
+ # Create this empty directory so that the following COPY layers succeed.
+ mkdir -p /stackable/spark/jars
+ exit 0
+ fi
+
+ cd "$(/stackable/patchable --images-repo-root=src checkout spark-k8s/hbase-connectors ${HBASE_CONNECTOR})/spark"
+
+ NEW_VERSION="${HBASE_CONNECTOR}-stackable${RELEASE}"
+
+ mvn versions:set -DnewVersion=$NEW_VERSION
+
+ # Create snapshot of the source code including custom patches
+ tar -czf /stackable/hbase-connector-${HBASE_CONNECTOR}-stackable${RELEASE}-src.tar.gz .
+
+ # Building the hbase-connectors with JDK 17 is not yet supported, see
+ # https://github.com/apache/hbase-connectors/pull/132.
+ # As there are no JDK profiles, access to the non-public elements must
+ # be enabled with --add-opens, see https://openjdk.org/jeps/403 and
+ # https://openjdk.org/jeps/261#Breaking-encapsulation.
+ export JDK_JAVA_OPTIONS="\
+ --add-opens java.base/java.lang=ALL-UNNAMED \
+ --add-opens java.base/java.util=ALL-UNNAMED"
+
+ # Get the Scala version used by Spark
+ SCALA_VERSION=$( \
+ mvn --quiet --non-recursive --no-transfer-progress --batch-mode --file /stackable/spark/pom.xml \
+ org.apache.maven.plugins:maven-help-plugin:3.5.0:evaluate \
+ -DforceStdout \
+ -Dexpression='project.properties(scala.version)' )
+
+ # Get the Scala binary version used by Spark
+ SCALA_BINARY_VERSION=$( \
+ mvn --quiet --non-recursive --no-transfer-progress --batch-mode --file /stackable/spark/pom.xml \
+ org.apache.maven.plugins:maven-help-plugin:3.5.0:evaluate \
+ -DforceStdout \
+ -Dexpression='project.properties(scala.binary.version)' )
+
+ # Build the Spark HBase connector
+ # Skip the tests because the MiniHBaseCluster does not get ready for
+ # whatever reason:
+ # Caused by: java.lang.RuntimeException: Master not active after 30000ms
+ # at org.apache.hadoop.hbase.util.JVMClusterUtil.waitForEvent(JVMClusterUtil.java:221)
+ # at org.apache.hadoop.hbase.util.JVMClusterUtil.startup(JVMClusterUtil.java:177)
+ # at org.apache.hadoop.hbase.LocalHBaseCluster.startup(LocalHBaseCluster.java:407)
+ # at org.apache.hadoop.hbase.MiniHBaseCluster.init(MiniHBaseCluster.java:250)
+ mvn \
+ --batch-mode \
+ --no-transfer-progress \
+ --define spark.version="${PRODUCT}" \
+ --define scala.version="${SCALA_VERSION}" \
+ --define scala.binary.version="${SCALA_BINARY_VERSION}" \
+ --define hadoop-three.version="${HADOOP}" \
+ --define hbase.version="${HBASE}" \
+ --define skipTests \
+ --define maven.test.skip=true \
+ clean package
+
+ mkdir -p /stackable/spark/jars
+ ln -s "$(pwd)/hbase-spark/target/hbase-spark-${HBASE_CONNECTOR}-stackable${RELEASE}.jar" /stackable/spark/jars/hbase-spark-${HBASE_CONNECTOR}-stackable${RELEASE}.jar
+
+ cd /stackable/spark/jars
+
+ # Download log4j-slf4j-impl-x.x.x.jar containing the StaticLoggerBinder
+ # which is required by the connector.
+ # Spark contains only log4j-slf4j2-impl-x.x.x.jar but not
+ # log4j-slf4j-impl-x.x.x.jar. It is okay to have both JARs in the
+ # classpath as long as they have the same version.
+ mvn --quiet --non-recursive --no-transfer-progress --batch-mode --file /stackable/spark/pom.xml \
+ dependency:copy \
+ -Dartifact=org.apache.logging.log4j:log4j-slf4j-impl:'${log4j.version}' \
+ -DoutputDirectory=./jars
+ chmod g=u /stackable/hbase-connector-${HBASE_CONNECTOR}-stackable${RELEASE}-src.tar.gz .
+ EOF


 # spark-builder: Build Spark into /stackable/spark-${PRODUCT}/dist,
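For reference, the Maven property lookup used in the builder stage above can be reproduced locally against any Spark source checkout; the pom.xml path below is an illustrative example, not part of the patch:

# Reads a single property from the Spark pom.xml without resolving the child
# modules referenced by the copied pom.xml; prints the scala.binary.version value.
mvn --quiet --non-recursive --batch-mode --file ./spark/pom.xml \
  org.apache.maven.plugins:maven-help-plugin:3.5.0:evaluate \
  -DforceStdout \
  -Dexpression='project.properties(scala.binary.version)'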
@@ -159,9 +167,6 @@ COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-source-builder \
 COPY --from=hadoop-builder --chown=${STACKABLE_USER_UID}:0 /stackable/patched-libs /stackable/patched-libs

 # >>> Build spark
- # Compiling the tests takes a lot of time, so we skip them
- # -Dmaven.test.skip=true skips both the compilation and execution of tests
- # -DskipTests skips only the execution
 RUN <<EOF
  # Make Maven aware of custom Stackable libraries
  mv /stackable/patched-libs/maven /root/.m2/repository
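The comment removed above still describes standard Maven behaviour; for reference, the two flags differ as follows (plain mvn invocations, independent of this Dockerfile):

mvn package -DskipTests            # compiles test sources, but does not run them
mvn package -Dmaven.test.skip=true # skips both compiling and running tests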
@@ -176,16 +181,35 @@ RUN <<EOF
  ORIGINAL_VERSION="${PRODUCT}"
  NEW_VERSION="${PRODUCT}-stackable${RELEASE}"

+ STACKABLE_HADOOP_VERSION="${HADOOP}-stackable${RELEASE}"
+
+ MAVEN_BIN="/tmp/apache-maven-${MAVEN_VERSION}/bin/mvn"
  export MAVEN_OPTS="-Xss64m -Xmx2g -XX:ReservedCodeCacheSize=1g"

- ./dev/make-distribution.sh \
- --mvn /tmp/apache-maven-${MAVEN_VERSION}/bin/mvn \
- --connect \
- -Dhadoop.version="${HADOOP}-stackable${RELEASE}" \
- -DskipTests \
- -P'hadoop-3' -Pkubernetes -Phive -Phive-thriftserver \
- --no-transfer-progress \
- --batch-mode
+ case "${PRODUCT}" in
190+ "4*" )
191+ # The Spark 4 script has a --connect option which is not available in Spark 3.
192+ # This option is required to build Spark Connect.
193+ # Also this option breaks the Spark 3 build so we ensure it's only provided here.
+ ./dev/make-distribution.sh \
+ --mvn "${MAVEN_BIN}" \
+ --connect \
+ -Dhadoop.version="${STACKABLE_HADOOP_VERSION}" \
+ -DskipTests \
+ -P'hadoop-3' -Pkubernetes -Phive -Phive-thriftserver \
+ --no-transfer-progress \
+ --batch-mode
+ ;;
+ *)
+ ./dev/make-distribution.sh \
+ --mvn "${MAVEN_BIN}" \
+ -Dhadoop.version="${STACKABLE_HADOOP_VERSION}" \
+ -DskipTests \
+ -P'hadoop-3' -Pkubernetes -Phive -Phive-thriftserver \
+ --no-transfer-progress \
+ --batch-mode
+ ;;
+ esac

  sed -i "s/${NEW_VERSION}/${ORIGINAL_VERSION}/g" assembly/target/bom.json
 EOF
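A minimal sketch of the version dispatch introduced above (bash; the version string is an illustrative value, and the case pattern must stay unquoted so that 4* is treated as a glob rather than a literal string):

PRODUCT="4.0.0"
case "${PRODUCT}" in
  4*) echo "Spark 4.x: pass --connect to make-distribution.sh" ;;
  *)  echo "Spark 3.x: build without --connect" ;;
esac
# prints: Spark 4.x: pass --connect to make-distribution.sh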
@@ -196,12 +220,6 @@
 # we create a new dist/connect folder, and copy them here.
 RUN <<EOF

- # Get the Scala binary version
- # SCALA_BINARY_VERSION=$( \
- # mvn --quiet --non-recursive --no-transfer-progress --batch-mode --file pom.xml \
- # org.apache.maven.plugins:maven-help-plugin:3.5.0:evaluate \
- # -DforceStdout \
- # -Dexpression='project.properties(scala.binary.version)')
  SCALA_BINARY_VERSION=$(grep "scala.binary.version" pom.xml | head -n1 | awk -F '[<>]' '{print $3}' )

  mkdir -p dist/connect
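The grep/awk pipeline above can be sanity-checked in isolation; the pom.xml line piped in below is made up for illustration:

printf '  <scala.binary.version>2.13</scala.binary.version>\n' \
  | grep "scala.binary.version" | head -n1 | awk -F '[<>]' '{print $3}'
# prints: 2.13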
@@ -211,8 +229,8 @@ RUN <<EOF
  cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/sql/connect/common/target/spark-connect-common_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" .
  cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/sql/connect/client/jvm/target/spark-connect-client-jvm_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" .

- # The Spark operator expects a file named spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}.jar without the -stackable${RELEASE} suffix.
- ln -s "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}.jar"
+ # The Spark operator expects a file named spark-connect-${PRODUCT}.jar.
+ ln -s "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" "spark-connect-${PRODUCT}.jar"
 EOF

 # <<< Build spark
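With illustrative version values (Scala 2.13, Spark 4.0.0, Stackable release 0.0.0-dev), the dist/connect folder then ends up with names along these lines; the unsuffixed symlink is the name the operator looks for:

ls -l dist/connect
# spark-connect-4.0.0.jar -> spark-connect_2.13-4.0.0-stackable0.0.0-dev.jar
# spark-connect_2.13-4.0.0-stackable0.0.0-dev.jar
# spark-connect-client-jvm_2.13-4.0.0-stackable0.0.0-dev.jar
# spark-connect-common_2.13-4.0.0-stackable0.0.0-dev.jar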
@@ -232,10 +250,10 @@ COPY --from=hadoop-builder --chown=${STACKABLE_USER_UID}:0 \
  /stackable/hadoop/share/hadoop/tools/lib/azure-keyvault-core-${AZURE_KEYVAULT_CORE}.jar \
  ./

- # ## # Copy the HBase connector including required modules
- # ## COPY --from=hbase-connectors-builder --chown=${STACKABLE_USER_UID}:0 \
- # ## /stackable/spark/jars/* \
- # ## ./
+ # Copy the HBase connector including required modules
+ COPY --from=hbase-connectors-builder --chown=${STACKABLE_USER_UID}:0 \
+ /stackable/spark/jars/* \
+ ./

 # Copy modules required to access HBase
 COPY --from=hbase-builder --chown=${STACKABLE_USER_UID}:0 \
@@ -313,7 +331,7 @@ ENV PYTHONPATH=$SPARK_HOME/python

 COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-builder /stackable/spark-${PRODUCT}-stackable${RELEASE}/dist /stackable/spark
 COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-source-builder /stackable/spark-${PRODUCT}-stackable${RELEASE}-src.tar.gz /stackable
- # ## COPY --chown=${STACKABLE_USER_UID}:0 --from=hbase-connectors-builder /stackable/hbase-connector-${HBASE_CONNECTOR}-stackable${RELEASE}-src.tar.gz /stackable
+ COPY --chown=${STACKABLE_USER_UID}:0 --from=hbase-connectors-builder /stackable/hbase-connector-${HBASE_CONNECTOR}-stackable${RELEASE}-src.tar.gz* /stackable
 COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-builder /stackable/spark-${PRODUCT}-stackable${RELEASE}/assembly/target/bom.json /stackable/spark/spark-${PRODUCT}-stackable${RELEASE}.cdx.json
 COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-builder /stackable/jmx /stackable/jmx
 COPY --from=spark-builder /usr/bin/tini /usr/bin/tini
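After the image is built, a quick way to check that the connector jar was copied into the final layout (image name and tag are placeholders); for a Spark 4 build the grep finds nothing, since the connector build is skipped by the guard above:

docker run --rm --entrypoint /bin/bash <spark-k8s-image>:<tag> \
  -c 'ls /stackable/spark/jars | grep hbase-spark'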