@@ -235,15 +235,27 @@ COPY --from=hbase-builder --chown=${STACKABLE_USER_UID}:0 \
235235 /stackable/hbase/lib/client-facing-thirdparty/opentelemetry-semconv-*-alpha.jar \
236236 ./
237237
238- WORKDIR /stackable/spark-${PRODUCT}/dist/extra-jars
238+ WORKDIR /stackable/spark-${PRODUCT}/dist/connect
239+
240+ # As of version 3.5.5, spark-connect jars are not included in the dist folder.
241+ # To avoid classpath conflicts with existing spark applications,
242+ # we create a new dist/connect folder, and copy them here.
243+ RUN cp /stackable/spark-${PRODUCT}/connector/connect/server/target/spark-connect_*-${PRODUCT}.jar . \
244+ && cp /stackable/spark-${PRODUCT}/connector/connect/common/target/spark-connect-common_*-${PRODUCT}.jar . \
245+ && cp /stackable/spark-${PRODUCT}/connector/connect/client/jvm/target/spark-connect-client-jvm_2.12-${PRODUCT}.jar .
239246
240247COPY spark-k8s/stackable/jmx /stackable/jmx
241248
249+ WORKDIR /stackable/spark-${PRODUCT}/dist/extra-jars
250+
242251RUN <<EOF
243252# Download jackson-dataformat-xml, stax2-api, and woodstox-core which are required for logging.
244- curl --fail https://repo.stackable.tech/repository/packages/jackson-dataformat-xml/jackson-dataformat-xml-${JACKSON_DATAFORMAT_XML}.jar
245- curl --fail https://repo.stackable.tech/repository/packages/stax2-api/stax2-api-${STAX2_API}.jar
246- curl --fail https://repo.stackable.tech/repository/packages/woodstox-core/woodstox-core-${WOODSTOX_CORE}.jar
253+ curl --fail https://repo.stackable.tech/repository/packages/jackson-dataformat-xml/jackson-dataformat-xml-${JACKSON_DATAFORMAT_XML}.jar \
254+ -o /stackable/spark-${PRODUCT}/dist/extra-jars/jackson-dataformat-xml-${JACKSON_DATAFORMAT_XML}.jar
255+ curl --fail https://repo.stackable.tech/repository/packages/stax2-api/stax2-api-${STAX2_API}.jar \
256+ -o /stackable/spark-${PRODUCT}/dist/extra-jars/stax2-api-${STAX2_API}.jar
257+ curl --fail https://repo.stackable.tech/repository/packages/woodstox-core/woodstox-core-${WOODSTOX_CORE}.jar \
258+ -o /stackable/spark-${PRODUCT}/dist/extra-jars/woodstox-core-${WOODSTOX_CORE}.jar
247259
248260# Get the correct `tini` binary for our architecture.
249261curl --fail "https://repo.stackable.tech/repository/packages/tini/tini-${TINI}-${TARGETARCH}" \
@@ -255,14 +267,13 @@ curl --fail "https://repo.stackable.tech/repository/packages/jmx-exporter/jmx_pr
255267 -o "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar"
256268ln -s "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" /stackable/jmx/jmx_prometheus_javaagent.jar
257269
258- # Symlink example jar, so that we can easily use it in tests
259- ln -s /stackable/spark-${PRODUCT}/dist/examples/jars/spark-examples_*.jar /stackable/spark-${PRODUCT}/dist/examples/jars/spark-examples.jar
260-
261270chmod -R g=u /stackable/spark-${PRODUCT}/dist
262271chmod -R g=u /stackable/spark-${PRODUCT}/assembly/target/bom.json
263272chmod -R g=u /stackable/jmx
264273EOF
265274
275+ # TODO: java-base installs the Adoptium dnf repo and the Temurin JRE which is not needed here.
276+ # To reduce the size of this image, the Adoptium repo could be moved to stackable-base instead.
266277FROM stackable/image/java-base AS final
267278
268279ARG PRODUCT
@@ -282,7 +293,9 @@ LABEL name="Apache Spark" \
282293
283294ENV HOME=/stackable
284295ENV SPARK_HOME=/stackable/spark
285- ENV PATH=$SPARK_HOME:$PATH:/bin:$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$HOME/.local/bin
296+ # Override the java-base version of JAVA_HOME to point to the jdk.
297+ ENV JAVA_HOME="/usr/lib/jvm/temurin-${JAVA_VERSION}-jdk"
298+ ENV PATH=$SPARK_HOME/bin:$JAVA_HOME/bin:$PATH
286299ENV PYSPARK_PYTHON=/usr/bin/python
287300ENV PYTHONPATH=$SPARK_HOME/python
288301
@@ -297,24 +310,32 @@ COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/licenses /licenses
297310
298311RUN <<EOF
299312microdnf update
300- # procps: required for spark startup scripts
301- # java-*-openjdk-devel: This is needed by the Spark UI to display process information using jps and jmap
302- # Copying just the binaries from the builder stage failed.
303- microdnf install \
313+
314+ # procps:
315+ # Required for spark startup scripts.
316+ # temurin-{version}-jdk:
317+ # Needed by the Spark UI to display process information using "jps" and "jmap".
318+ # Spark-Connect needs "javac" to compile auto-generated classes on the fly.
319+ microdnf install --nodocs \
304320 gzip \
305321 hostname \
306322 procps \
307323 "python${PYTHON}" \
308324 "python${PYTHON}-pip" \
309325 zip \
310-     "java-${JAVA_VERSION}-openjdk-devel"
326+     "temurin-${JAVA_VERSION}-jdk"
311327microdnf clean all
312328rm -rf /var/cache/yum
313329
314330ln -s /usr/bin/python${PYTHON} /usr/bin/python
315331ln -s /usr/bin/pip-${PYTHON} /usr/bin/pip
332+
333+ # Symlink example jar, so that we can easily use it in tests
334+ ln -s /stackable/spark/examples/jars/spark-examples_*.jar /stackable/spark/examples/jars/spark-examples.jar
335+ chown -h ${STACKABLE_USER_UID}:0 /stackable/spark/examples/jars/spark-examples.jar
316336EOF
317337
338+
318339# ----------------------------------------
319340# Attention:
320341# If you do any file based actions (copying / creating etc.) below this comment you
0 commit comments