@@ -235,15 +235,27 @@ COPY --from=hbase-builder --chown=${STACKABLE_USER_UID}:0 \
235
235
/stackable/hbase/lib/client-facing-thirdparty/opentelemetry-semconv-*-alpha.jar \
236
236
./
237
237
238
- WORKDIR /stackable/spark-${PRODUCT}/dist/extra-jars
238
+ WORKDIR /stackable/spark-${PRODUCT}/dist/connect
239
+
240
+ # As of version 3.5.5, spark-connect jars are not included in the dist folder.
241
+ # To avoid classpath conflicts with existing spark applications,
242
+ # we create a new dist/connect folder, and copy them here.
243
+ RUN cp /stackable/spark-${PRODUCT}/connector/connect/server/target/spark-connect_*-${PRODUCT}.jar . \
244
+ && cp /stackable/spark-${PRODUCT}/connector/connect/common/target/spark-connect-common_*-${PRODUCT}.jar . \
245
+ && cp /stackable/spark-${PRODUCT}/connector/connect/client/jvm/target/spark-connect-client-jvm_2.12-${PRODUCT}.jar .
239
246
240
247
COPY spark-k8s/stackable/jmx /stackable/jmx
241
248
249
+ WORKDIR /stackable/spark-${PRODUCT}/dist/extra-jars
250
+
242
251
RUN <<EOF
243
252
# Download jackson-dataformat-xml, stax2-api, and woodstox-core which are required for logging.
244
- curl --fail https://repo.stackable.tech/repository/packages/jackson-dataformat-xml/jackson-dataformat-xml-${JACKSON_DATAFORMAT_XML}.jar
245
- curl --fail https://repo.stackable.tech/repository/packages/stax2-api/stax2-api-${STAX2_API}.jar
246
- curl --fail https://repo.stackable.tech/repository/packages/woodstox-core/woodstox-core-${WOODSTOX_CORE}.jar
253
+ curl --fail https://repo.stackable.tech/repository/packages/jackson-dataformat-xml/jackson-dataformat-xml-${JACKSON_DATAFORMAT_XML}.jar \
254
+ -o /stackable/spark-${PRODUCT}/dist/extra-jars/jackson-dataformat-xml-${JACKSON_DATAFORMAT_XML}.jar
255
+ curl --fail https://repo.stackable.tech/repository/packages/stax2-api/stax2-api-${STAX2_API}.jar \
256
+ -o /stackable/spark-${PRODUCT}/dist/extra-jars/stax2-api-${STAX2_API}.jar
257
+ curl --fail https://repo.stackable.tech/repository/packages/woodstox-core/woodstox-core-${WOODSTOX_CORE}.jar \
258
+ -o /stackable/spark-${PRODUCT}/dist/extra-jars/woodstox-core-${WOODSTOX_CORE}.jar
247
259
248
260
# Get the correct `tini` binary for our architecture.
249
261
curl --fail "https://repo.stackable.tech/repository/packages/tini/tini-${TINI}-${TARGETARCH}" \
@@ -255,14 +267,13 @@ curl --fail "https://repo.stackable.tech/repository/packages/jmx-exporter/jmx_pr
255
267
-o "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar"
256
268
ln -s "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" /stackable/jmx/jmx_prometheus_javaagent.jar
257
269
258
- # Symlink example jar, so that we can easily use it in tests
259
- ln -s /stackable/spark-${PRODUCT}/dist/examples/jars/spark-examples_*.jar /stackable/spark-${PRODUCT}/dist/examples/jars/spark-examples.jar
260
-
261
270
chmod -R g=u /stackable/spark-${PRODUCT}/dist
262
271
chmod -R g=u /stackable/spark-${PRODUCT}/assembly/target/bom.json
263
272
chmod -R g=u /stackable/jmx
264
273
EOF
265
274
275
+ # TODO: java-base installs the Adoptium dnf repo and the Temurin jre which is not needed here.
276
+ # To reduce the size of this image, the Adoptium repo could be moved to stackable-base instead.
266
277
FROM stackable/image/java-base AS final
267
278
268
279
ARG PRODUCT
@@ -282,7 +293,9 @@ LABEL name="Apache Spark" \
282
293
283
294
ENV HOME=/stackable
284
295
ENV SPARK_HOME=/stackable/spark
285
- ENV PATH=$SPARK_HOME:$PATH:/bin:$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$HOME/.local/bin
296
+ # Override the java-base version of JAVA_HOME to point to the jdk.
297
+ ENV JAVA_HOME="/usr/lib/jvm/temurin-${JAVA_VERSION}-jdk"
298
+ ENV PATH=$SPARK_HOME/bin:$JAVA_HOME/bin:$PATH
286
299
ENV PYSPARK_PYTHON=/usr/bin/python
287
300
ENV PYTHONPATH=$SPARK_HOME/python
288
301
@@ -297,24 +310,32 @@ COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/licenses /licenses
297
310
298
311
RUN <<EOF
299
312
microdnf update
300
- # procps: required for spark startup scripts
301
- # java-*-openjdk-devel: This is needed by the Spark UI to display process information using jps and jmap
302
- # Copying just the binaries from the builder stage failed.
303
- microdnf install \
313
+
314
+ # procps:
315
+ # Required for spark startup scripts.
316
+ # temurin-{version}-jdk:
317
+ # Needed by the Spark UI to display process information using "jps" and "jmap".
318
+ # Spark-Connect needs "javac" to compile auto-generated classes on the fly.
319
+ microdnf install --nodocs \
304
320
gzip \
305
321
hostname \
306
322
procps \
307
323
"python${PYTHON}" \
308
324
"python${PYTHON}-pip" \
309
325
zip \
310
- "java -${JAVA_VERSION}-openjdk-devel "
326
+ "temurin -${JAVA_VERSION}-jdk "
311
327
microdnf clean all
312
328
rm -rf /var/cache/yum
313
329
314
330
ln -s /usr/bin/python${PYTHON} /usr/bin/python
315
331
ln -s /usr/bin/pip-${PYTHON} /usr/bin/pip
332
+
333
+ # Symlink example jar, so that we can easily use it in tests
334
+ ln -s /stackable/spark/examples/jars/spark-examples_*.jar /stackable/spark/examples/jars/spark-examples.jar
335
+ chown -h ${STACKABLE_USER_UID}:0 /stackable/spark/examples/jars/spark-examples.jar
316
336
EOF
317
337
338
+
318
339
# ----------------------------------------
319
340
# Attention:
320
341
# If you do any file based actions (copying / creating etc.) below this comment you
0 commit comments