Skip to content

Commit 8c113ac

Browse files
authored
Fix: spark-k8s reduce image size (#1042)
* remove log4shell check * remove log4shell references * reduce disk image * adapted changelog * remove copy paste leftover * remove chmod on python binaries
1 parent b96184f commit 8c113ac

File tree

2 files changed

+38
-48
lines changed

2 files changed

+38
-48
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.
44

55
## [Unreleased]
66

7+
### Fixed
8+
9+
- spark-k8s: reduce docker image size by removing the recursive chown/chmods in the final image ([#1042]).
10+
11+
[#1042]: https://github.com/stackabletech/docker-images/pull/1042
12+
713
## [25.3.0] - 2025-03-21
814

915
### Added

spark-k8s/Dockerfile

Lines changed: 32 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ EOF
157157

158158

159159
# spark-builder: Build Spark into /stackable/spark-${PRODUCT}/dist,
160-
# download additional JARs and perform checks, like log4shell check.
160+
# download additional JARs and perform checks.
161161
FROM stackable/image/java-devel AS spark-builder
162162

163163
ARG PRODUCT
@@ -189,20 +189,15 @@ COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-source-builder \
189189
# 134.0 [ERROR] Detected Maven Version: 3.6.3 is not in the allowed range [3.8.8,)
190190
RUN export MAVEN_OPTS="-Xss64m -Xmx2g -XX:ReservedCodeCacheSize=1g" \
191191
&& ./dev/make-distribution.sh \
192-
-Dhadoop.version="$HADOOP" \
193-
-Dmaven.test.skip=true \
194-
-DskipTests \
195-
-P'hadoop-3' -Pkubernetes -Phive -Phive-thriftserver \
196-
--no-transfer-progress \
197-
--batch-mode
192+
-Dhadoop.version="$HADOOP" \
193+
-Dmaven.test.skip=true \
194+
-DskipTests \
195+
-P'hadoop-3' -Pkubernetes -Phive -Phive-thriftserver \
196+
--no-transfer-progress \
197+
--batch-mode
198198

199199
# <<< Build spark
200200

201-
# Get the correct `tini` binary for our architecture.
202-
RUN curl -o /usr/bin/tini "https://repo.stackable.tech/repository/packages/tini/tini-${TINI}-${TARGETARCH}" \
203-
&& chmod +x /usr/bin/tini
204-
205-
# We download these under dist so that log4shell checks them
206201
WORKDIR /stackable/spark-${PRODUCT}/dist/jars
207202

208203
# Copy modules required for s3a://
@@ -242,34 +237,31 @@ COPY --from=hbase-builder --chown=${STACKABLE_USER_UID}:0 \
242237

243238
WORKDIR /stackable/spark-${PRODUCT}/dist/extra-jars
244239

240+
COPY spark-k8s/stackable/jmx /stackable/jmx
241+
242+
RUN <<EOF
245243
# Download jackson-dataformat-xml, stax2-api, and woodstox-core which are required for logging.
246-
RUN curl -O https://repo.stackable.tech/repository/packages/jackson-dataformat-xml/jackson-dataformat-xml-${JACKSON_DATAFORMAT_XML}.jar \
247-
&& curl -O https://repo.stackable.tech/repository/packages/stax2-api/stax2-api-${STAX2_API}.jar \
248-
&& curl -O https://repo.stackable.tech/repository/packages/woodstox-core/woodstox-core-${WOODSTOX_CORE}.jar
244+
# --remote-name stores each JAR under its remote file name in the current WORKDIR
# (/stackable/spark-${PRODUCT}/dist/extra-jars); without it curl writes the body to stdout and nothing is saved.
curl --fail --remote-name https://repo.stackable.tech/repository/packages/jackson-dataformat-xml/jackson-dataformat-xml-${JACKSON_DATAFORMAT_XML}.jar
245+
curl --fail --remote-name https://repo.stackable.tech/repository/packages/stax2-api/stax2-api-${STAX2_API}.jar
246+
curl --fail --remote-name https://repo.stackable.tech/repository/packages/woodstox-core/woodstox-core-${WOODSTOX_CORE}.jar
249247

250-
WORKDIR /stackable/jmx
248+
# Get the correct `tini` binary for our architecture.
249+
curl --fail "https://repo.stackable.tech/repository/packages/tini/tini-${TINI}-${TARGETARCH}" \
250+
-o /usr/bin/tini
251+
chmod +x /usr/bin/tini
251252

252-
RUN curl -O "https://repo.stackable.tech/repository/packages/jmx-exporter/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar"
253+
# JMX Exporter
254+
curl --fail "https://repo.stackable.tech/repository/packages/jmx-exporter/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" \
255+
-o "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar"
256+
ln -s "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" /stackable/jmx/jmx_prometheus_javaagent.jar
253257

254-
# ===
255-
# Mitigation for CVE-2021-44228 (Log4Shell)
256-
#
257-
# For earlier versions this script removes the .class file that contains the
258-
# vulnerable code.
259-
# TODO: This can be restricted to target only versions which do not honor the environment
260-
# variable that has been set above but this has not currently been implemented
261-
COPY shared/log4shell.sh /bin
262-
RUN /bin/log4shell.sh /stackable/spark-${PRODUCT}/dist
263-
264-
# Ensure no vulnerable files are left over
265-
# This will currently report vulnerable files being present, as it also alerts on
266-
# SocketNode.class, which we do not remove with our scripts.
267-
# Further investigation will be needed whether this should also be removed.
268-
COPY shared/log4shell_1.6.1-log4shell_Linux_x86_64 /bin/log4shell_scanner_x86_64
269-
COPY shared/log4shell_1.6.1-log4shell_Linux_aarch64 /bin/log4shell_scanner_aarch64
270-
COPY shared/log4shell_scanner /bin/log4shell_scanner
271-
RUN /bin/log4shell_scanner s /stackable/spark-${PRODUCT}/dist
272-
# ===
258+
# Symlink example jar, so that we can easily use it in tests
259+
# Use a relative link target: dist/ is copied to /stackable/spark in the final stage,
# so an absolute target under /stackable/spark-${PRODUCT}/dist would no longer match the link's location there.
(cd /stackable/spark-${PRODUCT}/dist/examples/jars && ln -s spark-examples_*.jar spark-examples.jar)
260+
261+
chmod -R g=u /stackable/spark-${PRODUCT}/dist
262+
chmod -R g=u /stackable/spark-${PRODUCT}/assembly/target/bom.json
263+
chmod -R g=u /stackable/jmx
264+
EOF
273265

274266
FROM stackable/image/java-base AS final
275267

@@ -294,14 +286,15 @@ ENV PATH=$SPARK_HOME:$PATH:/bin:$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$HOME/.local/b
294286
ENV PYSPARK_PYTHON=/usr/bin/python
295287
ENV PYTHONPATH=$SPARK_HOME/python
296288

297-
COPY spark-k8s/stackable /stackable
298-
COPY spark-k8s/licenses /licenses
299289

300290
COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-builder /stackable/spark-${PRODUCT}/dist /stackable/spark
301291
COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-builder /stackable/spark-${PRODUCT}/assembly/target/bom.json /stackable/spark/spark-${PRODUCT}.cdx.json
302292
COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-builder /stackable/jmx /stackable/jmx
303293
COPY --from=spark-builder /usr/bin/tini /usr/bin/tini
304294

295+
COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/stackable/run-spark.sh /stackable/run-spark.sh
296+
COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/licenses /licenses
297+
305298
RUN <<EOF
306299
microdnf update
307300
# procps: required for spark startup scripts
@@ -320,19 +313,10 @@ rm -rf /var/cache/yum
320313

321314
ln -s /usr/bin/python${PYTHON} /usr/bin/python
322315
ln -s /usr/bin/pip-${PYTHON} /usr/bin/pip
323-
324-
ln -s "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" /stackable/jmx/jmx_prometheus_javaagent.jar
325-
# Symlink example jar, so that we can easily use it in tests
326-
ln -s /stackable/spark/examples/jars/spark-examples_*.jar /stackable/spark/examples/jars/spark-examples.jar
327-
328-
# All files and folders owned by root group to support running as arbitrary users.
329-
# This is best practice as all container users will belong to the root group (0).
330-
chown -R ${STACKABLE_USER_UID}:0 /stackable
331-
chmod -R g=u /stackable
332316
EOF
333317

334318
# ----------------------------------------
335-
# Attention: We are changing the group of all files in /stackable directly above
319+
# Attention:
336320
# If you do any file based actions (copying / creating etc.) below this comment you
337321
# absolutely need to make sure that the correct permissions are applied!
338322
# chown ${STACKABLE_USER_UID}:0

0 commit comments

Comments
 (0)