157157
158158
159159# spark-builder: Build Spark into /stackable/spark-${PRODUCT}/dist,
160- # download additional JARs and perform checks, like log4shell check.
160+ # download additional JARs and perform checks
161161FROM stackable/image/java-devel AS spark-builder
162162
163163ARG PRODUCT
@@ -189,20 +189,15 @@ COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-source-builder \
189189# 134.0 [ERROR] Detected Maven Version: 3.6.3 is not in the allowed range [3.8.8,)
190190RUN export MAVEN_OPTS="-Xss64m -Xmx2g -XX:ReservedCodeCacheSize=1g" \
191191 && ./dev/make-distribution.sh \
192- -Dhadoop.version="$HADOOP" \
193- -Dmaven.test.skip=true \
194- -DskipTests \
195- -P'hadoop-3' -Pkubernetes -Phive -Phive-thriftserver \
196- --no-transfer-progress \
197- --batch-mode
192+ -Dhadoop.version="$HADOOP" \
193+ -Dmaven.test.skip=true \
194+ -DskipTests \
195+ -P'hadoop-3' -Pkubernetes -Phive -Phive-thriftserver \
196+ --no-transfer-progress \
197+ --batch-mode
198198
199199# <<< Build spark
200200
201- # Get the correct `tini` binary for our architecture.
202- RUN curl -o /usr/bin/tini "https://repo.stackable.tech/repository/packages/tini/tini-${TINI}-${TARGETARCH}" \
203- && chmod +x /usr/bin/tini
204-
205- # We download these under dist so that log4shell checks them
206201WORKDIR /stackable/spark-${PRODUCT}/dist/jars
207202
208203# Copy modules required for s3a://
@@ -242,34 +237,31 @@ COPY --from=hbase-builder --chown=${STACKABLE_USER_UID}:0 \
242237
243238WORKDIR /stackable/spark-${PRODUCT}/dist/extra-jars
244239
240+ COPY spark-k8s/stackable/jmx /stackable/jmx
241+
242+ RUN <<EOF
245243# Download jackson-dataformat-xml, stax2-api, and woodstox-core which are required for logging.
246- RUN curl -O https://repo.stackable.tech/repository/packages/jackson-dataformat-xml/jackson-dataformat-xml-${JACKSON_DATAFORMAT_XML}.jar \
247- && curl -O https://repo.stackable.tech/repository/packages/stax2-api/stax2-api-${STAX2_API}.jar \
248- && curl -O https://repo.stackable.tech/repository/packages/woodstox-core/woodstox-core-${WOODSTOX_CORE}.jar
+ # -O saves each JAR under its remote name in the current WORKDIR (extra-jars); without it curl writes the download to stdout.
244+ curl --fail -O https://repo.stackable.tech/repository/packages/jackson-dataformat-xml/jackson-dataformat-xml-${JACKSON_DATAFORMAT_XML}.jar
245+ curl --fail -O https://repo.stackable.tech/repository/packages/stax2-api/stax2-api-${STAX2_API}.jar
246+ curl --fail -O https://repo.stackable.tech/repository/packages/woodstox-core/woodstox-core-${WOODSTOX_CORE}.jar
249247
250- WORKDIR /stackable/jmx
248+ # Get the correct `tini` binary for our architecture.
249+ curl --fail "https://repo.stackable.tech/repository/packages/tini/tini-${TINI}-${TARGETARCH}" \
250+ -o /usr/bin/tini
251+ chmod +x /usr/bin/tini
251252
252- RUN curl -O "https://repo.stackable.tech/repository/packages/jmx-exporter/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar"
253+ # JMX Exporter
254+ curl --fail "https://repo.stackable.tech/repository/packages/jmx-exporter/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" \
255+ -o "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar"
256+ ln -s "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" /stackable/jmx/jmx_prometheus_javaagent.jar
253257
254- # ===
255- # Mitigation for CVE-2021-44228 (Log4Shell)
256- #
257- # For earlier versions this script removes the .class file that contains the
258- # vulnerable code.
259- # TODO: This can be restricted to target only versions which do not honor the environment
260- # varible that has been set above but this has not currently been implemented
261- COPY shared/log4shell.sh /bin
262- RUN /bin/log4shell.sh /stackable/spark-${PRODUCT}/dist
263-
264- # Ensure no vulnerable files are left over
265- # This will currently report vulnerable files being present, as it also alerts on
266- # SocketNode.class, which we do not remove with our scripts.
267- # Further investigation will be needed whether this should also be removed.
268- COPY shared/log4shell_1.6.1-log4shell_Linux_x86_64 /bin/log4shell_scanner_x86_64
269- COPY shared/log4shell_1.6.1-log4shell_Linux_aarch64 /bin/log4shell_scanner_aarch64
270- COPY shared/log4shell_scanner /bin/log4shell_scanner
271- RUN /bin/log4shell_scanner s /stackable/spark-${PRODUCT}/dist
272- # ===
258+ # Symlink example jar, so that we can easily use it in tests (relative target, so the link still resolves once dist/ is copied to /stackable/spark)
259+ (cd "/stackable/spark-${PRODUCT}/dist/examples/jars" && ln -s spark-examples_*.jar spark-examples.jar)
260+
261+ chmod -R g=u /stackable/spark-${PRODUCT}/dist
262+ chmod -R g=u /stackable/spark-${PRODUCT}/assembly/target/bom.json
263+ chmod -R g=u /stackable/jmx
264+ EOF
273265
274266FROM stackable/image/java-base AS final
275267
@@ -294,14 +286,15 @@ ENV PATH=$SPARK_HOME:$PATH:/bin:$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$HOME/.local/b
294286ENV PYSPARK_PYTHON=/usr/bin/python
295287ENV PYTHONPATH=$SPARK_HOME/python
296288
297- COPY spark-k8s/stackable /stackable
298- COPY spark-k8s/licenses /licenses
299289
300290COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-builder /stackable/spark-${PRODUCT}/dist /stackable/spark
301291COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-builder /stackable/spark-${PRODUCT}/assembly/target/bom.json /stackable/spark/spark-${PRODUCT}.cdx.json
302292COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-builder /stackable/jmx /stackable/jmx
303293COPY --from=spark-builder /usr/bin/tini /usr/bin/tini
304294
295+ COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/stackable/run-spark.sh /stackable/run-spark.sh
296+ COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/licenses /licenses
297+
305298RUN <<EOF
306299microdnf update
307300# procps: required for spark startup scripts
@@ -320,19 +313,10 @@ rm -rf /var/cache/yum
320313
321314ln -s /usr/bin/python${PYTHON} /usr/bin/python
322315ln -s /usr/bin/pip-${PYTHON} /usr/bin/pip
323-
324- ln -s "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" /stackable/jmx/jmx_prometheus_javaagent.jar
325- # Symlink example jar, so that we can easily use it in tests
326- ln -s /stackable/spark/examples/jars/spark-examples_*.jar /stackable/spark/examples/jars/spark-examples.jar
327-
328- # All files and folders owned by root group to support running as arbitrary users.
329- # This is best practice as all container users will belong to the root group (0).
330- chown -R ${STACKABLE_USER_UID}:0 /stackable
331- chmod -R g=u /stackable
332316EOF
333317
334318# ----------------------------------------
335- # Attention: We are changing the group of all files in /stackable directly above
319+ # Attention:
336320# If you do any file based actions (copying / creating etc.) below this comment you
337321# absolutely need to make sure that the correct permissions are applied!
338322# chown ${STACKABLE_USER_UID}:0
0 commit comments