# spark-builder: Build Spark into /stackable/spark-${PRODUCT}/dist,
- # download additional JARs and perform checks, like log4shell check.
+ # download additional JARs and perform checks
FROM stackable/image/java-devel AS spark-builder

ARG PRODUCT
@@ -189,20 +189,15 @@ COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-source-builder \
# 134.0 [ERROR] Detected Maven Version: 3.6.3 is not in the allowed range [3.8.8,)
RUN export MAVEN_OPTS="-Xss64m -Xmx2g -XX:ReservedCodeCacheSize=1g" \
&& ./dev/make-distribution.sh \
- -Dhadoop.version="$HADOOP" \
- -Dmaven.test.skip=true \
- -DskipTests \
- -P'hadoop-3' -Pkubernetes -Phive -Phive-thriftserver \
- --no-transfer-progress \
- --batch-mode
+ -Dhadoop.version="$HADOOP" \
+ -Dmaven.test.skip=true \
+ -DskipTests \
+ -P'hadoop-3' -Pkubernetes -Phive -Phive-thriftserver \
+ --no-transfer-progress \
+ --batch-mode

# <<< Build spark
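The profile and skip flags above decide what ends up in the distribution and how long the build takes. A quick post-build sanity check can catch a missing profile early; the following is only a sketch, and the extra RUN step, the jar glob and the --version call are not part of this change:

    # Illustrative check: fail the build if the Kubernetes module did not make it into the distribution
    RUN ls /stackable/spark-${PRODUCT}/dist/jars/spark-kubernetes_*.jar \
        && /stackable/spark-${PRODUCT}/dist/bin/spark-submit --version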

- # Get the correct `tini` binary for our architecture.
- RUN curl -o /usr/bin/tini "https://repo.stackable.tech/repository/packages/tini/tini-${TINI}-${TARGETARCH}" \
- && chmod +x /usr/bin/tini
-
- # We download these under dist so that log4shell checks them
WORKDIR /stackable/spark-${PRODUCT}/dist/jars

# Copy modules required for s3a://
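The modules copied here (typically hadoop-aws and the AWS SDK bundle) are what let Spark talk to S3-compatible object storage through the s3a:// scheme. The job-side flags then look roughly like this; the endpoint value is a placeholder rather than anything defined in this image:

    # Sketch: S3A client settings that rely on the jars copied in this step
    --conf spark.hadoop.fs.s3a.endpoint="http://minio:9000" \
    --conf spark.hadoop.fs.s3a.path.style.access=true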
@@ -242,34 +237,31 @@ COPY --from=hbase-builder --chown=${STACKABLE_USER_UID}:0 \

WORKDIR /stackable/spark-${PRODUCT}/dist/extra-jars

+ COPY spark-k8s/stackable/jmx /stackable/jmx
+
+ RUN <<EOF
# Download jackson-dataformat-xml, stax2-api, and woodstox-core which are required for logging.
- RUN curl -O https://repo.stackable.tech/repository/packages/jackson-dataformat-xml/jackson-dataformat-xml-${JACKSON_DATAFORMAT_XML}.jar \
- && curl -O https://repo.stackable.tech/repository/packages/stax2-api/stax2-api-${STAX2_API}.jar \
- && curl -O https://repo.stackable.tech/repository/packages/woodstox-core/woodstox-core-${WOODSTOX_CORE}.jar
+ curl --fail -O https://repo.stackable.tech/repository/packages/jackson-dataformat-xml/jackson-dataformat-xml-${JACKSON_DATAFORMAT_XML}.jar
+ curl --fail -O https://repo.stackable.tech/repository/packages/stax2-api/stax2-api-${STAX2_API}.jar
+ curl --fail -O https://repo.stackable.tech/repository/packages/woodstox-core/woodstox-core-${WOODSTOX_CORE}.jar

- WORKDIR /stackable/jmx
+ # Get the correct `tini` binary for our architecture.
+ curl --fail "https://repo.stackable.tech/repository/packages/tini/tini-${TINI}-${TARGETARCH}" \
+ -o /usr/bin/tini
+ chmod +x /usr/bin/tini
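tini is a tiny init that forwards signals and reaps zombie processes; this step only stages the binary, it still has to run as PID 1 in the final image. That wiring would look roughly like the line below, where using run-spark.sh as the default command is an assumption rather than something this diff shows:

    # Sketch: run the container under tini in the final stage
    ENTRYPOINT ["/usr/bin/tini", "--", "/stackable/run-spark.sh"]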

- RUN curl -O "https://repo.stackable.tech/repository/packages/jmx-exporter/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar"
+ # JMX Exporter
+ curl --fail "https://repo.stackable.tech/repository/packages/jmx-exporter/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" \
+ -o "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar"
+ ln -s "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" /stackable/jmx/jmx_prometheus_javaagent.jar
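The unversioned jmx_prometheus_javaagent.jar symlink gives a stable path regardless of the exporter version. At runtime the agent is attached to the JVM as a -javaagent; a spark-submit invocation would carry a flag roughly like the one below, where the port 8090 and the config.yaml path are placeholders and not values taken from this repository:

    # Sketch: expose Prometheus metrics from the driver JVM via the exporter agent
    --conf "spark.driver.extraJavaOptions=-javaagent:/stackable/jmx/jmx_prometheus_javaagent.jar=8090:/stackable/jmx/config.yaml"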

- # ===
- # Mitigation for CVE-2021-44228 (Log4Shell)
- #
- # For earlier versions this script removes the .class file that contains the
- # vulnerable code.
- # TODO: This can be restricted to target only versions which do not honor the environment
- # varible that has been set above but this has not currently been implemented
- COPY shared/log4shell.sh /bin
- RUN /bin/log4shell.sh /stackable/spark-${PRODUCT}/dist
-
- # Ensure no vulnerable files are left over
- # This will currently report vulnerable files being present, as it also alerts on
- # SocketNode.class, which we do not remove with our scripts.
- # Further investigation will be needed whether this should also be removed.
- COPY shared/log4shell_1.6.1-log4shell_Linux_x86_64 /bin/log4shell_scanner_x86_64
- COPY shared/log4shell_1.6.1-log4shell_Linux_aarch64 /bin/log4shell_scanner_aarch64
- COPY shared/log4shell_scanner /bin/log4shell_scanner
- RUN /bin/log4shell_scanner s /stackable/spark-${PRODUCT}/dist
- # ===
+ # Symlink example jar, so that we can easily use it in tests
+ ln -s /stackable/spark-${PRODUCT}/dist/examples/jars/spark-examples_*.jar /stackable/spark-${PRODUCT}/dist/examples/jars/spark-examples.jar
+
+ chmod -R g=u /stackable/spark-${PRODUCT}/dist
+ chmod -R g=u /stackable/spark-${PRODUCT}/assembly/target/bom.json
+ chmod -R g=u /stackable/jmx
+ EOF
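With the unversioned symlink in place, tests can call the examples jar without hard-coding the Spark version. A typical smoke test against the final image might look like this; the jar path follows from the COPY into /stackable/spark below, while the local master and the partition count are purely illustrative:

    # Sketch: run the bundled SparkPi example through the version-independent symlink
    /stackable/spark/bin/spark-submit \
      --master "local[2]" \
      --class org.apache.spark.examples.SparkPi \
      /stackable/spark/examples/jars/spark-examples.jar 100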
FROM stackable/image/java-base AS final

@@ -294,14 +286,15 @@ ENV PATH=$SPARK_HOME:$PATH:/bin:$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$HOME/.local/b
ENV PYSPARK_PYTHON=/usr/bin/python
ENV PYTHONPATH=$SPARK_HOME/python

- COPY spark-k8s/stackable /stackable
- COPY spark-k8s/licenses /licenses

COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-builder /stackable/spark-${PRODUCT}/dist /stackable/spark
COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-builder /stackable/spark-${PRODUCT}/assembly/target/bom.json /stackable/spark/spark-${PRODUCT}.cdx.json
COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-builder /stackable/jmx /stackable/jmx
COPY --from=spark-builder /usr/bin/tini /usr/bin/tini

+ COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/stackable/run-spark.sh /stackable/run-spark.sh
+ COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/licenses /licenses
+
RUN <<EOF
microdnf update
# procps: required for spark startup scripts
@@ -320,19 +313,10 @@ rm -rf /var/cache/yum

ln -s /usr/bin/python${PYTHON} /usr/bin/python
ln -s /usr/bin/pip-${PYTHON} /usr/bin/pip
-
- ln -s "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" /stackable/jmx/jmx_prometheus_javaagent.jar
- # Symlink example jar, so that we can easily use it in tests
- ln -s /stackable/spark/examples/jars/spark-examples_*.jar /stackable/spark/examples/jars/spark-examples.jar
-
- # All files and folders owned by root group to support running as arbitrary users.
- # This is best practice as all container users will belong to the root group (0).
- chown -R ${STACKABLE_USER_UID}:0 /stackable
- chmod -R g=u /stackable
EOF

# ----------------------------------------
- # Attention: We are changing the group of all files in /stackable directly above
+ # Attention:
# If you do any file based actions (copying / creating etc.) below this comment you
# absolutely need to make sure that the correct permissions are applied!
# chown ${STACKABLE_USER_UID}:0
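To make the warning concrete, any file added after this point would need to follow the same pattern already used for the COPY steps above; the file name here is purely hypothetical:

    # Hypothetical later addition: copy with the correct owner/group and align group permissions
    COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/stackable/extra-entrypoint.sh /stackable/extra-entrypoint.sh
    RUN chmod g=u /stackable/extra-entrypoint.sh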