# syntax=docker/dockerfile:1.16.0@sha256:e2dd261f92e4b763d789984f6eab84be66ab4f5f08052316d8eb8f173593acf7
# check=error=true

- FROM stackable/image/java-devel AS hadoop-builder
-
- ARG PRODUCT
- ARG RELEASE
- ARG ASYNC_PROFILER
- ARG JMX_EXPORTER
- ARG PROTOBUF
- ARG TARGETARCH
- ARG TARGETOS
- ARG STACKABLE_USER_UID
-
- WORKDIR /stackable
-
- COPY --chown=${STACKABLE_USER_UID}:0 shared/protobuf/stackable/patches/patchable.toml /stackable/src/shared/protobuf/stackable/patches/patchable.toml
- COPY --chown=${STACKABLE_USER_UID}:0 shared/protobuf/stackable/patches/${PROTOBUF} /stackable/src/shared/protobuf/stackable/patches/${PROTOBUF}
-
- RUN <<EOF
- rpm --install --replacepkgs https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm
- microdnf update
- # boost is a build dependency starting in Hadoop 3.4.0 if compiling native code
- # automake and libtool are required to build protobuf
- microdnf install boost1.78-devel automake libtool
- microdnf clean all
- rm -rf /var/cache/yum
- mkdir /opt/protobuf
- chown ${STACKABLE_USER_UID}:0 /opt/protobuf
- EOF
-
- USER ${STACKABLE_USER_UID}
- # This Protobuf version is the exact version used in the Hadoop Dockerfile.
- # See https://github.com/apache/hadoop/blob/trunk/dev-support/docker/pkg-resolver/install-protobuf.sh
- # (this was hardcoded in the Dockerfile in earlier versions of Hadoop, so make sure to look at the exact version on GitHub)
- RUN <<EOF
- cd "$(/stackable/patchable --images-repo-root=src checkout shared/protobuf ${PROTOBUF})"
-
- # Create snapshot of the source code including custom patches
- tar -czf /stackable/protobuf-${PROTOBUF}-src.tar.gz .
-
- ./autogen.sh
- ./configure --prefix=/opt/protobuf
- make "-j$(nproc)"
- make install
- (cd .. && rm -r ${PROTOBUF})
- EOF
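- # Illustrative sanity check (hypothetical, not run during the build): the freshly
- # built compiler should now report the pinned version, e.g.
- #   /opt/protobuf/bin/protoc --version   # prints something like "libprotoc ${PROTOBUF}"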
-
- ENV PROTOBUF_HOME=/opt/protobuf
- ENV PATH="${PATH}:/opt/protobuf/bin"
-
- RUN <<EOF
- # async-profiler
- ARCH="${TARGETARCH/amd64/x64}"
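- # Note (illustrative): the substitution above maps Docker's "amd64" to "x64" to match
- # async-profiler's artifact naming (e.g. async-profiler-X.Y-linux-x64.tar.gz), while
- # "arm64" passes through unchanged.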
- curl "https://repo.stackable.tech/repository/packages/async-profiler/async-profiler-${ASYNC_PROFILER}-${TARGETOS}-${ARCH}.tar.gz" | tar -xzC .
- ln -s "/stackable/async-profiler-${ASYNC_PROFILER}-${TARGETOS}-${ARCH}" /stackable/async-profiler
-
- # JMX Exporter
- mkdir /stackable/jmx
- curl "https://repo.stackable.tech/repository/packages/jmx-exporter/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" -o "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar"
- chmod -x "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar"
- ln -s "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" /stackable/jmx/jmx_prometheus_javaagent.jar
- EOF
-
- WORKDIR /build
- COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/patches/patchable.toml /build/src/hadoop/stackable/patches/patchable.toml
- COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/patches/${PRODUCT} /build/src/hadoop/stackable/patches/${PRODUCT}
- COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/fuse_dfs_wrapper /build
- COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/jmx /stackable/jmx
- USER ${STACKABLE_USER_UID}
- # Hadoop Pipes requires libtirpc to build, whose headers are not packaged in RedHat UBI, so skip building this module.
- # Build from source to enable the FUSE module and to apply custom patches.
- # Also skip building the yarn, mapreduce and minicluster modules: the modules themselves are excluded, but jar files
- # needed elsewhere are kept, e.g. share/hadoop/yarn will not be part of the build, but yarn jars will still exist in
- # share/hadoop/tools because the resource estimator tool needs them. Such jars are removed in a later step.
- RUN <<EOF
- cd "$(/stackable/patchable --images-repo-root=src checkout hadoop ${PRODUCT})"
-
- ORIGINAL_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)
- NEW_VERSION=${PRODUCT}-stackable${RELEASE}
-
- mvn versions:set -DnewVersion=${NEW_VERSION}
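- # Illustrative (hypothetical values): with PRODUCT=3.4.1 and RELEASE=1.0.0 this sets
- # NEW_VERSION=3.4.1-stackable1.0.0 and rewrites every module pom to that version.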
-
- # Since we skip building the hadoop-pipes module, we need to set the version to the original version so it can be pulled from Maven Central
- sed -e '/<artifactId>hadoop-pipes<\/artifactId>/,/<\/dependency>/ { s/<version>.*<\/version>/<version>'"$ORIGINAL_VERSION"'<\/version>/ }' -i hadoop-tools/hadoop-tools-dist/pom.xml
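- # e.g. (hypothetical versions) within the hadoop-pipes <dependency> block this turns
- # <version>3.4.1-stackable1.0.0</version> back into <version>3.4.1</version>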
-
- # Create snapshot of the source code including custom patches
- tar -czf /stackable/hadoop-${NEW_VERSION}-src.tar.gz .
-
- mvn \
-   --batch-mode \
-   --no-transfer-progress \
-   clean package install \
-   -Pdist,native \
-   -pl '!hadoop-tools/hadoop-pipes' \
-   -Dhadoop.version=${NEW_VERSION} \
-   -Drequire.fuse=true \
-   -DskipTests \
-   -Dmaven.javadoc.skip=true
-
- mkdir -p /stackable/patched-libs/maven/org/apache
- cp -r /stackable/.m2/repository/org/apache/hadoop /stackable/patched-libs/maven/org/apache
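- # These Stackable-versioned Hadoop artifacts are staged under /stackable/patched-libs
- # so that downstream build stages (see the hdfs-utils builder below) can seed their
- # local Maven repository from them.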
-
- cp -r hadoop-dist/target/hadoop-${NEW_VERSION} /stackable/hadoop-${NEW_VERSION}
- sed -i "s/${NEW_VERSION}/${ORIGINAL_VERSION}/g" hadoop-dist/target/bom.json
- mv hadoop-dist/target/bom.json /stackable/hadoop-${NEW_VERSION}/hadoop-${NEW_VERSION}.cdx.json
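- # The version strings in the CycloneDX BOM are reverted to the upstream release,
- # presumably so that scanners can match components against upstream advisories
- # rather than the unknown -stackable versions.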
-
- # HDFS fuse-dfs is not part of the regular dist output, so we need to copy it in ourselves
- cp hadoop-hdfs-project/hadoop-hdfs-native-client/target/main/native/fuse-dfs/fuse_dfs /stackable/hadoop-${NEW_VERSION}/bin
-
- # Remove source code
- (cd .. && rm -r ${PRODUCT})
-
- ln -s /stackable/hadoop-${NEW_VERSION} /stackable/hadoop
-
- mv /build/fuse_dfs_wrapper /stackable/hadoop/bin
-
- # Remove unneeded binaries:
- # - code sources
- # - mapreduce/yarn binaries that were built as cross-project dependencies
- # - minicluster (only used for testing) and test .jars
- # - json-io: this is a transitive dependency pulled in by cedarsoft/java-utils/json-io and is excluded in 3.4.0. See CVE-2023-34610.
- rm -rf /stackable/hadoop/share/hadoop/common/sources/
- rm -rf /stackable/hadoop/share/hadoop/hdfs/sources/
- rm -rf /stackable/hadoop/share/hadoop/tools/sources/
- rm -rf /stackable/hadoop/share/hadoop/tools/lib/json-io-*.jar
- rm -rf /stackable/hadoop/share/hadoop/tools/lib/hadoop-mapreduce-client-*.jar
- rm -rf /stackable/hadoop/share/hadoop/tools/lib/hadoop-yarn-server*.jar
- find /stackable/hadoop -name 'hadoop-minicluster-*.jar' -type f -delete
- find /stackable/hadoop -name 'hadoop-client-minicluster-*.jar' -type f -delete
- find /stackable/hadoop -name 'hadoop-*tests.jar' -type f -delete
- rm -rf /stackable/.m2
-
- # Set correct groups; make sure only required artifacts for the final image are located in /stackable
- chmod -R g=u /stackable
- EOF
+ FROM stackable/image/hadoop/hadoop AS hadoop-builder

FROM stackable/image/java-devel AS hdfs-utils-builder

ARG HDFS_UTILS
ARG PRODUCT
+ ARG RELEASE
ARG STACKABLE_USER_UID
+ ARG HADOOP_HADOOP
+ # Reassign the arg to `HADOOP_VERSION` for better readability.
+ # It is passed as `HADOOP_HADOOP` because versions.py has to contain `hadoop/hadoop` to establish a dependency on the Hadoop builder.
+ # The value of `hadoop/hadoop` is transformed by `bake` and automatically passed as the `HADOOP_HADOOP` arg.
+ ENV HADOOP_VERSION=${HADOOP_HADOOP}
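+ # Illustrative (hypothetical value): a versions.py entry "hadoop/hadoop": "3.4.1"
+ # would arrive here as --build-arg HADOOP_HADOOP=3.4.1, i.e. HADOOP_VERSION=3.4.1.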

# Starting with hdfs-utils 0.4.0 we need to use Java 17 for compilation.
# We cannot simply use java-devel with Java 17, as it is also used to compile Hadoop in this
@@ -161,25 +35,31 @@ WORKDIR /stackable
COPY --chown=${STACKABLE_USER_UID}:0 hadoop/hdfs-utils/stackable/patches/patchable.toml /stackable/src/hadoop/hdfs-utils/stackable/patches/patchable.toml
COPY --chown=${STACKABLE_USER_UID}:0 hadoop/hdfs-utils/stackable/patches/${HDFS_UTILS} /stackable/src/hadoop/hdfs-utils/stackable/patches/${HDFS_UTILS}

+ COPY --from=hadoop-builder --chown=${STACKABLE_USER_UID}:0 /stackable/patched-libs /stackable/patched-libs
+
# The Stackable HDFS utils contain an OPA authorizer, group mapper & topology provider.
# The topology provider provides rack awareness functionality for HDFS by allowing users to specify Kubernetes
# labels to build a rackID from.
# Starting with hdfs-utils version 0.3.0 the topology provider is no longer a standalone jar but is included in hdfs-utils.
RUN <<EOF
cd "$(/stackable/patchable --images-repo-root=src checkout hadoop/hdfs-utils ${HDFS_UTILS})"

+ # Make Maven aware of custom Stackable libraries
+ mkdir -p /stackable/.m2/repository
+ cp -r /stackable/patched-libs/maven/* /stackable/.m2/repository
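+ # Maven consults the local repository first, so org.apache.hadoop artifacts with the
+ # ${HADOOP_VERSION}-stackable${RELEASE} version resolve from these patched libs; the
+ # -stackable versions are not published on Maven Central.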
+
# Create snapshot of the source code including custom patches
tar -czf /stackable/hdfs-utils-${HDFS_UTILS}-src.tar.gz .

mvn \
  --batch-mode \
  --no-transfer-progress \
  clean package \
-   -P hadoop-${PRODUCT} \
+   -P hadoop-${HADOOP_VERSION} \
+   -Dhadoop.version=${HADOOP_VERSION}-stackable${RELEASE} \
  -DskipTests \
  -Dmaven.javadoc.skip=true

- mkdir -p /stackable
cp target/hdfs-utils-$HDFS_UTILS.jar /stackable/hdfs-utils-${HDFS_UTILS}.jar
rm -rf hdfs-utils-main

@@ -191,11 +71,15 @@ FROM stackable/image/java-base AS final

ARG PRODUCT
ARG RELEASE
- ARG TARGETARCH
- ARG TARGETOS
+ ARG HADOOP_HADOOP
+ # Reassign the arg to `HADOOP_VERSION` for better readability.
+ ENV HADOOP_VERSION=${HADOOP_HADOOP}
ARG HDFS_UTILS
- ARG ASYNC_PROFILER
ARG STACKABLE_USER_UID
+ ARG ASYNC_PROFILER
+ ARG JMX_EXPORTER
+ ARG TARGETARCH
+ ARG TARGETOS

LABEL \
  name="Apache Hadoop" \
@@ -206,17 +90,13 @@ LABEL \
  summary="The Stackable image for Apache Hadoop." \
  description="This image is deployed by the Stackable Operator for Apache Hadoop / HDFS."

+ COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE} /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE}
+ COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/*-src.tar.gz /stackable

- COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/hadoop-${PRODUCT}-stackable${RELEASE} /stackable/hadoop-${PRODUCT}-stackable${RELEASE}
- COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/hadoop-${PRODUCT}-stackable${RELEASE}-src.tar.gz /stackable/
- COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/async-profiler-${ASYNC_PROFILER}-* /stackable/async-profiler-${ASYNC_PROFILER}
- COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/jmx /stackable/jmx
- COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/protobuf-*-src.tar.gz /stackable/
- COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/patched-libs /stackable/patched-libs
-
- COPY --chown=${STACKABLE_USER_UID}:0 --from=hdfs-utils-builder /stackable/hdfs-utils-${HDFS_UTILS}.jar /stackable/hadoop-${PRODUCT}-stackable${RELEASE}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar
+ COPY --chown=${STACKABLE_USER_UID}:0 --from=hdfs-utils-builder /stackable/hdfs-utils-${HDFS_UTILS}.jar /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar
COPY --chown=${STACKABLE_USER_UID}:0 --from=hdfs-utils-builder /stackable/hdfs-utils-${HDFS_UTILS}-src.tar.gz /stackable

+ COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/jmx /stackable/jmx
COPY --chown=${STACKABLE_USER_UID}:0 hadoop/licenses /licenses

# fuse is required for fusermount (called by fuse_dfs)
@@ -241,21 +121,24 @@ rm -rf /var/cache/yum
# It is so non-root users (as we are) can mount a FUSE device and let other users access it
echo "user_allow_other" > /etc/fuse.conf

- ln -s "/stackable/hadoop-${PRODUCT}-stackable${RELEASE}" /stackable/hadoop
- chown --no-dereference "${STACKABLE_USER_UID}:0" /stackable/hadoop
- chmod g=u "/stackable/hadoop-${PRODUCT}-stackable${RELEASE}"
- chmod g=u /stackable/*-src.tar.gz
+ ln -s "/stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE}" /stackable/hadoop

+ # async-profiler
ARCH="${TARGETARCH/amd64/x64}"
- mv /stackable/async-profiler-${ASYNC_PROFILER}* "/stackable/async-profiler-${ASYNC_PROFILER-}-${TARGETOS}-${ARCH}"
- chmod g=u "/stackable/async-profiler-${ASYNC_PROFILER-}-${TARGETOS}-${ARCH}"
+ curl "https://repo.stackable.tech/repository/packages/async-profiler/async-profiler-${ASYNC_PROFILER}-${TARGETOS}-${ARCH}.tar.gz" | tar -xzC /stackable
ln -s "/stackable/async-profiler-${ASYNC_PROFILER}-${TARGETOS}-${ARCH}" /stackable/async-profiler
- chown --no-dereference "${STACKABLE_USER_UID}:0" /stackable/async-profiler

- chmod g=u /stackable/jmx
- chmod g=u /stackable/patched-libs
+ # JMX Exporter
+ curl "https://repo.stackable.tech/repository/packages/jmx-exporter/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" -o "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar"
+ chmod -x "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar"
+ ln -s "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" /stackable/jmx/jmx_prometheus_javaagent.jar
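+ # The unversioned symlink keeps JVM flags stable across exporter upgrades, e.g.
+ # (illustrative, hypothetical port and config path):
+ #   -javaagent:/stackable/jmx/jmx_prometheus_javaagent.jar=8183:/stackable/jmx/config.yaml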

+ # Set correct permissions and ownerships
+ chown --recursive ${STACKABLE_USER_UID}:0 /stackable/hadoop /stackable/jmx /stackable/async-profiler "/stackable/async-profiler-${ASYNC_PROFILER}-${TARGETOS}-${ARCH}"
+ chmod --recursive g=u /stackable/jmx /stackable/async-profiler "/stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE}"
+ EOF

+ RUN <<EOF
# ----------------------------------------
# Checks
# This section is to run final checks to ensure the created final images