# syntax=docker/dockerfile:1.16.0@sha256:e2dd261f92e4b763d789984f6eab84be66ab4f5f08052316d8eb8f173593acf7
# check=error=true

- FROM stackable/image/java-devel AS hadoop-builder
-
- ARG PRODUCT
- ARG RELEASE
- ARG ASYNC_PROFILER
- ARG JMX_EXPORTER
- ARG PROTOBUF
- ARG TARGETARCH
- ARG TARGETOS
- ARG STACKABLE_USER_UID
-
- WORKDIR /stackable
-
- COPY --chown=${STACKABLE_USER_UID}:0 shared/protobuf/stackable/patches/patchable.toml /stackable/src/shared/protobuf/stackable/patches/patchable.toml
- COPY --chown=${STACKABLE_USER_UID}:0 shared/protobuf/stackable/patches/${PROTOBUF} /stackable/src/shared/protobuf/stackable/patches/${PROTOBUF}
-
- RUN <<EOF
- rpm --install --replacepkgs https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm
- microdnf update
- # boost is a build dependency starting in Hadoop 3.4.0 if compiling native code
- # automake and libtool are required to build protobuf
- microdnf install boost1.78-devel automake libtool
- microdnf clean all
- rm -rf /var/cache/yum
- mkdir /opt/protobuf
- chown ${STACKABLE_USER_UID}:0 /opt/protobuf
- EOF
-
- USER ${STACKABLE_USER_UID}
- # This Protobuf version is the exact version used in the Hadoop Dockerfile
- # See https://github.com/apache/hadoop/blob/trunk/dev-support/docker/pkg-resolver/install-protobuf.sh
- # (in earlier Hadoop versions this was hardcoded in the Dockerfile, so make sure to check the exact version on GitHub)
- RUN <<EOF
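-     # patchable checks out the ${PROTOBUF} sources with the Stackable patches applied and prints the path of the resulting directory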
-     cd "$(/stackable/patchable --images-repo-root=src checkout shared/protobuf ${PROTOBUF})"
-
-     # Create snapshot of the source code including custom patches
-     tar -czf /stackable/protobuf-${PROTOBUF}-src.tar.gz .
-
-     ./autogen.sh
-     ./configure --prefix=/opt/protobuf
-     make "-j$(nproc)"
-     make install
-     (cd .. && rm -r ${PROTOBUF})
- EOF
-
- ENV PROTOBUF_HOME=/opt/protobuf
- ENV PATH="${PATH}:/opt/protobuf/bin"
-
- RUN <<EOF
- # async-profiler
- ARCH="${TARGETARCH/amd64/x64}"
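- # TARGETARCH is amd64 or arm64 (set by Docker buildx); the async-profiler archives use "x64" for the former, hence the substitution above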
- curl "https://repo.stackable.tech/repository/packages/async-profiler/async-profiler-${ASYNC_PROFILER}-${TARGETOS}-${ARCH}.tar.gz" | tar -xzC .
- ln -s "/stackable/async-profiler-${ASYNC_PROFILER}-${TARGETOS}-${ARCH}" /stackable/async-profiler
-
- # JMX Exporter
- mkdir /stackable/jmx
- curl "https://repo.stackable.tech/repository/packages/jmx-exporter/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" -o "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar"
- chmod -x "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar"
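- # the agent jar is loaded via -javaagent and only needs to be readable, so drop any execute bit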
- ln -s "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" /stackable/jmx/jmx_prometheus_javaagent.jar
- EOF
-
- WORKDIR /build
- COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/patches/patchable.toml /build/src/hadoop/stackable/patches/patchable.toml
- COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/patches/${PRODUCT} /build/src/hadoop/stackable/patches/${PRODUCT}
- COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/fuse_dfs_wrapper /build
- COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/jmx /stackable/jmx
- USER ${STACKABLE_USER_UID}
- # Hadoop Pipes requires libtirpc to build, whose headers are not packaged in RedHat UBI, so skip building this module.
- # Build from source to enable the FUSE module and to apply custom patches.
- # Also skip building the yarn, mapreduce and minicluster modules: they are excluded from the build, but jars that other
- # modules need are not stripped, e.g. share/hadoop/yarn will not be part of the build, but yarn jars will still exist
- # in share/hadoop/tools because the resource estimator tool needs them. Such jars are removed in a later step.
- RUN <<EOF
- cd "$(/stackable/patchable --images-repo-root=src checkout hadoop ${PRODUCT})"
-
- ORIGINAL_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)
- NEW_VERSION=${PRODUCT}-stackable${RELEASE}
-
- mvn versions:set -DnewVersion=${NEW_VERSION}
-
- # Since we skip building the hadoop-pipes module, set its version back to the original one so that it can be pulled from Maven Central
- sed -e '/<artifactId>hadoop-pipes<\/artifactId>/,/<\/dependency>/ { s/<version>.*<\/version>/<version>'"$ORIGINAL_VERSION"'<\/version>/ }' -i hadoop-tools/hadoop-tools-dist/pom.xml
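- # Illustration (hypothetical pom snippet): inside the hadoop-pipes <dependency> block this turns
- #   <version>${NEW_VERSION}</version> into <version>${ORIGINAL_VERSION}</version>,
- # while all other modules keep the patched ${NEW_VERSION}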
-
- # Create snapshot of the source code including custom patches
- tar -czf /stackable/hadoop-${NEW_VERSION}-src.tar.gz .
-
- mvn \
-     --batch-mode \
-     --no-transfer-progress \
-     clean package install \
-     -Pdist,native \
-     -pl '!hadoop-tools/hadoop-pipes' \
-     -Dhadoop.version=${NEW_VERSION} \
-     -Drequire.fuse=true \
-     -DskipTests \
-     -Dmaven.javadoc.skip=true
-
- mkdir -p /stackable/patched-libs/maven/org/apache
- cp -r /stackable/.m2/repository/org/apache/hadoop /stackable/patched-libs/maven/org/apache
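- # The patched -stackable${RELEASE} Hadoop artifacts are staged in /stackable/patched-libs so that dependent builds (e.g. hdfs-utils) can seed them into their local Maven repository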
-
- cp -r hadoop-dist/target/hadoop-${NEW_VERSION} /stackable/hadoop-${NEW_VERSION}
- sed -i "s/${NEW_VERSION}/${ORIGINAL_VERSION}/g" hadoop-dist/target/bom.json
- mv hadoop-dist/target/bom.json /stackable/hadoop-${NEW_VERSION}/hadoop-${NEW_VERSION}.cdx.json
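- # The CycloneDX SBOM is rewritten to reference the upstream Hadoop version and shipped next to the dist as hadoop-${NEW_VERSION}.cdx.json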
-
- # HDFS fuse-dfs is not part of the regular dist output, so we need to copy it in ourselves
- cp hadoop-hdfs-project/hadoop-hdfs-native-client/target/main/native/fuse-dfs/fuse_dfs /stackable/hadoop-${NEW_VERSION}/bin
-
- # Remove source code
- (cd .. && rm -r ${PRODUCT})
-
- ln -s /stackable/hadoop-${NEW_VERSION} /stackable/hadoop
-
- mv /build/fuse_dfs_wrapper /stackable/hadoop/bin
-
- # Remove unneeded binaries:
- #  - code sources
- #  - mapreduce/yarn binaries that were built as cross-project dependencies
- #  - minicluster (only used for testing) and test .jars
- #  - json-io: this is a transitive dependency pulled in by cedarsoft/java-utils/json-io and is excluded in 3.4.0. See CVE-2023-34610.
- rm -rf /stackable/hadoop/share/hadoop/common/sources/
- rm -rf /stackable/hadoop/share/hadoop/hdfs/sources/
- rm -rf /stackable/hadoop/share/hadoop/tools/sources/
- rm -rf /stackable/hadoop/share/hadoop/tools/lib/json-io-*.jar
- rm -rf /stackable/hadoop/share/hadoop/tools/lib/hadoop-mapreduce-client-*.jar
- rm -rf /stackable/hadoop/share/hadoop/tools/lib/hadoop-yarn-server*.jar
- find /stackable/hadoop -name 'hadoop-minicluster-*.jar' -type f -delete
- find /stackable/hadoop -name 'hadoop-client-minicluster-*.jar' -type f -delete
- find /stackable/hadoop -name 'hadoop-*tests.jar' -type f -delete
- rm -rf /stackable/.m2
-
- # Set correct groups; make sure only required artifacts for the final image are located in /stackable
- chmod -R g=u /stackable
- EOF
+ FROM stackable/image/hadoop/hadoop AS hadoop-builder

FROM stackable/image/java-devel AS hdfs-utils-builder

ARG HDFS_UTILS
ARG PRODUCT
+ ARG RELEASE
+ ARG HADOOP_HADOOP
ARG STACKABLE_USER_UID

# Starting with hdfs-utils 0.4.0 we need to use Java 17 for compilation.
@@ -161,25 +31,31 @@ WORKDIR /stackable
COPY --chown=${STACKABLE_USER_UID}:0 hadoop/hdfs-utils/stackable/patches/patchable.toml /stackable/src/hadoop/hdfs-utils/stackable/patches/patchable.toml
COPY --chown=${STACKABLE_USER_UID}:0 hadoop/hdfs-utils/stackable/patches/${HDFS_UTILS} /stackable/src/hadoop/hdfs-utils/stackable/patches/${HDFS_UTILS}

+ COPY --from=hadoop-builder --chown=${STACKABLE_USER_UID}:0 /stackable/patched-libs /stackable/patched-libs
+
# The Stackable HDFS utils contain an OPA authorizer, group mapper & topology provider.
# The topology provider provides rack awareness functionality for HDFS by allowing users to specify Kubernetes
# labels to build a rackID from.
# Starting with hdfs-utils version 0.3.0 the topology provider is no longer a standalone jar and is included in hdfs-utils.
RUN <<EOF
cd "$(/stackable/patchable --images-repo-root=src checkout hadoop/hdfs-utils ${HDFS_UTILS})"

+ # Make Maven aware of custom Stackable libraries
+ mkdir -p /stackable/.m2/repository
+ cp -r /stackable/patched-libs/maven/* /stackable/.m2/repository
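+ # The patched Hadoop artifacts (version ${HADOOP_HADOOP}-stackable${RELEASE}) are not on Maven Central, so they are seeded into the local repository here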
+
# Create snapshot of the source code including custom patches
tar -czf /stackable/hdfs-utils-${HDFS_UTILS}-src.tar.gz .

mvn \
    --batch-mode \
    --no-transfer-progress \
    clean package \
-     -P hadoop-${PRODUCT} \
+     -P hadoop-${HADOOP_HADOOP} \
+     -Dhadoop.version=${HADOOP_HADOOP}-stackable${RELEASE} \
    -DskipTests \
    -Dmaven.javadoc.skip=true

- mkdir -p /stackable
cp target/hdfs-utils-$HDFS_UTILS.jar /stackable/hdfs-utils-${HDFS_UTILS}.jar
rm -rf hdfs-utils-main

@@ -191,8 +67,13 @@ FROM stackable/image/java-base AS final

ARG PRODUCT
ARG RELEASE
+ ARG HADOOP_HADOOP
ARG HDFS_UTILS
ARG STACKABLE_USER_UID
+ ARG ASYNC_PROFILER
+ ARG JMX_EXPORTER
+ ARG TARGETARCH
+ ARG TARGETOS

LABEL \
    name="Apache Hadoop" \
@@ -203,10 +84,13 @@ LABEL \
    summary="The Stackable image for Apache Hadoop." \
    description="This image is deployed by the Stackable Operator for Apache Hadoop / HDFS."

- COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable /stackable
- COPY --chown=${STACKABLE_USER_UID}:0 --from=hdfs-utils-builder /stackable/hdfs-utils-${HDFS_UTILS}.jar /stackable/hadoop-${PRODUCT}-stackable${RELEASE}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar
+ COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/hadoop-${HADOOP_HADOOP}-stackable${RELEASE} /stackable/hadoop-${HADOOP_HADOOP}-stackable${RELEASE}
+ COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/*-src.tar.gz /stackable
+
+ COPY --chown=${STACKABLE_USER_UID}:0 --from=hdfs-utils-builder /stackable/hdfs-utils-${HDFS_UTILS}.jar /stackable/hadoop-${HADOOP_HADOOP}-stackable${RELEASE}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar
COPY --chown=${STACKABLE_USER_UID}:0 --from=hdfs-utils-builder /stackable/hdfs-utils-${HDFS_UTILS}-src.tar.gz /stackable

+ COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/jmx /stackable/jmx
COPY --chown=${STACKABLE_USER_UID}:0 hadoop/licenses /licenses

# fuse is required for fusermount (called by fuse_dfs)
@@ -230,6 +114,22 @@ rm -rf /var/cache/yum
# Without this fuse_dfs does not work
# This is so that non-root users (as we are) can mount a FUSE device and let other users access it
echo "user_allow_other" > /etc/fuse.conf
+
+ ln -s /stackable/hadoop-${HADOOP_HADOOP}-stackable${RELEASE} /stackable/hadoop
+
+ # async-profiler
+ ARCH="${TARGETARCH/amd64/x64}"
+ curl "https://repo.stackable.tech/repository/packages/async-profiler/async-profiler-${ASYNC_PROFILER}-${TARGETOS}-${ARCH}.tar.gz" | tar -xzC /stackable
+ ln -s "/stackable/async-profiler-${ASYNC_PROFILER}-${TARGETOS}-${ARCH}" /stackable/async-profiler
+
+ # JMX Exporter
+ curl "https://repo.stackable.tech/repository/packages/jmx-exporter/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" -o "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar"
+ chmod -x "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar"
+ ln -s "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" /stackable/jmx/jmx_prometheus_javaagent.jar
+
+ # Set correct permissions and ownerships
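+ # g=u gives the root group (GID 0) the same permissions as the owning user, so the image also works when run with an arbitrary UID that belongs to group 0 (e.g. on OpenShift)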
+ chown --recursive ${STACKABLE_USER_UID}:0 /stackable/hadoop /stackable/jmx /stackable/async-profiler /stackable/async-profiler-${ASYNC_PROFILER}-${TARGETOS}-${ARCH}
+ chmod --recursive g=u /stackable/jmx /stackable/async-profiler /stackable/hadoop-${HADOOP_HADOOP}-stackable${RELEASE}
EOF

# ----------------------------------------