@@ -11,82 +11,100 @@ ARG TARGETARCH
ARG TARGETOS
ARG STACKABLE_USER_UID

- WORKDIR /stackable/jmx
-
- # The symlink from JMX Exporter 0.16.1 to the versionless link exists because old HDFS Operators (up until and including 23.7) used to hardcode
- # the version of JMX Exporter like this: "-javaagent:/stackable/jmx/jmx_prometheus_javaagent-0.16.1.jar"
- # This is a TEMPORARY fix which means that we can keep the hardcoded path in HDFS operator FOR NOW as it will still point to a newer version of JMX Exporter, despite the "0.16.1" in the name.
- # At the same time a new HDFS Operator will still work with older images which do not have the symlink to the versionless jar.
- # After one of our next releases (23.11 or 24.x) we should update the operator to point at the non-versioned symlink (jmx_prometheus_javaagent.jar)
- # And then we can also remove the symlink to 0.16.1 from this Dockerfile.
- RUN curl "https://repo.stackable.tech/repository/packages/jmx-exporter/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" -o "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" && \
- chmod -x "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" && \
- ln -s "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" /stackable/jmx/jmx_prometheus_javaagent.jar && \
- ln -s /stackable/jmx/jmx_prometheus_javaagent.jar /stackable/jmx/jmx_prometheus_javaagent-0.16.1.jar
-
- WORKDIR /stackable
-
- RUN ARCH="${TARGETARCH/amd64/x64}" && \
- curl "https://repo.stackable.tech/repository/packages/async-profiler/async-profiler-${ASYNC_PROFILER}-${TARGETOS}-${ARCH}.tar.gz" | tar -xzC . && \
- ln -s "/stackable/async-profiler-${ASYNC_PROFILER}-${TARGETOS}-${ARCH}" /stackable/async-profiler
-
# This Protobuf version is the exact version as used in the Hadoop Dockerfile
# See https://github.com/apache/hadoop/blob/trunk/dev-support/docker/pkg-resolver/install-protobuf.sh
# (this was hardcoded in the Dockerfile in earlier versions of Hadoop, make sure to look at the exact version in Github)
WORKDIR /opt/protobuf-src
- RUN curl https://repo.stackable.tech/repository/packages/protobuf/protobuf-java-${PROTOBUF}.tar.gz -o /opt/protobuf.tar.gz && \
- tar xzf /opt/protobuf.tar.gz --strip-components 1 --no-same-owner && \
- ./configure --prefix=/opt/protobuf && \
- make "-j$(nproc)" && \
- make install && \
+ RUN <<EOF
+ curl https://repo.stackable.tech/repository/packages/protobuf/protobuf-java-${PROTOBUF}.tar.gz -o /opt/protobuf.tar.gz
+ tar xzf /opt/protobuf.tar.gz --strip-components 1 --no-same-owner
+ ./configure --prefix=/opt/protobuf
+ make "-j$(nproc)"
+ make install
rm -rf /opt/protobuf-src
+ EOF

ENV PROTOBUF_HOME=/opt/protobuf
ENV PATH="${PATH}:/opt/protobuf/bin"

- RUN rpm --install --replacepkgs https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm
- RUN microdnf update && \
- microdnf install \
- # boost is a build dependency starting in Hadoop 3.4.0 if compiling native code
- boost1.78-devel && \
- microdnf clean all && \
- rm -rf /var/cache/yum
+ RUN <<EOF
+ rpm --install --replacepkgs https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm
+ microdnf update
+ # boost is a build dependency starting in Hadoop 3.4.0 if compiling native code
+ microdnf install boost1.78-devel
+ microdnf clean all
+ rm -rf /var/cache/yum
+ EOF

WORKDIR /stackable
+ RUN <<EOF
+ # async-profiler
+ ARCH="${TARGETARCH/amd64/x64}"
+ curl "https://repo.stackable.tech/repository/packages/async-profiler/async-profiler-${ASYNC_PROFILER}-${TARGETOS}-${ARCH}.tar.gz" | tar -xzC .
+ ln -s "/stackable/async-profiler-${ASYNC_PROFILER}-${TARGETOS}-${ARCH}" /stackable/async-profiler
+
+ # JMX Exporter
+ mkdir /stackable/jmx
+ curl "https://repo.stackable.tech/repository/packages/jmx-exporter/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" -o "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar"
+ chmod -x "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar"
+ ln -s "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" /stackable/jmx/jmx_prometheus_javaagent.jar
+ EOF

- COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/patches /stackable/patches
-
+ WORKDIR /build
+ COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/fuse_dfs_wrapper /build
+ COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/patches /build/patches
+ COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/jmx /stackable/jmx
# Hadoop Pipes requires libtirpc to build, whose headers are not packaged in RedHat UBI, so skip building this module
# Build from source to enable FUSE module, and to apply custom patches.
# Also skip building the yarn, mapreduce and minicluster modules: this will result in the modules being excluded but not all
# jar files will be stripped if they are needed elsewhere e.g. share/hadoop/yarn will not be part of the build, but yarn jars
# will still exist in share/hadoop/tools as they would be needed by the resource estimator tool. Such jars are removed in a later step.
- RUN curl "https://repo.stackable.tech/repository/packages/hadoop/hadoop-${PRODUCT}-src.tar.gz" | tar -xzC . && \
- patches/apply_patches.sh ${PRODUCT} && \
- cd hadoop-${PRODUCT}-src && \
- mvn --batch-mode --no-transfer-progress clean package -Pdist,native -pl '!hadoop-tools/hadoop-pipes,!hadoop-yarn-project,!hadoop-mapreduce-project,!hadoop-minicluster' -Drequire.fuse=true -DskipTests -Dmaven.javadoc.skip=true && \
- cp -r hadoop-dist/target/hadoop-${PRODUCT} /stackable/hadoop-${PRODUCT} && \
- mv hadoop-dist/target/bom.json /stackable/hadoop-${PRODUCT}/hadoop-${PRODUCT}.cdx.json && \
- # HDFS fuse-dfs is not part of the regular dist output, so we need to copy it in ourselves
- cp hadoop-hdfs-project/hadoop-hdfs-native-client/target/main/native/fuse-dfs/fuse_dfs /stackable/hadoop-${PRODUCT}/bin && \
- rm -rf /stackable/hadoop-${PRODUCT}-src
-
- # For earlier versions this script removes the .class file that contains the
- # vulnerable code.
- # TODO: This can be restricted to target only versions which do not honor the environment
- # varible that has been set above but this has not currently been implemented
- COPY shared/log4shell.sh /bin
- RUN /bin/log4shell.sh "/stackable/hadoop-${PRODUCT}"
-
- # Ensure no vulnerable files are left over
- # This will currently report vulnerable files being present, as it also alerts on
- # SocketNode.class, which we do not remove with our scripts.
- # Further investigation will be needed whether this should also be removed.
- COPY shared/log4shell_1.6.1-log4shell_Linux_x86_64 /bin/log4shell_scanner_x86_64
- COPY shared/log4shell_1.6.1-log4shell_Linux_aarch64 /bin/log4shell_scanner_aarch64
- COPY shared/log4shell_scanner /bin/log4shell_scanner
- RUN /bin/log4shell_scanner s "/stackable/hadoop-${PRODUCT}"
- # ===
+ RUN <<EOF
+ curl "https://repo.stackable.tech/repository/packages/hadoop/hadoop-${PRODUCT}-src.tar.gz" | tar -xzC .
+
+ patches/apply_patches.sh ${PRODUCT}
+ cd hadoop-${PRODUCT}-src
+
+ mvn \
+ --batch-mode \
+ --no-transfer-progress \
+ clean package \
+ -Pdist,native \
+ -pl '!hadoop-tools/hadoop-pipes,!hadoop-yarn-project,!hadoop-mapreduce-project,!hadoop-minicluster' \
+ -Drequire.fuse=true \
+ -DskipTests \
+ -Dmaven.javadoc.skip=true
+
+ cp -r hadoop-dist/target/hadoop-${PRODUCT} /stackable/hadoop-${PRODUCT}
+ mv hadoop-dist/target/bom.json /stackable/hadoop-${PRODUCT}/hadoop-${PRODUCT}.cdx.json
+
+ # HDFS fuse-dfs is not part of the regular dist output, so we need to copy it in ourselves
+ cp hadoop-hdfs-project/hadoop-hdfs-native-client/target/main/native/fuse-dfs/fuse_dfs /stackable/hadoop-${PRODUCT}/bin
+ rm -rf /build/hadoop-${PRODUCT}-src
+
+ ln -s /stackable/hadoop-${PRODUCT} /stackable/hadoop
+
+ mv /build/fuse_dfs_wrapper /stackable/hadoop/bin
+
+ # Remove unneeded binaries:
+ # - code sources
+ # - mapreduce/yarn binaries that were built as cross-project dependencies
+ # - minicluster (only used for testing) and test .jars
+ # - json-io: this is a transitive dependency pulled in by cedarsoft/java-utils/json-io and is excluded in 3.4.0. See CVE-2023-34610.
+ rm -rf /stackable/hadoop/share/hadoop/common/sources/
+ rm -rf /stackable/hadoop/share/hadoop/hdfs/sources/
+ rm -rf /stackable/hadoop/share/hadoop/tools/sources/
+ rm -rf /stackable/hadoop/share/hadoop/tools/lib/json-io-*.jar
+ rm -rf /stackable/hadoop/share/hadoop/tools/lib/hadoop-mapreduce-client-*.jar
+ rm -rf /stackable/hadoop/share/hadoop/tools/lib/hadoop-yarn-server*.jar
+ find /stackable -name 'hadoop-minicluster-*.jar' -type f -delete
+ find /stackable -name 'hadoop-client-minicluster-*.jar' -type f -delete
+ find /stackable -name 'hadoop-*tests.jar' -type f -delete
+ rm -rf /stackable/.m2
+
+ # Set correct groups; make sure only required artifacts for the final image are located in /stackable
+ chmod -R g=u /stackable
+ EOF

FROM stackable/image/java-devel AS hdfs-utils-builder
@@ -99,26 +117,40 @@ ARG STACKABLE_USER_UID
# Dockerfile, which needs Java 11. So we need to also use the java-devel image in version 11 and
# install Java 17 ourselves.
# The Adoptium yum repo is already added by the java-devel Dockerfile.
- RUN microdnf update && \
- microdnf install -y temurin-17-jdk && \
- microdnf clean all && \
- rm -rf /var/cache/yum
+ RUN <<EOF
+ microdnf update
+ microdnf install -y temurin-17-jdk
+ microdnf clean all
+ rm -rf /var/cache/yum
+ EOF
+
ENV JAVA_HOME="/usr/lib/jvm/temurin-17-jdk"
USER ${STACKABLE_USER_UID}
- WORKDIR /stackable
-
+ WORKDIR /build
# The Stackable HDFS utils contain an OPA authorizer, group mapper & topology provider.
# The topology provider provides rack awareness functionality for HDFS by allowing users to specify Kubernetes
# labels to build a rackID from.
# Starting with hdfs-utils version 0.3.0 the topology provider is not a standalone jar anymore and included in hdfs-utils.
-
- RUN curl "https://github.com/stackabletech/hdfs-utils/archive/refs/tags/v${HDFS_UTILS}.tar.gz" | tar -xzC . && \
- cd hdfs-utils-${HDFS_UTILS} && \
- mvn --batch-mode --no-transfer-progress clean package -P hadoop-${PRODUCT} -DskipTests -Dmaven.javadoc.skip=true && \
- mkdir -p /stackable/hadoop-${PRODUCT}/share/hadoop/common/lib && \
- cp target/hdfs-utils-$HDFS_UTILS.jar /stackable/hadoop-${PRODUCT}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar && \
- rm -rf /stackable/hdfs-utils-main
+ RUN <<EOF
+ curl "https://github.com/stackabletech/hdfs-utils/archive/refs/tags/v${HDFS_UTILS}.tar.gz" | tar -xzC .
+ cd hdfs-utils-${HDFS_UTILS}
+
+ mvn \
+ --batch-mode \
+ --no-transfer-progress \
+ clean package \
+ -P hadoop-${PRODUCT} \
+ -DskipTests \
+ -Dmaven.javadoc.skip=true
+
+ mkdir -p /stackable
+ cp target/hdfs-utils-$HDFS_UTILS.jar /stackable/hdfs-utils-${HDFS_UTILS}.jar
+ rm -rf hdfs-utils-main
+
+ # Set correct groups
+ chmod g=u /stackable/hdfs-utils-${HDFS_UTILS}.jar
+ EOF

FROM stackable/image/java-base AS final
@@ -127,21 +159,19 @@ ARG RELEASE
ARG HDFS_UTILS
ARG STACKABLE_USER_UID

- LABEL name="Apache Hadoop" \
-
- vendor="Stackable GmbH" \
- version="${PRODUCT}" \
- release="${RELEASE}" \
- summary="The Stackable image for Apache Hadoop." \
- description="This image is deployed by the Stackable Operator for Apache Hadoop / HDFS."
-
- COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/hadoop-${PRODUCT} /stackable/hadoop-${PRODUCT}/
- COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/jmx /stackable/jmx/
- COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/async-profiler /stackable/async-profiler/
- COPY --chown=${STACKABLE_USER_UID}:0 --from=hdfs-utils-builder /stackable/hadoop-${PRODUCT}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar /stackable/hadoop-${PRODUCT}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar
- COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/fuse_dfs_wrapper /stackable/
- COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/jmx /stackable/jmx
+ LABEL \
+ name="Apache Hadoop" \
+
+ vendor="Stackable GmbH" \
+ version="${PRODUCT}" \
+ release="${RELEASE}" \
+ summary="The Stackable image for Apache Hadoop." \
+ description="This image is deployed by the Stackable Operator for Apache Hadoop / HDFS."
+
+ COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable /stackable
+ COPY --chown=${STACKABLE_USER_UID}:0 --from=hdfs-utils-builder /stackable/hdfs-utils-${HDFS_UTILS}.jar /stackable/hadoop-${PRODUCT}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar

+ COPY --chown=${STACKABLE_USER_UID}:0 hadoop/licenses /licenses
# fuse is required for fusermount (called by fuse_dfs)
# fuse-libs is required for fuse_dfs (not included in fuse)
@@ -156,44 +186,31 @@ microdnf install \
fuse-libs \
tar
microdnf clean all
+ rpm -qa --qf "%{NAME}-%{VERSION}-%{RELEASE}\n " | sort > /stackable/package_manifest.txt
+ chown ${STACKABLE_USER_UID}:0 /stackable/package_manifest.txt
+ chmod g=u /stackable/package_manifest.txt
rm -rf /var/cache/yum

- ln -s /stackable/hadoop-${PRODUCT} /stackable/hadoop
- mv /stackable/fuse_dfs_wrapper /stackable/hadoop/bin
-
- # Remove unneeded binaries:
- # - code sources
- # - mapreduce/yarn binaries that were built as cross-project dependencies
- # - minicluster (only used for testing) and test .jars
- # - json-io: this is a transitive dependency pulled in by cedarsoft/java-utils/json-io and is excluded in 3.4.0. See CVE-2023-34610.
- rm -rf /stackable/hadoop/share/hadoop/common/sources/
- rm -rf /stackable/hadoop/share/hadoop/hdfs/sources/
- rm -rf /stackable/hadoop/share/hadoop/tools/sources/
- rm -rf /stackable/hadoop/share/hadoop/tools/lib/json-io-*.jar
- rm -rf /stackable/hadoop/share/hadoop/tools/lib/hadoop-mapreduce-client-*.jar
- rm -rf /stackable/hadoop/share/hadoop/tools/lib/hadoop-yarn-server*.jar
- find . -name 'hadoop-minicluster-*.jar' -type f -delete
- find . -name 'hadoop-client-minicluster-*.jar' -type f -delete
- find . -name 'hadoop-*tests.jar' -type f -delete
-
# Without this fuse_dfs does not work
# It is so non-root users (as we are) can mount a FUSE device and let other users access it
echo "user_allow_other" > /etc/fuse.conf
-
- # All files and folders owned by root group to support running as arbitrary users.
- # This is best practice as all container users will belong to the root group (0).
- chown -R ${STACKABLE_USER_UID}:0 /stackable
- chmod -R g=u /stackable
EOF

- COPY hadoop/licenses /licenses
-
# ----------------------------------------
- # Attention: We are changing the group of all files in /stackable directly above
- # If you do any file based actions (copying / creating etc.) below this comment you
- # absolutely need to make sure that the correct permissions are applied!
- # chown ${STACKABLE_USER_UID}:0
+ # Checks
+ # This section is to run final checks to ensure the created final images
+ # adhere to several minimal requirements like:
+ # - check file permissions and ownerships
+ # ----------------------------------------
+
+ # Check that permissions and ownership in /stackable are set correctly
+ # This will fail and stop the build if any mismatches are found.
+ RUN <<EOF
+ /bin/check-permissions-ownership.sh /stackable ${STACKABLE_USER_UID} 0
+ EOF
+
# ----------------------------------------
+ # Attention: Do not perform any file based actions (copying/creating etc.) below this comment because the permissions would not be checked.

USER ${STACKABLE_USER_UID}