# syntax=docker/dockerfile:1

# Hadoop distribution is pre-built in its own image; the final stage copies it from here.
FROM stackable/image/hadoop AS hadoop-builder

FROM stackable/image/java-devel AS hive-builder

# Apache Hive up to 4.0.x(!) officially requires Java 8 (there is no distinction between building and running).
# As of 2024-04-15 we for sure need Java 8 for building, but we used a Java 11 runtime for months now without any problems.
# As we got weird TLS errors (https://stackable-workspace.slack.com/archives/C031A5BEFS7/p1713185172557459) with a
# Java 8 runtime we bumped the Runtime to Java 11 again.

ARG PRODUCT
ARG HADOOP
ARG JMX_EXPORTER

# Setting this to anything other than "true" will keep the cache folders around (e.g. for Maven, NPM etc.)
# This can be used to speed up builds when disk space is of no concern.
ARG DELETE_CACHES="true"

COPY --chown=stackable:stackable hive/stackable /stackable

USER stackable
WORKDIR /stackable

# Cache mounts are owned by root by default
# We need to explicitly give the uid to use which is hardcoded to "1000" in stackable-base
RUN --mount=type=cache,id=maven-hive,uid=1000,target=/stackable/.m2/repository <<EOF
# Heredoc lines are NOT implicitly &&-chained: without this, a failed download,
# patch, or Maven build would be silently ignored and the build would continue.
set -e

curl --fail -L "https://repo.stackable.tech/repository/packages/hive/apache-hive-${PRODUCT}-src.tar.gz" | tar -xzC .

patches/apply_patches.sh ${PRODUCT}

cd /stackable/apache-hive-${PRODUCT}-src/
mvn --batch-mode --no-transfer-progress clean package -DskipTests --projects standalone-metastore
mv standalone-metastore/target/apache-hive-metastore-${PRODUCT}-bin/apache-hive-metastore-${PRODUCT}-bin /stackable

ln -s /stackable/apache-hive-metastore-${PRODUCT}-bin/ /stackable/hive-metastore
# Ship our own start script instead of the one bundled with Hive
cp /stackable/bin/start-metastore /stackable/hive-metastore/bin
rm -rf /stackable/apache-hive-${PRODUCT}-src

curl --fail -L "https://repo.stackable.tech/repository/packages/jmx-exporter/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" -o "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar"
ln -s "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" /stackable/jmx/jmx_prometheus_javaagent.jar

# We're removing these to make the intermediate layer smaller
# This can be necessary even though it's only a builder image because the GitHub Action Runners only have very limited space available
# and we are sometimes running into errors because we're out of space.
# Therefore, we try to clean up all layers as much as possible.
if [ "${DELETE_CACHES}" = "true" ] ; then
  rm -rf /stackable/.m2/repository/*
  rm -rf /stackable/.npm/*
  rm -rf /stackable/.cache/*
fi
EOF
FROM stackable/image/java-base AS final

ARG PRODUCT
ARG HADOOP
ARG RELEASE
ARG AWS_JAVA_SDK_BUNDLE
ARG AZURE_STORAGE
ARG AZURE_KEYVAULT_CORE

ARG NAME="Apache Hive metastore"
ARG DESCRIPTION="This image is deployed by the Stackable Operator for Apache Hive."

# Reuse the ARGs above everywhere so the different label conventions cannot drift apart.
LABEL name="${NAME}"
LABEL version="${PRODUCT}"
LABEL release="${RELEASE}"
LABEL summary="The Stackable image for Apache Hive metastore."
LABEL description="${DESCRIPTION}"

# https://github.com/opencontainers/image-spec/blob/036563a4a268d7c08b51a08f05a02a0fe74c7268/annotations.md#annotations
LABEL org.opencontainers.image.documentation="https://docs.stackable.tech/home/stable/hive/"
LABEL org.opencontainers.image.version="${PRODUCT}"
LABEL org.opencontainers.image.revision="${RELEASE}"
LABEL org.opencontainers.image.title="${NAME}"
LABEL org.opencontainers.image.description="${DESCRIPTION}"

# https://docs.openshift.com/container-platform/4.16/openshift_images/create-images.html#defining-image-metadata
# https://github.com/projectatomic/ContainerApplicationGenericLabels/blob/master/vendor/redhat/labels.md
LABEL io.openshift.tags="ubi9,stackable,hive,sdp"
LABEL io.k8s.description="${DESCRIPTION}"
LABEL io.k8s.display-name="${NAME}"

RUN <<EOF
# Heredoc lines are NOT implicitly &&-chained; abort on the first failing command.
set -e
microdnf update
microdnf clean all
rpm -qa --qf "%{NAME}-%{VERSION}-%{RELEASE}\n" | sort > /stackable/package_manifest.txt
rm -rf /var/cache/yum
EOF

USER stackable
WORKDIR /stackable

COPY --chown=stackable:stackable --from=hive-builder /stackable/apache-hive-metastore-${PRODUCT}-bin /stackable/apache-hive-metastore-${PRODUCT}-bin
RUN ln -s /stackable/apache-hive-metastore-${PRODUCT}-bin /stackable/hive-metastore

# It is useful to see which version of Hadoop is used at a glance
# Therefore the use of the full name here
# TODO: Do we really need all of Hadoop in here?
COPY --chown=stackable:stackable --from=hadoop-builder /stackable/hadoop /stackable/hadoop-${HADOOP}

# The next two sections for S3 and Azure use hardcoded version numbers on purpose instead of wildcards
# This way the build will fail should one of the files not be available anymore in a later Hadoop version!
# Everything happens in a single layer to keep the number of image layers (and their total size) down.
RUN <<EOF
set -e
ln -s /stackable/hadoop-${HADOOP} /stackable/hadoop

# Add S3 Support for Hive (support for s3a://)
cp /stackable/hadoop/share/hadoop/tools/lib/hadoop-aws-${HADOOP}.jar /stackable/hive-metastore/lib/
cp /stackable/hadoop/share/hadoop/tools/lib/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE}.jar /stackable/hive-metastore/lib/

# Add Azure ABFS support (support for abfs://)
cp /stackable/hadoop/share/hadoop/tools/lib/hadoop-azure-${HADOOP}.jar /stackable/hive-metastore/lib/
cp /stackable/hadoop/share/hadoop/tools/lib/azure-storage-${AZURE_STORAGE}.jar /stackable/hive-metastore/lib/
cp /stackable/hadoop/share/hadoop/tools/lib/azure-keyvault-core-${AZURE_KEYVAULT_CORE}.jar /stackable/hive-metastore/lib/
EOF

COPY --chown=stackable:stackable --from=hive-builder /stackable/jmx /stackable/jmx
COPY hive/licenses /licenses

ENV HADOOP_HOME=/stackable/hadoop
|
0 commit comments