Skip to content

Commit 0a69d06

Browse files
sbernauer and dervoeti
authored
chore(hive): Add version 4.1.0 (#1295)
* chore: Add Hive 4.1.0 * changelog * fix AWS version * WIP * fix: Add needed mapred JAR * chore: hadolint fix --------- Co-authored-by: Lukas Krug <[email protected]> Co-authored-by: dervoeti <[email protected]>
1 parent 7661927 commit 0a69d06

File tree

3 files changed

+76
-8
lines changed

3 files changed

+76
-8
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ All notable changes to this project will be documented in this file.
2929
- hadoop: Add `3.4.2` ([#1291]).
3030
- zookeeper: Add `3.9.4` ([#1292]).
3131
- nifi: Add `2.6.0` ([#1293]).
32+
- hive: Add `4.1.0` ([#1295]).
3233
- hbase: Add `2.6.3` ([#1296]).
3334

3435
### Changed
@@ -90,6 +91,7 @@ All notable changes to this project will be documented in this file.
9091
[#1291]: https://github.com/stackabletech/docker-images/pull/1291
9192
[#1292]: https://github.com/stackabletech/docker-images/pull/1292
9293
[#1293]: https://github.com/stackabletech/docker-images/pull/1293
94+
[#1295]: https://github.com/stackabletech/docker-images/pull/1295
9395
[#1296]: https://github.com/stackabletech/docker-images/pull/1296
9496
[#1300]: https://github.com/stackabletech/docker-images/pull/1300
9597
[#1301]: https://github.com/stackabletech/docker-images/pull/1301

hive/Dockerfile

Lines changed: 58 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -38,17 +38,23 @@ COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/patched-li
3838
USER ${STACKABLE_USER_UID}
3939
WORKDIR /stackable
4040

41+
ENV NEW_VERSION="${PRODUCT_VERSION}-stackable${RELEASE_VERSION}"
42+
43+
# Let's have patchable as a dedicated step, as it fetches the Hive source code over the network,
44+
# thus taking a while (which is annoying during development)
45+
RUN /stackable/patchable --images-repo-root=src checkout hive ${PRODUCT_VERSION} > /tmp/HIVE_SOURCE_DIR
46+
47+
# Make expensive maven build a separate layer for better caching
4148
# Cache mounts are owned by root by default
4249
# We need to explicitly give the uid to use
4350
RUN --mount=type=cache,id=maven-hive-${PRODUCT_VERSION},uid=${STACKABLE_USER_UID},target=/stackable/.m2/repository <<EOF
44-
BUILD_SRC_DIR="$(/stackable/patchable --images-repo-root=src checkout hive ${PRODUCT_VERSION})"
51+
BUILD_SRC_DIR="$(cat /tmp/HIVE_SOURCE_DIR)" || exit 1
52+
rm /tmp/HIVE_SOURCE_DIR
4553
cd "$BUILD_SRC_DIR"
4654

4755
# Make Maven aware of custom Stackable libraries
4856
cp -r /stackable/patched-libs/maven/* /stackable/.m2/repository
4957

50-
NEW_VERSION="${PRODUCT_VERSION}-stackable${RELEASE_VERSION}"
51-
5258
# generateBackupPoms=false is needed for the Hive 4.0.0 build to succeed, otherwise it fails with the obscure reason: `Too many files with unapproved license`
5359
mvn versions:set -DnewVersion=$NEW_VERSION -DartifactId=* -DgroupId=* -DgenerateBackupPoms=false
5460

@@ -62,7 +68,7 @@ if [[ "${PRODUCT_VERSION}" == "3.1.3" ]] ; then
6268
--projects standalone-metastore
6369
mv standalone-metastore/target/apache-hive-metastore-${NEW_VERSION}-bin/apache-hive-metastore-${NEW_VERSION}-bin /stackable
6470
mv standalone-metastore/target/bom.json /stackable/apache-hive-metastore-${NEW_VERSION}-bin/apache-hive-metastore-${NEW_VERSION}.cdx.json
65-
else
71+
elif [[ "${PRODUCT_VERSION}" == 4.0.* ]]; then
6672
(
6773
# https://issues.apache.org/jira/browse/HIVE-20451 switched the metastore server packaging starting with 4.0.0
6874
mvn \
@@ -78,16 +84,34 @@ else
7884
# The schemaTool.sh is still pointing to the class location from Hive < 4.0.0, it seems like it was forgotten to update it
7985
sed -i -e 's/CLASS=org.apache.hadoop.hive.metastore.tools.MetastoreSchemaTool/CLASS=org.apache.hadoop.hive.metastore.tools.schematool.MetastoreSchemaTool/' /stackable/apache-hive-metastore-${NEW_VERSION}-bin/bin/ext/schemaTool.sh
8086
)
87+
else
88+
# Starting with 4.1.0 the build process changed again in https://github.com/apache/hive/pull/5936 (HIVE-29062)
89+
mvn \
90+
clean package \
91+
-Dhadoop.version=${HADOOP_VERSION}-stackable${RELEASE_VERSION} \
92+
-DskipTests \
93+
-Pdist
94+
# Looks like we can not filter the projects using "--projects standalone-metastore/metastore-server --also-make",
95+
# as this does not build a *.tar.gz
96+
97+
# We only seem to get a .tar.gz archive, so let's extract that to the correct location
98+
tar --extract --directory=/stackable -f standalone-metastore/packaging/target/hive-standalone-metastore-${NEW_VERSION}-bin.tar.gz
99+
mv standalone-metastore/metastore-server/target/bom.json /stackable/apache-hive-metastore-${NEW_VERSION}-bin/hive-standalone-metastore-${NEW_VERSION}.cdx.json
81100
fi
82101

83102
# Remove sourcecode
84-
cd /stackable
85103
rm -rf "$BUILD_SRC_DIR"
104+
EOF
86105

106+
RUN <<EOF
107+
cd /stackable
87108
mkdir /stackable/jmx
88109
curl "https://repo.stackable.tech/repository/packages/jmx-exporter/jmx_prometheus_javaagent-${JMX_EXPORTER_VERSION}.jar" -o "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER_VERSION}.jar"
89110
ln -s "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER_VERSION}.jar" /stackable/jmx/jmx_prometheus_javaagent.jar
90111

112+
# Needed to run housekeeping jobs, see footnote <1> below
113+
cp /stackable/patched-libs/maven/org/apache/hadoop/hadoop-mapreduce-client-core/${HADOOP_VERSION}-stackable${RELEASE_VERSION}/hadoop-mapreduce-client-core-${HADOOP_VERSION}-stackable${RELEASE_VERSION}.jar /stackable/apache-hive-metastore-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}-bin/lib/
114+
91115
# The next two sections for S3 and Azure use hardcoded version numbers on purpose instead of wildcards
92116
# This way the build will fail should one of the files not be available anymore in a later Hadoop version!
93117

@@ -96,8 +120,11 @@ cp /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION}/share/hadoop/
96120

97121
# According to https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/aws_sdk_upgrade.html, the jar filename has changed from
98122
# aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar to bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar. In future, you might need to do:
99-
# cp /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION}/share/hadoop/tools/lib/bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar /stackable/apache-hive-metastore-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}-bin/lib/
123+
if [[ "${PRODUCT_VERSION}" == "3.1.3" || "${PRODUCT_VERSION}" == 4.0.* ]]; then
100124
cp /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION}/share/hadoop/tools/lib/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar /stackable/apache-hive-metastore-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}-bin/lib/
125+
else
126+
cp /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION}/share/hadoop/tools/lib/bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar /stackable/apache-hive-metastore-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}-bin/lib/
127+
fi
101128

102129
# Add Azure ABFS support (support for abfs://)
103130
cp /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION}/share/hadoop/tools/lib/hadoop-azure-${HADOOP_VERSION}-stackable${RELEASE_VERSION}.jar /stackable/apache-hive-metastore-${NEW_VERSION}-bin/lib/
@@ -118,7 +145,6 @@ fi
118145
chmod --recursive g=u /stackable
119146
EOF
120147

121-
122148
FROM local-image/java-base AS final
123149

124150
ARG PRODUCT_VERSION
@@ -215,3 +241,28 @@ ENV HADOOP_MAPRED_HOME=/stackable/hadoop
215241

216242
WORKDIR /stackable/hive-metastore
217243
# Start command is set by operator to something like "bin/start-metastore --config /stackable/config --db-type postgres --hive-bin-dir bin"
244+
245+
246+
247+
# <1>: org.apache.hadoop.mapred.JobConf is needed
248+
# 2025-10-06T08:42:04,137 ERROR [Metastore threads starter thread] metastore.HiveMetaStore: Failure when starting the leader tasks, Compaction or Housekeeping tasks may not happen
249+
# java.lang.NoClassDefFoundError: org/apache/hadoop/mapred/JobConf
250+
# at org.apache.hadoop.hive.conf.HiveConf.initialize(HiveConf.java:6601) ~[hive-common-4.1.0.jar:4.1.0]
251+
# at org.apache.hadoop.hive.conf.HiveConf.<init>(HiveConf.java:6569) ~[hive-common-4.1.0.jar:4.1.0]
252+
# at org.apache.hadoop.hive.ql.txn.compactor.CompactorThread.setConf(CompactorThread.java:68) ~[hive-exec-4.1.0-core.jar:4.1.0]
253+
# at org.apache.hadoop.hive.metastore.leader.CompactorTasks.takeLeadership(CompactorTasks.java:139) ~[hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev]
254+
# at org.apache.hadoop.hive.metastore.leader.LeaseLeaderElection.lambda$notifyListener$0(LeaseLeaderElection.java:141) ~[hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev]
255+
# at java.base/java.util.ArrayList.forEach(Unknown Source) ~[?:?]
256+
# at org.apache.hadoop.hive.metastore.leader.LeaseLeaderElection.notifyListener(LeaseLeaderElection.java:138) ~[hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev]
257+
# at org.apache.hadoop.hive.metastore.leader.LeaseLeaderElection.doWork(LeaseLeaderElection.java:120) ~[hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev]
258+
# at org.apache.hadoop.hive.metastore.leader.LeaseLeaderElection.tryBeLeader(LeaseLeaderElection.java:181) ~[hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev]
259+
# at org.apache.hadoop.hive.metastore.leader.LeaseLeaderElection.tryBeLeader(LeaseLeaderElection.java:63) ~[hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev]
260+
# at org.apache.hadoop.hive.metastore.leader.LeaderElectionContext.lambda$start$2(LeaderElectionContext.java:125) ~[hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev]
261+
# at java.base/java.lang.Thread.run(Unknown Source) ~[?:?]
262+
# at org.apache.hadoop.hive.metastore.leader.LeaderElectionContext.start(LeaderElectionContext.java:136) ~[hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev]
263+
# at org.apache.hadoop.hive.metastore.HiveMetaStore$8.run(HiveMetaStore.java:856) [hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev]
264+
# Caused by: java.lang.ClassNotFoundException: org.apache.hadoop.mapred.JobConf
265+
# at java.base/jdk.internal.loader.BuiltinClassLoader.loadClass(Unknown Source) ~[?:?]
266+
# at java.base/jdk.internal.loader.ClassLoaders$AppClassLoader.loadClass(Unknown Source) ~[?:?]
267+
# at java.base/java.lang.ClassLoader.loadClass(Unknown Source) ~[?:?]
268+
# ... 14 more

hive/boil-config.toml

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ azure-storage-version = "7.0.1"
2525
azure-keyvault-core-version = "1.0.0"
2626

2727
[versions."4.0.1".local-images]
28-
# Hive 4 must be built with Java 8 (according to GitHub README) but seems to run on Java 11
28+
# Hive 4.0 must be built with Java 8 (according to GitHub README) but seems to run on Java 11
2929
java-base = "11"
3030
java-devel = "8"
3131
"hadoop/hadoop" = "3.3.6"
@@ -36,3 +36,18 @@ jmx-exporter-version = "1.3.0"
3636
aws-java-sdk-bundle-version = "1.12.367"
3737
azure-storage-version = "7.0.1"
3838
azure-keyvault-core-version = "1.0.0"
39+
40+
[versions."4.1.0".local-images]
41+
# Hive 4.1 requires Java 17 (according to GitHub README)
42+
java-base = "17"
43+
java-devel = "17"
44+
"hadoop/hadoop" = "3.4.2"
45+
46+
[versions."4.1.0".build-arguments]
47+
jmx-exporter-version = "1.3.0"
48+
# Keep consistent with the dependency from hadoop-aws: https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.4.2
49+
aws-java-sdk-bundle-version = "2.29.52"
50+
# Keep consistent with the dependency from hadoop-azure: https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.4.2
51+
azure-storage-version = "7.0.1"
52+
# Keep consistent with the dependency from azure-storage: https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1
53+
azure-keyvault-core-version = "1.0.0"

0 commit comments

Comments
 (0)