diff --git a/CHANGELOG.md b/CHANGELOG.md index b9ff1eebe..4d9c8afb4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,7 @@ All notable changes to this project will be documented in this file. - hadoop: Add `3.4.2` ([#1291]). - zookeeper: Add `3.9.4` ([#1292]). - nifi: Add `2.6.0` ([#1293]). +- hive: Add `4.1.0` ([#1295]). ### Changed @@ -84,6 +85,7 @@ All notable changes to this project will be documented in this file. [#1291]: https://github.com/stackabletech/docker-images/pull/1291 [#1292]: https://github.com/stackabletech/docker-images/pull/1292 [#1293]: https://github.com/stackabletech/docker-images/pull/1293 +[#1295]: https://github.com/stackabletech/docker-images/pull/1295 ## [25.7.0] - 2025-07-23 diff --git a/hive/Dockerfile b/hive/Dockerfile index 2778dbc04..0940c0687 100644 --- a/hive/Dockerfile +++ b/hive/Dockerfile @@ -38,17 +38,23 @@ COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/patched-li USER ${STACKABLE_USER_UID} WORKDIR /stackable +ENV NEW_VERSION="${PRODUCT_VERSION}-stackable${RELEASE_VERSION}" + +# Let's have patchable as a dedicated step, as it fetches the Hive source code over the network, +# which takes a while (and is annoying during development) +RUN /stackable/patchable --images-repo-root=src checkout hive ${PRODUCT_VERSION} > /tmp/HIVE_SOURCE_DIR + +# Make the expensive Maven build a separate layer for better caching # Cache mounts are owned by root by default # We need to explicitly give the uid to use RUN --mount=type=cache,id=maven-hive-${PRODUCT_VERSION},uid=${STACKABLE_USER_UID},target=/stackable/.m2/repository < below +cp /stackable/patched-libs/maven/org/apache/hadoop/hadoop-mapreduce-client-core/${HADOOP_VERSION}-stackable${RELEASE_VERSION}/hadoop-mapreduce-client-core-${HADOOP_VERSION}-stackable${RELEASE_VERSION}.jar /stackable/apache-hive-metastore-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}-bin/lib/ + # The next two sections for S3 and Azure use hardcoded version numbers on purpose instead of 
wildcards # This way the build will fail should one of the files not be available anymore in a later Hadoop version! @@ -96,8 +120,11 @@ cp /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION}/share/hadoop/ # According to https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/aws_sdk_upgrade.html, the jar filename has changed from # aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar to bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar. In future, you might need to do: -# cp /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION}/share/hadoop/tools/lib/bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar /stackable/apache-hive-metastore-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}-bin/lib/ +if [[ "${PRODUCT_VERSION}" == "3.1.3" || "${PRODUCT_VERSION}" == 4.0.* ]]; then cp /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION}/share/hadoop/tools/lib/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar /stackable/apache-hive-metastore-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}-bin/lib/ +else +cp /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION}/share/hadoop/tools/lib/bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar /stackable/apache-hive-metastore-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}-bin/lib/ +fi # Add Azure ABFS support (support for abfs://) cp /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION}/share/hadoop/tools/lib/hadoop-azure-${HADOOP_VERSION}-stackable${RELEASE_VERSION}.jar /stackable/apache-hive-metastore-${NEW_VERSION}-bin/lib/ @@ -118,7 +145,6 @@ fi chmod --recursive g=u /stackable EOF - FROM local-image/java-base AS final ARG PRODUCT_VERSION @@ -215,3 +241,28 @@ ENV HADOOP_MAPRED_HOME=/stackable/hadoop WORKDIR /stackable/hive-metastore # Start command is set by operator to something like "bin/start-metastore --config /stackable/config --db-type postgres --hive-bin-dir bin" + + + +# <1>: org.apache.hadoop.mapred.JobConf is needed; the log excerpt below shows the error when it is missing: +# 2025-10-06T08:42:04,137 ERROR [Metastore threads starter thread] 
metastore.HiveMetaStore: Failure when starting the leader tasks, Compaction or Housekeeping tasks may not happen +# java.lang.NoClassDefFoundError: org/apache/hadoop/mapred/JobConf +# at org.apache.hadoop.hive.conf.HiveConf.initialize(HiveConf.java:6601) ~[hive-common-4.1.0.jar:4.1.0] +# at org.apache.hadoop.hive.conf.HiveConf.(HiveConf.java:6569) ~[hive-common-4.1.0.jar:4.1.0] +# at org.apache.hadoop.hive.ql.txn.compactor.CompactorThread.setConf(CompactorThread.java:68) ~[hive-exec-4.1.0-core.jar:4.1.0] +# at org.apache.hadoop.hive.metastore.leader.CompactorTasks.takeLeadership(CompactorTasks.java:139) ~[hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev] +# at org.apache.hadoop.hive.metastore.leader.LeaseLeaderElection.lambda$notifyListener$0(LeaseLeaderElection.java:141) ~[hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev] +# at java.base/java.util.ArrayList.forEach(Unknown Source) ~[?:?] +# at org.apache.hadoop.hive.metastore.leader.LeaseLeaderElection.notifyListener(LeaseLeaderElection.java:138) ~[hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev] +# at org.apache.hadoop.hive.metastore.leader.LeaseLeaderElection.doWork(LeaseLeaderElection.java:120) ~[hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev] +# at org.apache.hadoop.hive.metastore.leader.LeaseLeaderElection.tryBeLeader(LeaseLeaderElection.java:181) ~[hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev] +# at org.apache.hadoop.hive.metastore.leader.LeaseLeaderElection.tryBeLeader(LeaseLeaderElection.java:63) ~[hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev] +# at org.apache.hadoop.hive.metastore.leader.LeaderElectionContext.lambda$start$2(LeaderElectionContext.java:125) ~[hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev] +# at 
java.base/java.lang.Thread.run(Unknown Source) ~[?:?] +# at org.apache.hadoop.hive.metastore.leader.LeaderElectionContext.start(LeaderElectionContext.java:136) ~[hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev] +# at org.apache.hadoop.hive.metastore.HiveMetaStore$8.run(HiveMetaStore.java:856) [hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev] +# Caused by: java.lang.ClassNotFoundException: org.apache.hadoop.mapred.JobConf +# at java.base/jdk.internal.loader.BuiltinClassLoader.loadClass(Unknown Source) ~[?:?] +# at java.base/jdk.internal.loader.ClassLoaders$AppClassLoader.loadClass(Unknown Source) ~[?:?] +# at java.base/java.lang.ClassLoader.loadClass(Unknown Source) ~[?:?] +# ... 14 more diff --git a/hive/boil-config.toml b/hive/boil-config.toml index 7bbcca500..2c99809a8 100644 --- a/hive/boil-config.toml +++ b/hive/boil-config.toml @@ -25,7 +25,7 @@ azure-storage-version = "7.0.1" azure-keyvault-core-version = "1.0.0" [versions."4.0.1".local-images] -# Hive 4 must be built with Java 8 (according to GitHub README) but seems to run on Java 11 +# Hive 4.0 must be built with Java 8 (according to GitHub README) but seems to run on Java 11 java-base = "11" java-devel = "8" "hadoop/hadoop" = "3.3.6" @@ -36,3 +36,18 @@ jmx-exporter-version = "1.3.0" aws-java-sdk-bundle-version = "1.12.367" azure-storage-version = "7.0.1" azure-keyvault-core-version = "1.0.0" + +[versions."4.1.0".local-images] +# Hive 4.1 requires Java 17 (according to GitHub README) +java-base = "17" +java-devel = "17" +"hadoop/hadoop" = "3.4.2" + +[versions."4.1.0".build-arguments] +jmx-exporter-version = "1.3.0" +# Keep consistent with the dependency from hadoop-aws: https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.4.2 +aws-java-sdk-bundle-version = "2.29.52" +# Keep consistent with the dependency from hadoop-azure: https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.4.2 +azure-storage-version 
= "7.0.1" +# Keep consistent with the dependency from azure-storage: https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1 +azure-keyvault-core-version = "1.0.0"