2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -28,6 +28,7 @@ All notable changes to this project will be documented in this file.
- hadoop: Add `3.4.2` ([#1291]).
- zookeeper: Add `3.9.4` ([#1292]).
- nifi: Add `2.6.0` ([#1293]).
- hive: Add `4.1.0` ([#1295]).

### Changed

@@ -84,6 +85,7 @@ All notable changes to this project will be documented in this file.
[#1291]: https://github.com/stackabletech/docker-images/pull/1291
[#1292]: https://github.com/stackabletech/docker-images/pull/1292
[#1293]: https://github.com/stackabletech/docker-images/pull/1293
[#1295]: https://github.com/stackabletech/docker-images/pull/1295

## [25.7.0] - 2025-07-23

65 changes: 58 additions & 7 deletions hive/Dockerfile
@@ -38,17 +38,23 @@ COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/patched-li
USER ${STACKABLE_USER_UID}
WORKDIR /stackable

ENV NEW_VERSION="${PRODUCT_VERSION}-stackable${RELEASE_VERSION}"

# Keep patchable as a dedicated step, as it fetches the Hive source code over the network
# and therefore takes a while (which is annoying during development)
RUN /stackable/patchable --images-repo-root=src checkout hive ${PRODUCT_VERSION} > /tmp/HIVE_SOURCE_DIR

# Make the expensive Maven build a separate layer for better caching
# Cache mounts are owned by root by default,
# so we need to explicitly specify the uid to use
RUN --mount=type=cache,id=maven-hive-${PRODUCT_VERSION},uid=${STACKABLE_USER_UID},target=/stackable/.m2/repository <<EOF
BUILD_SRC_DIR="$(/stackable/patchable --images-repo-root=src checkout hive ${PRODUCT_VERSION})"
BUILD_SRC_DIR="$(cat /tmp/HIVE_SOURCE_DIR)" || exit 1
rm /tmp/HIVE_SOURCE_DIR
cd "$BUILD_SRC_DIR"

# Make Maven aware of custom Stackable libraries
cp -r /stackable/patched-libs/maven/* /stackable/.m2/repository

NEW_VERSION="${PRODUCT_VERSION}-stackable${RELEASE_VERSION}"

# generateBackupPoms=false is needed for the Hive 4.0.0 build to succeed; otherwise it fails with the obscure error: `Too many files with unapproved license`
mvn versions:set -DnewVersion=$NEW_VERSION -DartifactId=* -DgroupId=* -DgenerateBackupPoms=false

@@ -62,7 +68,7 @@ if [[ "${PRODUCT_VERSION}" == "3.1.3" ]] ; then
--projects standalone-metastore
mv standalone-metastore/target/apache-hive-metastore-${NEW_VERSION}-bin/apache-hive-metastore-${NEW_VERSION}-bin /stackable
mv standalone-metastore/target/bom.json /stackable/apache-hive-metastore-${NEW_VERSION}-bin/apache-hive-metastore-${NEW_VERSION}.cdx.json
else
elif [[ "${PRODUCT_VERSION}" == 4.0.* ]]; then
(
# https://issues.apache.org/jira/browse/HIVE-20451 switched the metastore server packaging starting with 4.0.0
mvn \
@@ -78,16 +84,34 @@ else
# schemaTool.sh still points to the class location from Hive < 4.0.0; it seems updating it was forgotten
sed -i -e 's/CLASS=org.apache.hadoop.hive.metastore.tools.MetastoreSchemaTool/CLASS=org.apache.hadoop.hive.metastore.tools.schematool.MetastoreSchemaTool/' /stackable/apache-hive-metastore-${NEW_VERSION}-bin/bin/ext/schemaTool.sh
)
else
# Starting with 4.1.0 the build process changed again in https://github.com/apache/hive/pull/5936 (HIVE-29062)
mvn \
clean package \
-Dhadoop.version=${HADOOP_VERSION}-stackable${RELEASE_VERSION} \
-DskipTests \
-Pdist
# It looks like we cannot filter the projects using "--projects standalone-metastore/metastore-server --also-make",
# as this does not build a *.tar.gz

# We only seem to get a .tar.gz archive, so let's extract that to the correct location
tar --extract --directory=/stackable -f standalone-metastore/packaging/target/hive-standalone-metastore-${NEW_VERSION}-bin.tar.gz
mv standalone-metastore/metastore-server/target/bom.json /stackable/apache-hive-metastore-${NEW_VERSION}-bin/hive-standalone-metastore-${NEW_VERSION}.cdx.json
fi

# Remove the source code
cd /stackable
rm -rf "$BUILD_SRC_DIR"
EOF

RUN << EOF
cd /stackable
mkdir /stackable/jmx
curl "https://repo.stackable.tech/repository/packages/jmx-exporter/jmx_prometheus_javaagent-${JMX_EXPORTER_VERSION}.jar" -o "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER_VERSION}.jar"
ln -s "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER_VERSION}.jar" /stackable/jmx/jmx_prometheus_javaagent.jar

# Needed to run housekeeping jobs, see footnote <1> below
cp /stackable/patched-libs/maven/org/apache/hadoop/hadoop-mapreduce-client-core/${HADOOP_VERSION}-stackable${RELEASE_VERSION}/hadoop-mapreduce-client-core-${HADOOP_VERSION}-stackable${RELEASE_VERSION}.jar /stackable/apache-hive-metastore-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}-bin/lib/
Review comment (Member Author): We will discuss internally how to proceed with this: https://stackable-workspace.slack.com/archives/C031A5BEFS7/p1759740021782879


# The next two sections for S3 and Azure use hardcoded version numbers on purpose instead of wildcards
# This way the build will fail if one of the files is no longer available in a later Hadoop version!

@@ -96,8 +120,11 @@ cp /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION}/share/hadoop/

# According to https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/aws_sdk_upgrade.html, the jar filename has changed from
# aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar to bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar. In future, you might need to do:
# cp /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION}/share/hadoop/tools/lib/bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar /stackable/apache-hive-metastore-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}-bin/lib/
if [[ "${PRODUCT_VERSION}" == "3.1.3" || "${PRODUCT_VERSION}" == 4.0.* ]]; then
cp /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION}/share/hadoop/tools/lib/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar /stackable/apache-hive-metastore-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}-bin/lib/
else
cp /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION}/share/hadoop/tools/lib/bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar /stackable/apache-hive-metastore-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}-bin/lib/
fi
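# A quick way to check which SDK bundle filename a given Hadoop release actually ships
# (a sketch, not run as part of the build; the path is the hadoop-builder location used above):
#   ls /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION}/share/hadoop/tools/lib/ | grep -E '(aws-java-sdk-)?bundle-'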

# Add Azure ABFS support (support for abfs://)
cp /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION}/share/hadoop/tools/lib/hadoop-azure-${HADOOP_VERSION}-stackable${RELEASE_VERSION}.jar /stackable/apache-hive-metastore-${NEW_VERSION}-bin/lib/
@@ -118,7 +145,6 @@ fi
chmod --recursive g=u /stackable
EOF


FROM local-image/java-base AS final

ARG PRODUCT_VERSION
@@ -215,3 +241,28 @@ ENV HADOOP_MAPRED_HOME=/stackable/hadoop

WORKDIR /stackable/hive-metastore
# Start command is set by operator to something like "bin/start-metastore --config /stackable/config --db-type postgres --hive-bin-dir bin"



# <1>: org.apache.hadoop.mapred.JobConf is needed, otherwise the metastore fails with:
# 2025-10-06T08:42:04,137 ERROR [Metastore threads starter thread] metastore.HiveMetaStore: Failure when starting the leader tasks, Compaction or Housekeeping tasks may not happen
# java.lang.NoClassDefFoundError: org/apache/hadoop/mapred/JobConf
# at org.apache.hadoop.hive.conf.HiveConf.initialize(HiveConf.java:6601) ~[hive-common-4.1.0.jar:4.1.0]
# at org.apache.hadoop.hive.conf.HiveConf.<init>(HiveConf.java:6569) ~[hive-common-4.1.0.jar:4.1.0]
# at org.apache.hadoop.hive.ql.txn.compactor.CompactorThread.setConf(CompactorThread.java:68) ~[hive-exec-4.1.0-core.jar:4.1.0]
# at org.apache.hadoop.hive.metastore.leader.CompactorTasks.takeLeadership(CompactorTasks.java:139) ~[hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev]
# at org.apache.hadoop.hive.metastore.leader.LeaseLeaderElection.lambda$notifyListener$0(LeaseLeaderElection.java:141) ~[hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev]
# at java.base/java.util.ArrayList.forEach(Unknown Source) ~[?:?]
# at org.apache.hadoop.hive.metastore.leader.LeaseLeaderElection.notifyListener(LeaseLeaderElection.java:138) ~[hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev]
# at org.apache.hadoop.hive.metastore.leader.LeaseLeaderElection.doWork(LeaseLeaderElection.java:120) ~[hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev]
# at org.apache.hadoop.hive.metastore.leader.LeaseLeaderElection.tryBeLeader(LeaseLeaderElection.java:181) ~[hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev]
# at org.apache.hadoop.hive.metastore.leader.LeaseLeaderElection.tryBeLeader(LeaseLeaderElection.java:63) ~[hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev]
# at org.apache.hadoop.hive.metastore.leader.LeaderElectionContext.lambda$start$2(LeaderElectionContext.java:125) ~[hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev]
# at java.base/java.lang.Thread.run(Unknown Source) ~[?:?]
# at org.apache.hadoop.hive.metastore.leader.LeaderElectionContext.start(LeaderElectionContext.java:136) ~[hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev]
# at org.apache.hadoop.hive.metastore.HiveMetaStore$8.run(HiveMetaStore.java:856) [hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev]
# Caused by: java.lang.ClassNotFoundException: org.apache.hadoop.mapred.JobConf
# at java.base/jdk.internal.loader.BuiltinClassLoader.loadClass(Unknown Source) ~[?:?]
# at java.base/jdk.internal.loader.ClassLoaders$AppClassLoader.loadClass(Unknown Source) ~[?:?]
# at java.base/java.lang.ClassLoader.loadClass(Unknown Source) ~[?:?]
# ... 14 more
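# A minimal way to verify the workaround from footnote <1> (a sketch, assuming a JDK with the
# `jar` tool is available, e.g. in the java-devel builder stage; path taken from the cp above):
#   jar tf /stackable/apache-hive-metastore-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}-bin/lib/hadoop-mapreduce-client-core-${HADOOP_VERSION}-stackable${RELEASE_VERSION}.jar \
#     | grep 'org/apache/hadoop/mapred/JobConf.class'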
17 changes: 16 additions & 1 deletion hive/boil-config.toml
@@ -25,7 +25,7 @@ azure-storage-version = "7.0.1"
azure-keyvault-core-version = "1.0.0"

[versions."4.0.1".local-images]
# Hive 4 must be built with Java 8 (according to GitHub README) but seems to run on Java 11
# Hive 4.0 must be built with Java 8 (according to GitHub README) but seems to run on Java 11
java-base = "11"
java-devel = "8"
"hadoop/hadoop" = "3.3.6"
@@ -36,3 +36,18 @@ jmx-exporter-version = "1.3.0"
aws-java-sdk-bundle-version = "1.12.367"
azure-storage-version = "7.0.1"
azure-keyvault-core-version = "1.0.0"

[versions."4.1.0".local-images]
# Hive 4.1 requires Java 17 (according to GitHub README)
java-base = "17"
java-devel = "17"
"hadoop/hadoop" = "3.4.2"

[versions."4.1.0".build-arguments]
jmx-exporter-version = "1.3.0"
# Keep consistent with the dependency from hadoop-aws: https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.4.2
aws-java-sdk-bundle-version = "2.29.52"
# Keep consistent with the dependency from hadoop-azure: https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.4.2
azure-storage-version = "7.0.1"
# Keep consistent with the dependency from azure-storage: https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1
azure-keyvault-core-version = "1.0.0"
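# Note: the build-arguments above presumably surface in hive/Dockerfile as build ARGs
# (an assumed mapping, based on the variables the Dockerfile references):
#   jmx-exporter-version        -> JMX_EXPORTER_VERSION
#   aws-java-sdk-bundle-version -> AWS_JAVA_SDK_BUNDLE_VERSION
#   azure-storage-version       -> AZURE_STORAGE_VERSION
#   azure-keyvault-core-version -> AZURE_KEYVAULT_CORE_VERSION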