diff --git a/CHANGELOG.md b/CHANGELOG.md index 610396d2d..bd17dc544 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,7 +21,7 @@ All notable changes to this project will be documented in this file. - hbase: Add hadoop-azure.jar to the lib directory to support the Azure Blob Filesystem and the Azure Data Lake Storage ([#853]). - kafka: Add cyrus-sasl-gssapi package for kerberos ([#874]). -- spark: Add HBase connector ([#878]). +- spark: Add HBase connector ([#878], [#882]). ### Changed @@ -75,6 +75,7 @@ All notable changes to this project will be documented in this file. [#878]: https://github.com/stackabletech/docker-images/pull/878 [#879]: https://github.com/stackabletech/docker-images/pull/879 [#881]: https://github.com/stackabletech/docker-images/pull/881 +[#882]: https://github.com/stackabletech/docker-images/pull/882 ## [24.7.0] - 2024-07-24 diff --git a/hadoop/stackable/patches/3.3.4/001-YARN-11527-3.3.4.patch b/hadoop/stackable/patches/3.3.4/001-YARN-11527-3.3.4.patch index b82c910e8..986a64b9a 100644 --- a/hadoop/stackable/patches/3.3.4/001-YARN-11527-3.3.4.patch +++ b/hadoop/stackable/patches/3.3.4/001-YARN-11527-3.3.4.patch @@ -7,7 +7,7 @@ index 0b2f6f17157d..9dc8b653eb93 100644 2.4.7 9.8.1 - v12.22.1 -+ v14.0.0 ++ v14.17.0 v1.22.5 1.10.11 diff --git a/hadoop/stackable/patches/3.3.6/001-YARN-11527-3.3.6.patch b/hadoop/stackable/patches/3.3.6/001-YARN-11527-3.3.6.patch index c4ccc9299..a823b7cd0 100644 --- a/hadoop/stackable/patches/3.3.6/001-YARN-11527-3.3.6.patch +++ b/hadoop/stackable/patches/3.3.6/001-YARN-11527-3.3.6.patch @@ -7,7 +7,7 @@ index f1ac43ed5b38..73d0c7580338 100644 5.4.0 9.8.1 - v12.22.1 -+ v14.0.0 ++ v14.17.0 v1.22.5 1.10.13 1.20 diff --git a/hadoop/stackable/patches/3.4.0/001-YARN-11527-3.4.0.patch b/hadoop/stackable/patches/3.4.0/001-YARN-11527-3.4.0.patch index 5526b8103..b050e80dd 100644 --- a/hadoop/stackable/patches/3.4.0/001-YARN-11527-3.4.0.patch +++ b/hadoop/stackable/patches/3.4.0/001-YARN-11527-3.4.0.patch @@ -7,7 +7,7 @@ index 0ed96d087bc..9ebb6af4567 100644 5.4.0 9.31 - v12.22.1 -+ v14.0.0 ++ v14.17.0 v1.22.5 1.10.13 1.20 diff --git a/spark-k8s/Dockerfile b/spark-k8s/Dockerfile index f7082baae..45084d1ad 100644 --- a/spark-k8s/Dockerfile +++ b/spark-k8s/Dockerfile @@ -53,15 +53,19 @@ ARG HADOOP ARG HBASE ARG HBASE_CONNECTOR -WORKDIR /stackable - -# Download the hbase-connectors source code RUN <1.5.1 + 0.14.0 + +- 2.5.0 ++ ++ com.google.protobuf ++ 2.6.1 ++ ${external.protobuf.version} + 0.5.0 + 2.11.0 + 1.7.7 +@@ -933,5 +936,19 @@ + + + ++ ++ ++ aarch64 ++ ++ com.github.os72 ++ 2.6.1-build3 ++ ++ ++ ++ linux ++ aarch64 ++ ++ ++ + + +diff --git a/spark/pom.xml b/spark/pom.xml +index 3f1eb21..fcdc73e 100644 +--- a/spark/pom.xml ++++ b/spark/pom.xml +@@ -84,7 +84,7 @@ + protobuf-maven-plugin + ${protobuf.plugin.version} + +- com.google.protobuf:protoc:${external.protobuf.version}:exe:${os.detected.classifier} ++ ${external.protobuf.groupId}:protoc:${external.protobuf.exe.version}:exe:${os.detected.classifier} + ${basedir}/src/main/protobuf/ + false + true diff --git a/spark-k8s/stackable/hbase-connectors-patches/apply_patches.sh b/spark-k8s/stackable/hbase-connectors-patches/apply_patches.sh new file mode 100755 index 000000000..833b3e9c7 --- /dev/null +++ b/spark-k8s/stackable/hbase-connectors-patches/apply_patches.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash + +# Enable error handling and unset variable checking +set -eu +set -o pipefail + +# Check if $1 (VERSION) is provided +if [ -z "${1-}" ]; then + echo "Please provide a value for VERSION as the first argument." + exit 1 +fi + +VERSION="$1" +PATCH_DIR="patches/$VERSION" + +# Check if version-specific patches directory exists +if [ ! -d "$PATCH_DIR" ]; then + echo "Patches directory '$PATCH_DIR' does not exist." + exit 1 +fi + +# Create an array to hold the patches in sorted order +declare -a patch_files=() + +echo "Applying patches from ${PATCH_DIR}" now + +# Read the patch files into the array +while IFS= read -r -d $'\0' file; do + patch_files+=("$file") +done < <(find "$PATCH_DIR" -name "*.patch" -print0 | sort -zV) + +echo "Found ${#patch_files[@]} patches, applying now" + +# Iterate through sorted patch files +for patch_file in "${patch_files[@]}"; do + echo "Applying $patch_file" + # We can not use Git here, as we are not within a Git repo + patch --directory "." --strip=1 < "$patch_file" || { + echo "Failed to apply $patch_file" + exit 1 + } +done + +echo "All patches applied successfully." diff --git a/spark-k8s/versions.py b/spark-k8s/versions.py index f586a2775..3909c49bb 100644 --- a/spark-k8s/versions.py +++ b/spark-k8s/versions.py @@ -4,10 +4,10 @@ "java-base": "17", "java-devel": "17", "python": "3.11", - "hadoop": "3.3.4", # https://github.com/apache/spark/blob/6a5747d66e53ed0d934cdd9ca5c9bd9fde6868e6/pom.xml#L125 + "hadoop": "3.3.6", # Hadoop version defined in ../hbase/versions.py to reduce build time and disk requirements "hbase": "2.4.18", # current Stackable LTS version - "aws_java_sdk_bundle": "1.12.262", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.3.4 - "azure_storage": "7.0.1", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.3.4 + "aws_java_sdk_bundle": "1.12.367", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.3.6 + "azure_storage": "7.0.1", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.3.6 "azure_keyvault_core": "1.0.0", # https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1 "jackson_dataformat_xml": "2.15.2", # https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.13/3.5.1 "stax2_api": "4.2.1", # https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 @@ -22,10 +22,10 @@ "java-base": "17", "java-devel": "17", "python": "3.11", - "hadoop": "3.3.4", # https://github.com/apache/spark/blob/6a5747d66e53ed0d934cdd9ca5c9bd9fde6868e6/pom.xml#L125 + "hadoop": "3.3.6", # Hadoop version defined in ../hbase/versions.py to reduce build time and disk requirements "hbase": "2.4.18", # current Stackable LTS version - "aws_java_sdk_bundle": "1.12.262", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.3.4 - "azure_storage": "7.0.1", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.3.4 + "aws_java_sdk_bundle": "1.12.367", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.3.6 + "azure_storage": "7.0.1", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.3.6 "azure_keyvault_core": "1.0.0", # https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1 "jackson_dataformat_xml": "2.15.2", # https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.13/3.5.1 "stax2_api": "4.2.1", # https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2