Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ All notable changes to this project will be documented in this file.
- hbase: Add hadoop-azure.jar to the lib directory to support the Azure Blob Filesystem and
the Azure Data Lake Storage ([#853]).
- kafka: Add cyrus-sasl-gssapi package for kerberos ([#874]).
- spark: Add HBase connector ([#878]).
- spark: Add HBase connector ([#878], [#882]).

### Changed

Expand Down Expand Up @@ -75,6 +75,7 @@ All notable changes to this project will be documented in this file.
[#878]: https://github.com/stackabletech/docker-images/pull/878
[#879]: https://github.com/stackabletech/docker-images/pull/879
[#881]: https://github.com/stackabletech/docker-images/pull/881
[#882]: https://github.com/stackabletech/docker-images/pull/882

## [24.7.0] - 2024-07-24

Expand Down
2 changes: 1 addition & 1 deletion hadoop/stackable/patches/3.3.4/001-YARN-11527-3.3.4.patch
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ index 0b2f6f17157d..9dc8b653eb93 100644
<json-smart.version>2.4.7</json-smart.version>
<nimbus-jose-jwt.version>9.8.1</nimbus-jose-jwt.version>
- <nodejs.version>v12.22.1</nodejs.version>
+ <nodejs.version>v14.0.0</nodejs.version>
+ <nodejs.version>v14.17.0</nodejs.version>
<yarnpkg.version>v1.22.5</yarnpkg.version>
<apache-ant.version>1.10.11</apache-ant.version>
</properties>
2 changes: 1 addition & 1 deletion hadoop/stackable/patches/3.3.6/001-YARN-11527-3.3.6.patch
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ index f1ac43ed5b38..73d0c7580338 100644
<woodstox.version>5.4.0</woodstox.version>
<nimbus-jose-jwt.version>9.8.1</nimbus-jose-jwt.version>
- <nodejs.version>v12.22.1</nodejs.version>
+ <nodejs.version>v14.0.0</nodejs.version>
+ <nodejs.version>v14.17.0</nodejs.version>
<yarnpkg.version>v1.22.5</yarnpkg.version>
<apache-ant.version>1.10.13</apache-ant.version>
<jmh.version>1.20</jmh.version>
2 changes: 1 addition & 1 deletion hadoop/stackable/patches/3.4.0/001-YARN-11527-3.4.0.patch
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ index 0ed96d087bc..9ebb6af4567 100644
<woodstox.version>5.4.0</woodstox.version>
<nimbus-jose-jwt.version>9.31</nimbus-jose-jwt.version>
- <nodejs.version>v12.22.1</nodejs.version>
+ <nodejs.version>v14.0.0</nodejs.version>
+ <nodejs.version>v14.17.0</nodejs.version>
<yarnpkg.version>v1.22.5</yarnpkg.version>
<apache-ant.version>1.10.13</apache-ant.version>
<jmh.version>1.20</jmh.version>
33 changes: 27 additions & 6 deletions spark-k8s/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,19 @@ ARG HADOOP
ARG HBASE
ARG HBASE_CONNECTOR

WORKDIR /stackable

# Download the hbase-connectors source code
RUN <<EOF
curl https://repo.stackable.tech/repository/packages/hbase-connectors/hbase-connectors_${HBASE_CONNECTOR}.tar.gz \
| tar xz
ln -s hbase-connectors-rel-${HBASE_CONNECTOR} hbase-connectors
microdnf update

# patch: Required for the apply_patches.sh script
microdnf install \
patch

microdnf clean all
rm -rf /var/cache/yum
EOF

WORKDIR /stackable

# Copy the pom.xml file from the patched Spark source code to read the
# versions used by Spark. The pom.xml defines child modules which are
# not required and not copied, therefore mvn must be called with the
Expand All @@ -70,6 +74,23 @@ COPY --chown=stackable:stackable --from=spark-source-builder \
/stackable/spark/pom.xml \
spark/

# Download the hbase-connectors source code
RUN <<EOF
curl https://repo.stackable.tech/repository/packages/hbase-connectors/hbase-connectors_${HBASE_CONNECTOR}.tar.gz \
| tar xz
ln -s hbase-connectors-rel-${HBASE_CONNECTOR} hbase-connectors
EOF

# Patch the hbase-connectors source code
WORKDIR /stackable/hbase-connectors
COPY --chown=stackable:stackable \
spark-k8s/stackable/hbase-connectors-patches/apply_patches.sh \
patches/apply_patches.sh
COPY --chown=stackable:stackable \
spark-k8s/stackable/hbase-connectors-patches/${HBASE_CONNECTOR} \
patches/${HBASE_CONNECTOR}
RUN patches/apply_patches.sh ${HBASE_CONNECTOR}

WORKDIR /stackable/hbase-connectors/spark

RUN <<EOF
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
diff --git a/pom.xml b/pom.xml
index e849cd1..f514e14 100644
--- a/pom.xml
+++ b/pom.xml
@@ -157,7 +157,10 @@
<extra.enforcer.version>1.5.1</extra.enforcer.version>
<restrict-imports.enforcer.version>0.14.0</restrict-imports.enforcer.version>
<!--Internally we use a different version of protobuf. See hbase-protocol-shaded-->
- <external.protobuf.version>2.5.0</external.protobuf.version>
+ <!-- com.google repo will be used except on Aarch64 platform. -->
+ <external.protobuf.groupId>com.google.protobuf</external.protobuf.groupId>
+ <external.protobuf.version>2.6.1</external.protobuf.version>
+ <external.protobuf.exe.version>${external.protobuf.version}</external.protobuf.exe.version>
<protobuf.plugin.version>0.5.0</protobuf.plugin.version>
<commons-io.version>2.11.0</commons-io.version>
<avro.version>1.7.7</avro.version>
@@ -933,5 +936,19 @@
</plugins>
</build>
</profile>
+ <!-- use com.github.os72 on aarch64 platform -->
+ <profile>
+ <id>aarch64</id>
+ <properties>
+ <external.protobuf.groupId>com.github.os72</external.protobuf.groupId>
+ <external.protobuf.exe.version>2.6.1-build3</external.protobuf.exe.version>
+ </properties>
+ <activation>
+ <os>
+ <family>linux</family>
+ <arch>aarch64</arch>
+ </os>
+ </activation>
+ </profile>
</profiles>
</project>
diff --git a/spark/pom.xml b/spark/pom.xml
index 3f1eb21..fcdc73e 100644
--- a/spark/pom.xml
+++ b/spark/pom.xml
@@ -84,7 +84,7 @@
<artifactId>protobuf-maven-plugin</artifactId>
<version>${protobuf.plugin.version}</version>
<configuration>
- <protocArtifact>com.google.protobuf:protoc:${external.protobuf.version}:exe:${os.detected.classifier}</protocArtifact>
+ <protocArtifact>${external.protobuf.groupId}:protoc:${external.protobuf.exe.version}:exe:${os.detected.classifier}</protocArtifact>
<protoSourceRoot>${basedir}/src/main/protobuf/</protoSourceRoot>
<clearOutputDirectory>false</clearOutputDirectory>
<checkStaleness>true</checkStaleness>
44 changes: 44 additions & 0 deletions spark-k8s/stackable/hbase-connectors-patches/apply_patches.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/usr/bin/env bash
#
# Apply every *.patch file found below patches/<VERSION> to the source tree in
# the current working directory.
#
# Usage: apply_patches.sh VERSION
#
# Both the patches directory and the patch target are resolved relative to the
# current working directory, so the script must be invoked from the root of the
# source tree that is being patched.
#
# Exit status:
#   0  all patches were applied (also when no patch file was found)
#   1  VERSION is missing, the patches directory does not exist, or a patch
#      failed to apply

# Abort on errors, on use of unset variables and on failures inside pipelines
set -eu
set -o pipefail

# Check if $1 (VERSION) is provided
if [ -z "${1-}" ]; then
    echo "Please provide a value for VERSION as the first argument." >&2
    exit 1
fi

VERSION="$1"
PATCH_DIR="patches/$VERSION"

# Check if version-specific patches directory exists
if [ ! -d "$PATCH_DIR" ]; then
    echo "Patches directory '$PATCH_DIR' does not exist." >&2
    exit 1
fi

# Array holding the patch files in version-sorted order
declare -a patch_files=()

echo "Applying patches from ${PATCH_DIR} now"

# Read the patch files into the array. NUL-delimited records keep file names
# with spaces or newlines intact; `sort -V` orders 2-foo.patch before
# 10-bar.patch.
while IFS= read -r -d $'\0' file; do
    patch_files+=("$file")
done < <(find "$PATCH_DIR" -name "*.patch" -print0 | sort -zV)

echo "Found ${#patch_files[@]} patches, applying now"

# Guard the expansion: with `set -u`, Bash releases before 4.4 treat
# "${patch_files[@]}" of an empty array as an unbound variable and abort.
if [ "${#patch_files[@]}" -gt 0 ]; then
    for patch_file in "${patch_files[@]}"; do
        echo "Applying $patch_file"
        # We cannot use Git here, as we are not within a Git repository.
        # --strip=1 drops the leading a/ and b/ path components of
        # Git-style diffs.
        patch --directory "." --strip=1 < "$patch_file" || {
            echo "Failed to apply $patch_file" >&2
            exit 1
        }
    done
fi

echo "All patches applied successfully."
12 changes: 6 additions & 6 deletions spark-k8s/versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
"java-base": "17",
"java-devel": "17",
"python": "3.11",
"hadoop": "3.3.4", # https://github.com/apache/spark/blob/6a5747d66e53ed0d934cdd9ca5c9bd9fde6868e6/pom.xml#L125
"hadoop": "3.3.6", # Hadoop version defined in ../hbase/versions.py
"hbase": "2.4.18", # current Stackable LTS version
"aws_java_sdk_bundle": "1.12.262", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.3.4
"azure_storage": "7.0.1", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.3.4
"aws_java_sdk_bundle": "1.12.367", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.3.6
"azure_storage": "7.0.1", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.3.6
"azure_keyvault_core": "1.0.0", # https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1
"jackson_dataformat_xml": "2.15.2", # https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.13/3.5.1
"stax2_api": "4.2.1", # https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2
Expand All @@ -22,10 +22,10 @@
"java-base": "17",
"java-devel": "17",
"python": "3.11",
"hadoop": "3.3.4", # https://github.com/apache/spark/blob/6a5747d66e53ed0d934cdd9ca5c9bd9fde6868e6/pom.xml#L125
"hadoop": "3.3.6", # Hadoop version defined in ../hbase/versions.py
"hbase": "2.4.18", # current Stackable LTS version
"aws_java_sdk_bundle": "1.12.262", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.3.4
"azure_storage": "7.0.1", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.3.4
"aws_java_sdk_bundle": "1.12.367", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.3.6
"azure_storage": "7.0.1", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.3.6
"azure_keyvault_core": "1.0.0", # https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1
"jackson_dataformat_xml": "2.15.2", # https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.13/3.5.1
"stax2_api": "4.2.1", # https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2
Expand Down