3 changes: 2 additions & 1 deletion CHANGELOG.md
@@ -21,7 +21,7 @@ All notable changes to this project will be documented in this file.
- hbase: Add hadoop-azure.jar to the lib directory to support the Azure Blob Filesystem and
the Azure Data Lake Storage ([#853]).
- kafka: Add cyrus-sasl-gssapi package for kerberos ([#874]).
- spark: Add HBase connector ([#878]).
- spark: Add HBase connector ([#878], [#882]).

### Changed

@@ -75,6 +75,7 @@ All notable changes to this project will be documented in this file.
[#878]: https://github.com/stackabletech/docker-images/pull/878
[#879]: https://github.com/stackabletech/docker-images/pull/879
[#881]: https://github.com/stackabletech/docker-images/pull/881
[#882]: https://github.com/stackabletech/docker-images/pull/882

## [24.7.0] - 2024-07-24

2 changes: 1 addition & 1 deletion hadoop/stackable/patches/3.3.4/001-YARN-11527-3.3.4.patch
@@ -7,7 +7,7 @@ index 0b2f6f17157d..9dc8b653eb93 100644
<json-smart.version>2.4.7</json-smart.version>
<nimbus-jose-jwt.version>9.8.1</nimbus-jose-jwt.version>
- <nodejs.version>v12.22.1</nodejs.version>
+ <nodejs.version>v14.0.0</nodejs.version>
+ <nodejs.version>v14.17.0</nodejs.version>
<yarnpkg.version>v1.22.5</yarnpkg.version>
<apache-ant.version>1.10.11</apache-ant.version>
</properties>
2 changes: 1 addition & 1 deletion hadoop/stackable/patches/3.3.6/001-YARN-11527-3.3.6.patch
@@ -7,7 +7,7 @@ index f1ac43ed5b38..73d0c7580338 100644
<woodstox.version>5.4.0</woodstox.version>
<nimbus-jose-jwt.version>9.8.1</nimbus-jose-jwt.version>
- <nodejs.version>v12.22.1</nodejs.version>
+ <nodejs.version>v14.0.0</nodejs.version>
+ <nodejs.version>v14.17.0</nodejs.version>
<yarnpkg.version>v1.22.5</yarnpkg.version>
<apache-ant.version>1.10.13</apache-ant.version>
<jmh.version>1.20</jmh.version>
2 changes: 1 addition & 1 deletion hadoop/stackable/patches/3.4.0/001-YARN-11527-3.4.0.patch
@@ -7,7 +7,7 @@ index 0ed96d087bc..9ebb6af4567 100644
<woodstox.version>5.4.0</woodstox.version>
<nimbus-jose-jwt.version>9.31</nimbus-jose-jwt.version>
- <nodejs.version>v12.22.1</nodejs.version>
+ <nodejs.version>v14.0.0</nodejs.version>
+ <nodejs.version>v14.17.0</nodejs.version>
<yarnpkg.version>v1.22.5</yarnpkg.version>
<apache-ant.version>1.10.13</apache-ant.version>
<jmh.version>1.20</jmh.version>
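All three Hadoop patches above make the same change: the nodejs.version property used by the YARN UI build is bumped from v12.22.1 to v14.17.0, superseding the earlier v14.0.0. A hedged way to confirm the property is picked up once the patches have been applied to a Hadoop source tree (the checkout path is hypothetical; the Maven help goals are standard tooling, not part of this PR):

    # Print the Node.js version Maven resolves after patching; expect v14.17.0.
    cd hadoop   # hypothetical location of the patched Hadoop checkout
    mvn help:evaluate -Dexpression=nodejs.version -q -DforceStdout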
33 changes: 27 additions & 6 deletions spark-k8s/Dockerfile
@@ -53,15 +53,19 @@ ARG HADOOP
ARG HBASE
ARG HBASE_CONNECTOR

WORKDIR /stackable

# Download the hbase-connectors source code
RUN <<EOF
curl https://repo.stackable.tech/repository/packages/hbase-connectors/hbase-connectors_${HBASE_CONNECTOR}.tar.gz \
| tar xz
ln -s hbase-connectors-rel-${HBASE_CONNECTOR} hbase-connectors
microdnf update

# patch: Required for the apply-patches.sh script
microdnf install \
patch

microdnf clean all
rm -rf /var/cache/yum
EOF

WORKDIR /stackable

# Copy the pom.xml file from the patched Spark source code to read the
# versions used by Spark. The pom.xml defines child modules which are
# not required and not copied, therefore mvn must be called with the
@@ -70,6 +74,23 @@ COPY --chown=stackable:stackable --from=spark-source-builder \
/stackable/spark/pom.xml \
spark/

# Download the hbase-connectors source code
RUN <<EOF
curl https://repo.stackable.tech/repository/packages/hbase-connectors/hbase-connectors_${HBASE_CONNECTOR}.tar.gz \
| tar xz
ln -s hbase-connectors-rel-${HBASE_CONNECTOR} hbase-connectors
EOF

# Patch the hbase-connectors source code
WORKDIR /stackable/hbase-connectors
COPY --chown=stackable:stackable \
spark-k8s/stackable/hbase-connectors-patches/apply_patches.sh \
patches/apply_patches.sh
COPY --chown=stackable:stackable \
spark-k8s/stackable/hbase-connectors-patches/${HBASE_CONNECTOR} \
patches/${HBASE_CONNECTOR}
RUN patches/apply_patches.sh ${HBASE_CONNECTOR}

WORKDIR /stackable/hbase-connectors/spark

RUN <<EOF
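The comment above about the copied pom.xml is cut off in this view, and the final RUN block of the Dockerfile diff is likewise truncated, so the actual Maven invocation is not visible here. As a hedged sketch only, assuming the truncated comment refers to Maven's non-recursive mode (the flag and the evaluated property are assumptions, not taken from this PR):

    # Read a property from the lone pom.xml without resolving the child modules
    # that were intentionally not copied into the image.
    mvn --non-recursive --file spark/pom.xml \
        help:evaluate -Dexpression=project.version -q -DforceStdout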
@@ -0,0 +1,49 @@
diff --git a/pom.xml b/pom.xml
index e849cd1..f514e14 100644
--- a/pom.xml
+++ b/pom.xml
@@ -157,7 +157,10 @@
<extra.enforcer.version>1.5.1</extra.enforcer.version>
<restrict-imports.enforcer.version>0.14.0</restrict-imports.enforcer.version>
<!--Internally we use a different version of protobuf. See hbase-protocol-shaded-->
- <external.protobuf.version>2.5.0</external.protobuf.version>
+ <!-- com.google repo will be used except on Aarch64 platform. -->
+ <external.protobuf.groupId>com.google.protobuf</external.protobuf.groupId>
+ <external.protobuf.version>2.6.1</external.protobuf.version>
+ <external.protobuf.exe.version>${external.protobuf.version}</external.protobuf.exe.version>
<protobuf.plugin.version>0.5.0</protobuf.plugin.version>
<commons-io.version>2.11.0</commons-io.version>
<avro.version>1.7.7</avro.version>
@@ -933,5 +936,19 @@
</plugins>
</build>
</profile>
+ <!-- use com.github.os72 on aarch64 platform -->
+ <profile>
+ <id>aarch64</id>
+ <properties>
+ <external.protobuf.groupId>com.github.os72</external.protobuf.groupId>
+ <external.protobuf.exe.version>2.6.1-build3</external.protobuf.exe.version>
+ </properties>
+ <activation>
+ <os>
+ <family>linux</family>
+ <arch>aarch64</arch>
+ </os>
+ </activation>
+ </profile>
</profiles>
</project>
diff --git a/spark/pom.xml b/spark/pom.xml
index 3f1eb21..fcdc73e 100644
--- a/spark/pom.xml
+++ b/spark/pom.xml
@@ -84,7 +84,7 @@
<artifactId>protobuf-maven-plugin</artifactId>
<version>${protobuf.plugin.version}</version>
<configuration>
- <protocArtifact>com.google.protobuf:protoc:${external.protobuf.version}:exe:${os.detected.classifier}</protocArtifact>
+ <protocArtifact>${external.protobuf.groupId}:protoc:${external.protobuf.exe.version}:exe:${os.detected.classifier}</protocArtifact>
<protoSourceRoot>${basedir}/src/main/protobuf/</protoSourceRoot>
<clearOutputDirectory>false</clearOutputDirectory>
<checkStaleness>true</checkStaleness>
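The new patch above introduces an aarch64 Maven profile that swaps the protoc artifact coordinates on Linux/aarch64 hosts, while other platforms keep the com.google.protobuf groupId. A hedged way to check which coordinates a given build host ends up with, using standard Maven help goals (not part of this PR):

    # Confirm the "aarch64" profile is activated on an aarch64 Linux host ...
    mvn help:active-profiles
    # ... and resolve the protoc coordinates defined by the patch above.
    mvn help:evaluate -Dexpression=external.protobuf.groupId -q -DforceStdout
    mvn help:evaluate -Dexpression=external.protobuf.exe.version -q -DforceStdout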
44 changes: 44 additions & 0 deletions spark-k8s/stackable/hbase-connectors-patches/apply_patches.sh
@@ -0,0 +1,44 @@
#!/usr/bin/env bash

# Enable error handling and unset variable checking
set -eu
set -o pipefail

# Check if $1 (VERSION) is provided
if [ -z "${1-}" ]; then
  echo "Please provide a value for VERSION as the first argument."
  exit 1
fi

VERSION="$1"
PATCH_DIR="patches/$VERSION"

# Check if version-specific patches directory exists
if [ ! -d "$PATCH_DIR" ]; then
  echo "Patches directory '$PATCH_DIR' does not exist."
  exit 1
fi

# Create an array to hold the patches in sorted order
declare -a patch_files=()

echo "Applying patches from ${PATCH_DIR} now"

# Read the patch files into the array
while IFS= read -r -d $'\0' file; do
  patch_files+=("$file")
done < <(find "$PATCH_DIR" -name "*.patch" -print0 | sort -zV)

echo "Found ${#patch_files[@]} patches, applying now"

# Iterate through sorted patch files
for patch_file in "${patch_files[@]}"; do
  echo "Applying $patch_file"
  # We cannot use Git here, as we are not within a Git repository
  patch --directory "." --strip=1 < "$patch_file" || {
    echo "Failed to apply $patch_file"
    exit 1
  }
done

echo "All patches applied successfully."
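A minimal usage sketch, matching the RUN instruction in the Dockerfile above; the version argument below is hypothetical and stands in for ${HBASE_CONNECTOR}:

    # Run from the root of the unpacked hbase-connectors source tree,
    # where the Dockerfile copied the patches/ directory.
    patches/apply_patches.sh 1.0.1   # "1.0.1" is a placeholder version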
12 changes: 6 additions & 6 deletions spark-k8s/versions.py
@@ -4,10 +4,10 @@
"java-base": "17",
"java-devel": "17",
"python": "3.11",
"hadoop": "3.3.4", # https://github.com/apache/spark/blob/6a5747d66e53ed0d934cdd9ca5c9bd9fde6868e6/pom.xml#L125
"hadoop": "3.3.6", # Hadoop version defined in ../hbase/versions.py to reduce build time and disk requirements
"hbase": "2.4.18", # current Stackable LTS version
"aws_java_sdk_bundle": "1.12.262", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.3.4
"azure_storage": "7.0.1", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.3.4
"aws_java_sdk_bundle": "1.12.367", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.3.6
"azure_storage": "7.0.1", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.3.6
"azure_keyvault_core": "1.0.0", # https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1
"jackson_dataformat_xml": "2.15.2", # https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.13/3.5.1
"stax2_api": "4.2.1", # https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2
@@ -22,10 +22,10 @@
"java-base": "17",
"java-devel": "17",
"python": "3.11",
"hadoop": "3.3.4", # https://github.com/apache/spark/blob/6a5747d66e53ed0d934cdd9ca5c9bd9fde6868e6/pom.xml#L125
"hadoop": "3.3.6", # Hadoop version defined in ../hbase/versions.py to reduce build time and disk requirements
"hbase": "2.4.18", # current Stackable LTS version
"aws_java_sdk_bundle": "1.12.262", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.3.4
"azure_storage": "7.0.1", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.3.4
"aws_java_sdk_bundle": "1.12.367", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.3.6
"azure_storage": "7.0.1", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.3.6
"azure_keyvault_core": "1.0.0", # https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1
"jackson_dataformat_xml": "2.15.2", # https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.13/3.5.1
"stax2_api": "4.2.1", # https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2
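The updated comments state that the Hadoop version here must track the one defined in ../hbase/versions.py to reduce build time and disk requirements. A hedged spot-check, run from the repository root (the grep pattern is an assumption about the file layout, not part of this PR):

    # Print the pinned Hadoop versions from both files; the values should match.
    grep -o '"hadoop": "[0-9.]*"' spark-k8s/versions.py hbase/versions.py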