Skip to content

Commit 1b7bc15

Browse files
fix: Spark HBase connector on aarch64
1 parent dd7d6ed commit 1b7bc15

File tree

7 files changed

+125
-10
lines changed

7 files changed

+125
-10
lines changed

CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ All notable changes to this project will be documented in this file.
2121
- hbase: Add hadoop-azure.jar to the lib directory to support the Azure Blob Filesystem and
2222
the Azure Data Lake Storage ([#853]).
2323
- kafka: Add cyrus-sasl-gssapi package for kerberos ([#874]).
24-
- spark: Add HBase connector ([#878]).
24+
- spark: Add HBase connector ([#878], [#882]).
2525

2626
### Changed
2727

@@ -73,6 +73,7 @@ All notable changes to this project will be documented in this file.
7373
[#878]: https://github.com/stackabletech/docker-images/pull/878
7474
[#879]: https://github.com/stackabletech/docker-images/pull/879
7575
[#881]: https://github.com/stackabletech/docker-images/pull/881
76+
[#882]: https://github.com/stackabletech/docker-images/pull/882
7677

7778
## [24.7.0] - 2024-07-24
7879

hadoop/stackable/patches/3.3.4/001-YARN-11527-3.3.4.patch

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ index 0b2f6f17157d..9dc8b653eb93 100644
77
<json-smart.version>2.4.7</json-smart.version>
88
<nimbus-jose-jwt.version>9.8.1</nimbus-jose-jwt.version>
99
- <nodejs.version>v12.22.1</nodejs.version>
10-
+ <nodejs.version>v14.0.0</nodejs.version>
10+
+ <nodejs.version>v14.17.0</nodejs.version>
1111
<yarnpkg.version>v1.22.5</yarnpkg.version>
1212
<apache-ant.version>1.10.11</apache-ant.version>
1313
</properties>

hadoop/stackable/patches/3.3.6/001-YARN-11527-3.3.6.patch

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ index f1ac43ed5b38..73d0c7580338 100644
77
<woodstox.version>5.4.0</woodstox.version>
88
<nimbus-jose-jwt.version>9.8.1</nimbus-jose-jwt.version>
99
- <nodejs.version>v12.22.1</nodejs.version>
10-
+ <nodejs.version>v14.0.0</nodejs.version>
10+
+ <nodejs.version>v14.17.0</nodejs.version>
1111
<yarnpkg.version>v1.22.5</yarnpkg.version>
1212
<apache-ant.version>1.10.13</apache-ant.version>
1313
<jmh.version>1.20</jmh.version>

hadoop/stackable/patches/3.4.0/001-YARN-11527-3.4.0.patch

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ index 0ed96d087bc..9ebb6af4567 100644
77
<woodstox.version>5.4.0</woodstox.version>
88
<nimbus-jose-jwt.version>9.31</nimbus-jose-jwt.version>
99
- <nodejs.version>v12.22.1</nodejs.version>
10-
+ <nodejs.version>v14.0.0</nodejs.version>
10+
+ <nodejs.version>v14.17.0</nodejs.version>
1111
<yarnpkg.version>v1.22.5</yarnpkg.version>
1212
<apache-ant.version>1.10.13</apache-ant.version>
1313
<jmh.version>1.20</jmh.version>

spark-k8s/Dockerfile

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -52,15 +52,19 @@ ARG HADOOP
5252
ARG HBASE
5353
ARG HBASE_CONNECTOR
5454

55-
WORKDIR /stackable
56-
57-
# Download the hbase-connectors source code
5855
RUN <<EOF
59-
curl https://repo.stackable.tech/repository/packages/hbase-connectors/hbase-connectors_${HBASE_CONNECTOR}.tar.gz \
60-
| tar xz
61-
ln -s hbase-connectors-rel-${HBASE_CONNECTOR} hbase-connectors
56+
microdnf update
57+
58+
# patch: Required for the apply-patches.sh script
59+
microdnf install \
60+
patch
61+
62+
microdnf clean all
63+
rm -rf /var/cache/yum
6264
EOF
6365

66+
WORKDIR /stackable
67+
6468
# Copy the pom.xml file from the patched Spark source code to read the
6569
# versions used by Spark. The pom.xml defines child modules which are
6670
# not required and not copied, therefore mvn must be called with the
@@ -69,6 +73,23 @@ COPY --chown=stackable:stackable --from=spark-source-builder \
6973
/stackable/spark/pom.xml \
7074
spark/
7175

76+
# Download the hbase-connectors source code
77+
RUN <<EOF
78+
curl https://repo.stackable.tech/repository/packages/hbase-connectors/hbase-connectors_${HBASE_CONNECTOR}.tar.gz \
79+
| tar xz
80+
ln -s hbase-connectors-rel-${HBASE_CONNECTOR} hbase-connectors
81+
EOF
82+
83+
# Patch the hbase-connectors source code
84+
WORKDIR /stackable/hbase-connectors
85+
COPY --chown=stackable:stackable \
86+
spark-k8s/stackable/hbase-connectors-patches/apply_patches.sh \
87+
patches/apply_patches.sh
88+
COPY --chown=stackable:stackable \
89+
spark-k8s/stackable/hbase-connectors-patches/${HBASE_CONNECTOR} \
90+
patches/${HBASE_CONNECTOR}
91+
RUN patches/apply_patches.sh ${HBASE_CONNECTOR}
92+
7293
WORKDIR /stackable/hbase-connectors/spark
7394

7495
RUN <<EOF
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
diff --git a/pom.xml b/pom.xml
2+
index e849cd1..f514e14 100644
3+
--- a/pom.xml
4+
+++ b/pom.xml
5+
@@ -157,7 +157,10 @@
6+
<extra.enforcer.version>1.5.1</extra.enforcer.version>
7+
<restrict-imports.enforcer.version>0.14.0</restrict-imports.enforcer.version>
8+
<!--Internally we use a different version of protobuf. See hbase-protocol-shaded-->
9+
- <external.protobuf.version>2.5.0</external.protobuf.version>
10+
+ <!-- com.google repo will be used except on Aarch64 platform. -->
11+
+ <external.protobuf.groupId>com.google.protobuf</external.protobuf.groupId>
12+
+ <external.protobuf.version>2.6.1</external.protobuf.version>
13+
+ <external.protobuf.exe.version>${external.protobuf.version}</external.protobuf.exe.version>
14+
<protobuf.plugin.version>0.5.0</protobuf.plugin.version>
15+
<commons-io.version>2.11.0</commons-io.version>
16+
<avro.version>1.7.7</avro.version>
17+
@@ -933,5 +936,19 @@
18+
</plugins>
19+
</build>
20+
</profile>
21+
+ <!-- use com.github.os72 on aarch64 platform -->
22+
+ <profile>
23+
+ <id>aarch64</id>
24+
+ <properties>
25+
+ <external.protobuf.groupId>com.github.os72</external.protobuf.groupId>
26+
+ <external.protobuf.exe.version>2.6.1-build3</external.protobuf.exe.version>
27+
+ </properties>
28+
+ <activation>
29+
+ <os>
30+
+ <family>linux</family>
31+
+ <arch>aarch64</arch>
32+
+ </os>
33+
+ </activation>
34+
+ </profile>
35+
</profiles>
36+
</project>
37+
diff --git a/spark/pom.xml b/spark/pom.xml
38+
index 3f1eb21..fcdc73e 100644
39+
--- a/spark/pom.xml
40+
+++ b/spark/pom.xml
41+
@@ -84,7 +84,7 @@
42+
<artifactId>protobuf-maven-plugin</artifactId>
43+
<version>${protobuf.plugin.version}</version>
44+
<configuration>
45+
- <protocArtifact>com.google.protobuf:protoc:${external.protobuf.version}:exe:${os.detected.classifier}</protocArtifact>
46+
+ <protocArtifact>${external.protobuf.groupId}:protoc:${external.protobuf.exe.version}:exe:${os.detected.classifier}</protocArtifact>
47+
<protoSourceRoot>${basedir}/src/main/protobuf/</protoSourceRoot>
48+
<clearOutputDirectory>false</clearOutputDirectory>
49+
<checkStaleness>true</checkStaleness>
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
#!/usr/bin/env bash

# apply_patches.sh — apply all *.patch files for a given product version.
#
# Usage: apply_patches.sh VERSION
#   VERSION selects the patch directory: patches/$VERSION
#
# Patches are applied in natural version-sort order with `patch --strip=1`.
# Git cannot be used here because the source tree is not a Git repository.

# Enable error handling, unset-variable checking, and pipeline failure
# propagation (fail fast on any broken step).
set -eu
set -o pipefail

# Check if $1 (VERSION) is provided
if [ -z "${1-}" ]; then
  echo "Please provide a value for VERSION as the first argument."
  exit 1
fi

VERSION="$1"
PATCH_DIR="patches/$VERSION"

# Check if version-specific patches directory exists
if [ ! -d "$PATCH_DIR" ]; then
  echo "Patches directory '$PATCH_DIR' does not exist."
  exit 1
fi

# Create an array to hold the patches in sorted order
declare -a patch_files=()

# Fix: the closing quote was misplaced in the original
# (`echo "... ${PATCH_DIR}" now`), leaving `now` outside the string.
echo "Applying patches from ${PATCH_DIR} now"

# Read the patch files into the array, NUL-delimited so that arbitrary
# file names are handled safely, and version-sorted (-V) so that e.g.
# 002-… applies after 001-… and 010-… after 002-….
while IFS= read -r -d $'\0' file; do
  patch_files+=("$file")
done < <(find "$PATCH_DIR" -name "*.patch" -print0 | sort -zV)

echo "Found ${#patch_files[@]} patches, applying now"

# Iterate through sorted patch files
for patch_file in "${patch_files[@]}"; do
  echo "Applying $patch_file"
  # We can not use Git here, as we are not within a Git repo
  patch --directory "." --strip=1 < "$patch_file" || {
    echo "Failed to apply $patch_file"
    exit 1
  }
done

echo "All patches applied successfully."

0 commit comments

Comments
 (0)