@@ -61,6 +61,17 @@ COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patche
6161COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR} /stackable/src/spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR}
6262
6363RUN <<EOF
64+
# IMPORTANT: HBase connectors don't support Spark 4 yet, so the connector
# build is skipped entirely for Spark 4 products.
# Watch this PR for updates: https://github.com/apache/hbase-connectors/pull/130
case "${PRODUCT}" in
4*)
    # Create this empty directory so that following COPY layers succeed.
    mkdir -p /stackable/spark/jars
    # Create an empty placeholder named like the connector source tarball.
    # NOTE(review): presumably later build steps expect the file that a
    # regular (Spark 3) connector build provides - confirm against the rest
    # of the Dockerfile.
    touch "hbase-connector-${HBASE_CONNECTOR}-stackable${RELEASE}-src.tar.gz"
    # Nothing else to do in this RUN step for Spark 4.
    exit 0
    ;;
esac
74+
6475cd "$(/stackable/patchable --images-repo-root=src checkout spark-k8s/hbase-connectors ${HBASE_CONNECTOR})/spark"
6576
6677NEW_VERSION="${HBASE_CONNECTOR}-stackable${RELEASE}"
@@ -80,18 +91,10 @@ export JDK_JAVA_OPTIONS="\
8091 --add-opens java.base/java.util=ALL-UNNAMED"
8192
8293# Get the Scala version used by Spark
83- SCALA_VERSION=$( \
84- mvn --quiet --non-recursive --file /stackable/spark/pom.xml \
85- org.apache.maven.plugins:maven-help-plugin:3.5.0:evaluate \
86- -DforceStdout \
87- -Dexpression='project.properties(scala.version)' )
# Read the value of the first <scala.version> element in Spark's pom.xml.
# The literal opening tag is matched (with the dots escaped) so that other
# lines merely mentioning the property cannot be picked up by accident, and a
# single awk call avoids the grep | head pipeline.
SCALA_VERSION=$(awk -F '[<>]' '/<scala\.version>/ { print $3; exit }' /stackable/spark/pom.xml)
# Abort right here instead of handing an empty version to the Maven build.
: "${SCALA_VERSION:?could not determine scala.version from /stackable/spark/pom.xml}"
8895
8996# Get the Scala binary version used by Spark
90- SCALA_BINARY_VERSION=$( \
91- mvn --quiet --non-recursive --file /stackable/spark/pom.xml \
92- org.apache.maven.plugins:maven-help-plugin:3.5.0:evaluate \
93- -DforceStdout \
94- -Dexpression='project.properties(scala.binary.version)' )
# Read the value of the first <scala.binary.version> element in Spark's
# pom.xml. The literal opening tag is matched (with the dots escaped) so that
# other lines merely mentioning the property cannot be picked up by accident,
# and a single awk call avoids the grep | head pipeline.
SCALA_BINARY_VERSION=$(awk -F '[<>]' '/<scala\.binary\.version>/ { print $3; exit }' /stackable/spark/pom.xml)
# Abort right here instead of handing an empty version to the Maven build.
: "${SCALA_BINARY_VERSION:?could not determine scala.binary.version from /stackable/spark/pom.xml}"
9598
9699# Build the Spark HBase connector
97100# Skip the tests because the MiniHBaseCluster does not get ready for
@@ -108,6 +111,7 @@ mvn \
108111 --define hadoop-three.version="${HADOOP_VERSION}" \
109112 --define hbase.version="${HBASE}" \
110113 --define skipTests \
114+ --define maven.test.skip=true \
111115 clean package
112116
113117mkdir -p /stackable/spark/jars
@@ -157,22 +161,36 @@ COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-source-builder \
157161COPY --from=hadoop-builder --chown=${STACKABLE_USER_UID}:0 /stackable/patched-libs /stackable/patched-libs
158162
159163# >>> Build spark
160- # Compiling the tests takes a lot of time, so we skip them
161- # -Dmaven.test.skip=true skips both the compilation and execution of tests
162- # -DskipTests skips only the execution
163164RUN <<EOF
164165 # Make Maven aware of custom Stackable libraries
165166 mv /stackable/patched-libs/maven /root/.m2/repository
166167
167168 ORIGINAL_VERSION="${PRODUCT}"
168169 NEW_VERSION="${PRODUCT}-stackable${RELEASE}"
169170
171+ MAVEN_BIN="/usr/bin/mvn"
170172 export MAVEN_OPTS="-Xss64m -Xmx2g -XX:ReservedCodeCacheSize=1g"
171173
172- ./dev/make-distribution.sh \
173- -Dhadoop.version="${HADOOP_VERSION}-stackable${RELEASE}" \
174- -DskipTests \
175- -P'hadoop-3' -Pkubernetes -Phive -Phive-thriftserver
# The Spark 4 make-distribution.sh has a --connect option which is not
# available in Spark 3. It is required to build Spark Connect, but it breaks
# the Spark 3 build, so it is only passed for Spark 4 products.
connect_opt=""
case "${PRODUCT}" in
4*)
    connect_opt="--connect"
    ;;
esac

# ${connect_opt:+...} expands to no argument at all when the flag is empty,
# so Spark 3 gets exactly the same command line minus --connect.
./dev/make-distribution.sh \
    --mvn "${MAVEN_BIN}" \
    ${connect_opt:+"${connect_opt}"} \
    -Dhadoop.version="${HADOOP_VERSION}-stackable${RELEASE}" \
    -DskipTests \
    -P'hadoop-3' -Pkubernetes -Phive -Phive-thriftserver
176194
177195 sed -i "s/${NEW_VERSION}/${ORIGINAL_VERSION}/g" assembly/target/bom.json
178196EOF
@@ -183,22 +201,30 @@ EOF
183201# we create a new dist/connect folder, and copy them here.
184202RUN <<EOF
185203
186- # Get the Scala binary version
187- SCALA_BINARY_VERSION=$( \
188- mvn --quiet --non-recursive --file pom.xml \
189- org.apache.maven.plugins:maven-help-plugin:3.5.0:evaluate \
190- -DforceStdout \
191- -Dexpression='project.properties(scala.binary.version)' )
# Read the value of the first <scala.binary.version> element in the pom.xml
# of the current directory. The literal opening tag is matched (with the dots
# escaped) so that other lines merely mentioning the property cannot be picked
# up by accident, and a single awk call avoids the grep | head pipeline.
SCALA_BINARY_VERSION=$(awk -F '[<>]' '/<scala\.binary\.version>/ { print $3; exit }' pom.xml)
# Abort right here: an empty value would produce wrong jar names below.
: "${SCALA_BINARY_VERSION:?could not determine scala.binary.version from pom.xml}"
192205
193206 mkdir -p dist/connect
194207 cd dist/connect
195208
196- cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/server/target/spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" .
197- cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/common/target/spark-connect-common_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" .
198- cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/client/jvm/target/spark-connect-client-jvm_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" .
199-
200- # The Spark operator expects a file named spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}.jar without the -stackable${RELEASE} suffix.
# Pick the directory (relative to the Spark source tree) that holds the
# Spark Connect modules: sql/connect for Spark 4, connector/connect otherwise.
case "${PRODUCT}" in
4*)
    connect_module_dir="sql/connect"
    ;;
*)
    connect_module_dir="connector/connect"
    ;;
esac

# Common prefix of the three module directories and common tail of the jar names.
connect_src="/stackable/spark-${PRODUCT}-stackable${RELEASE}/${connect_module_dir}"
connect_jar_tail="${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar"

# Copy the server, common and JVM client jars into the current directory.
cp "${connect_src}/server/target/spark-connect_${connect_jar_tail}" .
cp "${connect_src}/common/target/spark-connect-common_${connect_jar_tail}" .
cp "${connect_src}/client/jvm/target/spark-connect-client-jvm_${connect_jar_tail}" .
221+
# Both links below point at the Spark Connect server jar in this directory.
connect_server_jar="spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar"

# Link without the -stackable${RELEASE} suffix. It is needed by the operator
# and is kept for backwards compatibility.
# TODO: remove it at some time in the future.
ln -s "${connect_server_jar}" "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}.jar"

# Link without both the stackable suffix and the Scala version.
# This link supersedes the previous one.
ln -s "${connect_server_jar}" "spark-connect-${PRODUCT}.jar"
202228EOF
203229
204230# <<< Build spark
0 commit comments