@@ -61,6 +61,17 @@ COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patche
6161COPY  --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR} /stackable/src/spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR}
6262
6363RUN  <<EOF
64+ 
65+ #  IMPORTANT: HBase connectors don't support Spark 4 yet, so we skip the build.
66+ #  Watch this PR for updates: https://github.com/apache/hbase-connectors/pull/130
67+ if [[ "${PRODUCT}"  == 4* ]]; then
68+     #  Create this empty directory so that following COPY layers succeed.
69+     mkdir -p /stackable/spark/jars
70+     #  Create a dummy tarball to satisfy the build process for Spark 3.
71+     touch hbase-connector-${HBASE_CONNECTOR}-stackable${RELEASE}-src.tar.gz
72+     exit 0
73+ fi
74+ 
6475cd "$(/stackable/patchable --images-repo-root=src checkout spark-k8s/hbase-connectors ${HBASE_CONNECTOR})/spark" 
6576
6677NEW_VERSION="${HBASE_CONNECTOR}-stackable${RELEASE}" 
@@ -110,6 +121,7 @@ mvn \
110121    --define hadoop-three.version="${HADOOP_VERSION}"  \
111122    --define hbase.version="${HBASE}"  \
112123    --define skipTests \
124+     --define maven.test.skip=true \
113125    clean package
114126
115127mkdir -p /stackable/spark/jars
@@ -162,9 +174,6 @@ COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-source-builder \
162174COPY  --from=hadoop-builder --chown=${STACKABLE_USER_UID}:0 /stackable/patched-libs /stackable/patched-libs
163175
164176#  >>> Build spark
165- #  Compiling the tests takes a lot of time, so we skip them
166- #  -Dmaven.test.skip=true skips both the compilation and execution of tests
167- #  -DskipTests skips only the execution
168177RUN  <<EOF
169178    #  Make Maven aware of custom Stackable libraries
170179    mv /stackable/patched-libs/maven /root/.m2/repository
@@ -179,15 +188,35 @@ RUN <<EOF
179188    ORIGINAL_VERSION="${PRODUCT}" 
180189    NEW_VERSION="${PRODUCT}-stackable${RELEASE}" 
181190
191+     STACKABLE_HADOOP_VERSION="${HADOOP_VERSION}-stackable${RELEASE}"  # Hadoop version with the Stackable release suffix; passed as -Dhadoop.version below
192+ 
193+     MAVEN_BIN="/tmp/apache-maven-${MAVEN_VERSION}/bin/mvn" 
182194    export MAVEN_OPTS="-Xss64m -Xmx2g -XX:ReservedCodeCacheSize=1g" 
183195
184-     ./dev/make-distribution.sh \
185-     --mvn /tmp/apache-maven-${MAVEN_VERSION}/bin/mvn \
186-     -Dhadoop.version="${HADOOP_VERSION}-stackable${RELEASE}"  \
187-     -DskipTests \
188-     -P'hadoop-3'  -Pkubernetes -Phive -Phive-thriftserver \
189-     --no-transfer-progress \
190-     --batch-mode
196+     case "${PRODUCT}"  in
197+         4*)
198+             #  The Spark 4 script has a --connect option which is not available in Spark 3.
199+             #  This option is required to build Spark Connect.
200+             #  Also this option breaks the Spark 3 build so we ensure it's only provided here.
201+             ./dev/make-distribution.sh \
202+             --mvn "${MAVEN_BIN}"  \
203+             --connect \
204+             -Dhadoop.version="${STACKABLE_HADOOP_VERSION}"  \
205+             -DskipTests \
206+             -P'hadoop-3'  -Pkubernetes -Phive -Phive-thriftserver \
207+             --no-transfer-progress \
208+             --batch-mode
209+             ;;
210+         *)
211+             ./dev/make-distribution.sh \
212+             --mvn "${MAVEN_BIN}"  \
213+             -Dhadoop.version="${STACKABLE_HADOOP_VERSION}"  \
214+             -DskipTests \
215+             -P'hadoop-3'  -Pkubernetes -Phive -Phive-thriftserver \
216+             --no-transfer-progress \
217+             --batch-mode
218+             ;;
219+     esac
191220
192221    sed -i "s/${NEW_VERSION}/${ORIGINAL_VERSION}/g"  assembly/target/bom.json
193222EOF
@@ -198,22 +227,30 @@ EOF
198227#  we create a new dist/connect folder, and copy them here.
199228RUN  <<EOF
200229
201-     #  Get the Scala binary version
202-     SCALA_BINARY_VERSION=$( \
203-         mvn --quiet --non-recursive --no-transfer-progress --batch-mode --file pom.xml \
204-         org.apache.maven.plugins:maven-help-plugin:3.5.0:evaluate \
205-         -DforceStdout \
206-         -Dexpression='project.properties(scala.binary.version)' )
230+     SCALA_BINARY_VERSION=$(grep -m1 -F '<scala.binary.version>' pom.xml | awk -F '[<>]' '{print $3}')  # value of the first <scala.binary.version> element in pom.xml; -F keeps the dots literal so ${scala.binary.version} references are not matched
207231
208232    mkdir -p dist/connect
209233    cd dist/connect
210234
211-     cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/server/target/spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar"  .
212-     cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/common/target/spark-connect-common_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar"  .
213-     cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/client/jvm/target/spark-connect-client-jvm_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar"  .
214- 
215-     #  The Spark operator expects a file named spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}.jar without the -stackable${RELEASE} suffix.
235+     case "${PRODUCT}"  in
236+         4*)
237+             cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/sql/connect/server/target/spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar"  .
238+             cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/sql/connect/common/target/spark-connect-common_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar"  .
239+             cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/sql/connect/client/jvm/target/spark-connect-client-jvm_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar"  .
240+             ;;
241+         *)
242+             cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/server/target/spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar"  .
243+             cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/common/target/spark-connect-common_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar"  .
244+             cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/client/jvm/target/spark-connect-client-jvm_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar"  .
245+             ;;
246+     esac
247+ 
248+     #  This link is needed by the operator and is kept for backwards compatibility.
249+     #  TODO: remove it at some time in the future.
216250    ln -s "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar"  "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}.jar" 
251+     #  Link to the spark-connect jar without the stackable suffix and scala version.
252+     #  This link supersedes the previous link.
253+     ln -s "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar"  "spark-connect-${PRODUCT}.jar" 
217254EOF
218255
219256#  <<< Build spark
0 commit comments