@@ -61,6 +61,17 @@ COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patche
61
61
COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR} /stackable/src/spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR}
62
62
63
63
RUN <<EOF
64
+
65
+ # IMPORTANT: HBase connectors don't support Spark 4 yet, so we skip the build.
66
+ # Watch this PR for updates: https://github.com/apache/hbase-connectors/pull/130
67
+ if [[ "${PRODUCT}" == 4* ]]; then
68
+ # Create this empty directory so that following COPY layers succeed.
69
+ mkdir -p /stackable/spark/jars
70
+ # Create an empty placeholder tarball so that build steps written for Spark 3, which expect this file, still succeed.
71
+ touch hbase-connector-${HBASE_CONNECTOR}-stackable${RELEASE}-src.tar.gz
72
+ exit 0
73
+ fi
74
+
64
75
cd "$(/stackable/patchable --images-repo-root=src checkout spark-k8s/hbase-connectors ${HBASE_CONNECTOR})/spark"
65
76
66
77
NEW_VERSION="${HBASE_CONNECTOR}-stackable${RELEASE}"
@@ -110,6 +121,7 @@ mvn \
110
121
--define hadoop-three.version="${HADOOP_VERSION}" \
111
122
--define hbase.version="${HBASE}" \
112
123
--define skipTests \
124
+ --define maven.test.skip=true \
113
125
clean package
114
126
115
127
mkdir -p /stackable/spark/jars
@@ -162,9 +174,6 @@ COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-source-builder \
162
174
COPY --from=hadoop-builder --chown=${STACKABLE_USER_UID}:0 /stackable/patched-libs /stackable/patched-libs
163
175
164
176
# >>> Build spark
165
- # Compiling the tests takes a lot of time, so we skip them
166
- # -Dmaven.test.skip=true skips both the compilation and execution of tests
167
- # -DskipTests skips only the execution
168
177
RUN <<EOF
169
178
# Make Maven aware of custom Stackable libraries
170
179
mv /stackable/patched-libs/maven /root/.m2/repository
@@ -179,15 +188,35 @@ RUN <<EOF
179
188
ORIGINAL_VERSION="${PRODUCT}"
180
189
NEW_VERSION="${PRODUCT}-stackable${RELEASE}"
181
190
191
+ STACKABLE_HADOOP_VERSION="${HADOOP_HADOOP}-stackable${RELEASE}"
192
+
193
+ MAVEN_BIN="/tmp/apache-maven-${MAVEN_VERSION}/bin/mvn"
182
194
export MAVEN_OPTS="-Xss64m -Xmx2g -XX:ReservedCodeCacheSize=1g"
183
195
184
- ./dev/make-distribution.sh \
185
- --mvn /tmp/apache-maven-${MAVEN_VERSION}/bin/mvn \
186
- -Dhadoop.version="${HADOOP_VERSION}-stackable${RELEASE}" \
187
- -DskipTests \
188
- -P'hadoop-3' -Pkubernetes -Phive -Phive-thriftserver \
189
- --no-transfer-progress \
190
- --batch-mode
196
+ case "${PRODUCT}" in
197
+ 4*)
198
+ # The Spark 4 script has a --connect option which is not available in Spark 3.
199
+ # This option is required to build Spark Connect.
200
+ # Also, this option breaks the Spark 3 build, so we ensure it's only provided here.
201
+ ./dev/make-distribution.sh \
202
+ --mvn "${MAVEN_BIN}" \
203
+ --connect \
204
+ -Dhadoop.version="${STACKABLE_HADOOP_VERSION}" \
205
+ -DskipTests \
206
+ -P'hadoop-3' -Pkubernetes -Phive -Phive-thriftserver \
207
+ --no-transfer-progress \
208
+ --batch-mode
209
+ ;;
210
+ *)
211
+ ./dev/make-distribution.sh \
212
+ --mvn "${MAVEN_BIN}" \
213
+ -Dhadoop.version="${STACKABLE_HADOOP_VERSION}" \
214
+ -DskipTests \
215
+ -P'hadoop-3' -Pkubernetes -Phive -Phive-thriftserver \
216
+ --no-transfer-progress \
217
+ --batch-mode
218
+ ;;
219
+ esac
191
220
192
221
sed -i "s/${NEW_VERSION}/${ORIGINAL_VERSION}/g" assembly/target/bom.json
193
222
EOF
@@ -198,22 +227,30 @@ EOF
198
227
# we create a new dist/connect folder, and copy them here.
199
228
RUN <<EOF
200
229
201
- # Get the Scala binary version
202
- SCALA_BINARY_VERSION=$( \
203
- mvn --quiet --non-recursive --no-transfer-progress --batch-mode --file pom.xml \
204
- org.apache.maven.plugins:maven-help-plugin:3.5.0:evaluate \
205
- -DforceStdout \
206
- -Dexpression='project.properties(scala.binary.version)' )
230
+ SCALA_BINARY_VERSION=$(grep "scala.binary.version" pom.xml | head -n1 | awk -F '[<>]' '{print $3}' )
207
231
208
232
mkdir -p dist/connect
209
233
cd dist/connect
210
234
211
- cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/server/target/spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" .
212
- cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/common/target/spark-connect-common_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" .
213
- cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/client/jvm/target/spark-connect-client-jvm_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" .
214
-
215
- # The Spark operator expects a file named spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}.jar without the -stackable${RELEASE} suffix.
235
+ case "${PRODUCT}" in
236
+ 4*)
237
+ cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/sql/connect/server/target/spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" .
238
+ cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/sql/connect/common/target/spark-connect-common_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" .
239
+ cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/sql/connect/client/jvm/target/spark-connect-client-jvm_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" .
240
+ ;;
241
+ *)
242
+ cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/server/target/spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" .
243
+ cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/common/target/spark-connect-common_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" .
244
+ cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/client/jvm/target/spark-connect-client-jvm_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" .
245
+ ;;
246
+ esac
247
+
248
+ # This link is needed by the operator and is kept for backwards compatibility.
249
+ # TODO: remove it at some point in the future.
216
250
ln -s "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}.jar"
251
+ # Link to the spark-connect jar under a name without the stackable suffix and Scala version.
252
+ # This link supersedes the previous link.
253
+ ln -s "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" "spark-connect-${PRODUCT}.jar"
217
254
EOF
218
255
219
256
# <<< Build spark
0 commit comments