@@ -167,9 +167,9 @@ RUN <<EOF
167167
168168 # We download the Maven binary from our own repository because:
169169 #
170- # 1. Cannot use the UBI maven version because it's too old:
170+ # 1. The UBI Maven version is too old:
171171 # 134.0 [ERROR] Detected Maven Version: 3.6.3 is not in the allowed range [3.8.8,)
172- # 2. Cannot allow Spark to download its own version of Maven from archive.apache.org because the connection is not reliable.
172+ # 2. The Maven download from archive.apache.org is not working reliably.
173173 curl "https://repo.stackable.tech/repository/packages/maven/apache-maven-${MAVEN_VERSION}-bin.tar.gz" | tar -xzC /tmp
174174
175175 ORIGINAL_VERSION="${PRODUCT}"
@@ -188,8 +188,31 @@ RUN <<EOF
188188 sed -i "s/${NEW_VERSION}/${ORIGINAL_VERSION}/g" assembly/target/bom.json
189189EOF
190190
191- # <<< Build spark
192191
192+ # As of version 3.5.5, spark-connect jars are not included in the dist folder.
193+ # To avoid classpath conflicts with existing Spark applications,
194+ # we create a new dist/connect folder and copy them there.
195+ RUN <<EOF
196+
197+ # Read the scala.binary.version property from pom.xml with the Maven help plugin; it is part of the jar file names used below.
198+ SCALA_BINARY_VERSION=$( \
199+ mvn --quiet --non-recursive --no-transfer-progress --batch-mode --file pom.xml \
200+ org.apache.maven.plugins:maven-help-plugin:3.5.0:evaluate \
201+ -DforceStdout \
202+ -Dexpression='project.properties(scala.binary.version)' )
203+
204+ mkdir -p dist/connect
205+ cd dist/connect
206+
207+ cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/server/target/spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" .
208+ cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/common/target/spark-connect-common_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" .
209+ cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/client/jvm/target/spark-connect-client-jvm_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" .
210+
211+ # The Spark operator expects a file named spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}.jar without the -stackable${RELEASE} suffix, so symlink that name to the suffixed server jar.
212+ ln -s "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}.jar"
213+ EOF
214+
215+ # <<< Build spark
193216
194217WORKDIR /stackable/spark-${PRODUCT}-stackable${RELEASE}/dist/jars
195218
@@ -228,15 +251,6 @@ COPY --from=hbase-builder --chown=${STACKABLE_USER_UID}:0 \
228251 /stackable/hbase/lib/client-facing-thirdparty/opentelemetry-semconv-*-alpha.jar \
229252 ./
230253
231- WORKDIR /stackable/spark-${PRODUCT}-stackable${RELEASE}/dist/connect
232-
233- # As of version 3.5.5, spark-connect jars are not included in the dist folder.
234- # To avoid classpath conflicts with existing spark applications,
235- # we create a new dist/connect folder, and copy them here.
236- RUN cp /stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/server/target/spark-connect_*-${PRODUCT}-stackable${RELEASE}.jar . \
237- && cp /stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/common/target/spark-connect-common_*-${PRODUCT}-stackable${RELEASE}.jar . \
238- && cp /stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/client/jvm/target/spark-connect-client-jvm_2.12-${PRODUCT}-stackable${RELEASE}.jar .
239-
240254COPY spark-k8s/stackable/jmx /stackable/jmx
241255
242256WORKDIR /stackable/spark-${PRODUCT}-stackable${RELEASE}/dist/extra-jars
0 commit comments