Skip to content

Commit fd5a013

Browse files
authored
feat: publish vortex-spark-all assembly JAR (#5435)
To allow using Vortex as a read/write datasource with Spark, we publish an assembly JAR that shades all runtime dependencies that may conflict with Spark. <img width="1840" height="1195" alt="image" src="https://github.com/user-attachments/assets/f65f8c32-fca6-41cd-b54f-965b8149bc97" /> Unfortunately it's a bit hard to test publishing; I've just modeled this on the existing setup for the vortex-jni-all JAR. Signed-off-by: Andrew Duffy <[email protected]>
1 parent 79b0763 commit fd5a013

File tree

4 files changed

+75
-50
lines changed

4 files changed

+75
-50
lines changed

java/testfiles/Cargo.lock

Lines changed: 45 additions & 47 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

java/vortex-spark/build.gradle.kts

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,19 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4+
import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar
5+
46
apply(plugin = "com.vanniktech.maven.publish")
57

68
plugins {
79
`java-library`
810
`jvm-test-suite`
11+
id("com.gradleup.shadow") version "9.2.2"
912
}
1013

1114
dependencies {
12-
api("org.apache.spark:spark-catalyst_2.12")
13-
api("org.apache.spark:spark-sql_2.12")
15+
compileOnly("org.apache.spark:spark-catalyst_2.12")
16+
compileOnly("org.apache.spark:spark-sql_2.12")
1417
api(project(":vortex-jni", configuration = "shadow"))
1518

1619
compileOnly("org.immutables:value")
@@ -73,6 +76,22 @@ mavenPublishing {
7376
}
7477
}
7578

79+
// shade guava and protobuf dependencies
80+
tasks.withType<ShadowJar> {
81+
relocate("com.google.protobuf", "dev.vortex.relocated.com.google.protobuf")
82+
relocate("com.google.common", "dev.vortex.relocated.com.google.common")
83+
relocate("org.apache.arrow", "dev.vortex.relocated.org.apache.arrow") {
84+
// exclude C Data Interface since JNI cannot be relocated
85+
exclude("org.apache.arrow.c.jni.JniWrapper")
86+
exclude("org.apache.arrow.c.jni.PrivateData")
87+
exclude("org.apache.arrow.c.jni.CDataJniException")
88+
// Also used by JNI: https://github.com/apache/arrow/blob/apache-arrow-11.0.0/java/c/src/main/cpp/jni_wrapper.cc#L341
89+
// Note this class is not used by us, but required when loading the native lib
90+
exclude("org.apache.arrow.c.ArrayStreamExporter\$ExportedArrayStreamPrivateData")
91+
}
92+
}
93+
94+
7695
tasks.withType<Test>().all {
7796
classpath +=
7897
project(":vortex-jni")
@@ -88,4 +107,8 @@ tasks.withType<Test>().all {
88107
)
89108
}
90109

110+
tasks.build {
111+
dependsOn("shadowJar")
112+
}
113+
91114
description = "Apache Spark bindings for reading Vortex file datasets"

java/vortex-spark/src/main/java/dev/vortex/spark/SparkTypes.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,8 @@ public static DataType toDataType(DType dType) {
140140

141141
// TODO(aduffy): other extension types
142142
throw new IllegalArgumentException("Unsupported non-temporal extension type");
143+
case DECIMAL:
144+
return DataTypes.createDecimalType(dType.getPrecision(), dType.getScale());
143145
default:
144146
throw new IllegalArgumentException("unreachable");
145147
}

vortex-array/src/stream/adapter.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,9 @@ where
5555
debug_assert_eq!(
5656
array.dtype(),
5757
this.dtype,
58-
"ArrayStreamAdapter received an array with unexpected dtype"
58+
"ArrayStreamAdapter expected array with type {}, actual {}",
59+
this.dtype,
60+
array.dtype(),
5961
);
6062
}
6163

0 commit comments

Comments
 (0)