Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 3 additions & 27 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -42,23 +42,11 @@ subprojects {

configurations.all {
resolutionStrategy.eachDependency { DependencyResolveDetails details ->
// These all impact Spark and its dependencies, but not the published connector as the connector does not contain
// any Spark libraries.
if (details.requested.group.equals("org.apache.hadoop") and details.requested.version.equals("3.4.1")) {
details.useVersion "3.4.2"
details.because "Using 3.4.2 to minimize CVEs and because Flux is doing the same thing. This only affects the connector tests."
}
if (details.requested.group.startsWith('com.fasterxml.jackson')) {
details.useVersion '2.18.2'
details.because 'Need to match the version used by Spark 4.0.1.'
}
if (details.requested.group.equals("org.slf4j")) {
details.useVersion "2.0.17"
details.because "Ensures that slf4j-api 1.x does not appear on the Flux classpath in particular, which can " +
"lead to this issue - https://www.slf4j.org/codes.html#StaticLoggerBinder."
}
if (details.requested.group.equals("org.apache.logging.log4j")) {
details.useVersion "2.24.3"
details.because "Need to match the version used by Apache Tika. Spark uses 2.20.0 but automated tests confirm " +
"that Spark seems fine with 2.24.3."
details.because "Using 3.4.2 to minimize CVEs and because Flux is doing the same thing."
}
if (details.requested.group.equals("org.codehaus.janino")) {
details.useVersion "3.1.12"
Expand All @@ -69,18 +57,6 @@ subprojects {
details.because "Bumping from 4.1.118 (what Spark SQL 4.0.1 depends on) to 4.1.127 to minimize CVEs."
}
}

resolutionStrategy {
// Avoids a classpath conflict between Spark and the tika-parser-microsoft-module. Tika needs a
// more recent version, and Spark (and Jena as well) both seem fine with this (as they should be per semver).
force "org.apache.commons:commons-compress:1.27.1"

// Avoids CVEs in earlier minor versions.
force "org.apache.commons:commons-lang3:3.18.0"
}

// Excluded from Flux for size reasons, so excluded here as well to ensure we don't need it when running tests.
exclude module: "rocksdbjni"
}

test {
Expand Down
2 changes: 1 addition & 1 deletion gradle.properties
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
version=3.0-SNAPSHOT
sparkVersion=4.0.1
sparkVersion=4.1.0-preview1
tikaVersion=3.2.3
semaphoreVersion=5.10.0
langchain4jVersion=1.5.0
Expand Down
68 changes: 28 additions & 40 deletions marklogic-spark-connector/build.gradle
Original file line number Diff line number Diff line change
@@ -1,72 +1,61 @@
plugins {
id 'com.gradleup.shadow' version '8.3.3'
id 'com.gradleup.shadow' version '9.2.1'
id 'maven-publish'
}

configurations {
// Defines all the implementation dependencies, but in such a way that they are not included as dependencies in the
// library's pom.xml file. This is due to the shadow jar being published instead of a jar only containing this
// project's classes. The shadow jar is published due to the need to relocate several packages to avoid conflicts
// with Spark.
shadowDependencies

// This approach allows for all of the dependencies to be available for compilation and for running tests.
compileOnly.extendsFrom(shadowDependencies)
testImplementation.extendsFrom(compileOnly)
}

dependencies {
// Need to compile against Spark, but its libraries are not part of the connector jar.
compileOnly "org.apache.spark:spark-sql_2.13:${sparkVersion}"
compileOnly ("org.apache.spark:spark-sql_2.13:${sparkVersion}") {
// Excluded from Flux for size reasons, so excluded here as well to ensure we don't need it when running tests.
exclude module: "rocksdbjni"
}

// This is compileOnly as Spark will provide its own copy at runtime.
compileOnly "com.fasterxml.jackson.core:jackson-databind:2.18.2"

shadowDependencies("com.marklogic:marklogic-client-api:7.2.0") {
implementation("com.marklogic:marklogic-client-api:7.2.0") {
// Need to use the versions of Jackson preferred by Spark.
exclude group: "com.fasterxml.jackson.core"
exclude group: "com.fasterxml.jackson.dataformat"
}

// For logging.
shadowDependencies "org.slf4j:jcl-over-slf4j:2.0.17"
implementation "org.slf4j:jcl-over-slf4j:2.0.17"

// Needed for splitting XML documents via XPath.
shadowDependencies "jaxen:jaxen:2.0.0"
implementation "jaxen:jaxen:2.0.0"

// Needed for classifying documents via Semaphore.
shadowDependencies("com.smartlogic.csclient:Semaphore-CS-Client:${semaphoreVersion}") {
implementation("com.smartlogic.csclient:Semaphore-CS-Client:${semaphoreVersion}") {
exclude group: "com.fasterxml.jackson.core"
}
shadowDependencies("com.smartlogic.cloud:Semaphore-Cloud-Client:${semaphoreVersion}") {
implementation("com.smartlogic.cloud:Semaphore-Cloud-Client:${semaphoreVersion}") {
exclude group: "com.fasterxml.jackson.core"
}

// Adding this in 2.6.0. tika-core is very small and only brings in commons-io and slf4j-api. Flux can then
// include the necessary parsers.
shadowDependencies "org.apache.tika:tika-core:${tikaVersion}"
implementation "org.apache.tika:tika-core:${tikaVersion}"

// Needed for using XmlMapper.
shadowDependencies("com.fasterxml.jackson.dataformat:jackson-dataformat-xml:2.18.2") {
implementation("com.fasterxml.jackson.dataformat:jackson-dataformat-xml:2.19.0") {
// Not needed, as the modules in this group that this dependency depends on are all provided by Spark.
exclude group: "com.fasterxml.jackson.core"
}

// Supports reading and writing RDF data. Including this here so it's available to the tests as well.
// Bumped to 5.x, which requires Java 17, while upgrading Spark to 4.x.
shadowDependencies("org.apache.jena:jena-arq:5.5.0") {
implementation("org.apache.jena:jena-arq:5.5.0") {
exclude group: "com.fasterxml.jackson.core"
exclude group: "com.fasterxml.jackson.dataformat"
}

// Needed for some XML operations that are far easier with JDOM2 than with DOM.
shadowDependencies "org.jdom:jdom2:2.0.6.1"
implementation "org.jdom:jdom2:2.0.6.1"

shadowDependencies "dev.langchain4j:langchain4j:${langchain4jVersion}"

// Ensuring the desired version of commons-compress is included in the connector jar. Some tests have failed in Flux
// because an older version - likely the one depended on by Jena - is included instead.
shadowDependencies "org.apache.commons:commons-compress:1.27.1"
implementation ("dev.langchain4j:langchain4j:${langchain4jVersion}") {
exclude group: "com.fasterxml.jackson.core"
}

// Need this so that an OkHttpClientConfigurator can be created.
// Only needs compileOnly, as the Java Client brings this as an implementation dependency.
Expand All @@ -78,10 +67,14 @@ dependencies {
// org.junit.platform.commons.JUnitException: TestEngine with ID 'junit-jupiter' failed to discover tests
testRuntimeOnly "org.junit.platform:junit-platform-launcher:1.13.4"

testImplementation "org.apache.spark:spark-sql_2.13:${sparkVersion}"
testImplementation ("org.apache.spark:spark-sql_2.13:${sparkVersion}") {
exclude module: "rocksdbjni"
}

// Supports testing the embedder feature.
testImplementation "dev.langchain4j:langchain4j-embeddings-all-minilm-l6-v2:1.5.0-beta11"
testImplementation ("dev.langchain4j:langchain4j-embeddings-all-minilm-l6-v2:1.5.0-beta11") {
exclude group: "com.fasterxml.jackson.core"
}

testImplementation('com.marklogic:ml-app-deployer:6.0.1') {
exclude group: "com.fasterxml.jackson.core"
Expand All @@ -106,7 +99,6 @@ dependencies {
testImplementation "org.springframework:spring-test:6.2.11"

testImplementation "ch.qos.logback:logback-classic:1.5.18"
testImplementation "org.slf4j:jcl-over-slf4j:2.0.17"
testImplementation "org.skyscreamer:jsonassert:1.5.3"

testImplementation "org.apache.tika:tika-parser-microsoft-module:${tikaVersion}"
Expand All @@ -126,15 +118,6 @@ test {
]
}

shadowJar {
configurations = [project.configurations.shadowDependencies]

// "all" is the default; no need for that in the connector filename. This also results in this becoming the library
// artifact that is published as a dependency. That is desirable as it includes the relocated packages listed below,
// which a dependent would otherwise have to manage themselves.
archiveClassifier.set("")
}

// Publishing setup - see https://docs.gradle.org/current/userguide/publishing_setup.html .
java {
withJavadocJar()
Expand All @@ -152,6 +135,11 @@ javadoc.failOnError = false
// Ignores warnings on params that don't have descriptions, which are a little too noisy.
javadoc.options.addStringOption('Xdoclint:none', '-quiet')

// We don't want the shadow jar to be published to a Maven repository.
shadow {
addShadowVariantIntoJavaComponent = false
}

publishing {
publications {
mainJava(MavenPublication) {
Expand Down