
Commit 006fa1e

Merge branch 'develop' into feature/temp-master
2 parents: 7a535e8 + bf8d26b

331 files changed: +838 −549 lines changed

.copyrightconfig

Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
+# COPYRIGHT VALIDATION CONFIG
+# ---------------------------------
+# Required start year (keep fixed; end year auto-updates in check output)
+startyear: 2023
+
+# Optional exclusions list (comma-separated). Leave commented if none.
+# Rules:
+# - Relative paths (no leading ./)
+# - Simple * wildcard only (no recursive **)
+# - Use sparingly (third_party, generated, binary assets)
+# - Dotfiles already skipped automatically
+# Enable by removing the leading '# ' from the next line and editing values.
+# filesexcluded: third_party/*, docs/generated/*.md, assets/*.png, scripts/temp_*.py, vendor/lib.js
+filesexcluded: .github/*, README.md, CONTRIBUTING.md, Jenkinsfile, gradle/*, docker-compose.yml, *.gradle, gradle.properties, gradlew, gradlew.bat, **/test/resources/**, docs/**, *.json, *.txt, CODEOWNERS

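The comments in this new config spell out a deliberately small pattern language: relative paths with a single, non-recursive `*` wildcard. As a rough illustration of how such a rule could be evaluated against a repository path, here is a minimal Groovy sketch; the `matchesExclusion` helper is a hypothetical name and is not taken from the copyright-check workflow itself.

```groovy
// Hypothetical sketch of the "simple * wildcard" rule described above;
// not code from the copyright-check workflow.
boolean matchesExclusion(String pattern, String relativePath) {
    // Quote the literal segments and let '*' match anything except '/',
    // so the wildcard stays non-recursive.
    String regex = pattern.split('\\*', -1)
            .collect { java.util.regex.Pattern.quote(it) }
            .join('[^/]*')
    return relativePath.matches(regex)
}

assert matchesExclusion('gradle/*', 'gradle/wrapper.properties')
assert !matchesExclusion('gradle/*', 'gradle/wrapper/gradle-wrapper.jar') // '*' does not cross '/'
assert matchesExclusion('*.gradle', 'build.gradle')
```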
.github/workflows/pr-workflow.yaml

Lines changed: 8 additions & 1 deletion
@@ -1,4 +1,4 @@
-name: 🏷️ JIRA ID Validator
+name: PR Workflow
 
 on:
   # Using pull_request_target instead of pull_request to handle PRs from forks
@@ -14,3 +14,10 @@ jobs:
     with:
       # Pass the PR title from the event context
      pr-title: ${{ github.event.pull_request.title }}
+  copyright-validation:
+    name: © Validate Copyright Headers
+    uses: marklogic/pr-workflows/.github/workflows/copyright-check.yml@main
+    permissions:
+      contents: read
+      pull-requests: write
+      issues: write

CODEOWNERS

Lines changed: 1 addition & 1 deletion
@@ -2,4 +2,4 @@
 # Each line is a file pattern followed by one or more owners.
 
 # These owners will be the default owners for everything in the repo.
-* @anu3990 @billfarber @rjrudin @stevebio
+* @billfarber @rjrudin @stevebio

CONTRIBUTING.md

Lines changed: 7 additions & 6 deletions
@@ -1,12 +1,7 @@
 This guide covers how to develop and test this project. It assumes that you have cloned this repository to your local
 workstation.
 
-**You must use Java 17 for developing, testing, and building this project**, even though the connector supports
-running on Java 11. For users, Java 17 is only required if using the splitting and embedding features, as those
-depend on a third party module that requires Java 17.
-
-**You also need Java 11 installed** so that the subprojects in this repository that require Java 11 have access to a
-Java 11 SDK. [sdkman](https://sdkman.io/) is highly recommend for installing multiple JDKs.
+**You must use Java 17 for developing, testing, and building this project**.
 
 # Setup
 
@@ -44,6 +39,12 @@ To run the tests against the test application, run the following Gradle task:
 
     ./gradlew test
 
+To also get code coverage, run:
+
+    ./gradlew test jacocoTestReport
+
+The code coverage report will be in `marklogic-spark-connector/build/reports/jacoco/test/html/index.html`.
+
 **To run the tests in Intellij**, you must configure your JUnit template to include a few JVM args:
 
 1. Go to Run -> Edit Configurations.

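The new CONTRIBUTING.md text points readers at JaCoCo's default HTML report location. For reference, that path comes from the stock Gradle `jacoco` plugin defaults; the following is a minimal sketch of that default setup, an assumption for illustration rather than an excerpt from this repository's build files.

```groovy
// Minimal sketch assuming the stock Gradle 'jacoco' plugin defaults;
// not an excerpt from this repository's build files.
plugins {
    id 'java'
    id 'jacoco'
}

jacocoTestReport {
    dependsOn test // the report is generated from the test task's execution data
    reports {
        // By default the HTML report lands in build/reports/jacoco/test/html/index.html,
        // the path referenced in the CONTRIBUTING.md change above.
        html.required = true
    }
}
```

With that in place, `./gradlew test jacocoTestReport` runs the tests and then produces the report, matching the command added above.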
Jenkinsfile

Lines changed: 39 additions & 28 deletions
@@ -1,54 +1,74 @@
 @Library('shared-libraries') _
 
-def runtests(String javaVersion){
+def runtests(){
   // 'set -e' causes the script to fail if any command fails.
-  sh label:'test', script: '''#!/bin/bash
+  sh label:'deploy-test-app', script: '''#!/bin/bash
   set -e
-  export JAVA_HOME=$'''+javaVersion+'''
+  export JAVA_HOME=$JAVA17_HOME_DIR
   export GRADLE_USER_HOME=$WORKSPACE/$GRADLE_DIR
-  export PATH=$GRADLE_USER_HOME:$JAVA_HOME/bin:$PATH
+  export PATH=$JAVA_HOME/bin:$PATH
   cd marklogic-spark-connector
-  echo "Waiting for MarkLogic server to initialize."
-  sleep 60s
-  ./gradlew clean
+  ./gradlew -i mlWaitTillReady
   ./gradlew mlTestConnections
   ./gradlew -i mlDeploy
   echo "Loading data a second time to try to avoid Optic bug with duplicate rows being returned."
   ./gradlew -i mlLoadData
-  ./gradlew clean testCodeCoverageReport || true
+  '''
+
+  sh label:'test', script: '''#!/bin/bash
+  set -e
+  export JAVA_HOME=$JAVA17_HOME_DIR
+  export GRADLE_USER_HOME=$WORKSPACE/$GRADLE_DIR
+  export PATH=$JAVA_HOME/bin:$PATH
+  cd marklogic-spark-connector
+  ./gradlew clean test jacocoTestReport || true
   '''
   junit '**/build/**/*.xml'
 }
 
-def runSonarScan(String javaVersion){
-  sh label:'test', script: '''#!/bin/bash
-  export JAVA_HOME=$'''+javaVersion+'''
+def runSonarScan(){
+  sh label:'run-sonar', script: '''#!/bin/bash
+  export JAVA_HOME=$JAVA17_HOME_DIR
   export GRADLE_USER_HOME=$WORKSPACE/$GRADLE_DIR
   export PATH=$GRADLE_USER_HOME:$JAVA_HOME/bin:$PATH
   cd marklogic-spark-connector
   ./gradlew sonar -Dsonar.projectKey='marklogic_marklogic-spark-connector_AY1bXn6J_50_odbCDKMX' -Dsonar.projectName='ML-DevExp-marklogic-spark-connector' || true
   '''
 }
 
+def tearDownDocker() {
+  updateWorkspacePermissions()
+  sh label:'mlcleanup', script: '''#!/bin/bash
+  cd marklogic-spark-connector
+  docker-compose down -v || true
+  '''
+  cleanupDocker()
+}
+
 pipeline{
   agent none
+
   triggers{
     parameterizedCron(env.BRANCH_NAME == "develop" ? "00 02 * * * % regressions=true" : "")
   }
   parameters{
     booleanParam(name: 'regressions', defaultValue: false, description: 'indicator if build is for regressions')
   }
+
   options {
     checkoutToSubdirectory 'marklogic-spark-connector'
     buildDiscarder logRotator(artifactDaysToKeepStr: '7', artifactNumToKeepStr: '', daysToKeepStr: '30', numToKeepStr: '')
   }
+
   environment{
     JAVA17_HOME_DIR="/home/builder/java/jdk-17.0.2"
     GRADLE_DIR =".gradle"
     DMC_USER = credentials('MLBUILD_USER')
     DMC_PASSWORD = credentials('MLBUILD_PASSWORD')
   }
+
   stages{
+
     stage('tests'){
       environment{
         scannerHome = tool 'SONAR_Progress'
@@ -64,22 +84,18 @@ pipeline{
         cd marklogic-spark-connector
         MARKLOGIC_LOGS_VOLUME=/tmp docker-compose up -d --build
         '''
-        runtests('JAVA17_HOME_DIR')
+        runtests()
         withSonarQubeEnv('SONAR_Progress') {
-          runSonarScan('JAVA17_HOME_DIR')
+          runSonarScan()
         }
       }
       post{
         always{
-          updateWorkspacePermissions()
-          sh label:'mlcleanup', script: '''#!/bin/bash
-          cd marklogic-spark-connector
-          docker-compose down -v || true
-          '''
-          cleanupDocker()
+          tearDownDocker()
         }
       }
     }
+
     stage('publish'){
       agent {label 'devExpLinuxPool'}
       when {
@@ -89,14 +105,15 @@ pipeline{
         sh label:'publish', script: '''#!/bin/bash
         export JAVA_HOME=$JAVA17_HOME_DIR
         export GRADLE_USER_HOME=$WORKSPACE/$GRADLE_DIR
-        export PATH=$GRADLE_USER_HOME:$JAVA_HOME/bin:$PATH
+        export PATH=$JAVA_HOME/bin:$PATH
         cd marklogic-spark-connector
         ./gradlew clean
         cp ~/.gradle/gradle.properties $GRADLE_USER_HOME;
         ./gradlew publish
         '''
       }
     }
+
     stage('regressions'){
       agent {label 'devExpLinuxPool'}
       when{
@@ -116,19 +133,13 @@ pipeline{
         docker-compose down -v || true
         MARKLOGIC_LOGS_VOLUME=/tmp docker-compose up -d --build
         '''
-        runtests('JAVA17_HOME_DIR')
+        runtests()
       }
       post{
         always{
-          updateWorkspacePermissions()
-          sh label:'mlcleanup', script: '''#!/bin/bash
-          cd marklogic-spark-connector
-          docker-compose down -v || true
-          '''
-          cleanupDocker()
+          tearDownDocker()
         }
       }
-
     }
   }
 }

NOTICE.txt

Lines changed: 13 additions & 18 deletions
@@ -10,53 +10,48 @@ product and version for which you are requesting source code.
 
 Third Party Notices
 
-jackson-dataformat-xml 2.15.2 (Apache-2.0)
+jackson-dataformat-xml 2.19.0 (Apache-2.0)
 jdom2 2.0.6.1 (Apache-2.0)
-jena-arq 4.10.0 (Apache-2.0)
-langchain4j 1.2.0 (Apache-2.0)
-marklogic-client-api 7.2.0 (Apache-2.0)
-okhttp 4.12.0 (Apache-2.0)
-Semaphore-CS-Client 5.6.1 (Apache-2.0)
-Semaphore-Cloud-Client 5.6.1 (Apache-2.0)
-tika-core 3.2.1 (Apache-2.0)
+jena-arq 5.5.0 (Apache-2.0)
+langchain4j 1.5.0 (Apache-2.0)
+marklogic-client-api 8.0.0 (Apache-2.0)
+Semaphore-CS-Client 5.10.0 (Apache-2.0)
+Semaphore-Cloud-Client 5.10.0 (Apache-2.0)
+tika-core 3.2.3 (Apache-2.0)
 
 Third-Party Components
 
 The following is a list of the third-party components used by the MarkLogic® Spark connector 2.7.0 (last updated July 31, 2025):
 
-jackson-dataformat-xml 2.15.2 (Apache-2.0)
+jackson-dataformat-xml 2.19.0 (Apache-2.0)
 https://repo1.maven.org/maven2/com/fasterxml/jackson/dataformat/jackson-dataformat-xml/
 For the full text of the Apache-2.0 license, see Apache License 2.0 (Apache-2.0)
 
 jdom2 2.0.6.1 (Apache-2.0)
 https://repo1.maven.org/maven2/org/jdom/jdom2/
 For the full text of the Apache-2.0 license, see Apache License 2.0 (Apache-2.0)
 
-jena-arq 4.10.0 (Apache-2.0)
+jena-arq 5.5.0 (Apache-2.0)
 https://repo1.maven.org/maven2/org/apache/jena/jena-arq/
 For the full text of the Apache-2.0 license, see Apache License 2.0 (Apache-2.0)
 
-langchain4j 1.2.0 (Apache-2.0)
+langchain4j 1.5.0 (Apache-2.0)
 https://repo1.maven.org/maven2/dev/langchain4j/langchain4j/
 For the full text of the Apache-2.0 license, see Apache License 2.0 (Apache-2.0)
 
 marklogic-client-api 7.2.0 (Apache-2.0)
 https://repo1.maven.org/maven2/com/marklogic/marklogic-client-api/
 For the full text of the Apache-2.0 license, see Apache License 2.0 (Apache-2.0)
 
-okhttp 4.12.0 (Apache-2.0)
-https://repo1.maven.org/maven2/com/squareup/okhttp3/okhttp/
-For the full text of the Apache-2.0 license, see Apache License 2.0 (Apache-2.0)
-
-Semaphore-CS-Client 5.6.1 (Apache-2.0)
+Semaphore-CS-Client 5.10.0 (Apache-2.0)
 https://repo1.maven.org/maven2/com/smartlogic/csclient/Semaphore-CS-Client/
 For the full text of the Apache-2.0 license, see Apache License 2.0 (Apache-2.0)
 
-Semaphore-CS-Client 5.6.1 (Apache-2.0)
+Semaphore-CS-Client 5.10.0 (Apache-2.0)
 https://repo1.maven.org/maven2/com/smartlogic/cloud/Semaphore-Cloud-Client/
 For the full text of the Apache-2.0 license, see Apache License 2.0 (Apache-2.0)
 
-tika-core 3.2.1 (Apache-2.0)
+tika-core 3.2.3 (Apache-2.0)
 https://repo1.maven.org/maven2/org/apache/tika/tika-core/
 For the full text of the Apache-2.0 license, see Apache License 2.0 (Apache-2.0)
 

build.gradle

Lines changed: 6 additions & 41 deletions
@@ -8,7 +8,7 @@ sonar {
     properties {
         property "sonar.projectKey", "marklogic-spark"
         property "sonar.host.url", "http://localhost:9000"
-        property "sonar.coverage.jacoco.xmlReportPaths", "code-coverage-report/build/reports/jacoco/testCodeCoverageReport/testCodeCoverageReport.xml"
+        property "sonar.coverage.jacoco.xmlReportPaths", "marklogic-spark-connector/build/reports/jacoco/test/jacocoTestReport.xml"
     }
 }
 
@@ -23,7 +23,7 @@ subprojects {
     // See https://docs.gradle.org/current/userguide/toolchains.html .
     java {
         toolchain {
-            languageVersion = JavaLanguageVersion.of(11)
+            languageVersion = JavaLanguageVersion.of(17)
         }
     }
 
@@ -35,53 +35,18 @@ subprojects {
 
     repositories {
         mavenCentral()
-        maven {
-            url "https://bed-artifactory.bedford.progress.com:443/artifactory/ml-maven-snapshots/"
-        }
-    }
 
-    configurations.all {
-        resolutionStrategy.eachDependency { DependencyResolveDetails details ->
-            // Added after upgrading langchain4j to 1.0.0-beta2, which brought Jackson 2.18.2 in.
-            if (details.requested.group.startsWith('com.fasterxml.jackson')) {
-                details.useVersion '2.15.2'
-                details.because 'Need to match the version used by Spark.'
-            }
-            if (details.requested.group.equals("org.slf4j")) {
-                details.useVersion "2.0.16"
-                details.because "Ensures that slf4j-api 1.x does not appear on the Flux classpath in particular, which can " +
-                    "lead to this issue - https://www.slf4j.org/codes.html#StaticLoggerBinder."
-            }
-            if (details.requested.group.equals("org.apache.logging.log4j")) {
-                details.useVersion "2.24.3"
-                details.because "Need to match the version used by Apache Tika. Spark uses 2.20.0 but automated tests confirm " +
-                    "that Spark seems fine with 2.24.3."
-            }
-        }
-
-        resolutionStrategy {
-            // Avoids a classpath conflict between Spark and the tika-parser-microsoft-module. Tika needs a
-            // more recent version and Spark (and Jena as well) both seems fine with this (as they should be per semver).
-            force "org.apache.commons:commons-compress:1.27.1"
-
-            // Avoids CVEs in earlier minor versions.
-            force "org.apache.commons:commons-lang3:3.18.0"
+        maven {
+            url = "https://bed-artifactory.bedford.progress.com:443/artifactory/ml-maven-snapshots/"
         }
-
-        // Excluded from Flux for size reasons, so excluded here as well to ensure we don't need it when running tests.
-        exclude module: "rocksdbjni"
-    }
-
-    task allDeps(type: DependencyReportTask) {
-        description = "Allows for generating dependency reports for every subproject in a single task."
     }
 
     test {
         useJUnitPlatform()
         finalizedBy jacocoTestReport
         testLogging {
-            events 'started', 'passed', 'skipped', 'failed'
-            exceptionFormat 'full'
+            events = ['started', 'passed', 'skipped', 'failed']
+            exceptionFormat = 'full'
        }
         environment "SEMAPHORE_API_KEY", semaphoreApiKey
         environment "SEMAPHORE_HOST", semaphoreHost

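The `sonar.coverage.jacoco.xmlReportPaths` change above points SonarQube at the single-module `jacocoTestReport` XML file rather than the aggregated `testCodeCoverageReport` produced by the `code-coverage-report` subproject (deleted below). As a rough sketch of the wiring this relies on, assuming standard Gradle JaCoCo conventions rather than quoting the commit:

```groovy
// Assumed wiring based on standard Gradle JaCoCo conventions; not quoted from the commit.
jacocoTestReport {
    reports {
        // Writes build/reports/jacoco/test/jacocoTestReport.xml by default, which is
        // the file that sonar.coverage.jacoco.xmlReportPaths now points to.
        xml.required = true
    }
}

tasks.named('test') {
    finalizedBy 'jacocoTestReport' // mirrors the existing "finalizedBy jacocoTestReport" above
}
```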
code-coverage-report/build.gradle

Lines changed: 0 additions & 24 deletions
This file was deleted.
