diff --git a/Jenkinsfile b/Jenkinsfile
index 874641ff..de696e47 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -1,28 +1,34 @@
 @Library('shared-libraries') _
 
-def runtests(String javaVersion){
+def runtests(){
   // 'set -e' causes the script to fail if any command fails.
-  sh label:'test', script: '''#!/bin/bash
+  sh label:'deploy-test-app', script: '''#!/bin/bash
     set -e
-    export JAVA_HOME=$'''+javaVersion+'''
+    export JAVA_HOME=$JAVA17_HOME_DIR
     export GRADLE_USER_HOME=$WORKSPACE/$GRADLE_DIR
-    export PATH=$GRADLE_USER_HOME:$JAVA_HOME/bin:$PATH
+    export PATH=$JAVA_HOME/bin:$PATH
     cd marklogic-spark-connector
-    echo "Waiting for MarkLogic server to initialize."
-    sleep 60s
-    ./gradlew clean
+    ./gradlew -i mlWaitTillReady
     ./gradlew mlTestConnections
     ./gradlew -i mlDeploy
     echo "Loading data a second time to try to avoid Optic bug with duplicate rows being returned."
     ./gradlew -i mlLoadData
+  '''
+
+  sh label:'test', script: '''#!/bin/bash
+    set -e
+    export JAVA_HOME=$JAVA17_HOME_DIR
+    export GRADLE_USER_HOME=$WORKSPACE/$GRADLE_DIR
+    export PATH=$JAVA_HOME/bin:$PATH
+    cd marklogic-spark-connector
     ./gradlew clean test jacocoTestReport || true
   '''
   junit '**/build/**/*.xml'
 }
 
-def runSonarScan(String javaVersion){
-  sh label:'test', script: '''#!/bin/bash
-    export JAVA_HOME=$'''+javaVersion+'''
+def runSonarScan(){
+  sh label:'run-sonar', script: '''#!/bin/bash
+    export JAVA_HOME=$JAVA17_HOME_DIR
     export GRADLE_USER_HOME=$WORKSPACE/$GRADLE_DIR
     export PATH=$GRADLE_USER_HOME:$JAVA_HOME/bin:$PATH
     cd marklogic-spark-connector
@@ -30,25 +36,39 @@ def runSonarScan(String javaVersion){
   '''
 }
 
+def tearDownDocker() {
+  updateWorkspacePermissions()
+  sh label:'mlcleanup', script: '''#!/bin/bash
+    cd marklogic-spark-connector
+    docker-compose down -v || true
+  '''
+  cleanupDocker()
+}
+
 pipeline{
   agent none
+
   triggers{
     parameterizedCron(env.BRANCH_NAME == "develop" ? "00 02 * * * % regressions=true" : "")
   }
   parameters{
     booleanParam(name: 'regressions', defaultValue: false, description: 'indicator if build is for regressions')
   }
+
   options {
     checkoutToSubdirectory 'marklogic-spark-connector'
     buildDiscarder logRotator(artifactDaysToKeepStr: '7', artifactNumToKeepStr: '', daysToKeepStr: '30', numToKeepStr: '')
   }
+
   environment{
     JAVA17_HOME_DIR="/home/builder/java/jdk-17.0.2"
     GRADLE_DIR =".gradle"
    DMC_USER = credentials('MLBUILD_USER')
     DMC_PASSWORD = credentials('MLBUILD_PASSWORD')
   }
+
   stages{
+
     stage('tests'){
       environment{
         scannerHome = tool 'SONAR_Progress'
@@ -64,22 +84,18 @@ pipeline{
          cd marklogic-spark-connector
          MARKLOGIC_LOGS_VOLUME=/tmp docker-compose up -d --build
          '''
-        runtests('JAVA17_HOME_DIR')
+        runtests()
        withSonarQubeEnv('SONAR_Progress') {
-          runSonarScan('JAVA17_HOME_DIR')
+          runSonarScan()
        }
      }
      post{
        always{
-          updateWorkspacePermissions()
-          sh label:'mlcleanup', script: '''#!/bin/bash
-            cd marklogic-spark-connector
-            docker-compose down -v || true
-          '''
-          cleanupDocker()
+          tearDownDocker()
        }
      }
    }
+
    stage('publish'){
      agent {label 'devExpLinuxPool'}
      when {
@@ -89,7 +105,7 @@ pipeline{
        sh label:'publish', script: '''#!/bin/bash
          export JAVA_HOME=$JAVA17_HOME_DIR
          export GRADLE_USER_HOME=$WORKSPACE/$GRADLE_DIR
-          export PATH=$GRADLE_USER_HOME:$JAVA_HOME/bin:$PATH
+          export PATH=$JAVA_HOME/bin:$PATH
          cd marklogic-spark-connector
          ./gradlew clean
          cp ~/.gradle/gradle.properties $GRADLE_USER_HOME;
@@ -97,6 +113,7 @@ pipeline{
          '''
      }
    }
+
    stage('regressions'){
      agent {label 'devExpLinuxPool'}
      when{
@@ -116,19 +133,13 @@ pipeline{
          docker-compose down -v || true
          MARKLOGIC_LOGS_VOLUME=/tmp docker-compose up -d --build
          '''
-        runtests('JAVA17_HOME_DIR')
+        runtests()
      }
      post{
        always{
-          updateWorkspacePermissions()
-          sh label:'mlcleanup', script: '''#!/bin/bash
-            cd marklogic-spark-connector
-            docker-compose down -v || true
-          '''
-          cleanupDocker()
+          tearDownDocker()
        }
      }
-      }
    }
  }
diff --git a/build.gradle b/build.gradle
index e4119f3b..b9207a31 100644
--- a/build.gradle
+++ b/build.gradle
@@ -35,6 +35,10 @@ subprojects {
 
     repositories {
         mavenCentral()
+
+        maven {
+            url = "https://bed-artifactory.bedford.progress.com:443/artifactory/ml-maven-snapshots/"
+        }
     }
 
     test {
diff --git a/gradle.properties b/gradle.properties
index 9f841b11..02715912 100644
--- a/gradle.properties
+++ b/gradle.properties
@@ -1,5 +1,5 @@
 version=3.0-SNAPSHOT
-sparkVersion=4.1.0-preview1
+sparkVersion=4.1.0-preview2
 tikaVersion=3.2.3
 semaphoreVersion=5.10.0
 langchain4jVersion=1.5.0
diff --git a/marklogic-spark-connector/build.gradle b/marklogic-spark-connector/build.gradle
index 7762d65b..04b8658e 100644
--- a/marklogic-spark-connector/build.gradle
+++ b/marklogic-spark-connector/build.gradle
@@ -8,17 +8,13 @@ configurations {
 
     resolutionStrategy.eachDependency { DependencyResolveDetails details ->
         // These all impact Spark and its dependencies, but not the published connector as the connector does not contain
         // any Spark libraries.
-        if (details.requested.group.equals("org.apache.hadoop") && details.requested.version.equals("3.4.1")) {
-            details.useVersion "3.4.2"
-            details.because "Using 3.4.2 to minimize CVEs and because Flux is doing the same thing."
-        }
         if (details.requested.group.equals("org.codehaus.janino")) {
             details.useVersion "3.1.12"
             details.because "Bumping from 3.1.9 (what Spark SQL 4.0.1 depends on) to 3.1.12 to minimize CVEs."
         }
         if (details.requested.group.equals("io.netty") && details.requested.version.startsWith("4.1.1")) {
-            details.useVersion "4.1.127.Final"
-            details.because "Bumping from 4.1.118 (what Spark SQL 4.0.1 depends on) to 4.1.127 to minimize CVEs."
+            details.useVersion "4.1.128.Final"
+            details.because "Bumping from 4.1.127 (what Spark SQL 4.1.0-preview2 depends on) to minimize CVEs."
         }
     }
@@ -92,7 +88,7 @@ dependencies {
     // https://docs.gradle.org/current/userguide/upgrading_version_8.html#test_framework_implementation_dependencies
     // Without this, once using JUnit 5.12 or higher, Gradle will not find any tests and report an error of:
     // org.junit.platform.commons.JUnitException: TestEngine with ID 'junit-jupiter' failed to discover tests
-    testRuntimeOnly "org.junit.platform:junit-platform-launcher:1.13.4"
+    testRuntimeOnly "org.junit.platform:junit-platform-launcher:1.14.0"
 
     testImplementation("org.apache.spark:spark-sql_2.13:${sparkVersion}") {
         exclude module: "rocksdbjni"
@@ -103,28 +99,16 @@ dependencies {
         exclude group: "com.fasterxml.jackson.core"
     }
 
-    testImplementation('com.marklogic:ml-app-deployer:6.0.1') {
+    testImplementation('com.marklogic:ml-app-deployer:6.2-SNAPSHOT') {
         exclude group: "com.fasterxml.jackson.core"
         exclude group: "com.fasterxml.jackson.dataformat"
-
-        // Use the Java Client declared above.
-        exclude module: "marklogic-client-api"
     }
 
-    testImplementation('com.marklogic:marklogic-junit5:1.5.0') {
+    testImplementation('com.marklogic:marklogic-junit5:2.0-SNAPSHOT') {
         exclude group: "com.fasterxml.jackson.core"
         exclude group: "com.fasterxml.jackson.dataformat"
-
-        // Use the Java Client declared above.
-        exclude module: "marklogic-client-api"
-
-        // Use the Spring dependencies from ml-app-deployer 6 to avoid vulnerabilities in Spring 5.
-        exclude group: "org.springframework"
     }
 
-    // marklogic-junit5 still needs spring-test, but we want the 6 version to minimize vulnerabilities.
-    testImplementation "org.springframework:spring-test:6.2.11"
-
     testImplementation "ch.qos.logback:logback-classic:1.5.19"
     testImplementation "org.skyscreamer:jsonassert:1.5.3"
diff --git a/marklogic-spark-connector/src/main/java/com/marklogic/spark/reader/CustomLongOffset.java b/marklogic-spark-connector/src/main/java/com/marklogic/spark/reader/CustomLongOffset.java
new file mode 100644
index 00000000..61b38d44
--- /dev/null
+++ b/marklogic-spark-connector/src/main/java/com/marklogic/spark/reader/CustomLongOffset.java
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2023-2025 Progress Software Corporation and/or its subsidiaries or affiliates. All Rights Reserved.
+ */
+package com.marklogic.spark.reader;
+
+import org.apache.spark.sql.connector.read.streaming.Offset;
+
+// Added to avoid dependency on Spark's LongOffset class, which was removed in 4.1.0-preview2.
+public class CustomLongOffset extends Offset {
+
+    private final long value;
+
+    public CustomLongOffset(long value) {
+        this.value = value;
+    }
+
+    @Override
+    public String json() {
+        return String.valueOf(value);
+    }
+
+    public long getValue() {
+        return value;
+    }
+}
diff --git a/marklogic-spark-connector/src/main/java/com/marklogic/spark/reader/customcode/CustomCodeMicroBatchStream.java b/marklogic-spark-connector/src/main/java/com/marklogic/spark/reader/customcode/CustomCodeMicroBatchStream.java
index dcf22f3c..07ca7f9c 100644
--- a/marklogic-spark-connector/src/main/java/com/marklogic/spark/reader/customcode/CustomCodeMicroBatchStream.java
+++ b/marklogic-spark-connector/src/main/java/com/marklogic/spark/reader/customcode/CustomCodeMicroBatchStream.java
@@ -4,11 +4,11 @@
 package com.marklogic.spark.reader.customcode;
 
 import com.marklogic.spark.Util;
+import com.marklogic.spark.reader.CustomLongOffset;
 import org.apache.spark.sql.connector.read.InputPartition;
 import org.apache.spark.sql.connector.read.PartitionReaderFactory;
 import org.apache.spark.sql.connector.read.streaming.MicroBatchStream;
 import org.apache.spark.sql.connector.read.streaming.Offset;
-import org.apache.spark.sql.execution.streaming.LongOffset;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -35,7 +35,7 @@ class CustomCodeMicroBatchStream implements MicroBatchStream {
      */
     @Override
     public Offset latestOffset() {
-        Offset result = partitionIndex >= partitions.size() ? null : new LongOffset(partitionIndex);
+        Offset result = partitionIndex >= partitions.size() ? null : new CustomLongOffset(partitionIndex);
         if (logger.isTraceEnabled()) {
             logger.trace("Returning latest offset: {}", partitionIndex);
         }
@@ -50,7 +50,7 @@ public Offset latestOffset() {
      */
     @Override
     public InputPartition[] planInputPartitions(Offset start, Offset end) {
-        long index = ((LongOffset) end).offset();
+        long index = ((CustomLongOffset) end).getValue();
         return new InputPartition[]{new CustomCodePartition(partitions.get((int) index))};
     }
 
@@ -61,12 +61,12 @@ public PartitionReaderFactory createReaderFactory() {
 
     @Override
     public Offset initialOffset() {
-        return new LongOffset(0);
+        return new CustomLongOffset(0);
     }
 
     @Override
     public Offset deserializeOffset(String json) {
-        return new LongOffset(Long.parseLong(json));
+        return new CustomLongOffset(Long.parseLong(json));
     }
 
     @Override
diff --git a/marklogic-spark-connector/src/main/java/com/marklogic/spark/reader/optic/OpticMicroBatchStream.java b/marklogic-spark-connector/src/main/java/com/marklogic/spark/reader/optic/OpticMicroBatchStream.java
index fb0b0fcb..13271537 100644
--- a/marklogic-spark-connector/src/main/java/com/marklogic/spark/reader/optic/OpticMicroBatchStream.java
+++ b/marklogic-spark-connector/src/main/java/com/marklogic/spark/reader/optic/OpticMicroBatchStream.java
@@ -4,11 +4,11 @@
 package com.marklogic.spark.reader.optic;
 
 import com.marklogic.spark.Util;
+import com.marklogic.spark.reader.CustomLongOffset;
 import org.apache.spark.sql.connector.read.InputPartition;
 import org.apache.spark.sql.connector.read.PartitionReaderFactory;
 import org.apache.spark.sql.connector.read.streaming.MicroBatchStream;
 import org.apache.spark.sql.connector.read.streaming.Offset;
-import org.apache.spark.sql.execution.streaming.LongOffset;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -44,7 +44,7 @@ public Offset latestOffset() {
         if (logger.isTraceEnabled()) {
             logger.trace("Returning latest offset: {}", bucketIndex);
         }
-        return new LongOffset(bucketIndex++);
+        return new CustomLongOffset(bucketIndex++);
     }
 
     /**
@@ -57,7 +57,7 @@ public Offset latestOffset() {
      */
     @Override
     public InputPartition[] planInputPartitions(Offset start, Offset end) {
-        int index = (int) ((LongOffset) end).offset();
+        int index = (int) ((CustomLongOffset) end).getValue();
         return index >= allBuckets.size() ?
             null :
             new InputPartition[]{new PlanAnalysis.Partition(index + "", allBuckets.get(index))};
@@ -70,12 +70,12 @@ public PartitionReaderFactory createReaderFactory() {
 
     @Override
     public Offset initialOffset() {
-        return new LongOffset(0);
+        return new CustomLongOffset(0);
     }
 
     @Override
     public Offset deserializeOffset(String json) {
-        return new LongOffset(Long.parseLong(json));
+        return new CustomLongOffset(Long.parseLong(json));
     }
 
     @Override
diff --git a/test-app/build.gradle b/test-app/build.gradle
index 0a96abd9..56b9cec5 100644
--- a/test-app/build.gradle
+++ b/test-app/build.gradle
@@ -1,8 +1,22 @@
+buildscript {
+    repositories {
+        mavenCentral()
+        // Needed for ml-gradle 6.2-SNAPSHOT
+        maven {
+            url = "https://bed-artifactory.bedford.progress.com:443/artifactory/ml-maven-snapshots/"
+        }
+    }
+    dependencies {
+        classpath "com.marklogic:ml-gradle:6.2-SNAPSHOT"
+    }
+}
+
 plugins {
     id "net.saliman.properties" version "1.5.2"
-    id "com.marklogic.ml-gradle" version "6.0.1"
 }
 
+apply plugin: "com.marklogic.ml-gradle"
+
 tasks.register("reloadTestData", com.marklogic.gradle.task.MarkLogicTask) {
     description = "Convenience task for clearing the test database and reloading the test data; only intended for a connector developer to use."
     doLast {