
Commit ad960a6

MLE-24402 Removing deprecated option and all compiler warnings
The write.fileRows.documentType option was deprecated in 2.3.0, as only write.documentType is needed. Converted the tests to use that option instead, then removed a small number of compiler warnings and enabled build failures for any future compiler warnings.
1 parent 97c0acc commit ad960a6
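For context, a minimal sketch of how the surviving option is used when ingesting generic files through the connector. This is not code from the commit; the connection URI and file path are placeholders, and it assumes the connector's `marklogic` format name and Spark's `binaryFile` data source.

```java
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

public class WriteDocumentTypeExample {
    public static void main(String[] args) {
        SparkSession session = SparkSession.builder().master("local[*]").getOrCreate();

        // Read files as "file rows" via Spark's binaryFile data source.
        Dataset<Row> files = session.read().format("binaryFile").load("data/*.unknown");

        files.write()
            .format("marklogic")
            // Placeholder connection details.
            .option("spark.marklogic.client.uri", "user:password@localhost:8000")
            // The one remaining option for forcing a document type; the deprecated
            // spark.marklogic.write.fileRows.documentType option is removed by this commit.
            .option("spark.marklogic.write.documentType", "JSON")
            .mode(SaveMode.Append)
            .save();
    }
}
```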

File tree

11 files changed: +25 / -62 lines changed


build.gradle

Lines changed: 3 additions & 7 deletions
@@ -36,7 +36,7 @@ subprojects {
     repositories {
         mavenCentral()
         maven {
-            url "https://bed-artifactory.bedford.progress.com:443/artifactory/ml-maven-snapshots/"
+            url = "https://bed-artifactory.bedford.progress.com:443/artifactory/ml-maven-snapshots/"
         }
     }

@@ -79,16 +79,12 @@ subprojects {
         exclude module: "rocksdbjni"
     }

-    task allDeps(type: DependencyReportTask) {
-        description = "Allows for generating dependency reports for every subproject in a single task."
-    }
-
     test {
         useJUnitPlatform()
         finalizedBy jacocoTestReport
         testLogging {
-            events 'started', 'passed', 'skipped', 'failed'
-            exceptionFormat 'full'
+            events = ['started', 'passed', 'skipped', 'failed']
+            exceptionFormat = 'full'
         }
         environment "SEMAPHORE_API_KEY", semaphoreApiKey
         environment "SEMAPHORE_HOST", semaphoreHost

docs/configuration.md

Lines changed: 1 addition & 1 deletion
@@ -189,8 +189,8 @@ The following options control how the connector writes rows as documents to MarkLogic
 | spark.marklogic.write.abortOnFailure | Whether the Spark job should abort if a batch fails to be written; defaults to `true`. |
 | spark.marklogic.write.batchSize | The number of documents written in a call to MarkLogic; defaults to 100. |
 | spark.marklogic.write.collections | Comma-delimited string of collection names to add to each document. |
+| spark.marklogic.write.documentType | Forces a document type when MarkLogic does not recognize a URI extension; must be one of `JSON`, `XML`, or `TEXT`. |
 | spark.marklogic.write.permissions | Comma-delimited string of role names and capabilities to add to each document - e.g. role1,read,role2,update,role3,execute . |
-| spark.marklogic.write.fileRows.documentType | Forces a document type when MarkLogic does not recognize a URI extension; must be one of `JSON`, `XML`, or `TEXT`. |
 | spark.marklogic.write.jsonRootName | As of 2.3.0, specifies a root field name when writing JSON documents based on arbitrary rows. |
 | spark.marklogic.write.temporalCollection | Name of a temporal collection to assign each document to. |
 | spark.marklogic.write.threadCount | The number of threads used across all partitions to send documents to MarkLogic; defaults to 4. |

docs/reading-data/reading-files/generic-file-support.md

Lines changed: 2 additions & 2 deletions
@@ -92,10 +92,10 @@ If you are writing files with extensions that MarkLogic does not recognize based
 you can force a document type for each file with an unrecognized extension:

 ```
-.option("spark.marklogic.write.fileRows.documentType", "JSON")
+.option("spark.marklogic.write.documentType", "JSON")
 ```

-The `spark.marklogic.write.fileRows.documentType` option supports values of `JSON`, `XML`, and `TEXT`.
+The `spark.marklogic.write.documentType` option supports values of `JSON`, `XML`, and `TEXT`.

 Please see [the guide on writing data](../../writing.md) for information on how "file rows" can then be written to
 MarkLogic as documents.

docs/writing.md

Lines changed: 1 addition & 1 deletion
@@ -49,7 +49,7 @@ The URI can then be further adjusted as described in the "Controlling document U
 This feature allows for ingesting files of any type. The MarkLogic REST API will
 [determine the document type](https://docs.marklogic.com/guide/rest-dev/intro#id_53367) based on the URI extension, if
 MarkLogic recognizes it. If MarkLogic does not recognize the extension, and you wish to force a document type on each of
-the documents, you can set the `spark.marklogic.write.fileRows.documentType` option to one of `XML`, `JSON`, or `TEXT`.
+the documents, you can set the `spark.marklogic.write.documentType` option to one of `XML`, `JSON`, or `TEXT`.

 ### Writing document rows

marklogic-spark-connector/build.gradle

Lines changed: 12 additions & 5 deletions
@@ -116,7 +116,7 @@ test {
     ]

     // Increased heap size as part of the Spark 4 upgrade, as we started experiencing OOM errors in the tests.
-    jvmArgs '-Xmx6g', '-Xms3g'
+    jvmArgs = ['-Xmx6g', '-Xms3g']
 }

 shadowJar {

@@ -145,6 +145,13 @@ java {
     withSourcesJar()
 }

+// Allows for identifying compiler warnings and treating them as errors.
+tasks.withType(JavaCompile) {
+    options.compilerArgs += ["-Xlint:unchecked", "-Xlint:deprecation", "-Werror"]
+    options.deprecation = true
+    options.warnings = true
+}
+
 javadoc.failOnError = false
 // Ignores warnings on params that don't have descriptions, which is a little too noisy
 javadoc.options.addStringOption('Xdoclint:none', '-quiet')

@@ -185,17 +192,17 @@ publishing {
         maven {
             if (project.hasProperty("mavenUser")) {
                 credentials {
-                    username mavenUser
-                    password mavenPassword
+                    username = mavenUser
+                    password = mavenPassword
                 }
                 url publishUrl
                 allowInsecureProtocol = true
             } else {
                 name = "central"
                 url = mavenCentralUrl
                 credentials {
-                    username mavenCentralUsername
-                    password mavenCentralPassword
+                    username = mavenCentralUsername
+                    password = mavenCentralPassword
                 }
             }
         }
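A hypothetical illustration, not code from this repository, of what the new compiler settings mean in practice: with `-Xlint:unchecked` plus `-Werror`, an unchecked-conversion warning that previously only cluttered the build log now fails compilation.

```java
import java.util.ArrayList;
import java.util.List;

public class WarningExample {
    public static void main(String[] args) {
        // Assigning a raw ArrayList to a parameterized List produces an
        // [unchecked] conversion warning, which -Werror now promotes to an error.
        List<String> names = new ArrayList();
        names.add("example");
        System.out.println(names);
    }
}
```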

marklogic-spark-connector/src/main/java/com/marklogic/spark/Options.java

Lines changed: 0 additions & 10 deletions
@@ -355,16 +355,6 @@ public abstract class Options {
     public static final String WRITE_GRAPH = "spark.marklogic.write.graph";
     public static final String WRITE_GRAPH_OVERRIDE = "spark.marklogic.write.graphOverride";

-    /**
-     * For writing rows adhering to Spark's binaryFile schema - https://spark.apache.org/docs/latest/sql-data-sources-binaryFile.html .
-     *
-     * @deprecated since 2.3.0
-     */
-    @Deprecated(since = "2.3.0", forRemoval = true)
-    // We don't need Sonar to remind us of this deprecation.
-    @SuppressWarnings("java:S1133")
-    public static final String WRITE_FILE_ROWS_DOCUMENT_TYPE = "spark.marklogic.write.fileRows.documentType";
-
     // Forces a document type when writing rows corresponding to our document row schema. Used when the URI extension
     // does not result in MarkLogic choosing the correct document type.
     public static final String WRITE_DOCUMENT_TYPE = "spark.marklogic.write.documentType";

marklogic-spark-connector/src/main/java/com/marklogic/spark/writer/FileRowConverter.java

Lines changed: 1 addition & 7 deletions
@@ -19,7 +19,7 @@
 import java.util.stream.Stream;

 /**
- * Knows how to build a document from a row corresponding to our {@code FileRowSchema}.
+ * Knows how to build a document from a row corresponding to a row from Spark's binaryFile data source.
  */
 class FileRowConverter implements RowConverter {

@@ -47,16 +47,10 @@ public Iterator<DocumentInputs> getRemainingDocumentInputs() {
         return Stream.<DocumentInputs>empty().iterator();
     }

-    @SuppressWarnings({"deprecation", "removal"})
     private void forceFormatIfNecessary(BytesHandle content) {
         Format format = writeContext.getDocumentFormat();
         if (format != null) {
             content.withFormat(format);
-        } else {
-            format = writeContext.getDeprecatedFileRowsDocumentFormat();
-            if (format != null) {
-                content.withFormat(format);
-            }
         }
     }

marklogic-spark-connector/src/main/java/com/marklogic/spark/writer/WriteContext.java

Lines changed: 0 additions & 22 deletions
@@ -181,28 +181,6 @@ public Format getDocumentFormat() {
         return null;
     }

-    /**
-     * @deprecated since 2.3.0; users should use getDocumentFormat instead.
-     */
-    @Deprecated(since = "2.3.0")
-    // We don't need Sonar to remind us of this deprecation.
-    @SuppressWarnings({"java:S1133", "removal"})
-    Format getDeprecatedFileRowsDocumentFormat() {
-        final String deprecatedOption = Options.WRITE_FILE_ROWS_DOCUMENT_TYPE;
-        if (hasOption(deprecatedOption)) {
-            String value = getStringOption(deprecatedOption);
-            Objects.requireNonNull(value);
-            try {
-                return Format.valueOf(value.toUpperCase());
-            } catch (IllegalArgumentException e) {
-                String message = "Invalid value for %s: %s; must be one of 'JSON', 'XML', or 'TEXT'.";
-                String optionAlias = getOptionNameForMessage(deprecatedOption);
-                throw new ConnectorException(String.format(message, optionAlias, value));
-            }
-        }
-        return null;
-    }
-
     /**
      * The URI template approach will typically be used with rows with an "arbitrary" schema where each column value
      * may be useful in constructing a URI.
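The deleted method above parsed the deprecated option's value into a `Format`. Below is a minimal, self-contained sketch of that same parsing pattern, which presumably lives on in `getDocumentFormat()` for `spark.marklogic.write.documentType`; that is an assumption, the class and method names here are illustrative, and only `Format` comes from the MarkLogic Java Client API.

```java
import com.marklogic.client.io.Format;

public class DocumentTypeParser {

    // Mirrors the validation the removed getDeprecatedFileRowsDocumentFormat() performed,
    // but for an arbitrary option name; a plain IllegalArgumentException stands in for ConnectorException.
    static Format parseDocumentType(String optionName, String value) {
        try {
            return Format.valueOf(value.toUpperCase());
        } catch (IllegalArgumentException e) {
            throw new IllegalArgumentException(String.format(
                "Invalid value for %s: %s; must be one of 'JSON', 'XML', or 'TEXT'.", optionName, value));
        }
    }

    public static void main(String[] args) {
        // Prints JSON; an unsupported value such as "YAML" would throw.
        System.out.println(parseDocumentType("spark.marklogic.write.documentType", "json"));
    }
}
```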

marklogic-spark-connector/src/main/resources/marklogic-spark-messages.properties

Lines changed: 1 addition & 1 deletion
@@ -1,3 +1,4 @@
+# Copyright (c) 2023-2025 Progress Software Corporation and/or its subsidiaries or affiliates. All Rights Reserved.
 # Defines various messages for the connector. Intended to be inherited and overridden by the ETL tool via
 # marklogic-spark-messages_en.properties, where each option name can be associated with a CLI option in the ETL tool.
 spark.marklogic.client.uri=

@@ -7,7 +8,6 @@ spark.marklogic.read.numPartitions=
 spark.marklogic.read.noOpticQuery=No Optic query found; must define spark.marklogic.read.opticQuery
 spark.marklogic.write.batchSize=
 spark.marklogic.write.documentType=
-spark.marklogic.write.fileRows.documentType=
 spark.marklogic.write.graph=
 spark.marklogic.write.graphOverride=
 spark.marklogic.write.jsonRootName=

marklogic-spark-connector/src/test/java/com/marklogic/spark/AbstractIntegrationTest.java

Lines changed: 1 addition & 1 deletion
@@ -139,7 +139,7 @@ protected final String rowsToString(List<Row> rows) {
     /**
      * Avoids having to repeat mode/save.
      */
-    protected void defaultWrite(DataFrameWriter writer) {
+    protected void defaultWrite(DataFrameWriter<?> writer) {
        writer.options(defaultWriteOptions())
            .mode(SaveMode.Append)
            .save();
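A small sketch, not repository code, of why the wildcard matters: `Dataset<Row>.write()` returns a `DataFrameWriter<Row>`, so a `DataFrameWriter<?>` parameter accepts it with full type information, whereas a raw `DataFrameWriter` parameter relies on raw types and draws raw-type warnings from tools that check for them.

```java
import org.apache.spark.sql.DataFrameWriter;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

public class WildcardWriterExample {

    // Same shape as the test helper: accepts any DataFrameWriter without using raw types.
    static void defaultWrite(DataFrameWriter<?> writer) {
        writer.mode(SaveMode.Append).save();
    }

    public static void main(String[] args) {
        SparkSession session = SparkSession.builder().master("local[*]").getOrCreate();
        Dataset<Row> rows = session.range(10).toDF();
        defaultWrite(rows.write().format("json").option("path", "build/example-output"));
    }
}
```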
