
Commit 1bb1203

Merge pull request #487 from marklogic/feature/and-more-polaris

More Polaris null fixes, trying out NotNull

2 parents: aff6887 + 379416c

6 files changed (+30 -28 lines)

marklogic-spark-api/src/main/java/com/marklogic/spark/Context.java
Lines changed: 2 additions & 1 deletion

@@ -5,6 +5,7 @@
 
 import java.io.Serializable;
 import java.util.Map;
+import java.util.Objects;
 import java.util.stream.Stream;
 
 public class Context implements Serializable {
@@ -49,7 +50,7 @@ public final long getNumericOption(String optionName, long defaultValue, long mi
     public final boolean getBooleanOption(String option, boolean defaultValue) {
         if (hasOption(option)) {
             String value = getStringOption(option);
-            assert value != null;
+            Objects.requireNonNull(value);
             return Boolean.parseBoolean(value);
         }
         return defaultValue;
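
Note on the pattern used in this and several of the following files: the commit swaps assert statements for Objects.requireNonNull. The two are not equivalent, and the difference is presumably what the Polaris findings are about. A minimal, self-contained sketch of the behavioral difference; the class and variable names below are invented for illustration and are not taken from the connector:

import java.util.Objects;

public class NullGuardSketch {

    public static void main(String[] args) {
        String value = null;

        // Skipped entirely unless the JVM is started with -ea/-enableassertions,
        // so in a normal production run a null slips past this line silently.
        assert value != null;

        // Always evaluated: throws NullPointerException immediately on null,
        // which makes the guard visible to static analysis as well as at runtime.
        Objects.requireNonNull(value, "value must not be null");
    }
}
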

marklogic-spark-api/src/main/java/com/marklogic/spark/core/classifier/TextClassifierFactory.java
Lines changed: 12 additions & 11 deletions

@@ -18,6 +18,7 @@
 import java.net.URL;
 import java.util.HashMap;
 import java.util.Map;
+import java.util.Objects;
 
 public abstract class TextClassifierFactory {
 
@@ -29,7 +30,6 @@ public static TextClassifier newTextClassifier(Context context) {
 
         if (context.hasOption(MOCK_CLASSIFIER_OPTION)) {
             String mockResponse = context.getStringOption(MOCK_CLASSIFIER_OPTION);
-            assert mockResponse != null;
             semaphoreProxy = new MockSemaphoreProxy(mockResponse);
         } else if (host != null && !host.trim().isEmpty()) {
             try {
@@ -39,7 +39,7 @@ public static TextClassifier newTextClassifier(Context context) {
                 throw ex;
             } catch (Exception e) {
                 throw new ConnectorException(String.format("Unable to configure a connection for classifying text; cause: %s",
-                        e.getMessage()), e);
+                    e.getMessage()), e);
             }
         }
 
@@ -116,7 +116,7 @@ public URL getTokenUrl() {
            return new URL(protocol, host, port, tokenEndpoint);
        } catch (MalformedURLException e) {
            throw new ConnectorException(String.format("Unable to construct token URL with endpoint: %s; cause: %s",
-                    tokenEndpoint, e.getMessage()), e);
+                tokenEndpoint, e.getMessage()), e);
        }
    }
 
@@ -142,6 +142,7 @@ public static class MockSemaphoreProxy implements SemaphoreProxy {
         // Sonar doesn't like this static assignment, but it's fine in a class that's only used as a mock.
         @SuppressWarnings("java:S3010")
         private MockSemaphoreProxy(String mockResponse) {
+            Objects.requireNonNull(mockResponse);
             this.mockResponse = new DOMHelper(null).parseXmlString(mockResponse, null);
             timesInvoked = 0;
         }
@@ -157,14 +158,14 @@ public static int getTimesInvoked() {
         @Override
         public byte[] classifyDocument(byte[] content, String uri) {
             String mockSingleArticleResponse = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
-                    " <response>\n" +
-                    " <STRUCTUREDDOCUMENT>\n" +
-                    " <URL>../tmp/ca002056-e3f6-4c81-8c9f-00ca218330c4/1739460469_43eb</URL>\n" +
-                    " <SYSTEM name=\"HASH\" value=\"2c3bcaf41fbabf8ff2e236c7580893ec\"/>\n" +
-                    " <META name=\"Type\" value=\"TEXT (4003)\"/>\n" +
-                    " <META name=\"title/document_title\" value=\"/some-uri.xml\"/>\n" +
-                    " <SYSTEM name=\"DeterminedLanguage\" value=\"default\"/>" +
-                    "</STRUCTUREDDOCUMENT></response>\n";
+                " <response>\n" +
+                " <STRUCTUREDDOCUMENT>\n" +
+                " <URL>../tmp/ca002056-e3f6-4c81-8c9f-00ca218330c4/1739460469_43eb</URL>\n" +
+                " <SYSTEM name=\"HASH\" value=\"2c3bcaf41fbabf8ff2e236c7580893ec\"/>\n" +
+                " <META name=\"Type\" value=\"TEXT (4003)\"/>\n" +
+                " <META name=\"title/document_title\" value=\"/some-uri.xml\"/>\n" +
+                " <SYSTEM name=\"DeterminedLanguage\" value=\"default\"/>" +
+                "</STRUCTUREDDOCUMENT></response>\n";
 
             return mockSingleArticleResponse.getBytes();
         }
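
In MockSemaphoreProxy the guard moves into the constructor, so a null mock response fails immediately rather than deep inside the XML parsing call. A small invented sketch of that fail-fast constructor pattern, not the connector's class:

import java.util.Objects;

public class MockResponseSketch {

    private final String mockResponse;

    MockResponseSketch(String mockResponse) {
        // Fail fast: reject a null argument here, before any later code tries to parse it.
        // requireNonNull also returns its argument, so it can wrap the field assignment.
        this.mockResponse = Objects.requireNonNull(mockResponse, "mockResponse must not be null");
    }

    public static void main(String[] args) {
        System.out.println(new MockResponseSketch("<response/>").mockResponse);
    }
}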

marklogic-spark-api/src/main/java/com/marklogic/spark/core/extraction/TikaTextExtractor.java
Lines changed: 3 additions & 5 deletions

@@ -15,7 +15,6 @@
 import java.io.IOException;
 import java.util.LinkedHashMap;
 import java.util.Map;
-import java.util.Objects;
 import java.util.Optional;
 
 public class TikaTextExtractor implements TextExtractor {
@@ -26,13 +25,12 @@ public class TikaTextExtractor implements TextExtractor {
 
     @Override
     public Optional<ExtractionResult> extractText(DocumentInputs inputs) {
-        Objects.requireNonNull(inputs);
-        if (inputs.getContent() == null) {
+        byte[] contentAsBytes = inputs.getContentAsBytes();
+        if (contentAsBytes == null) {
             return Optional.empty();
         }
 
-        Objects.requireNonNull(inputs.getContentAsBytes());
-        try (ByteArrayInputStream stream = new ByteArrayInputStream(inputs.getContentAsBytes())) {
+        try (ByteArrayInputStream stream = new ByteArrayInputStream(contentAsBytes)) {
             Metadata metadata = new Metadata();
             String extractedText = tika.parseToString(stream, metadata);
             // Retain the order of these while dropping known keys that we know are just noise.
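
Here the separate requireNonNull calls are replaced by reading getContentAsBytes() once into a local and branching on that local. A rough, self-contained sketch of that read-once shape; the class and method below are stand-ins, not the connector's API:

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.Optional;

public class ReadOnceSketch {

    // Capture the possibly-null value in a local, check it once, and use only the local
    // afterwards, so every later use is provably covered by the null check.
    static Optional<String> extractText(byte[] content) {
        if (content == null) {
            return Optional.empty();
        }
        try (ByteArrayInputStream stream = new ByteArrayInputStream(content)) {
            return Optional.of(new String(stream.readAllBytes()));
        } catch (IOException e) {
            return Optional.empty();
        }
    }

    public static void main(String[] args) {
        System.out.println(extractText("some text".getBytes()).orElse("<no content>"));
        System.out.println(extractText(null).orElse("<no content>"));
    }
}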

marklogic-spark-connector/src/main/java/com/marklogic/spark/reader/document/DocumentRowSchema.java
Lines changed: 3 additions & 2 deletions

@@ -89,15 +89,16 @@ private static void addPermissionsToMetadata(InternalRow row, DocumentMetadataHa
         ArrayData roles = permissions.keyArray();
         ArrayData capabilities = permissions.valueArray();
         for (int i = 0; i < roles.numElements(); i++) {
-            String role = roles.get(i, DataTypes.StringType).toString();
+            Object role = roles.get(i, DataTypes.StringType);
+            Objects.requireNonNull(role);
             ArrayData caps = capabilities.getArray(i);
             DocumentMetadataHandle.Capability[] capArray = new DocumentMetadataHandle.Capability[caps.numElements()];
             for (int j = 0; j < caps.numElements(); j++) {
                 Object value = caps.get(j, DataTypes.StringType);
                 Objects.requireNonNull(value);
                 capArray[j] = DocumentMetadataHandle.Capability.valueOf(value.toString().toUpperCase());
             }
-            metadata.getPermissions().add(role, capArray);
+            metadata.getPermissions().add(role.toString(), capArray);
         }
     }
 }
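
Spark's ArrayData.get returns an untyped Object whose nullness the analyzer cannot rule out, so chaining .toString() directly onto the lookup result was flagged; the fix holds the Object, null-checks it, and only then converts it, mirroring what the loop already did for the capability values. An illustrative sketch of that ordering, using a plain array in place of Spark's ArrayData:

import java.util.Objects;

public class CheckBeforeToStringSketch {

    public static void main(String[] args) {
        // A plain Object[] stands in for ArrayData, which likewise hands back untyped,
        // possibly-null Objects from its element accessors.
        Object[] roles = {"rest-reader", "rest-writer"};

        for (Object role : roles) {
            // Null-check the raw element first, then convert it, rather than calling
            // toString() directly on the lookup result.
            Objects.requireNonNull(role);
            String roleName = role.toString();
            System.out.println(roleName);
        }
    }
}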

marklogic-spark-connector/src/main/java/com/marklogic/spark/writer/file/ContentWriter.java
Lines changed: 7 additions & 7 deletions

@@ -16,6 +16,7 @@
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.sql.catalyst.InternalRow;
 
+import javax.validation.constraints.NotNull;
 import javax.xml.transform.*;
 import javax.xml.transform.stream.StreamResult;
 import javax.xml.transform.stream.StreamSource;
@@ -66,18 +67,17 @@ class ContentWriter {
         }
     }
 
-    void writeContent(InternalRow row, OutputStream outputStream) throws IOException {
-        Objects.requireNonNull(outputStream);
+    void writeContent(InternalRow row, @NotNull OutputStream outputStream) throws IOException {
         if (this.isStreamingFiles) {
             streamDocumentToFile(row, outputStream);
         } else if (this.prettyPrint) {
             prettyPrintContent(row, outputStream);
         } else if (this.encoding != null) {
             // We know the string from MarkLogic is UTF-8, so we use getBytes to convert it to the user's
             // specified encoding (as opposed to new String(bytes, encoding)).
-            String content = new String(row.getBinary(1));
-            Objects.requireNonNull(content);
-            outputStream.write(content.getBytes(this.encoding));
+            byte[] binary = row.getBinary(1);
+            Objects.requireNonNull(binary);
+            outputStream.write(new String(binary).getBytes(this.encoding));
         } else {
             outputStream.write(row.getBinary(1));
         }
@@ -139,15 +139,15 @@ private Transformer newTransformer() {
         }
     }
 
-    private void prettyPrintContent(InternalRow row, OutputStream outputStream) throws IOException {
+    private void prettyPrintContent(InternalRow row, @NotNull OutputStream outputStream) throws IOException {
         final byte[] content = row.getBinary(1);
+        Objects.requireNonNull(content);
         final String format = row.isNullAt(2) ? null : row.getString(2);
         if ("JSON".equalsIgnoreCase(format)) {
             prettyPrintJson(content, outputStream);
         } else if ("XML".equalsIgnoreCase(format)) {
             prettyPrintXml(content, outputStream);
         } else {
-            Objects.requireNonNull(content);
             if (this.encoding != null) {
                 outputStream.write(new String(content).getBytes(this.encoding));
             } else {
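
ContentWriter is where the "trying out NotNull" part of the commit message shows up: instead of an explicit runtime check, the OutputStream parameters are annotated with javax.validation.constraints.NotNull. On its own that annotation documents the contract and gives static analyzers something to verify; it does not throw at runtime unless a Bean Validation provider or method-validation proxy enforces it. A minimal sketch of the annotated-parameter style, assuming the javax.validation API is on the classpath as this diff implies; the class and method names are illustrative:

import javax.validation.constraints.NotNull;

import java.io.IOException;
import java.io.OutputStream;

class AnnotatedWriterSketch {

    // The annotation states that callers must not pass null; tools can check it,
    // but nothing here throws at runtime without a validation framework in play.
    void writeContent(byte[] content, @NotNull OutputStream outputStream) throws IOException {
        outputStream.write(content);
    }
}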

marklogic-spark-connector/src/main/java/com/marklogic/spark/writer/file/ZipFileWriter.java
Lines changed: 3 additions & 2 deletions

@@ -21,6 +21,7 @@
 import java.text.SimpleDateFormat;
 import java.util.Date;
 import java.util.Map;
+import java.util.Objects;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipOutputStream;
 
@@ -60,8 +61,8 @@ public void write(InternalRow row) throws IOException {
         if (contentWriter == null) {
             createZipFileAndContentWriter();
         }
-        assert zipOutputStream != null;
-        assert contentWriter != null;
+        Objects.requireNonNull(zipOutputStream);
+        Objects.requireNonNull(contentWriter);
 
         final String uri = row.getString(0);
         final String entryName = FileUtil.makePathFromDocumentURI(uri);
