Skip to content

Commit 6c5b7ec

Browse files
committed
MLE-26427 Initial exclusion support for JSON
This isn't quite done - I want to do a PR for excluding XML next, and then refactor the code, likely moving the tests into a new test class. But this pushes things forward a bit with exclusions.
1 parent 971d42f commit 6c5b7ec

File tree

5 files changed

+161
-9
lines changed

5 files changed

+161
-9
lines changed
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
/*
2+
* Copyright (c) 2010-2026 Progress Software Corporation and/or its subsidiaries or affiliates. All Rights Reserved.
3+
*/
4+
package com.marklogic.client.datamovement.filter;
5+
6+
import com.fasterxml.jackson.core.JsonPointer;
7+
import com.fasterxml.jackson.core.JsonProcessingException;
8+
import com.fasterxml.jackson.databind.JsonNode;
9+
import com.fasterxml.jackson.databind.ObjectMapper;
10+
import com.fasterxml.jackson.databind.node.ObjectNode;
11+
import org.slf4j.Logger;
12+
import org.slf4j.LoggerFactory;
13+
14+
/**
15+
* Utility class for applying content exclusions to documents before hash calculation.
16+
* Supports removing specific paths from JSON and XML documents using JSON Pointer and XPath expressions.
17+
*
18+
* @since 8.1.0
19+
*/
20+
public class ContentExclusionUtil {
21+
22+
private static final Logger logger = LoggerFactory.getLogger(ContentExclusionUtil.class);
23+
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
24+
25+
/**
26+
* Applies JSON Pointer exclusions to JSON content by removing the specified paths.
27+
*
28+
* @param uri the document URI (used for logging purposes)
29+
* @param jsonContent the JSON content as a string
30+
* @param jsonPointers array of RFC 6901 JSON Pointer expressions identifying properties to exclude
31+
* @return the modified JSON content with specified paths removed
32+
* @throws JsonProcessingException if the JSON content cannot be parsed or serialized
33+
*/
34+
public static String applyJsonExclusions(String uri, String jsonContent, String[] jsonPointers) throws JsonProcessingException {
35+
if (jsonPointers == null || jsonPointers.length == 0) {
36+
return jsonContent;
37+
}
38+
39+
JsonNode rootNode = OBJECT_MAPPER.readTree(jsonContent);
40+
for (String jsonPointer : jsonPointers) {
41+
removeNodeAtPointer(uri, rootNode, jsonPointer);
42+
}
43+
return OBJECT_MAPPER.writeValueAsString(rootNode);
44+
}
45+
46+
/**
47+
* Removes a node at the specified JSON Pointer path from the given root node.
48+
*
49+
* @param uri the document URI (used for logging purposes)
50+
* @param rootNode the root JSON node
51+
* @param jsonPointer the JSON Pointer expression identifying the node to remove
52+
*/
53+
private static void removeNodeAtPointer(String uri, JsonNode rootNode, String jsonPointer) {
54+
JsonPointer pointer = JsonPointer.compile(jsonPointer);
55+
JsonNode targetNode = rootNode.at(pointer);
56+
57+
if (targetNode.isMissingNode()) {
58+
logger.debug("JSONPointer '{}' does not exist in document {}, skipping", jsonPointer, uri);
59+
return;
60+
}
61+
62+
// Use Jackson's JsonPointer API to get parent and field name
63+
JsonPointer parentPointer = pointer.head();
64+
JsonNode parentNode = rootNode.at(parentPointer);
65+
66+
if (parentNode.isObject()) {
67+
String fieldName = pointer.last().getMatchingProperty();
68+
((ObjectNode) parentNode).remove(fieldName);
69+
} else if (parentNode.isArray()) {
70+
logger.warn("Array element exclusion not supported for JSONPointer '{}'. " +
71+
"Consider excluding the entire array property instead.", jsonPointer);
72+
}
73+
}
74+
75+
// Future method for XML exclusions
76+
// public static String applyXmlExclusions(String xmlContent, String[] xpaths) { ... }
77+
}

marklogic-client-api/src/main/java/com/marklogic/client/datamovement/filter/IncrementalWriteEvalFilter.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2010-2025 Progress Software Corporation and/or its subsidiaries or affiliates. All Rights Reserved.
2+
* Copyright (c) 2010-2026 Progress Software Corporation and/or its subsidiaries or affiliates. All Rights Reserved.
33
*/
44
package com.marklogic.client.datamovement.filter;
55

@@ -31,8 +31,8 @@ class IncrementalWriteEvalFilter extends IncrementalWriteFilter {
3131
""";
3232

3333
IncrementalWriteEvalFilter(String hashKeyName, String timestampKeyName, boolean canonicalizeJson,
34-
Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer) {
35-
super(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer);
34+
Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer, String[] jsonExclusions) {
35+
super(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions);
3636
}
3737

3838
@Override

marklogic-client-api/src/main/java/com/marklogic/client/datamovement/filter/IncrementalWriteFilter.java

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ public static class Builder {
4545
private boolean canonicalizeJson = true;
4646
private boolean useEvalQuery = false;
4747
private Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer;
48+
private String[] jsonExclusions;
4849

4950
/**
5051
* @param keyName the name of the MarkLogic metadata key that will hold the hash value; defaults to "incrementalWriteHash".
@@ -93,28 +94,39 @@ public Builder onDocumentsSkipped(Consumer<DocumentWriteOperation[]> skippedDocu
9394
return this;
9495
}
9596

97+
/**
98+
* @param jsonPointers JSON Pointer expressions (RFC 6901) identifying JSON properties to exclude from hash calculation.
99+
* For example, "/metadata/timestamp" or "/user/lastModified".
100+
*/
101+
public Builder jsonExclusions(String... jsonPointers) {
102+
this.jsonExclusions = jsonPointers;
103+
return this;
104+
}
105+
96106
public IncrementalWriteFilter build() {
97107
if (useEvalQuery) {
98-
return new IncrementalWriteEvalFilter(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer);
108+
return new IncrementalWriteEvalFilter(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions);
99109
}
100-
return new IncrementalWriteOpticFilter(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer);
110+
return new IncrementalWriteOpticFilter(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions);
101111
}
102112
}
103113

104114
protected final String hashKeyName;
105115
private final String timestampKeyName;
106116
private final boolean canonicalizeJson;
107117
private final Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer;
118+
private final String[] jsonExclusions;
108119

109120
// Hardcoding this for now, with a good general purpose hashing function.
110121
// See https://xxhash.com for benchmarks.
111122
private final LongHashFunction hashFunction = LongHashFunction.xx3();
112123

113-
public IncrementalWriteFilter(String hashKeyName, String timestampKeyName, boolean canonicalizeJson, Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer) {
124+
public IncrementalWriteFilter(String hashKeyName, String timestampKeyName, boolean canonicalizeJson, Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer, String[] jsonExclusions) {
114125
this.hashKeyName = hashKeyName;
115126
this.timestampKeyName = timestampKeyName;
116127
this.canonicalizeJson = canonicalizeJson;
117128
this.skippedDocumentsConsumer = skippedDocumentsConsumer;
129+
this.jsonExclusions = jsonExclusions;
118130
}
119131

120132
protected final DocumentWriteSet filterDocuments(Context context, Function<String, String> hashRetriever) {
@@ -165,6 +177,10 @@ private String serializeContent(DocumentWriteOperation doc) {
165177
if (canonicalizeJson && (Format.JSON.equals(format) || isPossiblyJsonContent(content))) {
166178
JsonCanonicalizer jc;
167179
try {
180+
if (jsonExclusions != null && jsonExclusions.length > 0) {
181+
// TBD on error handling here, want to get XML supported first.
182+
content = ContentExclusionUtil.applyJsonExclusions(doc.getUri(), content, jsonExclusions);
183+
}
168184
jc = new JsonCanonicalizer(content);
169185
return jc.getEncodedString();
170186
} catch (IOException e) {

marklogic-client-api/src/main/java/com/marklogic/client/datamovement/filter/IncrementalWriteOpticFilter.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2010-2025 Progress Software Corporation and/or its subsidiaries or affiliates. All Rights Reserved.
2+
* Copyright (c) 2010-2026 Progress Software Corporation and/or its subsidiaries or affiliates. All Rights Reserved.
33
*/
44
package com.marklogic.client.datamovement.filter;
55

@@ -20,8 +20,8 @@
2020
class IncrementalWriteOpticFilter extends IncrementalWriteFilter {
2121

2222
IncrementalWriteOpticFilter(String hashKeyName, String timestampKeyName, boolean canonicalizeJson,
23-
Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer) {
24-
super(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer);
23+
Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer, String[] jsonExclusions) {
24+
super(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions);
2525
}
2626

2727
@Override

marklogic-client-api/src/test/java/com/marklogic/client/datamovement/filter/IncrementalWriteTest.java

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,65 @@ void nullIsIgnoredForKeyNames() {
218218
assertNotNull(metadata.getMetadataValues().get("incrementalWriteTimestamp"));
219219
}
220220

221+
@Test
222+
void jsonExclusions() {
223+
filter = IncrementalWriteFilter.newBuilder()
224+
.jsonExclusions("/timestamp", "/metadata/lastModified")
225+
.onDocumentsSkipped(docs -> skippedCount.addAndGet(docs.length))
226+
.build();
227+
228+
// Write initial documents with three keys
229+
docs = new ArrayList<>();
230+
for (int i = 1; i <= 5; i++) {
231+
ObjectNode doc = objectMapper.createObjectNode();
232+
doc.put("id", i);
233+
doc.put("name", "Document " + i);
234+
doc.put("timestamp", "2025-01-01T10:00:00Z");
235+
doc.putObject("metadata")
236+
.put("lastModified", "2025-01-01T10:00:00Z")
237+
.put("author", "Test User");
238+
docs.add(new DocumentWriteOperationImpl("/incremental/test/json-doc-" + i + ".json", METADATA, new JacksonHandle(doc)));
239+
}
240+
241+
writeDocs(docs);
242+
assertEquals(5, writtenCount.get());
243+
assertEquals(0, skippedCount.get());
244+
245+
// Write again with different values for excluded fields - should be skipped
246+
docs = new ArrayList<>();
247+
for (int i = 1; i <= 5; i++) {
248+
ObjectNode doc = objectMapper.createObjectNode();
249+
doc.put("id", i);
250+
doc.put("name", "Document " + i);
251+
doc.put("timestamp", "2026-01-02T15:30:00Z"); // Changed
252+
doc.putObject("metadata")
253+
.put("lastModified", "2026-01-02T15:30:00Z") // Changed
254+
.put("author", "Test User");
255+
docs.add(new DocumentWriteOperationImpl("/incremental/test/json-doc-" + i + ".json", METADATA, new JacksonHandle(doc)));
256+
}
257+
258+
writeDocs(docs);
259+
assertEquals(5, writtenCount.get(), "Documents should be skipped since only excluded fields changed");
260+
assertEquals(5, skippedCount.get());
261+
262+
// Write again with actual content change - should NOT be skipped
263+
docs = new ArrayList<>();
264+
for (int i = 1; i <= 5; i++) {
265+
ObjectNode doc = objectMapper.createObjectNode();
266+
doc.put("id", i);
267+
doc.put("name", "Modified Document " + i); // Changed
268+
doc.put("timestamp", "2026-01-02T16:00:00Z");
269+
doc.putObject("metadata")
270+
.put("lastModified", "2026-01-02T16:00:00Z")
271+
.put("author", "Test User");
272+
docs.add(new DocumentWriteOperationImpl("/incremental/test/json-doc-" + i + ".json", METADATA, new JacksonHandle(doc)));
273+
}
274+
275+
writeDocs(docs);
276+
assertEquals(10, writtenCount.get(), "Documents should be written since non-excluded content changed");
277+
assertEquals(5, skippedCount.get(), "Skip count should remain at 5");
278+
}
279+
221280
private void verifyIncrementalWriteWorks() {
222281
writeTenDocuments();
223282
verifyDocumentsHasHashInMetadataKey();

Comments (0)