Skip to content

Commit 187fbd9

Browse files
committed
MLE-26427 Refactoring JAXP usage
Doing this before adding support for XML exclusions in the incremental write feature. Standardizes on a single way of creating a DocumentBuilderFactory. Also made a year-change-based fix in LegalHoldsTest.
1 parent 263ea0a commit 187fbd9

19 files changed

+127
-192
lines changed

marklogic-client-api/src/main/java/com/marklogic/client/impl/NodeConverter.java

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2010-2025 Progress Software Corporation and/or its subsidiaries or affiliates. All Rights Reserved.
2+
* Copyright (c) 2010-2026 Progress Software Corporation and/or its subsidiaries or affiliates. All Rights Reserved.
33
*/
44
package com.marklogic.client.impl;
55

@@ -35,7 +35,6 @@
3535

3636
public class NodeConverter {
3737
static private ObjectMapper mapper;
38-
static private DocumentBuilderFactory documentBuilderFactory;
3938
static private XMLInputFactory xmlInputFactory;
4039

4140
private NodeConverter() {
@@ -49,16 +48,7 @@ static private ObjectMapper getMapper() {
4948
}
5049
return mapper;
5150
}
52-
static private DocumentBuilderFactory getDocumentBuilderFactory() {
53-
// okay if one thread overwrites another during lazy initialization
54-
if (documentBuilderFactory == null) {
55-
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
56-
factory.setNamespaceAware(true);
57-
factory.setValidating(false);
58-
documentBuilderFactory = factory;
59-
}
60-
return documentBuilderFactory;
61-
}
51+
6252
static private XMLInputFactory getXMLInputFactory() {
6353
// okay if one thread overwrites another during lazy initialization
6454
if (xmlInputFactory == null) {
@@ -265,7 +255,7 @@ static public Stream<JsonParser> ReaderToJsonParser(Stream<? extends Reader> val
265255

266256
static public Document InputStreamToDocument(InputStream inputStream) {
267257
try {
268-
return (inputStream == null) ? null : getDocumentBuilderFactory().newDocumentBuilder().parse(inputStream);
258+
return (inputStream == null) ? null : XmlFactories.getDocumentBuilderFactory().newDocumentBuilder().parse(inputStream);
269259
} catch(SAXException | IOException | ParserConfigurationException e) {
270260
throw new RuntimeException(e);
271261
}

marklogic-client-api/src/main/java/com/marklogic/client/impl/XmlFactories.java

Lines changed: 70 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -7,24 +7,25 @@
77
import org.slf4j.LoggerFactory;
88

99
import javax.xml.XMLConstants;
10+
import javax.xml.parsers.DocumentBuilderFactory;
11+
import javax.xml.parsers.ParserConfigurationException;
1012
import javax.xml.stream.FactoryConfigurationError;
1113
import javax.xml.stream.XMLInputFactory;
1214
import javax.xml.stream.XMLOutputFactory;
1315
import javax.xml.transform.TransformerConfigurationException;
1416
import javax.xml.transform.TransformerFactory;
1517
import java.lang.ref.SoftReference;
18+
import java.util.function.Supplier;
1619

1720
public final class XmlFactories {
1821

1922
private static final Logger logger = LoggerFactory.getLogger(XmlFactories.class);
2023

2124
private static final CachedInstancePerThreadSupplier<XMLOutputFactory> cachedOutputFactory =
22-
new CachedInstancePerThreadSupplier<XMLOutputFactory>(new Supplier<XMLOutputFactory>() {
23-
@Override
24-
public XMLOutputFactory get() {
25-
return makeNewOutputFactory();
26-
}
27-
});
25+
new CachedInstancePerThreadSupplier<>(XmlFactories::makeNewOutputFactory);
26+
27+
private static final CachedInstancePerThreadSupplier<DocumentBuilderFactory> cachedDocumentBuilderFactory =
28+
new CachedInstancePerThreadSupplier<>(XmlFactories::makeNewDocumentBuilderFactory);
2829

2930
private XmlFactories() {} // preventing instances of utility class
3031

@@ -62,21 +63,78 @@ public static TransformerFactory makeNewTransformerFactory() {
6263
try {
6364
factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
6465
} catch (TransformerConfigurationException e) {
65-
logger.warn("Unable to set {} on TransformerFactory; cause: {}", XMLConstants.FEATURE_SECURE_PROCESSING, e.getMessage());
66+
logTransformerWarning(XMLConstants.FEATURE_SECURE_PROCESSING, e.getMessage());
6667
}
6768
try {
6869
factory.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, "");
6970
} catch (IllegalArgumentException e) {
70-
logger.warn("Unable to set {} on TransformerFactory; cause: {}", XMLConstants.ACCESS_EXTERNAL_DTD, e.getMessage());
71+
logTransformerWarning(XMLConstants.ACCESS_EXTERNAL_DTD, e.getMessage());
7172
}
7273
try {
7374
factory.setAttribute(XMLConstants.ACCESS_EXTERNAL_STYLESHEET, "");
7475
} catch (IllegalArgumentException e) {
75-
logger.warn("Unable to set {} on TransformerFactory; cause: {}", XMLConstants.ACCESS_EXTERNAL_STYLESHEET, e.getMessage());
76+
logTransformerWarning(XMLConstants.ACCESS_EXTERNAL_STYLESHEET, e.getMessage());
7677
}
7778
return factory;
7879
}
7980

81+
private static void logTransformerWarning(String xmlConstant, String errorMessage) {
82+
logger.warn("Unable to set {} on TransformerFactory; cause: {}", xmlConstant, errorMessage);
83+
}
84+
85+
private static DocumentBuilderFactory makeNewDocumentBuilderFactory() {
86+
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
87+
// Default to best practices for conservative security including recommendations per
88+
// https://github.com/OWASP/CheatSheetSeries/blob/master/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.md
89+
try {
90+
factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
91+
} catch (ParserConfigurationException e) {
92+
logger.warn("Unable to set FEATURE_SECURE_PROCESSING on DocumentBuilderFactory; cause: {}", e.getMessage());
93+
}
94+
try {
95+
factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
96+
} catch (ParserConfigurationException e) {
97+
logger.warn("Unable to set disallow-doctype-decl on DocumentBuilderFactory; cause: {}", e.getMessage());
98+
}
99+
try {
100+
factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
101+
} catch (ParserConfigurationException e) {
102+
logger.warn("Unable to set external-general-entities on DocumentBuilderFactory; cause: {}", e.getMessage());
103+
}
104+
try {
105+
factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
106+
} catch (ParserConfigurationException e) {
107+
logger.warn("Unable to set external-parameter-entities on DocumentBuilderFactory; cause: {}", e.getMessage());
108+
}
109+
try {
110+
factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
111+
} catch (ParserConfigurationException e) {
112+
logger.warn("Unable to set load-external-dtd on DocumentBuilderFactory; cause: {}", e.getMessage());
113+
}
114+
factory.setXIncludeAware(false);
115+
factory.setExpandEntityReferences(false);
116+
factory.setNamespaceAware(true);
117+
factory.setValidating(false);
118+
119+
return factory;
120+
}
121+
122+
/**
123+
* Returns a shared {@link DocumentBuilderFactory} configured with secure defaults.
124+
* <p>
125+
* Creating XML factories is potentially a pretty expensive operation. Using a shared instance helps to amortize
126+
* this initialization cost via reuse.
127+
*
128+
* @return a securely configured {@link DocumentBuilderFactory}
129+
*
130+
* @since 8.1.0
131+
*
132+
* @see #makeNewDocumentBuilderFactory() if you really (really?) need a non-shared instance
133+
*/
134+
public static DocumentBuilderFactory getDocumentBuilderFactory() {
135+
return cachedDocumentBuilderFactory.get();
136+
}
137+
80138
/**
81139
* Returns a shared {@link XMLOutputFactory}. This factory will have its
82140
* {@link XMLOutputFactory#IS_REPAIRING_NAMESPACES} property set to {@code true}.
@@ -88,31 +146,12 @@ public static TransformerFactory makeNewTransformerFactory() {
88146
*
89147
* @throws FactoryConfigurationError see {@link XMLOutputFactory#newInstance()}
90148
*
91-
* @see #makeNewOutputFactory() if you really (really?) need an non-shared instance
149+
* @see #makeNewOutputFactory() if you really (really?) need a non-shared instance
92150
*/
93151
public static XMLOutputFactory getOutputFactory() {
94152
return cachedOutputFactory.get();
95153
}
96154

97-
/**
98-
* Represents a supplier of results.
99-
*
100-
* <p>There is no requirement that a new or distinct result be returned each
101-
* time the supplier is invoked.
102-
*
103-
* @param <T> the type of results supplied by this supplier
104-
*/
105-
// TODO replace with java.util.function.Supplier<T> after Java 8 migration
106-
interface Supplier<T> {
107-
108-
/**
109-
* Gets a result.
110-
*
111-
* @return a result
112-
*/
113-
T get();
114-
}
115-
116155
/**
117156
* A supplier that caches results per thread.
118157
* <p>
@@ -129,7 +168,7 @@ interface Supplier<T> {
129168
*/
130169
private static class CachedInstancePerThreadSupplier<T> implements Supplier<T> {
131170

132-
private final ThreadLocal<SoftReference<T>> cachedInstances = new ThreadLocal<SoftReference<T>>();
171+
private final ThreadLocal<SoftReference<T>> cachedInstances = new ThreadLocal<>();
133172

134173
/**
135174
* The underlying supplier, invoked to originally retrieve the per-thread result
@@ -167,7 +206,7 @@ public T get() {
167206
}
168207

169208
// ... and retain it for later re-use
170-
cachedInstances.set(new SoftReference<T>(cachedInstance));
209+
cachedInstances.set(new SoftReference<>(cachedInstance));
171210
}
172211

173212
return cachedInstance;

marklogic-client-api/src/main/java/com/marklogic/client/io/DOMHandle.java

Lines changed: 11 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -3,39 +3,24 @@
33
*/
44
package com.marklogic.client.io;
55

6-
import java.io.ByteArrayInputStream;
7-
import java.io.ByteArrayOutputStream;
8-
import java.io.IOException;
9-
import java.io.InputStream;
10-
import java.io.OutputStream;
11-
import java.nio.charset.StandardCharsets;
12-
13-
import javax.xml.XMLConstants;
14-
import javax.xml.namespace.QName;
15-
import javax.xml.parsers.DocumentBuilderFactory;
16-
import javax.xml.parsers.ParserConfigurationException;
17-
import javax.xml.xpath.XPath;
18-
import javax.xml.xpath.XPathConstants;
19-
import javax.xml.xpath.XPathExpression;
20-
import javax.xml.xpath.XPathExpressionException;
21-
import javax.xml.xpath.XPathFactory;
22-
6+
import com.marklogic.client.MarkLogicIOException;
7+
import com.marklogic.client.MarkLogicInternalException;
8+
import com.marklogic.client.impl.XmlFactories;
239
import com.marklogic.client.io.marker.*;
2410
import org.slf4j.Logger;
2511
import org.slf4j.LoggerFactory;
2612
import org.w3c.dom.DOMException;
2713
import org.w3c.dom.Document;
2814
import org.w3c.dom.Node;
2915
import org.w3c.dom.NodeList;
30-
import org.w3c.dom.ls.DOMImplementationLS;
31-
import org.w3c.dom.ls.LSException;
32-
import org.w3c.dom.ls.LSInput;
33-
import org.w3c.dom.ls.LSOutput;
34-
import org.w3c.dom.ls.LSParser;
35-
import org.w3c.dom.ls.LSResourceResolver;
16+
import org.w3c.dom.ls.*;
3617

37-
import com.marklogic.client.MarkLogicIOException;
38-
import com.marklogic.client.MarkLogicInternalException;
18+
import javax.xml.namespace.QName;
19+
import javax.xml.parsers.DocumentBuilderFactory;
20+
import javax.xml.parsers.ParserConfigurationException;
21+
import javax.xml.xpath.*;
22+
import java.io.*;
23+
import java.nio.charset.StandardCharsets;
3924

4025
/**
4126
* A DOM Handle represents XML content as a DOM document for reading or writing.
@@ -199,7 +184,7 @@ public String toString() {
199184
*/
200185
public DocumentBuilderFactory getFactory() throws ParserConfigurationException {
201186
if (factory == null)
202-
factory = makeDocumentBuilderFactory();
187+
factory = XmlFactories.getDocumentBuilderFactory();
203188
return factory;
204189
}
205190
/**
@@ -209,32 +194,6 @@ public DocumentBuilderFactory getFactory() throws ParserConfigurationException {
209194
public void setFactory(DocumentBuilderFactory factory) {
210195
this.factory = factory;
211196
}
212-
protected DocumentBuilderFactory makeDocumentBuilderFactory() throws ParserConfigurationException {
213-
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
214-
// default to best practices for conservative security including recommendations per
215-
// https://github.com/OWASP/CheatSheetSeries/blob/master/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.md
216-
try {
217-
factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
218-
} catch (ParserConfigurationException e) {}
219-
try {
220-
factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
221-
} catch (ParserConfigurationException e) {}
222-
try {
223-
factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
224-
} catch (ParserConfigurationException e) {}
225-
try {
226-
factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
227-
} catch (ParserConfigurationException e) {}
228-
try {
229-
factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
230-
} catch (ParserConfigurationException e) {}
231-
factory.setXIncludeAware(false);
232-
factory.setExpandEntityReferences(false);
233-
factory.setNamespaceAware(true);
234-
factory.setValidating(false);
235-
236-
return factory;
237-
}
238197

239198
/**
240199
* Get the processor used to evaluate XPath expressions.

marklogic-client-api/src/main/java/com/marklogic/client/io/DocumentMetadataHandle.java

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -577,10 +577,7 @@ protected void receiveContent(InputStream content) {
577577

578578
Document document = null;
579579
if (content != null) {
580-
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
581-
factory.setNamespaceAware(true);
582-
factory.setValidating(false);
583-
DocumentBuilder builder = factory.newDocumentBuilder();
580+
DocumentBuilder builder = XmlFactories.getDocumentBuilderFactory().newDocumentBuilder();
584581
document = builder.parse(new InputSource(new InputStreamReader(content, StandardCharsets.UTF_8)));
585582
content.close();
586583
}

marklogic-client-api/src/test/java/com/marklogic/client/test/BufferableHandleTest.java

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,12 @@
33
*/
44
package com.marklogic.client.test;
55

6+
import com.marklogic.client.impl.XmlFactories;
67
import com.marklogic.client.io.*;
78
import com.marklogic.client.test.util.Referred;
89
import com.marklogic.client.test.util.Refers;
10+
import jakarta.xml.bind.JAXBContext;
11+
import jakarta.xml.bind.JAXBException;
912
import org.custommonkey.xmlunit.SimpleNamespaceContext;
1013
import org.custommonkey.xmlunit.XMLUnit;
1114
import org.custommonkey.xmlunit.XpathEngine;
@@ -17,16 +20,14 @@
1720
import org.w3c.dom.Element;
1821
import org.xml.sax.SAXException;
1922

20-
import jakarta.xml.bind.JAXBContext;
21-
import jakarta.xml.bind.JAXBException;
22-
import javax.xml.parsers.DocumentBuilderFactory;
2323
import javax.xml.parsers.ParserConfigurationException;
2424
import java.io.IOException;
2525
import java.util.HashMap;
2626
import java.util.Map;
2727

2828
import static org.custommonkey.xmlunit.XMLAssert.assertXMLEqual;
29-
import static org.junit.jupiter.api.Assertions.*;
29+
import static org.junit.jupiter.api.Assertions.assertNotNull;
30+
import static org.junit.jupiter.api.Assertions.assertTrue;
3031

3132
public class BufferableHandleTest {
3233
static private XpathEngine xpather;
@@ -61,11 +62,7 @@ public void testReadWrite()
6162
throws JAXBException, ParserConfigurationException, SAXException, IOException, XpathException
6263
{
6364

64-
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
65-
factory.setNamespaceAware(true);
66-
factory.setValidating(false);
67-
68-
Document domDocument = factory.newDocumentBuilder().newDocument();
65+
Document domDocument = XmlFactories.getDocumentBuilderFactory().newDocumentBuilder().newDocument();
6966
Element root = domDocument.createElement("root");
7067
root.setAttribute("xml:lang", "en");
7168
root.setAttribute("foo", "bar");

marklogic-client-api/src/test/java/com/marklogic/client/test/Common.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import com.marklogic.client.DatabaseClientBuilder;
1010
import com.marklogic.client.DatabaseClientFactory;
1111
import com.marklogic.client.extra.okhttpclient.OkHttpClientConfigurator;
12+
import com.marklogic.client.impl.XmlFactories;
1213
import com.marklogic.client.impl.okhttp.RetryIOExceptionInterceptor;
1314
import com.marklogic.client.io.DocumentMetadataHandle;
1415
import com.marklogic.mgmt.ManageClient;
@@ -228,10 +229,7 @@ public static String testDocumentToString(Document document) {
228229

229230
public static Document testStringToDocument(String document) {
230231
try {
231-
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
232-
factory.setNamespaceAware(true);
233-
factory.setValidating(false);
234-
return factory.newDocumentBuilder().parse(
232+
return XmlFactories.getDocumentBuilderFactory().newDocumentBuilder().parse(
235233
new InputSource(new StringReader(document)));
236234
} catch (SAXException e) {
237235
throw new RuntimeException(e);

0 commit comments

Comments
 (0)