Skip to content

Commit 550cd24

Browse files
committed
refactor the xml tools
Signed-off-by: sezen.leblay <[email protected]>
1 parent c67a062 commit 550cd24

File tree

15 files changed

+814
-1552
lines changed

15 files changed

+814
-1552
lines changed
Lines changed: 357 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,357 @@
1+
package datadog.trace.bootstrap.instrumentation;
2+
3+
import java.io.StringReader;
4+
import java.util.ArrayList;
5+
import java.util.Collections;
6+
import java.util.HashMap;
7+
import java.util.List;
8+
import java.util.Map;
9+
import javax.xml.parsers.DocumentBuilder;
10+
import javax.xml.parsers.DocumentBuilderFactory;
11+
import org.w3c.dom.Attr;
12+
import org.w3c.dom.Document;
13+
import org.w3c.dom.Element;
14+
import org.w3c.dom.NamedNodeMap;
15+
import org.w3c.dom.Node;
16+
import org.w3c.dom.NodeList;
17+
import org.w3c.dom.Text;
18+
import org.xml.sax.InputSource;
19+
20+
/**
 * Utility class for converting W3C DOM XML structures to Map/List representations that are
 * compatible with WAF analysis and schema extraction.
 *
 * <p>This centralized utility eliminates code duplication across multiple instrumentation modules
 * that need to process XML content for AppSec analysis.
 *
 * <p>Shape of the converted data:
 *
 * <ul>
 *   <li>an element becomes a {@code Map<String, Object>} holding an optional {@code "attributes"}
 *       entry (attribute name to value) and an optional {@code "children"} entry (list of
 *       converted child nodes); entries are omitted when they would be empty
 *   <li>a text node becomes its trimmed content, or is dropped when blank
 *   <li>every other node type is dropped
 * </ul>
 */
public final class XmlDomUtils {

  /** Default maximum recursion depth for XML DOM conversion to prevent stack overflow. */
  public static final int DEFAULT_MAX_CONVERSION_DEPTH = 15;

  /** Key under which an element's attribute map is stored in the converted structure. */
  private static final String ATTRIBUTES_KEY = "attributes";

  /** Key under which an element's converted child nodes are stored. */
  private static final String CHILDREN_KEY = "children";

  private XmlDomUtils() {
    // Utility class - prevent instantiation
  }

  /**
   * Convert a W3C DOM Document to a WAF-compatible Map/List structure using the default recursion
   * depth.
   *
   * @param document the XML document to convert
   * @return converted structure wrapped in a list for consistency, or null if document is null
   */
  public static Object convertDocument(Document document) {
    return convertDocument(document, DEFAULT_MAX_CONVERSION_DEPTH);
  }

  /**
   * Convert a W3C DOM Document to a WAF-compatible Map/List structure.
   *
   * @param document the XML document to convert
   * @param maxRecursion maximum recursion depth to prevent stack overflow
   * @return converted structure wrapped in a list for consistency, or null if the document is
   *     null, has no document element, or {@code maxRecursion <= 0}
   */
  public static Object convertDocument(Document document, int maxRecursion) {
    if (document == null) {
      return null;
    }
    // A document is represented by its root element; convertElement handles a missing root.
    return convertElement(document.getDocumentElement(), maxRecursion);
  }

  /**
   * Convert a W3C DOM Element to a WAF-compatible Map/List structure using the default recursion
   * depth.
   *
   * @param element the XML element to convert
   * @return converted structure wrapped in a list for consistency, or null if element is null
   */
  public static Object convertElement(Element element) {
    return convertElement(element, DEFAULT_MAX_CONVERSION_DEPTH);
  }

  /**
   * Convert a W3C DOM Element to a WAF-compatible Map/List structure.
   *
   * @param element the XML element to convert
   * @param maxRecursion maximum recursion depth to prevent stack overflow
   * @return converted structure wrapped in a list for consistency, or null if element is null or
   *     {@code maxRecursion <= 0}
   */
  public static Object convertElement(Element element, int maxRecursion) {
    if (element == null) {
      return null;
    }
    return wrapInList(convertW3cNode(element, maxRecursion));
  }

  /**
   * Convert a W3C DOM Node to a WAF-compatible Map/List structure.
   *
   * <p>This method recursively processes XML nodes, converting:
   *
   * <ul>
   *   <li>Elements to Maps with "attributes" and "children" keys
   *   <li>Text nodes to their trimmed string content
   *   <li>Other node types are ignored (return null)
   * </ul>
   *
   * @param node the XML node to convert
   * @param maxRecursion maximum recursion depth to prevent stack overflow
   * @return Map for elements, String for non-blank text nodes, null for other types or when
   *     {@code maxRecursion <= 0}
   */
  public static Object convertW3cNode(Node node, int maxRecursion) {
    if (node == null || maxRecursion <= 0) {
      return null;
    }

    if (node instanceof Element) {
      return convertElementNode((Element) node, maxRecursion);
    }
    if (node instanceof Text) {
      return convertTextNode((Text) node);
    }

    // Ignore other node types (comments, processing instructions, etc.)
    return null;
  }

  /**
   * Convert an Element node to a Map with attributes and children.
   *
   * <p>Children are converted with a depth budget of {@code maxRecursion - 1}; children that
   * convert to null (blank text, ignored node types, exhausted depth) are skipped.
   */
  private static Map<String, Object> convertElementNode(Element element, int maxRecursion) {
    Map<String, Object> repr = new HashMap<>();

    if (element.hasAttributes()) {
      NamedNodeMap attrMap = element.getAttributes();
      int attrCount = attrMap.getLength();
      Map<String, String> attributes = new HashMap<>();
      for (int i = 0; i < attrCount; i++) {
        Attr attr = (Attr) attrMap.item(i);
        attributes.put(attr.getName(), attr.getValue());
      }
      if (!attributes.isEmpty()) {
        repr.put(ATTRIBUTES_KEY, attributes);
      }
    }

    if (element.hasChildNodes()) {
      NodeList childNodes = element.getChildNodes();
      int childCount = childNodes.getLength();
      List<Object> children = new ArrayList<>(childCount);
      for (int i = 0; i < childCount; i++) {
        Object childResult = convertW3cNode(childNodes.item(i), maxRecursion - 1);
        if (childResult != null) {
          children.add(childResult);
        }
      }
      if (!children.isEmpty()) {
        repr.put(CHILDREN_KEY, children);
      }
    }

    return repr;
  }

  /** Convert a Text node to its trimmed string content, or null when it is blank. */
  private static String convertTextNode(Text textNode) {
    String textContent = textNode.getTextContent();
    if (textContent == null) {
      return null;
    }
    String trimmed = textContent.trim();
    return trimmed.isEmpty() ? null : trimmed;
  }

  /** Wrap a converted node in a single-element list, propagating null unchanged. */
  private static List<Object> wrapInList(Object converted) {
    return converted != null ? Collections.<Object>singletonList(converted) : null;
  }

  /**
   * Check if an object holds XML content: either a W3C DOM node or a string that looks like XML.
   *
   * @param obj the object to check
   * @return true if the object is a DOM {@link Node} (which includes Document and Element) or a
   *     String accepted by {@link #isXmlContent(String)}; false otherwise, including for null
   */
  public static boolean isXmlContent(Object obj) {
    // Document and Element are Node subtypes, so a single check covers all DOM objects.
    if (obj instanceof Node) {
      return true;
    }
    if (obj instanceof String) {
      // The cast selects the String overload so both entry points share one heuristic.
      return isXmlContent((String) obj);
    }
    return false;
  }

  /**
   * Check if a string contains XML content by looking for XML declaration or root element.
   *
   * <p>This is a cheap textual heuristic, not a well-formedness check: content starting with
   * <code>{</code> or <code>[</code> is rejected as JSON, content starting with {@code <?xml} is
   * accepted, and otherwise the content must start with {@code <}, end with {@code >} and contain
   * a closing ({@code </}) or self-closing ({@code />}) tag marker.
   *
   * @param content the string content to check
   * @return true if the string contains XML content, false otherwise
   */
  public static boolean isXmlContent(String content) {
    if (content == null) {
      return false;
    }
    String trimmed = content.trim();
    if (trimmed.isEmpty()) {
      return false;
    }

    // Explicitly exclude JSON content
    if (trimmed.startsWith("{") || trimmed.startsWith("[")) {
      return false;
    }

    if (trimmed.startsWith("<?xml")) {
      return true;
    }
    return trimmed.startsWith("<")
        && trimmed.endsWith(">")
        && (trimmed.contains("</") || trimmed.contains("/>"));
  }

  /**
   * Process XML content (strings or DOM objects) for WAF compatibility using the default recursion
   * depth. This ensures XML attack payloads are properly detected by the WAF.
   *
   * @param xmlObj the XML object to process (can be Document, Element, Node, or String)
   * @return processed XML structure compatible with WAF analysis, or null if processing fails
   */
  public static Object processXmlForWaf(Object xmlObj) {
    return processXmlForWaf(xmlObj, DEFAULT_MAX_CONVERSION_DEPTH);
  }

  /**
   * Process XML content (strings or DOM objects) for WAF compatibility. This ensures XML attack
   * payloads are properly detected by the WAF.
   *
   * @param xmlObj the XML object to process (can be Document, Element, Node, or String)
   * @param maxRecursion maximum recursion depth to prevent stack overflow
   * @return processed XML structure compatible with WAF analysis, or null if the input is null,
   *     of an unsupported type, or processing fails
   */
  public static Object processXmlForWaf(Object xmlObj, int maxRecursion) {
    // A null input fails every instanceof test below and falls through to "return null".
    if (xmlObj instanceof Document) {
      return convertDocument((Document) xmlObj, maxRecursion);
    }

    // Elements and any other DOM node share the same convert-then-wrap path.
    if (xmlObj instanceof Node) {
      return wrapInList(convertW3cNode((Node) xmlObj, maxRecursion));
    }

    // XML strings are parsed first; parse failures yield null.
    if (xmlObj instanceof String) {
      return handleXmlString((String) xmlObj, maxRecursion);
    }

    return null;
  }

  /**
   * Convert XML string to WAF-compatible format using the default recursion depth. This ensures XML
   * attack payloads are properly detected by the WAF.
   *
   * @param xmlContent the XML string content to parse
   * @return parsed XML structure compatible with WAF analysis
   * @throws Exception if XML parsing fails
   */
  public static Object parseXmlStringToWafFormat(String xmlContent) throws Exception {
    return parseXmlStringToWafFormat(xmlContent, DEFAULT_MAX_CONVERSION_DEPTH);
  }

  /**
   * Convert XML string to WAF-compatible format following Spring framework pattern. This ensures
   * XML attack payloads are properly detected by the WAF.
   *
   * <p>The parser is configured to reject DOCTYPE declarations and external entities, so payloads
   * carrying them fail with an exception instead of being resolved.
   *
   * @param xmlContent the XML string content to parse
   * @param maxRecursion maximum recursion depth to prevent stack overflow
   * @return parsed XML structure compatible with WAF analysis, or null if the content is null or
   *     blank
   * @throws Exception if the parser cannot be configured or XML parsing fails
   */
  public static Object parseXmlStringToWafFormat(String xmlContent, int maxRecursion)
      throws Exception {
    if (xmlContent == null || xmlContent.trim().isEmpty()) {
      return null;
    }

    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    // Security settings to prevent XXE attacks during parsing
    factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
    factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
    factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
    factory.setExpandEntityReferences(false);

    DocumentBuilder builder = factory.newDocumentBuilder();
    // Without an explicit handler the JDK's bundled parser prints "[Fatal Error] ..." to
    // System.err for every malformed payload. DefaultHandler ignores warnings and recoverable
    // errors and rethrows fatal ones, so failures still surface as exceptions but stay silent.
    builder.setErrorHandler(new org.xml.sax.helpers.DefaultHandler());
    Document document = builder.parse(new InputSource(new StringReader(xmlContent)));

    return convertDocument(document, maxRecursion);
  }

  /**
   * Convert XML string to WAF-compatible format using the default recursion depth. This is a
   * convenience method that wraps parseXmlStringToWafFormat and handles exceptions internally.
   *
   * @param xmlContent the XML string content to handle
   * @return parsed XML structure compatible with WAF analysis, or null if parsing fails
   */
  public static Object handleXmlString(String xmlContent) {
    return handleXmlString(xmlContent, DEFAULT_MAX_CONVERSION_DEPTH);
  }

  /**
   * Convert XML string to WAF-compatible format. This is a convenience method that wraps
   * parseXmlStringToWafFormat and handles exceptions internally.
   *
   * @param xmlContent the XML string content to handle
   * @param maxRecursion maximum recursion depth to prevent stack overflow
   * @return parsed XML structure compatible with WAF analysis, or null if parsing fails
   */
  public static Object handleXmlString(String xmlContent, int maxRecursion) {
    try {
      return parseXmlStringToWafFormat(xmlContent, maxRecursion);
    } catch (Exception ignored) {
      // Best effort by design: return null if parsing fails - let caller handle logging
      return null;
    }
  }
}

0 commit comments

Comments
 (0)