Skip to content

Commit 9a87e5c

Browse files
Copilot and slachiewicz authored
Fix polynomial regular expression vulnerability in XML encoding detection (#68)
* Fix polynomial regex vulnerability in ENCODING_PATTERN.
  Changed the regex pattern from `.*` to `.*?` to use non-greedy matching, preventing catastrophic backtracking on malicious input. Added a test case to validate the fix with various edge cases.
---------
Co-authored-by: copilot-swe-agent[bot] <[email protected]>
Co-authored-by: slachiewicz <[email protected]>
1 parent 303c1a2 commit 9a87e5c

File tree

2 files changed

+22
-1
lines changed

2 files changed

+22
-1
lines changed

src/main/java/org/codehaus/plexus/util/xml/XmlReader.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -597,7 +597,7 @@ private static String getXMLGuessEncoding(BufferedInputStream is) throws IOExcep
597597
}
598598

599599
static final Pattern ENCODING_PATTERN =
600-
Pattern.compile("<\\?xml.*encoding[\\s]*=[\\s]*((?:\".[^\"]*\")|(?:'.[^']*'))", Pattern.MULTILINE);
600+
Pattern.compile("<\\?xml.*?encoding[\\s]*=[\\s]*((?:\".[^\"]*\")|(?:'.[^']*'))", Pattern.MULTILINE);
601601

602602
// returns the encoding declared in the <?xml encoding=...?>, NULL if none
603603
private static String getXmlProlog(BufferedInputStream is, String guessedEnc) throws IOException {

src/test/java/org/codehaus/plexus/util/xml/XmlStreamReaderTest.java

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -261,4 +261,25 @@ void encodingAttribute() throws IOException {
261261
xml = "<element encoding='attribute value'/>";
262262
checkXmlContent(xml, "UTF-8");
263263
}
264+
265+
/**
266+
* Test that the regex pattern handles edge cases efficiently without catastrophic backtracking.
267+
* This validates the fix for polynomial regex vulnerability.
268+
*
269+
* @throws java.io.IOException if any.
270+
*/
271+
@Test
272+
void encodingPatternWithManyAttributes() throws IOException {
273+
// Test with many attributes before encoding to ensure non-greedy matching works
274+
String xml = "<?xml version='1.0' a='1' b='2' c='3' d='4' e='5' encoding='UTF-8'?><root/>";
275+
checkXmlContent(xml, "UTF-8");
276+
277+
// Test with whitespace variations
278+
xml = "<?xml version='1.0' encoding = 'US-ASCII' ?><root/>";
279+
checkXmlContent(xml, "US-ASCII");
280+
281+
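// Test with a longer prolog where encoding follows standalone (not the attribute order the XML spec prescribes, but the pattern must still find it)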
282+
xml = "<?xml version='1.0' standalone='yes' encoding='ISO-8859-1'?><root/>";
283+
checkXmlContent(xml, "ISO-8859-1");
284+
}
264285
}

0 commit comments

Comments
 (0)