Skip to content

Commit 5ec472d

Browse files
committed
Test we don't expand entities when converting to W3CDom
1 parent 8c45287 commit 5ec472d

File tree

2 files changed

+41
-1
lines changed

2 files changed

+41
-1
lines changed

src/main/java/org/jsoup/helper/W3CDom.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,6 @@ public static Document convert(org.jsoup.nodes.Document in) {
115115
* @see OutputKeys#ENCODING
116116
* @see OutputKeys#OMIT_XML_DECLARATION
117117
* @see OutputKeys#STANDALONE
118-
* @see OutputKeys#STANDALONE
119118
* @see OutputKeys#DOCTYPE_PUBLIC
120119
* @see OutputKeys#CDATA_SECTION_ELEMENTS
121120
* @see OutputKeys#INDENT

src/test/java/org/jsoup/helper/W3CDomTest.java

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,11 @@
55
import org.jsoup.integration.ParseTest;
66
import org.jsoup.nodes.Element;
77
import org.jsoup.nodes.TextNode;
8+
import org.jsoup.parser.Parser;
89
import org.junit.jupiter.api.Test;
10+
import org.junit.jupiter.params.ParameterizedTest;
11+
import org.junit.jupiter.params.provider.Arguments;
12+
import org.junit.jupiter.params.provider.MethodSource;
913
import org.w3c.dom.Document;
1014
import org.w3c.dom.Node;
1115
import org.w3c.dom.NodeList;
@@ -26,6 +30,7 @@
2630
import java.nio.charset.StandardCharsets;
2731
import java.util.Locale;
2832
import java.util.Map;
33+
import java.util.stream.Stream;
2934

3035
import static org.jsoup.TextUtil.normalizeSpaces;
3136
import static org.jsoup.nodes.Document.OutputSettings.Syntax.xml;
@@ -454,4 +459,40 @@ public void canOutputHtmlWithoutNamespace() {
454459
assertEquals("Alt two", alt2);
455460
}
456461

462+
@ParameterizedTest
463+
@MethodSource("parserProvider")
464+
void doesNotExpandEntities(Parser parser) {
465+
// Tests that a billion laughs payload does not get its entities expanded; also guards against XXE.
466+
// jsoup is not impacted because it doesn't parse the entity declarations within the doctype, so they never reach the W3C DOM.
467+
// Added to confirm that, and to catch it if that ever changes.
468+
String billionLaughs = "<?xml version=\"1.0\"?>\n" +
469+
"<!DOCTYPE lolz [\n" +
470+
" <!ENTITY lol \"lol\">\n" +
471+
" <!ENTITY lol1 \"&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;\">\n" +
472+
"]>\n" +
473+
"<html><body><p>&lol1;</p></body></html>";
474+
475+
org.jsoup.nodes.Document jsoupDoc = Jsoup.parse(billionLaughs, parser);
476+
W3CDom w3cDom = new W3CDom();
477+
478+
org.w3c.dom.Document w3cDoc = w3cDom.fromJsoup(jsoupDoc);
479+
assertNotNull(w3cDoc);
480+
// select the p and make sure it's unexpanded
481+
NodeList p = w3cDoc.getElementsByTagName("p");
482+
assertEquals(1, p.getLength());
483+
assertEquals("&lol1;", p.item(0).getTextContent());
484+
485+
// Check the serialized string: the entity reference must be escaped, not expanded
486+
String string = W3CDom.asString(w3cDoc, W3CDom.OutputXml());
487+
assertFalse(string.contains("lololol"));
488+
assertTrue(string.contains("&amp;lol1;"));
489+
}
490+
491+
private static Stream<Arguments> parserProvider() {
492+
return Stream.of(
493+
Arguments.of(Parser.htmlParser()),
494+
Arguments.of(Parser.xmlParser())
495+
);
496+
}
497+
457498
}

0 commit comments

Comments
 (0)