|
5 | 5 | import org.jsoup.integration.ParseTest; |
6 | 6 | import org.jsoup.nodes.Element; |
7 | 7 | import org.jsoup.nodes.TextNode; |
| 8 | +import org.jsoup.parser.Parser; |
8 | 9 | import org.junit.jupiter.api.Test; |
| 10 | +import org.junit.jupiter.params.ParameterizedTest; |
| 11 | +import org.junit.jupiter.params.provider.Arguments; |
| 12 | +import org.junit.jupiter.params.provider.MethodSource; |
9 | 13 | import org.w3c.dom.Document; |
10 | 14 | import org.w3c.dom.Node; |
11 | 15 | import org.w3c.dom.NodeList; |
|
26 | 30 | import java.nio.charset.StandardCharsets; |
27 | 31 | import java.util.Locale; |
28 | 32 | import java.util.Map; |
| 33 | +import java.util.stream.Stream; |
29 | 34 |
|
30 | 35 | import static org.jsoup.TextUtil.normalizeSpaces; |
31 | 36 | import static org.jsoup.nodes.Document.OutputSettings.Syntax.xml; |
@@ -454,4 +459,40 @@ public void canOutputHtmlWithoutNamespace() { |
454 | 459 | assertEquals("Alt two", alt2); |
455 | 460 | } |
456 | 461 |
|
| 462 | + @ParameterizedTest |
| 463 | + @MethodSource("parserProvider") |
| 464 | + void doesNotExpandEntities(Parser parser) { |
| 465 | + // Tests that the billion laughs attack doesn't expand entities; also for XXE |
| 466 | + // Not impacted because jsoup doesn't parse the entities within the doctype, and so won't get to the w3c. |
| 467 | + // Added to confirm, and catch if that ever changes |
| 468 | + String billionLaughs = "<?xml version=\"1.0\"?>\n" + |
| 469 | + "<!DOCTYPE lolz [\n" + |
| 470 | + " <!ENTITY lol \"lol\">\n" + |
| 471 | + " <!ENTITY lol1 \"&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;\">\n" + |
| 472 | + "]>\n" + |
| 473 | + "<html><body><p>&lol1;</p></body></html>"; |
| 474 | + |
| 475 | + org.jsoup.nodes.Document jsoupDoc = Jsoup.parse(billionLaughs, parser); |
| 476 | + W3CDom w3cDom = new W3CDom(); |
| 477 | + |
| 478 | + org.w3c.dom.Document w3cDoc = w3cDom.fromJsoup(jsoupDoc); |
| 479 | + assertNotNull(w3cDoc); |
| 480 | + // select the p and make sure it's unexpanded |
| 481 | + NodeList p = w3cDoc.getElementsByTagName("p"); |
| 482 | + assertEquals(1, p.getLength()); |
| 483 | + assertEquals("&lol1;", p.item(0).getTextContent()); |
| 484 | + |
| 485 | + // Check the string |
| 486 | + String string = W3CDom.asString(w3cDoc, W3CDom.OutputXml()); |
| 487 | + assertFalse(string.contains("lololol")); |
| 488 | + assertTrue(string.contains("&lol1;")); |
| 489 | + } |
| 490 | + |
| 491 | + private static Stream<Arguments> parserProvider() { |
| 492 | + return Stream.of( |
| 493 | + Arguments.of(Parser.htmlParser()), |
| 494 | + Arguments.of(Parser.xmlParser()) |
| 495 | + ); |
| 496 | + } |
| 497 | + |
457 | 498 | } |
0 commit comments