Skip to content

Commit 0270598

Browse files
committed
XML parser can have custom void elements
Allows parsing of pseudo HTML with the HTML tag set, without using the HTML TreeBuilder rules. For #2285
1 parent 7838399 commit 0270598

File tree

2 files changed

+42
-1
lines changed

2 files changed

+42
-1
lines changed

src/main/java/org/jsoup/parser/XmlTreeBuilder.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,8 @@ void insertElementFor(Token.StartTag startTag) {
159159
if (startTag.isSelfClosing()) {
160160
tag.setSeenSelfClose();
161161
pop(); // push & pop ensures onNodeInserted & onNodeClosed
162+
} else if (tag.isEmpty()) {
163+
pop(); // custom defined void tag
162164
} else {
163165
TokeniserState textState = tag.textState();
164166
if (textState != null) tokeniser.transition(textState);

src/test/java/org/jsoup/parser/XmlTreeBuilderTest.java

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,46 @@ private static void assertXmlNamespace(Element el) {
443443
assertEquals(inner, zEl.data());
444444
}
445445

446+
@Test void canSetCustomVoid() {
447+
String ns = "custom";
448+
String xml = "<x xmlns=custom><foo><link><meta>";
449+
TagSet custom = new TagSet();
450+
custom.valueOf("link", ns).set(Tag.Void);
451+
custom.valueOf("meta", ns).set(Tag.Void);
452+
custom.valueOf("foo", "other").set(Tag.Void); // ns doesn't match, won't impact
453+
454+
Document doc = Jsoup.parse(xml, Parser.xmlParser().tagSet(custom));
455+
String expect = "<x xmlns=\"custom\"><foo><link /><meta /></foo></x>";
456+
assertEquals(expect, doc.html());
457+
}
458+
459+
@Test void canSupplyWithHtmlTagSet() {
460+
// use the properties of html tag set but without HtmlTreeBuilder rules
461+
String xml = "<html xmlns=" + NamespaceHtml + "><div><script>a<b</script><img><p>";
462+
Document doc = Jsoup.parse(xml, Parser.xmlParser().tagSet(TagSet.Html()));
463+
doc.outputSettings().prettyPrint(true);
464+
String expect = "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n" +
465+
" <div>\n" +
466+
" <script>//<![CDATA[\n" +
467+
"a<b\n" +
468+
"//]]></script>\n" +
469+
" <img />\n" +
470+
" <p></p>\n" +
471+
" </div>\n" +
472+
"</html>";
473+
assertEquals(expect, doc.html());
474+
475+
doc.outputSettings().syntax(Syntax.html);
476+
expect = "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n" +
477+
" <div>\n" +
478+
" <script>a<b</script>\n" +
479+
" <img>\n" +
480+
" <p></p>\n" +
481+
" </div>\n" +
482+
"</html>";
483+
assertEquals(expect, doc.html());
484+
}
485+
446486
@Test void prettyFormatsTextInline() {
447487
// https://github.com/jhy/jsoup/issues/2141
448488
String xml = "<package><metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\n" +
@@ -612,5 +652,4 @@ private static void assertXmlNamespace(Element el) {
612652
assertEquals("<div id=\"1\" /><p /><div>Foo</div><div /><foo></foo>", TextUtil.stripNewlines(doc.outerHtml()));
613653
// we infer that empty els can be represented with self-closing if seen in parse
614654
}
615-
616655
}

0 commit comments

Comments
 (0)