Skip to content

Commit cc90f72

Browse files
committed
Support parsing custom void tags
Fixes #2425
1 parent f212871 commit cc90f72

File tree

3 files changed

+47
-1
lines changed

3 files changed

+47
-1
lines changed

CHANGES.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
* Null characters in the HTML body were not consistently removed; and in foreign content were not correctly replaced. [#2395](https://github.com/jhy/jsoup/issues/2395)
2626
* An IndexOutOfBoundsException could be thrown when parsing a body fragment with crafted input. Now logged as a parse error. [#2397](https://github.com/jhy/jsoup/issues/2397), [#2406](https://github.com/jhy/jsoup/issues/2406)
2727
* When using StructuralEvaluators (e.g., a `parent child` selector) across many retained threads, their memoized results could also be retained, increasing memory use. These results are now cleared immediately after use, reducing overall memory consumption. [#2411](https://github.com/jhy/jsoup/issues/2411)
28+
* Custom tags marked as `Tag.Void` now parse and serialize like the built-in void elements: they no longer consume following content, and the XML serializer emits the expected self-closing form. [#2425](https://github.com/jhy/jsoup/issues/2425)
2829

2930
### Internal Changes
3031
* Deprecated internal helper `org.jsoup.internal.Functions` (for removal in v1.23.1). This was previously used to support older Android API levels without full `java.util.function` coverage; jsoup now requires core library desugaring so this indirection is no longer necessary. [#2412](https://github.com/jhy/jsoup/pull/2412)

src/main/java/org/jsoup/parser/HtmlTreeBuilder.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,9 @@ Element insertElementFor(final Token.StartTag startTag) {
332332
if (startTag.isSelfClosing()) {
333333
Tag tag = el.tag();
334334
tag.setSeenSelfClose(); // can infer output if in xml syntax
335-
if (tag.isKnownTag() && (tag.isEmpty() || tag.isSelfClosing())) {
335+
if (tag.isEmpty()) {
336+
// void (empty) tag: no end tag is emitted here; the element is popped after this block
337+
} else if (tag.isKnownTag() && tag.isSelfClosing()) {
336338
// ok, allow it. effectively a pop, but fiddles with the state. handles empty style, title etc which would otherwise leave us in data state
337339
tokeniser.transition(TokeniserState.Data); // handles <script />, otherwise needs breakout steps from script data
338340
tokeniser.emit(emptyEnd.reset().name(el.tagName())); // ensure we get out of whatever state we are in. emitted for yielded processing
@@ -342,6 +344,10 @@ Element insertElementFor(final Token.StartTag startTag) {
342344
}
343345
}
344346

347+
if (el.tag().isEmpty()) {
348+
pop(); // custom void tags behave like built-in voids (no children, not left on the stack); known empty tags go via insertEmpty
349+
}
350+
345351
return el;
346352
}
347353

src/test/java/org/jsoup/parser/HtmlParserTest.java

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2106,6 +2106,45 @@ static void assertErrorsDoNotContain(String msg, ParseErrorList errors) {
21062106
assertEquals("<div /><custom /><custom>Foo</custom>", TextUtil.stripNewlines(doc.body().html()));
21072107
}
21082108

2109+
@Test void customVoidTagsBehaveLikeHtmlVoids() {
2110+
Parser parser = Parser.htmlParser().setTrackErrors(10).tagSet(TagSet.Html());
2111+
TagSet tags = parser.tagSet();
2112+
tags.valueOf("voidtag", Parser.NamespaceHtml).set(Tag.Void);
2113+
2114+
String html = "<p><voidtag>Hello World</p>";
2115+
Document doc = Jsoup.parse(html, parser);
2116+
assertEquals(0, parser.getErrors().size());
2117+
2118+
doc.outputSettings().syntax(Document.OutputSettings.Syntax.html);
2119+
String emittedHtml = TextUtil.stripNewlines(doc.body().html());
2120+
assertEquals("<p><voidtag>Hello World</p>", emittedHtml);
2121+
assertEquals("Hello World", doc.body().text());
2122+
2123+
doc.outputSettings().syntax(Document.OutputSettings.Syntax.xml);
2124+
assertEquals("<p><voidtag />Hello World</p>", TextUtil.stripNewlines(doc.body().html()));
2125+
}
2126+
2127+
@Test void customSelfClosingVoidTagsRoundTrip() {
2128+
Parser parser = Parser.htmlParser().setTrackErrors(10).tagSet(TagSet.Html());
2129+
TagSet tags = parser.tagSet();
2130+
tags.valueOf("selfclosingvoidtag", Parser.NamespaceHtml).set(Tag.Void).set(Tag.SelfClose);
2131+
2132+
String html = "<p><selfclosingvoidtag />Hello World</p>";
2133+
Document doc = Jsoup.parse(html, parser);
2134+
assertEquals(0, parser.getErrors().size());
2135+
2136+
doc.outputSettings().syntax(Document.OutputSettings.Syntax.html);
2137+
String emittedHtml = TextUtil.stripNewlines(doc.body().html());
2138+
assertEquals("<p><selfclosingvoidtag>Hello World</p>", emittedHtml);
2139+
2140+
Document reparsed = Jsoup.parse(emittedHtml, parser);
2141+
reparsed.outputSettings().syntax(Document.OutputSettings.Syntax.html);
2142+
assertEquals(emittedHtml, TextUtil.stripNewlines(reparsed.body().html()));
2143+
2144+
doc.outputSettings().syntax(Document.OutputSettings.Syntax.xml);
2145+
assertEquals("<p><selfclosingvoidtag />Hello World</p>", TextUtil.stripNewlines(doc.body().html()));
2146+
}
2147+
21092148
@Test void svgScriptParsedAsScriptData() {
21102149
// https://github.com/jhy/jsoup/issues/2320
21112150
String html = "<svg><script>a < b</script></svg>";

0 commit comments

Comments
 (0)