Skip to content

Commit aabf0b0

Browse files
committed
Apply namespaces when fragment parsing
For #2299
1 parent 85a4fa2 commit aabf0b0

File tree

2 files changed

+53
-8
lines changed

2 files changed

+53
-8
lines changed

src/main/java/org/jsoup/parser/XmlTreeBuilder.java

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import org.jsoup.nodes.Node;
1616
import org.jsoup.nodes.TextNode;
1717
import org.jsoup.nodes.XmlDeclaration;
18+
import org.jsoup.select.Elements;
1819
import org.jspecify.annotations.Nullable;
1920

2021
import java.io.Reader;
@@ -60,11 +61,23 @@ protected void initialiseParse(Reader input, String baseUri, Parser parser) {
6061
@Override
6162
void initialiseParseFragment(@Nullable Element context) {
6263
super.initialiseParseFragment(context);
63-
if (context != null) {
64-
TokeniserState textState = context.tag().textState();
65-
if (textState != null) tokeniser.transition(textState);
64+
if (context == null) return;
65+
66+
// transition to the tag's text state if available
67+
TokeniserState textState = context.tag().textState();
68+
if (textState != null) tokeniser.transition(textState);
69+
70+
// reconstitute the namespace stack by traversing the element and its parents (top down)
71+
Elements chain = context.parents();
72+
chain.add(0, context);
73+
for (int i = chain.size() - 1; i >= 0; i--) {
74+
Element el = chain.get(i);
75+
HashMap<String, String> namespaces = new HashMap<>(namespacesStack.peek());
76+
namespacesStack.push(namespaces);
77+
if (el.attributesSize() > 0) {
78+
processNamespaces(el.attributes(), namespaces);
79+
}
6680
}
67-
6881
}
6982

7083
Document parse(Reader input, String baseUri) {
@@ -130,14 +143,16 @@ void insertElementFor(Token.StartTag startTag) {
130143
HashMap<String, String> namespaces = new HashMap<>(namespacesStack.peek());
131144
namespacesStack.push(namespaces);
132145

133-
if (startTag.attributes != null) {
134-
startTag.attributes.deduplicate(settings);
135-
processNamespaces(startTag.attributes, namespaces);
146+
Attributes attributes = startTag.attributes;
147+
if (attributes != null) {
148+
attributes.deduplicate(settings);
149+
processNamespaces(attributes, namespaces);
150+
applyNamespacesToAttributes(attributes, namespaces);
136151
}
137152

138153
String ns = resolveNamespace(startTag.tagName, namespaces);
139154
Tag tag = tagFor(startTag.tagName, startTag.normalName, ns, settings);
140-
Element el = new Element(tag, null, settings.normalizeAttributes(startTag.attributes));
155+
Element el = new Element(tag, null, settings.normalizeAttributes(attributes));
141156
currentElement().appendChild(el);
142157
push(el);
143158

@@ -162,6 +177,9 @@ private static void processNamespaces(Attributes attributes, HashMap<String, Str
162177
namespaces.put(nsPrefix, value);
163178
}
164179
}
180+
}
181+
182+
private static void applyNamespacesToAttributes(Attributes attributes, HashMap<String, String> namespaces) {
165183
// second pass, apply namespace to attributes. Collects them first then adds (as userData is an attribute)
166184
Map<String, String> attrPrefix = new HashMap<>();
167185
for (Attribute attr: attributes) {

src/test/java/org/jsoup/parser/XmlTreeBuilderTest.java

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -575,4 +575,31 @@ private static void assertXmlNamespace(Element el) {
575575
assertEquals("", other.namespace());
576576
}
577577

578+
@Test void elementsViaAppendHtmlAreNamespaced() {
579+
// tests that elements and attributes added via a fragment parse inherit the context element's namespace stack, and that namespace declarations inside the fragment can still override it
580+
String xml = "<out xmlns='/out'><bk:book xmlns:bk='/books' xmlns:edi='/edi'><bk:title>Test</bk:title><li edi:foo='bar'></bk:book></out>";
581+
Document doc = Jsoup.parse(xml, Parser.xmlParser());
582+
583+
// insert some parsed xml, which should inherit the bk and edi prefixes from the context element (an inner node overriding bk is appended further below)
584+
Element book = doc.expectFirst("bk|book");
585+
book.append("<bk:content edi:foo=qux>Content</bk:content>");
586+
587+
Element out = doc.expectFirst("out");
588+
assertEquals("/out", out.tag().namespace());
589+
590+
Element content = book.expectFirst("bk|content");
591+
assertEquals("bk:content", content.tag().name());
592+
assertEquals("/books", content.tag().namespace());
593+
assertEquals("/edi", content.attribute("edi:foo").namespace());
594+
595+
content.append("<data>Data</data><html xmlns='/html' xmlns:bk='/update'><p>Foo</p><bk:news>News</bk:news></html>");
596+
// p should be in /html, news in /update, and data (a sibling of the html element) in the inherited default namespace /out
597+
Element p = content.expectFirst("p");
598+
assertEquals("/html", p.tag().namespace());
599+
Element news = content.expectFirst("bk|news");
600+
assertEquals("/update", news.tag().namespace());
601+
Element data = content.expectFirst("data");
602+
assertEquals("/out", data.tag().namespace());
603+
}
604+
578605
}

0 commit comments

Comments
 (0)