Skip to content

Commit 27ef41d

Browse files
committed
Work around libxml2 namespace reconciliation for SVG elements in HTML5
Yes, this is ugly.
1 parent 0f65c53 commit 27ef41d

File tree

2 files changed

+39
-0
lines changed

2 files changed

+39
-0
lines changed

src/Webfactory/Dom/PolyglotHTML5ParsingHelper.php

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ protected function sanitize($xml)
1717
{
1818
$xml = parent::sanitize($xml);
1919

20+
$xml = str_replace('xmlns="http://www.w3.org/2000/svg"', '_xmlns="http://www.w3.org/2000/svg"', $xml);
21+
2022
$escaped = str_replace(
2123
array('&amp;', '&lt;', '&gt;', '&quot;', '&apos;'),
2224
array('&amp;amp;', '&amp;lt;', '&amp;gt;', '&amp;quot;', '&amp;apos;'),
@@ -55,6 +57,8 @@ protected function fixDump($dump)
5557
}
5658
}
5759

60+
$dump = str_replace('_xmlns="http://www.w3.org/2000/svg"', 'xmlns="http://www.w3.org/2000/svg"', $dump);
61+
5862
return $dump;
5963
}
6064
}

test/Webfactory/Dom/Test/PolyglotHTML5ParsingHelperTest.php

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,4 +44,39 @@ public function testHtmlEntitiesSupportedAsConvenience()
4444

4545
$this->assertEquals('<p>ä x ö x ü " &lt; &gt; \' <x foo="&quot; &lt; &gt; \'"></x></p>', $d);
4646
}
47+
48+
public function testSvgNamespaceIsNotReconciled()
49+
{
50+
/*
51+
* libxml2 will attempt (under which circumstances?) to reconcile namespace declarations, that is, find
52+
* namespaces used by several nodes and move these declarations up the DOM tree.
53+
*
54+
* This also affects the default namespace as commonly used by <svg> inlined in HTML5 documents. As you cannot
55+
* move the default namespace away from an element, libxml turns it into a regular "named" namespace and
56+
* chooses a namespace prefix like "default" (sic), possibly followed by a number, for it. This happens
57+
* in xmlNewReconciliedNs, see https://github.com/GNOME/libxml2/blob/35e83488505d501864826125cfe6a7950d6cba78/tree.c#L6230.
58+
*
59+
* The result is that markup like <svg xmlns="http://www.w3.org/2000/svg"><path ...></path></svg> will be turned
60+
* into <default:svg><default:path>...</default:path></default:svg>, with xmlns:default="http://www.w3.org/2000/svg"
61+
* somewhere up the tree.
62+
*
63+
* This is reported (not for the SVG namespace, but the general case) at https://bugs.php.net/bug.php?id=55294
64+
* and https://bugs.php.net/bug.php?id=47530, with the conclusion that it would need to be fixed in libxml2.
65+
*
66+
* libxml2, on the other hand, will argue that the result is perfectly fine when applying XML semantics. The
67+
* problem is that browsers may or may not make this distinction. According to https://stackoverflow.com/questions/18467982/are-svg-parameters-such-as-xmlns-and-version-needed,
68+
* it might depend on whether the page is served as application/xhtml+xml or text/html. In the latter case,
69+
* XML namespace semantics do not apply.
70+
*
71+
* For <svg> in HTML5, a possible workaround is to completely remove the XML NS declaration: This is
72+
* possible as <svg> is included in HTML5 as a "foreign element" (https://www.w3.org/TR/html5/syntax.html#foreign-elements).
73+
* That is, the elements from the SVG namespace are also valid in HTML5.
74+
*
75+
* Instead of completely removing the xmlns, our current workaround is to move the namespace declaration
76+
* "out of the way" when parsing the XML and fixing it up again later when dumping the XML.
77+
*/
78+
$this->readDumpAssertFragment(
79+
'<div><svg xmlns="http://www.w3.org/2000/svg" class="x" width="300" height="150" viewBox="0 0 300 150"><path fill="#FF7949" d="M300 5.49c0-2.944-1.057-4.84-2.72-5.49h-2.92c-.79.247-1.632.67-2.505 1.293L158.145 96.56c-4.48 3.19-11.81 3.19-16.29 0L8.146 1.292C7.27.67 6.43.247 5.64 0H2.72C1.056.65 0 2.546 0 5.49V150h300V5.49z"></path></svg></div>'
80+
);
81+
}
4782
}

0 commit comments

Comments
 (0)