File tree Expand file tree Collapse file tree 2 files changed +11
-5
lines changed Expand file tree Collapse file tree 2 files changed +11
-5
lines changed Original file line number Diff line number Diff line change @@ -1641,16 +1641,17 @@ def _parseXML(xmlstring: str) -> xml.dom.minidom.Document: # noqa: N802
16411641def _parseHTML (htmltext : str ) -> xml .dom .minidom .DocumentFragment : # noqa: N802
16421642 try :
16431643 import html5lib
1644-
1645- parser = html5lib .HTMLParser (tree = html5lib .treebuilders .getTreeBuilder ("dom" ))
1646- retval = parser .parseFragment (htmltext )
1647- retval .normalize ()
1648- return retval
16491644 except ImportError :
16501645 raise ImportError (
16511646 "HTML5 parser not available. Try installing"
16521647 + " html5lib <http://code.google.com/p/html5lib>"
16531648 )
1649+ parser = html5lib .HTMLParser (
1650+ tree = html5lib .treebuilders .getTreeBuilder ("dom" ), strict = True
1651+ )
1652+ retval = parser .parseFragment (htmltext )
1653+ retval .normalize ()
1654+ return retval
16541655
16551656
16561657def _writeXML ( # noqa: N802
Original file line number Diff line number Diff line change @@ -100,6 +100,11 @@ def testHTML():
100100 assert l2 .value is not None , "xml must have been parsed"
101101 assert l2 .datatype == RDF .HTML , "literal must have right datatype"
102102
103+ l3 = Literal ("<invalid" , datatype = RDF .HTML )
104+ assert l3 .value is None , "invalid html must not be parsed"
105+ assert l3 .datatype == RDF .HTML , "literal must have right datatype"
106+ assert str (l3 ) == "<invalid" , "invalid html must not be normalized"
107+
103108 assert l1 != l2
104109 assert not l1 .eq (l2 )
105110
You can’t perform that action at this time.
0 commit comments