Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Lib/html/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
(
(?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]* # attribute name
)
(= # value indicator
([\t\n\r\f ]*=[\t\n\r\f ]* # value indicator
('[^']*' # LITA-enclosed value
|"[^"]*" # LIT-enclosed value
|(?!['"])[^>\t\n\r\f ]* # bare value
Expand All @@ -57,7 +57,7 @@
[a-zA-Z][^\t\n\r\f />]* # tag name
[\t\n\r\f /]* # optional whitespace before attribute name
(?:(?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]* # attribute name
(?:= # value indicator
(?:[\t\n\r\f ]*=[\t\n\r\f ]* # value indicator
(?:'[^']*' # LITA-enclosed value
|"[^"]*" # LIT-enclosed value
|(?!['"])[^>\t\n\r\f ]* # bare value
Expand Down
28 changes: 16 additions & 12 deletions Lib/test/test_htmlparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -623,7 +623,7 @@ def test_correct_detection_of_start_tags(self):

html = '<div style="", foo = "bar" ><b>The <a href="some_url">rain</a>'
expected = [
('starttag', 'div', [('style', ''), (',', None), ('foo', None), ('=', None), ('"bar"', None)]),
('starttag', 'div', [('style', ''), (',', None), ('foo', 'bar')]),
('starttag', 'b', []),
('data', 'The '),
('starttag', 'a', [('href', 'some_url')]),
Expand Down Expand Up @@ -813,12 +813,12 @@ def test_attr_syntax(self):
]
self._run_check("""<a b='v' c="v" d=v e>""", output)
self._run_check("<a foo==bar>", [('starttag', 'a', [('foo', '=bar')])])
self._run_check("<a foo =bar>", [('starttag', 'a', [('foo', None), ('=bar', None)])])
self._run_check("<a foo\t=bar>", [('starttag', 'a', [('foo', None), ('=bar', None)])])
self._run_check("<a foo =bar>", [('starttag', 'a', [('foo', 'bar')])])
self._run_check("<a foo\t=bar>", [('starttag', 'a', [('foo', 'bar')])])
self._run_check("<a foo\v=bar>", [('starttag', 'a', [('foo\v', 'bar')])])
self._run_check("<a foo\xa0=bar>", [('starttag', 'a', [('foo\xa0', 'bar')])])
self._run_check("<a foo= bar>", [('starttag', 'a', [('foo', ''), ('bar', None)])])
self._run_check("<a foo=\tbar>", [('starttag', 'a', [('foo', ''), ('bar', None)])])
self._run_check("<a foo= bar>", [('starttag', 'a', [('foo', 'bar')])])
self._run_check("<a foo=\tbar>", [('starttag', 'a', [('foo', 'bar')])])
self._run_check("<a foo=\vbar>", [('starttag', 'a', [('foo', '\vbar')])])
self._run_check("<a foo=\xa0bar>", [('starttag', 'a', [('foo', '\xa0bar')])])

Expand All @@ -829,8 +829,8 @@ def test_attr_values(self):
("d", "\txyz\n")])])
self._run_check("""<a b='' c="">""",
[("starttag", "a", [("b", ""), ("c", "")])])
self._run_check("<a b=\t c=\n>",
[("starttag", "a", [("b", ""), ("c", "")])])
self._run_check("<a b=\tx c=\ny>",
[('starttag', 'a', [('b', 'x'), ('c', 'y')])])
self._run_check("<a b=\v c=\xa0>",
[("starttag", "a", [("b", "\v"), ("c", "\xa0")])])
# Regression test for SF patch #669683.
Expand Down Expand Up @@ -899,13 +899,17 @@ def test_malformed_attributes(self):
)
expected = [
('starttag', 'a', [('href', "test'style='color:red;bad1'")]),
('data', 'test - bad1'), ('endtag', 'a'),
('data', 'test - bad1'),
('endtag', 'a'),
('starttag', 'a', [('href', "test'+style='color:red;ba2'")]),
('data', 'test - bad2'), ('endtag', 'a'),
('data', 'test - bad2'),
('endtag', 'a'),
('starttag', 'a', [('href', "test'\xa0style='color:red;bad3'")]),
('data', 'test - bad3'), ('endtag', 'a'),
('starttag', 'a', [('href', None), ('=', None), ("test'&nbsp;style", 'color:red;bad4')]),
('data', 'test - bad4'), ('endtag', 'a')
('data', 'test - bad3'),
('endtag', 'a'),
('starttag', 'a', [('href', "test'\xa0style='color:red;bad4'")]),
('data', 'test - bad4'),
('endtag', 'a'),
]
self._run_check(html, expected)

Expand Down
2 changes: 1 addition & 1 deletion Misc/NEWS.d/3.14.0b4.rst
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ to the HTML5 standard.
* Multiple ``=`` between attribute name and value are no longer collapsed.
E.g. ``<a foo==bar>`` produces attribute "foo" with value "=bar".

* Whitespaces between the ``=`` separator and attribute name or value are no
* [REVERTED] Whitespaces between the ``=`` separator and attribute name or value are no
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this fine? I do not want to remove this entry, because it is part of the history, but we should add an indication that this change was not included in the final release.

longer ignored. E.g. ``<a foo =bar>`` produces two attributes "foo" and
"=bar", both with value None; ``<a foo= bar>`` produces two attributes:
"foo" with value "" and "bar" with value None.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix parsing of attributes with whitespace around the ``=`` separator in
:class:`html.parser.HTMLParser` according to the HTML5 standard.
Loading