Skip to content

Commit e2c7b62

Browse files
authored
Merge pull request #202 from Laerte/fix/catch-overflowError-on-convert_entity
Handle `OverflowError` exception on `convert_entity`
2 parents fb70566 + 45cc61b commit e2c7b62

File tree

2 files changed

+5
-1
lines changed

2 files changed

+5
-1
lines changed

tests/test_html.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,10 @@ def test_illegal_entities(self):
6565
self.assertEqual(replace_entities("x&#x2264;y"), "x\u2264y")
6666
self.assertEqual(replace_entities("x&#157;y"), "xy")
6767
self.assertEqual(replace_entities("x&#157;y", remove_illegal=False), "x&#157;y")
68+
self.assertEqual(replace_entities("&#82179209091;"), "")
69+
self.assertEqual(
70+
replace_entities("&#82179209091;", remove_illegal=False), "&#82179209091;"
71+
)
6872

6973
def test_browser_hack(self):
7074
# check browser hack for numeric character references in the 80-9F range

w3lib/html.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ def convert_entity(m: Match) -> str:
9191
return bytes((number,)).decode("cp1252")
9292
else:
9393
return chr(number)
94-
except ValueError:
94+
except (ValueError, OverflowError):
9595
pass
9696

9797
return "" if remove_illegal and groups.get("semicolon") else m.group(0)

0 commit comments

Comments
 (0)