Skip to content

Commit a4dbaee

Browse files
authored
Merge pull request #145 from anekos/fix/causing-lxml-error
Fixed lxml error on some Chinese texts.
2 parents ede4d01 + 6842ea9 commit a4dbaee

File tree

3 files changed

+67
-1
lines changed

3 files changed

+67
-1
lines changed

readability/readability.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -464,7 +464,7 @@ def transform_misused_divs_into_paragraphs(self):
464 464   # This results in incorrect results in case there is an <img>
465 465   # buried within an <a> for example
466 466   if not REGEXES["divToPElementsRe"].search(
467     -     str_(b"".join(map(tostring, list(elem))))
    467 +     str_(b"".join(map(lambda it: tostring(it, encoding="utf-8"), list(elem))))
468 468   ):
469 469       # log.debug("Altering %s to p" % (describe(elem)))
470 470       elem.tag = "p"

0 commit comments

Comments
 (0)