Skip to content

Commit 47d7d2b

Browse files
committed
Better error-handling with PDF extraction methods
1 parent 4870c07 commit 47d7d2b

File tree

1 file changed

+8
-1
lines changed

1 file changed

+8
-1
lines changed

seleniumbase/fixtures/base_case.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2038,6 +2038,10 @@ def print_unique_links_with_status_codes(self):
20382038

20392039
def __fix_unicode_conversion(self, text):
20402040
""" Fixing Chinese characters when converting from PDF to HTML. """
2041+
if sys.version_info[0] < 3:
2042+
# Update encoding for Python 2 users
2043+
reload(sys) # noqa
2044+
sys.setdefaultencoding('utf8')
20412045
text = text.replace(u'\u2f8f', u'\u884c')
20422046
text = text.replace(u'\u2f45', u'\u65b9')
20432047
text = text.replace(u'\u2f08', u'\u4eba')
@@ -2068,7 +2072,10 @@ def get_pdf_text(self, pdf, page=None, maxpages=None,
20682072
override - If the PDF file to be downloaded already exists in the
20692073
downloaded_files/ folder, that PDF will be used
20702074
instead of downloading it again. """
2071-
from pdfminer.high_level import extract_text
2075+
import warnings
2076+
with warnings.catch_warnings():
2077+
warnings.simplefilter("ignore", category=UserWarning)
2078+
from pdfminer.high_level import extract_text
20722079
if not password:
20732080
password = ''
20742081
if not maxpages:

0 commit comments

Comments
 (0)