Skip to content

Commit af8b4de

Browse files
committed
Add the assert_pdf_text() method for asserting text in a pdf
1 parent acd0b6b commit af8b4de

File tree

1 file changed

+45
-0
lines changed

1 file changed

+45
-0
lines changed

seleniumbase/fixtures/base_case.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1856,6 +1856,51 @@ def print_unique_links_with_status_codes(self):
18561856
soup = self.get_beautiful_soup(self.get_page_source())
18571857
page_utils._print_unique_links_with_status_codes(page_url, soup)
18581858

1859+
def assert_pdf_text(self, pdf, text, page=None):
    """ Asserts text in a PDF file.
        PDF can be either a URL or a file path on the local file system.
        @Params
        pdf - The URL or file path of the PDF file.
        text - The expected text to verify in the PDF.
        page - The page number of the PDF to use (optional).
               If a page number is provided, looks only at that page.
                   (1 is the first page, 2 is the second page, etc.)
               If no page number is provided, looks at all the pages.
        @Raises
        Exception - If "pdf" does not end with ".pdf", if it is neither
                    a valid URL nor an existing file path, if the page
                    number is outside the range 1..(number of pages),
                    or if the expected text is not found. """
    # Validate the file extension first: it needs neither PyPDF2 nor I/O.
    if not pdf.lower().endswith('.pdf'):
        raise Exception("%s is not a PDF file! (Expecting a .pdf)" % pdf)
    # Imported lazily so PyPDF2 is only required when this method is used.
    import PyPDF2
    if page_utils.is_valid_url(pdf):
        if self.get_current_url() != pdf:
            self.open(pdf)
        self.download_file(pdf)
        # NOTE(review): assumes download_file() saves the file under
        # get_downloads_folder() using the last URL segment as its name.
        file_name = pdf.split('/')[-1]
        file_path = os.path.join(self.get_downloads_folder(), file_name)
    else:
        if not os.path.exists(pdf):
            raise Exception("%s is not a valid URL or file path!" % pdf)
        file_path = os.path.abspath(pdf)
    # Only a real integer selects a single page. Any other value keeps
    # the original behavior of searching every page. (bool is excluded
    # explicitly because it is a subclass of int.)
    single_page = isinstance(page, int) and not isinstance(page, bool)
    # The context manager guarantees the file handle is closed on every
    # exit path, including the failure paths that raise below.
    with open(file_path, "rb") as pdf_file_object:
        pdf_reader = PyPDF2.PdfFileReader(pdf_file_object, strict=False)
        num_pages = pdf_reader.numPages
        if single_page:
            # Reject page < 1 too: index -1 would otherwise silently
            # select the last page of the PDF.
            if page < 1 or page > num_pages:
                raise Exception("Invalid page number for the PDF!")
            # getPage() is zero-based; the public "page" is one-based.
            page_obj = pdf_reader.getPage(page - 1)
            if text not in page_obj.extractText():
                # Report the one-based page number the caller passed in.
                raise Exception("PDF [%s] is missing expected text [%s] on "
                                "page [%s]!" % (file_path, text, page))
            return
        for page_num in range(num_pages):
            page_obj = pdf_reader.getPage(page_num)
            if text in page_obj.extractText():
                return
    raise Exception("PDF [%s] is missing expected text [%s]!"
                    "" % (file_path, text))
1903+
18591904
def create_folder(self, folder):
18601905
""" Creates a folder of the given name if it doesn't already exist. """
18611906
if folder.endswith("/"):

0 commit comments

Comments
 (0)