@@ -54,6 +54,7 @@ def test_anything(self):
54
54
from selenium.webdriver.remote.remote_connection import LOGGER
55
55
from seleniumbase import config as sb_config
56
56
from seleniumbase.config import settings
57
+ from seleniumbase.core import download_helper
57
58
from seleniumbase.core import log_helper
58
59
from seleniumbase.fixtures import constants
59
60
from seleniumbase.fixtures import css_to_xpath
@@ -4561,10 +4562,17 @@ def get_unique_links(self):
4561
4562
links = page_utils._get_unique_links(page_url, soup)
4562
4563
return links
4563
4564
4564
- def get_link_status_code(self, link, allow_redirects=False, timeout=5):
4565
+ def get_link_status_code(
4566
+ self,
4567
+ link,
4568
+ allow_redirects=False,
4569
+ timeout=5,
4570
+ verify=False,
4571
+ ):
4565
4572
"""Get the status code of a link.
4566
4573
If the timeout is set to less than 1, it becomes 1.
4567
4574
If the timeout is exceeded by requests.get(), it will return a 404.
4575
+ If "verify" is False, certificate errors are ignored.
4568
4576
For a list of available status codes, see:
4569
4577
https://en.wikipedia.org/wiki/List_of_HTTP_status_codes
4570
4578
"""
@@ -4573,7 +4581,10 @@ def get_link_status_code(self, link, allow_redirects=False, timeout=5):
4573
4581
if timeout < 1:
4574
4582
timeout = 1
4575
4583
status_code = page_utils._get_link_status_code(
4576
- link, allow_redirects=allow_redirects, timeout=timeout
4584
+ link,
4585
+ allow_redirects=allow_redirects,
4586
+ timeout=timeout,
4587
+ verify=verify,
4577
4588
)
4578
4589
return status_code
4579
4590
@@ -4604,10 +4615,12 @@ def assert_no_404_errors(self, multithreaded=True, timeout=None):
4604
4615
links = []
4605
4616
for link in all_links:
4606
4617
if (
4607
- "javascript :" not in link
4618
+ "data :" not in link
4608
4619
and "mailto:" not in link
4609
- and "data :" not in link
4620
+ and "javascript :" not in link
4610
4621
and "://fonts.gstatic.com" not in link
4622
+ and "://fonts.googleapis.com" not in link
4623
+ and "://googleads.g.doubleclick.net" not in link
4611
4624
):
4612
4625
links.append(link)
4613
4626
if timeout:
@@ -4634,6 +4647,7 @@ def assert_no_404_errors(self, multithreaded=True, timeout=None):
4634
4647
broken_links.append(link)
4635
4648
self.__requests_timeout = None # Reset the requests.get() timeout
4636
4649
if len(broken_links) > 0:
4650
+ broken_links = sorted(broken_links)
4637
4651
bad_links_str = "\n".join(broken_links)
4638
4652
if len(broken_links) == 1:
4639
4653
self.fail("Broken link detected:\n%s" % bad_links_str)
@@ -4681,6 +4695,7 @@ def get_pdf_text(
4681
4695
wrap=False,
4682
4696
nav=False,
4683
4697
override=False,
4698
+ caching=True,
4684
4699
):
4685
4700
"""Gets text from a PDF file.
4686
4701
PDF can be either a URL or a file path on the local file system.
@@ -4702,7 +4717,8 @@ def get_pdf_text(
4702
4717
(Not needed because the PDF will be downloaded anyway.)
4703
4718
override - If the PDF file to be downloaded already exists in the
4704
4719
downloaded_files/ folder, that PDF will be used
4705
- instead of downloading it again."""
4720
+ instead of downloading it again.
4721
+ caching - Whether pdfminer should cache resources."""
4706
4722
import warnings
4707
4723
4708
4724
with warnings.catch_warnings():
@@ -4716,8 +4732,6 @@ def get_pdf_text(
4716
4732
raise Exception("%s is not a PDF file! (Expecting a .pdf)" % pdf)
4717
4733
file_path = None
4718
4734
if page_utils.is_valid_url(pdf):
4719
- from seleniumbase.core import download_helper
4720
-
4721
4735
downloads_folder = download_helper.get_downloads_folder()
4722
4736
if nav:
4723
4737
if self.get_current_url() != pdf:
@@ -4750,7 +4764,7 @@ def get_pdf_text(
4750
4764
password="",
4751
4765
page_numbers=page_search,
4752
4766
maxpages=maxpages,
4753
- caching=False ,
4767
+ caching=caching ,
4754
4768
codec=codec,
4755
4769
)
4756
4770
pdf_text = self.__fix_unicode_conversion(pdf_text)
@@ -4996,8 +5010,6 @@ def get_downloads_folder(self):
4996
5010
any clicks that download files will also use this folder
4997
5011
rather than using the browser's default "downloads/" path."""
4998
5012
self.__check_scope()
4999
- from seleniumbase.core import download_helper
5000
-
5001
5013
return download_helper.get_downloads_folder()
5002
5014
5003
5015
def get_browser_downloads_folder(self):
@@ -5020,8 +5032,6 @@ def get_browser_downloads_folder(self):
5020
5032
):
5021
5033
return os.path.join(os.path.expanduser("~"), "downloads")
5022
5034
else:
5023
- from seleniumbase.core import download_helper
5024
-
5025
5035
return download_helper.get_downloads_folder()
5026
5036
return os.path.join(os.path.expanduser("~"), "downloads")
5027
5037
0 commit comments