Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions examples/cdp_mode/ReadMe.md
Original file line number Diff line number Diff line change
Expand Up @@ -692,6 +692,7 @@ await tab.get_current_url()
await tab.send_keys(selector, text, timeout=5)
await tab.type(selector, text, timeout=5)
await tab.click(selector, timeout=5)
await tab.click_if_visible(selector, timeout=0)
await tab.click_with_offset(selector, x, y, center=False, timeout=5)
await tab.solve_captcha()
await tab.click_captcha() # Same as solve_captcha()
Expand Down
40 changes: 40 additions & 0 deletions examples/cdp_mode/playwright/raw_gas_info_async.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import asyncio
from playwright.async_api import async_playwright
from seleniumbase import cdp_driver


async def main():
    """Search the Gas Safe Register site and print the results found.

    A browser is started through ``cdp_driver`` and Playwright attaches to
    it over the driver's endpoint URL. Navigation, waiting for the search
    box, form filling, and the search click go through the Playwright page;
    the CAPTCHA step, the cookie banner, the result lookup, and the
    scrolling go through the CDP tab.
    """
    search_page_url = (
        "https://www.gassaferegister.co.uk/gas-safety"
        "/gas-safety-certificates-records/building-regulations-certificate"
        "/order-replacement-building-regulations-certificate/"
    )
    cookie_button = 'button:contains("Allow all cookies")'

    driver = await cdp_driver.start_async()
    endpoint_url = driver.get_endpoint_url()
    tab = await driver.get("about:blank")

    async with async_playwright() as playwright:
        browser = await playwright.chromium.connect_over_cdp(endpoint_url)
        # Reuse the first context and page that the browser exposes
        page = browser.contexts[0].pages[0]
        await page.goto(search_page_url)
        await tab.sleep(0.6)
        await tab.solve_captcha()
        await page.wait_for_selector("#SearchTerm")
        await tab.sleep(1.4)
        await tab.click_if_visible(cookie_button, timeout=2)
        await tab.sleep(1)
        await page.fill("#SearchTerm", "Hydrogen")
        await page.click("button.search-button")
        await tab.sleep(3)
        for entry in await tab.query_selector_all("div.search-result"):
            print(entry.text.replace(" " * 12, " ").strip() + "\n")
        await tab.scroll_down(50)
        await tab.sleep(1)


if __name__ == "__main__":
    # Run main() to completion on a newly created event loop
    asyncio.new_event_loop().run_until_complete(main())
3 changes: 2 additions & 1 deletion examples/cdp_mode/playwright/raw_gas_info_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@
"/order-replacement-building-regulations-certificate/"
)
page.goto(url)
sb.sleep(0.5)
sb.sleep(0.6)
sb.solve_captcha()
page.wait_for_selector("#SearchTerm")
sb.sleep(1.4)
allow_cookies = 'button:contains("Allow all cookies")'
sb.click_if_visible(allow_cookies, timeout=2)
sb.sleep(1)
Expand Down
4 changes: 2 additions & 2 deletions examples/cdp_mode/raw_gas_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
"/order-replacement-building-regulations-certificate/"
)
sb.activate_cdp_mode(url)
sb.sleep(0.5)
sb.sleep(0.6)
sb.solve_captcha()
sb.wait_for_element("#SearchTerm", timeout=5)
sb.sleep(2)
sb.sleep(1.4)
allow_cookies = 'button:contains("Allow all cookies")'
sb.click_if_visible(allow_cookies, timeout=2)
sb.sleep(1)
Expand Down
2 changes: 1 addition & 1 deletion examples/cdp_mode/raw_kohls.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

with SB(uc=True, test=True, locale="en", incognito=True) as sb:
url = "https://www.kohls.com/"
sb.activate_cdp_mode(url)
sb.activate_cdp_mode(url, ad_block=True)
sb.sleep(2.6)
search = "Mickey Mouse Blanket"
req_1 = "Mickey"
Expand Down
2 changes: 1 addition & 1 deletion examples/cdp_mode/raw_pokemon.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from seleniumbase import SB

with SB(uc=True, test=True, locale="en", guest=True) as sb:
with SB(uc=True, test=True, locale="en", ad_block=True) as sb:
url = "https://www.pokemon.com/us"
sb.activate_cdp_mode(url)
sb.sleep(1.5)
Expand Down
1 change: 1 addition & 0 deletions help_docs/cdp_mode_methods.md
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,7 @@ await tab.get_current_url()
await tab.send_keys(selector, text, timeout=5)
await tab.type(selector, text, timeout=5)
await tab.click(selector, timeout=5)
await tab.click_if_visible(selector, timeout=0)
await tab.click_with_offset(selector, x, y, center=False, timeout=5)
await tab.solve_captcha()
await tab.click_captcha() # Same as solve_captcha()
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ wsproto==1.2.0;python_version<"3.10"
wsproto~=1.3.2;python_version>="3.10"
websocket-client~=1.9.0
selenium==4.32.0;python_version<"3.10"
selenium==4.39.0;python_version>="3.10"
selenium==4.40.0;python_version>="3.10"
cssselect==1.3.0
nest-asyncio==1.6.0
sortedcontainers==2.4.0
Expand All @@ -63,7 +63,7 @@ pytest-rerunfailures==16.1;python_version>="3.10"
pytest-xdist==3.8.0
parameterized==0.9.0
behave==1.2.6
soupsieve~=2.8.1
soupsieve~=2.8.2
beautifulsoup4~=4.14.3
pyotp==2.9.0
python-xlib==0.33;platform_system=="Linux"
Expand Down
2 changes: 1 addition & 1 deletion seleniumbase/__version__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# seleniumbase package
__version__ = "4.45.13"
__version__ = "4.46.0"
6 changes: 3 additions & 3 deletions seleniumbase/core/sb_cdp.py
Original file line number Diff line number Diff line change
Expand Up @@ -1995,7 +1995,7 @@ def _on_a_g_recaptcha_page(self, *args, **kwargs):
time.sleep(0.1)
return True
elif "com/recaptcha/api.js" in source:
time.sleep(1.6) # Still loading
time.sleep(1.2) # Maybe still loading
try:
self.loop.run_until_complete(self.page.wait(0.1))
except Exception:
Expand Down Expand Up @@ -2084,10 +2084,10 @@ def __cdp_click_incapsula_hcaptcha(self):
with suppress(Exception):
element.click_with_offset(x_offset, y_offset)
was_clicked = True
time.sleep(0.056)
time.sleep(0.075)
if was_clicked:
# Wait a moment for the click to succeed
time.sleep(0.25)
time.sleep(0.75)
self.__slow_mode_pause_if_set()
self.loop.run_until_complete(self.page.wait())
if "--debug" in sys.argv:
Expand Down
6 changes: 6 additions & 0 deletions seleniumbase/undetected/cdp_driver/browser.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,12 @@ def get_rd_port(self):
return self.config.port

def get_rd_url(self):
    """Return the remote-debugging URL in the form "http://<host>:<port>".

    The Playwright integration uses this URL to attach to the browser.
    Side effect: sets the NODE_NO_WARNINGS environment variable to "1"
    in order to hide this warning:
    Deprecation: "url.parse() behavior is not standardized".
    (github.com/microsoft/playwright-python/issues/3016)"""
    os.environ["NODE_NO_WARNINGS"] = "1"
    host, port = self.config.host, self.config.port
    return f"http://{host}:{port}"
Expand Down
147 changes: 127 additions & 20 deletions seleniumbase/undetected/cdp_driver/tab.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,15 @@
import logging
import pathlib
import re
import sys
import urllib.parse
import warnings
from contextlib import suppress
from filelock import FileLock
from seleniumbase import config as sb_config
from seleniumbase.fixtures import constants
from seleniumbase.fixtures import js_utils
from seleniumbase.fixtures import page_utils
from seleniumbase.fixtures import shared_utils
from typing import Dict, List, Union, Optional, Tuple
from . import browser as cdp_browser
Expand Down Expand Up @@ -1339,16 +1341,29 @@ async def is_element_present(self, selector):
return False

async def is_element_visible(self, selector):
try:
element = await self.select(selector, timeout=0.01)
except Exception:
return False
if not element:
return False
try:
position = await element.get_position_async()
return (position.width != 0 or position.height != 0)
except Exception:
if ":contains(" not in selector:
try:
element = await self.select(selector, timeout=0.01)
except Exception:
return False
if not element:
return False
try:
position = await element.get_position_async()
return (position.width != 0 or position.height != 0)
except Exception:
return False
else:
with suppress(Exception):
tag_name = selector.split(":contains(")[0].split(" ")[-1]
text = selector.split(":contains(")[1].split(")")[0][1:-1]
element = await self.select(tag_name, timeout=0.01)
if not element:
raise Exception()
element = await self.find_element_by_text(text)
if not element:
raise Exception()
return True
return False

async def __on_a_cf_turnstile_page(self, source=None):
Expand All @@ -1369,6 +1384,11 @@ async def __on_a_cf_turnstile_page(self, source=None):
return True
return False

async def __on_an_incapsula_hcaptcha_page(self, *args, **kwargs):
    """Return True when an Incapsula resource iframe is visible.

    Extra positional and keyword arguments are accepted and ignored."""
    incapsula_iframe = 'iframe[src*="Incapsula_Resource?"]'
    return bool(await self.is_element_visible(incapsula_iframe))

async def __on_a_g_recaptcha_page(self, *args, **kwargs):
await self.sleep(0.4) # reCAPTCHA may need a moment to appear
source = await self.get_html()
Expand All @@ -1382,7 +1402,7 @@ async def __on_a_g_recaptcha_page(self, *args, **kwargs):
await self.sleep(0.1)
return True
elif "com/recaptcha/api.js" in source:
await self.sleep(1.6) # Still loading
await self.sleep(1.2) # Maybe still loading
return True
return False

Expand All @@ -1391,18 +1411,34 @@ async def __gui_click_recaptcha(self):
if await self.is_element_present('iframe[title="reCAPTCHA"]'):
selector = 'iframe[title="reCAPTCHA"]'
else:
return
return False
await self.sleep(0.5)
with suppress(Exception):
element_rect = await self.get_gui_element_rect(selector, timeout=1)
element_rect = await self.get_element_rect(selector, timeout=0.1)
e_x = element_rect["x"]
e_y = element_rect["y"]
window_rect = await self.get_window_rect()
win_width = window_rect["innerWidth"]
win_height = window_rect["innerHeight"]
if (
e_x > 1040
and e_y > 640
and abs(win_width - e_x) < 110
and abs(win_height - e_y) < 110
):
# Probably the invisible reCAPTCHA in the bottom right corner
return False
gui_element_rect = await self.get_gui_element_rect(
selector, timeout=1
)
gui_e_x = gui_element_rect["x"]
gui_e_y = gui_element_rect["y"]
x_offset = 26
y_offset = 35
if await asyncio.to_thread(shared_utils.is_windows):
x_offset = 29
x = e_x + x_offset
y = e_y + y_offset
x = gui_e_x + x_offset
y = gui_e_y + y_offset
sb_config._saved_cf_x_y = (x, y) # For debugging later
await self.sleep(0.11)
gui_lock = FileLock(constants.MultiBrowser.PYAUTOGUILOCK)
Expand All @@ -1413,6 +1449,53 @@ async def __gui_click_recaptcha(self):
selector, x_offset, y_offset, timeout=1
)
await self.sleep(0.22)
return True
return False

async def __cdp_click_incapsula_hcaptcha(self):
    """Click the hCaptcha widget nested inside a visible Incapsula iframe.

    Returns:
        True if the offset click on the hCaptcha widget iframe was issued
        without raising. False if the Incapsula iframe is not visible, if
        either iframe cannot be located, or if the click attempt raised.
    """
    outer_selector = 'iframe[src*="Incapsula_Resource?"]'
    selector = "iframe[data-hcaptcha-widget-id]"
    if not await self.is_element_visible(outer_selector):
        return False
    outer_element = await self.find_element_by_text(outer_selector)
    if not outer_element:
        # An empty lookup result would otherwise fail on the attribute
        # access below instead of reporting "not clicked".
        return False
    element = await outer_element.query_selector_async(selector)
    if not element:
        return False
    await self.sleep(0.55)
    x_offset = 30
    y_offset = 36
    debug = "--debug" in sys.argv
    was_clicked = False
    gui_lock = FileLock(constants.MultiBrowser.PYAUTOGUILOCK)
    with gui_lock:  # Prevent issues with multiple processes
        await self.bring_to_front()
        await self.sleep(0.056)
        if debug:
            # Pick a quote style that does not clash with the selector
            displayed_selector = "`%s`" % selector
            if '"' not in selector:
                displayed_selector = '"%s"' % selector
            elif "'" not in selector:
                displayed_selector = "'%s'" % selector
            print(
                " <DEBUG> click_with_offset(%s, %s, %s)"
                % (displayed_selector, x_offset, y_offset)
            )
        with suppress(Exception):
            await element.mouse_click_with_offset_async(
                x=x_offset, y=y_offset, center=False
            )
            # Only reached when the click call did not raise
            was_clicked = True
            await self.sleep(0.075)
    if was_clicked:
        # Wait a moment for the click to succeed
        await self.sleep(0.75)
        if debug:
            print(" <DEBUG> hCaptcha was clicked!")
        return True
    if debug:
        print(" <DEBUG> hCaptcha was NOT clicked!")
    return False

async def get_element_rect(self, selector, timeout=5):
element = await self.select(selector, timeout=timeout)
Expand Down Expand Up @@ -1505,6 +1588,25 @@ async def click(self, selector, timeout=5):
element = await self.find(selector, timeout=timeout)
await element.click_async()

async def click_if_visible(self, selector, timeout=0):
    """Click the element matching ``selector`` only if it is visible.

    Args:
        selector: Selector of the element to click. A selector that
            uses ``:contains(`` is recalculated with
            ``page_utils.recalculate_selector`` before it is passed to
            ``find``, while visibility is always checked with the
            selector exactly as the caller wrote it.
        timeout: Seconds to wait for the element when it is not visible
            right away. With the default of 0 there is no waiting.

    Returns:
        None. Errors raised while finding or clicking are suppressed.
    """
    original_selector = selector
    if ":contains(" in selector:
        selector, _ = page_utils.recalculate_selector(
            selector, by="css selector", xp_ok=True
        )
    if await self.is_element_visible(original_selector):
        with suppress(Exception):
            element = await self.find(selector, timeout=0.01)
            await element.click_async()
    elif timeout == 0:
        return
    else:
        with suppress(Exception):
            await self.find(selector, timeout=timeout)
            # Check visibility with the caller's selector, as above:
            # is_element_visible() only takes its text-matching path
            # when the selector still contains ":contains(".
            if await self.is_element_visible(original_selector):
                element = await self.find(selector, timeout=0.01)
                await element.click_async()

async def click_with_offset(self, selector, x, y, center=False, timeout=5):
element = await self.find(selector, timeout=timeout)
await element.scroll_into_view_async()
Expand All @@ -1516,10 +1618,13 @@ async def solve_captcha(self):
if await self.__on_a_cf_turnstile_page(source):
pass
elif await self.__on_a_g_recaptcha_page(source):
await self.__gui_click_recaptcha()
return
result = await self.__gui_click_recaptcha()
return result
elif await self.__on_an_incapsula_hcaptcha_page():
result = await self.__cdp_click_incapsula_hcaptcha()
return result
else:
return
return False
selector = None
if await self.is_element_present('[class="cf-turnstile"]'):
selector = '[class="cf-turnstile"]'
Expand Down Expand Up @@ -1568,9 +1673,9 @@ async def solve_captcha(self):
):
selector = "div:not([class]) > div:not([class])"
else:
return
return False
if not selector:
return
return False
if (
await self.is_element_present("form")
and (
Expand Down Expand Up @@ -1670,6 +1775,8 @@ async def solve_captcha(self):
selector, x_offset, y_offset, timeout=1
)
await self.sleep(0.22)
return True
return False

async def click_captcha(self):
    """Same as solve_captcha(): run it and return its result."""
    return await self.solve_captcha()
Expand Down
Loading