Skip to content

Commit bf5aa02

Browse files
committed
fix(browsers): solve an issue with leaving the Playwright loop open when the CDP connection fails
This has been causing issues with tests for a long time, and the reason has now finally been found.
1 parent 02c9dff commit bf5aa02

File tree

2 files changed

+79
-61
lines changed

2 files changed

+79
-61
lines changed

scrapling/engines/_browsers/_controllers.py

Lines changed: 39 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,11 @@
33

44
from playwright.sync_api import (
55
Locator,
6-
Playwright,
76
sync_playwright,
87
)
98
from playwright.async_api import (
109
async_playwright,
1110
Locator as AsyncLocator,
12-
Playwright as AsyncPlaywright,
1311
BrowserContext as AsyncBrowserContext,
1412
)
1513

@@ -71,21 +69,27 @@ def __init__(self, **kwargs: Unpack[PlaywrightSession]):
7169
def start(self):
7270
"""Create a browser for this instance and context."""
7371
if not self.playwright:
74-
self.playwright: Playwright = sync_playwright().start() # pyright: ignore [reportAttributeAccessIssue]
72+
self.playwright = sync_playwright().start()
7573

76-
if self._config.cdp_url: # pragma: no cover
77-
browser = self.playwright.chromium.connect_over_cdp(endpoint_url=self._config.cdp_url)
78-
self.context = browser.new_context(**self._context_options)
79-
else:
80-
self.context = self.playwright.chromium.launch_persistent_context(**self._launch_options)
74+
try:
75+
if self._config.cdp_url: # pragma: no cover
76+
browser = self.playwright.chromium.connect_over_cdp(endpoint_url=self._config.cdp_url)
77+
self.context = browser.new_context(**self._context_options)
78+
else:
79+
self.context = self.playwright.chromium.launch_persistent_context(**self._launch_options)
8180

82-
if self._config.init_script: # pragma: no cover
83-
self.context.add_init_script(path=self._config.init_script)
81+
if self._config.init_script: # pragma: no cover
82+
self.context.add_init_script(path=self._config.init_script)
8483

85-
if self._config.cookies: # pragma: no cover
86-
self.context.add_cookies(self._config.cookies)
84+
if self._config.cookies: # pragma: no cover
85+
self.context.add_cookies(self._config.cookies)
8786

88-
self._is_alive = True
87+
self._is_alive = True
88+
except Exception:
89+
# Clean up playwright if browser setup fails
90+
self.playwright.stop()
91+
self.playwright = None
92+
raise
8993
else:
9094
raise RuntimeError("Session has been already started")
9195

@@ -209,23 +213,28 @@ def __init__(self, **kwargs: Unpack[PlaywrightSession]):
209213
async def start(self):
210214
"""Create a browser for this instance and context."""
211215
if not self.playwright:
212-
self.playwright: AsyncPlaywright = await async_playwright().start() # pyright: ignore [reportAttributeAccessIssue]
213-
214-
if self._config.cdp_url:
215-
browser = await self.playwright.chromium.connect_over_cdp(endpoint_url=self._config.cdp_url)
216-
self.context: AsyncBrowserContext = await browser.new_context(**self._context_options)
217-
else:
218-
self.context: AsyncBrowserContext = await self.playwright.chromium.launch_persistent_context(
219-
**self._launch_options
220-
)
221-
222-
if self._config.init_script: # pragma: no cover
223-
await self.context.add_init_script(path=self._config.init_script)
224-
225-
if self._config.cookies:
226-
await self.context.add_cookies(self._config.cookies) # pyright: ignore
227-
228-
self._is_alive = True
216+
self.playwright = await async_playwright().start()
217+
try:
218+
if self._config.cdp_url:
219+
browser = await self.playwright.chromium.connect_over_cdp(endpoint_url=self._config.cdp_url)
220+
self.context: AsyncBrowserContext = await browser.new_context(**self._context_options)
221+
else:
222+
self.context: AsyncBrowserContext = await self.playwright.chromium.launch_persistent_context(
223+
**self._launch_options
224+
)
225+
226+
if self._config.init_script: # pragma: no cover
227+
await self.context.add_init_script(path=self._config.init_script)
228+
229+
if self._config.cookies:
230+
await self.context.add_cookies(self._config.cookies) # pyright: ignore
231+
232+
self._is_alive = True
233+
except Exception:
234+
# Clean up playwright if browser setup fails
235+
await self.playwright.stop()
236+
self.playwright = None
237+
raise
229238
else:
230239
raise RuntimeError("Session has been already started")
231240

scrapling/engines/_browsers/_stealth.py

Lines changed: 40 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,10 @@
66
from playwright.sync_api import (
77
Locator,
88
Page,
9-
Playwright,
109
)
1110
from playwright.async_api import (
1211
Page as async_Page,
1312
Locator as AsyncLocator,
14-
Playwright as AsyncPlaywright,
1513
BrowserContext as AsyncBrowserContext,
1614
)
1715
from patchright.sync_api import sync_playwright
@@ -82,24 +80,30 @@ def __init__(self, **kwargs: Unpack[StealthSession]):
8280
def start(self):
8381
"""Create a browser for this instance and context."""
8482
if not self.playwright:
85-
self.playwright: Playwright = sync_playwright().start() # pyright: ignore [reportAttributeAccessIssue]
83+
self.playwright = sync_playwright().start()
8684

87-
if self._config.cdp_url: # pragma: no cover
88-
browser = self.playwright.chromium.connect_over_cdp(endpoint_url=self._config.cdp_url)
89-
self.context = browser.new_context(**self._context_options)
90-
else:
91-
self.context = self.playwright.chromium.launch_persistent_context(**self._launch_options)
85+
try:
86+
if self._config.cdp_url: # pragma: no cover
87+
browser = self.playwright.chromium.connect_over_cdp(endpoint_url=self._config.cdp_url)
88+
self.context = browser.new_context(**self._context_options)
89+
else:
90+
self.context = self.playwright.chromium.launch_persistent_context(**self._launch_options)
9291

93-
for script in _compiled_stealth_scripts():
94-
self.context.add_init_script(script=script)
92+
for script in _compiled_stealth_scripts():
93+
self.context.add_init_script(script=script)
9594

96-
if self._config.init_script: # pragma: no cover
97-
self.context.add_init_script(path=self._config.init_script)
95+
if self._config.init_script: # pragma: no cover
96+
self.context.add_init_script(path=self._config.init_script)
9897

99-
if self._config.cookies: # pragma: no cover
100-
self.context.add_cookies(self._config.cookies)
98+
if self._config.cookies: # pragma: no cover
99+
self.context.add_cookies(self._config.cookies)
101100

102-
self._is_alive = True
101+
self._is_alive = True
102+
except Exception:
103+
# Clean up playwright if browser setup fails
104+
self.playwright.stop()
105+
self.playwright = None
106+
raise
103107
else:
104108
raise RuntimeError("Session has been already started")
105109

@@ -308,26 +312,31 @@ def __init__(self, **kwargs: Unpack[StealthSession]):
308312
async def start(self):
309313
"""Create a browser for this instance and context."""
310314
if not self.playwright:
311-
self.playwright: AsyncPlaywright = await async_playwright().start() # pyright: ignore [reportAttributeAccessIssue]
312-
313-
if self._config.cdp_url:
314-
browser = await self.playwright.chromium.connect_over_cdp(endpoint_url=self._config.cdp_url)
315-
self.context: AsyncBrowserContext = await browser.new_context(**self._context_options)
316-
else:
317-
self.context: AsyncBrowserContext = await self.playwright.chromium.launch_persistent_context(
318-
**self._launch_options
319-
)
315+
self.playwright = await async_playwright().start()
316+
try:
317+
if self._config.cdp_url:
318+
browser = await self.playwright.chromium.connect_over_cdp(endpoint_url=self._config.cdp_url)
319+
self.context: AsyncBrowserContext = await browser.new_context(**self._context_options)
320+
else:
321+
self.context: AsyncBrowserContext = await self.playwright.chromium.launch_persistent_context(
322+
**self._launch_options
323+
)
320324

321-
for script in _compiled_stealth_scripts():
322-
await self.context.add_init_script(script=script)
325+
for script in _compiled_stealth_scripts():
326+
await self.context.add_init_script(script=script)
323327

324-
if self._config.init_script: # pragma: no cover
325-
await self.context.add_init_script(path=self._config.init_script)
328+
if self._config.init_script: # pragma: no cover
329+
await self.context.add_init_script(path=self._config.init_script)
326330

327-
if self._config.cookies:
328-
await self.context.add_cookies(self._config.cookies) # pyright: ignore
331+
if self._config.cookies:
332+
await self.context.add_cookies(self._config.cookies) # pyright: ignore
329333

330-
self._is_alive = True
334+
self._is_alive = True
335+
except Exception:
336+
# Clean up playwright if browser setup fails
337+
await self.playwright.stop()
338+
self.playwright = None
339+
raise
331340
else:
332341
raise RuntimeError("Session has been already started")
333342

0 commit comments

Comments
 (0)