Skip to content

Commit 1dc0b7a

Browse files
committed
fix(fetchers/content): increase the default max number of retries and raise error on max retries
Ref.: #197 (comment)
1 parent 17ea982 commit 1dc0b7a

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

scrapling/engines/toolbelt/convertor.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -187,34 +187,34 @@ async def _async_process_response_history(
187187
return history
188188

189189
@classmethod
def _get_page_content(cls, page: SyncPage, max_retries: int = 20) -> str:
    """
    A workaround for the Playwright issue with `page.content()` on Windows. Ref.: https://github.com/microsoft/playwright/issues/16108

    Calls `page.content()` up to `max_retries` times, waiting 500ms after each attempt that raises `PlaywrightError`.

    :param page: The page to extract content from.
    :param max_retries: Maximum number of retry attempts before raising `RuntimeError`.
    :return: The page content, or an empty string if `page.content()` returned a falsy value.
    :raises RuntimeError: If every attempt raised `PlaywrightError`; the last such error is attached as the cause.
    """
    last_error = None
    for _ in range(max_retries):
        try:
            return page.content() or ""
        except PlaywrightError as error:
            # Remember the most recent failure so the final error can expose its cause
            last_error = error
            page.wait_for_timeout(500)
    # Chain the underlying Playwright failure instead of discarding it
    raise RuntimeError(f"Failed to retrieve the page content after retrying for {max_retries * 500}ms.") from last_error
203203

204204
@classmethod
async def _get_async_page_content(cls, page: AsyncPage, max_retries: int = 20) -> str:
    """
    A workaround for the Playwright issue with `page.content()` on Windows. Ref.: https://github.com/microsoft/playwright/issues/16108

    Awaits `page.content()` up to `max_retries` times, waiting 500ms after each attempt that raises `PlaywrightError`.

    :param page: The page to extract content from.
    :param max_retries: Maximum number of retry attempts before raising `RuntimeError`.
    :return: The page content, or an empty string if `page.content()` returned a falsy value.
    :raises RuntimeError: If every attempt raised `PlaywrightError`; the last such error is attached as the cause.
    """
    last_error = None
    for _ in range(max_retries):
        try:
            return (await page.content()) or ""
        except PlaywrightError as error:
            # Remember the most recent failure so the final error can expose its cause
            last_error = error
            await page.wait_for_timeout(500)
    # Chain the underlying Playwright failure instead of discarding it
    raise RuntimeError(f"Failed to retrieve the page content after retrying for {max_retries * 500}ms.") from last_error
218218

219219
@classmethod
220220
async def from_async_playwright_response(

0 commit comments

Comments
 (0)