Skip to content

Commit 17ea982

Browse files
authored
fix(fetchers): add max retry limit to _get_page_content to prevent infinite loop (#197)
2 parents d9932f2 + 8e4e59e commit 17ea982

File tree

1 file changed

+8
-8
lines changed

1 file changed

+8
-8
lines changed

scrapling/engines/toolbelt/convertor.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -187,34 +187,34 @@ async def _async_process_response_history(
187187
return history
188188

189189
@classmethod
def _get_page_content(cls, page: SyncPage, max_retries: int = 10) -> str:
    """
    A workaround for the Playwright issue with `page.content()` on Windows. Ref.: https://github.com/microsoft/playwright/issues/16108

    `page.content()` is called up to `max_retries` times. Each `PlaywrightError` is followed by a 500 ms wait
    before the next attempt; no wait is performed after the final failed attempt.

    :param page: The page to extract content from.
    :param max_retries: Maximum number of attempts before giving up and returning an empty string.
        A value below 1 means no attempt is made at all.
    :return: The page content, or an empty string when the content is falsy or every attempt raised `PlaywrightError`.
    """
    for attempt in range(1, max_retries + 1):
        try:
            return page.content() or ""
        except PlaywrightError:
            # Pause only when another attempt follows; waiting after the last failure just delays the caller.
            if attempt < max_retries:
                page.wait_for_timeout(500)
    return ""
203203

204204
@classmethod
async def _get_async_page_content(cls, page: AsyncPage, max_retries: int = 10) -> str:
    """
    A workaround for the Playwright issue with `page.content()` on Windows. Ref.: https://github.com/microsoft/playwright/issues/16108

    `page.content()` is awaited up to `max_retries` times. Each `PlaywrightError` is followed by a 500 ms wait
    before the next attempt; no wait is performed after the final failed attempt.

    :param page: The page to extract content from.
    :param max_retries: Maximum number of attempts before giving up and returning an empty string.
        A value below 1 means no attempt is made at all.
    :return: The page content, or an empty string when the content is falsy or every attempt raised `PlaywrightError`.
    """
    for attempt in range(1, max_retries + 1):
        try:
            return (await page.content()) or ""
        except PlaywrightError:
            # Pause only when another attempt follows; waiting after the last failure just delays the caller.
            if attempt < max_retries:
                await page.wait_for_timeout(500)
    return ""
218218

219219
@classmethod
220220
async def from_async_playwright_response(

0 commit comments

Comments
 (0)