fix(fetchers): add max retry limit to _get_page_content to prevent infinite loop (#197)

D4Vinci · web-flow · commit 17ea9821628b · 2026-03-17T22:09:05.000+02:00
diff --git a/scrapling/engines/toolbelt/convertor.py b/scrapling/engines/toolbelt/convertor.py
@@ -187,34 +187,34 @@ async def _async_process_response_history(
         return history
 
     @classmethod
-    def _get_page_content(cls, page: SyncPage) -> str:
+    def _get_page_content(cls, page: SyncPage, max_retries: int = 10) -> str:
         """
         A workaround for the Playwright issue with `page.content()` on Windows. Ref.: https://github.com/microsoft/playwright/issues/16108
         :param page: The page to extract content from.
+        :param max_retries: Maximum number of attempts to read the page content before giving up and returning an empty string.
         :return:
         """
-        while True:
+        for _ in range(max_retries):
             try:
                 return page.content() or ""
             except PlaywrightError:
                 page.wait_for_timeout(500)
-                continue
-        return ""  # pyright: ignore
+        return ""
 
     @classmethod
-    async def _get_async_page_content(cls, page: AsyncPage) -> str:
+    async def _get_async_page_content(cls, page: AsyncPage, max_retries: int = 10) -> str:
         """
         A workaround for the Playwright issue with `page.content()` on Windows. Ref.: https://github.com/microsoft/playwright/issues/16108
         :param page: The page to extract content from.
+        :param max_retries: Maximum number of attempts to read the page content before giving up and returning an empty string.
         :return:
         """
-        while True:
+        for _ in range(max_retries):
             try:
                 return (await page.content()) or ""
             except PlaywrightError:
                 await page.wait_for_timeout(500)
-                continue
-        return ""  # pyright: ignore
+        return ""
 
     @classmethod
     async def from_async_playwright_response(