Skip to content

Commit 458bb28

Browse files
committed
feat: add infinite scroll
1 parent 6c2806d commit 458bb28

File tree

3 files changed

+131
-1
lines changed

3 files changed

+131
-1
lines changed
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
"""
2+
Example of using SmartScraper with infinite scrolling in asynchronous mode.
3+
This example demonstrates how to scrape content from multiple webpages concurrently using infinite scrolling.
4+
"""
5+
6+
import asyncio
7+
from scrapegraph_py import AsyncClient
8+
from scrapegraph_py.logger import sgai_logger
9+
10+
# Set up logging
11+
sgai_logger.set_logging(level="INFO")
12+
13+
async def scrape_with_infinite_scroll(client: AsyncClient, url: str, prompt: str, max_pages: int = 10):
    """Run a single SmartScraper request with infinite scrolling enabled.

    Args:
        client: An open AsyncClient used to issue the request.
        url: Address of the page to scrape.
        prompt: Natural-language description of the data to extract.
        max_pages: Upper bound on the number of pages to scroll (default 10).

    Returns:
        Whatever ``client.smartscraper`` returns for this request
        (callers read ``request_id`` and ``result`` from it).
    """
    # No intermediate variable needed — hand the awaited response straight back.
    return await client.smartscraper(
        website_url=url,
        user_prompt=prompt,
        infinite_scrolling=True,
        max_pages=max_pages,
    )
22+
23+
async def main():
    """Run three infinite-scroll scrapes concurrently, then one static scrape.

    Demonstrates fan-out with ``asyncio.gather`` over the shared async client,
    followed by a plain (non-scrolling) request for comparison.
    """
    # Initialize the async client with your API key; the async context
    # manager guarantees the client is closed when the block exits.
    async with AsyncClient(api_key="your-api-key-here") as sgai_client:
        # Examples 1-3: scrape multiple pages concurrently.
        tasks = [
            scrape_with_infinite_scroll(
                sgai_client,
                "https://example.com/products",
                "Extract all product names and prices",
                max_pages=20,
            ),
            scrape_with_infinite_scroll(
                sgai_client,
                "https://example.com/articles",
                "Extract all article titles and authors",
                max_pages=15,
            ),
            scrape_with_infinite_scroll(
                sgai_client,
                "https://example.com/news",
                "Extract all news headlines and dates",
                max_pages=10,
            ),
        ]

        # Wait for all scraping tasks to complete; gather preserves order,
        # so results[0] corresponds to tasks[0], and so on.
        results = await asyncio.gather(*tasks)

        # Process and print results (labelled Example 1..3).
        for i, result in enumerate(results, 1):
            print(f"\nExample {i} Results:")
            print(f"Request ID: {result['request_id']}")
            print(f"Result: {result['result']}")

        # Example 4: single page without infinite scrolling, for comparison.
        # (Comment relabelled from "Example 2" to match the printed label and
        # the 1..3 sequence printed above.)
        response = await sgai_client.smartscraper(
            website_url="https://example.com/static-page",
            user_prompt="Extract the main heading and first paragraph",
            infinite_scrolling=False,
        )
        print("\nExample 4 - Without infinite scrolling:")
        print(f"Request ID: {response['request_id']}")
        print(f"Result: {response['result']}")


if __name__ == "__main__":
    asyncio.run(main())
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
"""
2+
Example of using SmartScraper with infinite scrolling in synchronous mode.
3+
This example demonstrates how to scrape content from a webpage that requires scrolling to load more content.
4+
"""
5+
6+
from scrapegraph_py import Client
7+
from scrapegraph_py.logger import sgai_logger
8+
import time
9+
10+
# Set up logging
11+
sgai_logger.set_logging(level="INFO")
12+
13+
def main():
    """Demonstrate SmartScraper infinite scrolling against three sample pages."""

    def _show(header: str, payload) -> None:
        # Shared output formatting for every example below.
        print(header)
        print(f"Request ID: {payload['request_id']}")
        print(f"Result: {payload['result']}")

    # Initialize the client with your API key
    sgai_client = Client(api_key="your-api-key-here")

    try:
        # Example 1: infinite scrolling with the default page limit
        _show(
            "\nExample 1 - Basic infinite scrolling:",
            sgai_client.smartscraper(
                website_url="https://example.com/infinite-scroll",
                user_prompt="Extract all product names and prices from the page",
                infinite_scrolling=True,  # Uses default max_pages=10
            ),
        )

        # Example 2: infinite scrolling with an explicit page cap
        _show(
            "\nExample 2 - Custom max pages:",
            sgai_client.smartscraper(
                website_url="https://example.com/long-list",
                user_prompt="Extract all article titles and their publication dates",
                infinite_scrolling=True,
                max_pages=50,  # Custom maximum number of pages to scroll
            ),
        )

        # Example 3: no scrolling at all, for comparison
        _show(
            "\nExample 3 - Without infinite scrolling:",
            sgai_client.smartscraper(
                website_url="https://example.com/static-page",
                user_prompt="Extract the main heading and first paragraph",
                infinite_scrolling=False,
            ),
        )
    finally:
        # Always close the client, even if one of the requests raised.
        sgai_client.close()


if __name__ == "__main__":
    main()

scrapegraph-py/scrapegraph_py/models/smartscraper.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from uuid import UUID
55

66
from bs4 import BeautifulSoup
7-
from pydantic import BaseModel, Field, model_validator
7+
from pydantic import BaseModel, Field, model_validator, conint
88

99

1010
class SmartScraperRequest(BaseModel):
@@ -28,6 +28,14 @@ class SmartScraperRequest(BaseModel):
2828
},
2929
description="Optional headers to send with the request, including cookies and user agent",
3030
)
31+
infinite_scrolling: bool = Field(
32+
default=False,
33+
description="Enable infinite scrolling to load more content dynamically",
34+
)
35+
max_pages: conint(ge=1, le=1000) = Field(
36+
default=10,
37+
description="Maximum number of pages to scroll when infinite_scrolling is enabled",
38+
)
3139
output_schema: Optional[Type[BaseModel]] = None
3240

3341
@model_validator(mode="after")

0 commit comments

Comments
 (0)