Skip to content

Commit 72e50a4

Browse files
committed
feat: add infinite scrolling
1 parent 458bb28 commit 72e50a4

File tree

4 files changed

+28
-37
lines changed

4 files changed

+28
-37
lines changed

scrapegraph-py/examples/sync/smartscraper_infinite_scroll.py

Lines changed: 15 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -14,41 +14,20 @@ def main():
1414
# Initialize the client with your API key
1515
sgai_client = Client(api_key="your-api-key-here")
1616

17-
try:
18-
# Example 1: Basic infinite scrolling with default settings
19-
response1 = sgai_client.smartscraper(
20-
website_url="https://example.com/infinite-scroll",
21-
user_prompt="Extract all product names and prices from the page",
22-
infinite_scrolling=True # Uses default max_pages=10
23-
)
24-
print("\nExample 1 - Basic infinite scrolling:")
25-
print(f"Request ID: {response1['request_id']}")
26-
print(f"Result: {response1['result']}")
27-
28-
# Example 2: Custom infinite scrolling with specific max pages
29-
response2 = sgai_client.smartscraper(
30-
website_url="https://example.com/long-list",
31-
user_prompt="Extract all article titles and their publication dates",
32-
infinite_scrolling=True,
33-
max_pages=50 # Custom maximum number of pages to scroll
34-
)
35-
print("\nExample 2 - Custom max pages:")
36-
print(f"Request ID: {response2['request_id']}")
37-
print(f"Result: {response2['result']}")
38-
39-
# Example 3: Without infinite scrolling (for comparison)
40-
response3 = sgai_client.smartscraper(
41-
website_url="https://example.com/static-page",
42-
user_prompt="Extract the main heading and first paragraph",
43-
infinite_scrolling=False
44-
)
45-
print("\nExample 3 - Without infinite scrolling:")
46-
print(f"Request ID: {response3['request_id']}")
47-
print(f"Result: {response3['result']}")
48-
49-
finally:
50-
# Always close the client when done
51-
sgai_client.close()
17+
response1 = sgai_client.smartscraper(
18+
website_url="https://www.ycombinator.com/companies",
19+
user_prompt="Extract all the companies and their info",
20+
infinite_scrolling=True,
21+
max_pages=10,
22+
)
23+
24+
print("\nExample 1 - Basic infinite scrolling:")
25+
print(f"Request ID: {response1['request_id']}")
26+
print(f"Result: {response1['result']}")
27+
28+
29+
# Always close the client when done
30+
sgai_client.close()
5231

5332
if __name__ == "__main__":
54-
main()
33+
main()

scrapegraph-py/scrapegraph_py/async_client.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,8 @@ async def smartscraper(
174174
website_html: Optional[str] = None,
175175
headers: Optional[dict[str, str]] = None,
176176
output_schema: Optional[BaseModel] = None,
177+
infinite_scrolling: bool = False,
178+
max_pages: int = 10,
177179
):
178180
"""Send a smartscraper request"""
179181
logger.info("🔍 Starting smartscraper request")
@@ -184,13 +186,17 @@ async def smartscraper(
184186
if headers:
185187
logger.debug("🔧 Using custom headers")
186188
logger.debug(f"📝 Prompt: {user_prompt}")
189+
if infinite_scrolling:
190+
logger.debug(f"🔄 Infinite scrolling enabled with max_pages={max_pages}")
187191

188192
request = SmartScraperRequest(
189193
website_url=website_url,
190194
website_html=website_html,
191195
headers=headers,
192196
user_prompt=user_prompt,
193197
output_schema=output_schema,
198+
infinite_scrolling=infinite_scrolling,
199+
max_pages=max_pages,
194200
)
195201
logger.debug("✅ Request validation passed")
196202

scrapegraph-py/scrapegraph_py/client.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,8 @@ def smartscraper(
182182
website_html: Optional[str] = None,
183183
headers: Optional[dict[str, str]] = None,
184184
output_schema: Optional[BaseModel] = None,
185+
infinite_scrolling: bool = False,
186+
max_pages: int = 10,
185187
):
186188
"""Send a smartscraper request"""
187189
logger.info("🔍 Starting smartscraper request")
@@ -192,13 +194,17 @@ def smartscraper(
192194
if headers:
193195
logger.debug("🔧 Using custom headers")
194196
logger.debug(f"📝 Prompt: {user_prompt}")
197+
if infinite_scrolling:
198+
logger.debug(f"🔄 Infinite scrolling enabled with max_pages={max_pages}")
195199

196200
request = SmartScraperRequest(
197201
website_url=website_url,
198202
website_html=website_html,
199203
headers=headers,
200204
user_prompt=user_prompt,
201205
output_schema=output_schema,
206+
infinite_scrolling=infinite_scrolling,
207+
max_pages=max_pages,
202208
)
203209
logger.debug("✅ Request validation passed")
204210

scrapegraph-py/uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
(0)