Skip to content

Commit 893e325

Browse files
committed
feat: add main entry
- also run the scraper in a separate thread - SmartScraperGraph uses asyncio.run internally, so it can't be run within another asyncio.run
1 parent db3fc8b commit 893e325

File tree

3 files changed

+52
-0
lines changed

3 files changed

+52
-0
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .ai_scrapper_func import ai_scrapper_func
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
import asyncio
2+
3+
from .main import main
4+
5+
# Execute the Actor entry point: run the `main()` coroutine to completion
# via `asyncio.run()`.
6+
asyncio.run(main())
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
"""This module defines the main entry point for the Apify Actor.
2+
3+
Feel free to modify this file to suit your specific needs.
4+
5+
To build Apify Actors, utilize the Apify SDK toolkit, read more at the official documentation:
6+
https://docs.apify.com/sdk/python
7+
Also, see more about why Actors are cool and easy to use here:
8+
https://whitepaper.actor/
9+
"""
10+
11+
from apify import Actor
12+
13+
from src import ai_scrapper_func
14+
15+
import asyncio
16+
from concurrent.futures import ThreadPoolExecutor
17+
18+
async def run_scraper_in_thread(kwargs):
    """Run the blocking ``ai_scrapper_func`` in a worker thread and await it.

    The scraper is executed off the event-loop thread because SmartScraperGraph
    calls ``asyncio.run()`` internally, and ``asyncio.run()`` cannot be nested
    inside an event loop that is already running in the same thread.

    Args:
        kwargs: Mapping of keyword arguments forwarded to ``ai_scrapper_func``.
            ``None`` (or any empty mapping) results in a call with no
            arguments.

    Returns:
        Whatever ``ai_scrapper_func(**kwargs)`` returns.

    Raises:
        Exception: Any exception raised by ``ai_scrapper_func`` propagates to
            the awaiting caller.
    """
    call_kwargs = kwargs or {}

    # Inside a coroutine the running loop always exists; get_running_loop()
    # is the supported accessor and never implicitly creates a new loop.
    loop = asyncio.get_running_loop()

    # A dedicated single-worker pool: exactly one job is submitted, and the
    # `with` block tears the thread down once the result has been awaited.
    with ThreadPoolExecutor(max_workers=1) as executor:
        return await loop.run_in_executor(
            executor, lambda: ai_scrapper_func(**call_kwargs)
        )
22+
23+
async def main() -> None:
    """Main entry point for the Apify Actor.

    Reads the Actor input, runs the AI scraper with that input as keyword
    arguments in a worker thread, logs the URL, prompt and result content,
    and pushes the result with ``Actor.push_data``.

    This coroutine is executed using `asyncio.run()`, so it must remain an
    asynchronous function for proper execution. Asynchronous execution is
    required for communication with the Apify platform, and it also enhances
    performance in the field of web scraping significantly.
    """
    async with Actor:
        Actor.log.info('Hello from the Actor!')

        # A missing/empty input falls back to an empty dict so the `.get()`
        # lookups below are always valid.
        actor_input = await Actor.get_input() or {}

        Actor.log.info('Running the ai scrapper...')

        # The scraper runs in a separate thread; see run_scraper_in_thread.
        result = await run_scraper_in_thread(actor_input)

        Actor.log.info(f'URL: {actor_input.get("url")}')
        Actor.log.info(f'Prompt: {actor_input.get("user_prompt")}')
        # Double quotes inside the single-quoted f-string: reusing the outer
        # quote character is a SyntaxError on Python versions before 3.12.
        Actor.log.info(f'Result: {result.get("content")}')
        await Actor.push_data(result)

        await Actor.exit(status_message='Actor finished successfully!')

0 commit comments

Comments
 (0)